├── .Rbuildignore ├── .gitattributes ├── .gitattributes~ ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── JOSS ├── paper.bib ├── paper.html └── paper.md ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── NOTES.txt ├── R ├── RcppExports.R ├── baseline_estimation.R ├── data.R ├── fss_exponential_family.R ├── fss_subset_score.R ├── fss_utility_functions.R ├── mscan_fss.R ├── scan_bayes_negbin.R ├── scan_eb_negbin.R ├── scan_eb_poisson.R ├── scan_eb_zip.R ├── scan_pb_poisson.R ├── scan_permutation.R ├── scanstatistics.R ├── scanstatistics_general.R ├── utility_functions.R ├── zone_utility_functions.R └── zones.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data ├── NM_geo.rda ├── NM_map.rda └── NM_popcas.rda ├── docs ├── README_figures │ ├── county_scores-1.png │ └── newmexico_map-1.png ├── articles │ ├── articles │ │ ├── make_data.html │ │ └── make_data_files │ │ │ └── figure-html │ │ │ ├── grab_seat_coords-1.png │ │ │ ├── plot_centroids-1.png │ │ │ └── plot_evolution-1.png │ ├── index.html │ ├── introduction.html │ ├── introduction_files │ │ └── figure-html │ │ │ ├── county_scores-1.png │ │ │ └── newmexico_map-1.png │ └── references.bib ├── authors.html ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js └── reference │ ├── NM_geo.html │ ├── NM_map.html │ ├── NM_popcas.html │ ├── aggregate_per_location.html │ ├── aggregate_per_stream.html │ ├── apply_rowwise.html │ ├── closest_subsets.html │ ├── connected_neighbors.html │ ├── connected_to.html │ ├── coords_to_knn-1.png │ ├── coords_to_knn-2.png │ ├── coords_to_knn.html │ ├── df_to_matrix.html │ ├── dist_to_knn.html │ ├── estimate_baselines.html │ ├── estimate_variances.html │ ├── estimate_zip_params.html │ ├── exponential_lambda.html │ ├── exponential_priority.html │ ├── exponential_qmax.html │ ├── exponential_score.html │ ├── flexible_zones.html │ ├── flipud.html │ ├── gaussian_lambda.html │ ├── gaussian_priority.html │ ├── gaussian_qmax.html │ ├── 
gaussian_score.html │ ├── get_subset.html │ ├── get_zero_indices.html │ ├── get_zone.html │ ├── gumbel_pvalue.html │ ├── has_converged.html │ ├── if_connected.html │ ├── index.html │ ├── is_connected.html │ ├── knn_zones.html │ ├── mc_pvalue.html │ ├── mscan_fss.html │ ├── poisson_lambda.html │ ├── poisson_priority.html │ ├── poisson_qmax.html │ ├── poisson_score.html │ ├── powerset_zones.html │ ├── print.scanstatistic.html │ ├── prioritize_and_execute.html │ ├── prioritize_cols.html │ ├── reorder_rows.html │ ├── scan_eb_negbin.html │ ├── scan_eb_negbin_cpp.html │ ├── scan_eb_poisson.html │ ├── scan_eb_poisson_cpp.html │ ├── scan_eb_zip.html │ ├── scan_eb_zip_cpp.html │ ├── scan_pb_poisson.html │ ├── scan_pb_poisson_cpp.html │ ├── scanstatistics.html │ ├── score_locations.html │ ├── score_priority_subset.html │ ├── subset_aggregation.html │ ├── subset_aggregation_FF.html │ ├── subset_aggregation_FN_NF.html │ ├── sum_over_subset.html │ ├── sum_reorder_sum.html │ └── top_clusters.html ├── inst ├── CITATION └── image │ ├── county_scores-1.png │ └── newmexico_map-1.png ├── man ├── NM_geo.Rd ├── NM_map.Rd ├── NM_popcas.Rd ├── closest_subsets.Rd ├── connected_neighbors.Rd ├── connected_to.Rd ├── coords_to_knn.Rd ├── df_to_matrix.Rd ├── dist_to_knn.Rd ├── estimate_baselines.Rd ├── estimate_variances.Rd ├── estimate_zip_params.Rd ├── flexible_zones.Rd ├── flipud.Rd ├── get_zero_indices.Rd ├── get_zone.Rd ├── gumbel_pvalue.Rd ├── has_converged.Rd ├── if_connected.Rd ├── is_connected.Rd ├── knn_zones.Rd ├── matrix_to_df.Rd ├── mc_pvalue.Rd ├── permute_matrix.Rd ├── powerset_zones.Rd ├── print.scanstatistic.Rd ├── run_scan.Rd ├── scan_bayes_negbin.Rd ├── scan_bayes_negbin_cpp.Rd ├── scan_eb_negbin.Rd ├── scan_eb_negbin_cpp.Rd ├── scan_eb_poisson.Rd ├── scan_eb_poisson_cpp.Rd ├── scan_eb_zip.Rd ├── scan_eb_zip_cpp.Rd ├── scan_pb_perm_cpp.Rd ├── scan_pb_poisson.Rd ├── scan_pb_poisson_cpp.Rd ├── scan_permutation.Rd ├── scanstatistics.Rd ├── score_locations.Rd └── top_clusters.Rd 
├── references.bib ├── scanstatistics.Rproj ├── src ├── BGPscan.h ├── EBNBscan.h ├── EBPOIscan.h ├── EBZIPscan.h ├── FastSubsetScan.h ├── PBPERMscan.h ├── PBPOIabstract.h ├── PBPOIscan.h ├── RcppExports.cpp ├── USTscan.h ├── ZIPutility.h ├── probability_functions.cpp ├── probability_functions.h ├── scan_bayes_negbin.cpp ├── scan_bayes_negbin.h ├── scan_eb_negbin.cpp ├── scan_eb_negbin.h ├── scan_eb_poisson.cpp ├── scan_eb_poisson.h ├── scan_eb_zip.cpp ├── scan_eb_zip.h ├── scan_pb_perm.cpp ├── scan_pb_perm.h ├── scan_pb_poisson.cpp ├── scan_pb_poisson.h ├── scan_utility.cpp └── scan_utility.h ├── tests ├── testthat.R └── testthat │ ├── test_baseline_estimation.R │ ├── test_fss_exponential_family.R │ ├── test_fss_utility_functions.R │ ├── test_probability_functions.R │ ├── test_scan_bayes_negbin.R │ ├── test_scan_eb_negbin.R │ ├── test_scan_eb_poisson.R │ ├── test_scan_eb_zip.R │ ├── test_scan_pb_perm.R │ ├── test_scan_pb_poisson.R │ ├── test_zone_utility_functions.R │ └── test_zones.R └── vignettes ├── articles └── make_data.Rmd ├── introduction.Rmd ├── introduction.html └── references.bib /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^README\.Rmd$ 5 | ^README-.*\.png$ 6 | ^README_cache/* 7 | ^README_figures/* 8 | ^references\.bib$ 9 | ^figures/* 10 | vignettes/introduction_cache/* 11 | vignettes/introduction_files/* 12 | vignettes/articles/* 13 | vignettes/.build.timestamp 14 | cran-comments\.md 15 | ^docs/* 16 | NOTES.txt 17 | ^docs$ 18 | JOSS/* 19 | LICENSE.md 20 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 
2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.h text 8 | 9 | # Declare files that will always have LF line endings on checkout. 10 | *.sln text eol=lf 11 | 12 | # Denote all files that are truly binary and should not be modified. 13 | *.png binary 14 | *.jpg binary 15 | -------------------------------------------------------------------------------- /.gitattributes~: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | *.c text 7 | *.h text 8 | 9 | # Declare files that will always have CRLF line endings on checkout. 10 | *.sln text eol=lf 11 | 12 | # Denote all files that are truly binary and should not be modified. 13 | *.png binary 14 | *.jpg binary 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user/* 2 | R/.dropbox.attr 3 | .Rhistory 4 | .RData 5 | .Rbuildignore 6 | src/*.o 7 | src/*.so 8 | src/*.dll 9 | inst/doc 10 | .Rproj.user 11 | README_cache/ 12 | docs/README_cache/ 13 | docs/articles/articles/make_data_cache/ 14 | vignettes/articles/make_data_cache/ 15 | vignettes/.build.timestamp 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | # Header 4 | language: r 5 | cache: packages 6 | dist: trusty 7 | sudo: required 8 | warnings_are_errors: false 9 | 10 | # env 11 | env: 12 | global: 13 | - R_BUILD_ARGS="--no-build-vignettes 
--no-manual" 14 | - R_CHECK_ARGS="--no-build-vignettes --no-manual --as-cran" 15 | 16 | # notifications 17 | notifications: 18 | email: 19 | on_success: change 20 | on_failure: change 21 | 22 | # before script 23 | 24 | # after script 25 | after_script: 26 | - ./travis-tool.sh dump_sysinfo 27 | 28 | # after success 29 | after_success: 30 | - Rscript -e 'covr::codecov()' 31 | 32 | after_failure: 33 | - ./travis-tool.sh dump_logs 34 | 35 | 36 | # Custom 37 | 38 | # R packages 39 | r_packages: 40 | - roxygen2 41 | 42 | # R GitHub packages 43 | r_github_packages: 44 | - jimhester/covr 45 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: scanstatistics 2 | Type: Package 3 | Title: Space-Time Anomaly Detection using Scan Statistics 4 | Description: Detection of anomalous space-time clusters using the scan 5 | statistics methodology. Focuses on prospective surveillance of data streams, 6 | scanning for clusters with ongoing anomalies. Hypothesis testing is made 7 | possible by Monte Carlo simulation. 
8 | Version: 1.0.2 9 | Date: 2018-04-04 10 | Authors@R: person("Benjamin", "Allévius", email = "benjak@math.su.se", 11 | role = c("aut", "cre")) 12 | Maintainer: Benjamin Allévius 13 | Encoding: UTF-8 14 | License: GPL (>= 3) 15 | URL: https://github.com/BenjaK/scanstatistics 16 | BugReports: https://github.com/BenjaK/scanstatistics/issues 17 | Depends: R (>= 3.4) 18 | Imports: 19 | dplyr, 20 | emdbook, 21 | ismev, 22 | magrittr, 23 | methods, 24 | plyr, 25 | purrr, 26 | Rcpp, 27 | reliaR, 28 | sets, 29 | tibble, 30 | tidyr 31 | Suggests: 32 | doParallel, 33 | foreach, 34 | ggplot2, 35 | knitr, 36 | MASS, 37 | pscl, 38 | reshape2, 39 | rmarkdown, 40 | sp, 41 | testthat 42 | VignetteBuilder: knitr 43 | RoxygenNote: 6.1.0 44 | ByteCompile: true 45 | SystemRequirements: C++11 46 | LinkingTo: Rcpp, RcppArmadillo 47 | -------------------------------------------------------------------------------- /JOSS/paper.bib: -------------------------------------------------------------------------------- 1 | @article{Kulldorff2001, 2 | author = {Kulldorff, Martin}, 3 | journal = {Journal of the Royal Statistical Society Series a-Statistics in Society}, 4 | pages = {61--72}, 5 | title = {{Prospective time periodic geographical disease surveillance using a scan statistic}}, 6 | volume = {164}, 7 | year = {2001}, 8 | DOI = {10.1111/1467-985X.00186} 9 | } 10 | 11 | @article{Kulldorff2005, 12 | author = {Kulldorff, Martin and Heffernan, Richard and Hartman, Jessica and Assun{\c{c}}{\~{a}}o, Renato M. and Mostashari, Farzad}, 13 | journal = {PLoS Medicine}, 14 | number = {3}, 15 | pages = {0216--0224}, 16 | title = {{A space-time permutation scan statistic for disease outbreak detection}}, 17 | volume = {2}, 18 | year = {2005}, 19 | DOI = {10.1371/journal.pmed.0020059} 20 | } 21 | 22 | @inproceedings{Neill2005, 23 | address = {New York, New York, USA}, 24 | author = {Neill, Daniel B. and Moore, Andrew W. 
and Sabhnani, Maheshkumar and Daniel, Kenny}, 25 | booktitle = {Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining - KDD '05}, 26 | pages = {218}, 27 | publisher = {ACM Press}, 28 | title = {{Detection of emerging space-time clusters}}, 29 | year = {2005}, 30 | DOI = {10.1145/1081870.1081897} 31 | } 32 | 33 | @incollection{Neill2006, 34 | author = {Neill, Daniel B. and Moore, Andrew W. and Cooper, Gregory F.}, 35 | booktitle = {Advances in Neural Information Processing Systems}, 36 | editor = {Weiss, Y. and Sch{\"{o}}lkopf, P. B. and Platt, J. C.}, 37 | pages = {1003--1010}, 38 | publisher = {MIT Press}, 39 | title = {{A Bayesian Spatial Scan Statistic}}, 40 | volume = {18}, 41 | year = {2006} 42 | } 43 | 44 | @article{Tango2011, 45 | author = {Tango, Toshiro and Takahashi, Kunihiko and Kohriyama, Kazuaki}, 46 | journal = {Biometrics}, 47 | number = {1}, 48 | pages = {106--115}, 49 | title = {{A Space-Time Scan Statistic for Detecting Emerging Outbreaks}}, 50 | volume = {67}, 51 | year = {2011}, 52 | DOI = {10.1111/j.1541-0420.2010.01412.x} 53 | } 54 | 55 | @techreport{Allevius2017, 56 | author = {All{\'{e}}vius, Benjamin and H{\"{o}}hle, Michael}, 57 | institution = {Stockholm University}, 58 | title = {{An expectation-based space-time scan statistic for ZIP-distributed data}}, 59 | year = {2017} 60 | } 61 | 62 | @article{Allevius2018, 63 | author = {All{\'{e}}vius, Benjamin and H{\"{o}}hle, Michael}, 64 | journal = {Scandinavian Journal of Statistics}, 65 | title = {{An unconditional space-time scan statistic for ZIP-distributed data}}, 66 | year = {2018, in press} 67 | } 68 | 69 | -------------------------------------------------------------------------------- /JOSS/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'scanstatistics: space-time anomaly detection using scan statistics' 3 | authors: 4 | - affiliation: 1 5 | name: Benjamin Allévius 6 | 
orcid: 0000-0002-0927-7183 7 | date: "2 May 2018" 8 | bibliography: paper.bib 9 | tags: 10 | - scan statistic 11 | - cluster detection 12 | - anomaly detection 13 | - spatiotemporal 14 | affiliations: 15 | - index: 1 16 | name: Department of Mathematics, Stockholm University 17 | --- 18 | 19 | # Summary 20 | 21 | The R package `scanstatistics` enables the detection of anomalous space-time 22 | clusters using the scan statistics methodology. Scan statistics are commonly 23 | applied in disease surveillance, where they are used to detect disease outbreaks 24 | as they emerge locally. In this setting, cases of a given disease are recorded 25 | continuously across a country, and are then aggregated spatially to (say) 26 | district level, and temporally to (say) weekly counts. Scan statistics 27 | accomplish the detection task by searching the recent records of clusters of 28 | neighboring districts for patterns that seem anomalous given either past counts 29 | or the counts outside the cluster currently searched. 30 | 31 | The `scanstatistics` package implements several scan statistics, making it a 32 | partially overlapping complement to existing scan statistic software such as 33 | [SaTScan](https://www.satscan.org/). For example, the conditional Poisson 34 | [@Kulldorff2001] and space-time permutation [@Kulldorff2005] scan statistics 35 | are available in both [SaTScan](https://www.satscan.org/) and `scanstatistics`, 36 | while only the latter implements scan statistics for zero-inflated data 37 | [@Allevius2018], count data with overdispersion [@Tango2011], an unconditional 38 | (expectation-based) Poisson scan statistic [@Neill2005], and a Bayesian scan 39 | statistic [@Neill2006]. 40 | 41 | The R package `scanstatistics` is available on 42 | [CRAN](https://cran.r-project.org/package=scanstatistics) and its source code 43 | is available on [GitHub](https://github.com/BenjaK/scanstatistics). 
44 | 45 | 46 | # References 47 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,scanstatistic) 4 | export(coords_to_knn) 5 | export(df_to_matrix) 6 | export(dist_to_knn) 7 | export(flexible_zones) 8 | export(get_zero_indices) 9 | export(get_zone) 10 | export(knn_zones) 11 | export(scan_bayes_negbin) 12 | export(scan_bayes_negbin_cpp) 13 | export(scan_eb_negbin) 14 | export(scan_eb_negbin_cpp) 15 | export(scan_eb_poisson) 16 | export(scan_eb_poisson_cpp) 17 | export(scan_eb_zip) 18 | export(scan_eb_zip_cpp) 19 | export(scan_pb_perm_cpp) 20 | export(scan_pb_poisson) 21 | export(scan_pb_poisson_cpp) 22 | export(scan_permutation) 23 | export(score_locations) 24 | export(top_clusters) 25 | importFrom(Rcpp,sourceCpp) 26 | importFrom(dplyr,arrange) 27 | importFrom(dplyr,group_by) 28 | importFrom(dplyr,select) 29 | importFrom(dplyr,summarise) 30 | importFrom(ismev,gum.fit) 31 | importFrom(magrittr,"%<>%") 32 | importFrom(magrittr,"%>%") 33 | importFrom(plyr,alply) 34 | importFrom(reliaR,pgumbel) 35 | importFrom(sets,as.set) 36 | importFrom(sets,set_is_empty) 37 | importFrom(sets,set_power) 38 | importFrom(sets,set_union) 39 | importFrom(stats,dist) 40 | importFrom(stats,uniroot) 41 | importFrom(stats,var) 42 | importFrom(tibble,tibble) 43 | importFrom(tidyr,spread_) 44 | importFrom(utils,combn) 45 | useDynLib(scanstatistics, .registration = TRUE) 46 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # scanstatistics 1.0.2 2 | 3 | ## Minor changes 4 | 5 | * Removed unneeded internal functions that caused package to not be loaded. 6 | * Added `CITATION` file with updated citation for the package. 7 | * Fixes to multiple bugs found by Kelly Reeve. 
8 | * Added functionality to function `top_clusters`. 9 | 10 | # scanstatistics 1.0.1 11 | 12 | * Fixes to compiler warnings; needed to keep package on CRAN. 13 | 14 | # scanstatistics 1.0 15 | 16 | ## Major changes 17 | 18 | * New interface for main functions: accept data frames or matrices instead of 19 | data tables. 20 | * All scan statistics reimplemented in C++. 21 | * Several new scan statistics available. 22 | 23 | ## Minor changes 24 | 25 | * The functions `knn_zones` and `flexible_zones` now run faster due to a change 26 | in algorithms. 27 | 28 | # scanstatistics 0.1 29 | -------------------------------------------------------------------------------- /NOTES.txt: -------------------------------------------------------------------------------- 1 | NOTE TO SELF: be sure to keep the three references.bib listed below identical. 2 | ISSUE: pkgdown moves README.Rmd, vignettes/*.Rmd, and vignettes/articles/*.Rmd 3 | to docs/, and expects the same relative paths for the references.bib file to 4 | hold. They do not. As a workaround, I have put the references.bib file in three 5 | places: 6 | ./references.bib 7 | ./vignettes/references.bib 8 | ./docs/references.bib 9 | A fourth one is automatically copied by pkgdown (when running 10 | pkgdown::build_site()) to ./docs/articles/references.bib. 11 | 12 | # TODO before pushing to GitHub: 13 | * Remove --no-build-vignettes from project options, because Travis build fails 14 | otherwise. 15 | 16 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Longitude and latitude of New Mexico county seats. 2 | #' 3 | #' A dataset containing the longitude and latitude of the county seats of New 4 | #' Mexico, except for Cibola county. 
5 | #' 6 | #' @format A data frame with 33 rows and 7 variables: 7 | #' \describe{ 8 | #' \item{county}{Factor; the counties of New Mexico (no spaces).} 9 | #' \item{seat}{Character; the name of the county seat, i.e. the administrative 10 | #' center or seat of government.} 11 | #' \item{area(km2)}{Numeric; the area in square kilometers of each county.} 12 | #' \item{seat_long}{Numeric; the longitude of the county seat.} 13 | #' \item{seat_lat}{Numeric; the latitude of the county seat.} 14 | #' \item{center_long}{Numeric; the longitude of the geographical center of 15 | #' the county.} 16 | #' \item{center_lat}{Numeric; the latitude of the geographical center of the 17 | #' county.} 18 | #' } 19 | #' @source \url{https://en.wikipedia.org/wiki/List_of_counties_in_New_Mexico} 20 | "NM_geo" 21 | 22 | #' Population and brain cancer cases in New Mexico counties during 1973--1991. 23 | #' 24 | #' A dataset containing the population count and number of brain cancer cases in 25 | #' the counties of New Mexico during the years 1973--1991. The population 26 | #' numbers are interpolations from the censuses conducted in 1973, 1982, and 27 | #' 1991. Interpolations were done using a quadratic function of time. Thus the 28 | #' year-to-year changes are overly smooth but match the census numbers in the 29 | #' three years mentioned. 30 | #' @format A data frame with 608 rows and 4 variables: 31 | #' \describe{ 32 | #' \item{year}{Integer; the year the cases were recorded.} 33 | #' \item{county}{Character; the name of the county (no spaces).} 34 | #' \item{population}{Integer; the population in that county and year.} 35 | #' \item{count}{Integer; the number of brain cancer cases in that county and 36 | #' year.} 37 | #' } 38 | "NM_popcas" 39 | 40 | #' Data to plot the counties of New Mexico. 41 | #' 42 | #' Map data for New Mexico. Was created using \code{ggplot2::map_data}. 
43 | #' @format A data frame with 867 rows and 7 variables: 44 | #' \describe{ 45 | #' \item{long}{Numeric; longitude of county polygon corner.} 46 | #' \item{lat}{Numeric; latitude of county polygon corner.} 47 | #' \item{group}{Numeric; grouping by county.} 48 | #' \item{order}{Numeric; order of the polygon corners.} 49 | #' \item{region}{Character; region is "new mexico" for all rows.} 50 | #' \item{subregion}{Character; the county name (with spaces).} 51 | #' \item{county}{Factor; the county name (no spaces).} 52 | #' } 53 | "NM_map" 54 | -------------------------------------------------------------------------------- /R/scanstatistics.R: -------------------------------------------------------------------------------- 1 | #' scanstatistics: Space-time anomaly detection using scan statistics. 2 | #' 3 | #' The scanstatistics package provides two categories of important functions: 4 | #' data preparation functions, and the scan statistics themselves. 5 | #' @section Data preparation functions: 6 | #' These functions prepare your data for use. In particular, they help you 7 | #' define the \emph{zones} which will be considered by the scan statistics. 8 | #' @section Scan statistics: 9 | #' These are the functions used for space-time anomaly detection. Scan statistic 10 | #' functions for univariate space-time data have a name that begins with 11 | #' \code{scan_} and functions for multivariate space-time data have a name that 12 | #' begins with \code{mscan_}. 
13 | #' @docType package 14 | #' @name scanstatistics 15 | NULL 16 | 17 | #' @useDynLib scanstatistics, .registration = TRUE 18 | #' @importFrom Rcpp sourceCpp 19 | NULL 20 | 21 | # Hack based on Hadley Wickham's comment: 22 | # http://stackoverflow.com/a/12429344/897506 23 | globalVariables(c( 24 | ## Variables used unquoted inside functions------------------------------------- 25 | "location", 26 | "log_posterior", 27 | "score", 28 | "time", 29 | "zone"), 30 | package = "scanstatistics", 31 | add = TRUE) 32 | -------------------------------------------------------------------------------- /R/zone_utility_functions.R: -------------------------------------------------------------------------------- 1 | # Functions in this file: 2 | # get_zone 3 | # powerset_zones 4 | 5 | #' Extract a zone from the set of all zones. 6 | #' 7 | #' Extract zone number \eqn{n} from the set of all zones. 8 | #' @param n An integer; the number of the zone you wish to retrieve. 9 | #' @param zones A list of integer vectors, representing the set of all zones. 10 | #' @return An integer vector. 11 | #' @export 12 | #' @examples 13 | #' zones <- list(1L, 2L, 3L, 1:2, c(1L, 3L), c(2L, 3L)) 14 | #' get_zone(4, zones) 15 | get_zone <- function(n, zones) { 16 | if (n > length(zones) || n < 1) { 17 | stop("Zone not found.") 18 | } 19 | zones[[n]] 20 | } 21 | 22 | #' Creates a set of all non-empty subsets of the integers from 1 to \eqn{n}. 23 | #' 24 | #' Creates a list of all \eqn{2^n - 1} non-empty subsets of the integers from 1 25 | #' to \eqn{n}. 26 | #' @param n An integer larger than 0. 27 | #' @return A list of integer vectors. 
28 | #' @importFrom utils combn 29 | #' @keywords internal 30 | powerset_zones <- function(n) { 31 | zones <- sets::set_power(seq_len(n)) - sets::set(sets::as.set(integer(0))) 32 | lapply(zones, FUN = function(x) unlist(as.list(x))) 33 | } 34 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Release summary 2 | 3 | This release removes a number of internal functions that caused the package to 4 | not load in some circumstances. 5 | 6 | ## Test environments 7 | * local Kubuntu 16.04 install, R 3.4.4 8 | * ubuntu 14.04.5 (on travis-ci), R 3.4.4 9 | * win-builder (devel) 10 | 11 | 12 | ## R CMD check results 13 | 14 | ### Using Ubuntu 16.04 15 | There were no ERRORs or WARNINGs. 16 | 17 | There was 1 NOTE: 18 | 19 | * installed size is 6.9Mb, sub-directories of 1Mb or more: libs 6.4Mb 20 | 21 | Explanation (same as given on previous releases): The size is due to use of 22 | templated classes and functions, and virtual functions. The cost, in terms of 23 | code duplication and inability to add new functionality, that would result from 24 | not using these features of C++ is simply too high. Thus, I think the larger 25 | size of the installed package is warranted. 26 | 27 | ### Using Ubuntu 14.04.5 28 | There were no ERRORs or WARNINGs. 29 | 30 | There was 1 NOTE: 31 | 32 | * installed size is 6.2Mb, sub-directories of 1Mb or more: libs 5.7Mb 33 | 34 | Explanation: see above. 35 | 36 | ### Using win-builder 37 | There were no ERRORs, WARNINGs or NOTEs. 38 | 39 | # Downstream dependencies 40 | There are currently no downstream dependencies for this package. 
41 | 42 | -------------------------------------------------------------------------------- /data/NM_geo.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/data/NM_geo.rda -------------------------------------------------------------------------------- /data/NM_map.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/data/NM_map.rda -------------------------------------------------------------------------------- /data/NM_popcas.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/data/NM_popcas.rda -------------------------------------------------------------------------------- /docs/README_figures/county_scores-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/README_figures/county_scores-1.png -------------------------------------------------------------------------------- /docs/README_figures/newmexico_map-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/README_figures/newmexico_map-1.png -------------------------------------------------------------------------------- /docs/articles/articles/make_data_files/figure-html/grab_seat_coords-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/articles/articles/make_data_files/figure-html/grab_seat_coords-1.png -------------------------------------------------------------------------------- /docs/articles/articles/make_data_files/figure-html/plot_centroids-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/articles/articles/make_data_files/figure-html/plot_centroids-1.png -------------------------------------------------------------------------------- /docs/articles/articles/make_data_files/figure-html/plot_evolution-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/articles/articles/make_data_files/figure-html/plot_evolution-1.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
41 |
42 | 94 | 95 | 96 |
97 | 98 | 101 | 102 |
103 |
104 |
105 |

All vignettes

106 |

107 | 108 | 112 |
113 |
114 |
115 | 116 |
117 | 120 | 121 |
122 |

Site built with pkgdown.

123 |
124 | 125 |
126 |
127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /docs/articles/introduction_files/figure-html/county_scores-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/articles/introduction_files/figure-html/county_scores-1.png -------------------------------------------------------------------------------- /docs/articles/introduction_files/figure-html/newmexico_map-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/articles/introduction_files/figure-html/newmexico_map-1.png -------------------------------------------------------------------------------- /docs/articles/references.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Kulldorff1998, 3 | author = {Martin Kulldorff and William F. Athas and Eric J. Feuer and Barry A. Miller and Charles R. 
Key}, 4 | title = {Evaluating cluster alarms: A space-time scan statistic and brain cancer in Los Alamos}, 5 | journal = {American Journal of Public Health}, 6 | year = {1998}, 7 | volume = {88}, 8 | pages = {1377–1380}, 9 | number = {9} 10 | } 11 | 12 | @ARTICLE{Tango2011, 13 | author = {Toshiro Tango and Kunihiko Takahashi and Kazuaki Kohriyama}, 14 | title = {A space-time scan statistic for detecting emerging outbreaks}, 15 | journal = {Biometrics}, 16 | year = {2011}, 17 | volume = {67}, 18 | pages = {106–115}, 19 | number = {1} 20 | } 21 | 22 | @mastersthesis{Kjellson2015, 23 | author = {Benjamin Kjellson}, 24 | title = {{Spatiotemporal Outbreak Detection: A Scan Statistic Based on the Zero-Inflated Poisson Distribution}}, 25 | school = {Stockholm University, Division of Mathematical Statistics}, 26 | address = {Sweden}, 27 | year = {2015}, 28 | url = {https://goo.gl/GdseSh} 29 | } 30 | 31 | @Manual{rsatscan, 32 | title = {rsatscan: Tools, Classes, and Methods for Interfacing with SaTScan Stand-Alone Software}, 33 | author = {Ken Kleinman}, 34 | year = {2015}, 35 | note = {R package version 0.3.9200}, 36 | url = {https://CRAN.R-project.org/package=rsatscan}, 37 | } 38 | 39 | @Manual{datatable, 40 | title = {data.table: Extension of Data.frame}, 41 | author = {M Dowle and A Srinivasan and T Short and S Lianoglou with contributions from R Saporta and E Antonyan}, 42 | year = {2015}, 43 | note = {R package version 1.9.6}, 44 | url = {https://CRAN.R-project.org/package=data.table}, 45 | } 46 | 47 | @INPROCEEDINGS{Neill2005, 48 | author = {Daniel B. Neill and Andrew W. 
Moore and Maheshkumar Sabhnani and Kenny Daniel}, 49 | title = {Detection of emerging space-time clusters}, 50 | booktitle = {Proceedings of the eleventh ACM SIGKDD international conference on 51 | Knowledge discovery in data mining}, 52 | year = {2005}, 53 | pages = {218–227}, 54 | organization = {ACM} 55 | } 56 | 57 | @Article{sets, 58 | title = {Generalized and Customizable Sets in {R}}, 59 | author = {David Meyer and Kurt Hornik}, 60 | journal = {Journal of Statistical Software}, 61 | year = {2009}, 62 | volume = {31}, 63 | number = {2}, 64 | pages = {1–27}, 65 | url = {http://www.jstatsoft.org/v31/i02/} 66 | } 67 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
41 |
42 | 94 | 95 | 96 |
97 | 98 |
99 |
100 | 103 | 104 |
    105 |
  • 106 |

    Benjamin Allévius. Author, maintainer. 107 |

    108 |
  • 109 |
110 | 111 |
112 | 113 |
114 | 115 | 116 |
117 | 120 | 121 |
122 |

Site built with pkgdown.

123 |
124 | 125 |
126 |
127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | 
-------------------------------------------------------------------------------- /docs/news/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | All news • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
41 |
42 | 94 | 95 | 96 |
97 | 98 |
99 | 100 |
101 | 104 | 105 |
106 |
107 |

108 | scanstatistics 1.0

109 |
110 | NA 111 |
112 |
113 | 114 | 123 | 124 |
125 | 126 |
127 | 130 | 131 |
132 |

Site built with pkgdown.

133 |
134 | 135 |
136 |
137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticker footer */ 2 | body > .container { 3 | display: flex; 4 | padding-top: 60px; 5 | min-height: calc(100vh); 6 | flex-direction: column; 7 | } 8 | 9 | body > .container .row { 10 | flex: 1; 11 | } 12 | 13 | footer { 14 | margin-top: 45px; 15 | padding: 35px 0 36px; 16 | border-top: 1px solid #e5e5e5; 17 | color: #666; 18 | display: flex; 19 | } 20 | footer p { 21 | margin-bottom: 0; 22 | } 23 | footer div { 24 | flex: 1; 25 | } 26 | footer .pkgdown { 27 | text-align: right; 28 | } 29 | footer p { 30 | margin-bottom: 0; 31 | } 32 | 33 | img.icon { 34 | float: right; 35 | } 36 | 37 | img { 38 | max-width: 100%; 39 | } 40 | 41 | /* Section anchors ---------------------------------*/ 42 | 43 | a.anchor { 44 | margin-left: -30px; 45 | display:inline-block; 46 | width: 30px; 47 | height: 30px; 48 | visibility: hidden; 49 | 50 | background-image: url(./link.svg); 51 | background-repeat: no-repeat; 52 | background-size: 20px 20px; 53 | background-position: center center; 54 | } 55 | 56 | .hasAnchor:hover a.anchor { 57 | visibility: visible; 58 | } 59 | 60 | @media (max-width: 767px) { 61 | .hasAnchor:hover a.anchor { 62 | visibility: hidden; 63 | } 64 | } 65 | 66 | 67 | /* Fixes for fixed navbar --------------------------*/ 68 | 69 | .contents h1, .contents h2, .contents h3, .contents h4 { 70 | padding-top: 60px; 71 | margin-top: -60px; 72 | } 73 | 74 | /* Static header placement on mobile devices */ 75 | @media (max-width: 767px) { 76 | .navbar-fixed-top { 77 | position: absolute; 78 | } 79 | .navbar { 80 | padding: 0; 81 | } 82 | } 83 | 84 | 85 | /* Sidebar --------------------------*/ 86 | 87 | #sidebar { 88 | margin-top: 30px; 89 | } 90 | #sidebar h2 { 91 | font-size: 1.5em; 92 | margin-top: 1em; 93 | } 94 | 95 | #sidebar h2:first-child { 96 | margin-top: 
0; 97 | } 98 | 99 | #sidebar .list-unstyled li { 100 | margin-bottom: 0.5em; 101 | } 102 | 103 | /* Reference index & topics ----------------------------------------------- */ 104 | 105 | .ref-index th {font-weight: normal;} 106 | .ref-index h2 {font-size: 20px;} 107 | 108 | .ref-index td {vertical-align: top;} 109 | .ref-index .alias {width: 40%;} 110 | .ref-index .title {width: 60%;} 111 | 112 | .ref-index .alias {width: 40%;} 113 | .ref-index .title {width: 60%;} 114 | 115 | .ref-arguments th {text-align: right; padding-right: 10px;} 116 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 117 | .ref-arguments .name {width: 20%;} 118 | .ref-arguments .desc {width: 80%;} 119 | 120 | /* Nice scrolling for wide elements --------------------------------------- */ 121 | 122 | table { 123 | display: block; 124 | overflow: auto; 125 | } 126 | 127 | /* Syntax highlighting ---------------------------------------------------- */ 128 | 129 | pre { 130 | word-wrap: normal; 131 | word-break: normal; 132 | border: 1px solid #eee; 133 | } 134 | 135 | pre, code { 136 | background-color: #f8f8f8; 137 | color: #333; 138 | } 139 | 140 | pre .img { 141 | margin: 5px 0; 142 | } 143 | 144 | pre .img img { 145 | background-color: #fff; 146 | display: block; 147 | height: auto; 148 | } 149 | 150 | code a, pre a { 151 | color: #375f84; 152 | } 153 | 154 | .fl {color: #1514b5;} 155 | .fu {color: #000000;} /* function */ 156 | .ch,.st {color: #036a07;} /* string */ 157 | .kw {color: #264D66;} /* keyword */ 158 | .co {color: #888888;} /* comment */ 159 | 160 | .message { color: black; font-weight: bolder;} 161 | .error { color: orange; font-weight: bolder;} 162 | .warning { color: #6A0366; font-weight: bolder;} 163 | 164 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | $("#sidebar").stick_in_parent({offset_top: 40}); 3 | 
$('body').scrollspy({ 4 | target: '#sidebar', 5 | offset: 60 6 | }); 7 | 8 | var cur_path = paths(location.pathname); 9 | $("#navbar ul li a").each(function(index, value) { 10 | if (value.text == "Home") 11 | return; 12 | if (value.getAttribute("href") === "#") 13 | return; 14 | 15 | var path = paths(value.pathname); 16 | if (is_prefix(cur_path, path)) { 17 | // Add class to parent
  • , and enclosing
  • if in dropdown 18 | var menu_anchor = $(value); 19 | menu_anchor.parent().addClass("active"); 20 | menu_anchor.closest("li.dropdown").addClass("active"); 21 | } 22 | }); 23 | }); 24 | 25 | function paths(pathname) { 26 | var pieces = pathname.split("/"); 27 | pieces.shift(); // always starts with / 28 | 29 | var end = pieces[pieces.length - 1]; 30 | if (end === "index.html" || end === "") 31 | pieces.pop(); 32 | return(pieces); 33 | } 34 | 35 | function is_prefix(needle, haystack) { 36 | if (needle.length > haystack.lengh) 37 | return(false); 38 | 39 | for (var i = 0; i < haystack.length; i++) { 40 | if (needle[i] != haystack[i]) 41 | return(false); 42 | } 43 | 44 | return(true); 45 | } 46 | -------------------------------------------------------------------------------- /docs/reference/NM_map.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Data to plot the counties of New Mexico. — NM_map • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Map data for New Mexico. Was created using ggplot2::map_data.

    106 | 107 | 108 |
    NM_map
    109 | 110 |

    Format

    111 | 112 |

    A data frame with 867 rows and 7 variables:

    113 |
    long

    Numeric; longitude of county polygon corner.

    114 |
    lat

    Numeric; latitude of county polygon corner.

    115 |
    group

    Numeric; grouping by county.

    116 |
    order

    Numeric; order of the polygon corners.

    117 |
    region

    Character; region is "new mexico" for all rows.

    118 |
    subregion

    Character; the county name (with spaces).

    119 |
    county

    Factor; the county name (no spaces).

    120 |
    121 | 122 | 123 |
    124 | 132 |
    133 | 134 |
    135 | 138 | 139 |
    140 |

    Site built with pkgdown.

    141 |
    142 | 143 |
    144 |
    145 | 146 | 147 | 148 | -------------------------------------------------------------------------------- /docs/reference/coords_to_knn-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/reference/coords_to_knn-1.png -------------------------------------------------------------------------------- /docs/reference/coords_to_knn-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/docs/reference/coords_to_knn-2.png -------------------------------------------------------------------------------- /docs/reference/exponential_lambda.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Compute the score for an individual exponential observation. — exponential_lambda • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Compute the score for an individual exponential observation.

    106 | 107 | 108 |
    exponential_lambda(c, b)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
    c

    A scalar; the observed value.

    b

    A scalar; the expected value.

    122 | 123 |

    Value

    124 | 125 |

    A scalar; the score.

    126 | 127 | 128 |
    129 | 138 |
    139 | 140 |
    141 | 144 | 145 |
    146 |

    Site built with pkgdown.

    147 |
    148 | 149 |
    150 |
    151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/reference/flipud.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Flip a matrix upside down — flipud • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Flip a matrix upside down

    106 | 107 | 108 |
    flipud(x)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 |
    x

    A matrix

    118 | 119 |

    Value

    120 | 121 |

    A matrix, x with rows reversed.

    122 | 123 | 124 |
    125 | 134 |
    135 | 136 |
    137 | 140 | 141 |
    142 |

    Site built with pkgdown.

    143 |
    144 | 145 |
    146 |
    147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /docs/reference/gaussian_qmax.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Calculates the largest value for which the Gaussian score function is zero. — gaussian_qmax • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Calculates the largest value for which the Gaussian score function is zero.

    106 | 107 | 108 |
    gaussian_qmax(c, b)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
    c

    A scalar; the observed value.

    b

    A scalar; the expected value.

    122 | 123 |

    Value

    124 | 125 |

    A scalar greater than or equal to 1.

    126 | 127 | 128 |
    129 | 138 |
    139 | 140 |
    141 | 144 | 145 |
    146 |

    Site built with pkgdown.

    147 |
    148 | 149 |
    150 |
    151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/reference/get_subset.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Get either a location or stream subset. — get_subset • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Get either a location or stream subset.

    106 | 107 | 108 |
    get_subset(lst, subset, d = 3)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
    lst

    A list.

    d

    Integer; either 2 (locations) or 3 (streams).

    122 | 123 |

    Value

    124 | 125 |

    A list.

    126 | 127 | 128 |
    129 | 138 |
    139 | 140 |
    141 | 144 | 145 |
    146 |

    Site built with pkgdown.

    147 |
    148 | 149 |
    150 |
    151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/reference/get_zero_indices.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Get indices of zero elements in a vector. — get_zero_indices • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Get indices of zero elements in a vector.

    106 | 107 | 108 |
    get_zero_indices(v)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 |
    v

    An integer vector.

    118 | 119 |

    Value

    120 | 121 |

    A vector with the indices of elements equal to zero in v. 122 | Indices start at zero.

    123 | 124 | 125 |
    126 | 135 |
    136 | 137 |
    138 | 141 | 142 |
    143 |

    Site built with pkgdown.

    144 |
    145 | 146 |
    147 |
    148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/reference/poisson_lambda.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Compute the score for an individual Poisson observation. — poisson_lambda • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Compute the score for an individual Poisson observation.

    106 | 107 | 108 |
    poisson_lambda(c, b)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
    c

    A non-negative integer; the observed count.

    b

    A positive scalar; the expected count.

    122 | 123 |

    Value

    124 | 125 |

    A scalar; the score.

    126 | 127 | 128 |
    129 | 138 |
    139 | 140 |
    141 | 144 | 145 |
    146 |

    Site built with pkgdown.

    147 |
    148 | 149 |
    150 |
    151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/reference/poisson_qmax.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Calculates the largest value for which the Poisson score function is zero. — poisson_qmax • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Calculates the largest value for which the Poisson score function is zero.

    106 | 107 | 108 |
    poisson_qmax(c, b)
    109 | 110 |

    Arguments

    111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 |
    c

    A non-negative integer; the observed count.

    b

    A positive scalar; the expected count.

    122 | 123 |

    Value

    124 | 125 |

    A scalar greater than or equal to 1.

    126 | 127 | 128 |
    129 | 138 |
    139 | 140 |
    141 | 144 | 145 |
    146 |

    Site built with pkgdown.

    147 |
    148 | 149 |
    150 |
    151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /docs/reference/powerset_zones.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Creates a set of all non-empty subsets of the integers from 1 to \(n\). — powerset_zones • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Creates a list of all \(2^n - 1\) non-empty subsets of the integers from 1 106 | to \(n\).

    107 | 108 | 109 |
    powerset_zones(n)
    110 | 111 |

    Arguments

    112 | 113 | 114 | 115 | 116 | 117 | 118 |
    n

    An integer larger than 0.

    119 | 120 |

    Value

    121 | 122 |

    A list of integer vectors.

    123 | 124 | 125 |
    126 | 135 |
    136 | 137 |
    138 | 141 | 142 |
    143 |

    Site built with pkgdown.

    144 |
    145 | 146 |
    147 |
    148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/reference/print.scanstatistic.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Print a scanstatistic object. — print.scanstatistic • scanstatistics 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
    41 |
    42 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 103 | 104 | 105 |

    Prints a scanstatistic object and returns it invisibly.

    106 | 107 | 108 |
    # S3 method for scanstatistic
    109 | print(x, ...)
    110 | 111 |

    Arguments

    112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 |
    x

    An object of class scanstatistic.

    ...

    Further arguments passed to or from other methods.

    123 | 124 | 125 |
    126 | 133 |
    134 | 135 |
    136 | 139 | 140 |
    141 |

    Site built with pkgdown.

    142 |
    143 | 144 |
    145 |
    146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | header = "To cite package ‘scanstatistics’ in publications use:", 4 | author = person("Benjamin", "Allévius"), 5 | title = "scanstatistics: space-time anomaly detection using scan statistics", 6 | journal = "Journal of Open Source Software", 7 | year = "2018", 8 | volume = "3", 9 | number = "25", 10 | pages = "515", 11 | doi = "10.21105/joss.00515" 12 | ) 13 | -------------------------------------------------------------------------------- /inst/image/county_scores-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/inst/image/county_scores-1.png -------------------------------------------------------------------------------- /inst/image/newmexico_map-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/benjamin-allevius/scanstatistics/30b424c568d376934b4ba9fb57c5e98494f52a85/inst/image/newmexico_map-1.png -------------------------------------------------------------------------------- /man/NM_geo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{NM_geo} 5 | \alias{NM_geo} 6 | \title{Longitude and latitude of New Mexico county seats.} 7 | \format{A data frame with 33 rows and 7 variables: 8 | \describe{ 9 | \item{county}{Factor; the counties of New Mexico (no spaces).} 10 | \item{seat}{Character; the name of the county seat, i.e. 
the administrative 11 | center or seat of government.} 12 | \item{area(km2)}{Numeric; the area in square kilometers of each county.} 13 | \item{seat_long}{Numeric; the longitude of the county seat.} 14 | \item{seat_lat}{Numeric; the latitude of the county seat.} 15 | \item{center_long}{Numeric; the longitude of the geographical center of 16 | the county.} 17 | \item{center_lat}{Numeric; the latitude of the geographical center of the 18 | county.} 19 | }} 20 | \source{ 21 | \url{https://en.wikipedia.org/wiki/List_of_counties_in_New_Mexico} 22 | } 23 | \usage{ 24 | NM_geo 25 | } 26 | \description{ 27 | A dataset containing the longitude and latitude of the county seats of New 28 | Mexico, except for Cibola county. 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /man/NM_map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{NM_map} 5 | \alias{NM_map} 6 | \title{Data to plot the counties of New Mexico.} 7 | \format{A data frame with 867 rows and 7 variables: 8 | \describe{ 9 | \item{long}{Numeric; longitude of county polygon corner.} 10 | \item{lat}{Numeric; latitude of county polygon corner.} 11 | \item{group}{Numeric; grouping by county.} 12 | \item{order}{Numeric; order of the polygon corners.} 13 | \item{region}{Character; region is "new mexico" for all rows.} 14 | \item{subregion}{Character; the county name (with spaces).} 15 | \item{county}{Factor; the county name (no spaces).} 16 | }} 17 | \usage{ 18 | NM_map 19 | } 20 | \description{ 21 | Map data for New Mexico. Was created using \code{ggplot2::map_data}. 
22 | } 23 | \keyword{datasets} 24 | -------------------------------------------------------------------------------- /man/NM_popcas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{NM_popcas} 5 | \alias{NM_popcas} 6 | \title{Population and brain cancer cases in New Mexico counties during 1973--1991.} 7 | \format{A data frame with 608 rows and 4 variables: 8 | \describe{ 9 | \item{year}{Integer; the year the cases were recorded.} 10 | \item{county}{Character; the name of the county (no spaces).} 11 | \item{population}{Integer; the population in that county and year.} 12 | \item{count}{Integer; the number of brain cancer cases in that county and 13 | year.} 14 | }} 15 | \usage{ 16 | NM_popcas 17 | } 18 | \description{ 19 | A dataset containing the population count and number of brain cancer cases in 20 | the counties of New Mexico during the years 1973--1991. The population 21 | numbers are interpolations from the censuses conducted in 1973, 1982, and 22 | 1991. Interpolations were done using a quadratic function of time. Thus the 23 | year-to-year changes are overly smooth but match the census numbers in the 24 | three years mentioned. 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/closest_subsets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{closest_subsets} 4 | \alias{closest_subsets} 5 | \title{Set of increasing sets from left to right of input vector.} 6 | \usage{ 7 | closest_subsets(v) 8 | } 9 | \arguments{ 10 | \item{v}{An integer vector. 
Meant to represent the \eqn{k} nearest neighbors 11 | of a location, the first element being the integer identifier of the 12 | location itself.} 13 | } 14 | \value{ 15 | A list of the same length as the input. The first element of the list 16 | is v[1], the second is sort(v[1:2]), the third sort(v[1:3]), and so on. 17 | } 18 | \description{ 19 | Returns a set (list) of the increasing sets (integer vectors) of the input 20 | vector \code{v}, in the sense that the first set contains the first element 21 | of \code{v}, the second set the first and second elements of \code{v}, and so 22 | on. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/connected_neighbors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{connected_neighbors} 4 | \alias{connected_neighbors} 5 | \title{Find the connected sets for a location and its \eqn{k} nearest neighbors.} 6 | \usage{ 7 | connected_neighbors(neighbors, adjacency_matrix) 8 | } 9 | \arguments{ 10 | \item{neighbors}{A vector of neighbors to a location, the first element 11 | of the vector being the specific location, and the other elements its 12 | other nearest neighbors. Locations should be encoded as integers.} 13 | 14 | \item{adjacency_matrix}{A boolean matrix, with element \eqn{(i,j)} set 15 | to TRUE if location \eqn{j} is adjacent to location \eqn{i}.} 16 | } 17 | \value{ 18 | Returns a \code{set} of \code{set}s, each set of the latter type 19 | containing the location itself and zero or more of its neighbors, if they 20 | are connected. 21 | } 22 | \description{ 23 | Returns a \code{set} of \code{set}s, each set of the latter type containing 24 | the location itself and zero or more of its neighbors, if they are connected. 
25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/connected_to.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{connected_to} 4 | \alias{connected_to} 5 | \title{Return those elements in the second set which are connected to those in the 6 | first.} 7 | \usage{ 8 | connected_to(Z_0, Z_1, adjacency_matrix) 9 | } 10 | \arguments{ 11 | \item{Z_0}{A set of locations, given as integers.} 12 | 13 | \item{Z_1}{A set of locations, given as integers.} 14 | 15 | \item{adjacency_matrix}{A boolean matrix, with element \eqn{(i,j)} set 16 | to TRUE if location \eqn{j} is adjacent to location \eqn{i}.} 17 | } 18 | \value{ 19 | A set, possibly empty, containing those locations in \eqn{Z_1} 20 | that are connected to any of the locations in \eqn{Z_0}. 21 | } 22 | \description{ 23 | Return those elements in the second set \eqn{Z_1} which are connected to 24 | those in the first set \eqn{Z_0}, according to the adjacency matrix. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/coords_to_knn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{coords_to_knn} 4 | \alias{coords_to_knn} 5 | \title{Get the k nearest neighbors for each location, given its coordinates.} 6 | \usage{ 7 | coords_to_knn(x, k = min(10, nrow(x)), method = "euclidean", p = 2) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric matrix, data frame or \code{"dist"} object.} 11 | 12 | \item{k}{The number of nearest neighbors, counting the location itself.} 13 | 14 | \item{method}{the distance measure to be used. 
This must be one of 15 | \code{"euclidean"}, \code{"maximum"}, \code{"manhattan"}, 16 | \code{"canberra"}, \code{"binary"} or \code{"minkowski"}. 17 | Any unambiguous substring can be given.} 18 | 19 | \item{p}{The power of the Minkowski distance.} 20 | } 21 | \value{ 22 | An integer matrix of the \eqn{k} nearest neighbors for each location. 23 | Each row corresponds to a location, with the first element of each row 24 | being the location itself. Locations are encoded as integers. 25 | } 26 | \description{ 27 | Get the k nearest neighbors for each location, including the location itself. 28 | This function calls \code{\link[stats]{dist}}, so the options for the 29 | distance measure used is the same as for that one. Distances are calculated 30 | between rows. 31 | } 32 | \examples{ 33 | x <- matrix(c(0, 0, 34 | 1, 0, 35 | 2, 1, 36 | 0, 4, 37 | 1, 3), 38 | ncol = 2, byrow = TRUE) 39 | plot(x) 40 | coords_to_knn(x) 41 | } 42 | -------------------------------------------------------------------------------- /man/df_to_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility_functions.R 3 | \name{df_to_matrix} 4 | \alias{df_to_matrix} 5 | \title{Convert a long data frame to a wide matrix.} 6 | \usage{ 7 | df_to_matrix(df, time_col = 1, location_col = 2, value_col = 3) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame with at least 3 columns.} 11 | 12 | \item{time_col}{Integer or string that specifies the time column.} 13 | 14 | \item{location_col}{Integer or string that specifies the location column.} 15 | 16 | \item{value_col}{Integer or string that specifies the value column.} 17 | } 18 | \value{ 19 | A matrix with time on rows and locations on columns. 20 | } 21 | \description{ 22 | Convert a long data frame to a wide matrix, with time along the row dimension 23 | and locations along the column dimension. 
Values in the matrix could be e.g. 24 | the observed counts or the population. 25 | } 26 | -------------------------------------------------------------------------------- /man/dist_to_knn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{dist_to_knn} 4 | \alias{dist_to_knn} 5 | \title{Given a distance matrix, find the \eqn{k} nearest neighbors.} 6 | \usage{ 7 | dist_to_knn(x, k = min(10, nrow(x))) 8 | } 9 | \arguments{ 10 | \item{x}{A (square) distance matrix. Elements should be non-negative and the 11 | diagonal zeros, but this is not checked.} 12 | 13 | \item{k}{The number of nearest neighbors, counting the location itself.} 14 | } 15 | \value{ 16 | A matrix of integers, row \eqn{i} containing the \eqn{k} nearest 17 | neighbors of location \eqn{i}, including itself. 18 | } 19 | \description{ 20 | Given a distance matrix, calculate the \eqn{k} nearest neighbors of each 21 | location, including the location itself. The matrix should contain only zeros 22 | on the diagonal, and all other elements should be positive. 23 | } 24 | \examples{ 25 | x <- matrix(c(0, 0, 26 | 1, 0, 27 | 2, 1, 28 | 0, 4, 29 | 1, 3), 30 | ncol = 2, byrow = TRUE) 31 | d <- dist(x, diag = TRUE, upper = TRUE) 32 | dist_to_knn(d, k = 3) 33 | } 34 | -------------------------------------------------------------------------------- /man/estimate_baselines.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/baseline_estimation.R 3 | \name{estimate_baselines} 4 | \alias{estimate_baselines} 5 | \title{Estimate baselines based on observed counts.} 6 | \usage{ 7 | estimate_baselines(counts, population = NULL) 8 | } 9 | \arguments{ 10 | \item{counts}{A matrix of observed counts. 
Rows indicate time (ordered from 11 | most recent) and columns indicate locations.} 12 | 13 | \item{population}{A matrix or vector of populations for each location 14 | (optional). If a matrix, should be of the same dimensions as 15 | \code{counts}. If a vector, should be of the same length as the number of 16 | columns in \code{counts}.} 17 | } 18 | \value{ 19 | A matrix of baselines of the same dimensions as \code{counts}. 20 | } 21 | \description{ 22 | Estimate the baselines (expected values) for the supplied counts. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/estimate_variances.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/baseline_estimation.R 3 | \name{estimate_variances} 4 | \alias{estimate_variances} 5 | \title{Estimate variances based on observed counts.} 6 | \usage{ 7 | estimate_variances(counts, baselines = NULL, population = NULL, 8 | constant_dim = 1) 9 | } 10 | \arguments{ 11 | \item{counts}{A matrix of observed counts. Rows indicate time (ordered from 12 | most recent) and columns indicate locations.} 13 | 14 | \item{baselines}{A matrix of the same dimensions as \code{counts} (optional).} 15 | 16 | \item{population}{A matrix or vector of populations for each location 17 | (optional). If a matrix, should be of the same dimensions as 18 | \code{counts}. If a vector, should be of the same length as the number of 19 | columns in \code{counts}.} 20 | 21 | \item{constant_dim}{An integer. If equal to 1, variances are assumed to be 22 | constant over time but different between locations. If equal to 2, 23 | variances are assumed to vary over time but at each time point be equal 24 | for all locations.} 25 | } 26 | \value{ 27 | A matrix of variances of the same dimensions as \code{counts}. 
28 | } 29 | \description{ 30 | Estimate the variances for the supplied counts. It is assumed that variances 31 | are constant over time for each location. 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/estimate_zip_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/baseline_estimation.R 3 | \name{estimate_zip_params} 4 | \alias{estimate_zip_params} 5 | \title{Estimate the parameters of a ZIP distribution.} 6 | \usage{ 7 | estimate_zip_params(counts, population = NULL, min_p = 0.001, 8 | min_mu = 0.3) 9 | } 10 | \arguments{ 11 | \item{counts}{A matrix or vector of observed counts. Rows indicate time 12 | (ordered from most recent) and columns indicate locations. If a vector, 13 | the elements are assumed to be the counts for each location.} 14 | 15 | \item{population}{A matrix or vector of populations for each location 16 | (optional). If a matrix, should be of the same dimensions as 17 | \code{counts}. If a vector, should be of the same length as the number of 18 | columns in \code{counts}.} 19 | 20 | \item{min_p}{The minimum value you think possible for the structural zero 21 | probability.} 22 | 23 | \item{min_mu}{The minimum value you think possible for the Poisson mean 24 | parameter of the ZIP distribution (before adjusting for population size).} 25 | } 26 | \value{ 27 | A list with two elements: 28 | \describe{ 29 | \item{baselines}{A matrix of the same dimensions as \code{counts}. 30 | If \code{counts} was a vector, a matrix with 1 row will 31 | be returned.} 32 | \item{probs}{A matrix of the same dimensions as \code{counts}.
If 33 | \code{counts} was a vector, a matrix with 1 row will be 34 | returned.} 35 | } 36 | } 37 | \description{ 38 | Heuristically estimate the ZIP distribution Poisson mean parameters and the 39 | structural zero probabilities for each location and time point. Assumes the 40 | structural zero probability is constant over time for each location. 41 | } 42 | \keyword{internal} 43 | -------------------------------------------------------------------------------- /man/flexible_zones.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{flexible_zones} 4 | \alias{flexible_zones} 5 | \title{Computes the flexibly shaped zones as in Tango (2005).} 6 | \usage{ 7 | flexible_zones(k_nearest, adjacency_matrix) 8 | } 9 | \arguments{ 10 | \item{k_nearest}{An integer matrix of the \eqn{k} nearest neighbors for each 11 | location. Each row corresponds to a location, with the first element of 12 | each row being the location itself. Locations should be encoded as 13 | integers.} 14 | 15 | \item{adjacency_matrix}{A boolean matrix, with element \eqn{(i,j)} set 16 | to TRUE if location \eqn{j} is adjacent to location \eqn{i}.} 17 | } 18 | \value{ 19 | A list of integer vectors. 20 | } 21 | \description{ 22 | Given a matrix of \eqn{k} nearest neighbors and an adjacency matrix 23 | for the locations involved, produces the set of flexibly shaped zones 24 | as a list of integer vectors. The locations in these zones are all connected, 25 | in the sense that any location in the zone can be reached from another by 26 | traveling through adjacent locations within the zone. 
27 | } 28 | \examples{ 29 | A <- matrix(c(0,1,0,0,0,0, 30 | 1,0,1,0,0,0, 31 | 0,1,0,0,0,0, 32 | 0,0,0,0,1,0, 33 | 0,0,0,1,0,0, 34 | 0,0,0,0,0,0), 35 | nrow = 6, byrow = TRUE) == 1 36 | nn <- matrix(as.integer(c(1,2,3,4,5,6, 37 | 2,1,3,4,5,6, 38 | 3,2,1,4,5,6, 39 | 4,5,1,6,3,2, 40 | 5,4,6,1,3,2, 41 | 6,5,4,1,3,2)), 42 | nrow = 6, byrow = TRUE) 43 | flexible_zones(nn, A) 44 | } 45 | \references{ 46 | Tango, T. & Takahashi, K. (2005), \emph{A flexibly shaped spatial scan 47 | statistic for detecting clusters}, International Journal of Health 48 | Geographics 4(1). 49 | } 50 | -------------------------------------------------------------------------------- /man/flipud.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility_functions.R 3 | \name{flipud} 4 | \alias{flipud} 5 | \title{Flip a matrix upside down} 6 | \usage{ 7 | flipud(x) 8 | } 9 | \arguments{ 10 | \item{x}{A matrix} 11 | } 12 | \value{ 13 | A matrix, \code{x} with rows reversed. 14 | } 15 | \description{ 16 | Flip a matrix upside down 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/get_zero_indices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{get_zero_indices} 4 | \alias{get_zero_indices} 5 | \title{Get indices of zero elements in a vector.} 6 | \usage{ 7 | get_zero_indices(v) 8 | } 9 | \arguments{ 10 | \item{v}{An integer vector.} 11 | } 12 | \value{ 13 | A vector with the indices of elements equal to zero in \code{v}. 14 | Indices start at zero. 15 | } 16 | \description{ 17 | Get indices of zero elements in a vector. 
18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/get_zone.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zone_utility_functions.R 3 | \name{get_zone} 4 | \alias{get_zone} 5 | \title{Extract a zone from the set of all zones.} 6 | \usage{ 7 | get_zone(n, zones) 8 | } 9 | \arguments{ 10 | \item{n}{An integer; the number of the zone you wish to retrieve.} 11 | 12 | \item{zones}{A list of integer vectors, representing the set of all zones.} 13 | } 14 | \value{ 15 | An integer vector. 16 | } 17 | \description{ 18 | Extract zone number \eqn{n} from the set of all zones. 19 | } 20 | \examples{ 21 | zones <- list(1L, 2L, 3L, 1:2, c(1L, 3L), c(2L, 3L)) 22 | get_zone(4, zones) 23 | } 24 | -------------------------------------------------------------------------------- /man/gumbel_pvalue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics_general.R 3 | \name{gumbel_pvalue} 4 | \alias{gumbel_pvalue} 5 | \title{Calculate the Gumbel \eqn{p}-value for a scan statistic.} 6 | \usage{ 7 | gumbel_pvalue(observed, replicates, method = "ML", ...) 8 | } 9 | \arguments{ 10 | \item{observed}{A scalar containing the observed value of the scan statistic, 11 | or a vector of observed values from secondary clusters.} 12 | 13 | \item{replicates}{A vector of Monte Carlo replicates of the scan statistic.} 14 | 15 | \item{method}{Either "ML", for maximum likelihood, or "MoM", for method of 16 | moments.} 17 | 18 | \item{...}{Additional arguments passed to \code{ismev::gum.fit}, which 19 | may include arguments passed along further to \code{optim}.} 20 | } 21 | \value{ 22 | The \eqn{p}-value or \eqn{p}-values corresponding to the observed 23 | scan statistic(s). 
24 | } 25 | \description{ 26 | Given an observed scan statistic \eqn{\lambda^*} and a vector of replicate 27 | scan statistics \eqn{\lambda_i}, \eqn{i=1,\ldots,R}, fit a Gumbel 28 | distribution to the replicates and calculate a \eqn{p}-value for the observed 29 | statistic based on the fitted distribution. 30 | \deqn{ 31 | \frac{1 + \sum_{i=1}^R \mathrm{I}(\lambda_i > \lambda^*)}{1 + R} 32 | } 33 | The function is vectorized, so multiple \eqn{p}-values can be calculated if 34 | several scan statistics (e.g. statistics from secondary clusters) are 35 | supplied. 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/has_converged.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility_functions.R 3 | \name{has_converged} 4 | \alias{has_converged} 5 | \title{Is the relative error between two numbers less than the given tolerance?} 6 | \usage{ 7 | has_converged(current, previous, tol = 0.01) 8 | } 9 | \arguments{ 10 | \item{current}{A scalar; the most recent value of the sequence.} 11 | 12 | \item{previous}{A scalar; the second most recent value of the sequence, or a 13 | reference value.} 14 | 15 | \item{tol}{The tolerance, a positive scalar near zero.} 16 | } 17 | \description{ 18 | Given two consecutive numbers in a sequence, return \code{TRUE} if the 19 | relative change is positive but less than the given tolerance.
20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/if_connected.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{if_connected} 4 | \alias{if_connected} 5 | \title{Return a set of the location and its neighbors if they are connected, 6 | else return the empty set.} 7 | \usage{ 8 | if_connected(distinct_neighbors, location, adjacency_matrix) 9 | } 10 | \arguments{ 11 | \item{distinct_neighbors}{A \code{set} containing the neighboring locations 12 | to the given location, not including the location itself.} 13 | 14 | \item{location}{A location, preferably given as an integer.} 15 | 16 | \item{adjacency_matrix}{A boolean matrix, with element \eqn{(i,j)} set 17 | to TRUE if location \eqn{j} is adjacent to location \eqn{i}.} 18 | } 19 | \value{ 20 | A \code{set} of the given location and the neighbors if they are 21 | connected, else returns the empty set. 
22 | } 23 | \description{ 24 | If the location and its neighbors, not including itself, are connected, 25 | then return the set containing the location and its neighbors; 26 | otherwise, return the empty set. 27 | } 28 | \keyword{internal} 29 | -------------------------------------------------------------------------------- /man/is_connected.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{is_connected} 4 | \alias{is_connected} 5 | \title{Returns TRUE if the neighboring locations are connected to the given 6 | location, FALSE if not.} 7 | \usage{ 8 | is_connected(neighbor_locations, location, adjacency_matrix) 9 | } 10 | \arguments{ 11 | \item{neighbor_locations}{A \code{set} of neighboring locations to the given 12 | location; these neighbors do not include the given location itself.} 13 | 14 | \item{location}{A location, preferably given as an integer.} 15 | 16 | \item{adjacency_matrix}{A boolean matrix, with element \eqn{(i,j)} set 17 | to TRUE if location \eqn{j} is adjacent to location \eqn{i}.} 18 | } 19 | \value{ 20 | Boolean: are the neighbors connected to the given location? 21 | } 22 | \description{ 23 | Returns TRUE if the neighboring locations are connected to the given 24 | location, FALSE if not. 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/knn_zones.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zones.R 3 | \name{knn_zones} 4 | \alias{knn_zones} 5 | \title{Find the increasing subsets of \eqn{k} nearest neighbors for all locations.} 6 | \usage{ 7 | knn_zones(k_nearest) 8 | } 9 | \arguments{ 10 | \item{k_nearest}{An integer matrix with \eqn{k} columns and as many rows 11 | as locations.
The first element of each row is the integer encoding the 12 | location (and equal to the row number); the following elements are the 13 | \eqn{k-1} nearest neighbors in ascending order of distance.} 14 | } 15 | \value{ 16 | A list of integer vectors. 17 | } 18 | \description{ 19 | Returns the set of increasing nearest neighbor sets for all locations, as 20 | a list of integer vectors. That is, for each location the list returned 21 | contains one vector containing the location itself, another containing the 22 | location and its nearest neighbor, and so on, up to the vector containing the 23 | location and its \eqn{k-1} nearest neighbors. 24 | } 25 | \examples{ 26 | nn <- matrix(c(1L, 2L, 4L, 3L, 5L, 27 | 2L, 1L, 3L, 4L, 5L, 28 | 3L, 2L, 4L, 1L, 5L, 29 | 4L, 1L, 2L, 3L, 5L, 30 | 5L, 3L, 4L, 2L, 1L), 31 | ncol = 5, byrow = TRUE) 32 | knn_zones(nn[, 1:3]) 33 | } 34 | -------------------------------------------------------------------------------- /man/matrix_to_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility_functions.R 3 | \name{matrix_to_df} 4 | \alias{matrix_to_df} 5 | \title{Convert a matrix to a data frame.} 6 | \usage{ 7 | matrix_to_df(mat, name, locations = NULL, times = NULL) 8 | } 9 | \arguments{ 10 | \item{mat}{A matrix.} 11 | 12 | \item{name}{The name of the third column in the output data frame.} 13 | 14 | \item{locations}{If not \code{NULL}, a vector with the names of the 15 | locations.} 16 | 17 | \item{times}{If not \code{NULL}, a vector with the time points. If 18 | \code{NULL}, the matrix is assumed to be ordered with time point 1 in the 19 | first row.} 20 | } 21 | \value{ 22 | A data frame with columns \code{time, location, name}, where \code{name} 23 | is specified in the input.
24 | } 25 | \description{ 26 | Convert a matrix to a data frame with columns time, location, and one more 27 | containing the elements of the input matrix. 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/mc_pvalue.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics_general.R 3 | \name{mc_pvalue} 4 | \alias{mc_pvalue} 5 | \title{Calculate the Monte Carlo \eqn{p}-value for a scan statistic.} 6 | \usage{ 7 | mc_pvalue(observed, replicates) 8 | } 9 | \arguments{ 10 | \item{observed}{A scalar containing the observed value of the scan statistic, 11 | or a vector of observed values from secondary clusters.} 12 | 13 | \item{replicates}{A vector of Monte Carlo replicates of the scan statistic.} 14 | } 15 | \value{ 16 | The \eqn{p}-value or \eqn{p}-values corresponding to the observed 17 | scan statistic(s). 18 | } 19 | \description{ 20 | Given an observed scan statistic \eqn{\lambda^*} and a vector of replicate 21 | scan statistics \eqn{\lambda_i}, \eqn{i=1,\ldots,R}, calculate the Monte 22 | Carlo \eqn{p}-value as 23 | \deqn{ 24 | \frac{1 + \sum_{i=1}^R \mathrm{I}(\lambda_i > \lambda^*)}{1 + R} 25 | } 26 | The function is vectorized, so multiple \eqn{p}-values can be calculated if 27 | several scan statistics (e.g. statistics from secondary clusters) are 28 | supplied. 
29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /man/permute_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{permute_matrix} 4 | \alias{permute_matrix} 5 | \title{Permute the entries of the matrix, preserving row and column marginals.} 6 | \usage{ 7 | permute_matrix(A) 8 | } 9 | \arguments{ 10 | \item{A}{An integer matrix.} 11 | } 12 | \value{ 13 | An integer matrix. 14 | } 15 | \description{ 16 | Permute the entries of the matrix, preserving row and column marginals. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/powerset_zones.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/zone_utility_functions.R 3 | \name{powerset_zones} 4 | \alias{powerset_zones} 5 | \title{Creates a set of all non-empty subsets of the integers from 1 to \eqn{n}.} 6 | \usage{ 7 | powerset_zones(n) 8 | } 9 | \arguments{ 10 | \item{n}{An integer larger than 0.} 11 | } 12 | \value{ 13 | A list of integer vectors. 14 | } 15 | \description{ 16 | Creates a list of all \eqn{2^n - 1} non-empty subsets of the integers from 1 17 | to \eqn{n}. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/print.scanstatistic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics_general.R 3 | \name{print.scanstatistic} 4 | \alias{print.scanstatistic} 5 | \title{Print a scanstatistic object.} 6 | \usage{ 7 | \method{print}{scanstatistic}(x, ...)
8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{scanstatistic}.} 11 | 12 | \item{...}{Further arguments passed to or from other methods.} 13 | } 14 | \description{ 15 | Prints a scanstatistic object and returns it invisibly. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/run_scan.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utility_functions.R 3 | \name{run_scan} 4 | \alias{run_scan} 5 | \title{Run a scan statistic analysis.} 6 | \usage{ 7 | run_scan(scanstat, args, gumbel = FALSE) 8 | } 9 | \arguments{ 10 | \item{scanstat}{A scan statistic function.} 11 | 12 | \item{args}{A named list of arguments to be passed to \code{scanstat}.} 13 | 14 | \item{gumbel}{Logical: should a Gumbel P-value be calculated? Default is 15 | \code{FALSE}.} 16 | } 17 | \value{ 18 | A list with components 19 | \describe{ 20 | \item{observed}{The table of observed statistics.} 21 | \item{simulated}{The table of simulated statistics.} 22 | \item{MC_pvalue}{The Monte Carlo P-value of the scan statistic.} 23 | \item{Gumbel_pvalue}{The Gumbel P-value of the scan statistic.} 24 | } 25 | } 26 | \description{ 27 | Run a scan statistic analysis with the given scan statistic and arguments. 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/scan_bayes_negbin_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_bayes_negbin_cpp} 4 | \alias{scan_bayes_negbin_cpp} 5 | \title{Calculate the "Bayesian Spatial Scan Statistic" by Neill et al.
(2006).} 6 | \usage{ 7 | scan_bayes_negbin_cpp(counts, baselines, zones, zone_lengths, 8 | outbreak_prob, alpha_null, beta_null, alpha_alt, beta_alt, inc_values, 9 | inc_probs) 10 | } 11 | \arguments{ 12 | \item{counts}{An integer matrix (most recent timepoint in first row).} 13 | 14 | \item{baselines}{A matrix with positive entries (most recent timepoint in 15 | first row).} 16 | 17 | \item{zones}{An integer vector (all zones concatenated; locations indexed 18 | from 0 and up).} 19 | 20 | \item{zone_lengths}{An integer vector.} 21 | 22 | \item{outbreak_prob}{A scalar; the probability of an outbreak (at any time, 23 | any place).} 24 | 25 | \item{alpha_null}{A scalar; the shape parameter for the gamma distribution 26 | under the null hypothesis of no anomaly.} 27 | 28 | \item{beta_null}{A scalar; the scale parameter for the gamma distribution 29 | under the null hypothesis of no anomaly.} 30 | 31 | \item{alpha_alt}{A scalar; the shape parameter for the gamma distribution 32 | under the alternative hypothesis of an anomaly.} 33 | 34 | \item{beta_alt}{A scalar; the scale parameter for the gamma distribution 35 | under the alternative hypothesis of an anomaly.} 36 | 37 | \item{inc_values}{A vector of possible values for the increase in the mean 38 | (and variance) of an anomalous count.} 39 | 40 | \item{inc_probs}{A vector of the prior probabilities of each value in 41 | \code{inc_values}.} 42 | } 43 | \value{ 44 | A list with elements \code{priors} (list), \code{posteriors} (list), 45 | and \code{marginal_data_prob} (scalar). 
The list \code{priors} has 46 | elements 47 | \describe{ 48 | \item{null_prior}{The prior probability of no anomaly.} 49 | \item{alt_prior}{The prior probability of an anomaly.} 50 | \item{inc_prior}{A vector (matrix with 1 row) of prior probabilities 51 | of each value in the argument \code{inc_values}.} 52 | \item{window_prior}{The prior probability of an outbreak in any of the 53 | space-time windows.} 54 | } 55 | The list \code{posteriors} has elements 56 | \describe{ 57 | \item{null_posterior}{The posterior probability of no anomaly.} 58 | \item{alt_posterior}{The posterior probability of an anomaly.} 59 | \item{inc_posterior}{A data frame with columns \code{inc_values} and 60 | \code{inc_posterior}.} 61 | \item{window_posteriors}{A data frame with columns \code{zone}, 62 | \code{duration}, \code{log_posterior} and 63 | \code{log_bayes_factor}, each row 64 | corresponding to a space-time window.} 65 | \item{space_time_posteriors}{A matrix with the posterior anomaly 66 | probability of each location-time 67 | combination.} 68 | \item{location_posteriors}{A vector (matrix with 1 row) with the 69 | posterior probability of an anomaly at each 70 | location.} 71 | } 72 | } 73 | \description{ 74 | Calculate the "Bayesian Spatial Scan Statistic" by Neill et al. (2006), 75 | adapted to a spatio-temporal setting. The scan statistic assumes that, 76 | given the relative risk, the data follows a Poisson distribution. The 77 | relative risk is in turn assigned a Gamma distribution prior, yielding a 78 | negative binomial marginal distribution for the counts.
79 | } 80 | \keyword{internal} 81 | -------------------------------------------------------------------------------- /man/scan_eb_negbin_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_eb_negbin_cpp} 4 | \alias{scan_eb_negbin_cpp} 5 | \title{Calculate the expectation-based negative binomial scan statistic.} 6 | \usage{ 7 | scan_eb_negbin_cpp(counts, baselines, overdisp, zones, zone_lengths, 8 | store_everything, num_mcsim, score_hotspot) 9 | } 10 | \arguments{ 11 | \item{counts}{Integer matrix (most recent timepoint in first row)} 12 | 13 | \item{baselines}{Matrix (most recent timepoint in first row)} 14 | 15 | \item{overdisp}{Matrix (most recent timepoint in first row)} 16 | 17 | \item{zones}{Integer vector (all zones concatenated; locations indexed from 18 | 0 and up)} 19 | 20 | \item{zone_lengths}{Integer vector} 21 | 22 | \item{store_everything}{Boolean} 23 | 24 | \item{num_mcsim}{Integer} 25 | 26 | \item{score_hotspot}{Boolean} 27 | } 28 | \value{ 29 | A list with elements \code{observed} and \code{simulated}, each 30 | being a data frame with columns: 31 | \describe{ 32 | \item{zone}{The top-scoring zone (spatial component of MLC).} 33 | \item{duration}{The corresponding duration (time-length of MLC).} 34 | \item{score}{The value of the loglihood ratio statistic (the scan 35 | statistic).} 36 | \item{relrisk}{The estimated relative risk.} 37 | \item{n_iter}{The number of iterations performed by the EM algorithm.} 38 | } 39 | } 40 | \description{ 41 | Calculate the expectation-based negative binomial scan statistic and Monte 42 | Carlo replicates. 
43 | } 44 | \keyword{internal} 45 | -------------------------------------------------------------------------------- /man/scan_eb_poisson_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_eb_poisson_cpp} 4 | \alias{scan_eb_poisson_cpp} 5 | \title{Calculate the expectation-based Poisson scan statistic.} 6 | \usage{ 7 | scan_eb_poisson_cpp(counts, baselines, zones, zone_lengths, 8 | store_everything, num_mcsim) 9 | } 10 | \arguments{ 11 | \item{counts}{An integer matrix (most recent timepoint in first row).} 12 | 13 | \item{baselines}{A matrix with positive entries (most recent timepoint in 14 | first row).} 15 | 16 | \item{zones}{An integer vector (all zones concatenated; locations indexed 17 | from 0 and up).} 18 | 19 | \item{zone_lengths}{An integer vector.} 20 | 21 | \item{store_everything}{A boolean.} 22 | 23 | \item{num_mcsim}{An integer.} 24 | } 25 | \value{ 26 | A list with elements \code{observed} and \code{simulated}, each 27 | being a data frame with columns: 28 | \describe{ 29 | \item{zone}{The top-scoring zone (spatial component of MLC).} 30 | \item{duration}{The corresponding duration (time-length of MLC).} 31 | \item{score}{The value of the loglihood ratio statistic (the scan 32 | statistic).} 33 | \item{relrisk_in}{The estimated relative risk inside.} 34 | \item{relrisk_out}{The estimated relative risk outside.} 35 | } 36 | } 37 | \description{ 38 | Calculate the expectation-based Poisson scan statistic and Monte Carlo 39 | replicates.
40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /man/scan_eb_zip_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_eb_zip_cpp} 4 | \alias{scan_eb_zip_cpp} 5 | \title{Calculate the highest-value EB ZIP loglihood ratio statistic.} 6 | \usage{ 7 | scan_eb_zip_cpp(counts, baselines, probs, zones, zone_lengths, rel_tol, 8 | store_everything, num_mcsim) 9 | } 10 | \arguments{ 11 | \item{counts}{matrix (most recent timepoint in first row)} 12 | 13 | \item{baselines}{matrix (most recent timepoint in first row)} 14 | 15 | \item{probs}{matrix (most recent timepoint in first row)} 16 | 17 | \item{zones}{integer vector (all zones concatenated; locations indexed from 18 | 0 and up)} 19 | 20 | \item{zone_lengths}{integer vector} 21 | 22 | \item{rel_tol}{double} 23 | 24 | \item{store_everything}{boolean} 25 | 26 | \item{num_mcsim}{int} 27 | } 28 | \value{ 29 | A list with elements \code{observed} and \code{simulated}, each 30 | being a data frame with columns: 31 | \describe{ 32 | \item{zone}{The top-scoring zone (spatial component of MLC).} 33 | \item{duration}{The corresponding duration (time-length of MLC).} 34 | \item{score}{The value of the loglihood ratio statistic (the scan 35 | statistic).} 36 | \item{relrisk}{The estimated relative risk.} 37 | \item{n_iter}{The number of iterations performed by the EM algorithm.} 38 | } 39 | } 40 | \description{ 41 | Calculate the expectation-based ZIP loglihood ratio statistic for each zone 42 | and duration, but only keep the zone and duration with the highest value 43 | (the MLC). The estimate of the relative risk is also calculated, along with 44 | the number of iterations the EM algorithm performed. 
45 | } 46 | \keyword{internal} 47 | -------------------------------------------------------------------------------- /man/scan_pb_perm_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_pb_perm_cpp} 4 | \alias{scan_pb_perm_cpp} 5 | \title{Calculate the space-time permutation scan statistic.} 6 | \usage{ 7 | scan_pb_perm_cpp(counts, baselines, zones, zone_lengths, store_everything, 8 | num_mcsim) 9 | } 10 | \arguments{ 11 | \item{counts}{An integer matrix (most recent timepoint in first row).} 12 | 13 | \item{baselines}{A matrix with positive entries (most recent timepoint in 14 | first row).} 15 | 16 | \item{zones}{An integer vector (all zones concatenated; locations indexed 17 | from 0 and up).} 18 | 19 | \item{zone_lengths}{An integer vector.} 20 | 21 | \item{store_everything}{A boolean.} 22 | 23 | \item{num_mcsim}{An integer.} 24 | } 25 | \value{ 26 | A list with elements \code{observed} and \code{simulated}, each 27 | being a data frame with columns: 28 | \describe{ 29 | \item{zone}{The top-scoring zone (spatial component of MLC).} 30 | \item{duration}{The corresponding duration (time-length of MLC).} 31 | \item{score}{The value of the loglihood ratio statistic (the scan 32 | statistic).} 33 | \item{relrisk_in}{The estimated relative risk inside.} 34 | \item{relrisk_out}{The estimated relative risk outside.} 35 | } 36 | } 37 | \description{ 38 | Calculate the space-time permutation scan statistic (Kulldorff 2005) and 39 | Monte Carlo replicates.
40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /man/scan_pb_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scan_pb_poisson.R 3 | \name{scan_pb_poisson} 4 | \alias{scan_pb_poisson} 5 | \title{Calculate the population-based Poisson scan statistic.} 6 | \usage{ 7 | scan_pb_poisson(counts, zones, population = NULL, n_mcsim = 0, 8 | gumbel = FALSE, max_only = FALSE) 9 | } 10 | \arguments{ 11 | \item{counts}{Either: 12 | \itemize{ 13 | \item A matrix of observed counts. Rows indicate time and are ordered 14 | from least recent (row 1) to most recent (row 15 | \code{nrow(counts)}). Columns indicate locations, numbered from 1 16 | and up. If \code{counts} is a matrix, the optional argument 17 | \code{population} should also be specified. 18 | \item A data frame with columns "time", "location", "count", 19 | "population". 20 | }} 21 | 22 | \item{zones}{A list of integer vectors. Each vector corresponds to a single 23 | zone; its elements are the numbers of the locations in that zone.} 24 | 25 | \item{population}{Optional. A matrix or vector of populations for each 26 | location and time point. Only needed if \code{baselines} are to be 27 | estimated and you want to account for the different populations in each 28 | location (and time). If a matrix, should be of the same dimensions as 29 | \code{counts}. If a vector, should be of the same length as the number of 30 | columns in \code{counts} (the number of locations).} 31 | 32 | \item{n_mcsim}{A non-negative integer; the number of replicate scan 33 | statistics to generate in order to calculate a P-value.} 34 | 35 | \item{gumbel}{Logical: should a Gumbel P-value be calculated? Default is 36 | \code{FALSE}.} 37 | 38 | \item{max_only}{Boolean. 
If \code{FALSE} (default) the log-likelihood ratio 39 | statistic for each zone and duration is returned. If \code{TRUE}, only the 40 | largest such statistic (i.e. the scan statistic) is returned, along with 41 | the corresponding zone and duration.} 42 | } 43 | \value{ 44 | A list which, in addition to the information about the type of scan 45 | statistic, has the following components: 46 | \describe{ 47 | \item{MLC}{A list containing the number of the zone of the most likely 48 | cluster (MLC), the locations in that zone, the duration of the 49 | MLC, the calculated score, and the relative risk inside and 50 | outside the cluster. In order, the elements of this list are named 51 | \code{zone_number, locations, duration, score, relrisk_in, 52 | relrisk_out}.} 53 | \item{observed}{A data frame containing, for each combination of zone 54 | and duration investigated, the zone number, duration, score, 55 | relative risks. The table is sorted by score with the top-scoring 56 | location on top. If \code{max_only = TRUE}, only contains a single 57 | row corresponding to the MLC.} 58 | \item{replicates}{A data frame of the Monte Carlo replicates of the scan 59 | statistic (if any), and the corresponding zones and durations.} 60 | \item{MC_pvalue}{The Monte Carlo \eqn{P}-value.} 61 | \item{Gumbel_pvalue}{A \eqn{P}-value obtained by fitting a Gumbel 62 | distribution to the replicate scan statistics.} 63 | \item{n_zones}{The number of zones scanned.} 64 | \item{n_locations}{The number of locations.} 65 | \item{max_duration}{The maximum duration considered.} 66 | \item{n_mcsim}{The number of Monte Carlo replicates made.} 67 | } 68 | } 69 | \description{ 70 | Calculate the population-based Poisson scan statistic devised by Kulldorff 71 | (1997, 2001). 
72 | } 73 | \examples{ 74 | \dontrun{ 75 | set.seed(1) 76 | # Create location coordinates, calculate nearest neighbors, and create zones 77 | n_locs <- 50 78 | max_duration <- 5 79 | n_total <- n_locs * max_duration 80 | geo <- matrix(rnorm(n_locs * 2), n_locs, 2) 81 | knn_mat <- coords_to_knn(geo, 15) 82 | zones <- knn_zones(knn_mat) 83 | 84 | # Simulate data 85 | population <- matrix(rnorm(n_total, 100, 10), max_duration, n_locs) 86 | counts <- matrix(rpois(n_total, as.vector(population) / 20), 87 | max_duration, n_locs) 88 | 89 | # Inject outbreak/event/anomaly 90 | ob_dur <- 3 91 | ob_cols <- zones[[10]] 92 | ob_rows <- max_duration + 1 - seq_len(ob_dur) 93 | counts[ob_rows, ob_cols] <- matrix( 94 | rpois(ob_dur * length(ob_cols), 2 * population[ob_rows, ob_cols] / 20), 95 | length(ob_rows), length(ob_cols)) 96 | res <- scan_pb_poisson(counts = counts, 97 | zones = zones, 98 | population = population, 99 | n_mcsim = 99, 100 | max_only = FALSE) 101 | } 102 | } 103 | \references{ 104 | Kulldorff, M. (1997). \emph{A spatial scan statistic}. Communications in 105 | Statistics - Theory and Methods, 26, 1481–1496. 106 | 107 | Kulldorff, M. (2001). \emph{Prospective time periodic geographical disease 108 | surveillance using a scan statistic}. Journal of the Royal Statistical 109 | Society, Series A (Statistics in Society), 164, 61–72. 
110 | } 111 | -------------------------------------------------------------------------------- /man/scan_pb_poisson_cpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{scan_pb_poisson_cpp} 4 | \alias{scan_pb_poisson_cpp} 5 | \title{Calculate the population-based Poisson scan statistic.} 6 | \usage{ 7 | scan_pb_poisson_cpp(counts, baselines, zones, zone_lengths, 8 | store_everything, num_mcsim) 9 | } 10 | \arguments{ 11 | \item{counts}{integer matrix (most recent timepoint in first row)} 12 | 13 | \item{baselines}{matrix (most recent timepoint in first row)} 14 | 15 | \item{zones}{integer vector (all zones concatenated; locations indexed from 16 | 0 and up)} 17 | 18 | \item{zone_lengths}{integer vector} 19 | 20 | \item{store_everything}{boolean} 21 | 22 | \item{num_mcsim}{int} 23 | } 24 | \value{ 25 | A list with elements \code{observed} and \code{simulated}, each 26 | being a data frame with columns: 27 | \describe{ 28 | \item{zone}{The top-scoring zone (spatial component of MLC).} 29 | \item{duration}{The corresponding duration (time-length of MLC).} 30 | \item{score}{The value of the loglihood ratio statistic (the scan 31 | statistic).} 32 | \item{relrisk_in}{The estimated relative risk inside.} 33 | \item{relrisk_out}{The estimated relative risk outside.} 34 | } 35 | } 36 | \description{ 37 | Calculate the population-based Poisson scan statistic and Monte Carlo 38 | replicates. 
39 | } 40 | \keyword{internal} 41 | -------------------------------------------------------------------------------- /man/scan_permutation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scan_permutation.R 3 | \name{scan_permutation} 4 | \alias{scan_permutation} 5 | \title{Calculate the space-time permutation scan statistic.} 6 | \usage{ 7 | scan_permutation(counts, zones, population = NULL, n_mcsim = 0, 8 | gumbel = FALSE, max_only = FALSE) 9 | } 10 | \arguments{ 11 | \item{counts}{Either: 12 | \itemize{ 13 | \item A matrix of observed counts. Rows indicate time and are ordered 14 | from least recent (row 1) to most recent (row 15 | \code{nrow(counts)}). Columns indicate locations, numbered from 1 16 | and up. If \code{counts} is a matrix, the optional argument 17 | \code{population} should also be specified. 18 | \item A data frame with columns "time", "location", "count", 19 | "population". 20 | }} 21 | 22 | \item{zones}{A list of integer vectors. Each vector corresponds to a single 23 | zone; its elements are the numbers of the locations in that zone.} 24 | 25 | \item{population}{Optional. A matrix or vector of populations for each 26 | location and time point. Only needed if \code{baselines} are to be 27 | estimated and you want to account for the different populations in each 28 | location (and time). If a matrix, should be of the same dimensions as 29 | \code{counts}. If a vector, should be of the same length as the number of 30 | columns in \code{counts} (the number of locations).} 31 | 32 | \item{n_mcsim}{A non-negative integer; the number of replicate scan 33 | statistics to generate in order to calculate a P-value.} 34 | 35 | \item{gumbel}{Logical: should a Gumbel P-value be calculated? Default is 36 | \code{FALSE}.} 37 | 38 | \item{max_only}{Boolean. 
If \code{FALSE} (default) the log-likelihood ratio 39 | statistic for each zone and duration is returned. If \code{TRUE}, only the 40 | largest such statistic (i.e. the scan statistic) is returned, along with 41 | the corresponding zone and duration.} 42 | } 43 | \value{ 44 | A list which, in addition to the information about the type of scan 45 | statistic, has the following components: 46 | \describe{ 47 | \item{MLC}{A list containing the number of the zone of the most likely 48 | cluster (MLC), the locations in that zone, the duration of the 49 | MLC, the calculated score, and the relative risk inside and 50 | outside the cluster. In order, the elements of this list are named 51 | \code{zone_number, locations, duration, score, relrisk_in, 52 | relrisk_out}.} 53 | \item{observed}{A data frame containing, for each combination of zone 54 | and duration investigated, the zone number, duration, score, 55 | relative risks. The table is sorted by score with the top-scoring 56 | location on top. If \code{max_only = TRUE}, only contains a single 57 | row corresponding to the MLC.} 58 | \item{replicates}{A data frame of the Monte Carlo replicates of the scan 59 | statistic (if any), and the corresponding zones and durations.} 60 | \item{MC_pvalue}{The Monte Carlo \eqn{P}-value.} 61 | \item{Gumbel_pvalue}{A \eqn{P}-value obtained by fitting a Gumbel 62 | distribution to the replicate scan statistics.} 63 | \item{n_zones}{The number of zones scanned.} 64 | \item{n_locations}{The number of locations.} 65 | \item{max_duration}{The maximum duration considered.} 66 | \item{n_mcsim}{The number of Monte Carlo replicates made.} 67 | } 68 | } 69 | \description{ 70 | Calculate the space-time permutation scan statistic devised by Kulldorff 71 | (2005). 
72 | } 73 | \examples{ 74 | \dontrun{ 75 | set.seed(1) 76 | # Create location coordinates, calculate nearest neighbors, and create zones 77 | n_locs <- 50 78 | max_duration <- 5 79 | n_total <- n_locs * max_duration 80 | geo <- matrix(rnorm(n_locs * 2), n_locs, 2) 81 | knn_mat <- coords_to_knn(geo, 15) 82 | zones <- knn_zones(knn_mat) 83 | 84 | # Simulate data 85 | population <- matrix(rnorm(n_total, 100, 10), max_duration, n_locs) 86 | counts <- matrix(rpois(n_total, as.vector(population) / 20), 87 | max_duration, n_locs) 88 | 89 | # Inject outbreak/event/anomaly 90 | ob_dur <- 3 91 | ob_cols <- zones[[10]] 92 | ob_rows <- max_duration + 1 - seq_len(ob_dur) 93 | counts[ob_rows, ob_cols] <- matrix( 94 | rpois(ob_dur * length(ob_cols), 2 * population[ob_rows, ob_cols] / 20), 95 | length(ob_rows), length(ob_cols)) 96 | res <- scan_permutation(counts = counts, 97 | zones = zones, 98 | population = population, 99 | n_mcsim = 99, 100 | max_only = FALSE) 101 | } 102 | } 103 | \references{ 104 | Kulldorff, M., Heffernan, R., Hartman, J., Assunção, R. M., Mostashari, F. 105 | (2005). \emph{A space-time permutation scan statistic for disease outbreak 106 | detection}. PLoS Medicine, 2(3), 0216-0224. 107 | } 108 | -------------------------------------------------------------------------------- /man/scanstatistics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics.R 3 | \docType{package} 4 | \name{scanstatistics} 5 | \alias{scanstatistics} 6 | \alias{scanstatistics-package} 7 | \title{scanstatistics: Space-time anomaly detection using scan statistics.} 8 | \description{ 9 | The scanstatistics package provides two categories of important functions: 10 | data preparation functions, and the scan statistics themselves. 11 | } 12 | \section{Data preparation functions}{ 13 | 14 | These functions prepare your data for use. 
In particular, they help you 15 | define the \emph{zones} which will be considered by the scan statistics. 16 | } 17 | 18 | \section{Scan statistics}{ 19 | 20 | These are the functions used for space-time anomaly detection. Scan statistic 21 | functions for univariate space-time data have a name that begins with 22 | \code{scan_} and functions for multivariate space-time data have a name that 23 | begins with \code{mscan_}. 24 | } 25 | 26 | -------------------------------------------------------------------------------- /man/score_locations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics_general.R 3 | \name{score_locations} 4 | \alias{score_locations} 5 | \title{Score each location over zones and duration.} 6 | \usage{ 7 | score_locations(x, zones) 8 | } 9 | \arguments{ 10 | \item{x}{An object of class \code{scanstatistic}.} 11 | 12 | \item{zones}{A list of integer vectors.} 13 | } 14 | \value{ 15 | A \code{data.table} with the following columns: 16 | \describe{ 17 | \item{location}{The locations (as integers).} 18 | \item{total_score}{For each location, the sum of all window statistics 19 | that the location appears in.} 20 | \item{n_zones}{The number of spatial zones that the location appears 21 | in.} 22 | \item{score}{The total score divided by the number of zones and the 23 | maximum duration.} 24 | \item{relative_score}{The score divided by the maximum score.} 25 | } 26 | } 27 | \description{ 28 | For each location, compute the average of the statistic calculated for each 29 | space-time window that the location is included in, i.e. average the 30 | statistic over both zones and the maximum duration. 
31 | } 32 | \examples{ 33 | \dontrun{ 34 | # Simple example 35 | set.seed(1) 36 | table <- data.frame(zone = 1:5, duration = 1, score = 5:1) 37 | zones <- list(1:2, 1:3, 2:5, 4:5, c(1, 5)) 38 | x <- list(table = table, n_locations = 5, max_duration = 1, n_zones = 5) 39 | score_locations(x, zones) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/top_clusters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/scanstatistics_general.R 3 | \name{top_clusters} 4 | \alias{top_clusters} 5 | \title{Get the top (non-overlapping) clusters.} 6 | \usage{ 7 | top_clusters(x, zones, k = 5, overlapping = FALSE, gumbel = FALSE, 8 | alpha = NULL, ...) 9 | } 10 | \arguments{ 11 | \item{x}{An object of class scanstatistics.} 12 | 13 | \item{zones}{A list of integer vectors.} 14 | 15 | \item{k}{An integer, the number of clusters to return.} 16 | 17 | \item{overlapping}{Logical; should the top clusters be allowed to overlap in 18 | the spatial dimension? The default is \code{FALSE}.} 19 | 20 | \item{gumbel}{Logical; should a Gumbel P-value be calculated? The default is 21 | \code{FALSE}.} 22 | 23 | \item{alpha}{A significance level, which if not \code{NULL} will be used to 24 | calculate a critical value for the statistics in the table.} 25 | 26 | \item{...}{Parameters passed to \code{\link[stats]{quantile}}.} 27 | } 28 | \value{ 29 | A data frame with at most \eqn{k} rows, with columns 30 | \code{zone, duration, score} and possibly \code{MC_pvalue, Gumbel_pvalue} 31 | and \code{critical_value}. 32 | } 33 | \description{ 34 | Get the top \eqn{k} space-time clusters according to the statistic calculated 35 | for each cluster (the maximum being the scan statistic). The default is to 36 | return the spatially non-overlapping clusters, i.e. those that do not have 37 | any locations in common. 
38 | } 39 | \examples{ 40 | \dontrun{ 41 | set.seed(1) 42 | counts <- matrix(rpois(15, 3), 3, 5) 43 | zones <- list(1:2, 1:3, 2:5, c(1, 3), 4:5, c(1, 5)) 44 | scanres <- scan_permutation(counts, zones, n_mcsim = 5) 45 | top_clusters(scanres, zones, k = 4, overlapping = FALSE) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /references.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Kulldorff1998, 3 | author = {Martin Kulldorff and William F. Athas and Eric J. Feuer and Barry A. Miller and Charles R. Key}, 4 | title = {Evaluating cluster alarms: A space-time scan statistic and brain cancer in Los Alamos}, 5 | journal = {American Journal of Public Health}, 6 | year = {1998}, 7 | volume = {88}, 8 | pages = {1377–1380}, 9 | number = {9} 10 | } 11 | 12 | @ARTICLE{Tango2011, 13 | author = {Toshiro Tango and Kunihiko Takahashi and Kazuaki Kohriyama}, 14 | title = {A space-time scan statistic for detecting emerging outbreaks}, 15 | journal = {Biometrics}, 16 | year = {2011}, 17 | volume = {67}, 18 | pages = {106–115}, 19 | number = {1} 20 | } 21 | 22 | @mastersthesis{Kjellson2015, 23 | author = {Benjamin Kjellson}, 24 | title = {{Spatiotemporal Outbreak Detection: A Scan Statistic Based on the Zero-Inflated Poisson Distribution}}, 25 | school = {Stockholm University, Division of Mathematical Statistics}, 26 | address = {Sweden}, 27 | year = {2015}, 28 | url = {https://goo.gl/GdseSh} 29 | } 30 | 31 | @Manual{rsatscan, 32 | title = {rsatscan: Tools, Classes, and Methods for Interfacing with SaTScan Stand-Alone Software}, 33 | author = {Ken Kleinman}, 34 | year = {2015}, 35 | note = {R package version 0.3.9200}, 36 | url = {https://CRAN.R-project.org/package=rsatscan}, 37 | } 38 | 39 | @Manual{datatable, 40 | title = {data.table: Extension of Data.frame}, 41 | author = {M Dowle and A Srinivasan and T Short and S Lianoglou with contributions from R Saporta and E 
Antonyan}, 42 | year = {2015}, 43 | note = {R package version 1.9.6}, 44 | url = {https://CRAN.R-project.org/package=data.table}, 45 | } 46 | 47 | @INPROCEEDINGS{Neill2005, 48 | author = {Daniel B. Neill and Andrew W. Moore and Maheshkumar Sabhnani and Kenny Daniel}, 49 | title = {Detection of emerging space-time clusters}, 50 | booktitle = {Proceedings of the eleventh ACM SIGKDD international conference on 51 | Knowledge discovery in data mining}, 52 | year = {2005}, 53 | pages = {218–227}, 54 | organization = {ACM} 55 | } 56 | 57 | @Article{sets, 58 | title = {Generalized and Customizable Sets in {R}}, 59 | author = {David Meyer and Kurt Hornik}, 60 | journal = {Journal of Statistical Software}, 61 | year = {2009}, 62 | volume = {31}, 63 | number = {2}, 64 | pages = {1–27}, 65 | url = {http://www.jstatsoft.org/v31/i02/} 66 | } 67 | -------------------------------------------------------------------------------- /scanstatistics.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: No 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | PackageRoxygenize: rd,collate,namespace 21 | -------------------------------------------------------------------------------- /src/FastSubsetScan.h: -------------------------------------------------------------------------------- 1 | // #ifndef FASTSUBSETSCAN_H 2 | // #define FASTSUBSETSCAN_H 3 | // 4 | // #include 5 | // #include 6 | // #include "probability_functions.h" 7 | // #include "RcppArmadillo.h" 8 | // // [[depends(RcppArmadillo)]] 9 | // 10 | // struct Subset { 11 | // arma::uvec locations; 12 | // arma::uvec streams; 13 | // arma::uword duration; 14 
| // double score; 15 | // 16 | // Subset(arma::uvec locs, arma::uvec strs, arma::uword dur, double scr) : 17 | // locations {locs}, streams {strs}, duration {dur}, score {scr} {} 18 | // }; 19 | // 20 | // // CT = Cube type, MT = Matrix type, VT = vector type, T = scalar type 21 | // template 22 | // class FastSubsetScan { 23 | // 24 | // public: 25 | // FastSubsetScan(const CT& counts, 26 | // const arma::cube baselines, 27 | // const arma::uvec& zones, 28 | // const arma::uvec& zone_lengths, 29 | // const bool store_everything, 30 | // const arma::uword num_mcsim); 31 | // void run_scan() = 0; 32 | // virtual void run_mcsim() = 0; 33 | // 34 | // virtual Rcpp::List get_scan() = 0; 35 | // virtual Rcpp::List get_mcsim() = 0; 36 | // 37 | // protected: 38 | // arma::uword m_num_locs; 39 | // arma::uword m_num_zones; 40 | // arma::uword m_num_streams; 41 | // arma::uword m_max_dur; 42 | // arma::uword m_num_mcsim; 43 | // bool m_store_everything; 44 | // arma::uword m_mcsim_index; 45 | // arma::uword m_out_length; 46 | // CT m_counts; 47 | // arma::cube m_baselines; 48 | // arma::uvec m_zones; 49 | // arma::uvec m_zone_lengths; 50 | // 51 | // // Values calculated on observed data 52 | // arma::uvec m_zone_numbers; 53 | // arma::uvec m_durations; 54 | // arma::uvec m_streams; 55 | // arma::vec m_scores; 56 | // 57 | // // Values calculated on simulated data 58 | // arma::uvec sim_zone_numbers; 59 | // arma::uvec sim_durations; 60 | // arma::uvec sim_streams; 61 | // arma::vec sim_scores; 62 | // 63 | // // Functions 64 | // virtual double score_fun(T c, double b) = 0; 65 | // 66 | // // struct Subset { 67 | // // arma::uvec locations; 68 | // // arma::uvec streams; 69 | // // int duration; 70 | // // double score; 71 | // // 72 | // // Subset(arma::uvec loc, arma::uvec strms, int dur, double scr) : 73 | // // locations {loc}, streams {strms}, duration {dur}, score {scr} {} 74 | // // }; 75 | // 76 | // }; 77 | // 78 | // template 79 | // inline 
FastSubsetScan::FastSubsetScan( 80 | // const CT& counts, 81 | // const arma::cube baselines, 82 | // const arma::uvec& zones, 83 | // const arma::uvec& zone_lengths, 84 | // const bool store_everything, 85 | // const arma::uword num_mcsim) : 86 | // m_counts(counts), 87 | // m_num_locs(counts.n_cols), 88 | // m_num_zones(zone_lengths.n_elem), 89 | // m_num_streams(counts.n_slices), 90 | // m_max_dur(counts.n_rows), 91 | // m_zones(zones), 92 | // m_zone_lengths(zone_lengths), 93 | // m_store_everything(store_everything), 94 | // m_num_mcsim(num_mcsim) { 95 | // 96 | // 97 | // 98 | // } 99 | // 100 | // 101 | // template 102 | // class SubsetAggregation : public FastSubsetScan { 103 | // public: 104 | // SubsetAggregation(const CT& counts, 105 | // const arma::cube baselines, 106 | // const arma::uvec& zones, 107 | // const arma::uvec& zone_lengths, 108 | // const bool store_everything, 109 | // const arma::uword num_mcsim); 110 | // 111 | // protected: 112 | // arma::uvec opt_locs(const arma::uvec& streams, const arma::uword w); 113 | // arma::uvec opt_strs(const arma::uvec& streams, const arma::uword w); 114 | // Subset fastloc_naivestream(); 115 | // Subset naiveloc_faststream(); 116 | // std::vector optimize_both(); 117 | // }; 118 | // 119 | // template 120 | // inline SubsetAggregation::SubsetAggregation( 121 | // const CT& counts, 122 | // const arma::cube baselines, 123 | // const arma::uvec& zones, 124 | // const arma::uvec& zone_lengths, 125 | // const bool store_everything, 126 | // const arma::uword num_mcsim) 127 | // : FastSubsetScan(counts, baselines, zones, zone_lengths, 128 | // store_everything, num_mcsim) { 129 | // 130 | // } 131 | // 132 | // template 133 | // inline arma::uvec SubsetAggregation::opt_locs( 134 | // const arma::uvec& streams, const arma::uword w) { 135 | // VT C_i(m_num; 136 | // VT B_i; 137 | // } 138 | // 139 | // 140 | // #endif 141 | -------------------------------------------------------------------------------- 
/src/PBPERMscan.h: -------------------------------------------------------------------------------- 1 | #ifndef PBPERMSCAN_H 2 | #define PBPERMSCAN_H 3 | 4 | #include "PBPOIabstract.h" 5 | #include "scan_utility.h" 6 | 7 | class PBPERMscan : public PBPOIabstract { 8 | 9 | public: 10 | PBPERMscan(const arma::umat& counts, 11 | const arma::mat& baselines, 12 | const arma::uvec& zones, 13 | const arma::uvec& zone_lengths, 14 | const bool store_everything, 15 | const arma::uword num_mcsim); 16 | 17 | private: 18 | // Each case in counts expanded --> use for permutation 19 | arma::umat m_counts_expanded; 20 | arma::uvec m_time_counts; 21 | arma::uvec m_loc_counts; 22 | void simulate_counts() override; 23 | 24 | }; 25 | 26 | // Implementations ------------------------------------------------------------- 27 | 28 | inline PBPERMscan::PBPERMscan(const arma::umat& counts, 29 | const arma::mat& baselines, 30 | const arma::uvec& zones, 31 | const arma::uvec& zone_lengths, 32 | const bool store_everything, 33 | const arma::uword num_mcsim) 34 | : PBPOIabstract(counts, baselines, zones, zone_lengths, store_everything, 35 | num_mcsim) { 36 | m_counts_expanded = expand_matrix(counts); 37 | m_time_counts = m_counts_expanded.col(0); 38 | // m_loc_counts = m_counts_expanded.col(1); 39 | 40 | } 41 | 42 | // Workhorse functions --------------------------------------------------------- 43 | 44 | inline void PBPERMscan::simulate_counts() { 45 | m_counts_expanded.col(0) = shuffle_time_counts(m_time_counts); 46 | m_counts = arma::cumsum(contract_matrix(m_counts_expanded, 47 | m_counts.n_rows, m_counts.n_cols)); 48 | } 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /src/PBPOIscan.h: -------------------------------------------------------------------------------- 1 | #ifndef PBPOISCAN_H 2 | #define PBPOISCAN_H 3 | 4 | #include "PBPOIabstract.h" 5 | #include "scan_utility.h" 6 | #include 7 | 8 | class PBPOIscan : public PBPOIabstract { 
9 | 10 | public: 11 | PBPOIscan(const arma::umat& counts, 12 | const arma::mat& baselines, 13 | const arma::uvec& zones, 14 | const arma::uvec& zone_lengths, 15 | const bool store_everything, 16 | const arma::uword num_mcsim); 17 | 18 | private: 19 | void simulate_counts() override; 20 | 21 | }; 22 | 23 | // Implementations ------------------------------------------------------------- 24 | 25 | inline PBPOIscan::PBPOIscan(const arma::umat& counts, 26 | const arma::mat& baselines, 27 | const arma::uvec& zones, 28 | const arma::uvec& zone_lengths, 29 | const bool store_everything, 30 | const arma::uword num_mcsim) 31 | : PBPOIabstract(counts, baselines, zones, zone_lengths, store_everything, 32 | num_mcsim) {} 33 | 34 | // Workhorse functions --------------------------------------------------------- 35 | 36 | inline void PBPOIscan::simulate_counts() { 37 | Rcpp::NumericVector probs(m_counts.n_cols * m_counts.n_rows); 38 | probs = vec2NumericVector(arma::vectorise(m_baselines_orig)) / m_total_count; 39 | 40 | arma::uvec vec_counts(m_counts.n_cols * m_counts.n_rows); 41 | vec_counts = IntegerVector2uvec( 42 | Rcpp::RcppArmadillo::Rf_rmultinom(m_total_count, probs)); 43 | 44 | // Columns of m_counts should be cumulative sums 45 | for (arma::uword j = 0; j < m_counts.n_cols; ++j) { 46 | m_counts.col(j) = arma::cumsum( 47 | vec_counts.subvec(j * m_counts.n_rows, (j + 1) * m_counts.n_rows - 1)); 48 | } 49 | } 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/ZIPutility.h: -------------------------------------------------------------------------------- 1 | #ifndef ZIPUTILITY_H 2 | #define ZIPUTILITY_H 3 | 4 | #include 5 | #include "RcppArmadillo.h" 6 | // [[depends(RcppArmadillo)]] 7 | 8 | 9 | // Estimate a structural zero indicator for the ZIP distribution. 10 | // 11 | // Estimate a structural zero indicator for the ZIP distribution. 
12 | // @param mu A positive scalar; the expected values of the counts or 13 | // the corresponding population. 14 | // @param p A positive scalar; the structural zero probability. 15 | // @param q A positive scalar; the relative risk. 16 | // @return A scalar; the estimate of the structural zero indicator. 17 | // @keywords internal 18 | inline double zip_zeroindic(const double mu, const double p, const double q) { 19 | return p / (p + (1 - p) * std::exp(-q * mu)); 20 | } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /src/probability_functions.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "probability_functions.h" 3 | using namespace Rcpp; 4 | 5 | 6 | // Poisson distribution -------------------------------------------------------- 7 | 8 | // The log probability mass function of the Poisson distribution. 9 | // 10 | // The log probability mass function of the Poisson distribution, evaluated at 11 | // \code{y}. 12 | // @param y An integer. 13 | // @param mu A positive scalar. 14 | // @return A log-probability. 15 | // @keywords internal 16 | double poisson_lpmf(const double y, const double mu) { 17 | return y * log(mu) - lgamma(y + 1.0) - mu; 18 | } 19 | 20 | // Calculate the Poisson log-likelihood. 21 | // 22 | // Calculate the log-likelihood for the Poisson distribution. 23 | // @param y A non-negative integer vector/matrix; the observed counts. 24 | // @param mu A vector/matrix of positive scalars; the expected values of the 25 | // counts. 26 | // @param q A scalar greater than or equal to 1; the relative risk. 27 | // @return A non-positive scalar; the log-likelihood. 
28 | // @keywords internal 29 | double poisson_loglihood(const arma::uvec &y, 30 | const arma::vec &mu, 31 | const double q) { 32 | double loglihood = 0.0; 33 | for (int i = 0; i < y.n_elem; ++i) { 34 | loglihood += poisson_lpmf(y(i), q * mu(i)); 35 | } 36 | return loglihood; 37 | } 38 | 39 | // Negative binomial distribution ---------------------------------------------- 40 | 41 | // Draw an observation from the negative binomial distribution parametrized by 42 | // mean \eqn{\mu} and overdispersion \eqn{\omega = 1 + \mu / \theta}. 43 | int rnbinom2(const double mu, const double omega) { 44 | return (omega - 1.0 < 1e-9 ? 45 | R::rpois(mu) : 46 | R::rnbinom(mu / (omega - 1.0), 1.0 / omega)); 47 | } 48 | 49 | // Zero-inflated Poisson distribution ------------------------------------------ 50 | 51 | // The log probability mass function of the ZIP distribution. 52 | // 53 | // The log probability mass function of the zero-inflated Poisson (ZIP) distribution, evaluated at 54 | // \code{y}. 55 | // @param y A non-negative integer; the observed count. 56 | // @param mu A positive scalar; the expected value of the count. 57 | // @param p A scalar between 0 and 1; the structural zero probability. 58 | // @return A non-positive scalar; the loglihood contribution of the 59 | // observation. 60 | // @keywords internal 61 | double zip_lpmf(const int y, const double mu, const double p) { 62 | if (y == 0) { 63 | return log(p + (1 - p) * exp(-mu)); 64 | } else { 65 | return log(1 - p) + y * log(mu) - lgamma(y + 1.0) - mu; 66 | } 67 | } 68 | 69 | // Calculate the ZIP log-likelihood. 70 | // 71 | // Calculate the (incomplete information) log-likelihood for the zero-inflated 72 | // Poisson distribution. 73 | // @param y A vector/matrix of non-negative integers; the observed counts. 74 | // @param mu A vector/matrix of positive scalars; the expected values of the 75 | // counts. 76 | // @param p A vector of scalars between 0 and 1; the structural zero 77 | // probabilities. 
78 | // @param q A scalar greater than or equal to 1; the relative risk. 79 | // @return A non-positive scalar; the incomplete information ZIP loglihood. 80 | // @keywords internal 81 | double zip_loglihood(const arma::uvec &y, 82 | const arma::vec &mu, 83 | const arma::vec &p, 84 | const double q) { 85 | double loglihood = 0.0; 86 | for (int i = 0; i < y.n_elem; ++i) { 87 | loglihood += zip_lpmf(y(i), q * mu(i), p(i)); 88 | } 89 | return loglihood; 90 | } 91 | 92 | // Draw an observation from the ZIP distribution. 93 | // 94 | // Draw a sample (one observation) from the zero-inflated Poisson distribution. 95 | // @param mu Scalar; The Poisson mean parameter. 96 | // @param p Scalar; the structural zero probability. 97 | // @return An integer. 98 | // @keywords internal 99 | int rzip(const double mu, const double p) { 100 | return (R::runif(0.0, 1.0) < p ? 0 : static_cast(R::rpois(mu))); 101 | } 102 | 103 | -------------------------------------------------------------------------------- /src/probability_functions.h: -------------------------------------------------------------------------------- 1 | #ifndef PROBABILITY_FUNCTIONS_H 2 | #define PROBABILITY_FUNCTIONS_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | // Poisson distribution 8 | double poisson_lpmf(const double x, const double mu); 9 | double poisson_loglihood(const arma::uvec &y, 10 | const arma::vec &mu, 11 | const double q = 1.0); 12 | 13 | // Negative binomial distribution 14 | int rnbinom2(const double mu, const double omega); 15 | 16 | // ZIP distribution 17 | double zip_lpmf(const int y, const double mu, const double p); 18 | double zip_loglihood(const arma::uvec &y, 19 | const arma::vec &mu, 20 | const arma::vec &p, 21 | const double q = 1.0); 22 | int rzip(const double mu, const double p); 23 | 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/scan_bayes_negbin.cpp: 
-------------------------------------------------------------------------------- 1 | #include "scan_bayes_negbin.h" 2 | #include "BGPscan.h" 3 | 4 | Rcpp::List scan_bayes_negbin_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::uvec& zones, 7 | const arma::uvec& zone_lengths, 8 | const double outbreak_prob, 9 | const double alpha_null, 10 | const double beta_null, 11 | const double alpha_alt, 12 | const double beta_alt, 13 | const arma::vec& inc_values, 14 | const arma::vec& inc_probs) { 15 | BGPscan ob {counts, baselines, zones, zone_lengths, outbreak_prob, 16 | alpha_null, beta_null, alpha_alt, beta_alt, inc_values, inc_probs}; 17 | ob.run_over_inc(); 18 | return ob.get_results(); 19 | } 20 | 21 | -------------------------------------------------------------------------------- /src/scan_bayes_negbin.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_BAYES_NEGBIN_H 2 | #define SCAN_BAYES_NEGBIN_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' Calculate the "Bayesian Spatial Scan Statistic" by Neill et al. (2006). 8 | //' 9 | //' Calculate the "Bayesian Spatial Scan Statistic" by Neill et al. (2006), 10 | //' adapted to a spatio-temporal setting. The scan statistic assumes that, 11 | //' given the relative risk, the data follows a Poisson distribution. The 12 | //' relative risk is in turn assigned a Gamma distribution prior, yielding a 13 | //' negative binomial marginal distribution for the counts. 14 | //' @param counts An integer matrix (most recent timepoint in first row). 15 | //' @param baselines A matrix with positive entries (most recent timepoint in 16 | //' first row). 17 | //' @param zones An integer vector (all zones concatenated; locations indexed 18 | //' from 0 and up). 19 | //' @param zone_lengths An integer vector. 20 | //' @param outbreak_prob A scalar; the probability of an outbreak (at any time, 21 | //' any place). 
22 | //' @param alpha_null A scalar; the shape parameter for the gamma distribution 23 | //' under the null hypothesis of no anomaly. 24 | //' @param beta_null A scalar; the scale parameter for the gamma distribution 25 | //' under the null hypothesis of no anomaly. 26 | //' @param alpha_alt A scalar; the shape parameter for the gamma distribution 27 | //' under the alternative hypothesis of an anomaly. 28 | //' @param beta_alt A scalar; the scale parameter for the gamma distribution 29 | //' under the alternative hypothesis of an anomaly. 30 | //' @param inc_values A vector of possible values for the increase in the mean 31 | //' (and variance) of an anomalous count. 32 | //' @param inc_probs A vector of the prior probabilities of each value in 33 | //' \code{inc_values}. 34 | //' @return A list with elements \code{priors} (list), \code{posteriors} (list), 35 | //' and \code{marginal_data_prob} (scalar). The list \code{priors} has 36 | //' elements 37 | //' \describe{ 38 | //' \item{null_prior}{The prior probability of no anomaly.} 39 | //' \item{alt_prior}{The prior probability of an anomaly.} 40 | //' \item{inc_prior}{A vector (matrix with 1 row) of prior probabilities 41 | //' of each value in the argument \code{inc_values}.} 42 | //' \item{window_prior}{The prior probability of an outbreak in any of the 43 | //' space-time windows.} 44 | //' } 45 | //' The list \code{posteriors} has elements 46 | //' \describe{ 47 | //' \item{null_posterior}{The posterior probability of no anomaly.} 48 | //' \item{alt_posterior}{The posterior probability of an anomaly.} 49 | //' \item{inc_posterior}{A data frame with columns \code{inc_values} and 50 | //' \code{inc_posterior}.} 51 | //' \item{window_posteriors}{A data frame with columns \code{zone}, 52 | //' \code{duration}, \code{log_posterior} and 53 | //' \code{log_bayes_factor}, each row 54 | //' corresponding to a space-time window.} 55 | //' \item{space_time_posteriors}{A matrix with the posterior anomaly 56 | //' 
probability of each location-time 57 | //' combination.} 58 | //' \item{location_posteriors}{A vector (matrix with 1 row) with the 59 | //' posterior probability of an anomaly at each 60 | //' location.} 61 | //' } 62 | //' @export 63 | //' @keywords internal 64 | // [[Rcpp::export]] 65 | Rcpp::List scan_bayes_negbin_cpp(const arma::umat& counts, 66 | const arma::mat& baselines, 67 | const arma::uvec& zones, 68 | const arma::uvec& zone_lengths, 69 | const double outbreak_prob, 70 | const double alpha_null, 71 | const double beta_null, 72 | const double alpha_alt, 73 | const double beta_alt, 74 | const arma::vec& inc_values, 75 | const arma::vec& inc_probs); 76 | 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /src/scan_eb_negbin.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_eb_negbin.h" 2 | #include "EBNBscan.h" 3 | 4 | Rcpp::List scan_eb_negbin_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::mat& overdisp, 7 | const arma::uvec& zones, 8 | const arma::uvec& zone_lengths, 9 | const bool store_everything, 10 | const arma::uword num_mcsim, 11 | const bool score_hotspot) { 12 | 13 | EBNBscan ob {counts, baselines, overdisp, zones, zone_lengths, 14 | store_everything, num_mcsim, score_hotspot}; 15 | ob.run_scan(); 16 | ob.run_mcsim(); 17 | return Rcpp::List::create( 18 | Rcpp::Named("observed") = ob.get_scan(), 19 | Rcpp::Named("simulated") = ob.get_mcsim()); 20 | } 21 | 22 | -------------------------------------------------------------------------------- /src/scan_eb_negbin.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_EB_NEGBIN_H 2 | #define SCAN_EB_NEGBIN_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' Calculate the expectation-based negative binomial scan statistic. 
8 | //' 9 | //' Calculate the expectation-based negative binomial scan statistic and Monte 10 | //' Carlo replicates. 11 | //' @param counts Integer matrix (most recent timepoint in first row) 12 | //' @param baselines Matrix (most recent timepoint in first row) 13 | //' @param overdisp Matrix (most recent timepoint in first row) 14 | //' @param zones Integer vector (all zones concatenated; locations indexed from 15 | //' 0 and up) 16 | //' @param zone_lengths Integer vector 17 | //' @param store_everything Boolean 18 | //' @param num_mcsim Integer 19 | //' @param score_hotspot Boolean 20 | //' @return A list with elements \code{observed} and \code{simulated}, each 21 | //' being a data frame with columns: 22 | //' \describe{ 23 | //' \item{zone}{The top-scoring zone (spatial component of MLC).} 24 | //' \item{duration}{The corresponding duration (time-length of MLC).} 25 | //' \item{score}{The value of the loglihood ratio statistic (the scan 26 | //' statistic).} 27 | //' \item{relrisk}{The estimated relative risk.} 28 | //' \item{n_iter}{The number of iterations performed by the EM algorithm.} 29 | //' } 30 | //' @export 31 | //' @keywords internal 32 | // [[Rcpp::export]] 33 | Rcpp::List scan_eb_negbin_cpp(const arma::umat& counts, 34 | const arma::mat& baselines, 35 | const arma::mat& overdisp, 36 | const arma::uvec& zones, 37 | const arma::uvec& zone_lengths, 38 | const bool store_everything, 39 | const arma::uword num_mcsim, 40 | const bool score_hotspot); 41 | 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/scan_eb_poisson.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_eb_poisson.h" 2 | #include "EBPOIscan.h" 3 | 4 | Rcpp::List scan_eb_poisson_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::uvec& zones, 7 | const arma::uvec& zone_lengths, 8 | const bool store_everything, 9 | const arma::uword num_mcsim) { 10 | 11 
| EBPOIscan ob {counts, baselines, zones, zone_lengths, store_everything, 12 | num_mcsim}; 13 | ob.run_scan(); 14 | ob.run_mcsim(); 15 | return Rcpp::List::create( 16 | Rcpp::Named("observed") = ob.get_scan(), 17 | Rcpp::Named("simulated") = ob.get_mcsim()); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /src/scan_eb_poisson.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_EB_POI_H 2 | #define SCAN_EB_POI_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' Calculate the expectation-based Poisson scan statistic. 8 | //' 9 | //' Calculate the expectation-based Poisson scan statistic and Monte Carlo 10 | //' replicates. 11 | //' @param counts An integer matrix (most recent timepoint in first row). 12 | //' @param baselines A matrix with positive entries (most recent timepoint in 13 | //' first row). 14 | //' @param zones An integer vector (all zones concatenated; locations indexed 15 | //' from 0 and up). 16 | //' @param zone_lengths An integer vector. 17 | //' @param store_everything A boolean. 18 | //' @param num_mcsim An integer. 
19 | //' @return A list with elements \code{observed} and \code{simulated}, each 20 | //' being a data frame with columns: 21 | //' \describe{ 22 | //' \item{zone}{The top-scoring zone (spatial component of MLC).} 23 | //' \item{duration}{The corresponding duration (time-length of MLC).} 24 | //' \item{score}{The value of the loglihood ratio statistic (the scan 25 | //' statistic).} 26 | //' \item{relrisk_in}{The estimated relative risk inside.} 27 | //' \item{relrisk_in}{The estimated relative risk outside.} 28 | //' } 29 | //' @export 30 | //' @keywords internal 31 | // [[Rcpp::export]] 32 | Rcpp::List scan_eb_poisson_cpp(const arma::umat& counts, 33 | const arma::mat& baselines, 34 | const arma::uvec& zones, 35 | const arma::uvec& zone_lengths, 36 | const bool store_everything, 37 | const arma::uword num_mcsim); 38 | 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/scan_eb_zip.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_eb_zip.h" 2 | #include "EBZIPscan.h" 3 | 4 | Rcpp::List scan_eb_zip_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::mat& probs, 7 | const arma::uvec& zones, 8 | const arma::uvec& zone_lengths, 9 | const double rel_tol, 10 | const bool store_everything, 11 | const arma::uword num_mcsim) { 12 | 13 | EBZIPscan ob {counts, baselines, probs, zones, zone_lengths, rel_tol, 14 | store_everything, num_mcsim}; 15 | ob.run_scan(); 16 | ob.run_mcsim(); 17 | return Rcpp::List::create( 18 | Rcpp::Named("observed") = ob.get_scan(), 19 | Rcpp::Named("simulated") = ob.get_mcsim()); 20 | } 21 | 22 | -------------------------------------------------------------------------------- /src/scan_eb_zip.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_EB_ZIP_H 2 | #define SCAN_EB_ZIP_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' 
Calculate the highest-value EB ZIP loglihood ratio statistic. 8 | //' 9 | //' Calculate the expectation-based ZIP loglihood ratio statistic for each zone 10 | //' and duration, but only keep the zone and duration with the highest value 11 | //' (the MLC). The estimate of the relative risk is also calculated, along with 12 | //' the number of iterations the EM algorithm performed. 13 | //' @param counts matrix (most recent timepoint in first row) 14 | //' @param baselines matrix (most recent timepoint in first row) 15 | //' @param probs matrix (most recent timepoint in first row) 16 | //' @param zones integer vector (all zones concatenated; locations indexed from 17 | //' 0 and up) 18 | //' @param zone_lengths integer vector 19 | //' @param rel_tol double 20 | //' @param store_everything boolean 21 | //' @param num_mcsim int 22 | //' @return A list with elements \code{observed} and \code{simulated}, each 23 | //' being a data frame with columns: 24 | //' \describe{ 25 | //' \item{zone}{The top-scoring zone (spatial component of MLC).} 26 | //' \item{duration}{The corresponding duration (time-length of MLC).} 27 | //' \item{score}{The value of the loglihood ratio statistic (the scan 28 | //' statistic).} 29 | //' \item{relrisk}{The estimated relative risk.} 30 | //' \item{n_iter}{The number of iterations performed by the EM algorithm.} 31 | //' } 32 | //' @export 33 | //' @keywords internal 34 | // [[Rcpp::export]] 35 | Rcpp::List scan_eb_zip_cpp(const arma::umat& counts, 36 | const arma::mat& baselines, 37 | const arma::mat& probs, 38 | const arma::uvec& zones, 39 | const arma::uvec& zone_lengths, 40 | const double rel_tol, 41 | const bool store_everything, 42 | const arma::uword num_mcsim); 43 | 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/scan_pb_perm.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_pb_perm.h" 2 | #include "PBPERMscan.h" 3 | 4 | 
Rcpp::List scan_pb_perm_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::uvec& zones, 7 | const arma::uvec& zone_lengths, 8 | const bool store_everything, 9 | const arma::uword num_mcsim) { 10 | 11 | PBPERMscan ob {counts, baselines, zones, zone_lengths, store_everything, 12 | num_mcsim}; 13 | ob.run_scan(); 14 | ob.run_mcsim(); 15 | return Rcpp::List::create( 16 | Rcpp::Named("observed") = ob.get_scan(), 17 | Rcpp::Named("simulated") = ob.get_mcsim()); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /src/scan_pb_perm.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_PB_PERM_H 2 | #define SCAN_PB_PERM_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' Calculate the space-time permutation scan statistic. 8 | //' 9 | //' Calculate the space-time permutation scan statistic (Kulldorff 2005) and 10 | //' Monte Carlo replicates. 11 | //' @param counts An integer matrix (most recent timepoint in first row). 12 | //' @param baselines A matrix with positive entries (most recent timepoint in 13 | //' first row). 14 | //' @param zones An integer vector (all zones concatenated; locations indexed 15 | //' from 0 and up). 16 | //' @param zone_lengths An integer vector. 17 | //' @param store_everything A boolean. 18 | //' @param num_mcsim An integer. 
19 | //' @return A list with elements \code{observed} and \code{simulated}, each 20 | //' being a data frame with columns: 21 | //' \describe{ 22 | //' \item{zone}{The top-scoring zone (spatial component of MLC).} 23 | //' \item{duration}{The corresponding duration (time-length of MLC).} 24 | //' \item{score}{The value of the loglihood ratio statistic (the scan 25 | //' statistic).} 26 | //' \item{relrisk_in}{The estimated relative risk inside.} 27 | //' \item{relrisk_out}{The estimated relative risk outside.} 28 | //' } 29 | //' @export 30 | //' @keywords internal 31 | // [[Rcpp::export]] 32 | Rcpp::List scan_pb_perm_cpp(const arma::umat& counts, 33 | const arma::mat& baselines, 34 | const arma::uvec& zones, 35 | const arma::uvec& zone_lengths, 36 | const bool store_everything, 37 | const arma::uword num_mcsim); 38 | 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/scan_pb_poisson.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_pb_poisson.h" 2 | #include "PBPOIscan.h" 3 | 4 | Rcpp::List scan_pb_poisson_cpp(const arma::umat& counts, 5 | const arma::mat& baselines, 6 | const arma::uvec& zones, 7 | const arma::uvec& zone_lengths, 8 | const bool store_everything, 9 | const arma::uword num_mcsim) { 10 | 11 | PBPOIscan ob {counts, baselines, zones, zone_lengths, store_everything, 12 | num_mcsim}; 13 | ob.run_scan(); 14 | ob.run_mcsim(); 15 | return Rcpp::List::create( 16 | Rcpp::Named("observed") = ob.get_scan(), 17 | Rcpp::Named("simulated") = ob.get_mcsim()); 18 | } 19 | 20 | -------------------------------------------------------------------------------- /src/scan_pb_poisson.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_PB_POI_H 2 | #define SCAN_PB_POI_H 3 | 4 | #include "RcppArmadillo.h" 5 | // [[depends(RcppArmadillo)]] 6 | 7 | //' Calculate the population-based Poisson scan statistic. 
8 | //' 9 | //' Calculate the population-based Poisson scan statistic and Monte Carlo 10 | //' replicates. 11 | //' @param counts integer matrix (most recent timepoint in first row) 12 | //' @param baselines matrix (most recent timepoint in first row) 13 | //' @param zones integer vector (all zones concatenated; locations indexed from 14 | //' 0 and up) 15 | //' @param zone_lengths integer vector 16 | //' @param store_everything boolean 17 | //' @param num_mcsim int 18 | //' @return A list with elements \code{observed} and \code{simulated}, each 19 | //' being a data frame with columns: 20 | //' \describe{ 21 | //' \item{zone}{The top-scoring zone (spatial component of MLC).} 22 | //' \item{duration}{The corresponding duration (time-length of MLC).} 23 | //' \item{score}{The value of the loglihood ratio statistic (the scan 24 | //' statistic).} 25 | //' \item{relrisk_in}{The estimated relative risk inside.} 26 | //' \item{relrisk_out}{The estimated relative risk outside.} 27 | //' } 28 | //' @export 29 | //' @keywords internal 30 | // [[Rcpp::export]] 31 | Rcpp::List scan_pb_poisson_cpp(const arma::umat& counts, 32 | const arma::mat& baselines, 33 | const arma::uvec& zones, 34 | const arma::uvec& zone_lengths, 35 | const bool store_everything, 36 | const arma::uword num_mcsim); 37 | 38 | 39 | #endif 40 | -------------------------------------------------------------------------------- /src/scan_utility.cpp: -------------------------------------------------------------------------------- 1 | #include "scan_utility.h" 2 | 3 | std::vector get_zero_indices(const arma::uvec& v) { 4 | std::vector zero_idx; 5 | for (arma::uword i = 0; i < v.n_elem; ++i) { 6 | if (v[i] == 0) zero_idx.push_back(i); 7 | } 8 | return zero_idx; 9 | } 10 | 11 | // Rcpp vector to armadillo vectors -------------------------------------------- 12 | 13 | arma::vec NumericVector2vec(const Rcpp::NumericVector& v) { 14 | arma::vec x(v.size()); 15 | for (arma::uword i = 0; i < v.size(); ++i) x.at(i) = 
v.at(i); 16 | return x; 17 | } 18 | 19 | arma::uvec IntegerVector2uvec(const Rcpp::IntegerVector& v) { 20 | arma::uvec x(v.size()); 21 | for (arma::uword i = 0; i < v.size(); ++i) x.at(i) = v.at(i); 22 | return x; 23 | } 24 | 25 | // armadillo vectors to Rcpp vectors ------------------------------------------- 26 | 27 | Rcpp::NumericVector vec2NumericVector(const arma::vec& v) { 28 | Rcpp::NumericVector x(v.size()); 29 | for (arma::uword i = 0; i < v.size(); ++i) x.at(i) = v.at(i); 30 | return x; 31 | } 32 | 33 | Rcpp::IntegerVector uvec2IntegerVector(const arma::uvec& v) { 34 | Rcpp::IntegerVector x(v.size()); 35 | for (arma::uword i = 0; i < v.size(); ++i) x.at(i) = v.at(i); 36 | return x; 37 | } 38 | 39 | // 40 | 41 | arma::umat expand_matrix(const arma::umat& A) { 42 | arma::umat res(static_cast<arma::uword>(arma::accu(A)), 2); 43 | arma::uword index = 0; 44 | for (arma::uword j = 0; j < A.n_cols; ++j) { 45 | for (arma::uword i = 0; i < A.n_rows; ++i) { 46 | for (arma::uword k = 0; k < A(i, j); ++k) { 47 | res(index, 0) = i; 48 | res(index, 1) = j; 49 | ++index; 50 | } 51 | } 52 | } 53 | return res; 54 | } 55 | 56 | arma::umat contract_matrix(const arma::umat& A, 57 | arma::uword nr, arma::uword nc) { 58 | arma::umat out(nr, nc, arma::fill::zeros); 59 | for (arma::uword i = 0; i < A.n_rows; ++i) { 60 | ++out(A(i, 0), A(i, 1)); 61 | } 62 | return out; 63 | } 64 | 65 | // Permute column 0 (passed as v) uniformly at random using the Fisher-Yates algorithm 66 | arma::uvec shuffle_time_counts(const arma::uvec& v) { 67 | arma::uvec res(v); 68 | for (arma::uword i = res.n_elem; i-- > 1; ) { // i = n - 1, ..., 1; nothing to do when n < 2 69 | arma::uword j = static_cast<arma::uword>(R::runif(0.0, static_cast<double>(i) + 1.0)); // uniform on {0, ..., i}; excluding j == i would only generate cyclic permutations 70 | if (j > i) j = i; // guard in case the draw lands on the upper bound 71 | arma::uword tmp = res.at(i); 72 | res.at(i) = res.at(j); 73 | res.at(j) = tmp; 74 | } 75 | return res; 76 | } 77 | 78 | arma::umat permute_matrix(const arma::umat& A) { 79 | arma::umat x = expand_matrix(A); 80 | x.col(0) = shuffle_time_counts(x.col(0)); 81 | return contract_matrix(x, A.n_rows, A.n_cols); 82 | } 83 | 84 | 
double log_sum_exp(const arma::vec& v, 85 | const double start_val, 86 | const double max_val) { 87 | double exp_sum = start_val; 88 | for (arma::uword i = 0; i < v.n_elem; ++i) { 89 | exp_sum += std::exp(v(i) - max_val); 90 | } 91 | return max_val + std::log(exp_sum); 92 | } 93 | 94 | double log_sum_exp(const arma::vec& v, const double max_val) { 95 | return log_sum_exp(v, 0.0, max_val); 96 | } 97 | 98 | double log_sum_exp(const arma::vec& v) { 99 | return log_sum_exp(v, 0.0, v.max()); 100 | } 101 | 102 | double log_sum_exp(const double a, const double b) { 103 | double m = std::max(a, b); 104 | return m + std::log(std::exp(a - m) + std::exp(b - m)); 105 | } 106 | 107 | -------------------------------------------------------------------------------- /src/scan_utility.h: -------------------------------------------------------------------------------- 1 | #ifndef SCAN_UTILITY_H 2 | #define SCAN_UTILITY_H 3 | 4 | #include 5 | #include 6 | #include "RcppArmadillo.h" 7 | // [[depends(RcppArmadillo)]] 8 | 9 | 10 | //' Get indices of zero elements in a vector. 11 | //' @param v An integer vector. 12 | //' @return A vector with the indices of elements equal to zero in \code{v}. 13 | //' Indices start at zero. 
14 | //' @keywords internal 15 | //' @export 16 | // [[Rcpp::export]] 17 | std::vector get_zero_indices(const arma::uvec& v); 18 | 19 | // Rcpp vector to armadillo vectors -------------------------------------------- 20 | 21 | arma::vec NumericVector2vec(const Rcpp::NumericVector& v); 22 | 23 | arma::uvec IntegerVector2uvec(const Rcpp::IntegerVector& v); 24 | 25 | // armadillo vectors to Rcpp vectors ------------------------------------------- 26 | 27 | // Convert an armadillo vec to an Rcpp NumericVector 28 | Rcpp::NumericVector vec2NumericVector(const arma::vec& v); 29 | 30 | // Convert an armadillo vec to an Rcpp NumericVector 31 | Rcpp::IntegerVector uvec2IntegerVector(const arma::uvec& v); 32 | 33 | 34 | // Comment: could not make template version of above 2 functions to work 35 | 36 | // Functions for permuting the counts in an integer matrix while preserving the 37 | // row and column marginal sums. 38 | // integer matrix --> expand_matrix --> permute one column --> contract matrix 39 | arma::umat expand_matrix(const arma::umat& A); 40 | 41 | arma::umat contract_matrix(const arma::umat& A, arma::uword nr, arma::uword nc); 42 | 43 | // Permute using Fisher-Yates algorithm 44 | arma::uvec shuffle_time_counts(const arma::uvec& v); 45 | 46 | //' Permute the entries of the matrix, preserving row and column marginals. 47 | //' 48 | //' Permute the entries of the matrix, preserving row and column marginals. 49 | //' @param A An integer matrix. 50 | //' @return An integer matrix. 
51 | //' @keywords internal 52 | // [[Rcpp::export]] 53 | arma::umat permute_matrix(const arma::umat& A); 54 | 55 | double log_sum_exp(const arma::vec& v, 56 | const double start_val, 57 | const double max_val); 58 | 59 | double log_sum_exp(const arma::vec& v, const double max_val); 60 | 61 | double log_sum_exp(const arma::vec& v); 62 | 63 | double log_sum_exp(const double a, const double b); 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(scanstatistics) 3 | 4 | test_check("scanstatistics") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_baseline_estimation.R: -------------------------------------------------------------------------------- 1 | context("Baseline estimation") 2 | 3 | test_that("estimate_baselines: bad input", { 4 | 5 | # Fewer locations implied by counts than population: 6 | expect_error(estimate_baselines(matrix(0, 3, 2), 1:4), 7 | paste0("The number of locations implied must be the same in the", 8 | " counts and the population arguments.")) 9 | 10 | # Fewer locations implied by population than counts: 11 | expect_error(estimate_baselines(matrix(0, 3, 4), 1:2), 12 | paste0("The number of locations implied must be the same in the", 13 | " counts and the population arguments.")) 14 | 15 | # Counts imply spatial analysis, population space-time: 16 | expect_error(estimate_baselines(1:5, matrix(1, 2, 5)), 17 | paste0("If counts is a vector, population should be too.")) 18 | 19 | }) 20 | 21 | test_that("estimate_baselines: works", { 22 | # Spatial analysis: 23 | expect_equal(estimate_baselines(1:5, 11:15), 24 | matrix(11:15 / sum(11:15) * sum(1:5), nrow = 1)) 25 | # Space-time analysis with constant population: 26 | expect_equal(estimate_baselines(matrix(1:8, 4, 2), c(10, 20)), 27 | matrix(rep(c(10, 20), each = 
4), 4, 2) / 30 * sum(1:8) / 4) 28 | # Space-time analysis with population varying over time: 29 | co <- matrix(1:4, 2, 2) 30 | pop <- matrix(c(1, 4, 3, 6), 2, 2) 31 | actual <- estimate_baselines(co, pop) 32 | expected <- matrix(c(1/4, 4/10, 3/4, 6/10), 2, 2) * sum(1:4) / 2 33 | expect_equal(actual, expected) 34 | }) 35 | -------------------------------------------------------------------------------- /tests/testthat/test_fss_exponential_family.R: -------------------------------------------------------------------------------- 1 | # context("Fast Subset Scan: Exponential Family Functions") 2 | # 3 | # # Poisson ---------------------------------------------------------------------- 4 | # 5 | # test_that("poisson_qmax", { 6 | # expect_equal(poisson_qmax(5, 5), 1) 7 | # # From Wolfram Alpha: solve 10*log(x)+5*(1-x)=0 8 | # expect_equal(round(poisson_qmax(10, 5), 5), 3.51286) 9 | # }) 10 | # 11 | # test_that("poisson_priority", { 12 | # B <- matrix(c(1, 3, 13 | # 2, 4), 2, 2, byrow = TRUE) 14 | # C <- matrix(c(2, 3, 15 | # 1, 5), 2, 2, byrow = TRUE) 16 | # expected <- matrix(c(poisson_qmax(2, 1), 1, 17 | # 1, poisson_qmax(8, 7)), 18 | # 2, 2, byrow = TRUE) 19 | # actual <- poisson_priority(C, B, poisson_qmax) 20 | # expect_equal(actual, expected) 21 | # }) 22 | # 23 | # test_that("poisson_score", { 24 | # pri_mat <- matrix(c(1, 2, 25 | # 2, 1), 26 | # 2, 2, byrow = TRUE) 27 | # B <- matrix(c(1, 3, 28 | # 2, 4), 2, 2, byrow = TRUE) 29 | # C <- matrix(c(2, 3, 30 | # 1, 5), 2, 2, byrow = TRUE) 31 | # expected <- matrix(c(poisson_lambda(2, 1), poisson_lambda(2+3,1+3), 32 | # poisson_lambda(3+5, 3+4), poisson_lambda(2+1+3+5, 33 | # 1+2+3+4)), 34 | # 2, 2, byrow = TRUE) 35 | # actual <- poisson_score(C, B, pri_mat) 36 | # expect_equal(actual, expected) 37 | # }) 38 | # 39 | # # Gaussian --------------------------------------------------------------------- 40 | # 41 | # test_that("gaussian_priority", { 42 | # B <- matrix(c(1, 3, 43 | # 2, 4), 2, 2, byrow = TRUE) 44 | # C <- 
matrix(c(2, 3, 45 | # 1, 5), 2, 2, byrow = TRUE) 46 | # expected <- matrix(c(gaussian_qmax(2, 1), 1, 47 | # 1, gaussian_qmax(8, 7)), 48 | # 2, 2, byrow = TRUE) 49 | # actual <- gaussian_priority(C, B, gaussian_qmax) 50 | # expect_equal(actual, expected) 51 | # }) 52 | # 53 | # test_that("gaussian_score", { 54 | # pri_mat <- matrix(c(1, 2, 55 | # 2, 1), 56 | # 2, 2, byrow = TRUE) 57 | # B <- matrix(c(1, 3, 58 | # 2, 4), 2, 2, byrow = TRUE) 59 | # C <- matrix(c(2, 3, 60 | # 1, 5), 2, 2, byrow = TRUE) 61 | # S2 <- matrix(c(1, 2, 62 | # 3, 4), 63 | # 2, 2, byrow = TRUE) 64 | # expected <- matrix(c(gaussian_lambda(2, 1, 1), gaussian_lambda(2+3,1+3, 1+2), 65 | # gaussian_lambda(3+5, 3+4, 2+4), gaussian_lambda(2+1+3+5, 66 | # 1+2+3+4, 67 | # 1+2+3+4)), 68 | # 2, 2, byrow = TRUE) 69 | # actual <- gaussian_score(C, B, S2, pri_mat) 70 | # expect_equal(actual, expected) 71 | # }) 72 | # 73 | # # Exponential ------------------------------------------------------------------ 74 | -------------------------------------------------------------------------------- /tests/testthat/test_fss_utility_functions.R: -------------------------------------------------------------------------------- 1 | # context("Fast Subset Scan: Utility Functions") 2 | # 3 | # test_that("aggregate_per_location: sums properly + dimensions correct", { 4 | # A <- matrix(1:6, 3, 2) 5 | # B <- matrix(-(1:6), 3, 2) 6 | # input <- array(c(A, B), dim = c(3, 2, 2)) 7 | # expected <- apply(A, 2, cumsum) + apply(B, 2, cumsum) 8 | # actual <- aggregate_per_location(input) 9 | # expect_identical(dim(actual), dim(expected)) 10 | # expect_equal(actual, expected) 11 | # }) 12 | # 13 | # test_that("aggregate_per_stream: sums properly + dimensions correct", { 14 | # A <- matrix(1:6, 3, 2) 15 | # B <- matrix(-(1:6), 3, 2) 16 | # input <- array(c(A, B), dim = c(3, 2, 2)) 17 | # expected <- apply(cbind(apply(A, 1, sum), apply(B, 1, sum)), 2, cumsum) 18 | # actual <- aggregate_per_stream(input) 19 | # 
expect_identical(dim(actual), dim(expected)) 20 | # expect_equal(actual, expected) 21 | # }) 22 | # 23 | # test_that("apply_rowwise: works + dimensions correct", { 24 | # A <- matrix(1:6, 2, 3) 25 | # expected1 <- matrix(c(1, 4, 9, 26 | # 2, 6, 12), 27 | # 2, 3, byrow = TRUE) 28 | # expected2 <- c(9, 12) 29 | # actual1 <- apply_rowwise(A, cumsum) 30 | # actual2 <- apply_rowwise(A, sum) 31 | # expect_equal(actual1, expected1) 32 | # expect_equal(actual2, expected2) 33 | # expect_true(is.matrix(actual1)) 34 | # expect_true(is.vector(actual2)) 35 | # }) 36 | # 37 | # test_that("prioritize_cols: no ties", { 38 | # A <- matrix(c(-1, 0, 1, 39 | # 0, -1, 1, 40 | # 1, 0, -1), 41 | # 3, 3, byrow = TRUE) 42 | # expected <- matrix(c(3, 2, 1, 43 | # 3, 1, 2, 44 | # 1, 2, 3), 45 | # 3, 3, byrow = TRUE) 46 | # actual <- prioritize_cols(A) 47 | # expect_equal(actual, expected) 48 | # }) 49 | # 50 | # test_that("prioritize_cols: ties", { 51 | # A <- matrix(c(1, 1, 0, 52 | # 1, 0, 1, 53 | # 0, 1, 1), 54 | # 3, 3, byrow = TRUE) 55 | # expected <- matrix(c(1, 2, 3, 56 | # 1, 3, 2, 57 | # 2, 3, 1), 58 | # 3, 3, byrow = TRUE) 59 | # actual <- prioritize_cols(A) 60 | # expect_equal(actual, expected) 61 | # }) 62 | # 63 | # test_that("reorder_rows: works", { 64 | # A <- matrix(1:9, 3, 3, byrow = TRUE) 65 | # prios <- matrix(c(2, 3, 1, 66 | # 1, 3, 2, 67 | # 3, 2, 1), 68 | # 3, 3, byrow = TRUE) 69 | # expected <- matrix(c(2, 3, 1, 70 | # 4, 6, 5, 71 | # 9, 8, 7), 72 | # 3, 3, byrow = TRUE) 73 | # actual <- reorder_rows(A, prios) 74 | # expect_equal(actual, expected) 75 | # }) 76 | # 77 | # test_that("prioritize_and_execute", { 78 | # A <- matrix(1:9, 3, 3, byrow = TRUE) 79 | # prios <- matrix(c(2, 3, 1, 80 | # 1, 3, 2, 81 | # 3, 2, 1), 82 | # 3, 3, byrow = TRUE) 83 | # f <- function(x, s = 2) (x + 1) * s 84 | # B <- matrix(c(2, 3, 1, 85 | # 4, 6, 5, 86 | # 9, 8, 7), 87 | # 3, 3, byrow = TRUE) 88 | # expected1 <- (B + 1) * 2 89 | # expected2 <- (B + 1) * 5 90 | # actual1 <- 
prioritize_and_execute(f, A, prios) 91 | # actual2 <- prioritize_and_execute(f, A, prios, s = 5) 92 | # expect_equal(actual1, expected1) 93 | # expect_equal(actual2, expected2) 94 | # }) 95 | # 96 | # test_that("sum_over_subset: streams", { 97 | # A <- matrix(1:6, 3, 2) 98 | # B <- matrix(-(1:6), 3, 2) 99 | # arr1 <- array(c(A, B), dim = c(3, 2, 2)) 100 | # arr2 <- 2 * arr1 101 | # lst <- list(x = arr1, y = arr2) 102 | # expected1 <- list(x = A + B, y = 2*(A + B)) 103 | # expected2 <- list(x = A, y = 2 * A) 104 | # actual1 <- sum_over_subset(lst, 1:2, 3) 105 | # actual2 <- sum_over_subset(lst, 1, 3) 106 | # expect_equal(actual1, expected1) 107 | # expect_equal(actual2, expected2) 108 | # }) 109 | # 110 | # test_that("sum_over_subset: locations", { 111 | # A <- matrix(1:6, 3, 2) 112 | # B <- matrix(-(1:6), 3, 2) 113 | # arr1 <- array(c(A, B), dim = c(3, 2, 2)) 114 | # arr2 <- 2 * arr1 115 | # lst <- list(x = arr1, y = arr2) 116 | # expected1 <- list(x = cbind(A[, 1] + A[, 2], B[, 1] + B[, 2]), 117 | # y = 2*cbind(A[, 1] + A[, 2], B[, 1] + B[, 2])) 118 | # expected2 <- list(x = cbind(A[, 1], B[, 1]), 119 | # y = 2 * cbind(A[, 1], B[, 1])) 120 | # actual1 <- sum_over_subset(lst, 1:2, 2) 121 | # actual2 <- sum_over_subset(lst, 1, 2) 122 | # expect_equal(actual1, expected1) 123 | # expect_equal(actual2, expected2) 124 | # }) 125 | -------------------------------------------------------------------------------- /tests/testthat/test_probability_functions.R: -------------------------------------------------------------------------------- 1 | # context("C++ probability functions") 2 | # 3 | # 4 | # # ZIP distribution ------------------------------------------------------------- 5 | # 6 | # test_that("zip_lpmf", { 7 | # expect_equal(zip_lpmf(0, 6, 0.2), 8 | # log(0.2 + 0.8 * exp(-6))) 9 | # expect_equal(zip_loglihood(1, 2, 0.2, 3), 10 | # log(0.8) + 1 * log(6) - lgamma(1 + 1) - 6) 11 | # 12 | # }) 13 | # 14 | # test_that("zip_loglihood", { 15 | # 
expect_equal(zip_loglihood(c(0, 1), c(3, 3), c(0.2, 0.2), 2), 16 | # log(0.2 + 0.8 * exp(-2 * 3)) + 17 | # log(0.8) + 1 * log(2 * 3) - lgamma(1 + 1) - 2 * 3) 18 | # 19 | # }) 20 | # 21 | -------------------------------------------------------------------------------- /tests/testthat/test_scan_eb_poisson.R: -------------------------------------------------------------------------------- 1 | context("EB Poisson statistic tests") 2 | 3 | test_that("scan_eb_poisson", { 4 | 5 | # Helper functions ----------------------------------------------------------- 6 | poisson_lpmf <- function(y, mu) -mu + y * log(mu) 7 | poisson_loglihood <- function(y, mu, q) { 8 | llh <- 0 9 | for (i in 1:length(y)) { 10 | llh <- llh + poisson_lpmf(y[i], q * mu[i]) 11 | } 12 | return(llh) 13 | } 14 | 15 | # Single timepoint ----------------------------------------------------------- 16 | in1 <- list( 17 | counts = matrix(c(1, 0), nrow = 1), 18 | baselines = matrix(c(0.5, 2), nrow = 1), 19 | zones = list(1L, 2L, 1:2)) 20 | in1$zones_flat = unlist(in1$zones) 21 | in1$zone_lengths = unlist(lapply(in1$zones, length)) 22 | 23 | actual1 <- scan_eb_poisson_cpp(in1$counts, 24 | in1$baselines, 25 | in1$zones_flat - 1, 26 | in1$zone_lengths, 27 | store_everything = TRUE, 28 | num_mcsim = 0)$observed 29 | actual1b <- scan_eb_poisson_cpp(in1$counts, 30 | in1$baselines, 31 | in1$zones_flat - 1, 32 | in1$zone_lengths, 33 | store_everything = FALSE, 34 | num_mcsim = 0)$observed 35 | expected1_score <- c(poisson_lpmf(1, 1) - poisson_lpmf(1, 0.5), 0, 0) 36 | expect_equal(actual1$score, expected1_score) 37 | expect_equal(actual1$relrisk, c(2, 1, 1)) 38 | expect_equal(c(actual1[which.max(actual1$score), ]), c(actual1b)) 39 | 40 | # 3 timepoints --------------------------------------------------------------- 41 | in2 <- list( 42 | counts = matrix(c(1, 0, 43 | 2, 1, 44 | 0, 20), nrow = 3, byrow = TRUE), 45 | baselines = matrix(c(0.5, 2, 46 | 0.5, 2, 47 | 0.5, 2), nrow = 3, byrow = TRUE), 48 | zones = list(1L, 2L, 
1:2)) 49 | in2$zones_flat = unlist(in2$zones) 50 | in2$zone_lengths = unlist(lapply(in2$zones, length)) 51 | 52 | actual2 <- scan_eb_poisson_cpp(in2$counts, 53 | in2$baselines, 54 | in2$zones_flat - 1, 55 | in2$zone_lengths, 56 | store_everything = TRUE, 57 | num_mcsim = 0)$observed 58 | actual2b <- scan_eb_poisson_cpp(in2$counts, 59 | in2$baselines, 60 | in2$zones_flat - 1, 61 | in2$zone_lengths, 62 | store_everything = FALSE, 63 | num_mcsim = 0)$observed 64 | 65 | expected2_relrisk <- c(# Duration = 1 66 | sum(in2$counts[1, 1]) / sum(in2$baselines[1, 1]), 67 | sum(in2$counts[1, 2]) / sum(in2$baselines[1, 2]), 68 | sum(in2$counts[1, 1:2]) / sum(in2$baselines[1, 1:2]), 69 | # Duration = 2 70 | sum(in2$counts[1:2, 1]) / sum(in2$baselines[1:2, 1]), 71 | sum(in2$counts[1:2, 2]) / sum(in2$baselines[1:2, 2]), 72 | sum(in2$counts[1:2, 1:2]) / sum(in2$baselines[1:2, 1:2]), 73 | # Duration = 3 74 | sum(in2$counts[1:3, 1]) / sum(in2$baselines[1:3, 1]), 75 | sum(in2$counts[1:3, 2]) / sum(in2$baselines[1:3, 2]), 76 | sum(in2$counts[1:3, 1:2]) / sum(in2$baselines[1:3, 1:2])) 77 | expected2_relrisk <- pmax(expected2_relrisk, 1) 78 | relrisk_mat <- matrix(expected2_relrisk, nrow = 3, ncol = 3, byrow = TRUE) 79 | 80 | expected2_score <- c( 81 | # Duration = 1 82 | poisson_lpmf(1, expected2_relrisk[1] * 0.5) - poisson_lpmf(1, 0.5), 0, 0, 83 | # Duration = 2 84 | poisson_lpmf(3, expected2_relrisk[4] * 1) - poisson_lpmf(3, 1), 0, 0, 85 | # Duration = 3 86 | # zone = 1 87 | poisson_loglihood(c(1, 2, 0), rep(0.5, 3), expected2_relrisk[7]) - 88 | poisson_loglihood(c(1, 2, 0), rep(0.5, 3), 1), 89 | # zone = 2 90 | poisson_loglihood(c(0, 1, 20), rep(2, 3), expected2_relrisk[8]) - 91 | poisson_loglihood(c(0, 1, 20), rep(2, 3), 1), 92 | # zone = 3 93 | poisson_loglihood(as.vector(in2$counts), as.vector(in2$baselines), 94 | expected2_relrisk[9]) - 95 | poisson_loglihood(as.vector(in2$counts), as.vector(in2$baselines), 1)) 96 | 97 | expect_equal(actual2$relrisk, expected2_relrisk) 98 | 
expect_equal(actual2$score, expected2_score) 99 | expect_equal(c(actual2[which.max(actual2$score), ]), c(actual2b)) 100 | }) 101 | -------------------------------------------------------------------------------- /tests/testthat/test_scan_pb_perm.R: -------------------------------------------------------------------------------- 1 | context("Space-time permutation statistic") 2 | 3 | expand_matrix <- function(A) { 4 | res <- matrix(NA, nrow = sum(A), ncol = 2) 5 | index <- 1 6 | for (j in 1:ncol(A)) { 7 | for (i in 1:nrow(A)) { 8 | n <- A[i,j] 9 | for (k in seq_len(n)) { 10 | res[index, 1] <- i 11 | res[index, 2] <- j 12 | index <- index + 1 13 | } 14 | } 15 | } 16 | return(res) 17 | } 18 | 19 | contract_matrix <- function(A, nr, nc) { 20 | res <- matrix(0L, nr, nc) 21 | for (i in 1:nrow(A)) { 22 | res[A[i, 1], A[i, 2]] <- res[A[i, 1], A[i, 2]] + 1L 23 | } 24 | return(res) 25 | } 26 | 27 | 28 | permute_table <- function(A) { 29 | a <- expand_matrix(A) 30 | for (i in nrow(a):1) { 31 | j <- floor(runif(1, 1, nrow(a))) 32 | tmp <- a[i, 1] 33 | a[i, 1] <- a[j, 1] 34 | a[j, 1] <- tmp 35 | } 36 | return(contract_matrix(a, nrow(A), ncol(A))) 37 | } 38 | 39 | test_that("scan_pb_perm_cpp", { 40 | 41 | # 3 timepoints 42 | in2 <- list( 43 | counts = matrix(c(1, 0, 44 | 2, 1, 45 | 0, 20), nrow = 3, byrow = TRUE), 46 | zones = list(1L, 2L, 1:2)) 47 | in2$N <- sum(in2$counts) 48 | in2$baselines <- outer(rowSums(in2$counts), colSums(in2$counts)) / in2$N 49 | in2$zones_flat = unlist(in2$zones) 50 | in2$zone_lengths = unlist(lapply(in2$zones, length)) 51 | 52 | 53 | perm1 <- scan_pb_perm_cpp(in2$counts, 54 | in2$baselines, 55 | in2$zones_flat - 1, 56 | in2$zone_lengths, 57 | store_everything = FALSE, 58 | num_mcsim = 0)$observed 59 | pois1 <- scan_pb_poisson_cpp(in2$counts, 60 | in2$baselines, 61 | in2$zones_flat - 1, 62 | in2$zone_lengths, 63 | store_everything = FALSE, 64 | num_mcsim = 0)$observed 65 | 66 | expect_equal(perm1, pois1) 67 | 68 | set.seed(1) 69 | in2$counts <- 
permute_matrix(in2$counts) 70 | 71 | # pois2 <- scan_pb_poisson_cpp(in2$counts, 72 | # in2$baselines, 73 | # in2$zones_flat - 1, 74 | # in2$zone_lengths, 75 | # store_everything = FALSE, 76 | # num_mcsim = 0)$observed 77 | # set.seed(1) 78 | # perm2 <- scan_pb_perm_cpp(in2$counts, 79 | # in2$baselines, 80 | # in2$zones_flat - 1, 81 | # in2$zone_lengths, 82 | # store_everything = FALSE, 83 | # num_mcsim = 1)$simulated[1, ] 84 | # 85 | # expect_equal(perm2, pois2) 86 | }) 87 | -------------------------------------------------------------------------------- /tests/testthat/test_zone_utility_functions.R: -------------------------------------------------------------------------------- 1 | context("zone utility functions") 2 | 3 | # zones <- sets::set(sets::as.set(1L), 4 | # sets::as.set(2L), 5 | # sets::as.set(3L), 6 | # sets::as.set(1:2), 7 | # sets::as.set(c(1L, 3L)), 8 | # sets::as.set(c(2L, 3L))) 9 | # 10 | # 11 | # zones <- sets::set(sets::as.set(1L), 12 | # sets::as.set(2L), 13 | # sets::as.set(3L), 14 | # sets::as.set(1:2), 15 | # sets::as.set(c(1L, 3L)), 16 | # sets::as.set(c(2L, 3L)), 17 | # sets::as.set(c(1L, 4L))) -------------------------------------------------------------------------------- /vignettes/references.bib: -------------------------------------------------------------------------------- 1 | 2 | @ARTICLE{Kulldorff1998, 3 | author = {Martin Kulldorff and William F. Athas and Eric J. Feuer and Barry A. Miller and Charles R. 
Key}, 4 | title = {Evaluating cluster alarms: A space-time scan statistic and brain cancer in Los Alamos}, 5 | journal = {American Journal of Public Health}, 6 | year = {1998}, 7 | volume = {88}, 8 | pages = {1377–1380}, 9 | number = {9} 10 | } 11 | 12 | @article{Kulldorff2001, 13 | author = {Kulldorff, Martin}, 14 | journal = {Journal of the Royal Statistical Society: Series A (Statistics in Society)}, 15 | pages = {61--72}, 16 | title = {{Prospective time periodic geographical disease surveillance using a scan statistic}}, 17 | volume = {164}, 18 | year = {2001} 19 | } 20 | 21 | @article{Kulldorff2005, 22 | author = {Kulldorff, Martin and Heffernan, Richard and Hartman, Jessica and Assun{\c{c}}{\~{a}}o, Renato M. and Mostashari, Farzad}, 23 | number = {3}, 24 | pages = {0216--0224}, 25 | title = {{A space-time permutation scan statistic for disease outbreak detection}}, 26 | volume = {2}, 27 | year = {2005} 28 | } 29 | 30 | @ARTICLE{Tango2011, 31 | author = {Toshiro Tango and Kunihiko Takahashi and Kazuaki Kohriyama}, 32 | title = {A space-time scan statistic for detecting emerging outbreaks}, 33 | journal = {Biometrics}, 34 | year = {2011}, 35 | volume = {67}, 36 | pages = {106–115}, 37 | number = {1} 38 | } 39 | 40 | @techreport{Allevius2017, 41 | author = {All{\'{e}}vius, Benjamin and H{\"{o}}hle, Michael}, 42 | institution = {Stockholm University}, 43 | title = {{An expectation-based space-time scan statistic for ZIP-distributed data}}, 44 | year = {2017} 45 | } 46 | 47 | @Manual{rsatscan, 48 | title = {rsatscan: Tools, Classes, and Methods for Interfacing with SaTScan Stand-Alone Software}, 49 | author = {Ken Kleinman}, 50 | year = {2015}, 51 | note = {R package version 0.3.9200}, 52 | url = {https://CRAN.R-project.org/package=rsatscan}, 53 | } 54 | 55 | @INPROCEEDINGS{Neill2005, 56 | author = {Daniel B. Neill and Andrew W.
Moore and Maheshkumar Sabhnani and Kenny Daniel}, 57 | title = {Detection of emerging space-time clusters}, 58 | booktitle = {Proceedings of the eleventh ACM SIGKDD international conference on 59 | Knowledge discovery in data mining}, 60 | year = {2005}, 61 | pages = {218–227}, 62 | organization = {ACM} 63 | } 64 | 65 | @article{Neill2006, 66 | author = {Neill, Daniel B. and Moore, Andrew W. and Cooper, Gregory F.}, 67 | journal = {Advances in Neural Information Processing Systems}, 68 | pages = {1003}, 69 | title = {{A Bayesian Spatial Scan Statistic}}, 70 | volume = {18}, 71 | year = {2006} 72 | } 73 | 74 | @Article{sets, 75 | title = {Generalized and Customizable Sets in {R}}, 76 | author = {David Meyer and Kurt Hornik}, 77 | journal = {Journal of Statistical Software}, 78 | year = {2009}, 79 | volume = {31}, 80 | number = {2}, 81 | pages = {1–27}, 82 | url = {http://www.jstatsoft.org/v31/i02/} 83 | } 84 | --------------------------------------------------------------------------------