├── tools
    ├── README-r1-1.png
    ├── README-r2-1.png
    ├── README-rb1-1.png
    └── README-rg1-1.png
├── extras
    ├── PseudoInverse.pdf
    ├── sp500_example.pdf
    ├── sp500
    │   ├── sp500_example.pdf
    │   ├── snp500_l1t_0200.png
    │   ├── sp500_long_files
    │   │   └── figure-markdown_github
    │   │   │   ├── r1-1.png
    │   │   │   ├── r2-1.png
    │   │   │   ├── r3-1.png
    │   │   │   ├── r4-1.png
    │   │   │   └── r5-1.png
    │   ├── segmented_Example_files
    │   │   └── figure-markdown_github
    │   │   │   └── r1-1.png
    │   ├── segmented_Example.Rmd
    │   ├── segmented_Example.md
    │   ├── sp500_long.Rmd
    │   ├── sp500_long.md
    │   └── sp500_example.Rmd
    ├── .gitignore
    ├── SegmentationL_files
    │   └── figure-markdown_github
    │   │   ├── r1-1.png
    │   │   ├── r5-1.png
    │   │   ├── r12-1.png
    │   │   ├── r12-2.png
    │   │   ├── r12-3.png
    │   │   ├── r12-4.png
    │   │   └── r12-5.png
    ├── Time_xlin_fits_files
    │   └── figure-markdown_github
    │   │   ├── r2-1.png
    │   │   ├── r2-2.png
    │   │   ├── r2-3.png
    │   │   └── r2-4.png
    ├── Timings_files
    │   └── figure-markdown_github
    │   │   └── unnamed-chunk-1-1.png
    ├── LogisticLinkSegmentation_files
    │   └── figure-markdown_github
    │   │   ├── r1-1.png
    │   │   └── r1-2.png
    ├── check_reverse_dependencies.Rmd
    ├── xlin_fits_py.py
    ├── Timings.Rmd
    ├── check_reverse_dependencies.md
    ├── time_python.ipynb
    ├── LogisticLinkSegmentation.Rmd
    ├── Time_xlin_fits.Rmd
    ├── LogisticLinkSegmentation.md
    ├── DynProg.py
    ├── Timings.md
    ├── time_python_xlin_fits.ipynb
    └── PseudoInverse.tex
├── docs
    ├── reference
    │   ├── Rplot001.png
    │   └── RcppDynProg.html
    ├── articles
    │   ├── RcppDynProg_files
    │   │   ├── figure-html
    │   │   │   ├── r1-1.png
    │   │   │   ├── r2-1.png
    │   │   │   ├── rb1-1.png
    │   │   │   └── rg1-1.png
    │   │   ├── header-attrs-2.4
    │   │   │   └── header-attrs.js
    │   │   └── accessible-code-block-0.0.1
    │   │   │   └── empty-anchor.js
    │   ├── Segmentation_files
    │   │   ├── figure-html
    │   │   │   ├── r1-1.png
    │   │   │   ├── r2-1.png
    │   │   │   └── r5-1.png
    │   │   ├── header-attrs-2.4
    │   │   │   └── header-attrs.js
    │   │   └── accessible-code-block-0.0.1
    │   │   │   └── empty-anchor.js
    │   ├── SegmentationL_files
    │   │   ├── figure-html
    │   │   │   ├── r1-1.png
    │   │   │   └── r5-1.png
    │   │   ├── header-attrs-2.4
    │   │   │   └── header-attrs.js
    │   │   └── accessible-code-block-0.0.1
    │   │   │   └── empty-anchor.js
    │   ├── index.html
    │   └── RcppDynProg.html
    ├── pkgdown.yml
    ├── link.svg
    ├── bootstrap-toc.css
    ├── docsearch.js
    ├── sitemap.xml
    ├── pkgdown.js
    ├── bootstrap-toc.js
    ├── 404.html
    └── authors.html
├── .gitignore
├── tests
    └── tinytest.R
├── _pkgdown.yml
├── .Rbuildignore
├── inst
    └── tinytest
    │   ├── test_const_cost.R
    │   ├── test_lin_cost.R
    │   ├── test_logistic_cost_fn.R
    │   ├── test_xlin_fits.R
    │   ├── test_xlin.R
    │   ├── test_scoring.R
    │   ├── test_dynprog1.R
    │   └── test_logistic_cost.R
├── RcppDynProg.Rproj
├── vignettes
    ├── RcppDynProg.Rmd
    ├── SegmentationL.Rmd
    └── Segmentation.Rmd
├── R
    ├── package.R
    ├── xlin_fits_lm.R
    ├── xlin_fits_V.R
    ├── xlin_fits_R.R
    ├── solve_dyn_R.R
    ├── vtreat_coders.R
    └── utils.R
├── cran-comments.md
├── man
    ├── all_partitions.Rd
    ├── xlin_fits_R.Rd
    ├── xlin_fits_V.Rd
    ├── xlin_fits_lm.Rd
    ├── piecewise_linear.Rd
    ├── piecewise_constant.Rd
    ├── score_solution.Rd
    ├── xlin_fits.Rd
    ├── const_costs.Rd
    ├── RcppDynProg-package.Rd
    ├── const_cost.Rd
    ├── lin_costs.Rd
    ├── piecewise_linear_coder.Rd
    ├── summarize_input.Rd
    ├── xlin_pfits.Rd
    ├── piecewise_constant_coder.Rd
    ├── const_costs_logistic.Rd
    ├── lin_cost.Rd
    ├── const_cost_logistic.Rd
    ├── lin_costs_logistic.Rd
    ├── solve_interval_partition.Rd
    ├── solve_interval_partition_R.Rd
    ├── solve_interval_partition_no_k.Rd
    ├── logistic_fits.Rd
    ├── solve_interval_partition_k.Rd
    ├── lin_cost_logistic.Rd
    ├── xlogistic_fits.Rd
    ├── logistic_solve1.Rd
    ├── solve_for_partition.Rd
    └── solve_for_partitionc.Rd
├── src
    ├── input_summary.h
    ├── xlin_pfits.cpp
    ├── xlin_fits.cpp
    ├── const_costs.cpp
    ├── lin_costs.cpp
    ├── const_costs_logistic.cpp
    ├── input_summary.cpp
    └── lin_costs_logistic.cpp
├── NEWS.md
├── DESCRIPTION
└── NAMESPACE


/tools/README-r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/tools/README-r1-1.png


--------------------------------------------------------------------------------
/tools/README-r2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/tools/README-r2-1.png


--------------------------------------------------------------------------------
/tools/README-rb1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/tools/README-rb1-1.png


--------------------------------------------------------------------------------
/tools/README-rg1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/tools/README-rg1-1.png


--------------------------------------------------------------------------------
/extras/PseudoInverse.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/PseudoInverse.pdf


--------------------------------------------------------------------------------
/extras/sp500_example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500_example.pdf


--------------------------------------------------------------------------------
/docs/reference/Rplot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/reference/Rplot001.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_example.pdf


--------------------------------------------------------------------------------
/extras/sp500/snp500_l1t_0200.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/snp500_l1t_0200.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *~
 2 | .DS_Store
 3 | .Rproj.user
 4 | *.o
 5 | *.so
 6 | .Rhistory
 7 | inst/doc
 8 | revdep
 9 | CRAN-RELEASE
10 | 


--------------------------------------------------------------------------------
/tests/tinytest.R:
--------------------------------------------------------------------------------
1 | 
2 | if ( requireNamespace("tinytest", quietly=TRUE) ){
3 |   tinytest::test_package("RcppDynProg")
4 | }
5 | 
6 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | 
2 | navbar:
3 |    right:
4 |     - text: "Sponsor: Win-Vector LLC"
5 |       href: https://win-vector.com/
6 | 
7 | 
8 | 


--------------------------------------------------------------------------------
/extras/.gitignore:
--------------------------------------------------------------------------------
1 | PseudoInverse.aux
2 | PseudoInverse.log
3 | PseudoInverse.out
4 | __pycache__
5 | .ipynb_checkpoints
6 | xlin_data.csv.gz
7 | 


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/figure-html/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/RcppDynProg_files/figure-html/r1-1.png


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/figure-html/r2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/RcppDynProg_files/figure-html/r2-1.png


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/figure-html/rb1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/RcppDynProg_files/figure-html/rb1-1.png


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/figure-html/rg1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/RcppDynProg_files/figure-html/rg1-1.png


--------------------------------------------------------------------------------
/docs/articles/Segmentation_files/figure-html/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/Segmentation_files/figure-html/r1-1.png


--------------------------------------------------------------------------------
/docs/articles/Segmentation_files/figure-html/r2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/Segmentation_files/figure-html/r2-1.png


--------------------------------------------------------------------------------
/docs/articles/Segmentation_files/figure-html/r5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/Segmentation_files/figure-html/r5-1.png


--------------------------------------------------------------------------------
/docs/articles/SegmentationL_files/figure-html/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/SegmentationL_files/figure-html/r1-1.png


--------------------------------------------------------------------------------
/docs/articles/SegmentationL_files/figure-html/r5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/docs/articles/SegmentationL_files/figure-html/r5-1.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r1-1.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r5-1.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r12-1.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r12-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r12-2.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r12-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r12-3.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r12-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r12-4.png


--------------------------------------------------------------------------------
/extras/SegmentationL_files/figure-markdown_github/r12-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/SegmentationL_files/figure-markdown_github/r12-5.png


--------------------------------------------------------------------------------
/extras/Time_xlin_fits_files/figure-markdown_github/r2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/Time_xlin_fits_files/figure-markdown_github/r2-1.png


--------------------------------------------------------------------------------
/extras/Time_xlin_fits_files/figure-markdown_github/r2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/Time_xlin_fits_files/figure-markdown_github/r2-2.png


--------------------------------------------------------------------------------
/extras/Time_xlin_fits_files/figure-markdown_github/r2-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/Time_xlin_fits_files/figure-markdown_github/r2-3.png


--------------------------------------------------------------------------------
/extras/Time_xlin_fits_files/figure-markdown_github/r2-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/Time_xlin_fits_files/figure-markdown_github/r2-4.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_long_files/figure-markdown_github/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_long_files/figure-markdown_github/r1-1.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_long_files/figure-markdown_github/r2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_long_files/figure-markdown_github/r2-1.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_long_files/figure-markdown_github/r3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_long_files/figure-markdown_github/r3-1.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_long_files/figure-markdown_github/r4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_long_files/figure-markdown_github/r4-1.png


--------------------------------------------------------------------------------
/extras/sp500/sp500_long_files/figure-markdown_github/r5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/sp500_long_files/figure-markdown_github/r5-1.png


--------------------------------------------------------------------------------
/extras/Timings_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/Timings_files/figure-markdown_github/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/extras/sp500/segmented_Example_files/figure-markdown_github/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/sp500/segmented_Example_files/figure-markdown_github/r1-1.png


--------------------------------------------------------------------------------
/extras/LogisticLinkSegmentation_files/figure-markdown_github/r1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/LogisticLinkSegmentation_files/figure-markdown_github/r1-1.png


--------------------------------------------------------------------------------
/extras/LogisticLinkSegmentation_files/figure-markdown_github/r1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WinVector/RcppDynProg/HEAD/extras/LogisticLinkSegmentation_files/figure-markdown_github/r1-2.png


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | pandoc: 3.1.1
 2 | pkgdown: 2.0.7
 3 | pkgdown_sha: ~
 4 | articles:
 5 |   RcppDynProg: RcppDynProg.html
 6 |   Segmentation: Segmentation.html
 7 |   SegmentationL: SegmentationL.html
 8 | last_built: 2023-08-20T00:23Z
 9 | 
10 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^CRAN-RELEASE$
 2 | ^.*\.Rproj$
 3 | ^\.Rproj\.user$
 4 | ^CrossDynProg.Rproj$
 5 | ^.Rhistory$
 6 | ^extras$
 7 | ^.DS_Store$
 8 | ^cran-comments.md$
 9 | ^revdep$
10 | ^docs$
11 | ^README.Rmd$
12 | ^LICENSE$
13 | ^_pkgdown\.yml$
14 | ^CRAN-SUBMISSION$
15 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_const_cost.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_const_cost <- function() {
 3 |   y <- c(1, 2, 1, -1, 4, 5, 10)
 4 |   w <- 1 + numeric(length(y))
 5 |   c1 <- const_cost(y, w, 1, 0, length(y)-1)
 6 |   
 7 |   out_est <- (sum(y*w) - y*w)/(sum(w) - w)
 8 |   c2 <- sum(w*(y-out_est)^2)
 9 |   
10 |   expect_true(abs(c1-c2)<=1e-5)
11 | 
12 |   invisible(NULL)
13 | }
14 |   
15 | test_const_cost()
16 | 


--------------------------------------------------------------------------------
/RcppDynProg.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/vignettes/RcppDynProg.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "RcppDynProg package"
 3 | author: "John Mount"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{RcppDynProg package}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | This content has moved to [the package README](https://github.com/WinVector/RcppDynProg).


--------------------------------------------------------------------------------
/inst/tinytest/test_lin_cost.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_lin_cost <- function() {
 3 |   y <- c(1, 2, 1, -1, 4, 5, 10)
 4 |   x <- seq_len(length(y))
 5 |   w <- 1 + numeric(length(y))
 6 |   c1 <- lin_cost(x, y, w, 1, 0, length(y)-1)
 7 |   
 8 |   out_est <- xlin_fits_lm(x, y, w)
 9 |   c2 <- sum(w*(y-out_est)^2)
10 |   
11 |   expect_true(abs(c1-c2)<=1e-3)
12 | 
13 |   invisible(NULL)
14 | }
15 | 
16 | test_lin_cost()
17 | 
18 | 


--------------------------------------------------------------------------------
/R/package.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' RcppDynProg
 4 | #' 
 5 | #' Rcpp dynamic programming solutions for partitioning and machine learning problems.
 6 | #' Includes out of sample fitting applications.  
 7 | #' Also supplies additional custom coders for the vtreat package.
 8 | #' Please see \url{https://github.com/WinVector/RcppDynProg} for details.
 9 | #' 
10 | #' @author John Mount
11 | #' @import Rcpp
12 | #' @importFrom Rcpp evalCpp
13 | #' @useDynLib RcppDynProg
14 | "_PACKAGE" 
15 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_logistic_cost_fn.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_logistic_cost <- function() {
 3 |   d <- data.frame(
 4 |     x = c(1, 2, 3, 4, 5, 6, 7),
 5 |     y = c(0, 0, 1, 0, 1, 1, 0)
 6 |   )
 7 |   w <- c(1, 1, 1, 1, 1, 1, 1)
 8 |   cost <- lin_cost_logistic(d$x, d$y, w, 3, 0, 6)
 9 |   
10 |   m <- glm(y~x, data=d, family = binomial)
11 |   dev <- summary(m)$deviance
12 |   
13 |   expect_true(abs(cost-dev)<=1e-3)
14 | 
15 |   invisible(NULL)
16 | }
17 | 
18 | test_logistic_cost()
19 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_xlin_fits.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_xlin_fits <- function() {
 3 |   x <- c(1, 2, 3, 4)
 4 |   y <- c(1, 2, 2, 1)
 5 |   w <- c(1, 1, 1, 1)
 6 |   
 7 |   f <- xlin_fits(x, y, w, 0, length(y)-1)
 8 |   f_lm <- xlin_fits_lm(x, y, w)
 9 |   expect_true(max(abs(f - f_lm))<=1e-3)
10 |   f_R <- xlin_fits_R(x, y, w)
11 |   expect_equal(f, f_R)
12 |   f_V <- xlin_fits_V(x, y, w)
13 |   expect_equal(f, f_V)
14 | 
15 |   invisible(NULL)
16 | }
17 | 
18 | test_xlin_fits()
19 | 
20 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_xlin.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_xlin <- function() {
 3 |   d <- data.frame(
 4 |     x = c(1, 2, 3, 4, 5, 6),
 5 |     y = c(1, 1, 2, 2, 3, 3),
 6 |     w = c(1, 1, 1, 1, 1, 1))
 7 |   fits <- xlin_fits(d$x, d$y, d$w, 0, 5)
 8 |   
 9 |   fe <- vapply(
10 |     1:6,
11 |     function(i) {
12 |       m <- lm(y~x, d[-i, , drop = FALSE])
13 |       predict(m, newdata = d[i, , drop = FALSE])
14 |     }, numeric(1))
15 |   
16 |   mxdiff = max(abs(fits-fe))
17 |   
18 |   expect_true(mxdiff<=1.0e-3)
19 | 
20 |   invisible(NULL)
21 | }
22 | 
23 | test_xlin()
24 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Test Results
 3 | 
 4 |     R CMD check --as-cran RcppDynProg_0.2.1.tar.gz
 5 |     * using R version 4.3.0 (2023-04-21)
 6 |     * using platform: x86_64-apple-darwin20 (64-bit)
 7 | 
 8 |     devtools::check_win_devel()
 9 |     * using R Under development (unstable) (2023-08-19 r84989 ucrt)
10 |     * using platform: x86_64-w64-mingw32
11 |     
12 | ## Downstream dependencies
13 | 
14 |     No strong dependent packages (please see https://github.com/WinVector/RcppDynProg/blob/master/extras/check_reverse_dependencies.md ).
15 | 
16 | Zumel is not a mis-spelling.
17 | 


--------------------------------------------------------------------------------
/man/all_partitions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{all_partitions}
 4 | \alias{all_partitions}
 5 | \title{Build all partitions into intervals.}
 6 | \usage{
 7 | all_partitions(n, kmax = n)
 8 | }
 9 | \arguments{
10 | \item{n}{integer, sequence lenght to choose from.}
11 | 
12 | \item{kmax}{int, maximum number of segments in solution.}
13 | }
14 | \value{
15 | list of all partitions.
16 | }
17 | \description{
18 | Build all partitions into intervals.
19 | }
20 | \examples{
21 | 
22 | all_partitions(4, 2)
23 | 
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/header-attrs-2.4/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 


--------------------------------------------------------------------------------
/docs/articles/Segmentation_files/header-attrs-2.4/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 


--------------------------------------------------------------------------------
/docs/articles/SegmentationL_files/header-attrs-2.4/header-attrs.js:
--------------------------------------------------------------------------------
 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to
 2 | // be compatible with the behavior of Pandoc < 2.8).
 3 | document.addEventListener('DOMContentLoaded', function(e) {
 4 |   var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
 5 |   var i, h, a;
 6 |   for (i = 0; i < hs.length; i++) {
 7 |     h = hs[i];
 8 |     if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
 9 |     a = h.attributes;
10 |     while (a.length > 0) h.removeAttribute(a[0].name);
11 |   }
12 | });
13 | 


--------------------------------------------------------------------------------
/man/xlin_fits_R.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xlin_fits_R.R
 3 | \name{xlin_fits_R}
 4 | \alias{xlin_fits_R}
 5 | \title{xlin_fits_R}
 6 | \usage{
 7 | xlin_fits_R(x, y, w)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group (length>=2).}
11 | 
12 | \item{y}{NumericVector, values to group in order.}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | }
16 | \value{
17 | vector of predictions.
18 | }
19 | \description{
20 | Calculate out of sample linear fit predictions.
21 | }
22 | \examples{
23 | 
24 | xlin_fits_R(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
25 | 
26 | }
27 | \keyword{internal}
28 | 


--------------------------------------------------------------------------------
/man/xlin_fits_V.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xlin_fits_V.R
 3 | \name{xlin_fits_V}
 4 | \alias{xlin_fits_V}
 5 | \title{xlin_fits_R}
 6 | \usage{
 7 | xlin_fits_V(x, y, w)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group (length>=2).}
11 | 
12 | \item{y}{NumericVector, values to group in order.}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | }
16 | \value{
17 | vector of predictions.
18 | }
19 | \description{
20 | Calculate out of sample linear fit predictions.
21 | }
22 | \examples{
23 | 
24 | xlin_fits_V(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
25 | 
26 | }
27 | \keyword{internal}
28 | 


--------------------------------------------------------------------------------
/man/xlin_fits_lm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/xlin_fits_lm.R
 3 | \name{xlin_fits_lm}
 4 | \alias{xlin_fits_lm}
 5 | \title{xlin_fits_R}
 6 | \usage{
 7 | xlin_fits_lm(x, y, w)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group (length>=2).}
11 | 
12 | \item{y}{NumericVector, values to group in order.}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | }
16 | \value{
17 | vector of predictions.
18 | }
19 | \description{
20 | Calculate out of sample linear fit predictions.
21 | }
22 | \examples{
23 | 
24 | xlin_fits_lm(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
25 | 
26 | }
27 | \keyword{internal}
28 | 


--------------------------------------------------------------------------------
/man/piecewise_linear.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vtreat_coders.R
 3 | \name{piecewise_linear}
 4 | \alias{piecewise_linear}
 5 | \title{Piecewise linear fit.}
 6 | \usage{
 7 | piecewise_linear(varName, x, y, w = NULL)
 8 | }
 9 | \arguments{
10 | \item{varName}{character, name of variable to work on.}
11 | 
12 | \item{x}{numeric, input values.}
13 | 
14 | \item{y}{numeric, values to estimate.}
15 | 
16 | \item{w}{numeric, weights.}
17 | }
18 | \description{
19 | \code{vtreat} custom coder based on \code{RcppDynProg::solve_for_partition()}.
20 | }
21 | \examples{
22 | 
23 | piecewise_linear("x", 1:8, c(1, 2, 3, 4, 4, 3, 2, 1))
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/piecewise_constant.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vtreat_coders.R
 3 | \name{piecewise_constant}
 4 | \alias{piecewise_constant}
 5 | \title{Piecewise constant fit.}
 6 | \usage{
 7 | piecewise_constant(varName, x, y, w = NULL)
 8 | }
 9 | \arguments{
10 | \item{varName}{character, name of variable to work on.}
11 | 
12 | \item{x}{numeric, input values.}
13 | 
14 | \item{y}{numeric, values to estimate.}
15 | 
16 | \item{w}{numeric, weights.}
17 | }
18 | \description{
19 | \code{vtreat} custom coder based on \code{RcppDynProg::solve_for_partition()}.
20 | }
21 | \examples{
22 | 
23 | piecewise_constant("x", 1:8, c(-1, -1, -1, -1, 1, 1, 1, 1))
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/score_solution.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{score_solution}
 4 | \alias{score_solution}
 5 | \title{compute the price of a partition solution (and check is valid).}
 6 | \usage{
 7 | score_solution(x, solution)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).}
11 | 
12 | \item{solution}{vector of indices}
13 | }
14 | \value{
15 | price
16 | }
17 | \description{
18 | compute the price of a partition solution (and check is valid).
19 | }
20 | \examples{
21 | 
22 | x <- matrix(c(1,1,5,1,1,0,5,0,1), nrow=3)
23 | s <- c(1, 2, 4)
24 | score_solution(x, s)
25 | 
26 | }
27 | 


--------------------------------------------------------------------------------
/R/xlin_fits_lm.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' xlin_fits_R
 3 | #' 
 4 | #' Calculate out of sample linear fit predictions.
 5 | #' 
 6 | #' @param x NumericVector, x-coords of values to group (length>=2).
 7 | #' @param y NumericVector, values to group in order.
 8 | #' @param w NumericVector, weights (positive).
 9 | #' @return  vector of predictions.
10 | #' 
11 | #' @keywords internal
12 | #' 
13 | #' @examples
14 | #' 
15 | #' xlin_fits_lm(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
16 | #' 
17 | #' @export
18 | #' 
19 | xlin_fits_lm <- function(x, y, w) {
20 |   n <- length(y)
21 |   d <- data.frame(x = x, y = y)
22 |   vapply(
23 |     seq_len(n),
24 |     function(i) {
25 |       m <- lm(y ~ x, data = d[-i, ], weights = w[-i])
26 |       predict(m, newdata = d[i, ])
27 |     }, numeric(1))
28 | }


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
 1 | // Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
 2 | // v0.0.1
 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
 4 | 
 5 | document.addEventListener('DOMContentLoaded', function() {
 6 |   const codeList = document.getElementsByClassName("sourceCode");
 7 |   for (var i = 0; i < codeList.length; i++) {
 8 |     var linkList = codeList[i].getElementsByTagName('a');
 9 |     for (var j = 0; j < linkList.length; j++) {
10 |       if (linkList[j].innerHTML === "") {
11 |         linkList[j].setAttribute('aria-hidden', 'true');
12 |       }
13 |     }
14 |   }
15 | });
16 | 


--------------------------------------------------------------------------------
/docs/articles/SegmentationL_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
 1 | // Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
 2 | // v0.0.1
 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
 4 | 
 5 | document.addEventListener('DOMContentLoaded', function() {
 6 |   const codeList = document.getElementsByClassName("sourceCode");
 7 |   for (var i = 0; i < codeList.length; i++) {
 8 |     var linkList = codeList[i].getElementsByTagName('a');
 9 |     for (var j = 0; j < linkList.length; j++) {
10 |       if (linkList[j].innerHTML === "") {
11 |         linkList[j].setAttribute('aria-hidden', 'true');
12 |       }
13 |     }
14 |   }
15 | });
16 | 


--------------------------------------------------------------------------------
/docs/articles/Segmentation_files/accessible-code-block-0.0.1/empty-anchor.js:
--------------------------------------------------------------------------------
 1 | // Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/issues/6352#issuecomment-626106786) -->
 2 | // v0.0.1
 3 | // Written by JooYoung Seo (jooyoung@psu.edu) and Atsushi Yasumoto on June 1st, 2020.
 4 | 
 5 | document.addEventListener('DOMContentLoaded', function() {
 6 |   const codeList = document.getElementsByClassName("sourceCode");
 7 |   for (var i = 0; i < codeList.length; i++) {
 8 |     var linkList = codeList[i].getElementsByTagName('a');
 9 |     for (var j = 0; j < linkList.length; j++) {
10 |       if (linkList[j].innerHTML === "") {
11 |         linkList[j].setAttribute('aria-hidden', 'true');
12 |       }
13 |     }
14 |   }
15 | });
16 | 


--------------------------------------------------------------------------------
/src/input_summary.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef INPUT_SUMMARY_H
 3 | #define INPUT_SUMMARY_H
 4 | 
 5 | 
 6 | using Rcpp::NumericVector;
 7 | 
 8 | 
 9 | class input_summary {
10 | public:
11 |   double max_x;
12 |   double min_x;
13 |   bool saw_y_pos;
14 |   double max_x_pos;
15 |   double min_x_pos;
16 |   bool saw_y_neg;
17 |   double max_x_neg;
18 |   double min_x_neg;
19 |   double total_w;
20 |   double total_wy;
21 |   long k_points;
22 |   
23 |   input_summary(const NumericVector &x, const NumericVector &y, 
24 |                 const NumericVector &w,
25 |                 const int i, const int j,
26 |                 const int skip);
27 |   
28 |   bool saw_data() const;
29 |   
30 |   bool x_varies() const;
31 |   
32 |   bool y_varies() const;
33 |   
34 |   bool seperable() const;
35 | };
36 | 
37 | #endif


--------------------------------------------------------------------------------
/man/xlin_fits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{xlin_fits}
 4 | \alias{xlin_fits}
 5 | \title{xlin_fits}
 6 | \usage{
 7 | xlin_fits(x, y, w, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, explanatory variable (length>=2).}
11 | 
12 | \item{y}{NumericVector, values fit.}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, j>=i+2 last index (inclusive);}
19 | }
20 | \value{
21 | vector of predictions.
22 | }
23 | \description{
24 | Calculate out of sample linear fit predictions using regularization.
25 | Zero indexed.
26 | }
27 | \examples{
28 | 
29 | xlin_fits(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 0, 3)
30 | 
31 | }
32 | \keyword{internal}
33 | 


--------------------------------------------------------------------------------
/man/const_costs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{const_costs}
 4 | \alias{const_costs}
 5 | \title{const_costs}
 6 | \usage{
 7 | const_costs(y, w, min_seg, indices)
 8 | }
 9 | \arguments{
10 | \item{y}{NumericVector, values to group in order.}
11 | 
12 | \item{w}{NumericVector, weights.}
13 | 
14 | \item{min_seg}{positive integer, minimum segment size (>=1).}
15 | 
16 | \item{indices}{IntegerVector, order list of indices to pair.}
17 | }
18 | \value{
19 | xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
20 | }
21 | \description{
22 | Built matrix of total out of sample interval square error costs for held-out means.
23 | One indexed.
24 | }
25 | \examples{
26 | 
27 | const_costs(c(1, 1, 2, 2), c(1, 1, 1, 1), 1, 1:4)
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/RcppDynProg-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/package.R
 3 | \docType{package}
 4 | \name{RcppDynProg-package}
 5 | \alias{RcppDynProg}
 6 | \alias{RcppDynProg-package}
 7 | \title{RcppDynProg}
 8 | \description{
 9 | Rcpp dynamic programming solutions for partitioning and machine learning problems.
10 | Includes out of sample fitting applications.  
11 | Also supplies additional custom coders for the vtreat package.
12 | Please see \url{https://github.com/WinVector/RcppDynProg} for details.
13 | }
14 | \seealso{
15 | Useful links:
16 | \itemize{
17 |   \item \url{https://github.com/WinVector/RcppDynProg/}
18 |   \item \url{https://winvector.github.io/RcppDynProg/}
19 |   \item Report bugs at \url{https://github.com/WinVector/RcppDynProg/issues}
20 | }
21 | 
22 | }
23 | \author{
24 | John Mount
25 | }
26 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/man/const_cost.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{const_cost}
 4 | \alias{const_cost}
 5 | \title{const_cost}
 6 | \usage{
 7 | const_cost(y, w, min_seg, i, j)
 8 | }
 9 | \arguments{
10 | \item{y}{NumericVector, values to group in order.}
11 | 
12 | \item{w}{NumericVector, weights.}
13 | 
14 | \item{min_seg}{positive integer, minimum segment size (>=1).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, j>=i last index (inclusive);}
19 | }
20 | \value{
21 | scalar, const cost of [i,...,j] interval (inclusive).
22 | }
23 | \description{
24 | Calculate out of sample total square error cost of using mean of points to estimate other points in interval.
25 | Zero indexed.
26 | }
27 | \examples{
28 | 
29 | const_cost(c(1, 1, 2, 2), c(1, 1, 1, 1), 1, 0, 3)
30 | 
31 | }
32 | \keyword{internal}
33 | 


--------------------------------------------------------------------------------
/man/lin_costs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{lin_costs}
 4 | \alias{lin_costs}
 5 | \title{lin_costs}
 6 | \usage{
 7 | lin_costs(x, y, w, min_seg, indices)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group.}
11 | 
12 | \item{y}{NumericVector, values to group in order.}
13 | 
14 | \item{w}{NumericVector, weights.}
15 | 
16 | \item{min_seg}{positive integer, minimum segment size (>=1).}
17 | 
18 | \item{indices}{IntegerVector, ordered list of indices to pair.}
19 | }
20 | \value{
21 | xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
22 | }
23 | \description{
24 | Built matrix of interval costs for held-out linear models.
25 | One indexed.
26 | }
27 | \examples{
28 | 
29 | lin_costs(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 1, 1:4)
30 | 
31 | }
32 | 


--------------------------------------------------------------------------------
/man/piecewise_linear_coder.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vtreat_coders.R
 3 | \name{piecewise_linear_coder}
 4 | \alias{piecewise_linear_coder}
 5 | \title{Piecewise linear fit coder factory.}
 6 | \usage{
 7 | piecewise_linear_coder(
 8 |   penalty = 1,
 9 |   min_n_to_chunk = 1000,
10 |   min_seg = 10,
11 |   max_k = 1000
12 | )
13 | }
14 | \arguments{
15 | \item{penalty}{per-segment cost penalty.}
16 | 
17 | \item{min_n_to_chunk}{minimum n to subdivied problem.}
18 | 
19 | \item{min_seg}{positive integer, minimum segment size.}
20 | 
21 | \item{max_k}{maximum segments to divide into.}
22 | }
23 | \value{
24 | a vtreat coder
25 | }
26 | \description{
27 | Build a piecewise linear fit coder with some parameters bound in.
28 | }
29 | \examples{
30 | 
31 | coder <- piecewise_linear_coder(min_seg = 1)
32 | coder("x", 1:8, c(1, 2, 3, 4, 4, 3, 2, 1))
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/summarize_input.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{summarize_input}
 4 | \alias{summarize_input}
 5 | \title{Summarize data (for debugging).}
 6 | \usage{
 7 | summarize_input(x, y, w, i, j, skip)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, expanatory variable.}
11 | 
12 | \item{y}{NumericVector, 0/1 values to fit.}
13 | 
14 | \item{w}{NumericVector, weights (required, positive).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, last index (inclusive).}
19 | 
20 | \item{skip}{integer, index to skip (-1 to not skip).}
21 | }
22 | \value{
23 | summary list
24 | }
25 | \description{
26 | Summarize data (for debugging).
27 | }
28 | \examples{
29 | 
30 | costs <- matrix(c(1.5, NA ,NA ,1 ,0 , NA, 5, -1, 1), nrow = 3)
31 | solve_interval_partition(costs, nrow(costs))
32 | 
33 | }
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/man/xlin_pfits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{xlin_pfits}
 4 | \alias{xlin_pfits}
 5 | \title{xlin_pfits}
 6 | \usage{
 7 | xlin_pfits(x, y, w, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, explanatory variable (length>=2).}
11 | 
12 | \item{y}{NumericVector, values to fit.}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, j>=i+2 last index (inclusive);}
19 | }
20 | \value{
21 | vector of predictions.
22 | }
23 | \description{
24 | Calculate out of sample linear fit predictions using pseudo-inverse.
25 | Please see: \url{https://win-vector.com/2019/01/08/a-beautiful-2-by-2-matrix-identity/}.
26 | Zero indexed.
27 | }
28 | \examples{
29 | 
30 | xlin_pfits(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 0, 3)
31 | 
32 | }
33 | \keyword{internal}
34 | 


--------------------------------------------------------------------------------
/man/piecewise_constant_coder.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vtreat_coders.R
 3 | \name{piecewise_constant_coder}
 4 | \alias{piecewise_constant_coder}
 5 | \title{Piecewise constant fit coder factory.}
 6 | \usage{
 7 | piecewise_constant_coder(
 8 |   penalty = 1,
 9 |   min_n_to_chunk = 1000,
10 |   min_seg = 10,
11 |   max_k = 1000
12 | )
13 | }
14 | \arguments{
15 | \item{penalty}{per-segment cost penalty.}
16 | 
17 | \item{min_n_to_chunk}{minimum n to subdivied problem.}
18 | 
19 | \item{min_seg}{positive integer, minimum segment size.}
20 | 
21 | \item{max_k}{maximum segments to divide into.}
22 | }
23 | \value{
24 | a vtreat coder
25 | }
26 | \description{
27 | Build a piecewise constant fit coder with some parameters bound in.
28 | }
29 | \examples{
30 | 
31 | coder <- piecewise_constant_coder(min_seg = 1)
32 | coder("x", 1:8, c(-1, -1, -1, -1, 1, 1, 1, 1))
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/man/const_costs_logistic.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{const_costs_logistic}
 4 | \alias{const_costs_logistic}
 5 | \title{const_costs_logistic}
 6 | \usage{
 7 | const_costs_logistic(y, w, min_seg, indices)
 8 | }
 9 | \arguments{
10 | \item{y}{NumericVector, 0/1 values to group in order (should be in interval [0,1]).}
11 | 
12 | \item{w}{NumericVector, weights (should be positive).}
13 | 
14 | \item{min_seg}{positive integer, minimum segment size (>=1).}
15 | 
16 | \item{indices}{IntegerVector, order list of indices to pair.}
17 | }
18 | \value{
19 | xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
20 | }
21 | \description{
22 | Built matrix of interval logistic costs for held-out means.
23 | One indexed.
24 | }
25 | \examples{
26 | 
27 | const_costs_logistic(c(0.1, 0.1, 0.2, 0.2), c(1, 1, 1, 1), 1, 1:4)
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/lin_cost.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{lin_cost}
 4 | \alias{lin_cost}
 5 | \title{lin_cost}
 6 | \usage{
 7 | lin_cost(x, y, w, min_seg, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group.}
11 | 
12 | \item{y}{NumericVector, values to group in order.}
13 | 
14 | \item{w}{NumericVector, weights.}
15 | 
16 | \item{min_seg}{positive integer, minimum segment size (>=1).}
17 | 
18 | \item{i}{integer, first index (inclusive).}
19 | 
20 | \item{j}{integer, j>=i last index (inclusive);}
21 | }
22 | \value{
23 | scalar, linear cost of [i,...,j] interval (inclusive).
24 | }
25 | \description{
26 | Calculate cost of using linear model fit on points to estimate other points in the interval.
27 | Zero indexed.
28 | }
29 | \examples{
30 | 
31 | lin_cost(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 1, 0, 3)
32 | 
33 | }
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/man/const_cost_logistic.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{const_cost_logistic}
 4 | \alias{const_cost_logistic}
 5 | \title{const_cost_logistic}
 6 | \usage{
 7 | const_cost_logistic(y, w, min_seg, i, j)
 8 | }
 9 | \arguments{
10 | \item{y}{NumericVector, 0/1 values to group in order (should be in interval [0,1]).}
11 | 
12 | \item{w}{NumericVector, weights (should be positive).}
13 | 
14 | \item{min_seg}{positive integer, minimum segment size (>=1).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, j>=i last index (inclusive);}
19 | }
20 | \value{
21 | scalar, const cost of [i,...,j] interval (inclusive).
22 | }
23 | \description{
24 | Calculate logistic cost of using mean of points to estimate other points in interval.
25 | Zero indexed.
26 | }
27 | \examples{
28 | 
29 | const_cost_logistic(c(0.1, 0.1, 0.2, 0.2), c(1, 1, 1, 1), 1, 0, 3)
30 | 
31 | }
32 | \keyword{internal}
33 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # RcppDynProg 0.2.1 2023/08/19
 3 | 
 4 |   * Work around https://github.com/r-lib/roxygen2/issues/1491
 5 | 
 6 | # RcppDynProg 0.2.0 2020/12/15
 7 | 
 8 |  * Check API invariants, inspired by Akila Chowdary Kolla's fuzz testing.
 9 | 
10 | # RcppDynProg 0.1.6 2020/11/13
11 | 
12 |  * Remove old test fn.
13 | 
14 | # RcppDynProg 0.1.5 2020/10/17
15 | 
16 |  * Move to tinytest.
17 | 
18 | # RcppDynProg 0.1.4 2020/08/11
19 | 
20 |  * Badges.
21 | 
22 | # RcppDynProg 0.1.3 2019/07/24
23 | 
24 |  * Adjust license.
25 | 
26 | # RcppDynProg 0.1.2 2019/03/31
27 | 
28 |  * Clean up testing pattern a bit.
29 |  * Move ggplot2 out of the vingettes as ggplot2 is breaking things with "VECTOR_ELT() can only be applied to a 'list', not a 'double'".
30 | 
31 | # RcppDynProg 0.1.1 2019/02/02
32 | 
33 |  * Add logistic scoring.
34 |  * No k-bound variation.
35 |  * Add a logistic fitter.
36 |  * Switch to RUnit testing.
37 |  * Stricter floating point.
38 |  * Introduce input_summary class.
39 | 
40 | # RcppDynProg 0.1.0 2019/01/01
41 | 
42 |  * Initial version.
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/man/lin_costs_logistic.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{lin_costs_logistic}
 4 | \alias{lin_costs_logistic}
 5 | \title{lin_costs_logistic deviance costs.}
 6 | \usage{
 7 | lin_costs_logistic(x, y, w, min_seg, indices)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group.}
11 | 
12 | \item{y}{NumericVector, values to group in order (should be in interval [0,1]).}
13 | 
14 | \item{w}{NumericVector, weights (should be positive).}
15 | 
16 | \item{min_seg}{positive integer, minimum segment size (>=1).}
17 | 
18 | \item{indices}{IntegerVector, ordered list of indices to pair.}
19 | }
20 | \value{
21 | xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
22 | }
23 | \description{
24 | Built matrix of interval deviance costs for held-out logistic models.
25 | Fits are evaluated in-sample.
26 | One indexed.
27 | }
28 | \examples{
29 | 
30 | lin_costs_logistic(c(1, 2, 3, 4, 5, 6, 7), c(0, 0, 1, 0, 1, 1, 0), c(1, 1, 1, 1, 1, 1, 1), 3, 1:7)
31 | 
32 | }
33 | 


--------------------------------------------------------------------------------
/man/solve_interval_partition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{solve_interval_partition}
 4 | \alias{solve_interval_partition}
 5 | \title{solve_interval_partition interval partition problem.}
 6 | \usage{
 7 | solve_interval_partition(x, kmax)
 8 | }
 9 | \arguments{
10 | \item{x}{square NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).}
11 | 
12 | \item{kmax}{int, maximum number of segments in solution.}
13 | }
14 | \value{
15 | dynamic program solution.
16 | }
17 | \description{
18 | Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
19 | is the cost of choosing the partition element [i,...,j]. 
20 | Returned solution is an ordered vector v of length k<=kmax where: v[1]==1, v[k]==nrow(x)+1, and the 
21 | partition is of the form [v[i], v[i+1]) (intervals open on the right).
22 | }
23 | \examples{
24 | 
25 | costs <- matrix(c(1.5, NA ,NA ,1 ,0 , NA, 5, -1, 1), nrow = 3)
26 | solve_interval_partition(costs, nrow(costs))
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: RcppDynProg
 2 | Type: Package
 3 | Title: 'Rcpp' Dynamic Programming
 4 | Version: 0.2.1
 5 | Date: 2023-08-19
 6 | Authors@R: c(
 7 |     person("John", "Mount", email = "jmount@win-vector.com", role = c("aut", "cre")),
 8 |     person("Nina", "Zumel", email = "nzumel@win-vector.com", role = c("aut")),
 9 |     person(family = "Win-Vector LLC", role = c("cph"))
10 |     )
11 | URL: https://github.com/WinVector/RcppDynProg/, https://winvector.github.io/RcppDynProg/
12 | BugReports: https://github.com/WinVector/RcppDynProg/issues
13 | Maintainer: John Mount <jmount@win-vector.com>
14 | Description: Dynamic Programming implemented in 'Rcpp'.  Includes example partition and out of sample fitting applications.  Also supplies additional custom coders for the 'vtreat' package.
15 | License: GPL-2 | GPL-3
16 | Depends:
17 |   R (>= 3.4.0)
18 | Imports:
19 |   wrapr (>= 2.0.4),
20 |   Rcpp (>= 1.0.0),
21 |   utils,
22 |   stats
23 | LinkingTo: 
24 |   Rcpp,
25 |   RcppArmadillo
26 | RoxygenNote: 7.2.3
27 | Suggests:
28 |     tinytest,
29 |     knitr,
30 |     rmarkdown
31 | VignetteBuilder: knitr
32 | 


--------------------------------------------------------------------------------
/man/solve_interval_partition_R.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/solve_dyn_R.R
 3 | \name{solve_interval_partition_R}
 4 | \alias{solve_interval_partition_R}
 5 | \title{solve_interval_partition (R version)}
 6 | \usage{
 7 | solve_interval_partition_R(x, kmax)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).}
11 | 
12 | \item{kmax}{int, maximum number of steps in solution.}
13 | }
14 | \value{
15 | dynamic program solution.
16 | }
17 | \description{
18 | Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
19 | is the cost of choosing the partition element [i,...,j]. 
20 | Returned solution is an ordered vector v of length k where: v[1]==1, v[k]==nrow(x)+1, and the 
21 | partition is of the form [v[i], v[i+1]) (intervals open on the right).
22 | }
23 | \examples{
24 | 
25 | x <- matrix(c(1,1,5,1,1,0,5,0,1), nrow=3)
26 | k <- 3
27 | solve_interval_partition_R(x, k)
28 | solve_interval_partition(x, k)
29 | 
30 | }
31 | \keyword{internal}
32 | 


--------------------------------------------------------------------------------
/man/solve_interval_partition_no_k.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{solve_interval_partition_no_k}
 4 | \alias{solve_interval_partition_no_k}
 5 | \title{solve_interval_partition interval partition problem, no boun on the number of steps.}
 6 | \usage{
 7 | solve_interval_partition_no_k(x)
 8 | }
 9 | \arguments{
10 | \item{x}{square NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).}
11 | }
12 | \value{
13 | dynamic program solution.
14 | }
15 | \description{
16 | Not working yet.
17 | }
18 | \details{
19 | Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
20 | is the cost of choosing the partition element [i,...,j]. 
21 | Returned solution is an ordered vector v of length k where: v[1]==1, v[k]==nrow(x)+1, and the 
22 | partition is of the form [v[i], v[i+1]) (intervals open on the right).
23 | }
24 | \examples{
25 | 
26 | costs <- matrix(c(1.5, NA ,NA ,1 ,0 , NA, 5, -1, 1), nrow = 3)
27 | solve_interval_partition(costs, nrow(costs))
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/extras/sp500/segmented_Example.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "segmented Example"
 3 | output: github_document
 4 | ---
 5 | 
 6 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
 7 | library("segmented")
 8 | library("ggplot2")
 9 | 
10 | # Data from: 
11 | # https://finance.yahoo.com/quote/%5EGSPC/history?period1=-630950400&period2=1546416000&interval=1d&filter=history&frequency=1d
12 | sp500 <- read.csv("^GSPC.csv")
13 | sp500$Date <- as.Date(sp500$Date)
14 | sp500$x <- as.numeric(sp500$Date)
15 | sp500$log_price <- log(sp500$Adj.Close)
16 | 
17 | m1 <- lm(log_price ~ x, data = sp500)
18 | # settings from help(segmented)
19 | o <- segmented.lm(m1,seg.Z=~x,psi=list(x=NA), 
20 |     control=seg.control(stop.if.error=FALSE,n.boot=0, it.max=20))
21 | sp500$spred <- exp(predict(o, newdata = sp500))
22 | 
23 | ggplot(data = sp500, aes(x = Date)) +
24 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
25 |   geom_line(aes(y=spred), color = "darkgreen") +
26 |   ggtitle("segmented approximation of historic sp500 data") +
27 |   theme(legend.position = "none") +
28 |   scale_color_brewer(palette = "Dark2") +
29 |   scale_y_log10()
30 | ```


--------------------------------------------------------------------------------
/man/logistic_fits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{logistic_fits}
 4 | \alias{logistic_fits}
 5 | \title{In sample logistic predictions (in link space).}
 6 | \usage{
 7 | logistic_fits(x, y, w, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, expanatory variable.}
11 | 
12 | \item{y}{NumericVector, 0/1 values to fit.}
13 | 
14 | \item{w}{NumericVector, weights (required, positive).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, last index (inclusive).}
19 | }
20 | \value{
21 | vector of predictions for interval.
22 | }
23 | \description{
24 | logistic regression predictions.
25 | Zero indexed.
26 | }
27 | \examples{
28 | 
29 | set.seed(5)
30 | d <- data.frame(x = rnorm(10))
31 | d$y <- d$x + rnorm(nrow(d))>0
32 | weights <- runif(nrow(d))
33 | m <- glm(y~x, data = d, family = binomial, weights = weights)
34 | d$pred1 <- predict(m, newdata = d, type = "link")
35 | d$pred2 <- logistic_fits(d$x, d$y, weights, 0, nrow(d)-1)
36 | d <- d[order(d$x), , drop = FALSE]
37 | print(d)
38 | 
39 | }
40 | \keyword{internal}
41 | 


--------------------------------------------------------------------------------
/man/solve_interval_partition_k.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{solve_interval_partition_k}
 4 | \alias{solve_interval_partition_k}
 5 | \title{solve_interval_partition interval partition problem with a bound on number of steps.}
 6 | \usage{
 7 | solve_interval_partition_k(x, kmax)
 8 | }
 9 | \arguments{
10 | \item{x}{square NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).}
11 | 
12 | \item{kmax}{int, maximum number of segments in solution.}
13 | }
14 | \value{
15 | dynamic program solution.
16 | }
17 | \description{
18 | Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
19 | is the cost of choosing the partition element [i,...,j]. 
20 | Returned solution is an ordered vector v of length k<=kmax where: v[1]==1, v[k]==nrow(x)+1, and the 
21 | partition is of the form [v[i], v[i+1]) (intervals open on the right).
22 | }
23 | \examples{
24 | 
25 | costs <- matrix(c(1.5, NA ,NA ,1 ,0 , NA, 5, -1, 1), nrow = 3)
26 | solve_interval_partition(costs, nrow(costs))
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/man/lin_cost_logistic.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{lin_cost_logistic}
 4 | \alias{lin_cost_logistic}
 5 | \title{lin_cost_logistic logistic deviance pricing}
 6 | \usage{
 7 | lin_cost_logistic(x, y, w, min_seg, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, x-coords of values to group.}
11 | 
12 | \item{y}{NumericVector, values to group in order (should be in interval [0,1]).}
13 | 
14 | \item{w}{NumericVector, weights (positive).}
15 | 
16 | \item{min_seg}{positive integer, minimum segment size (>=1).}
17 | 
18 | \item{i}{integer, first index (inclusive).}
19 | 
20 | \item{j}{integer, j>=i last index (inclusive);}
21 | }
22 | \value{
23 | scalar, linear cost of [i,...,j] interval (inclusive).
24 | }
25 | \description{
26 | Calculate deviance cost of using logistic model fit on points to estimate other points in the interval.
27 | Fits are evaluated in-sample.
28 | Zero indexed.
29 | }
30 | \examples{
31 | 
32 | lin_cost_logistic(c(1, 2, 3, 4, 5, 6, 7), c(0, 0, 1, 0, 1, 1, 0), c(1, 1, 1, 1, 1, 1, 1), 3, 0, 6)
33 | 
34 | }
35 | \keyword{internal}
36 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(all_partitions)
 4 | export(const_cost)
 5 | export(const_cost_logistic)
 6 | export(const_costs)
 7 | export(const_costs_logistic)
 8 | export(lin_cost)
 9 | export(lin_cost_logistic)
10 | export(lin_costs)
11 | export(lin_costs_logistic)
12 | export(logistic_fits)
13 | export(logistic_solve1)
14 | export(piecewise_constant)
15 | export(piecewise_constant_coder)
16 | export(piecewise_linear)
17 | export(piecewise_linear_coder)
18 | export(score_solution)
19 | export(solve_for_partition)
20 | export(solve_for_partitionc)
21 | export(solve_interval_partition)
22 | export(solve_interval_partition_R)
23 | export(solve_interval_partition_k)
24 | export(solve_interval_partition_no_k)
25 | export(summarize_input)
26 | export(xlin_fits)
27 | export(xlin_fits_R)
28 | export(xlin_fits_V)
29 | export(xlin_fits_lm)
30 | export(xlin_pfits)
31 | export(xlogistic_fits)
32 | import(Rcpp)
33 | importFrom(Rcpp,evalCpp)
34 | importFrom(stats,approx)
35 | importFrom(stats,lm)
36 | importFrom(stats,predict)
37 | importFrom(utils,combn)
38 | importFrom(wrapr,stop_if_dot_args)
39 | useDynLib(RcppDynProg)
40 | 


--------------------------------------------------------------------------------
/man/xlogistic_fits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{xlogistic_fits}
 4 | \alias{xlogistic_fits}
 5 | \title{Out of sample logistic predictions (in link space).}
 6 | \usage{
 7 | xlogistic_fits(x, y, w, i, j)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, expanatory variable.}
11 | 
12 | \item{y}{NumericVector, 0/1 values to fit.}
13 | 
14 | \item{w}{NumericVector, weights (required, positive).}
15 | 
16 | \item{i}{integer, first index (inclusive).}
17 | 
18 | \item{j}{integer, last index (inclusive).}
19 | }
20 | \value{
21 | vector of predictions for interval.
22 | }
23 | \description{
24 | 1-hold out logistic regression predections.
25 | Zero indexed.
26 | }
27 | \examples{
28 | 
29 | set.seed(5)
30 | d <- data.frame(x = rnorm(10))
31 | d$y <- d$x + rnorm(nrow(d))>0
32 | weights <- runif(nrow(d))
33 | m <- glm(y~x, data = d, family = binomial, weights = weights)
34 | d$pred1 <- predict(m, newdata = d, type = "link")
35 | d$pred2 <- xlogistic_fits(d$x, d$y, weights, 0, nrow(d)-1)
36 | d <- d[order(d$x), , drop = FALSE]
37 | print(d)
38 | 
39 | }
40 | \keyword{internal}
41 | 


--------------------------------------------------------------------------------
/extras/sp500/segmented_Example.md:
--------------------------------------------------------------------------------
 1 | segmented Example
 2 | ================
 3 | 
 4 | ``` r
 5 | library("segmented")
 6 | library("ggplot2")
 7 | 
 8 | # Data from: 
 9 | # https://finance.yahoo.com/quote/%5EGSPC/history?period1=-630950400&period2=1546416000&interval=1d&filter=history&frequency=1d
10 | sp500 <- read.csv("^GSPC.csv")
11 | sp500$Date <- as.Date(sp500$Date)
12 | sp500$x <- as.numeric(sp500$Date)
13 | sp500$log_price <- log(sp500$Adj.Close)
14 | 
15 | m1 <- lm(log_price ~ x, data = sp500)
16 | # settings from help(segmented)
17 | o <- segmented.lm(m1,seg.Z=~x,psi=list(x=NA), 
18 |     control=seg.control(stop.if.error=FALSE,n.boot=0, it.max=20))
19 | sp500$spred <- exp(predict(o, newdata = sp500))
20 | 
21 | ggplot(data = sp500, aes(x = Date)) +
22 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
23 |   geom_line(aes(y=spred), color = "darkgreen") +
24 |   ggtitle("segmented approximation of historic sp500 data") +
25 |   theme(legend.position = "none") +
26 |   scale_color_brewer(palette = "Dark2") +
27 |   scale_y_log10()
28 | ```
29 | 
30 | <img src="segmented_Example_files/figure-markdown_github/r1-1.png" style="display: block; margin: auto;" />
31 | 


--------------------------------------------------------------------------------
/man/logistic_solve1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{logistic_solve1}
 4 | \alias{logistic_solve1}
 5 | \title{logistic_fit}
 6 | \usage{
 7 | logistic_solve1(x, y, w, initial_link, i, j, skip)
 8 | }
 9 | \arguments{
10 | \item{x}{NumericVector, expanatory variable.}
11 | 
12 | \item{y}{NumericVector, 0/1 values to fit.}
13 | 
14 | \item{w}{NumericVector, weights (required, positive).}
15 | 
16 | \item{initial_link, }{initial link estimates (required, all zeroes is a good start).}
17 | 
18 | \item{i}{integer, first index (inclusive).}
19 | 
20 | \item{j}{integer, last index (inclusive).}
21 | 
22 | \item{skip}{integer, index to skip (-1 to not skip).}
23 | }
24 | \value{
25 | vector of a and b.
26 | }
27 | \description{
28 | Calculate y ~ sigmoid(a + b x) using iteratively re-weighted least squares.
29 | Zero indexed.
30 | }
31 | \examples{
32 | 
33 | set.seed(5)
34 | d <- data.frame(
35 |   x =  rnorm(10),
36 |   y = sample(c(0,1), 10, replace = TRUE)
37 | )
38 | weights <- runif(nrow(d))
39 | m <- glm(y~x, data = d, family = binomial, weights = weights)
40 | coef(m)
41 | logistic_solve1(d$x, d$y, weights, rep(0.0, nrow(d)), 0, nrow(d)-1, -1)
42 | 
43 | }
44 | \keyword{internal}
45 | 


--------------------------------------------------------------------------------
/extras/check_reverse_dependencies.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "check_reverse_dependencies"
 3 | output: github_document
 4 | ---
 5 | 
 6 | ```{r, error=TRUE}
 7 | library("prrd")
 8 | td <- tempdir()
 9 | package = "RcppDynProg"
10 | packageVersion(package)
11 | date()
12 | 
13 | parallelCluster <- NULL
14 | # # parallel doesn't work due to https://github.com/r-lib/liteq/issues/22
15 | #ncores <- parallel::detectCores()
16 | #parallelCluster <- parallel::makeCluster(ncores)
17 | 
18 | orig_dir <- getwd()
19 | print(orig_dir)
20 | setwd(td)
21 | print(td)
22 | 
23 | options(repos = c(CRAN="https://cloud.r-project.org"))
24 | jobsdfe <- enqueueJobs(package=package, directory=td)
25 | 
26 | mk_fn <- function(package, directory) {
27 |   force(package)
28 |   force(directory)
29 |   function(i) {
30 |     library("prrd")
31 |     setwd(directory)
32 |     Sys.sleep(1*i)
33 |     dequeueJobs(package=package, directory=directory)
34 |   }
35 | }
36 | f <- mk_fn(package=package, directory=td)
37 | 
38 | if(!is.null(parallelCluster)) {
39 |   parallel::parLapply(parallelCluster, seq_len(ncores), f)
40 | } else {
41 |   f(0)
42 | }
43 | 
44 | summariseQueue(package=package, directory=td)
45 | 
46 | setwd(orig_dir)
47 | if(!is.null(parallelCluster)) {
48 |   parallel::stopCluster(parallelCluster)
49 | }
50 | 
51 | ```
52 | 
53 | 


--------------------------------------------------------------------------------
/R/xlin_fits_V.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' xlin_fits_R
 3 | #' 
 4 | #' Calculate out of sample linear fit predictions.
 5 | #' 
 6 | #' @param x NumericVector, x-coords of values to group (length>=2).
 7 | #' @param y NumericVector, values to group in order.
 8 | #' @param w NumericVector, weights (positive).
 9 | #' @return  vector of predictions.
10 | #' 
11 | #' @keywords internal
12 | #' 
13 | #' @examples
14 | #' 
15 | #' xlin_fits_V(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
16 | #' 
17 | #' @export
18 | #' 
19 | xlin_fits_V <- function(x, y, w) {
20 |   n = length(y)
21 |   # build fitting data
22 |   regularization = 1.0e-5
23 |   xx_0_0 = numeric(n) + sum(w*1)
24 |   xx_1_0 = numeric(n) + sum(w*x)
25 |   xx_0_1 = numeric(n) + sum(w*x)
26 |   xx_1_1 = numeric(n) + sum(w*x*x)
27 |   xy_0 = numeric(n) + sum(w*y)
28 |   xy_1 = numeric(n) + sum(w*x*y)
29 |   xx_1_0 = xx_1_0 + regularization
30 |   xx_0_1  = xx_0_1  + regularization
31 |   # pull out k'th observation
32 |   xxk_0_0 = xx_0_0 - w*1
33 |   xxk_1_0 = xx_1_0 - w*x
34 |   xxk_0_1 = xx_0_1 - w*x
35 |   xxk_1_1 = xx_1_1 - w*x*x
36 |   xyk_0 = xy_0 - w*y
37 |   xyk_1 = xy_1 - w*x*y
38 |   # solve linear system
39 |   det = xxk_0_0*xxk_1_1 - xxk_0_1*xxk_1_0
40 |   c0 = (xxk_1_1*xyk_0 - xxk_0_1*xyk_1)/det
41 |   c1 = (-xxk_1_0*xyk_0 + xxk_0_0*xyk_1)/det
42 |   # form estimate
43 |   y_est = c0 + c1*x
44 |   return(y_est)
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/extras/xlin_fits_py.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Sun Dec 30 08:46:34 2018
 5 | 
 6 | @author: johnmount
 7 | """
 8 | 
 9 | import numpy
10 | 
11 | def xlin_fits_py(x, y, w):
12 |   """return all out of sample fits of y~a*x+b weighted by w, all values numpy arrays"""
13 |   n = len(y)
14 |   # build fitting data
15 |   regularization = 1.0e-5
16 |   xx_0_0 = numpy.zeros(n) + numpy.sum(w*1)
17 |   xx_1_0 = numpy.zeros(n) + numpy.sum(w*x)
18 |   xx_0_1 = numpy.zeros(n) + numpy.sum(w*x)
19 |   xx_1_1 = numpy.zeros(n) + numpy.sum(w*x*x)
20 |   xy_0 = numpy.zeros(n) + numpy.sum(w*y)
21 |   xy_1 = numpy.zeros(n) + numpy.sum(w*x*y)
22 |   xx_1_0 = xx_1_0 + regularization
23 |   xx_0_1  = xx_0_1  + regularization
24 |   # pull out k'th observation
25 |   xxk_0_0 = xx_0_0 - w*1
26 |   xxk_1_0 = xx_1_0 - w*x
27 |   xxk_0_1 = xx_0_1 - w*x
28 |   xxk_1_1 = xx_1_1 - w*x*x
29 |   xyk_0 = xy_0 - w*y
30 |   xyk_1 = xy_1 - w*x*y
31 |   # solve linear system
32 |   det = xxk_0_0*xxk_1_1 - xxk_0_1*xxk_1_0
33 |   c0 = (xxk_1_1*xyk_0 - xxk_0_1*xyk_1)/det
34 |   c1 = (-xxk_1_0*xyk_0 + xxk_0_0*xyk_1)/det
35 |   # form estimate
36 |   y_est = c0 + c1*x
37 |   return(y_est)
38 |   
39 | # x = numpy.asarray([1 ,2, 3, 4])
40 | # y = numpy.asarray([1, 2, 2, 1])
41 | # w = numpy.asarray([1, 1, 1, 1])
42 | # xlin_fits_py(x, y, w)
43 | ## array([2.666715  , 1.28571541, 1.28571214, 2.66666833])


--------------------------------------------------------------------------------
/extras/Timings.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Timings"
 3 | output: github_document
 4 | ---
 5 | 
 6 | 
 7 | 
 8 | 
 9 | ```{r}
10 | knitr::opts_chunk$set(fig.width=12, fig.height=8) 
11 | library("RcppDynProg")
12 | library("WVPlots")
13 | library("microbenchmark")
14 | library("rqdatatable")
15 | 
16 | 
17 | set.seed(2018)
18 | n <- 500
19 | x <- matrix(runif(n*n), nrow=n, ncol=n)
20 | 
21 | solve_interval_partition(x, n)
22 | 
23 | solve_interval_partition_R(x, n)
24 | 
25 | timings <- microbenchmark(
26 |   solve_interval_partition(x, n),
27 |   solve_interval_partition_R(x, n),
28 |   times = 5L)
29 | 
30 | print(timings)
31 | 
32 | p <- data.frame(timings)
33 | p$seconds <- p$time/1e+9
34 | p$method <- as.factor(p$expr)
35 | p$method <- reorder(p$method, p$seconds)
36 | 
37 | summary <- p %.>%
38 |   project(., 
39 |           mean_seconds = mean(seconds),
40 |           groupby = "method")
41 | print(summary)
42 | ratio <- max(summary$mean_seconds)/min(summary$mean_seconds)
43 | print(ratio)
44 | 
45 | WVPlots::ScatterBoxPlotH(p, 
46 |                          "seconds", "method", 
47 |                          "performance of same dynamic programming code in R and Rcpp (C++)")
48 | ```
49 | 
50 | 
51 | 
52 | 
53 | ---------------------
54 | 
55 | 
56 | Timings on a 2018 Dell XPS-13 laptop, 16 Gib RAM, LPDDR3, 2133 MT/s, Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz (8 cores reported), idle, charged, and plugged into power supply. Ubuntu 18.04.1 LTS.
57 | 
58 | ```{r}
59 | R.version.string
60 | 
61 | R.version
62 | 
63 | sessionInfo()
64 | ```
65 | 
66 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_scoring.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_scoring <- function() {
 3 |   set.seed(2018)
 4 |   g <- 50
 5 |   d <- data.frame(
 6 |     x = 1:(3*g)) # ordered in x
 7 |   d$y_ideal <- c(rep(0, g), rep(1, g), rep(-1, g))
 8 |   d$y_observed <- d$y_ideal + rnorm(length(d$y_ideal))
 9 |   d$w <- 1 + numeric(nrow(d))
10 |   
11 |   # expected loss of using the mean of other points to
12 |   # estimate each point
13 |   inflated_var <- function(x, penalty) {
14 |     n <- length(x)
15 |     if(n<=1) {
16 |       return(100000)
17 |     }
18 |     meanx <- mean(x)
19 |     (n/(n-1))^2*sum((x-meanx)^2) + penalty/sqrt(n)
20 |   }
21 |   
22 |   c1 <- inflated_var(d$y_observed, 0)
23 |   c2 <- const_cost(d$y_observed, d$w, 1, 0, length(d$y_observed-1)-1)
24 |   expect_true(abs(c1-c2)<1.e-6)
25 |   
26 |   y_permuted <- d$y_ideal[sample.int(nrow(d), nrow(d), replace = FALSE)]
27 |   
28 |   create_cost_matrix <- function(ycol, penalty) {
29 |     n <- length(ycol)
30 |     x <- matrix(0, nrow=n, ncol=n)
31 |     for(i in 1:n) {
32 |       x[i,i] <- 100000 # big penalty
33 |       if(i<n) {
34 |         for(j in (i+1):n) {
35 |           cost <- inflated_var(ycol[i:j], penalty)
36 |           x[i,j] <- cost
37 |           x[j,i] <- x[i,j]
38 |         }
39 |       }
40 |     }
41 |     x
42 |   }
43 |   
44 |   m1 <- create_cost_matrix(d$y_observed, 0)
45 |   indices = seq_len(length(d$y_observed))
46 |   m2 <- const_costs(d$y_observed, d$w, 1, indices)
47 |   d <- abs(m1-m2)
48 |   for(i in seq_len(nrow(d))) {
49 |     d[i,i] <- 0
50 |   }
51 |   mx <- max(d)
52 |   
53 |   expect_true(mx<=1.e-6)
54 | 
55 |   invisible(NULL)
56 | }
57 | 
58 | test_scoring()
59 | 
60 | 


--------------------------------------------------------------------------------
/R/xlin_fits_R.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | #' xlin_fits_R
 4 | #' 
 5 | #' Calculate out of sample linear fit predictions.
 6 | #' 
 7 | #' @param x NumericVector, x-coords of values to group (length>=2).
 8 | #' @param y NumericVector, values to group in order.
 9 | #' @param w NumericVector, weights (positive).
10 | #' @return  vector of predictions.
11 | #' 
12 | #' @keywords internal
13 | #' 
14 | #' @examples
15 | #' 
16 | #' xlin_fits_R(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1))
17 | #' 
18 | #' @export
19 | #' 
20 | xlin_fits_R <- function(x, y, w) {
21 |   n = length(y);
22 |   # build fitting data
23 |   regularization = 1.0e-5;
24 |   xx_0_0 = 0;
25 |   xx_1_0 = 0;
26 |   xx_0_1 = 0;
27 |   xx_1_1 = 0;
28 |   xy_0 = 0;
29 |   xy_1 = 0;
30 |   for(k in seq_len(n)) {
31 |     xx_0_0 = xx_0_0 + w[k]*1;
32 |     xx_1_0 = xx_1_0 + w[k]*x[k];
33 |     xx_0_1 = xx_0_1 + w[k]*x[k];
34 |     xx_1_1 = xx_1_1 + w[k]*x[k]*x[k];
35 |     xy_0 = xy_0 + w[k]*y[k];
36 |     xy_1 = xy_1 + w[k]*x[k]*y[k];
37 |   }
38 |   xx_1_0 = xx_1_0 + regularization;
39 |   xx_0_1  = xx_0_1  + regularization;
40 |   fits = numeric(n);
41 |   for(k in seq_len(n)) {
42 |     # pull out k'th observation
43 |     xxk_0_0 = xx_0_0 - w[k]*1;
44 |     xxk_1_0 = xx_1_0 - w[k]*x[k];
45 |     xxk_0_1 = xx_0_1 - w[k]*x[k];
46 |     xxk_1_1 = xx_1_1 - w[k]*x[k]*x[k];
47 |     xyk_0 = xy_0 - w[k]*y[k];
48 |     xyk_1 = xy_1 - w[k]*x[k]*y[k];
49 |     # solve linear system
50 |     det = xxk_0_0*xxk_1_1 - xxk_0_1*xxk_1_0;
51 |     c0 = (xxk_1_1*xyk_0 - xxk_0_1*xyk_1)/det;
52 |     c1 = (-xxk_1_0*xyk_0 + xxk_0_0*xyk_1)/det;
53 |     # form estimate
54 |     y_est = c0 + c1*x[k];
55 |     fits[k] = y_est;
56 |   }
57 |   return(fits);
58 | }
59 | 
60 | 


--------------------------------------------------------------------------------
/extras/check_reverse_dependencies.md:
--------------------------------------------------------------------------------
 1 | check_reverse_dependencies
 2 | ================
 3 | 
 4 | ``` r
 5 | library("prrd")
 6 | td <- tempdir()
 7 | package = "RcppDynProg"
 8 | packageVersion(package)
 9 | ```
10 | 
11 |     ## [1] '0.2.1'
12 | 
13 | ``` r
14 | date()
15 | ```
16 | 
17 |     ## [1] "Sat Aug 19 17:16:44 2023"
18 | 
19 | ``` r
20 | parallelCluster <- NULL
21 | # # parallel doesn't work due to https://github.com/r-lib/liteq/issues/22
22 | #ncores <- parallel::detectCores()
23 | #parallelCluster <- parallel::makeCluster(ncores)
24 | 
25 | orig_dir <- getwd()
26 | print(orig_dir)
27 | ```
28 | 
29 |     ## [1] "/Users/johnmount/Documents/work/RcppDynProg/extras"
30 | 
31 | ``` r
32 | setwd(td)
33 | print(td)
34 | ```
35 | 
36 |     ## [1] "/var/folders/7f/sdjycp_d08n8wwytsbgwqgsw0000gn/T//RtmpouQ3oU"
37 | 
38 | ``` r
39 | options(repos = c(CRAN="https://cloud.r-project.org"))
40 | jobsdfe <- enqueueJobs(package=package, directory=td)
41 | ```
42 | 
43 |     ## Error: No dependencies for RcppDynProg
44 | 
45 | ``` r
46 | mk_fn <- function(package, directory) {
47 |   force(package)
48 |   force(directory)
49 |   function(i) {
50 |     library("prrd")
51 |     setwd(directory)
52 |     Sys.sleep(1*i)
53 |     dequeueJobs(package=package, directory=directory)
54 |   }
55 | }
56 | f <- mk_fn(package=package, directory=td)
57 | 
58 | if(!is.null(parallelCluster)) {
59 |   parallel::parLapply(parallelCluster, seq_len(ncores), f)
60 | } else {
61 |   f(0)
62 | }
63 | ```
64 | 
65 |     ## Error: no such table: metadata
66 | 
67 | ``` r
68 | summariseQueue(package=package, directory=td)
69 | ```
70 | 
71 |     ## Error: no such table: metadata
72 | 
73 | ``` r
74 | setwd(orig_dir)
75 | if(!is.null(parallelCluster)) {
76 |   parallel::stopCluster(parallelCluster)
77 | }
78 | ```
79 | 


--------------------------------------------------------------------------------
/man/solve_for_partition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/solve_for_partition.R
 3 | \name{solve_for_partition}
 4 | \alias{solve_for_partition}
 5 | \title{Solve for a piecewise linear partiton.}
 6 | \usage{
 7 | solve_for_partition(
 8 |   x,
 9 |   y,
10 |   ...,
11 |   w = NULL,
12 |   penalty = 0,
13 |   min_n_to_chunk = 1000,
14 |   min_seg = 1,
15 |   max_k = length(x)
16 | )
17 | }
18 | \arguments{
19 | \item{x}{numeric, input variable (no NAs).}
20 | 
21 | \item{y}{numeric, result variable (no NAs, same length as x).}
22 | 
23 | \item{...}{not used, force later arguments by name.}
24 | 
25 | \item{w}{numeric, weights (no NAs, positive, same length as x).}
26 | 
27 | \item{penalty}{per-segment cost penalty.}
28 | 
29 | \item{min_n_to_chunk}{minimum n to subdivied problem.}
30 | 
31 | \item{min_seg}{positive integer, minimum segment size.}
32 | 
33 | \item{max_k}{maximum segments to divide into.}
34 | }
35 | \value{
36 | a data frame appropriate for stats::approx().
37 | }
38 | \description{
39 | Solve for a good set of right-exclusive x-cuts such that the 
40 | overall graph of y~x is well-approximated by a piecewise linear
41 | function.  Solution is a ready for use with
42 | with \code{base::findInterval()} and \code{stats::approx()}
43 | (demonstrated in the examples).
44 | }
45 | \examples{
46 | 
47 | # example data
48 | d <- data.frame(
49 |   x = 1:8,
50 |   y = c(1, 2, 3, 4, 4, 3, 2, 1))
51 | 
52 | # solve for break points
53 | soln <- solve_for_partition(d$x, d$y)
54 | # show solution
55 | print(soln)
56 | 
57 | # label each point 
58 | d$group <- base::findInterval(
59 |   d$x,
60 |   soln$x[soln$what=='left'])
61 | # apply piecewise approximation
62 | d$estimate <- stats::approx(
63 |   soln$x,
64 |   soln$pred,
65 |   xout = d$x,
66 |   method = 'linear',
67 |   rule = 2)$y
68 | # show result
69 | print(d)
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/man/solve_for_partitionc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/solve_for_partition.R
 3 | \name{solve_for_partitionc}
 4 | \alias{solve_for_partitionc}
 5 | \title{Solve for a piecewise constant partiton.}
 6 | \usage{
 7 | solve_for_partitionc(
 8 |   x,
 9 |   y,
10 |   ...,
11 |   w = NULL,
12 |   penalty = 0,
13 |   min_n_to_chunk = 1000,
14 |   min_seg = 1,
15 |   max_k = length(x)
16 | )
17 | }
18 | \arguments{
19 | \item{x}{numeric, input variable (no NAs).}
20 | 
21 | \item{y}{numeric, result variable (no NAs, same length as x).}
22 | 
23 | \item{...}{not used, force later arguments by name.}
24 | 
25 | \item{w}{numeric, weights (no NAs, positive, same length as x).}
26 | 
27 | \item{penalty}{per-segment cost penalty.}
28 | 
29 | \item{min_n_to_chunk}{minimum n to subdivied problem.}
30 | 
31 | \item{min_seg}{positive integer, minimum segment size.}
32 | 
33 | \item{max_k}{maximum segments to divide into.}
34 | }
35 | \value{
36 | a data frame appropriate for stats::approx().
37 | }
38 | \description{
39 | Solve for a good set of right-exclusive x-cuts such that the 
40 | overall graph of y~x is well-approximated by a piecewise linear
41 | function. Solution is a ready for use with
42 | with \code{base::findInterval()} and \code{stats::approx()}
43 | (demonstrated in the examples).
44 | }
45 | \examples{
46 | 
47 | # example data
48 | d <- data.frame(
49 |   x = 1:8,
50 |   y = c(-1, -1, -1, -1, 1, 1, 1, 1))
51 | 
52 | # solve for break points
53 | soln <- solve_for_partitionc(d$x, d$y)
54 | # show solution
55 | print(soln)
56 | 
57 | # label each point
58 | d$group <- base::findInterval(
59 |   d$x,
60 |   soln$x[soln$what=='left'])
61 | # apply piecewise approximation
62 | d$estimate <- stats::approx(
63 |   soln$x,
64 |   soln$pred,
65 |   xout = d$x,
66 |   method = 'constant',
67 |   rule = 2)$y
68 | # show result
69 | print(d)
70 | 
71 | }
72 | 


--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
 3 |  * Copyright 2015 Aidan Feldman
 4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
 5 | 
 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
 7 | 
 8 | /* All levels of nav */
 9 | nav[data-toggle='toc'] .nav > li > a {
10 |   display: block;
11 |   padding: 4px 20px;
12 |   font-size: 13px;
13 |   font-weight: 500;
14 |   color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 |   padding-left: 19px;
19 |   color: #563d7c;
20 |   text-decoration: none;
21 |   background-color: transparent;
22 |   border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 |   padding-left: 18px;
28 |   font-weight: bold;
29 |   color: #563d7c;
30 |   background-color: transparent;
31 |   border-left: 2px solid #563d7c;
32 | }
33 | 
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 |   display: none; /* Hide by default, but at >768px, show it */
37 |   padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 |   padding-top: 1px;
41 |   padding-bottom: 1px;
42 |   padding-left: 30px;
43 |   font-size: 12px;
44 |   font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 |   padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 |   padding-left: 28px;
54 |   font-weight: 500;
55 | }
56 | 
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 |   display: block;
60 | }
61 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_dynprog1.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_dynprog1 <- function() {
 3 |   x <- matrix(c(1,1,5,1,1,0,5,0,1), nrow=3)
 4 |   expect_true(RcppDynProg:::test_solvers(x, 3))
 5 |   
 6 |   x2 <- matrix(c(0.165439415490255, 0.982703358167782, 0.507731057703495, 
 7 |                     0.423818744486198, 0.866322142072022, 0.807251449674368, 0.0862272190861404, 
 8 |                     0.823423527413979, 0.907007380854338), nrow=3)
 9 |   expect_true(RcppDynProg:::test_solvers(x2, 3))
10 |   
11 |   x3 <- matrix(c(0.15401595, 0.04822183, 0.19091068, 0.45166874, 0.91731301, 0.65618810,
12 |                  0.35495444, 0.12153691, 0.56267106), nrow=3)
13 |   expect_true(RcppDynProg:::test_solvers(x3, 3))
14 |   
15 |   set.seed(1515)
16 |   for(i in 1:10) {
17 |     x3 <- matrix(runif(9), nrow=3)
18 |     expect_true(RcppDynProg:::test_solvers(x3, 3))
19 |   }
20 |   
21 |   set.seed(16515)
22 |   for(i in 1:10) {
23 |     x3 <- matrix(runif(16), nrow=4)
24 |     expect_true(RcppDynProg:::test_solvers(x3, 4))
25 |   }
26 |   
27 |   
28 |   
29 |   
30 |   # should be non-increasing in k
31 |   # expected loss of using the mean of other points to
32 |   # estimate each point
33 |   inflated_var <- function(x) {
34 |     meanx <- mean(x)
35 |     n <- length(x)
36 |     (n/(n-1))*sum((x-meanx)^2) # + 10
37 |   }
38 |   
39 |   set.seed(253)
40 |   g <- 10
41 |   d <- data.frame(
42 |     x = 1:(3*g),
43 |     y = c(rnorm(g), 1 + rnorm(g), -1 + rnorm(g)))
44 |   
45 |   n <- nrow(d)
46 |   x <- matrix(0, nrow=n, ncol=n)
47 |   for(i in 1:n) {
48 |     x[i,i] <- 100000 # big penalty
49 |     if(i<n) {
50 |       for(j in (i+1):n) {
51 |         cost <- inflated_var(d$y[i:j])
52 |         x[i,j] <- cost
53 |         x[j,i] <- x[i,j]
54 |       }
55 |     }
56 |   }
57 |   
58 |   slast <- NA
59 |   for(k in 1:nrow(x)) {
60 |     s1 <- solve_interval_partition_R(x, k)
61 |     sc1 <- score_solution(x, s1)
62 |     s2 <- solve_interval_partition(x, k)
63 |     sc2 <- score_solution(x, s2)
64 |     if(!(abs(sc1-sc2)<=1e-5)) {
65 |       stop("R and C++ solutions disagree")
66 |     }
67 |     if(!is.na(slast)) {
68 |       if(sc1>slast) {
69 |         stop("solution was not monotone in k")
70 |       }
71 |     }
72 |     slast <- sc1
73 |   }
74 | 
75 |   invisible(NULL)
76 | }
77 | 
78 | test_dynprog1()
79 | 
80 | 


--------------------------------------------------------------------------------
/inst/tinytest/test_logistic_cost.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_logistic_cost <- function() {
 3 |   x <- c(1, 2, 3, 4, 5, 6, 7)
 4 |   y <- c(0, 0, 1, 0, 1, 1, 0)
 5 |   w <- c(1, 1, 1, 1, 1, 1, 1)
 6 |   il <- numeric(length(x))
 7 |   
 8 |   # Think this is the issue we are seeing in Windows.
 9 |   sm3 <- summarize_input(x,y,w,0,3,-1)
10 |   expect3 <- list(max_x = 4, min_x = 1, saw_y_pos = TRUE, max_x_pos = 3, min_x_pos = 3, 
11 |                   saw_y_neg = TRUE, max_x_neg = 4, min_x_neg = 1, total_w = 4, 
12 |                   total_wy = 1, k_points = 4, saw_data = TRUE, x_varies = TRUE, 
13 |                   y_varies = TRUE, seperable = FALSE)
14 |   msg <- wrapr::map_to_char(sm3)
15 |   expect_equal(sm3, expect3, info = msg)
16 |   
17 |   for(k in wrapr::seqi(0, 4)) {
18 |     m1 <- logistic_solve1(x, y, w, il, 0, k, -1)
19 |     msg <- paste("k", k, wrapr::map_to_char(m1))
20 |     expect_true(is.numeric(m1), info = msg)
21 |     expect_equal(2, length(m1), info = msg)
22 |     expect_true(!any(is.na(m1)), info = msg)
23 |     expect_true(!any(is.nan(m1)), info = msg)
24 |     expect_true(!any(is.infinite(m1)), info = msg)
25 |     lf <- logistic_fits(x, y, w, 0, k)
26 |     msg <- paste("k", k, wrapr::map_to_char(m1), wrapr::map_to_char(lf))
27 |     expect_true(is.numeric(lf), info = msg)
28 |     expect_equal(k+1, length(lf), info = msg)
29 |     expect_true(!any(is.na(lf)), info = msg)
30 |     expect_true(!any(is.nan(lf)), info = msg)
31 |     expect_true(!any(is.infinite(lf)), info = msg)
32 |     if(k>=3) {
33 |       d <- data.frame(x = x[1:(k+1)], y  = y[1:(k+1)])
34 |       m <- glm(y~x, data=d, family = binomial)
35 |       cm <- as.numeric(coef(m))
36 |       diff1 <- max(abs(m1-cm))
37 |       msg1 <- paste("coef problem", k, diff1, 
38 |                     "RccpDynProg", wrapr::map_to_char(m1), 
39 |                     "glm", wrapr::map_to_char(cm))
40 |       expect_true(diff1<=1e-3, info = msg1)
41 |       p <- as.numeric(predict(m, newdata = d, type = "link"))
42 |       diff2 <- max(abs(lf-p))
43 |       msg2 <- paste("link problem", k, diff2, 
44 |                     "RccpDynProg", wrapr::map_to_char(lf), 
45 |                     "glm", wrapr::map_to_char(p))
46 |       expect_true(diff2<=1e-3, info = msg2)
47 |     }
48 |   }
49 | 
50 |   invisible(NULL)
51 | }
52 | 
53 | test_logistic_cost()
54 | 
55 | 


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
54 | function matchedWords(hit) {
55 |   var words = [];
56 | 
57 |   var hierarchy = hit._highlightResult.hierarchy;
58 |   // loop to fetch from lvl0, lvl1, etc.
59 |   for (var idx in hierarchy) {
60 |     words = words.concat(hierarchy[idx].matchedWords);
61 |   }
62 | 
63 |   var content = hit._highlightResult.content;
64 |   if (content) {
65 |     words = words.concat(content.matchedWords);
66 |   }
67 | 
68 |   // return unique words
69 |   var words_uniq = [...new Set(words)];
70 |   return words_uniq;
71 | }
72 | 
73 | function updateHitURL(hit) {
74 | 
75 |   var words = matchedWords(hit);
76 |   var url = "";
77 | 
78 |   if (hit.anchor) {
79 |     url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor;
80 |   } else {
81 |     url = hit.url + '?q=' + escape(words.join(" "));
82 |   }
83 | 
84 |   return url;
85 | }
86 | 


--------------------------------------------------------------------------------
/extras/time_python.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stdout",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "[1, 109, 230, 267, 501]\n",
 13 |       "39.42312186780619\n"
 14 |      ]
 15 |     }
 16 |    ],
 17 |    "source": [
 18 |     "\n",
 19 |     "import pandas\n",
 20 |     "import timeit\n",
 21 |     "\n",
 22 |     "from DynProg import *\n",
 23 |     "\n",
 24 |     "x = pandas.read_csv('costs.csv')\n",
 25 |     "\n",
 26 |     "s1 = solve_dynamic_program(x.values, 500)\n",
 27 |     "print(s1)\n",
 28 |     "\n",
 29 |     "reps = 5\n",
 30 |     "\n",
 31 |     "start = timeit.default_timer()\n",
 32 |     "for i in range(reps):\n",
 33 |     "    solve_dynamic_program(x.values, 500)\n",
 34 |     "end = timeit.default_timer()\n",
 35 |     "\n",
 36 |     "delta = end - start\n",
 37 |     "print(delta/reps)\n"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "Timings on a 2018 Dell XPS-13 laptop, 16 Gib RAM, LPDDR3, 2133 MT/s, Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz (8 cores reported), idle, charged, and plugged into power supply. Ubuntu 18.04.1 LTS.\n"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 2,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "name": "stdout",
 54 |      "output_type": "stream",
 55 |      "text": [
 56 |       "sys.version_info(major=3, minor=7, micro=0, releaselevel='final', serial=0)\n",
 57 |       "1.15.1\n",
 58 |       "0.23.4\n"
 59 |      ]
 60 |     }
 61 |    ],
 62 |    "source": [
 63 |     "import sys\n",
 64 |     "\n",
 65 |     "print(sys.version_info)\n",
 66 |     "\n",
 67 |     "print(numpy.__version__)\n",
 68 |     "print(pandas.__version__)\n"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": null,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": []
 77 |   }
 78 |  ],
 79 |  "metadata": {
 80 |   "kernelspec": {
 81 |    "display_name": "Python 3",
 82 |    "language": "python",
 83 |    "name": "python3"
 84 |   },
 85 |   "language_info": {
 86 |    "codemirror_mode": {
 87 |     "name": "ipython",
 88 |     "version": 3
 89 |    },
 90 |    "file_extension": ".py",
 91 |    "mimetype": "text/x-python",
 92 |    "name": "python",
 93 |    "nbconvert_exporter": "python",
 94 |    "pygments_lexer": "ipython3",
 95 |    "version": "3.7.0"
 96 |   }
 97 |  },
 98 |  "nbformat": 4,
 99 |  "nbformat_minor": 2
100 | }
101 | 


--------------------------------------------------------------------------------
/vignettes/SegmentationL.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Linear Segmentation"
 3 | author: "John Mount"
 4 | date: "`r Sys.Date()`"
 5 | output: rmarkdown::html_vignette
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Linear Segmentation}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | In this example we fit a piecewise linear function to example data.  
13 | Please see [here](https://github.com/WinVector/RcppDynProg) for a discussion of the methodology.
14 | 
15 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
16 | library("RcppDynProg")
17 | 
18 | 
19 | set.seed(2018)
20 | g <- 100
21 | d <- data.frame(
22 |   x = 0.05*(1:(3*g))) # ordered in x
23 | d$y_ideal <- sin((0.3*d$x)^2)
24 | d$y_observed <- d$y_ideal + 0.25*rnorm(length(d$y_ideal))
25 | 
26 | 
27 | # plot
28 | plot(d$x, d$y_observed,
29 |      xlab = "x", ylab = "y",
30 |      main = "raw data\ncircles: observed values, dashed line: unobserved true values")
31 | lines(d$x, d$y_ideal,
32 |      type = "l",
33 |      lty = "dashed")
34 | 
35 | 
36 | x_cuts <- solve_for_partition(d$x, d$y_observed, penalty = 1)
37 | print(x_cuts)
38 | 
39 | d$estimate <- approx(x_cuts$x, x_cuts$pred, xout = d$x, method = "linear", rule = 2)$y
40 | d$group <- as.character(findInterval(d$x, x_cuts[x_cuts$what=="left", "x"]))
41 | ```
42 | 
43 | ```{r r2, fig.height = 6, fig.width = 8, fig.align = "center"}
44 | print(sum((d$y_observed - d$y_ideal)^2))
45 | ```
46 | 
47 | ```{r r3, fig.height = 6, fig.width = 8, fig.align = "center"}
48 | print(sum((d$estimate - d$y_ideal)^2))
49 | ```
50 | 
51 | ```{r r4, fig.height = 6, fig.width = 8, fig.align = "center"}
52 | print(sum((d$estimate - d$y_observed)^2))
53 | ```
54 | 
55 | ```{r r5, fig.height = 6, fig.width = 8, fig.align = "center"}
56 | 
57 | # plot
58 | plot(d$x, d$y_observed,
59 |      xlab = "x", ylab = "y",
60 |      main = "RcppDynProg piecewise linear estimate\ndots: observed values, segments: estimated shape")
61 | points(d$x, d$y_ideal,
62 |      type = "l",
63 |      lty = "dashed")
64 | cmap <- c("#a6cee3",
65 |           "#1f78b4",
66 |           "#b2df8a",
67 |           "#33a02c",
68 |           "#fb9a99",
69 |           "#e31a1c",
70 |           "#fdbf6f",
71 |           "#ff7f00",
72 |           "#cab2d6",
73 |           "#6a3d9a",
74 |           "#ffff99",
75 |           "#b15928")
76 | names(cmap) <- as.character(seq_len(length(cmap)))
77 | points(d$x, d$y_observed, col = cmap[d$group], pch=19)
78 | groups <- sort(unique(d$group))
79 | for(gi in groups) {
80 |   di <- d[d$group==gi, , drop = FALSE]
81 |   lines(di$x, di$estimate, col = cmap[di$group[[1]]], lwd=2)
82 | }
83 | ```
84 | 
85 | 


--------------------------------------------------------------------------------
/src/xlin_pfits.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <RcppArmadillo.h>
 3 | 
 4 | #include <math.h>
 5 | 
 6 | using Rcpp::NumericVector;
 7 | using Rcpp::NumericMatrix;
 8 | using Rcpp::IntegerVector;
 9 | 
10 | 
11 | 
12 | //' xlin_pfits
13 | //' 
14 | //' Calculate out of sample linear fit predictions using pseudo-inverse.
15 | //' Please see: \url{https://win-vector.com/2019/01/08/a-beautiful-2-by-2-matrix-identity/}.
16 | //' Zero indexed.
17 | //' 
18 | //' @param x NumericVector, explanatory variable (length>=2).
19 | //' @param y NumericVector, values to fit.
20 | //' @param w NumericVector, weights (positive).
21 | //' @param i integer, first index (inclusive).
22 | //' @param j integer, j>=i+2 last index (inclusive);
23 | //' @return  vector of predictions.
24 | //' 
25 | //' @keywords internal
26 | //' 
27 | //' @examples
28 | //' 
29 | //' xlin_pfits(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 0, 3)
30 | //' 
31 | //' @export
32 | // [[Rcpp::export]]
33 | NumericVector xlin_pfits(NumericVector x, NumericVector y, NumericVector w,
34 |                         const int i, const int j) {
35 |   const int vlen = x.length();
36 |   if((i<0) || (j>=vlen) || (vlen!=y.length()) || (vlen!=w.length())) {
37 |     throw std::range_error("Inadmissible value");
38 |   }
39 |   // build fitting data
40 |   double xx_0_0 = 0;
41 |   double xx_1_0 = 0;
42 |   double xx_0_1 = 0;
43 |   double xx_1_1 = 0;
44 |   double sy_0 = 0;
45 |   double xy_1 = 0;
46 |   double sum_w = 0;
47 |   for(int k=i; k<=j; ++k) {
48 |     xx_0_0 = xx_0_0 + w(k)*1;
49 |     xx_1_0 = xx_1_0 + w(k)*x(k);
50 |     xx_0_1 = xx_0_1 + w(k)*x(k);
51 |     xx_1_1 = xx_1_1 + w(k)*x(k)*x(k);
52 |     sy_0 = sy_0 + w(k)*y(k);
53 |     xy_1 = xy_1 + w(k)*x(k)*y(k);
54 |     sum_w = sum_w + w(k);
55 |   }
56 |   NumericVector fits = NumericVector(1+j-i);
57 |   for(int k=i; k<=j; ++k) {
58 |     // pull out k'th observation
59 |     const double xxk_0_0 = xx_0_0 - w(k)*1;
60 |     const double xxk_1_0 = xx_1_0 - w(k)*x(k);
61 |     const double xxk_0_1 = xx_0_1 - w(k)*x(k);
62 |     const double xxk_1_1 = xx_1_1 - w(k)*x(k)*x(k);
63 |     const double syk_0 = sy_0 - w(k)*y(k);
64 |     const double xyk_1 = xy_1 - w(k)*x(k)*y(k);
65 |     const double det = xxk_0_0*xxk_1_1 - xxk_0_1*xxk_1_0;
66 |     if(det!=0.0) {
67 |       // solve linear system and form estimate
68 |       const double c0 = (xxk_1_1*syk_0 - xxk_0_1*xyk_1)/det;
69 |       const double c1 = (-xxk_1_0*syk_0 + xxk_0_0*xyk_1)/det;
70 |       // form estimate
71 |       const double y_est = c0 + c1*x(k);
72 |       fits(k-i) = y_est;
73 |     } else {
74 |       // estimate directly from hold-out mean
75 |       fits(k-i) = syk_0/(sum_w - w(k));
76 |     }
77 |   }
78 |   return fits;
79 | }
80 | 


--------------------------------------------------------------------------------
/extras/LogisticLinkSegmentation.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Logistic Link Segmentation"
 3 | author: "John Mount"
 4 | date: "`r Sys.Date()`"
 5 | output: github_document
 6 | ---
 7 | 
 8 | In this example we fit a piecewise logistic-linear (linear in link-space) function to example data.  
 9 | Please see [here](https://github.com/WinVector/RcppDynProg) for a discussion of the methodology.
10 | 
11 | Logistic link segmentation does not seem to be a good idea. The small segments just don't look like the (hidden) idea distrubitions at small scale.
12 | 
13 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
14 | library("RcppDynProg")
15 | library("wrapr")
16 | 
17 | plot <- requireNamespace("ggplot2", quietly = TRUE)
18 | if(plot) {
19 |   library("ggplot2")
20 | }
21 | 
22 | sigmoid <- function(z) {
23 |   1/(1+exp(-z))
24 | }
25 | 
26 | mult <- 2
27 | set.seed(2018)
28 | g <- 200
29 | d <- data.frame(
30 |   x = 0.025*(1:(3*g))) # ordered in x
31 | d$y_ideal <- mult*sin((0.3*d$x)^2)
32 | d$y_observed <- ifelse(runif(length(d$y_ideal)) <= sigmoid(d$y_ideal), 1, 0)
33 | d$y_plot <- (1+mult)*(2*d$y_observed-1)
34 | w <- rep(1.0, nrow(d))
35 | 
36 | if(plot) {
37 |   plt1 <- ggplot(data = d, aes(x = x)) + 
38 |     geom_line(aes(y = y_ideal), linetype=2) +
39 |     geom_point(aes(y = y_plot, color = as.factor(d$y_observed)), alpha = 0.5) +
40 |     geom_smooth(aes(y = y_plot), span = 0.25, se = FALSE) +
41 |     ylab("y") +
42 |     guides(color = FALSE) +
43 |     ggtitle("raw data", 
44 |             subtitle = "dots: observed values, dashed line: unobserved true values")
45 |   print(plt1)
46 | }
47 | 
48 | costs <- lin_costs_logistic(d$x, d$y_observed, w, 40, seq_len(nrow(d)))
49 | costs <- costs + 5
50 | soln <- solve_interval_partition(costs, 20)
51 | print(soln)
52 | 
53 | preds <- numeric(nrow(d))
54 | for(i in seqi(1, length(soln)-1)) {
55 |   predsi <- logistic_fits(d$x, d$y_observed, w, soln[[i]]-1, soln[[i+1]]-2)
56 |   preds[seqi(soln[[i]], soln[[i+1]]-1)] <- predsi
57 | }
58 | d$link <- pmin(5, pmax(-5, preds))
59 | d$group <- findInterval(seq_len(nrow(d)), soln)
60 | 
61 | 
62 | if(plot) {
63 |   plt2 <- ggplot(data = d, aes(x = x)) + 
64 |     geom_line(aes(y = y_ideal), linetype=2) +
65 |     geom_line(aes(y = link, group = group), linetype=3, color="blue") +
66 |     geom_point(aes(y = y_plot, color = as.factor(d$y_observed)), alpha = 0.5) +
67 |     ylab("y") +
68 |     guides(color = FALSE) +
69 |     ggtitle("raw data", 
70 |             subtitle = "dots: observed values, cuts/curve: recovered model")
71 |   for(ci in soln) {
72 |     if((ci>1)&&(ci<nrow(d))) {
73 |       plt2 <- plt2 + geom_vline(xintercept = d$x[[ci]], color = "blue")
74 |     }
75 |   }
76 |   print(plt2)
77 | }
78 | ```
79 | 


--------------------------------------------------------------------------------
/src/xlin_fits.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <RcppArmadillo.h>
 3 | 
 4 | #include <math.h>
 5 | 
 6 | using Rcpp::NumericVector;
 7 | using Rcpp::NumericMatrix;
 8 | using Rcpp::IntegerVector;
 9 | 
10 | 
11 | NumericVector xlin_fits_worker(const NumericVector &x, const NumericVector &y, 
12 |                                const NumericVector &w,
13 |                                const int i, const int j) {
14 |   const int vlen = x.length();
15 |   if((i<0) || (j>=vlen) || (vlen!=y.length()) || (vlen!=w.length())) {
16 |     throw std::range_error("Inadmissible value");
17 |   }
18 |   // build fitting data
19 |   const double regularization = 1.0e-5;
20 |   double xx_0_0 = 0;
21 |   double xx_1_0 = 0;
22 |   double xx_0_1 = 0;
23 |   double xx_1_1 = 0;
24 |   double xy_0 = 0;
25 |   double xy_1 = 0;
26 |   for(int k=i; k<=j; ++k) {
27 |     xx_0_0 = xx_0_0 + w(k)*1;
28 |     xx_1_0 = xx_1_0 + w(k)*x(k);
29 |     xx_0_1 = xx_0_1 + w(k)*x(k);
30 |     xx_1_1 = xx_1_1 + w(k)*x(k)*x(k);
31 |     xy_0 = xy_0 + w(k)*y(k);
32 |     xy_1 = xy_1 + w(k)*x(k)*y(k);
33 |   }
34 |   xx_1_0 = xx_1_0 + regularization;
35 |   xx_0_1  = xx_0_1  + regularization;
36 |   NumericVector fits = NumericVector(1+j-i);
37 |   for(int k=i; k<=j; ++k) {
38 |     // pull out k'th observation
39 |     const double xxk_0_0 = xx_0_0 - w(k)*1;
40 |     const double xxk_1_0 = xx_1_0 - w(k)*x(k);
41 |     const double xxk_0_1 = xx_0_1 - w(k)*x(k);
42 |     const double xxk_1_1 = xx_1_1 - w(k)*x(k)*x(k);
43 |     const double xyk_0 = xy_0 - w(k)*y(k);
44 |     const double xyk_1 = xy_1 - w(k)*x(k)*y(k);
45 |     // solve linear system
46 |     double y_est = 0.0;
47 |     if(xx_0_0>0.0) {
48 |       const double det = xxk_0_0*xxk_1_1 - xxk_0_1*xxk_1_0;
49 |       if(det!=0.0) {
50 |         const double c0 = (xxk_1_1*xyk_0 - xxk_0_1*xyk_1)/det;
51 |         const double c1 = (-xxk_1_0*xyk_0 + xxk_0_0*xyk_1)/det;
52 |         // form estimate
53 |         y_est = c0 + c1*x(k);
54 |       } else {
55 |         y_est = xy_0/xx_0_0;
56 |       }
57 |     }
58 |     fits(k-i) = y_est;
59 |   }
60 |   return fits;
61 | }
62 | 
63 | 
64 | 
65 | //' xlin_fits
66 | //' 
67 | //' Calculate out of sample linear fit predictions using regularization.
68 | //' Zero indexed.
69 | //' 
70 | //' @param x NumericVector, explanatory variable (length>=2).
71 | //' @param y NumericVector, values fit.
72 | //' @param w NumericVector, weights (positive).
73 | //' @param i integer, first index (inclusive).
74 | //' @param j integer, j>=i+2 last index (inclusive);
75 | //' @return  vector of predictions.
76 | //' 
77 | //' @keywords internal
78 | //' 
79 | //' @examples
80 | //' 
81 | //' xlin_fits(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 0, 3)
82 | //' 
83 | //' @export
84 | // [[Rcpp::export]]
85 | NumericVector xlin_fits(NumericVector x, NumericVector y, NumericVector w,
86 |                         const int i, const int j) {
87 |   return xlin_fits_worker(x, y, 
88 |                    w,
89 |                    i, j);
90 | }
91 | 


--------------------------------------------------------------------------------
/extras/Time_xlin_fits.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Time xlin_fits"
  3 | output: github_document
  4 | ---
  5 | 
  6 | ```{r r2, fig.height = 6, fig.width = 8, fig.align = "center"}
  7 | library("RcppDynProg")
  8 | library("WVPlots")
  9 | library("ggplot2")
 10 | library("microbenchmark")
 11 | library("rqdatatable")
 12 | 
 13 | set.seed(2018)
 14 | 
 15 | # data
 16 | mk_data <- function(g) {
 17 |   d <- data.frame(
 18 |     x = 0.05*(1:(3*g))) # ordered in x
 19 |   n <- nrow(d)
 20 |   d$y_ideal <- sin((0.3*d$x)^2)
 21 |   d$y_observed <- d$y_ideal + 0.25*rnorm(n)
 22 |   d
 23 | }
 24 | 
 25 | d <- mk_data(100)
 26 | n <- nrow(d)
 27 | w <- 1 + numeric(n)
 28 | 
 29 | 
 30 | timings <- microbenchmark(
 31 |   Rcpp = xlin_fits(d$x, d$y_observed, w, 0, n-1),
 32 |   R_lm = xlin_fits_lm(d$x, d$y_observed, w),
 33 |   R_rowwise = xlin_fits_R(d$x, d$y_observed, w),
 34 |   R_vectorized = xlin_fits_V(d$x, d$y_observed, w),
 35 |   times = 10L)
 36 | 
 37 | print(timings)
 38 | 
 39 | p <- data.frame(timings)
 40 | p$seconds <- p$time/1e+9
 41 | p$method <- as.factor(p$expr)
 42 | p$method <- reorder(p$method, p$seconds)
 43 | 
 44 | summary <- p %.>%
 45 |   project(., 
 46 |           mean_seconds = mean(seconds),
 47 |           groupby = "method")
 48 | print(summary)
 49 | 
 50 | 
 51 | WVPlots::ScatterBoxPlotH(
 52 |   p, 
 53 |   "seconds", "method", 
 54 |   paste0("performance of PRESS statistic in R and Rcpp (n=", n, ")"))
 55 | 
 56 | WVPlots::ScatterBoxPlotH(
 57 |   p, 
 58 |   "seconds", "method", 
 59 |   paste0("performance of PRESS statistic in R and Rcpp (n=", n, ")")) + 
 60 |   scale_y_log10()
 61 | 
 62 | d <- mk_data(1000000)
 63 | n <- nrow(d)
 64 | w <- 1 + numeric(n)
 65 | 
 66 | # make data available for Python version
 67 | write.csv(d, file = gzfile("xlin_data.csv.gz"), row.names = FALSE, quote = FALSE)
 68 | 
 69 | timings <- microbenchmark(
 70 |   Rcpp = xlin_fits(d$x, d$y_observed, w, 0, n-1),
 71 |   R_rowwise = xlin_fits_R(d$x, d$y_observed, w),
 72 |   R_vectorized = xlin_fits_V(d$x, d$y_observed, w),
 73 |   times = 10L)
 74 | 
 75 | print(timings)
 76 | 
 77 | p <- data.frame(timings)
 78 | p$seconds <- p$time/1e+9
 79 | p$method <- as.factor(p$expr)
 80 | p$method <- reorder(p$method, p$seconds)
 81 | 
 82 | summary <- p %.>%
 83 |   project(., 
 84 |           mean_seconds = mean(seconds),
 85 |           groupby = "method")
 86 | print(summary)
 87 | 
 88 | 
 89 | WVPlots::ScatterBoxPlotH(
 90 |   p, 
 91 |   "seconds", "method", 
 92 |   paste0("performance of PRESS statistic in R and Rcpp (n=", n, ")"))
 93 | 
 94 | WVPlots::ScatterBoxPlotH(
 95 |   p, 
 96 |   "seconds", "method", 
 97 |   paste0("performance of PRESS statistic in R and Rcpp (n=", n, ")")) + 
 98 |   scale_y_log10()
 99 | 
100 | ```
101 | 
102 | 
103 | 
104 | 
105 | ---------------------
106 | 
107 | 
108 | Timings on a 2018 Dell XPS-13 laptop, 16 Gib RAM, LPDDR3, 2133 MT/s, Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz (8 cores reported), idle, charged, and plugged into power supply. Ubuntu 18.04.1 LTS.
109 | 
110 | ```{r}
111 | R.version.string
112 | 
113 | R.version
114 | 
115 | sessionInfo()
116 | ```
117 | 
118 | 
119 | 


--------------------------------------------------------------------------------
/extras/LogisticLinkSegmentation.md:
--------------------------------------------------------------------------------
 1 | Logistic Link Segmentation
 2 | ================
 3 | John Mount
 4 | 2019-02-02
 5 | 
 6 | In this example we fit a piecewise logistic-linear (linear in link-space) function to example data.
 7 | Please see [here](https://github.com/WinVector/RcppDynProg) for a discussion of the methodology.
 8 | 
 9 | Logistic link segmentation does not seem to be a good idea. The small segments just don't look like the (hidden) idea distrubitions at small scale.
10 | 
11 | ``` r
12 | library("RcppDynProg")
13 | library("wrapr")
14 | ```
15 | 
16 |     ## 
17 |     ## Attaching package: 'wrapr'
18 | 
19 |     ## The following object is masked from 'package:RcppDynProg':
20 |     ## 
21 |     ##     run_package_tests
22 | 
23 | ``` r
24 | plot <- requireNamespace("ggplot2", quietly = TRUE)
25 | if(plot) {
26 |   library("ggplot2")
27 | }
28 | 
29 | sigmoid <- function(z) {
30 |   1/(1+exp(-z))
31 | }
32 | 
33 | mult <- 2
34 | set.seed(2018)
35 | g <- 200
36 | d <- data.frame(
37 |   x = 0.025*(1:(3*g))) # ordered in x
38 | d$y_ideal <- mult*sin((0.3*d$x)^2)
39 | d$y_observed <- ifelse(runif(length(d$y_ideal)) <= sigmoid(d$y_ideal), 1, 0)
40 | d$y_plot <- (1+mult)*(2*d$y_observed-1)
41 | w <- rep(1.0, nrow(d))
42 | 
43 | if(plot) {
44 |   plt1 <- ggplot(data = d, aes(x = x)) + 
45 |     geom_line(aes(y = y_ideal), linetype=2) +
46 |     geom_point(aes(y = y_plot, color = as.factor(d$y_observed)), alpha = 0.5) +
47 |     geom_smooth(aes(y = y_plot), span = 0.25, se = FALSE) +
48 |     ylab("y") +
49 |     guides(color = FALSE) +
50 |     ggtitle("raw data", 
51 |             subtitle = "dots: observed values, dashed line: unobserved true values")
52 |   print(plt1)
53 | }
54 | ```
55 | 
56 |     ## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
57 | 
58 | <img src="LogisticLinkSegmentation_files/figure-markdown_github/r1-1.png" style="display: block; margin: auto;" />
59 | 
60 | ``` r
61 | costs <- lin_costs_logistic(d$x, d$y_observed, w, 40, seq_len(nrow(d)))
62 | costs <- costs + 5
63 | soln <- solve_interval_partition(costs, 20)
64 | print(soln)
65 | ```
66 | 
67 |     ## [1]   1  59 255 315 424 471 601
68 | 
69 | ``` r
70 | preds <- numeric(nrow(d))
71 | for(i in seqi(1, length(soln)-1)) {
72 |   predsi <- logistic_fits(d$x, d$y_observed, w, soln[[i]]-1, soln[[i+1]]-2)
73 |   preds[seqi(soln[[i]], soln[[i+1]]-1)] <- predsi
74 | }
75 | d$link <- pmin(5, pmax(-5, preds))
76 | d$group <- findInterval(seq_len(nrow(d)), soln)
77 | 
78 | 
79 | if(plot) {
80 |   plt2 <- ggplot(data = d, aes(x = x)) + 
81 |     geom_line(aes(y = y_ideal), linetype=2) +
82 |     geom_line(aes(y = link, group = group), linetype=3, color="blue") +
83 |     geom_point(aes(y = y_plot, color = as.factor(d$y_observed)), alpha = 0.5) +
84 |     ylab("y") +
85 |     guides(color = FALSE) +
86 |     ggtitle("raw data", 
87 |             subtitle = "dots: observed values, cuts/curve: recovered model")
88 |   for(ci in soln) {
89 |     if((ci>1)&&(ci<nrow(d))) {
90 |       plt2 <- plt2 + geom_vline(xintercept = d$x[[ci]], color = "blue")
91 |     }
92 |   }
93 |   print(plt2)
94 | }
95 | ```
96 | 
97 | <img src="LogisticLinkSegmentation_files/figure-markdown_github/r1-2.png" style="display: block; margin: auto;" />
98 | 


--------------------------------------------------------------------------------
/src/const_costs.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include <RcppArmadillo.h>
 3 | 
 4 | using Rcpp::NumericVector;
 5 | using Rcpp::NumericMatrix;
 6 | using Rcpp::IntegerVector;
 7 | 
 8 | double const_cost_worker(const NumericVector &y, const NumericVector &w, 
 9 |                   const int min_seg,
10 |                   const int i, const int j) {
11 |   if(j <= (i + (min_seg-1))) {
12 |     return std::numeric_limits<double>::max();
13 |   }
14 |   const int vlen = y.length();
15 |   if((i<0) || (j>=vlen) || (vlen!=w.length()) || (min_seg<1)) {
16 |     throw std::range_error("Inadmissible value");
17 |   }
18 |   double w_ij = 0;
19 |   double sum_ij = 0;
20 |   for(int k=i; k<=j; ++k) {
21 |     sum_ij = sum_ij + y(k)*w(k);
22 |     w_ij = w_ij + w(k);
23 |   }
24 |   double sum_loss = 0.0;
25 |   for(int k=i; k<=j; ++k) {
26 |     const double mean_ijk = (sum_ij - y(k)*w(k))/(w_ij - w(k));
27 |     const double diff = y(k) - mean_ijk;
28 |     const double loss = w(k)*diff*diff;
29 |     sum_loss = sum_loss + loss;
30 |   }
31 |   return sum_loss;
32 | }
33 | 
34 | //' const_cost
35 | //' 
36 | //' Calculate out of sample total square error cost of using mean of points to estimate other points in interval.
37 | //' Zero indexed.
38 | //' 
39 | //' @param y NumericVector, values to group in order.
40 | //' @param w NumericVector, weights.
41 | //' @param min_seg positive integer, minimum segment size (>=1).
42 | //' @param i integer, first index (inclusive).
43 | //' @param j integer, j>=i last index (inclusive);
44 | //' @return scalar, const cost of [i,...,j] interval (inclusive).
45 | //' 
46 | //' @keywords internal
47 | //' 
48 | //' @examples
49 | //' 
50 | //' const_cost(c(1, 1, 2, 2), c(1, 1, 1, 1), 1, 0, 3)
51 | //' 
52 | //' @export
53 | // [[Rcpp::export]]
54 | double const_cost(NumericVector y, NumericVector w, 
55 |                   const int min_seg,
56 |                   const int i, const int j) {
57 |   return const_cost_worker(y, w, min_seg, i, j);
58 | }
59 | 
60 | //' const_costs
61 | //' 
62 | //' Built matrix of total out of sample interval square error costs for held-out means.
63 | //' One indexed.
64 | //' 
65 | //' @param y NumericVector, values to group in order.
66 | //' @param w NumericVector, weights.
67 | //' @param min_seg positive integer, minimum segment size (>=1).
68 | //' @param indices IntegerVector, order list of indices to pair.
69 | //' @return xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
70 | //' 
71 | //' 
72 | //' @examples
73 | //' 
74 | //' const_costs(c(1, 1, 2, 2), c(1, 1, 1, 1), 1, 1:4)
75 | //' 
76 | //' @export
77 | // [[Rcpp::export]]
78 | NumericMatrix const_costs(NumericVector y, NumericVector w, 
79 |                           const int min_seg,
80 |                           IntegerVector indices) {
81 |   const int vlen = y.length();
82 |   if(vlen!=w.length() || (min_seg<1)) {
83 |     throw std::range_error("Inadmissible value");
84 |   }
85 |   const int n = indices.size();
86 |   NumericMatrix xcosts = NumericMatrix(n, n);
87 |   const double single_value = std::numeric_limits<double>::max();
88 |   for(int i=0; i<n; ++i) {
89 |     xcosts(i,i) = single_value;
90 |     for(int j=i+1; j<n; ++j) {
91 |       const double sum_loss = const_cost_worker(y, w, min_seg, indices(i)-1, indices(j)-1);
92 |       xcosts(i,j) = sum_loss;
93 |       xcosts(j,i) = sum_loss;
94 |     }
95 |   }
96 |   return xcosts;
97 | }
98 | 


--------------------------------------------------------------------------------
/R/solve_dyn_R.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' solve_interval_partition (R version)
  4 | #' 
  5 | #' Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
  6 | #' is the cost of choosing the partition element [i,...,j]. 
  7 | #' Returned solution is an ordered vector v of length k where: v[1]==1, v[k]==nrow(x)+1, and the 
  8 | #' partition is of the form [v[i], v[i+1]) (intervals open on the right).
  9 | #' 
 10 | #' @param x NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).
 11 | #' @param kmax int, maximum number of steps in solution.
 12 | #' @return dynamic program solution.
 13 | #' 
 14 | #' @keywords internal
 15 | #' 
 16 | #' @examples 
 17 | #' 
 18 | #' x <- matrix(c(1,1,5,1,1,0,5,0,1), nrow=3)
 19 | #' k <- 3
 20 | #' solve_interval_partition_R(x, k)
 21 | #' solve_interval_partition(x, k)
 22 | #' 
 23 | #' @export
 24 | #' 
 25 | solve_interval_partition_R <- function(x, kmax) {
 26 |   # for cleaner notation
 27 |   # solution and x will be indexed from 1 using
 28 |   # R_INDEX_DELTA
 29 |   # intermediate arrays will be padded so indexing
 30 |   # does not need to be shifted
 31 |   R_INDEX_DELTA = 0L;
 32 |   R_SIZE_PAD = 0L;
 33 |   
 34 |   # get shape of problem
 35 |   n = nrow(x);
 36 |   if(kmax>n) {
 37 |     kmax = n;
 38 |   }
 39 |   
 40 |   # get some edge-cases
 41 |   if((kmax<=1)||(n<=1)) {
 42 |     solution = integer(2);
 43 |     solution[1 + R_INDEX_DELTA] = 1;
 44 |     solution[2 + R_INDEX_DELTA] = n+1;
 45 |     return(solution);
 46 |   }
 47 |   
 48 |   # best path cost up to i (row) with exactly k-steps (column)
 49 |   path_costs = matrix(0.0, n + R_SIZE_PAD, kmax + R_SIZE_PAD);
 50 |   # how many steps we actually took
 51 |   k_actual = matrix(0L, n + R_SIZE_PAD, kmax + R_SIZE_PAD);
 52 |   # how we realized each above cost
 53 |   prev_step = matrix(0L, n + R_SIZE_PAD, kmax + R_SIZE_PAD);
 54 |   
 55 |   # fill in initial path and costs tables k = 1 case
 56 |   for(i in seqi(1, n)) {
 57 |     prev_step[i, 1] = 1L;
 58 |     path_costs[i, 1] = x[1 + R_INDEX_DELTA, i + R_INDEX_DELTA];
 59 |     k_actual[i, 1] = 1L;
 60 |   }
 61 |   # refine dynprog table
 62 |   for(ksteps in seqi(2, kmax)) {
 63 |     # compute larger paths
 64 |     for(i in seqi(1, n)) {
 65 |       # no split case
 66 |       pick = i;
 67 |       k_seen = 1;
 68 |       pick_cost = x[1 + R_INDEX_DELTA, i + R_INDEX_DELTA];
 69 |       # split cases
 70 |       for(candidate in seqi(1, i-1)) {
 71 |         cost = path_costs[candidate, ksteps-1] +
 72 |           x[candidate + 1 + R_INDEX_DELTA, i + R_INDEX_DELTA];
 73 |         k_cost = k_actual[candidate, ksteps-1] + 1;
 74 |         if((cost<=pick_cost) &&
 75 |            ((cost<pick_cost)||(k_cost<k_seen))) {
 76 |           pick = candidate;
 77 |           pick_cost = cost;
 78 |           k_seen = k_cost;
 79 |         }
 80 |       }
 81 |       path_costs[i, ksteps] = pick_cost;
 82 |       prev_step[i, ksteps] = pick;
 83 |       k_actual[i, ksteps] = k_seen;
 84 |     }
 85 |   }
 86 |   
 87 |   # now back-chain for solution
 88 |   k_opt = k_actual[n, kmax];
 89 |   solution = integer(k_opt+1);
 90 |   solution[1 + R_INDEX_DELTA] = 1L;
 91 |   solution[k_opt + 1 + R_INDEX_DELTA] = n+1L;
 92 |   i_at = n;
 93 |   k_at = k_opt;
 94 |   while(k_at>1) {
 95 |     prev_i = prev_step[i_at, k_at];
 96 |     solution[k_at + R_INDEX_DELTA] = prev_i + 1;
 97 |     i_at = prev_i;
 98 |     k_at = k_at - 1;
 99 |   }
100 |   
101 |   return(solution);
102 | }
103 | 
104 | 


--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  3 |   <url>
  4 |     <loc>/404.html</loc>
  5 |   </url>
  6 |   <url>
  7 |     <loc>/LICENSE-text.html</loc>
  8 |   </url>
  9 |   <url>
 10 |     <loc>/articles/RcppDynProg.html</loc>
 11 |   </url>
 12 |   <url>
 13 |     <loc>/articles/Segmentation.html</loc>
 14 |   </url>
 15 |   <url>
 16 |     <loc>/articles/SegmentationL.html</loc>
 17 |   </url>
 18 |   <url>
 19 |     <loc>/articles/index.html</loc>
 20 |   </url>
 21 |   <url>
 22 |     <loc>/authors.html</loc>
 23 |   </url>
 24 |   <url>
 25 |     <loc>/index.html</loc>
 26 |   </url>
 27 |   <url>
 28 |     <loc>/news/index.html</loc>
 29 |   </url>
 30 |   <url>
 31 |     <loc>/reference/RcppDynProg-package.html</loc>
 32 |   </url>
 33 |   <url>
 34 |     <loc>/reference/RcppDynProg.html</loc>
 35 |   </url>
 36 |   <url>
 37 |     <loc>/reference/all_partitions.html</loc>
 38 |   </url>
 39 |   <url>
 40 |     <loc>/reference/const_cost.html</loc>
 41 |   </url>
 42 |   <url>
 43 |     <loc>/reference/const_cost_logistic.html</loc>
 44 |   </url>
 45 |   <url>
 46 |     <loc>/reference/const_costs.html</loc>
 47 |   </url>
 48 |   <url>
 49 |     <loc>/reference/const_costs_logistic.html</loc>
 50 |   </url>
 51 |   <url>
 52 |     <loc>/reference/index.html</loc>
 53 |   </url>
 54 |   <url>
 55 |     <loc>/reference/lin_cost.html</loc>
 56 |   </url>
 57 |   <url>
 58 |     <loc>/reference/lin_cost_logistic.html</loc>
 59 |   </url>
 60 |   <url>
 61 |     <loc>/reference/lin_costs.html</loc>
 62 |   </url>
 63 |   <url>
 64 |     <loc>/reference/lin_costs_logistic.html</loc>
 65 |   </url>
 66 |   <url>
 67 |     <loc>/reference/logistic_fits.html</loc>
 68 |   </url>
 69 |   <url>
 70 |     <loc>/reference/logistic_solve1.html</loc>
 71 |   </url>
 72 |   <url>
 73 |     <loc>/reference/piecewise_constant.html</loc>
 74 |   </url>
 75 |   <url>
 76 |     <loc>/reference/piecewise_constant_coder.html</loc>
 77 |   </url>
 78 |   <url>
 79 |     <loc>/reference/piecewise_linear.html</loc>
 80 |   </url>
 81 |   <url>
 82 |     <loc>/reference/piecewise_linear_coder.html</loc>
 83 |   </url>
 84 |   <url>
 85 |     <loc>/reference/run_RcppDynProg_tests.html</loc>
 86 |   </url>
 87 |   <url>
 88 |     <loc>/reference/run_package_tests.html</loc>
 89 |   </url>
 90 |   <url>
 91 |     <loc>/reference/score_solution.html</loc>
 92 |   </url>
 93 |   <url>
 94 |     <loc>/reference/solve_for_partition.html</loc>
 95 |   </url>
 96 |   <url>
 97 |     <loc>/reference/solve_for_partitionc.html</loc>
 98 |   </url>
 99 |   <url>
100 |     <loc>/reference/solve_interval_partition.html</loc>
101 |   </url>
102 |   <url>
103 |     <loc>/reference/solve_interval_partition_R.html</loc>
104 |   </url>
105 |   <url>
106 |     <loc>/reference/solve_interval_partition_k.html</loc>
107 |   </url>
108 |   <url>
109 |     <loc>/reference/solve_interval_partition_no_k.html</loc>
110 |   </url>
111 |   <url>
112 |     <loc>/reference/summarize_input.html</loc>
113 |   </url>
114 |   <url>
115 |     <loc>/reference/xlin_fits.html</loc>
116 |   </url>
117 |   <url>
118 |     <loc>/reference/xlin_fits_R.html</loc>
119 |   </url>
120 |   <url>
121 |     <loc>/reference/xlin_fits_V.html</loc>
122 |   </url>
123 |   <url>
124 |     <loc>/reference/xlin_fits_lm.html</loc>
125 |   </url>
126 |   <url>
127 |     <loc>/reference/xlin_pfits.html</loc>
128 |   </url>
129 |   <url>
130 |     <loc>/reference/xlogistic_fits.html</loc>
131 |   </url>
132 | </urlset>
133 | 


--------------------------------------------------------------------------------
/extras/DynProg.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | Created on Mon Dec 24 15:36:22 2018
 5 | 
 6 | @author: John Mount
 7 | """
 8 | 
 9 | import numpy
10 | 
11 | # import numpy
12 | # from DynProg import *
13 | # x = numpy.array([1,1,5,1,1,0,5,0,1])
14 | # x.shape = (3,3)
15 | # solve_dynamic_program(x, 3)
16 | #
17 | #' solve_dynamic_program
18 | #' 
19 | #' Solve a for a minimal cost partition of the integers [1,...,nrow(x)] problem where for j>=i x(i,j).
20 | #' is the cost of choosing the partition element [i,...,j]. 
21 | #' Returned solution is an ordered vector v of length k where: v[1]==1, v[k]==nrow(x)+1, and the 
22 | #' partition is of the form [v[i], v[i+1]) (intervals open on the right).
23 | #' 
24 | #' @param x NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).
25 | #' @param kmax int, maximum number of steps in solution. 
26 | #' @return dynamic program solution.
27 | #' 
28 | def solve_dynamic_program(x, kmax):
29 |     """x n by n inclusive interval cost array, kmax maximum number of steps to take"""
30 |     # for cleaner notation
31 |     # solution and x will be indexed from 1 using
32 |     # R_INDEX_DELTA
33 |     # intermediate arrays will be padded so indexing
34 |     # does not need to be shifted
35 |     R_INDEX_DELTA = -1
36 |     R_SIZE_PAD = 1
37 |   
38 |     # get shape of problem
39 |     n = x.shape[0]
40 |     if kmax>n:
41 |         kmax = n
42 |  
43 |     # get some edge-cases
44 |     if (kmax<=1) or (n<=1):
45 |          return [1, n+1]
46 | 
47 |     # best path cost up to i (row) with exactly k-steps (column)
48 |     path_costs = numpy.zeros((n + R_SIZE_PAD, kmax + R_SIZE_PAD))
49 |     # how many steps we actually took
50 |     k_actual = numpy.zeros((n + R_SIZE_PAD, kmax + R_SIZE_PAD))
51 |     # how we realized each above cost
52 |     prev_step = numpy.zeros((n + R_SIZE_PAD, kmax + R_SIZE_PAD))
53 |   
54 |     # fill in path and costs tables
55 |     for i in range(1, n+1):
56 |         prev_step[i, 1] = 1
57 |         path_costs[i, 1] = x[1 + R_INDEX_DELTA, i + R_INDEX_DELTA]
58 |         k_actual[i, 1] = 1
59 | 
60 |     # refine dynprog table
61 |     for ksteps in range(2, kmax+1):
62 |         # compute larger paths
63 |         for i in range(1, n+1):
64 |             # no split case
65 |             pick = i
66 |             k_seen = 1
67 |             pick_cost = x[1 + R_INDEX_DELTA, i + R_INDEX_DELTA]
68 |             # split cases
69 |             for candidate in range(1, i):
70 |                 cost = path_costs[candidate, ksteps-1] + \
71 |                     x[candidate + 1 + R_INDEX_DELTA, i + R_INDEX_DELTA]
72 |                 k_cost = k_actual[candidate, ksteps-1] + 1
73 |                 if (cost<=pick_cost) and \
74 |                     ((cost<pick_cost) or (k_cost<k_seen)):
75 |                     pick = candidate
76 |                     pick_cost = cost
77 |                     k_seen = k_cost
78 |             path_costs[i, ksteps] = pick_cost
79 |             prev_step[i, ksteps] = pick
80 |             k_actual[i, ksteps] = k_seen
81 |  
82 |     # now back-chain for solution
83 |     k_opt = int(k_actual[n, kmax])
84 |     solution = [0]*(k_opt+1)
85 |     solution[1 + R_INDEX_DELTA] = int(1)
86 |     solution[k_opt + 1 + R_INDEX_DELTA] = int(n+1)
87 |     i_at = n
88 |     k_at = k_opt
89 |     while k_at>1:
90 |         prev_i = int(prev_step[i_at, k_at])
91 |         solution[k_at + R_INDEX_DELTA] = int(prev_i + 1)
92 |         i_at = prev_i
93 |         k_at = k_at - 1
94 |     return solution
95 | 
96 | 
97 | 


--------------------------------------------------------------------------------
/src/lin_costs.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <RcppArmadillo.h>
  3 | 
  4 | using Rcpp::NumericVector;
  5 | using Rcpp::NumericMatrix;
  6 | using Rcpp::IntegerVector;
  7 | 
  8 | 
  9 | NumericVector xlin_fits_worker(const NumericVector &x, const NumericVector &y, 
 10 |                                const NumericVector &w,
 11 |                                const int i, const int j);
 12 | 
 13 | 
 14 | double lin_cost_worker(const NumericVector &x, const NumericVector &y, 
 15 |                        const NumericVector &w,
 16 |                        const int min_seg,
 17 |                        const int i, const int j) {
 18 |   if(j <= (i + (min_seg-1))) {
 19 |     return std::numeric_limits<double>::max();
 20 |   }
 21 |   const int vlen = x.length();
 22 |   if((i<0) || (j>=vlen) || (vlen!=y.length()) || (vlen!=w.length()) || (min_seg<1)) {
 23 |     throw std::range_error("Inadmissible value");
 24 |   }
 25 |   NumericVector fits = xlin_fits_worker(x, y, w, i, j);
 26 |   double sum_loss = 0.0;
 27 |   for(int k=i; k<=j; ++k) {
 28 |     const double y_est = fits(k-i);
 29 |     const double diff = y(k) - y_est;
 30 |     const double loss = diff*diff;
 31 |     sum_loss = sum_loss + loss;
 32 |   }
 33 |   return sum_loss;
 34 | }
 35 | 
 36 | //' lin_cost
 37 | //' 
 38 | //' Calculate cost of using linear model fit on points to estimate other points in the interval.
 39 | //' Zero indexed.
 40 | //' 
 41 | //' @param x NumericVector, x-coords of values to group.
 42 | //' @param y NumericVector, values to group in order.
 43 | //' @param w NumericVector, weights.
 44 | //' @param min_seg positive integer, minimum segment size (>=1).
 45 | //' @param i integer, first index (inclusive).
 46 | //' @param j integer, j>=i last index (inclusive);
 47 | //' @return scalar, linear cost of [i,...,j] interval (inclusive).
 48 | //' 
 49 | //' @keywords internal
 50 | //' 
 51 | //' @examples
 52 | //' 
 53 | //' lin_cost(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 1, 0, 3)
 54 | //' 
 55 | //' @export
 56 | // [[Rcpp::export]]
 57 | double lin_cost(NumericVector x, NumericVector y, NumericVector w,
 58 |                 const int min_seg,
 59 |                 const int i, const int j) {
 60 |   return lin_cost_worker(x, y, w, min_seg, i, j);
 61 | }
 62 | 
 63 | //' lin_costs
 64 | //' 
 65 | //' Built matrix of interval costs for held-out linear models.
 66 | //' One indexed.
 67 | //' 
 68 | //' @param x NumericVector, x-coords of values to group.
 69 | //' @param y NumericVector, values to group in order.
 70 | //' @param w NumericVector, weights.
 71 | //' @param min_seg positive integer, minimum segment size (>=1).
 72 | //' @param indices IntegerVector, ordered list of indices to pair.
 73 | //' @return xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
 74 | //' 
 75 | //' @examples
 76 | //' 
 77 | //' lin_costs(c(1, 2, 3, 4), c(1, 2, 2, 1), c(1, 1, 1, 1), 1, 1:4)
 78 | //' 
 79 | //' @export
 80 | // [[Rcpp::export]]
 81 | NumericMatrix lin_costs(NumericVector x, NumericVector y, NumericVector w,
 82 |                         const int min_seg,
 83 |                         IntegerVector indices) {
 84 |   const int vlen = x.length();
 85 |   if((vlen!=y.length()) || (vlen!=w.length()) || (min_seg<1)) {
 86 |     throw std::range_error("Inadmissible value");
 87 |   }
 88 |   const int n = indices.size();
 89 |   NumericMatrix xcosts = NumericMatrix(n, n);
 90 |   const double single_value = std::numeric_limits<double>::max();
 91 |   for(int i=0; i<n; ++i) {
 92 |     xcosts(i,i) = single_value;
 93 |     for(int j=i+1; j<n; ++j) {
 94 |       const double sum_loss = lin_cost_worker(x, y, w, min_seg, indices(i)-1, indices(j)-1);
 95 |       xcosts(i,j) = sum_loss;
 96 |       xcosts(j,i) = sum_loss;
 97 |     }
 98 |   }
 99 |   return xcosts;
100 | }
101 | 


--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
  1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
  2 | (function($) {
  3 |   $(function() {
  4 | 
  5 |     $('.navbar-fixed-top').headroom();
  6 | 
  7 |     $('body').css('padding-top', $('.navbar').height() + 10);
  8 |     $(window).resize(function(){
  9 |       $('body').css('padding-top', $('.navbar').height() + 10);
 10 |     });
 11 | 
 12 |     $('[data-toggle="tooltip"]').tooltip();
 13 | 
 14 |     var cur_path = paths(location.pathname);
 15 |     var links = $("#navbar ul li a");
 16 |     var max_length = -1;
 17 |     var pos = -1;
 18 |     for (var i = 0; i < links.length; i++) {
 19 |       if (links[i].getAttribute("href") === "#")
 20 |         continue;
 21 |       // Ignore external links
 22 |       if (links[i].host !== location.host)
 23 |         continue;
 24 | 
 25 |       var nav_path = paths(links[i].pathname);
 26 | 
 27 |       var length = prefix_length(nav_path, cur_path);
 28 |       if (length > max_length) {
 29 |         max_length = length;
 30 |         pos = i;
 31 |       }
 32 |     }
 33 | 
 34 |     // Add class to parent <li>, and enclosing <li> if in dropdown
 35 |     if (pos >= 0) {
 36 |       var menu_anchor = $(links[pos]);
 37 |       menu_anchor.parent().addClass("active");
 38 |       menu_anchor.closest("li.dropdown").addClass("active");
 39 |     }
 40 |   });
 41 | 
 42 |   function paths(pathname) {
 43 |     var pieces = pathname.split("/");
 44 |     pieces.shift(); // always starts with /
 45 | 
 46 |     var end = pieces[pieces.length - 1];
 47 |     if (end === "index.html" || end === "")
 48 |       pieces.pop();
 49 |     return(pieces);
 50 |   }
 51 | 
 52 |   // Returns -1 if not found
 53 |   function prefix_length(needle, haystack) {
 54 |     if (needle.length > haystack.length)
 55 |       return(-1);
 56 | 
 57 |     // Special case for length-0 haystack, since for loop won't run
 58 |     if (haystack.length === 0) {
 59 |       return(needle.length === 0 ? 0 : -1);
 60 |     }
 61 | 
 62 |     for (var i = 0; i < haystack.length; i++) {
 63 |       if (needle[i] != haystack[i])
 64 |         return(i);
 65 |     }
 66 | 
 67 |     return(haystack.length);
 68 |   }
 69 | 
 70 |   /* Clipboard --------------------------*/
 71 | 
 72 |   function changeTooltipMessage(element, msg) {
 73 |     var tooltipOriginalTitle=element.getAttribute('data-original-title');
 74 |     element.setAttribute('data-original-title', msg);
 75 |     $(element).tooltip('show');
 76 |     element.setAttribute('data-original-title', tooltipOriginalTitle);
 77 |   }
 78 | 
 79 |   if(ClipboardJS.isSupported()) {
 80 |     $(document).ready(function() {
 81 |       var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type = 'submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";
 82 | 
 83 |       $("div.sourceCode").addClass("hasCopyButton");
 84 | 
 85 |       // Insert copy buttons:
 86 |       $(copyButton).prependTo(".hasCopyButton");
 87 | 
 88 |       // Initialize tooltips:
 89 |       $('.btn-copy-ex').tooltip({container: 'body'});
 90 | 
 91 |       // Initialize clipboard:
 92 |       var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
 93 |         text: function(trigger) {
 94 |           return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
 95 |         }
 96 |       });
 97 | 
 98 |       clipboardBtnCopies.on('success', function(e) {
 99 |         changeTooltipMessage(e.trigger, 'Copied!');
100 |         e.clearSelection();
101 |       });
102 | 
103 |       clipboardBtnCopies.on('error', function() {
104 |         changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
105 |       });
106 |     });
107 |   }
108 | })(window.jQuery || window.$)
109 | 


--------------------------------------------------------------------------------
/src/const_costs_logistic.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <RcppArmadillo.h>
  3 | 
  4 | #include <math.h>
  5 | 
  6 | using Rcpp::NumericVector;
  7 | using Rcpp::NumericMatrix;
  8 | using Rcpp::IntegerVector;
  9 | 
 10 | 
 11 | double const_cost_logistic_worker(const NumericVector &y, const NumericVector &w, 
 12 |                                   const int min_seg,
 13 |                                   const int i, const int j) {
 14 |   if(j <= (i + (min_seg-1))) {
 15 |     return std::numeric_limits<double>::max();
 16 |   }
 17 |   const int vlen = y.length();
 18 |   if((i<0) || (j>=vlen) || (vlen!=w.length()) || (min_seg<1)) {
 19 |     throw std::range_error("Inadmissible value");
 20 |   }
 21 |   double w_ij = 0;
 22 |   double sum_ij = 0;
 23 |   for(int k=i; k<=j; ++k) {
 24 |     sum_ij = sum_ij + y(k)*w(k);
 25 |     w_ij = w_ij + w(k);
 26 |   }
 27 |   double sum_loss = 0.0;
 28 |   for(int k=i; k<=j; ++k) {
 29 |     if(w(k)>0.0) {
 30 |       // out of sample estimate
 31 |       const double mean_ijk = (sum_ij - y(k)*w(k))/(w_ij - w(k));
 32 |       double loss = 0.0;
 33 |       if(y(k)>0.0) {
 34 |         loss = loss + y(k)*std::log(mean_ijk);
 35 |       }
 36 |       if(y(k)<1.0) {
 37 |         loss = loss + (1.0-y(k))*std::log(1.0-mean_ijk);
 38 |       }
 39 |       sum_loss = sum_loss + w(k)*loss;
 40 |     }
 41 |   }
 42 |   return sum_loss;
 43 | }
 44 | 
 45 | //' const_cost_logistic
 46 | //' 
 47 | //' Calculate logistic cost of using mean of points to estimate other points in interval.
 48 | //' Zero indexed.
 49 | //' 
 50 | //' @param y NumericVector, 0/1 values to group in order (should be in interval [0,1]).
 51 | //' @param w NumericVector, weights (should be positive).
 52 | //' @param min_seg positive integer, minimum segment size (>=1).
 53 | //' @param i integer, first index (inclusive).
 54 | //' @param j integer, j>=i last index (inclusive);
 55 | //' @return scalar, const cost of [i,...,j] interval (inclusive).
 56 | //' 
 57 | //' @keywords internal
 58 | //' 
 59 | //' @examples
 60 | //' 
 61 | //' const_cost_logistic(c(0.1, 0.1, 0.2, 0.2), c(1, 1, 1, 1), 1, 0, 3)
 62 | //' 
 63 | //' @export
 64 | // [[Rcpp::export]]
 65 | double const_cost_logistic(NumericVector y, NumericVector w, 
 66 |                            const int min_seg,
 67 |                            const int i, const int j) {
 68 |   return const_cost_logistic_worker(y, w, 
 69 |                                     min_seg,
 70 |                                     i, j);
 71 | }
 72 | 
 73 | //' const_costs_logistic
 74 | //' 
 75 | //' Built matrix of interval logistic costs for held-out means.
 76 | //' One indexed.
 77 | //' 
 78 | //' @param y NumericVector, 0/1 values to group in order (should be in interval [0,1]).
 79 | //' @param w NumericVector, weights (should be positive).
 80 | //' @param min_seg positive integer, minimum segment size (>=1).
 81 | //' @param indices IntegerVector, order list of indices to pair.
 82 | //' @return xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
 83 | //' 
 84 | //' 
 85 | //' @examples
 86 | //' 
 87 | //' const_costs_logistic(c(0.1, 0.1, 0.2, 0.2), c(1, 1, 1, 1), 1, 1:4)
 88 | //' 
 89 | //' @export
 90 | // [[Rcpp::export]]
 91 | NumericMatrix const_costs_logistic(NumericVector y, NumericVector w, 
 92 |                                    const int min_seg,
 93 |                                    IntegerVector indices) {
 94 |   const int vlen = y.length();
 95 |   if((vlen!=w.length()) || (min_seg<1)) {
 96 |     throw std::range_error("Inadmissible value");
 97 |   }  
 98 |   const int n = indices.size();
 99 |   NumericMatrix xcosts = NumericMatrix(n, n);
100 |   const double single_value = std::numeric_limits<double>::max();
101 |   for(int i=0; i<n; ++i) {
102 |     xcosts(i,i) = single_value;
103 |     for(int j=i+1; j<n; ++j) {
104 |       const double sum_loss = const_cost_logistic_worker(y, w, min_seg, indices(i)-1, indices(j)-1);
105 |       xcosts(i,j) = sum_loss;
106 |       xcosts(j,i) = sum_loss;
107 |     }
108 |   }
109 |   return xcosts;
110 | }
111 | 


--------------------------------------------------------------------------------
/src/input_summary.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <RcppArmadillo.h>
  3 | 
  4 | #include <math.h>
  5 | 
  6 | using Rcpp::NumericVector;
  7 | using Rcpp::List;
  8 | 
  9 | #include "input_summary.h"
 10 | 
 11 | 
 12 | input_summary::input_summary(const NumericVector &x, const NumericVector &y,
 13 |                              const NumericVector &w,
 14 |                              const int i, const int j,
 15 |                              const int skip) {
 16 |   const int vlen = x.length();
 17 |   if((i<0) || (j>=vlen) || (vlen!=y.length()) || (vlen!=w.length())) {
 18 |     throw std::range_error("Inadmissible value");
 19 |   }
 20 |   max_x = std::numeric_limits<double>::quiet_NaN();
 21 |   min_x = std::numeric_limits<double>::quiet_NaN();
 22 |   saw_y_pos = false;
 23 |   max_x_pos = std::numeric_limits<double>::quiet_NaN();
 24 |   min_x_pos = std::numeric_limits<double>::quiet_NaN();
 25 |   saw_y_neg = false;
 26 |   max_x_neg = std::numeric_limits<double>::quiet_NaN();
 27 |   min_x_neg = std::numeric_limits<double>::quiet_NaN();
 28 |   total_w = 0.0;
 29 |   total_wy = 0.0;
 30 |   k_points = 0L;
 31 |   
 32 |   for(int k=i; k<=j; ++k) {
 33 |     if((k!=skip)&&(w(k)>0)) {
 34 |       if(k_points<=0L) {
 35 |         max_x = x(k);
 36 |         min_x = x(k);
 37 |       } else {
 38 |         max_x = std::max(max_x, x(k));
 39 |         min_x = std::min(min_x, x(k));
 40 |       }
 41 |       total_w = total_w + w(k);
 42 |       total_wy = total_wy + w(k)*y(k);
 43 |       k_points = k_points + 1L;
 44 |       if(y(k)>=0.5) {
 45 |         if(!saw_y_pos) {
 46 |           saw_y_pos = true;
 47 |           max_x_pos = x(k);
 48 |           min_x_pos = x(k);
 49 |         } else {
 50 |           max_x_pos = std::max(max_x_pos, x(k));
 51 |           min_x_pos = std::min(min_x_pos, x(k));
 52 |         }
 53 |       } else {
 54 |         if(!saw_y_neg) {
 55 |           saw_y_neg = true;
 56 |           max_x_neg = x(k);
 57 |           min_x_neg = x(k);
 58 |         } else {
 59 |           max_x_neg = std::max(max_x_neg, x(k));
 60 |           min_x_neg = std::min(min_x_neg, x(k));
 61 |         }
 62 |       }
 63 |     }
 64 |   }
 65 | }
 66 | 
 67 | bool input_summary::saw_data() const {
 68 |   return k_points>0L;
 69 | }
 70 | 
 71 | bool input_summary::x_varies() const {
 72 |   return (k_points>1L)&&(min_x<max_x);
 73 | }
 74 | 
 75 | bool input_summary::y_varies() const {
 76 |   return (k_points>1L)&&(saw_y_neg)&&(saw_y_pos);
 77 | }
 78 | 
 79 | bool input_summary::seperable() const {
 80 |   if(!y_varies()) {
 81 |     return true;
 82 |   }
 83 |   if(!x_varies()) {
 84 |     return false;
 85 |   }
 86 |   if(min_x_pos>max_x_neg) {
 87 |     return true;
 88 |   }
 89 |   if(min_x_neg>max_x_pos) {
 90 |     return true;
 91 |   }
 92 |   return false;
 93 | }
 94 | 
 95 | 
 96 | //' Summarize data (for debugging).
 97 | //' 
 98 | //' @param x NumericVector, expanatory variable.
 99 | //' @param y NumericVector, 0/1 values to fit.
100 | //' @param w NumericVector, weights (required, positive).
101 | //' @param i integer, first index (inclusive).
102 | //' @param j integer, last index (inclusive).
103 | //' @param skip integer, index to skip (-1 to not skip).
104 | //' @return summary list
105 | //' 
106 | //' @keywords internal
107 | //' 
108 | //' 
109 | //' @examples
110 | //' 
111 | //' costs <- matrix(c(1.5, NA ,NA ,1 ,0 , NA, 5, -1, 1), nrow = 3)
112 | //' solve_interval_partition(costs, nrow(costs))
113 | //'
114 | //' @export
115 | // [[Rcpp::export]]
116 | List summarize_input(NumericVector x, NumericVector y, 
117 |                      NumericVector w,
118 |                      const int i, const int j,
119 |                      const int skip) {
120 |   const input_summary isum = input_summary(x, y, w, i, j, skip);
121 |   List ret;
122 |   ret["max_x"] = isum.max_x;
123 |   ret["min_x"] = isum.min_x;
124 |   ret["saw_y_pos"] = isum.saw_y_pos;
125 |   ret["max_x_pos"] = isum.max_x_pos;
126 |   ret["min_x_pos"] = isum.min_x_pos;
127 |   ret["saw_y_neg"] = isum.saw_y_neg;
128 |   ret["max_x_neg"] = isum.max_x_neg;
129 |   ret["min_x_neg"] = isum.min_x_neg;
130 |   ret["total_w"] = isum.total_w;
131 |   ret["total_wy"] = isum.total_wy;
132 |   ret["k_points"] = isum.k_points;
133 |   ret["saw_data"] = isum.saw_data();
134 |   ret["x_varies"] = isum.x_varies();
135 |   ret["y_varies"] = isum.y_varies();
136 |   ret["seperable"] = isum.seperable();
137 |   return ret;
138 | }
139 |   


--------------------------------------------------------------------------------
/R/vtreat_coders.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | #' Piecewise linear fit coder factory.
  5 | #' 
  6 | #' Build a piecewise linear fit coder with some parameters bound in.
  7 | #' 
  8 | #' @param penalty per-segment cost penalty.
  9 | #' @param min_n_to_chunk minimum n to subdivied problem.
 10 | #' @param min_seg positive integer, minimum segment size.
 11 | #' @param max_k maximum segments to divide into.
 12 | #' @return a vtreat coder
 13 | #' 
 14 | #' @examples 
 15 | #' 
 16 | #' coder <- piecewise_linear_coder(min_seg = 1)
 17 | #' coder("x", 1:8, c(1, 2, 3, 4, 4, 3, 2, 1))
 18 | #' 
 19 | #' @export
 20 | #' 
 21 | piecewise_linear_coder <- function(penalty = 1,
 22 |                                    min_n_to_chunk = 1000,
 23 |                                    min_seg = 10,
 24 |                                    max_k = 1000) {
 25 |   force(penalty)
 26 |   force(min_n_to_chunk)
 27 |   force(min_seg)
 28 |   force(max_k)
 29 |   function(varName, x, y, w = NULL) {
 30 |     if(length(w)<=0) {
 31 |       w <- 1 + numeric(length(y))
 32 |     }
 33 |     x_cuts <- solve_for_partition(x, y, w = w, 
 34 |                                   penalty = penalty, 
 35 |                                   min_n_to_chunk = min_n_to_chunk,
 36 |                                   min_seg = min_seg,
 37 |                                   max_k = max_k)
 38 |     if(is.null(x_cuts) || (nrow(x_cuts)<=1)) {
 39 |       return(NULL)
 40 |     }
 41 |     estimate <- approx(x_cuts$x, x_cuts$pred, xout = x, method = "linear", rule = 2)$y
 42 |     attr(estimate, "approx_table") <- data.frame(predXs = x_cuts$x, predYs = x_cuts$pred)
 43 |     attr(estimate, "method") <- "linear"
 44 |     return(estimate)
 45 |   }
 46 | }
 47 | 
 48 | 
 49 | #' Piecewise linear fit.
 50 | #' 
 51 | #' \code{vtreat} custom coder based on \code{RcppDynProg::solve_for_partition()}.
 52 | #' 
 53 | #' @param varName character, name of variable to work on.
 54 | #' @param x numeric, input values.
 55 | #' @param y numeric, values to estimate.
 56 | #' @param w numeric, weights.
 57 | #' 
 58 | #' @examples 
 59 | #' 
 60 | #' piecewise_linear("x", 1:8, c(1, 2, 3, 4, 4, 3, 2, 1))
 61 | #' 
 62 | #' @export
 63 | #' 
 64 | piecewise_linear <- piecewise_linear_coder()
 65 | 
 66 | 
 67 | 
 68 | #' Piecewise constant fit coder factory.
 69 | #' 
 70 | #' Build a piecewise constant fit coder with some parameters bound in.
 71 | #' 
 72 | #' @param penalty per-segment cost penalty.
 73 | #' @param min_n_to_chunk minimum n to subdivied problem.
 74 | #' @param min_seg positive integer, minimum segment size.
 75 | #' @param max_k maximum segments to divide into.
 76 | #' @return a vtreat coder
 77 | #' 
 78 | #' @examples 
 79 | #' 
 80 | #' coder <- piecewise_constant_coder(min_seg = 1)
 81 | #' coder("x", 1:8, c(-1, -1, -1, -1, 1, 1, 1, 1))
 82 | #' 
 83 | #' @export
 84 | #' 
 85 | piecewise_constant_coder <- function(penalty = 1,
 86 |                                      min_n_to_chunk = 1000,
 87 |                                      min_seg = 10,
 88 |                                      max_k = 1000) {
 89 |   force(penalty)
 90 |   force(min_n_to_chunk)
 91 |   force(min_seg)
 92 |   force(max_k)
 93 |   function(varName, x, y, w = NULL) {
 94 |     if(length(w)<=0) {
 95 |       w <- 1 + numeric(length(y))
 96 |     }
 97 |     x_cuts <- solve_for_partitionc(x, y, w = w,
 98 |                                    penalty = penalty, 
 99 |                                    min_n_to_chunk = min_n_to_chunk,
100 |                                    min_seg = min_seg,
101 |                                    max_k = max_k)
102 |     if(is.null(x_cuts) || (nrow(x_cuts)<=1)) {
103 |       return(NULL)
104 |     }
105 |     estimate <- approx(x_cuts$x, x_cuts$pred, xout = x, method = "constant", rule = 2)$y
106 |     attr(estimate, "approx_table") <- data.frame(predXs = x_cuts$x, predYs = x_cuts$pred)
107 |     attr(estimate, "method") <- "constant"
108 |     return(estimate)
109 |   }
110 | }
111 | 
112 | #' Piecewise constant fit.
113 | #' 
114 | #' \code{vtreat} custom coder based on \code{RcppDynProg::solve_for_partition()}.
115 | #' 
116 | #' @param varName character, name of variable to work on.
117 | #' @param x numeric, input values.
118 | #' @param y numeric, values to estimate.
119 | #' @param w numeric, weights.
120 | #' 
121 | #' @examples 
122 | #' 
123 | #' piecewise_constant("x", 1:8, c(-1, -1, -1, -1, 1, 1, 1, 1))
124 | #' 
125 | #' @export
126 | #' 
127 | piecewise_constant <- piecewise_constant_coder()
128 | 
129 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | #' @importFrom utils combn
  4 | NULL
  5 | 
  6 | #' @importFrom wrapr stop_if_dot_args
  7 | NULL
  8 | 
  9 | 
 10 | 
 11 | #' Increasing whole-number sequence.
 12 | #'
 13 | #' Return an in increaing whole-number sequence from a to b inclusive (return integer(0) if none such). Allows for safe iteraton.
 14 | #' 
 15 | #' TODO: switch to wrapr version after next wrapr release.
 16 | #'
 17 | #' @param a scalar lower bound
 18 | #' @param b scalar upper bound
 19 | #' @return whole number sequence
 20 | #'
 21 | #' @examples
 22 | #'
 23 | #' # print 3, 4, and then 5
 24 | #' for(i in seqi(3, 5)) {
 25 | #'    print(i)
 26 | #' }
 27 | #'
 28 | #' # empty
 29 | #' for(i in seqi(5, 2)) {
 30 | #'    print(i)
 31 | #' }
 32 | #'
 33 | #' @noRd
 34 | #'
 35 | seqi <- function(a, b) {
 36 |   a = ceiling(a)
 37 |   b = floor(b)
 38 |   if(a>b) {
 39 |     return(integer(0))
 40 |   }
 41 |   base::seq(a, b, by = 1L)
 42 | }
 43 | 
 44 | 
 45 | #' Build all partitions into intervals.
 46 | #' 
 47 | #' @param n integer, sequence lenght to choose from.
 48 | #' @param kmax int, maximum number of segments in solution.
 49 | #' @return list of all partitions.
 50 | #' 
 51 | #' @examples 
 52 | #' 
 53 | #' all_partitions(4, 2)
 54 | #' 
 55 | #' @keywords internal
 56 | #' 
 57 | #' @export
 58 | #' 
 59 | all_partitions <- function(n, kmax = n) {
 60 |   # get shape of problem
 61 |   kmax <- min(kmax, n)
 62 |   syms <- seqi(2, n)
 63 |   
 64 |   res <- list(c(1, n+1))
 65 |   for(kf in seqi(1, kmax-1)) {
 66 |     ci = combn(syms, kf)
 67 |     for(j in seq_len(ncol(ci))) {
 68 |       soln <- sort(c(1, ci[, j, drop=TRUE], n+1))
 69 |       res <- c(res, list(soln))
 70 |     }
 71 |   }
 72 |   res
 73 | }
 74 | 
 75 | 
 76 | 
 77 | is.wholenumber <- function(x, tol = .Machine$double.eps^0.5)  {
 78 |   abs(x - round(x)) < tol
 79 | }
 80 | 
 81 | #' compute the price of a partition solution (and check is valid).
 82 | #' 
 83 | #' @param x NumericMatix, for j>=i x(i,j) is the cost of partition element [i,...,j] (inclusive).
 84 | #' @param solution vector of indices
 85 | #' @return price
 86 | #' 
 87 | #' @examples 
 88 | #' 
 89 | #' x <- matrix(c(1,1,5,1,1,0,5,0,1), nrow=3)
 90 | #' s <- c(1, 2, 4)
 91 | #' score_solution(x, s)
 92 | #' 
 93 | #' @export
 94 | #' 
 95 | score_solution <- function(x, solution) {
 96 |   n <- nrow(x)
 97 |   ls <- length(solution)
 98 |   if(ls<2) {
 99 |     stop("solutions must have length at least 2")
100 |   }
101 |   if(ls>(n+1)) {
102 |     stop("soltuions must have length no more than nrow(x)+1")
103 |   }
104 |   if(solution[1]!=1) {
105 |     stop("solution[1] must equal 1")
106 |   }
107 |   if(solution[ls]!=(n+1)) {
108 |     stop("solution[length(solution)] must equal nrow(x)+1")
109 |   }
110 |   if(!isTRUE(all(solution[-1]>solution[-ls]))) {
111 |     stop("solution indices must be increasing")
112 |   }
113 |   if(!isTRUE(all(is.wholenumber(solution)))) {
114 |     stop("solution must be wholenumbers")
115 |   }
116 |   score <- 0
117 |   for(i in seqi(1, ls-1)) {
118 |     score <- score + x[solution[i], solution[i+1]-1]
119 |   }
120 |   return(score)
121 | }
122 | 
123 | 
124 | 
125 | 
126 | test_solvers <- function(x, k) {
127 |   msg <- NULL
128 |   tryCatch({
129 |     sl <- all_partitions(nrow(x), k)
130 |     if(length(sl)<1) {
131 |       stop("brute force didn't return any solutions")
132 |     }
133 |     for(si in sl) {
134 |       if(!(length(si)<=(k+1))) {
135 |         stop("brute solution too long")
136 |       }
137 |     }
138 |     sc <- vapply(
139 |       sl, 
140 |       function(si) {
141 |         score_solution(x, si)
142 |       }, numeric(1))
143 |     sm <- min(sc)
144 |  
145 |     
146 |     soln1 <- solve_interval_partition_R(x, k)
147 |     score1 <- score_solution(x, soln1)
148 |     if(!(length(soln1)<=(k+1))) {
149 |       stop("soln1 too long")
150 |     }
151 |     if(!(abs(score1-sm)<=1e-5)) {
152 |       stop("R solution has wrong score")
153 |     }
154 |     
155 |     soln2 <- solve_interval_partition_k(x, k)
156 |     score2 <- score_solution(x, soln2)
157 |     if(!(length(soln2)<=(k+1))) {
158 |       stop("soln2 too long")
159 |     }
160 |     if(!(abs(score2-sm)<=1e-5)) {
161 |       stop("C++ k solution has wrong score")
162 |     }
163 |     
164 |     soln3 <- solve_interval_partition(x, k)
165 |     score3 <- score_solution(x, soln3)
166 |     if(!(length(soln3)<=(k+1))) {
167 |       stop("soln3 too long")
168 |     }
169 |     if(!(abs(score3-sm)<=1e-5)) {
170 |       stop("C++ solution has wrong score")
171 |     }
172 |     
173 |     if(k>=nrow(x)) {
174 |       soln4 <- solve_interval_partition_no_k(x)
175 |       score4 <- score_solution(x, soln4)
176 |       if(!(length(soln4)<=(k+1))) {
177 |         stop("soln4 too long")
178 |       }
179 |       if(!(abs(score4-sm)<=1e-5)) {
180 |         stop("C++ no_k solution has wrong score")
181 |       }
182 |     }
183 |   },
184 |   error = function(e) { msg <<- paste(as.character(e), sep = " ") }
185 |   )
186 |   if(!is.null(msg)) {
187 |     return(msg)
188 |   }
189 |   
190 |   return(TRUE)
191 | }
192 | 
193 | 


--------------------------------------------------------------------------------
/vignettes/Segmentation.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Segmentation"
  3 | author: "John Mount"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{Segmentation}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 | 
 12 | In this example we fit a piecewise constant function to example data.  
 13 | Please see [here](https://github.com/WinVector/RcppDynProg) for a discussion of the methodology.
 14 | 
 15 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
 16 | library("RcppDynProg")
 17 | 
 18 | 
 19 | set.seed(2018)
 20 | g <- 50
 21 | d <- data.frame(
 22 |   x = 1:(3*g)) # ordered in x
 23 | d$y_ideal <- c(rep(0, g), rep(1, g), rep(-1, g))
 24 | d$y_observed <- d$y_ideal + rnorm(length(d$y_ideal))
 25 | 
 26 | 
 27 | 
 28 | # plot
 29 | plot(d$x, d$y_observed,
 30 |      xlab = "x", ylab = "y",
 31 |      main = "raw data\ncircles: observed values, dashed line: unobserved true values")
 32 | lines(d$x, d$y_ideal,
 33 |      type = "l",
 34 |      lty = "dashed")
 35 | ```
 36 | 
 37 | As a heuristic, we set our regularization penalty to a value that treats permuted data (no relation between x and y) 
 38 | as a single partition.
 39 | 
 40 | ```{r}
 41 | y_permuted <- d$y_ideal[sample.int(nrow(d), nrow(d), replace = FALSE)]
 42 | 
 43 | 
 44 | solve_with_penalty <- function(ycol, penalty) {
 45 |   n <- length(ycol)
 46 |   indices = seq_len(n)
 47 |   x <- const_costs(ycol, 1+numeric(n), 1, indices)
 48 |   x <- x + penalty
 49 |   solve_interval_partition(x, n)
 50 | }
 51 | 
 52 | lb <- 1
 53 | ub <- 10
 54 | while(length(solve_with_penalty(y_permuted, ub))>2) {
 55 |   ub <- ub*2
 56 | }
 57 | while(TRUE) {
 58 |   mid <- ceiling((ub+lb)/2)
 59 |   if(mid>=ub) {
 60 |     break
 61 |   }
 62 |   si <- solve_with_penalty(y_permuted, mid)
 63 |   if(length(si)<=2) {
 64 |     ub <- mid
 65 |   } else {
 66 |     lb <- mid
 67 |   }
 68 | }
 69 | print(ub)
 70 | ```
 71 | 
 72 | We now use this penalty to segment the data. Notice we recover the actual problem structure.
 73 | 
 74 | ```{r r5, fig.height = 6, fig.width = 8, fig.align = "center"}
 75 | soln <- solve_with_penalty(d$y_observed, ub)
 76 | print(soln)
 77 | 
 78 | 
 79 | d$group <- as.character(findInterval(d$x, soln))
 80 | group_means <- tapply(d$y_observed, d$group, mean)
 81 | d$group_mean <- group_means[d$group]
 82 | d$estimate <- d$group_mean
 83 | 
 84 | print(sum((d$y_observed - d$y_ideal)^2))
 85 |   
 86 | print(sum((d$group_mean - d$y_ideal)^2))
 87 | 
 88 | # plot
 89 | d$group <- as.character(d$group)
 90 | plot(d$x, d$y_observed,
 91 |      xlab = "x", ylab = "y",
 92 |      main = "RcppDynProg piecewise linear estimate\ndots: observed values, segments: estimated shape")
 93 | points(d$x, d$y_ideal,
 94 |      type = "l",
 95 |      lty = "dashed")
 96 | cmap <- c("#a6cee3",
 97 |           "#1f78b4",
 98 |           "#b2df8a",
 99 |           "#33a02c",
100 |           "#fb9a99",
101 |           "#e31a1c",
102 |           "#fdbf6f",
103 |           "#ff7f00",
104 |           "#cab2d6",
105 |           "#6a3d9a",
106 |           "#ffff99",
107 |           "#b15928")
108 | names(cmap) <- as.character(seq_len(length(cmap)))
109 | points(d$x, d$y_observed, col = cmap[d$group], pch=19)
110 | groups <- sort(unique(d$group))
111 | for(gi in groups) {
112 |   di <- d[d$group==gi, , drop = FALSE]
113 |   lines(di$x, di$estimate, col = cmap[di$group[[1]]], lwd=2)
114 | }
115 | ```
116 | 
117 | The same solution through the more succinct `solve_for_partitionc()` interface.
118 | 
119 | ```{r r2, fig.height = 6, fig.width = 8, fig.align = "center"}
120 | # x_cuts <- solve_for_partition(d$x, d$y_observed)
121 | # sometimes a different penalty due to problem chunking
122 | x_cuts <- solve_for_partitionc(d$x, d$y_observed, penalty = ub)
123 | print(x_cuts)
124 | 
125 | d$estimate <- approx(x_cuts$x, x_cuts$pred, xout = d$x, method = "constant", rule = 2)$y
126 | d$group <- as.character(findInterval(d$x, x_cuts[x_cuts$what=="left", "x"]))
127 | 
128 | print(sum((d$y_observed - d$y_ideal)^2))
129 | 
130 | print(sum((d$estimate - d$y_ideal)^2))
131 | 
132 | print(sum((d$estimate - d$y_observed)^2))
133 | 
134 | # plot
135 | d$group <- as.character(d$group)
136 | plot(d$x, d$y_observed,
137 |      xlab = "x", ylab = "y",
138 |      main = "RcppDynProg piecewise constant estimate\ndots: observed values, segments: estimated shape")
139 | points(d$x, d$y_ideal,
140 |      type = "l",
141 |      lty = "dashed")
142 | cmap <- c("#a6cee3",
143 |           "#1f78b4",
144 |           "#b2df8a",
145 |           "#33a02c",
146 |           "#fb9a99",
147 |           "#e31a1c",
148 |           "#fdbf6f",
149 |           "#ff7f00",
150 |           "#cab2d6",
151 |           "#6a3d9a",
152 |           "#ffff99",
153 |           "#b15928")
154 | names(cmap) <- as.character(seq_len(length(cmap)))
155 | points(d$x, d$y_observed, col = cmap[d$group], pch=19)
156 | groups <- sort(unique(d$group))
157 | for(gi in groups) {
158 |   di <- d[d$group==gi, , drop = FALSE]
159 |   lines(di$x, di$estimate, col = cmap[di$group[[1]]], lwd=2)
160 | }
161 | ```
162 | 
163 | 


--------------------------------------------------------------------------------
/src/lin_costs_logistic.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <RcppArmadillo.h>
  3 | 
  4 | using Rcpp::NumericVector;
  5 | using Rcpp::NumericMatrix;
  6 | using Rcpp::IntegerVector;
  7 | 
  8 | #include "input_summary.h"
  9 | 
 10 | NumericVector xlogistic_fits_worker(const NumericVector &x, const NumericVector &y, 
 11 |                                     const NumericVector &w,
 12 |                                     const int i, const int j);
 13 | 
 14 | NumericVector logistic_fits_worker(const NumericVector &x, const NumericVector &y, 
 15 |                                    const NumericVector &w,
 16 |                                    const int i, const int j);
 17 | 
 18 | 
 19 | double lin_cost_logistic_worker(const NumericVector &x, const NumericVector &y, 
 20 |                                 const NumericVector &w,
 21 |                                 const int min_seg,
 22 |                                 const int i, const int j) {
 23 |   if(j <= (i + (min_seg-1))) {
 24 |     return std::numeric_limits<double>::max();
 25 |   }
 26 |   const int vlen = x.length();
 27 |   if((i<0) || (j>=vlen) || (vlen!=y.length()) || (vlen!=w.length()) || (min_seg<1)) {
 28 |     throw std::range_error("Inadmissible value");
 29 |   }
 30 |   // look for some corner cases
 31 |   const input_summary isum = input_summary(x, y, w, i, j, -1);
 32 |   if((isum.k_points<=1L)||(!isum.y_varies())) {
 33 |     // no data or small enough for perfect fit
 34 |     return 0.0;
 35 |   }
 36 |   if(isum.seperable()) {
 37 |     return 0.0;
 38 |   }
 39 |   // // TODO: try to get out of sample calculation working.
 40 |   // NumericVector fits;
 41 |   // if((j-i)<=100) {
 42 |   //   fits = xlogistic_fits_worker(x, y, w, i, j);
 43 |   // } else {
 44 |   //   fits = logistic_fits_worker(x, y, w, i, j);
 45 |   // }
 46 |   NumericVector fits = logistic_fits_worker(x, y, w, i, j);
 47 |   double sum_loss = 0.0;
 48 |   for(int k=i; k<=j; ++k) {
 49 |     if(w(k)>0.0) {
 50 |       const double y_est = 1/(1+std::exp(-fits(k-i)));
 51 |       double loss = 0.0;
 52 |       if(y(k)>0.0) {
 53 |         loss = loss + y(k)*std::log(y_est);
 54 |       }
 55 |       if(y(k)<1.0) {
 56 |         loss = loss + (1.0-y(k))*std::log(1.0-y_est);
 57 |       }
 58 |       sum_loss = sum_loss + -w(k)*2.0*loss;
 59 |     }
 60 |   }
 61 |   return sum_loss;
 62 | }
 63 | 
 64 | //' lin_cost_logistic logistic deviance pricing
 65 | //' 
 66 | //' Calculate deviance cost of using logistic model fit on points to estimate other points in the interval.
 67 | //' Fits are evaluated in-sample.
 68 | //' Zero indexed.
 69 | //' 
 70 | //' 
 71 | //' @param x NumericVector, x-coords of values to group.
 72 | //' @param y NumericVector, values to group in order (should be in interval [0,1]).
 73 | //' @param w NumericVector, weights (positive).
 74 | //' @param min_seg positive integer, minimum segment size (>=1).
 75 | //' @param i integer, first index (inclusive).
 76 | //' @param j integer, j>=i last index (inclusive);
 77 | //' @return scalar, linear cost of [i,...,j] interval (inclusive).
 78 | //' 
 79 | //' @keywords internal
 80 | //' 
 81 | //' @examples
 82 | //' 
 83 | //' lin_cost_logistic(c(1, 2, 3, 4, 5, 6, 7), c(0, 0, 1, 0, 1, 1, 0), c(1, 1, 1, 1, 1, 1, 1), 3, 0, 6)
 84 | //' 
 85 | //' @export
 86 | // [[Rcpp::export]]
 87 | double lin_cost_logistic(NumericVector x, NumericVector y, NumericVector w,
 88 |                          const int min_seg,
 89 |                          const int i, const int j) {
 90 |   return lin_cost_logistic_worker(x, y, w,
 91 |                                   min_seg,
 92 |                                   i, j);
 93 | }
 94 | 
 95 | //' lin_costs_logistic deviance costs.
 96 | //' 
 97 | //' Built matrix of interval deviance costs for held-out logistic models.
 98 | //' Fits are evaluated in-sample.
 99 | //' One indexed.
100 | //' 
101 | //' 
102 | //' @param x NumericVector, x-coords of values to group.
103 | //' @param y NumericVector, values to group in order (should be in interval [0,1]).
104 | //' @param w NumericVector, weights (should be positive).
105 | //' @param min_seg positive integer, minimum segment size (>=1).
106 | //' @param indices IntegerVector, ordered list of indices to pair.
107 | //' @return xcosts NumericMatix, for j>=i xcosts(i,j) is the cost of partition element [i,...,j] (inclusive).
108 | //' 
109 | //' @examples
110 | //' 
111 | //' lin_costs_logistic(c(1, 2, 3, 4, 5, 6, 7), c(0, 0, 1, 0, 1, 1, 0), c(1, 1, 1, 1, 1, 1, 1), 3, 1:7)
112 | //' 
113 | //' @export
114 | // [[Rcpp::export]]
115 | NumericMatrix lin_costs_logistic(NumericVector x, NumericVector y, NumericVector w,
116 |                                  const int min_seg,
117 |                                  IntegerVector indices) {
118 |   const int vlen = x.length();
119 |   if((vlen!=y.length()) || (vlen!=w.length()) || (min_seg<1)) {
120 |     throw std::range_error("Inadmissible value");
121 |   }
122 |   const int n = indices.size();
123 |   NumericMatrix xcosts = NumericMatrix(n, n);
124 |   const double single_value = std::numeric_limits<double>::max();
125 |   for(int i=0; i<n; ++i) {
126 |     xcosts(i,i) = single_value;
127 |     for(int j=i+1; j<n; ++j) {
128 |       const double sum_loss = lin_cost_logistic_worker(x, y, w, min_seg, indices(i)-1, indices(j)-1);
129 |       xcosts(i,j) = sum_loss;
130 |       xcosts(j,i) = sum_loss;
131 |     }
132 |   }
133 |   return xcosts;
134 | }
135 | 


--------------------------------------------------------------------------------
/docs/bootstrap-toc.js:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
  3 |  * Copyright 2015 Aidan Feldman
  4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
  5 | (function() {
  6 |   'use strict';
  7 | 
  8 |   window.Toc = {
  9 |     helpers: {
 10 |       // return all matching elements in the set, or their descendants
 11 |       findOrFilter: function($el, selector) {
 12 |         // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/
 13 |         // http://stackoverflow.com/a/12731439/358804
 14 |         var $descendants = $el.find(selector);
 15 |         return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])');
 16 |       },
 17 | 
 18 |       generateUniqueIdBase: function(el) {
 19 |         var text = $(el).text();
 20 |         var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-');
 21 |         return anchor || el.tagName.toLowerCase();
 22 |       },
 23 | 
 24 |       generateUniqueId: function(el) {
 25 |         var anchorBase = this.generateUniqueIdBase(el);
 26 |         for (var i = 0; ; i++) {
 27 |           var anchor = anchorBase;
 28 |           if (i > 0) {
 29 |             // add suffix
 30 |             anchor += '-' + i;
 31 |           }
 32 |           // check if ID already exists
 33 |           if (!document.getElementById(anchor)) {
 34 |             return anchor;
 35 |           }
 36 |         }
 37 |       },
 38 | 
 39 |       generateAnchor: function(el) {
 40 |         if (el.id) {
 41 |           return el.id;
 42 |         } else {
 43 |           var anchor = this.generateUniqueId(el);
 44 |           el.id = anchor;
 45 |           return anchor;
 46 |         }
 47 |       },
 48 | 
 49 |       createNavList: function() {
 50 |         return $('<ul class="nav"></ul>');
 51 |       },
 52 | 
 53 |       createChildNavList: function($parent) {
 54 |         var $childList = this.createNavList();
 55 |         $parent.append($childList);
 56 |         return $childList;
 57 |       },
 58 | 
 59 |       generateNavEl: function(anchor, text) {
 60 |         var $a = $('<a></a>');
 61 |         $a.attr('href', '#' + anchor);
 62 |         $a.text(text);
 63 |         var $li = $('<li></li>');
 64 |         $li.append($a);
 65 |         return $li;
 66 |       },
 67 | 
 68 |       generateNavItem: function(headingEl) {
 69 |         var anchor = this.generateAnchor(headingEl);
 70 |         var $heading = $(headingEl);
 71 |         var text = $heading.data('toc-text') || $heading.text();
 72 |         return this.generateNavEl(anchor, text);
 73 |       },
 74 | 
 75 |       // Find the first heading level (`<h1>`, then `<h2>`, etc.) that has more than one element. Defaults to 1 (for `<h1>`).
 76 |       getTopLevel: function($scope) {
 77 |         for (var i = 1; i <= 6; i++) {
 78 |           var $headings = this.findOrFilter($scope, 'h' + i);
 79 |           if ($headings.length > 1) {
 80 |             return i;
 81 |           }
 82 |         }
 83 | 
 84 |         return 1;
 85 |       },
 86 | 
 87 |       // returns the elements for the top level, and the next below it
 88 |       getHeadings: function($scope, topLevel) {
 89 |         var topSelector = 'h' + topLevel;
 90 | 
 91 |         var secondaryLevel = topLevel + 1;
 92 |         var secondarySelector = 'h' + secondaryLevel;
 93 | 
 94 |         return this.findOrFilter($scope, topSelector + ',' + secondarySelector);
 95 |       },
 96 | 
 97 |       getNavLevel: function(el) {
 98 |         return parseInt(el.tagName.charAt(1), 10);
 99 |       },
100 | 
101 |       populateNav: function($topContext, topLevel, $headings) {
102 |         var $context = $topContext;
103 |         var $prevNav;
104 | 
105 |         var helpers = this;
106 |         $headings.each(function(i, el) {
107 |           var $newNav = helpers.generateNavItem(el);
108 |           var navLevel = helpers.getNavLevel(el);
109 | 
110 |           // determine the proper $context
111 |           if (navLevel === topLevel) {
112 |             // use top level
113 |             $context = $topContext;
114 |           } else if ($prevNav && $context === $topContext) {
115 |             // create a new level of the tree and switch to it
116 |             $context = helpers.createChildNavList($prevNav);
117 |           } // else use the current $context
118 | 
119 |           $context.append($newNav);
120 | 
121 |           $prevNav = $newNav;
122 |         });
123 |       },
124 | 
125 |       parseOps: function(arg) {
126 |         var opts;
127 |         if (arg.jquery) {
128 |           opts = {
129 |             $nav: arg
130 |           };
131 |         } else {
132 |           opts = arg;
133 |         }
134 |         opts.$scope = opts.$scope || $(document.body);
135 |         return opts;
136 |       }
137 |     },
138 | 
139 |     // accepts a jQuery object, or an options object
140 |     init: function(opts) {
141 |       opts = this.helpers.parseOps(opts);
142 | 
143 |       // ensure that the data attribute is in place for styling
144 |       opts.$nav.attr('data-toggle', 'toc');
145 | 
146 |       var $topContext = this.helpers.createChildNavList(opts.$nav);
147 |       var topLevel = this.helpers.getTopLevel(opts.$scope);
148 |       var $headings = this.helpers.getHeadings(opts.$scope, topLevel);
149 |       this.helpers.populateNav($topContext, topLevel, $headings);
150 |     }
151 |   };
152 | 
153 |   $(function() {
154 |     $('nav[data-toggle="toc"]').each(function(i, el) {
155 |       var $nav = $(el);
156 |       Toc.init($nav);
157 |     });
158 |   });
159 | })();
160 | 


--------------------------------------------------------------------------------
/docs/articles/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Articles • RcppDynProg</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Articles"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
 6 |     
 7 | 
 8 |     <div class="container template-article-index">
 9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
10 |   <div class="container">
11 |     <div class="navbar-header">
12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
13 |         <span class="sr-only">Toggle navigation</span>
14 |         <span class="icon-bar"></span>
15 |         <span class="icon-bar"></span>
16 |         <span class="icon-bar"></span>
17 |       </button>
18 |       <span class="navbar-brand">
19 |         <a class="navbar-link" href="../index.html">RcppDynProg</a>
20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.1</span>
21 |       </span>
22 |     </div>
23 | 
24 |     <div id="navbar" class="navbar-collapse collapse">
25 |       <ul class="nav navbar-nav"><li>
26 |   <a href="../articles/RcppDynProg.html">Get started</a>
27 | </li>
28 | <li>
29 |   <a href="../reference/index.html">Reference</a>
30 | </li>
31 | <li class="dropdown">
32 |   <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
33 |     Articles
34 |      
35 |     <span class="caret"></span>
36 |   </a>
37 |   <ul class="dropdown-menu" role="menu"><li>
38 |       <a href="../articles/Segmentation.html">Segmentation</a>
39 |     </li>
40 |     <li>
41 |       <a href="../articles/SegmentationL.html">Linear Segmentation</a>
42 |     </li>
43 |   </ul></li>
44 | <li>
45 |   <a href="../news/index.html">Changelog</a>
46 | </li>
47 |       </ul><ul class="nav navbar-nav navbar-right"><li>
48 |   <a href="https://win-vector.com/" class="external-link">Sponsor: Win-Vector LLC</a>
49 | </li>
50 |       </ul></div><!--/.nav-collapse -->
51 |   </div><!--/.container -->
52 | </div><!--/.navbar -->
53 | 
54 |       
55 | 
56 |       </header><div class="row">
57 |   <div class="col-md-9 contents">
58 |     <div class="page-header">
59 |       <h1>Articles</h1>
60 |     </div>
61 | 
62 |     <div class="section ">
63 |       <h3>All vignettes</h3>
64 |       <p class="section-desc"></p>
65 | 
66 |       <dl><dt><a href="RcppDynProg.html">RcppDynProg package</a></dt>
67 |         <dd>
68 |         </dd><dt><a href="Segmentation.html">Segmentation</a></dt>
69 |         <dd>
70 |         </dd><dt><a href="SegmentationL.html">Linear Segmentation</a></dt>
71 |         <dd>
72 |       </dd></dl></div>
73 |   </div>
74 | </div>
75 | 
76 | 
77 |       <footer><div class="copyright">
78 |   <p></p><p>Developed by John Mount, Nina Zumel.</p>
79 | </div>
80 | 
81 | <div class="pkgdown">
82 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
83 | </div>
84 | 
85 |       </footer></div>
86 | 
87 |   
88 | 
89 | 
90 |   
91 | 
92 |   </body></html>
93 | 
94 | 


--------------------------------------------------------------------------------
/extras/Timings.md:
--------------------------------------------------------------------------------
  1 | Timings
  2 | ================
  3 | 
  4 | ``` r
  5 | knitr::opts_chunk$set(fig.width=12, fig.height=8) 
  6 | library("RcppDynProg")
  7 | library("WVPlots")
  8 | library("microbenchmark")
  9 | library("rqdatatable")
 10 | ```
 11 | 
 12 |     ## Loading required package: rquery
 13 | 
 14 | ``` r
 15 | set.seed(2018)
 16 | n <- 500
 17 | x <- matrix(runif(n*n), nrow=n, ncol=n)
 18 | 
 19 | solve_interval_partition(x, n)
 20 | ```
 21 | 
 22 |     ## [1]   1 109 230 267 501
 23 | 
 24 | ``` r
 25 | solve_interval_partition_R(x, n)
 26 | ```
 27 | 
 28 |     ## [1]   1 109 230 267 501
 29 | 
 30 | ``` r
 31 | timings <- microbenchmark(
 32 |   solve_interval_partition(x, n),
 33 |   solve_interval_partition_R(x, n),
 34 |   times = 5L)
 35 | 
 36 | print(timings)
 37 | ```
 38 | 
 39 |     ## Unit: milliseconds
 40 |     ##                              expr         min          lq        mean
 41 |     ##    solve_interval_partition(x, n)    87.30949    87.56452    87.96436
 42 |     ##  solve_interval_partition_R(x, n) 20932.52901 21096.78018 21188.00710
 43 |     ##       median          uq         max neval
 44 |     ##     88.01139    88.22678    88.70962     5
 45 |     ##  21128.80190 21215.39725 21566.52718     5
 46 | 
 47 | ``` r
 48 | p <- data.frame(timings)
 49 | p$seconds <- p$time/1e+9
 50 | p$method <- as.factor(p$expr)
 51 | p$method <- reorder(p$method, p$seconds)
 52 | 
 53 | summary <- p %.>%
 54 |   project(., 
 55 |           mean_seconds = mean(seconds),
 56 |           groupby = "method")
 57 | print(summary)
 58 | ```
 59 | 
 60 |     ##                              method mean_seconds
 61 |     ## 1: solve_interval_partition_R(x, n)  21.18800710
 62 |     ## 2:   solve_interval_partition(x, n)   0.08796436
 63 | 
 64 | ``` r
 65 | ratio <- max(summary$mean_seconds)/min(summary$mean_seconds)
 66 | print(ratio)
 67 | ```
 68 | 
 69 |     ## [1] 240.8704
 70 | 
 71 | ``` r
 72 | WVPlots::ScatterBoxPlotH(p, 
 73 |                          "seconds", "method", 
 74 |                          "performance of same dynamic programming code in R and Rcpp (C++)")
 75 | ```
 76 | 
 77 | ![](Timings_files/figure-markdown_github/unnamed-chunk-1-1.png)
 78 | 
 79 | ------------------------------------------------------------------------
 80 | 
 81 | Timings on a 2018 Dell XPS-13 laptop, 16 Gib RAM, LPDDR3, 2133 MT/s, Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz (8 cores reported), idle, charged, and plugged into power supply. Ubuntu 18.04.1 LTS.
 82 | 
 83 | ``` r
 84 | R.version.string
 85 | ```
 86 | 
 87 |     ## [1] "R version 3.5.1 (2018-07-02)"
 88 | 
 89 | ``` r
 90 | R.version
 91 | ```
 92 | 
 93 |     ##                _                           
 94 |     ## platform       x86_64-pc-linux-gnu         
 95 |     ## arch           x86_64                      
 96 |     ## os             linux-gnu                   
 97 |     ## system         x86_64, linux-gnu           
 98 |     ## status                                     
 99 |     ## major          3                           
100 |     ## minor          5.1                         
101 |     ## year           2018                        
102 |     ## month          07                          
103 |     ## day            02                          
104 |     ## svn rev        74947                       
105 |     ## language       R                           
106 |     ## version.string R version 3.5.1 (2018-07-02)
107 |     ## nickname       Feather Spray
108 | 
109 | ``` r
110 | sessionInfo()
111 | ```
112 | 
113 |     ## R version 3.5.1 (2018-07-02)
114 |     ## Platform: x86_64-pc-linux-gnu (64-bit)
115 |     ## Running under: Ubuntu 18.04.1 LTS
116 |     ## 
117 |     ## Matrix products: default
118 |     ## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
119 |     ## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
120 |     ## 
121 |     ## locale:
122 |     ##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
123 |     ##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
124 |     ##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
125 |     ##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
126 |     ##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
127 |     ## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
128 |     ## 
129 |     ## attached base packages:
130 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
131 |     ## 
132 |     ## other attached packages:
133 |     ## [1] rqdatatable_1.1.2    rquery_1.2.1         microbenchmark_1.4-6
134 |     ## [4] WVPlots_1.0.7        RcppDynProg_0.1.0   
135 |     ## 
136 |     ## loaded via a namespace (and not attached):
137 |     ##  [1] Rcpp_1.0.0        sigr_1.0.3        pillar_1.3.0     
138 |     ##  [4] compiler_3.5.1    plyr_1.8.4        bindr_0.1.1      
139 |     ##  [7] tools_3.5.1       digest_0.6.18     lattice_0.20-38  
140 |     ## [10] evaluate_0.12     tibble_1.4.2      gtable_0.2.0     
141 |     ## [13] nlme_3.1-137      mgcv_1.8-26       pkgconfig_2.0.2  
142 |     ## [16] rlang_0.3.0.1     Matrix_1.2-15     parallel_3.5.1   
143 |     ## [19] yaml_2.2.0        xfun_0.4          bindrcpp_0.2.2   
144 |     ## [22] gridExtra_2.3     withr_2.1.2       stringr_1.3.1    
145 |     ## [25] dplyr_0.7.8       knitr_1.21        grid_3.5.1       
146 |     ## [28] tidyselect_0.2.5  data.table_1.11.8 glue_1.3.0       
147 |     ## [31] R6_2.3.0          rmarkdown_1.11    wrapr_1.8.2      
148 |     ## [34] ggplot2_3.1.0     purrr_0.2.5       magrittr_1.5     
149 |     ## [37] scales_1.0.0      htmltools_0.3.6   assertthat_0.2.0 
150 |     ## [40] colorspace_1.3-2  labeling_0.3      stringi_1.2.4    
151 |     ## [43] lazyeval_0.2.1    munsell_0.5.0     crayon_1.3.4
152 | 


--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en">
  3 | <head>
  4 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5 | <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | <title>Page not found (404) • RcppDynProg</title>
  9 | <!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous">
 10 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css">
 11 | <script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
 12 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
 13 | <!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet">
 14 | <script src="pkgdown.js"></script><meta property="og:title" content="Page not found (404)">
 15 | <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 16 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 17 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 18 | <![endif]-->
 19 | </head>
 20 | <body data-spy="scroll" data-target="#toc">
 21 |     
 22 | 
 23 |     <div class="container template-title-body">
 24 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 25 |   <div class="container">
 26 |     <div class="navbar-header">
 27 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 28 |         <span class="sr-only">Toggle navigation</span>
 29 |         <span class="icon-bar"></span>
 30 |         <span class="icon-bar"></span>
 31 |         <span class="icon-bar"></span>
 32 |       </button>
 33 |       <span class="navbar-brand">
 34 |         <a class="navbar-link" href="index.html">RcppDynProg</a>
 35 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.1</span>
 36 |       </span>
 37 |     </div>
 38 | 
 39 |     <div id="navbar" class="navbar-collapse collapse">
 40 |       <ul class="nav navbar-nav">
 41 | <li>
 42 |   <a href="articles/RcppDynProg.html">Get started</a>
 43 | </li>
 44 | <li>
 45 |   <a href="reference/index.html">Reference</a>
 46 | </li>
 47 | <li class="dropdown">
 48 |   <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
 49 |     Articles
 50 |      
 51 |     <span class="caret"></span>
 52 |   </a>
 53 |   <ul class="dropdown-menu" role="menu">
 54 | <li>
 55 |       <a href="articles/Segmentation.html">Segmentation</a>
 56 |     </li>
 57 |     <li>
 58 |       <a href="articles/SegmentationL.html">Linear Segmentation</a>
 59 |     </li>
 60 |   </ul>
 61 | </li>
 62 | <li>
 63 |   <a href="news/index.html">Changelog</a>
 64 | </li>
 65 |       </ul>
 66 | <ul class="nav navbar-nav navbar-right">
 67 | <li>
 68 |   <a href="https://win-vector.com/" class="external-link">Sponsor: Win-Vector LLC</a>
 69 | </li>
 70 |       </ul>
 71 | </div>
 72 | <!--/.nav-collapse -->
 73 |   </div>
 74 | <!--/.container -->
 75 | </div>
 76 | <!--/.navbar -->
 77 | 
 78 |       
 79 | 
 80 |       </header><div class="row">
 81 |   <div class="contents col-md-9">
 82 |     <div class="page-header">
 83 |       <h1>Page not found (404)</h1>
 84 |     </div>
 85 | 
 86 | Content not found. Please use links in the navbar.
 87 | 
 88 |   </div>
 89 | 
 90 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
 91 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
 92 |     </nav>
 93 | </div>
 94 | 
 95 | </div>
 96 | 
 97 | 
 98 | 
 99 |       <footer><div class="copyright">
100 |   <p></p>
101 | <p>Developed by John Mount, Nina Zumel.</p>
102 | </div>
103 | 
104 | <div class="pkgdown">
105 |   <p></p>
106 | <p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
107 | </div>
108 | 
109 |       </footer>
110 | </div>
111 | 
112 |   
113 | 
114 | 
115 |   
116 | 
117 |   </body>
118 | </html>
119 | 


--------------------------------------------------------------------------------
/extras/sp500/sp500_long.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "sp500 long"
  3 | output: github_document
  4 | ---
  5 | 
  6 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
  7 | library("RcppDynProg")
  8 | library("ggplot2")
  9 | 
 10 | # Data from: 
 11 | # https://finance.yahoo.com/quote/%5EGSPC/history?period1=-630950400&period2=1546416000&interval=1d&filter=history&frequency=1d
 12 | sp500 <- read.csv("^GSPC.csv")
 13 | sp500$Date <- as.Date(sp500$Date)
 14 | sp500$x <- as.numeric(sp500$Date)
 15 | sp500$log_price <- log(sp500$Adj.Close)
 16 | 
 17 | 
 18 | sp500recent <- sp500[sp500$Date >= as.Date('2000-01-01'), ]
 19 | 
 20 | 
 21 | # find penalty
 22 | sp500recent$permuted <- sp500recent$log_price[sample.int(nrow(sp500recent), 
 23 |                                                          nrow(sp500recent), 
 24 |                                                          replace = FALSE)]
 25 | # look for a penalty that prefers 1 segment on permuted data
 26 | lb <- 0
 27 | ub <- 100
 28 | while(TRUE) {
 29 |   soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = ub, max_k = 3)
 30 |   if(nrow(soln)==2) {
 31 |     break
 32 |   }
 33 |   lb <- ub
 34 |   ub <- 10*ub
 35 | }
 36 | while(TRUE) {
 37 |   penalty <- ceiling((lb+ub)/2)
 38 |   if(penalty>=ub) {
 39 |     break
 40 |   }
 41 |   soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = penalty, max_k = 3)
 42 |   if(nrow(soln)==2) {
 43 |     ub <- penalty
 44 |   } else {
 45 |     lb <- penalty
 46 |   }
 47 | }
 48 | print(penalty)
 49 | 
 50 | soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
 51 | sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
 52 |                              xout = sp500recent$x, 
 53 |                              method = "linear", rule = 2)$y)
 54 | sp500recent$group <- as.character(
 55 |   findInterval(sp500recent$x, soln[soln$what=="left", "x"]))
 56 | 
 57 | ggplot(data = sp500recent, aes(x = Date)) +
 58 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
 59 |   geom_line(aes(y=estimate, color = group)) +
 60 |   ggtitle("segment approximation of historic sp500recent data",
 61 |           subtitle = paste("per-segment penalty =", penalty)) +
 62 |   theme(legend.position = "none") +
 63 |   scale_color_brewer(palette = "Dark2") +
 64 |   scale_y_log10()
 65 | ```
 66 | 
 67 | ```{r r2, fig.height = 6, fig.width = 8, fig.align = "center"}
 68 | penalty <- 1
 69 | 
 70 | soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
 71 | sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
 72 |                              xout = sp500recent$x, 
 73 |                              method = "linear", rule = 2)$y)
 74 | sp500recent$group <- as.character(
 75 |   findInterval(sp500recent$x, soln[soln$what=="left", "x"]))
 76 | 
 77 | ggplot(data = sp500recent, aes(x = Date)) +
 78 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
 79 |   geom_line(aes(y=estimate, color = group)) +
 80 |   ggtitle("segment approximation of historic sp500 data",
 81 |           subtitle = paste("per-segment penalty =", penalty)) +
 82 |   theme(legend.position = "none") +
 83 |   scale_color_brewer(palette = "Dark2") +
 84 |   scale_y_log10()
 85 | ```
 86 | 
 87 | ```{r r3, fig.height = 6, fig.width = 8, fig.align = "center"}
 88 | penalty <- 5
 89 | 
 90 | soln <- solve_for_partition(sp500$x, sp500$log_price, penalty = penalty)
 91 | sp500$estimate <- exp(approx(soln$x, soln$pred, 
 92 |                              xout = sp500$x, 
 93 |                              method = "linear", rule = 2)$y)
 94 | 
 95 | sp500$group <- as.character(
 96 |   findInterval(sp500$x, soln[soln$what=="left", "x"]))
 97 | 
 98 | ggplot(data = sp500, aes(x = Date)) +
 99 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
100 |   geom_line(aes(y=estimate, group = group), color = "darkgreen") +
101 |   ggtitle("segment approximation of historic sp500 data",
102 |           subtitle = paste("per-segment penalty =", penalty)) +
103 |   theme(legend.position = "none") +
104 |   scale_color_manual(values = colors) +
105 |   scale_y_log10()
106 | ```
107 | 
108 | Naive gaps (TODO: need to find breakpoints that are good for the no-gap solution).
109 | 
110 | ```{r r4, fig.height = 6, fig.width = 8, fig.align = "center"}
111 | sl <- soln[soln$what=='left', ]
112 | fit <- vtreat:::encode_x_as_lambdas(
113 |   sp500$x, min(sp500$x), max(sp500$x),
114 |   sl$x)
115 | vars <- setdiff(colnames(fit), "intercept")
116 | fit$y <- sp500$log_price
117 | fmla <- wrapr::mk_formula("y", vars)
118 | model <- lm(fmla, data = fit)
119 | sp500$pred <- exp(predict(model, newdata = fit))
120 | 
121 | ggplot(data = sp500, aes(x = Date)) +
122 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
123 |   geom_line(aes(y=pred), color = "darkgreen") +
124 |   ggtitle("segment approximation (no gaps) of historic sp500 data",
125 |           subtitle = paste("per-segment penalty =", penalty)) +
126 |   theme(legend.position = "none") +
127 |   scale_color_brewer(palette = "Dark2") +
128 |   scale_y_log10()
129 | ```
130 | 
131 | Fit piecewise constant on delta series.
132 | 
133 | ```{r r5, fig.height = 6, fig.width = 8, fig.align = "center"}
134 | penalty <- 0.01
135 | 
136 | sp500$delta_log_price <- c(0, sp500$log_price[-1] - sp500$log_price[-nrow(sp500)])
137 |   
138 | soln <- solve_for_partitionc(sp500$x, sp500$delta_log_price, penalty = penalty)
139 | sl <- soln[soln$what=='left', ]
140 | fit <- vtreat:::encode_x_as_lambdas(
141 |   sp500$x, min(sp500$x), max(sp500$x),
142 |   sl$x)
143 | vars <- setdiff(colnames(fit), "intercept")
144 | fit$y <- sp500$log_price
145 | fmla <- wrapr::mk_formula("y", vars)
146 | model <- lm(fmla, data = fit)
147 | sp500$pred <- exp(predict(model, newdata = fit))
148 | 
149 | ggplot(data = sp500, aes(x = Date)) +
150 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
151 |   geom_line(aes(y=pred), color = "darkgreen") +
152 |   ggtitle("segment approximation (slope ests, no gaps) of historic sp500 data",
153 |           subtitle = paste("per-segment penalty =", penalty)) +
154 |   theme(legend.position = "none") +
155 |   scale_color_brewer(palette = "Dark2") +
156 |   scale_y_log10()
157 | ```
158 | 


--------------------------------------------------------------------------------
/extras/time_python_xlin_fits.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "data": {
 10 |       "text/plain": [
 11 |        "array([2.666715  , 1.28571541, 1.28571214, 2.66666833])"
 12 |       ]
 13 |      },
 14 |      "execution_count": 1,
 15 |      "metadata": {},
 16 |      "output_type": "execute_result"
 17 |     }
 18 |    ],
 19 |    "source": [
 20 |     "\n",
 21 |     "import numpy\n",
 22 |     "import pandas\n",
 23 |     "import timeit\n",
 24 |     "\n",
 25 |     "from xlin_fits_py import *\n",
 26 |     "\n",
 27 |     "x = numpy.asarray([1 ,2, 3, 4])\n",
 28 |     "y = numpy.asarray([1, 2, 2, 1])\n",
 29 |     "w = numpy.asarray([1, 1, 1, 1])\n",
 30 |     "xlin_fits_py(x, y, w)\n",
 31 |     "\n",
 32 |     "\n"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "code",
 37 |    "execution_count": 2,
 38 |    "metadata": {},
 39 |    "outputs": [
 40 |     {
 41 |      "data": {
 42 |       "text/html": [
 43 |        "<div>\n",
 44 |        "<style scoped>\n",
 45 |        "    .dataframe tbody tr th:only-of-type {\n",
 46 |        "        vertical-align: middle;\n",
 47 |        "    }\n",
 48 |        "\n",
 49 |        "    .dataframe tbody tr th {\n",
 50 |        "        vertical-align: top;\n",
 51 |        "    }\n",
 52 |        "\n",
 53 |        "    .dataframe thead th {\n",
 54 |        "        text-align: right;\n",
 55 |        "    }\n",
 56 |        "</style>\n",
 57 |        "<table border=\"1\" class=\"dataframe\">\n",
 58 |        "  <thead>\n",
 59 |        "    <tr style=\"text-align: right;\">\n",
 60 |        "      <th></th>\n",
 61 |        "      <th>x</th>\n",
 62 |        "      <th>y_ideal</th>\n",
 63 |        "      <th>y_observed</th>\n",
 64 |        "    </tr>\n",
 65 |        "  </thead>\n",
 66 |        "  <tbody>\n",
 67 |        "    <tr>\n",
 68 |        "      <th>0</th>\n",
 69 |        "      <td>0.05</td>\n",
 70 |        "      <td>0.000225</td>\n",
 71 |        "      <td>0.041055</td>\n",
 72 |        "    </tr>\n",
 73 |        "    <tr>\n",
 74 |        "      <th>1</th>\n",
 75 |        "      <td>0.10</td>\n",
 76 |        "      <td>0.000900</td>\n",
 77 |        "      <td>0.502473</td>\n",
 78 |        "    </tr>\n",
 79 |        "    <tr>\n",
 80 |        "      <th>2</th>\n",
 81 |        "      <td>0.15</td>\n",
 82 |        "      <td>0.002025</td>\n",
 83 |        "      <td>-0.027719</td>\n",
 84 |        "    </tr>\n",
 85 |        "    <tr>\n",
 86 |        "      <th>3</th>\n",
 87 |        "      <td>0.20</td>\n",
 88 |        "      <td>0.003600</td>\n",
 89 |        "      <td>-0.260338</td>\n",
 90 |        "    </tr>\n",
 91 |        "    <tr>\n",
 92 |        "      <th>4</th>\n",
 93 |        "      <td>0.25</td>\n",
 94 |        "      <td>0.005625</td>\n",
 95 |        "      <td>0.308052</td>\n",
 96 |        "    </tr>\n",
 97 |        "  </tbody>\n",
 98 |        "</table>\n",
 99 |        "</div>"
100 |       ],
101 |       "text/plain": [
102 |        "      x   y_ideal  y_observed\n",
103 |        "0  0.05  0.000225    0.041055\n",
104 |        "1  0.10  0.000900    0.502473\n",
105 |        "2  0.15  0.002025   -0.027719\n",
106 |        "3  0.20  0.003600   -0.260338\n",
107 |        "4  0.25  0.005625    0.308052"
108 |       ]
109 |      },
110 |      "execution_count": 2,
111 |      "metadata": {},
112 |      "output_type": "execute_result"
113 |     }
114 |    ],
115 |    "source": [
116 |     "d = pandas.read_csv('xlin_data.csv.gz', compression='gzip')\n",
117 |     "d.head()\n"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": 3,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "x = numpy.asarray(d[\"x\"].tolist())\n",
127 |     "y_observed = numpy.asarray(d[\"y_observed\"].tolist())\n",
128 |     "w = numpy.zeros(len(x)) + 1"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 4,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "s1 = xlin_fits_py(x, y_observed, w)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 5,
143 |    "metadata": {},
144 |    "outputs": [
145 |     {
146 |      "name": "stdout",
147 |      "output_type": "stream",
148 |      "text": [
149 |       "0.21375216569867916\n"
150 |      ]
151 |     }
152 |    ],
153 |    "source": [
154 |     "\n",
155 |     "reps = 10\n",
156 |     "\n",
157 |     "start = timeit.default_timer()\n",
158 |     "for i in range(reps):\n",
159 |     "    xlin_fits_py(x, y_observed, w)\n",
160 |     "end = timeit.default_timer()\n",
161 |     "\n",
162 |     "delta = end - start\n",
163 |     "print(delta/reps)"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "Timings on a 2018 Dell XPS-13 laptop, 16 Gib RAM, LPDDR3, 2133 MT/s, Intel(R) Core(TM) i5-8250U CPU @ 1.60GHz (8 cores reported), idle, charged, and plugged into power supply. Ubuntu 18.04.1 LTS.\n"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 6,
176 |    "metadata": {},
177 |    "outputs": [
178 |     {
179 |      "name": "stdout",
180 |      "output_type": "stream",
181 |      "text": [
182 |       "sys.version_info(major=3, minor=7, micro=0, releaselevel='final', serial=0)\n",
183 |       "1.15.1\n",
184 |       "0.23.4\n"
185 |      ]
186 |     }
187 |    ],
188 |    "source": [
189 |     "import sys\n",
190 |     "\n",
191 |     "print(sys.version_info)\n",
192 |     "\n",
193 |     "print(numpy.__version__)\n",
194 |     "print(pandas.__version__)\n"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": null,
200 |    "metadata": {},
201 |    "outputs": [],
202 |    "source": []
203 |   }
204 |  ],
205 |  "metadata": {
206 |   "kernelspec": {
207 |    "display_name": "Python 3",
208 |    "language": "python",
209 |    "name": "python3"
210 |   },
211 |   "language_info": {
212 |    "codemirror_mode": {
213 |     "name": "ipython",
214 |     "version": 3
215 |    },
216 |    "file_extension": ".py",
217 |    "mimetype": "text/x-python",
218 |    "name": "python",
219 |    "nbconvert_exporter": "python",
220 |    "pygments_lexer": "ipython3",
221 |    "version": "3.7.0"
222 |   }
223 |  },
224 |  "nbformat": 4,
225 |  "nbformat_minor": 2
226 | }
227 | 


--------------------------------------------------------------------------------
/docs/articles/RcppDynProg.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en">
  3 | <head>
  4 | <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  5 | <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | <title>RcppDynProg package • RcppDynProg</title>
  9 | <!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous">
 10 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css">
 11 | <script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
 12 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
 13 | <!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet">
 14 | <script src="../pkgdown.js"></script><meta property="og:title" content="RcppDynProg package">
 15 | <meta property="og:description" content="RcppDynProg">
 16 | <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 17 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 18 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 19 | <![endif]-->
 20 | </head>
 21 | <body data-spy="scroll" data-target="#toc">
 22 |     
 23 | 
 24 |     <div class="container template-article">
 25 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 26 |   <div class="container">
 27 |     <div class="navbar-header">
 28 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 29 |         <span class="sr-only">Toggle navigation</span>
 30 |         <span class="icon-bar"></span>
 31 |         <span class="icon-bar"></span>
 32 |         <span class="icon-bar"></span>
 33 |       </button>
 34 |       <span class="navbar-brand">
 35 |         <a class="navbar-link" href="../index.html">RcppDynProg</a>
 36 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.1</span>
 37 |       </span>
 38 |     </div>
 39 | 
 40 |     <div id="navbar" class="navbar-collapse collapse">
 41 |       <ul class="nav navbar-nav">
 42 | <li>
 43 |   <a href="../articles/RcppDynProg.html">Get started</a>
 44 | </li>
 45 | <li>
 46 |   <a href="../reference/index.html">Reference</a>
 47 | </li>
 48 | <li class="dropdown">
 49 |   <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
 50 |     Articles
 51 |      
 52 |     <span class="caret"></span>
 53 |   </a>
 54 |   <ul class="dropdown-menu" role="menu">
 55 | <li>
 56 |       <a href="../articles/Segmentation.html">Segmentation</a>
 57 |     </li>
 58 |     <li>
 59 |       <a href="../articles/SegmentationL.html">Linear Segmentation</a>
 60 |     </li>
 61 |   </ul>
 62 | </li>
 63 | <li>
 64 |   <a href="../news/index.html">Changelog</a>
 65 | </li>
 66 |       </ul>
 67 | <ul class="nav navbar-nav navbar-right">
 68 | <li>
 69 |   <a href="https://win-vector.com/" class="external-link">Sponsor: Win-Vector LLC</a>
 70 | </li>
 71 |       </ul>
 72 | </div>
 73 | <!--/.nav-collapse -->
 74 |   </div>
 75 | <!--/.container -->
 76 | </div>
 77 | <!--/.navbar -->
 78 | 
 79 |       
 80 | 
 81 |       </header><div class="row">
 82 |   <div class="col-md-9 contents">
 83 |     <div class="page-header toc-ignore">
 84 |       <h1 data-toc-skip>RcppDynProg package</h1>
 85 |                         <h4 data-toc-skip class="author">John Mount</h4>
 86 |             
 87 |             <h4 data-toc-skip class="date">2023-08-19</h4>
 88 |       
 89 |       <small class="dont-index">Source: <a href="https://github.com/WinVector/RcppDynProg/blob/HEAD/vignettes/RcppDynProg.Rmd" class="external-link"><code>vignettes/RcppDynProg.Rmd</code></a></small>
 90 |       <div class="hidden name"><code>RcppDynProg.Rmd</code></div>
 91 | 
 92 |     </div>
 93 | 
 94 |     
 95 |     
 96 | <p>This content has moved to <a href="https://github.com/WinVector/RcppDynProg" class="external-link">the package
 97 | README</a>.</p>
 98 |   </div>
 99 | 
100 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
101 | 
102 |       </div>
103 | 
104 | </div>
105 | 
106 | 
107 | 
108 |       <footer><div class="copyright">
109 |   <p></p>
110 | <p>Developed by John Mount, Nina Zumel.</p>
111 | </div>
112 | 
113 | <div class="pkgdown">
114 |   <p></p>
115 | <p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
116 | </div>
117 | 
118 |       </footer>
119 | </div>
120 | 
121 |   
122 | 
123 | 
124 |   
125 | 
126 |   </body>
127 | </html>
128 | 


--------------------------------------------------------------------------------
/extras/sp500/sp500_long.md:
--------------------------------------------------------------------------------
  1 | sp500 long
  2 | ================
  3 | 
  4 | ``` r
  5 | library("RcppDynProg")
  6 | library("ggplot2")
  7 | 
  8 | # Data from: 
  9 | # https://finance.yahoo.com/quote/%5EGSPC/history?period1=-630950400&period2=1546416000&interval=1d&filter=history&frequency=1d
 10 | sp500 <- read.csv("^GSPC.csv")
 11 | sp500$Date <- as.Date(sp500$Date)
 12 | sp500$x <- as.numeric(sp500$Date)
 13 | sp500$log_price <- log(sp500$Adj.Close)
 14 | 
 15 | 
 16 | sp500recent <- sp500[sp500$Date >= as.Date('2000-01-01'), ]
 17 | 
 18 | 
 19 | # find penalty
 20 | sp500recent$permuted <- sp500recent$log_price[sample.int(nrow(sp500recent), 
 21 |                                                          nrow(sp500recent), 
 22 |                                                          replace = FALSE)]
 23 | # look for a penalty that prefers 1 segment on permuted data
 24 | lb <- 0
 25 | ub <- 100
 26 | while(TRUE) {
 27 |   soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = ub, max_k = 3)
 28 |   if(nrow(soln)==2) {
 29 |     break
 30 |   }
 31 |   lb <- ub
 32 |   ub <- 10*ub
 33 | }
 34 | while(TRUE) {
 35 |   penalty <- ceiling((lb+ub)/2)
 36 |   if(penalty>=ub) {
 37 |     break
 38 |   }
 39 |   soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = penalty, max_k = 3)
 40 |   if(nrow(soln)==2) {
 41 |     ub <- penalty
 42 |   } else {
 43 |     lb <- penalty
 44 |   }
 45 | }
 46 | print(penalty)
 47 | ```
 48 | 
 49 |     ## [1] 10
 50 | 
 51 | ``` r
 52 | soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
 53 | sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
 54 |                              xout = sp500recent$x, 
 55 |                              method = "linear", rule = 2)$y)
 56 | sp500recent$group <- as.character(
 57 |   findInterval(sp500recent$x, soln[soln$what=="left", "x"]))
 58 | 
 59 | ggplot(data = sp500recent, aes(x = Date)) +
 60 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
 61 |   geom_line(aes(y=estimate, color = group)) +
 62 |   ggtitle("segment approximation of historic sp500recent data",
 63 |           subtitle = paste("per-segment penalty =", penalty)) +
 64 |   theme(legend.position = "none") +
 65 |   scale_color_brewer(palette = "Dark2") +
 66 |   scale_y_log10()
 67 | ```
 68 | 
 69 | <img src="sp500_long_files/figure-markdown_github/r1-1.png" style="display: block; margin: auto;" />
 70 | 
 71 | ``` r
 72 | penalty <- 1
 73 | 
 74 | soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
 75 | sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
 76 |                              xout = sp500recent$x, 
 77 |                              method = "linear", rule = 2)$y)
 78 | sp500recent$group <- as.character(
 79 |   findInterval(sp500recent$x, soln[soln$what=="left", "x"]))
 80 | 
 81 | ggplot(data = sp500recent, aes(x = Date)) +
 82 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
 83 |   geom_line(aes(y=estimate, color = group)) +
 84 |   ggtitle("segment approximation of historic sp500 data",
 85 |           subtitle = paste("per-segment penalty =", penalty)) +
 86 |   theme(legend.position = "none") +
 87 |   scale_color_brewer(palette = "Dark2") +
 88 |   scale_y_log10()
 89 | ```
 90 | 
 91 | <img src="sp500_long_files/figure-markdown_github/r2-1.png" style="display: block; margin: auto;" />
 92 | 
 93 | ``` r
 94 | penalty <- 5
 95 | 
 96 | soln <- solve_for_partition(sp500$x, sp500$log_price, penalty = penalty)
 97 | sp500$estimate <- exp(approx(soln$x, soln$pred, 
 98 |                              xout = sp500$x, 
 99 |                              method = "linear", rule = 2)$y)
100 | 
101 | sp500$group <- as.character(
102 |   findInterval(sp500$x, soln[soln$what=="left", "x"]))
103 | 
104 | ggplot(data = sp500, aes(x = Date)) +
105 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
106 |   geom_line(aes(y=estimate, group = group), color = "darkgreen") +
107 |   ggtitle("segment approximation of historic sp500 data",
108 |           subtitle = paste("per-segment penalty =", penalty)) +
109 |   theme(legend.position = "none") +
110 |   scale_color_manual(values = colors) +
111 |   scale_y_log10()
112 | ```
113 | 
114 | <img src="sp500_long_files/figure-markdown_github/r3-1.png" style="display: block; margin: auto;" />
115 | 
116 | Naive gaps (TODO: need to find breakpoints that are good for the no-gap solution).
117 | 
118 | ``` r
119 | sl <- soln[soln$what=='left', ]
120 | fit <- vtreat:::encode_x_as_lambdas(
121 |   sp500$x, min(sp500$x), max(sp500$x),
122 |   sl$x)
123 | vars <- setdiff(colnames(fit), "intercept")
124 | fit$y <- sp500$log_price
125 | fmla <- wrapr::mk_formula("y", vars)
126 | model <- lm(fmla, data = fit)
127 | sp500$pred <- exp(predict(model, newdata = fit))
128 | 
129 | ggplot(data = sp500, aes(x = Date)) +
130 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
131 |   geom_line(aes(y=pred), color = "darkgreen") +
132 |   ggtitle("segment approximation (no gaps) of historic sp500 data",
133 |           subtitle = paste("per-segment penalty =", penalty)) +
134 |   theme(legend.position = "none") +
135 |   scale_color_brewer(palette = "Dark2") +
136 |   scale_y_log10()
137 | ```
138 | 
139 | <img src="sp500_long_files/figure-markdown_github/r4-1.png" style="display: block; margin: auto;" />
140 | 
141 | Fit piecewise constant on delta series.
142 | 
143 | ``` r
144 | penalty <- 0.01
145 | 
146 | sp500$delta_log_price <- c(0, sp500$log_price[-1] - sp500$log_price[-nrow(sp500)])
147 |   
148 | soln <- solve_for_partitionc(sp500$x, sp500$delta_log_price, penalty = penalty)
149 | sl <- soln[soln$what=='left', ]
150 | fit <- vtreat:::encode_x_as_lambdas(
151 |   sp500$x, min(sp500$x), max(sp500$x),
152 |   sl$x)
153 | vars <- setdiff(colnames(fit), "intercept")
154 | fit$y <- sp500$log_price
155 | fmla <- wrapr::mk_formula("y", vars)
156 | model <- lm(fmla, data = fit)
157 | sp500$pred <- exp(predict(model, newdata = fit))
158 | 
159 | ggplot(data = sp500, aes(x = Date)) +
160 |   geom_line(aes(y=Adj.Close), color = "darkgray") +
161 |   geom_line(aes(y=pred), color = "darkgreen") +
162 |   ggtitle("segment approximation (slope ests, no gaps) of historic sp500 data",
163 |           subtitle = paste("per-segment penalty =", penalty)) +
164 |   theme(legend.position = "none") +
165 |   scale_color_brewer(palette = "Dark2") +
166 |   scale_y_log10()
167 | ```
168 | 
169 | <img src="sp500_long_files/figure-markdown_github/r5-1.png" style="display: block; margin: auto;" />
170 | 


--------------------------------------------------------------------------------
/docs/authors.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Authors and Citation • RcppDynProg</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="bootstrap-toc.css"><script src="bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="pkgdown.css" rel="stylesheet"><script src="pkgdown.js"></script><meta property="og:title" content="Authors and Citation"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
  3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
  4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
  6 |     
  7 | 
  8 |     <div class="container template-citation-authors">
  9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 10 |   <div class="container">
 11 |     <div class="navbar-header">
 12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 13 |         <span class="sr-only">Toggle navigation</span>
 14 |         <span class="icon-bar"></span>
 15 |         <span class="icon-bar"></span>
 16 |         <span class="icon-bar"></span>
 17 |       </button>
 18 |       <span class="navbar-brand">
 19 |         <a class="navbar-link" href="index.html">RcppDynProg</a>
 20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">0.2.1</span>
 21 |       </span>
 22 |     </div>
 23 | 
 24 |     <div id="navbar" class="navbar-collapse collapse">
 25 |       <ul class="nav navbar-nav"><li>
 26 |   <a href="articles/RcppDynProg.html">Get started</a>
 27 | </li>
 28 | <li>
 29 |   <a href="reference/index.html">Reference</a>
 30 | </li>
 31 | <li class="dropdown">
 32 |   <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" data-bs-toggle="dropdown" aria-expanded="false">
 33 |     Articles
 34 |      
 35 |     <span class="caret"></span>
 36 |   </a>
 37 |   <ul class="dropdown-menu" role="menu"><li>
 38 |       <a href="articles/Segmentation.html">Segmentation</a>
 39 |     </li>
 40 |     <li>
 41 |       <a href="articles/SegmentationL.html">Linear Segmentation</a>
 42 |     </li>
 43 |   </ul></li>
 44 | <li>
 45 |   <a href="news/index.html">Changelog</a>
 46 | </li>
 47 |       </ul><ul class="nav navbar-nav navbar-right"><li>
 48 |   <a href="https://win-vector.com/" class="external-link">Sponsor: Win-Vector LLC</a>
 49 | </li>
 50 |       </ul></div><!--/.nav-collapse -->
 51 |   </div><!--/.container -->
 52 | </div><!--/.navbar -->
 53 | 
 54 |       
 55 | 
 56 |       </header><div class="row">
 57 |   <div class="contents col-md-9">
 58 |     <div class="section level2 authors-section">
 59 |       <div class="page-header">
 60 |         <h1>Authors</h1>
 61 |       </div>
 62 | 
 63 |       
 64 |       <ul class="list-unstyled"><li>
 65 |           <p><strong>John Mount</strong>. Author, maintainer. 
 66 |           </p>
 67 |         </li>
 68 |         <li>
 69 |           <p><strong>Nina Zumel</strong>. Author. 
 70 |           </p>
 71 |         </li>
 72 |         <li>
 73 |           <p><strong> Win-Vector LLC</strong>. Copyright holder. 
 74 |           </p>
 75 |         </li>
 76 |       </ul></div>
 77 |     <div class="section level2 citation-section">
 78 |     <div>
 79 |       <h1 id="citation">Citation</h1>
 80 |       <small class="dont-index">Source: <a href="https://github.com/WinVector/RcppDynProg/blob/HEAD/DESCRIPTION" class="external-link"><code>DESCRIPTION</code></a></small>
 81 |     </div>
 82 |     </div>
 83 | 
 84 | 
 85 |     <p>Mount J, Zumel N (2023).
 86 | <em>RcppDynProg: 'Rcpp' Dynamic Programming</em>.
 87 | https://github.com/WinVector/RcppDynProg/, https://winvector.github.io/RcppDynProg/. 
 88 | </p>
 89 |     <pre>@Manual{,
 90 |   title = {RcppDynProg: 'Rcpp' Dynamic Programming},
 91 |   author = {John Mount and Nina Zumel},
 92 |   year = {2023},
 93 |   note = {https://github.com/WinVector/RcppDynProg/, https://winvector.github.io/RcppDynProg/},
 94 | }</pre>
 95 | 
 96 |   </div>
 97 | 
 98 | </div>
 99 | 
100 | 
101 | 
102 |       <footer><div class="copyright">
103 |   <p></p><p>Developed by John Mount, Nina Zumel.</p>
104 | </div>
105 | 
106 | <div class="pkgdown">
107 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
108 | </div>
109 | 
110 |       </footer></div>
111 | 
112 |   
113 | 
114 | 
115 |   
116 | 
117 |   </body></html>
118 | 
119 | 


--------------------------------------------------------------------------------
/extras/sp500/sp500_example.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "sp500 Example"
  3 | output: pdf_document
  4 | ---
  5 | 
  6 | Another approach to segmentation is given by Koh, Kim, and Boyd's L1-Trend filter (refs [here](http://web.stanford.edu/~boyd/l1_tf/) and [here](http://web.stanford.edu/~boyd/papers/l1_trend_filter.html)).  The approach is to, given a sequence $y$ of length $N$, use a global optimizer solve for a n-vector $x$ minimizing:
  7 | 
  8 | \[ (1/2)||x-y||^2_2 + \lambda ||D x||_1 \]
  9 | 
 10 | where $D$ is the second-order difference matrix that calculates $x_{k-1} - 2 x_{k} + x_{k+1}$ for all $k$.  Or in other words find $x$ minimizing:
 11 | 
 12 | \[ (1/2)||x-y||^2_2 + \lambda \sum_{k=2}^{N-1} |x_{k-1} - 2 x_{k} + x_{k+1}| \]
 13 | 
 14 | They call the above Hodric-Prescott Filtering.  It is *very* important to note that this sort of filtering is *not* appropriate for forecasting time-series (or *ex ante* work), as it is moving data from the future into the estimate in 2 ways:
 15 | 
 16 |   * $x_{k+1}$ is directly in the penalty term for $x_{k}$ (moving fit quality information known at time k+1 to time k).
 17 |   * It is using a global optimization to find all $x$ simultaneously, meaning choices of later $x$s can affect earlier choices.
 18 | 
 19 | However, this sort of can be used to look at *ex post* (after the fact) changes in behavior.
 20 | 
 21 | Richard Bellman wrote about a related smoothing filter in *Dynamic Programming*, Princeton 1958, Ch 1, section 7, minimizing:
 22 | 
 23 | \[ \sum_{k=1}^{N} g_k(x_{k} - r_{k}) + \sum_{k=2}^{N} h_k(x_{k} - x_{k-1}) \]
 24 | 
 25 | (here we are solving for $x$, given $r$ and the $g_k()$ and $h_k()$ are sequences of arbitrary penalty functions.
 26 | 
 27 | Koh, Kim, and Boyd supplied [implementations of the method in both Matlab and C](http://web.stanford.edu/~boyd/l1_tf/), and Hadley Wickham provided a wrapper to make the `C` code available to [`R`](https://www.r-project.org) users.  Dirk Eddelbuettel worked on an [`Rcpp` adaption of the methodology](https://github.com/eddelbuettel/rcppl1tf).
 28 | 
 29 | The idea is: one can take series data (such as historic S&P 500 prices) and, by picking a smoothing $\lambda$ produce a graph such as the following:
 30 | 
 31 | ![](snp500_l1t_0200.png)
 32 | 
 33 | 
 34 | [`RcppDynProg`](https://github.com/WinVector/RcppDynProg) uses a different, but related methodology.  The user can specify any per-interval penalty, meaning fit quality does not have to be per-point additive.  The default metric is the quality of a linear fit on the segment (discussed [here](https://winvector.github.io/RcppDynProg/articles/RcppDynProg.html)).  Then by specifying a bound on the number of segments or a per-segment penalty (to discourage many small segments) a piecewise linear approximation is built up using a global dynamic program optimizer.  Again: the non-local nature of the segment quality scores (the default score depends on all points in the segment, not just previous points) plus the global optimization mean the segmentation is only available after all the data are known (so not suitable for forecasting a time series forward).
 35 | 
 36 | That being said the results look like the following.
 37 | 
 38 | ```{r r1, fig.height = 6, fig.width = 8, fig.align = "center"}
 39 | library("RcppDynProg")
 40 | library("ggplot2")
 41 | 
 42 | # Data from: https://github.com/eddelbuettel/l1tf/blob/master/data-raw/sp500.csv
 43 | sp500 <- read.csv("sp500.csv")
 44 | sp500$date <- as.Date(sp500$date)
 45 | 
 46 | sp500$x <- as.numeric(sp500$date)
 47 | sp500$permuted <- sp500$raw[sample.int(nrow(sp500), nrow(sp500), replace = FALSE)]
 48 | 
 49 | 
 50 | soln <- solve_for_partition(sp500$x, sp500$raw, penalty = 250000)
 51 | sp500$estimate <- approx(soln$x, soln$pred, 
 52 |                          xout = sp500$x, 
 53 |                          method = "linear", rule = 2)$y
 54 | sp500$group <- as.character(
 55 |   findInterval(sp500$x, soln[soln$what=="left", "x"]))
 56 | 
 57 | ggplot(data = sp500, aes(x = date)) +
 58 |   geom_line(aes(y=raw), color = "lightgray") +
 59 |   geom_line(aes(y=estimate, color = group)) +
 60 |   ggtitle("segment approximation of historic data",
 61 |           subtitle = "per-segment penalty = 250000") +
 62 |   theme(legend.position = "none") +
 63 |   scale_color_brewer(palette = "Dark2")
 64 | ```
 65 | 
 66 | Notice in our definition of piecewise we do not insist the pieces touch.
 67 | A smoother that enforces that can be found [here](https://github.com/WinVector/vtreat/blob/master/R/segmented_variable.R) (demonstrated
 68 | [here](https://github.com/WinVector/RcppDynProg/blob/master/extras/SegmentationL.md) as `PiecewiseV`).
 69 | 
 70 | Or, instead of specifying a penalty, the user can specify a bound on the number of segments allowed.
 71 | 
 72 | 
 73 | ```{r r2, fig.height = 6, fig.width = 8, fig.align = "center"}
 74 | soln <- solve_for_partition(sp500$x, sp500$raw, max_k = 5, penalty = 0)
 75 | sp500$estimate <- approx(soln$x, soln$pred, 
 76 |                          xout = sp500$x, 
 77 |                          method = "linear", rule = 2)$y
 78 | sp500$group <- as.character(
 79 |   findInterval(sp500$x, soln[soln$what=="left", "x"]))
 80 | 
 81 | ggplot(data = sp500, aes(x = date)) +
 82 |   geom_line(aes(y=raw), color = "lightgray") +
 83 |   geom_line(aes(y=estimate, color = group)) +
 84 |   ggtitle("5 segment approximation of historic data") +
 85 |   theme(legend.position = "none") +
 86 |   scale_color_brewer(palette = "Dark2")
 87 | ```
 88 | 
 89 | 
 90 | Or instead of specifying the penalty we can attempt to solve for
 91 | a plausible value using a permutation test.
 92 | 
 93 | 
 94 | ```{r r3, fig.height = 6, fig.width = 8, fig.align = "center"}
 95 | 
 96 | # look for a penalty that prefers 1 segment on permuted data
 97 | lb <- 0
 98 | ub <- 100
 99 | while(TRUE) {
100 |   soln <- solve_for_partition(sp500$x, sp500$permuted, penalty = ub, max_k = 3)
101 |   if(nrow(soln)==2) {
102 |     break
103 |   }
104 |   lb <- ub
105 |   ub <- 10*ub
106 | }
107 | while(TRUE) {
108 |   penalty <- ceiling((lb+ub)/2)
109 |   if(penalty>=ub) {
110 |     break
111 |   }
112 |   soln <- solve_for_partition(sp500$x, sp500$permuted, penalty = penalty, max_k = 3)
113 |   if(nrow(soln)==2) {
114 |     ub <- penalty
115 |   } else {
116 |     lb <- penalty
117 |   }
118 | }
119 | print(penalty)
120 | 
121 | soln <- solve_for_partition(sp500$x, sp500$raw, penalty = penalty)
122 | sp500$estimate <- approx(soln$x, soln$pred, 
123 |                      xout = sp500$x, 
124 |                      method = "linear", rule = 2)$y
125 | sp500$group <- as.character(
126 |   findInterval(sp500$x, soln[soln$what=="left", "x"]))
127 | 
128 | ggplot(data = sp500, aes(x = date)) +
129 |   geom_line(aes(y=raw), color = "lightgray") +
130 |   geom_line(aes(y=estimate, color = group)) +
131 |   ggtitle("segment approximation of historic data",
132 |           subtitle = paste("per-segment penalty =", penalty)) +
133 |   theme(legend.position = "none") +
134 |   scale_color_brewer(palette = "Dark2")
135 | 
136 | ```
137 | 


--------------------------------------------------------------------------------
/extras/PseudoInverse.tex:
--------------------------------------------------------------------------------
  1 | 
  2 | \documentclass{article}
  3 | 
  4 | \usepackage{hyperref}
  5 | \usepackage{amsmath}
  6 | \usepackage{amssymb}
  7 | 
  8 | \newtheorem{theorem}{Theorem}
  9 | 
 10 | \begin{document}
 11 | 
 12 | \title{The 2 by 2 Real Pseudo Inverse}
 13 | \author{John Mount}
 14 | \date{2019-01-07}
 15 | 
 16 | 
 17 | 
 18 | 
 19 | \maketitle
 20 | 
 21 | 
 22 | This is a brief note on the math behind the direct
 23 | PRESS statistic calculation\footnote{\url{http://www.win-vector.com/blog/2014/09/estimating-generalization-error-with-the-press-statistic/}} 
 24 | found in the RcppDynProg package\footnote{\url{https://github.com/WinVector/RcppDynProg}}.
 25 | 
 26 | The actual `C++` code\footnote{\url{https://github.com/WinVector/RcppDynProg/blob/master/src/xlin_fits.cpp}} is a bit ugly and intimidating.  That is because we are using a verbose scalar notation to represent matrix concepts.  In matrix notation we are solving a linear system by inverting a two by two matrix.\footnote{Yes, there are the usual admonitions that one should not invert a matrix to solve a linear system, but for systems this small they do not apply.}  We are in turn inverting the two by two matrix by exploiting the following well know rule of how to invert a two by two matrix.
 27 | 
 28 | 
 29 | If $a d - b c$ is not zero then:
 30 | 
 31 | \[ 
 32 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{-1}
 33 | =
 34 | \frac{1}{a d - b c}
 35 | \begin{pmatrix} d & -b \\ -c & a \end{pmatrix}
 36 | \]
 37 | 
 38 | Throughout $a$, $b$, $c$, and $d$ all real scalars.
 39 | 
 40 | This can be re-written as the following general relation.
 41 | 
 42 | \[ 
 43 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 44 | \begin{pmatrix} d & -b \\ -c & a \end{pmatrix}
 45 | =
 46 | (a d - b c) 
 47 | \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}
 48 | \]
 49 | 
 50 | The above can be directly checked just by applying the rules for multiplying matrices to the left two matrices.  This has a particularly pleasant presentation if we recognize that $a d - b c$ is  the determinant of the left matrix and use the traditional vertical bar determinant notation.
 51 | 
 52 | \[ 
 53 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 54 | \begin{pmatrix} d & -b \\ -c & a \end{pmatrix}
 55 | =
 56 | \begin{vmatrix} a & b \\ c & d \end{vmatrix}
 57 | \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}
 58 | \]
 59 | 
 60 | 
 61 | 
 62 | Now there is an issue of what to do when $a d - b c$ is zero.  For our implementation we apply Tikhonov regularization\footnote{\url{https://en.wikipedia.org/wiki/Tikhonov_regularization}} which (barring the exact numeric coincidence of minus two times the expected value of the independent variable equaling our regularization constant) is going to be non-singular.  For our actual application, we could simply switch degenerate situations to the out-of sample mean implementation\footnote{\url{https://github.com/WinVector/RcppDynProg/blob/master/src/const_costs.cpp}}.
 63 | 
 64 | But, for fun, let's play with the math a bit.
 65 | 
 66 | There is an additional lesser known algebraic relation for two by two matrices.
 67 | 
 68 | 
 69 | \[ 
 70 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 71 | \begin{pmatrix} a & c \\ b & d \end{pmatrix}
 72 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 73 | =
 74 | (a^2 + b^2 + c^2 + d^2) 
 75 | \begin{pmatrix} a & b \\ c & d \end{pmatrix} 
 76 | +
 77 | (a d - b c)
 78 | \begin{pmatrix} -d & c \\ b & -a \end{pmatrix} 
 79 | \]
 80 | 
 81 | Or (using transpose, matrix squared Frobenius norm, and determinant notation):
 82 | 
 83 | \begin{theorem}
 84 | For any real 2 by 2 matrix
 85 | $\begin{pmatrix} a & b \\ c & d \end{pmatrix}$ we have:
 86 | 
 87 | 
 88 | \[
 89 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 90 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{\top}
 91 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
 92 | =
 93 | \begin{Vmatrix} a & b \\ c & d \end{Vmatrix}_2^2
 94 | \begin{pmatrix} a & b \\ c & d \end{pmatrix} 
 95 | +
 96 | \begin{vmatrix} a & b \\ c & d \end{vmatrix}
 97 | \begin{pmatrix} -d & c \\ b & -a \end{pmatrix} 
 98 | \].
 99 | The superscript "top" denoting the transpose operation, the $||.||^2_2$ denoting sum of squares norm, and the single $|.|$ denoting determinant.
100 | \\ $\square$
101 | \label{thm:fmla}
102 | \end{theorem}
103 | 
104 | 
105 | This means, if $a d - b c$ is zero then:
106 | 
107 | 
108 | \[ 
109 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
110 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{\top}
111 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
112 | =
113 | \begin{Vmatrix} a & b \\ c & d \end{Vmatrix}_2^2
114 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}
115 | \]
116 | 
117 | 
118 | Once we [confirm the above relation\footnote{\url{https://github.com/WinVector/RcppDynProg/blob/master/extras/PseudoInverse.ipynb}} we can also confirm that if $a^2 + b^2 + c^2 + d^2$ is not zero, then the following matrix:
119 | 
120 | \[ 
121 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{+}
122 | =
123 | \frac{1}{a^2 + b^2 + c^2 + d^2}
124 | \begin{pmatrix} a & c \\ b & d \end{pmatrix}
125 | \]
126 | 
127 | satisfies all of the conditions for being the Moore-Penrose inverse\footnote{\url{https://en.wikipedia.org/wiki/Moore–Penrose_inverse}} (or pseudo-inverse) of our original matrix.  The superscript-plus denoting the Moore-Penrose inverse operation.
128 | 
129 | Or in transpose notation:
130 | 
131 | \[ 
132 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{+}
133 | =
134 | \frac{1}{a^2 + b^2 + c^2 + d^2}
135 | \begin{pmatrix} a & b \\ c & d \end{pmatrix}^{\top}
136 | \]
137 | 
138 | This is called the pseudo-inverse because it acts like an inverse, even for non-invertible matrices.  For $A^{+}$ to be
139 | a More-Penrose inverse we must confirm it obeys the following relations:
140 | 
141 | \begin{align*}
142 | A A^{+} A &= A \\
143 | A^{+} A A^{+} &= A^{+} \\
144 | (A A^{+})^{\top} &= A A^{+} \\
145 | (A^{+} A)^{\top} &= A^{+} A
146 | \end{align*}
147 | 
148 | Theorem \ref{thm:fmla} lets us check the first relation, the other follow quickly as our $A^{+}$ is a simple scalar multiple of the transpose.\footnote{With the same scaling for both $A^{+}$ and $(A^{\top})^{+}$.}
149 | 
150 | All of the above check relations would be true for a classic inverse.  We can think of $A^{+}$ as almost canceling a single $A$ to the left or the $A$ to the right.
151 | 
152 | 
153 | \begin{theorem}
154 | For any real 2 by 2 matrix $\begin{pmatrix} a & b \\ c & d \end{pmatrix}$
155 | 
156 | The  Moore-Penrose inverse is:
157 | 
158 | 
159 | \begin{itemize}
160 | 
161 | 
162 | \item When $a d - b c$ is not zero:
163 | 
164 | \[
165 | \frac{1}{a d - b c}
166 | \begin{pmatrix} d & -b \\ -c & a \end{pmatrix} 
167 | \]
168 | 
169 | \item When $a d - b c$ is zero and $a^2 + b^2 + c^2 + d^2$ is not zero:
170 | 
171 | \[ 
172 | \frac{1}{a^2 + b^2 + c^2 + d^2}
173 | \begin{pmatrix} a & c \\ b & d \end{pmatrix}
174 | \]
175 | 
176 | \item Otherwise:
177 | 
178 | \[ 
179 | \begin{pmatrix} 0 & 0 \\ 0 & 0 \end{pmatrix}
180 | \]
181 | \end{itemize}
182 | .
183 | \\ $\square$
184 | \end{theorem}
185 | 
186 | For general matrices the situation is much more complicated. 
187 | 
188 | The wealth of symmetries and relations is really kind of neat.
189 | 
190 | 
191 | 
192 | 
193 | \end{document}
194 | 


--------------------------------------------------------------------------------
/docs/reference/RcppDynProg.html:
--------------------------------------------------------------------------------
  1 | <!-- Generated by pkgdown: do not edit by hand -->
  2 | <!DOCTYPE html>
  3 | <html lang="en">
  4 |   <head>
  5 |   <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | 
  9 | <title>RcppDynProg — RcppDynProg • RcppDynProg</title>
 10 | 
 11 | 
 12 | <!-- jquery -->
 13 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
 14 | <!-- Bootstrap -->
 15 | 
 16 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
 17 | 
 18 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
 19 | 
 20 | <!-- bootstrap-toc -->
 21 | <link rel="stylesheet" href="../bootstrap-toc.css">
 22 | <script src="../bootstrap-toc.js"></script>
 23 | 
 24 | <!-- Font Awesome icons -->
 25 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
 26 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
 27 | 
 28 | <!-- clipboard.js -->
 29 | <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
 30 | 
 31 | <!-- headroom.js -->
 32 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
 33 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
 34 | 
 35 | <!-- pkgdown -->
 36 | <link href="../pkgdown.css" rel="stylesheet">
 37 | <script src="../pkgdown.js"></script>
 38 | 
 39 | 
 40 | 
 41 | 
 42 | <meta property="og:title" content="RcppDynProg — RcppDynProg" />
 43 | <meta property="og:description" content="Rcpp dynamic programming solutions for partitioning and machine learning problems.
 44 | Includes out of sample fitting applications.  
 45 | Also supplies additional custom coders for the vtreat package.
 46 | Please see https://github.com/WinVector/RcppDynProg for details." />
 47 | 
 48 | 
 49 | 
 50 | 
 51 | <!-- mathjax -->
 52 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
 53 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
 54 | 
 55 | <!--[if lt IE 9]>
 56 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 57 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 58 | <![endif]-->
 59 | 
 60 | 
 61 | 
 62 |   </head>
 63 | 
 64 |   <body data-spy="scroll" data-target="#toc">
 65 |     <div class="container template-reference-topic">
 66 |       <header>
 67 |       <div class="navbar navbar-default navbar-fixed-top" role="navigation">
 68 |   <div class="container">
 69 |     <div class="navbar-header">
 70 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 71 |         <span class="sr-only">Toggle navigation</span>
 72 |         <span class="icon-bar"></span>
 73 |         <span class="icon-bar"></span>
 74 |         <span class="icon-bar"></span>
 75 |       </button>
 76 |       <span class="navbar-brand">
 77 |         <a class="navbar-link" href="../index.html">RcppDynProg</a>
 78 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.2.0</span>
 79 |       </span>
 80 |     </div>
 81 | 
 82 |     <div id="navbar" class="navbar-collapse collapse">
 83 |       <ul class="nav navbar-nav">
 84 |         <li>
 85 |   <a href="../index.html">
 86 |     <span class="fas fa fas fa-home fa-lg"></span>
 87 |      
 88 |   </a>
 89 | </li>
 90 | <li>
 91 |   <a href="../articles/RcppDynProg.html">Get started</a>
 92 | </li>
 93 | <li>
 94 |   <a href="../reference/index.html">Reference</a>
 95 | </li>
 96 | <li class="dropdown">
 97 |   <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
 98 |     Articles
 99 |      
100 |     <span class="caret"></span>
101 |   </a>
102 |   <ul class="dropdown-menu" role="menu">
103 |     <li>
104 |       <a href="../articles/Segmentation.html">Segmentation</a>
105 |     </li>
106 |     <li>
107 |       <a href="../articles/SegmentationL.html">Linear Segmentation</a>
108 |     </li>
109 |   </ul>
110 | </li>
111 | <li>
112 |   <a href="../news/index.html">Changelog</a>
113 | </li>
114 |       </ul>
115 |       <ul class="nav navbar-nav navbar-right">
116 |         <li>
117 |   <a href="https://win-vector.com/">Sponsor: Win-Vector LLC</a>
118 | </li>
119 |       </ul>
120 |       
121 |     </div><!--/.nav-collapse -->
122 |   </div><!--/.container -->
123 | </div><!--/.navbar -->
124 | 
125 |       
126 | 
127 |       </header>
128 | 
129 | <div class="row">
130 |   <div class="col-md-9 contents">
131 |     <div class="page-header">
132 |     <h1>RcppDynProg</h1>
133 |     <small class="dont-index">Source: <a href='https://github.com/WinVector/RcppDynProg/blob/master/R/package.R'><code>R/package.R</code></a></small>
134 |     <div class="hidden name"><code>RcppDynProg.Rd</code></div>
135 |     </div>
136 | 
137 |     <div class="ref-description">
138 |     <p>Rcpp dynamic programming solutions for partitioning and machine learning problems.
139 | Includes out of sample fitting applications.  
140 | Also supplies additional custom coders for the vtreat package.
141 | Please see <a href='https://github.com/WinVector/RcppDynProg'>https://github.com/WinVector/RcppDynProg</a> for details.</p>
142 |     </div>
143 | 
144 | 
145 | 
146 |     <h2 class="hasAnchor" id="author"><a class="anchor" href="#author"></a>Author</h2>
147 | 
148 |     <p>John Mount</p>
149 | 
150 |   </div>
151 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
152 |     <nav id="toc" data-toggle="toc" class="sticky-top">
153 |       <h2 data-toc-skip>Contents</h2>
154 |     </nav>
155 |   </div>
156 | </div>
157 | 
158 | 
159 |       <footer>
160 |       <div class="copyright">
161 |   <p>Developed by John Mount, Nina Zumel.</p>
162 | </div>
163 | 
164 | <div class="pkgdown">
165 |   <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
166 | </div>
167 | 
168 |       </footer>
169 |    </div>
170 | 
171 |   
172 | 
173 | 
174 |   </body>
175 | </html>
176 | 
177 | 
178 | 


--------------------------------------------------------------------------------