├── data └── PathData.rda ├── NAMESPACE ├── src ├── Makevars ├── Makevars.win ├── init.c ├── ChannelAttribution.h └── ChannelAttribution.cpp ├── inst └── COPYRIGHTS ├── man ├── Data.Rd ├── heuristic_models.Rd ├── transition_matrix.Rd ├── choose_order.Rd ├── ChannelAttribution-package.Rd ├── auto_markov_model.Rd ├── install_pro.Rd └── markov_model.Rd ├── MD5 ├── LICENSE ├── DESCRIPTION └── R └── ChannelAttribution.R /data/PathData.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/ChannelAttribution/HEAD/data/PathData.rda -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(ChannelAttribution, .registration = TRUE) 2 | importFrom(Rcpp, evalCpp) 3 | exportPattern("^[[:alpha:]]+") 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | ## Use the R_HOME indirection to support installations of multiple R version 2 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | ## This assume that we can call Rscript to ask Rcpp about its locations 2 | ## Use the R_HOME indirection to support installations of multiple R version 3 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"` $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | 2 | Overall license: 3 | ================ 4 | 5 | The aggregation, integration and packaging work is released under the 6 | GNU GPL (>= 3). 7 | 8 | Details: 9 | ======== 10 | 11 | Files: * 12 | Copyright: 2015 - Davide Altomare and David Loris 13 | License: GPL (>= 3) 14 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | // This file was automatically generated. 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | void R_init_ChannelAttribution(DllInfo* info) { 8 | R_registerRoutines(info, NULL, NULL, NULL, NULL); 9 | R_useDynamicSymbols(info, TRUE); 10 | } 11 | -------------------------------------------------------------------------------- /man/Data.Rd: -------------------------------------------------------------------------------- 1 | \name{Data} 2 | \docType{data} 3 | \alias{Data} 4 | 5 | \title{Customer journeys data.} 6 | 7 | \description{ 8 | Example dataset. 9 | } 10 | \usage{data(PathData)} 11 | \format{ 12 | \code{Data} is a data.frame with 10.000 rows and 4 columns: "path" containing customer paths, "total_conversions" containing total number of conversions, "total_conversion_value" containing total conversion value and "total_null" containing total number of paths that do not lead to conversion. 13 | } 14 | 15 | \keyword{customer journey dataset} 16 | \keyword{customer path data} 17 | \keyword{dataset} 18 | 19 | -------------------------------------------------------------------------------- /MD5: -------------------------------------------------------------------------------- 1 | 9347f1c5db2f2903516578580f447800 *DESCRIPTION 2 | f6591b1b7ad4a4f36c4e51d602c9c74e *LICENSE 3 | 0fd18e1c36da4f9da385328771d6eedf *NAMESPACE 4 | 6b7cc7490c287921627ee3e0c6f8760a *R/ChannelAttribution.R 5 | 20c8d3f97d93e2e318a22508275b5a02 *build/partial.rdb 6 | cb19b1555bffc631df81a901c9a332c4 *data/PathData.rda 7 | a28be19931d6baa438352900876e45cf *inst/COPYRIGHTS 8 | e4274b4c2f92d1765088ac7f360038ae *man/ChannelAttribution-package.Rd 9 | 35f836285db7b64f1bd924667a3a743a *man/Data.Rd 10 | fe7051b88525211d61bb8b43b73fe8a3 *man/auto_markov_model.Rd 11 | 3fd7982a10c8f5671ebfb4a0157d59e7 *man/choose_order.Rd 12 | 3e67b8e594002a8944cb1baa659ca8e7 *man/heuristic_models.Rd 13 | adcba493fbbd09601fceca3cd97cf221 *man/install_pro.Rd 14 | 8861bdcd042816fd621599c41ce3734f *man/markov_model.Rd 15 | e6364a23d02e7976e51a3efc31dcf3f4 *man/transition_matrix.Rd 16 | a8fb60f40f3b9e7b45e3aa4c07a0586f *src/ChannelAttribution.cpp 17 | 7f8735998148162d770e8f2b5123458c *src/ChannelAttribution.h 18 | 95a5e0dc5dbdec9c251c142cb4cad286 *src/Makevars 19 | 41f629f2659787aa816bc2be94756235 *src/Makevars.win 20 | aec8a3ac56d6bc2e5ac197c750102e50 *src/init.c 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ChannelAttribution licence 2 | -------------------------- 3 | 4 | ChannelAttribution: Markov model for online multi-channel attribution 5 | Copyright (C) 2015 - Davide Altomare and David Loris 6 | 7 | ChannelAttribution is free software: you can redistribute it and/or modify 8 | it under the terms of the GNU General Public License as published by 9 | the Free Software Foundation, either version 3 of the License, or 10 | (at your option) any later version. 11 | 12 | ChannelAttribution is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with ChannelAttribution. If not, see . 19 | 20 | 21 | Rcpp licence 22 | ------------ 23 | 24 | Copyright (C) 2010 - 2020 Dirk Eddelbuettel and Romain Francois 25 | License: GPL (>= 2) 26 | 27 | 28 | RcppArmadillo license 29 | --------------------- 30 | 31 | Copyright: 2010 - 2020 Dirk Eddelbuettel, Romain Francois and Doug Bates 32 | License: GPL (>= 2) 33 | 34 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ChannelAttribution 2 | Type: Package 3 | Title: Markov Model for Online Multi-Channel Attribution 4 | Version: 2.2.4 5 | Date: 2025-11-19 6 | Authors@R: c( 7 | person("Davide", "Altomare", email = "info@channelattribution.io", role = c("cre", "aut")), 8 | person("David", "Loris", email = "david@channelattribution.io", role = "aut") 9 | ) 10 | Maintainer: Davide Altomare 11 | Description: Advertisers use a variety of online marketing channels to reach consumers and they want to know the degree each channel contributes to their marketing success. This is called online multi-channel attribution problem. This package contains a probabilistic algorithm for the attribution problem. The model uses a k-order Markov representation to identify structural correlations in the customer journey data. The package also contains three heuristic algorithms (first-touch, last-touch and linear-touch approach) for the same problem. The algorithms are implemented in C++. 12 | License: GPL-3 | file LICENSE 13 | URL: https://channelattribution.io 14 | LinkingTo: Rcpp, RcppArmadillo 15 | Imports: Rcpp 16 | Suggests: curl, jsonlite 17 | NeedsCompilation: yes 18 | Packaged: 2025-11-19 08:40:01 UTC; davide 19 | Author: Davide Altomare [cre, aut], 20 | David Loris [aut] 21 | Repository: CRAN 22 | Date/Publication: 2025-11-19 13:20:22 UTC 23 | -------------------------------------------------------------------------------- /man/heuristic_models.Rd: -------------------------------------------------------------------------------- 1 | \name{heuristic_models} 2 | 3 | 4 | \alias{heuristic_models} 5 | 6 | 7 | \title{Heuristic models for the online attribution problem.} 8 | 9 | \description{Estimate theree heuristic models (first-touch, last-touch and linear) from customer journey data. 10 | } 11 | 12 | 13 | \usage{ 14 | heuristic_models(Data, var_path, var_conv, var_value=NULL, sep=">", flg_pro=TRUE) 15 | } 16 | 17 | 18 | \arguments{ 19 | \item{Data}{data.frame containing paths and conversions.} 20 | \item{var_path}{column name containing paths.} 21 | \item{var_conv}{column name containing total conversions.} 22 | \item{var_value}{column name containing total conversion value.} 23 | \item{sep}{separator between the channels.} 24 | \item{flg_pro}{if TRUE, ChannelAttribution Pro banner is printed.} 25 | } 26 | 27 | 28 | \value{An object of \code{class} \code{data.frame} with the estimated number of conversions and the estimated conversion value attributed to each channel for each model.} 29 | 30 | \author{ 31 | Davide Altomare (\email{info@channelattribution.io}). 32 | } 33 | 34 | 35 | \examples{ 36 | 37 | \dontrun{ 38 | 39 | library(ChannelAttribution) 40 | 41 | data(PathData) 42 | 43 | heuristic_models(Data,"path","total_conversions") 44 | heuristic_models(Data,"path","total_conversions",var_value="total_conversion_value") 45 | 46 | } 47 | 48 | } 49 | 50 | \keyword{first touch} 51 | \keyword{last touch} 52 | \keyword{linear touch} 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /man/transition_matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{transition_matrix} 2 | 3 | 4 | \alias{transition_matrix} 5 | 6 | 7 | \title{Transition matrix.} 8 | 9 | \description{Estimate a k-order transition matrix from customer journey data.} 10 | 11 | 12 | 13 | \usage{ 14 | transition_matrix(Data, var_path, var_conv, var_null, order=1, sep=">", 15 | flg_equal=TRUE, flg_pro=TRUE) 16 | } 17 | 18 | 19 | \arguments{ 20 | \item{Data}{data.frame containing customer journeys data.} 21 | \item{var_path}{column name containing paths.} 22 | \item{var_conv}{column name containing total conversions.} 23 | \item{var_null}{column name containing paths that do not lead to conversions.} 24 | \item{order}{Markov Model order.} 25 | \item{sep}{separator between the channels.} 26 | \item{flg_equal}{if TRUE, transitions from a channel to itself will be considered.} 27 | \item{flg_pro}{if TRUE, ChannelAttribution Pro banner is printed.} 28 | } 29 | 30 | 31 | \value{An object of \code{class} \code{List} containing a dataframe with channel names and a dataframe with the estimated transition matrix.} 32 | 33 | \author{ 34 | Davide Altomare (\email{info@channelattribution.io}). 35 | } 36 | 37 | 38 | \examples{ 39 | 40 | \dontrun{ 41 | 42 | library(ChannelAttribution) 43 | 44 | data(PathData) 45 | 46 | transition_matrix(Data, var_path="path", var_conv="total_conversions", 47 | var_null="total_null", order=1, sep=">", flg_equal=TRUE) 48 | 49 | transition_matrix(Data, var_path="path", var_conv="total_conversions", 50 | var_null="total_null", order=3, sep=">", flg_equal=TRUE) 51 | 52 | } 53 | 54 | } 55 | 56 | \keyword{markov model} 57 | \keyword{markov graph} 58 | -------------------------------------------------------------------------------- /src/ChannelAttribution.h: -------------------------------------------------------------------------------- 1 | //ChannelAttribution: Markov model for online multi-channel attribution 2 | //Copyright (C) 2015 - Davide Altomare and David Loris 3 | // 4 | //ChannelAttribution is free software: you can redistribute it and/or modify 5 | //it under the terms of the GNU General Public License as published by 6 | //the Free Software Foundation, either version 3 of the License, or 7 | //(at your option) any later version. 8 | // 9 | //ChannelAttribution is distributed in the hope that it will be useful, 10 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | //GNU General Public License for more details. 13 | // 14 | //You should have received a copy of the GNU General Public License 15 | //along with ChannelAttribution. If not, see . 16 | 17 | #ifndef header 18 | #define header 19 | 20 | using namespace std; 21 | using namespace Rcpp; 22 | 23 | string to_string(T pNumber); 24 | vector split_string(const string &s, unsigned long int order); 25 | 26 | RcppExport SEXP heuristic_models_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_value_p, SEXP sep_p); 27 | RcppExport SEXP choose_order_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_null_p, SEXP max_order_p, SEXP sep_p, SEXP ncore_p, SEXP roc_npt_p); 28 | RcppExport SEXP markov_model_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_value_p, SEXP var_null_p, SEXP order_p, SEXP nsim_start_p, SEXP max_step_p, SEXP out_more_p, string sep_p, SEXP ncore_p, SEXP nfold_p, SEXP seed_p, SEXP conv_par_p, SEXP rate_step_sim_p, SEXP verbose_p); 29 | transition_matrix_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_null_p, SEXP order_p, SEXP sep_p, SEXP flg_equal_p); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /man/choose_order.Rd: -------------------------------------------------------------------------------- 1 | \name{choose_order} 2 | 3 | 4 | \alias{choose_order} 5 | 6 | 7 | \title{Choose order for Markov model.} 8 | 9 | \description{Find the minimum Markov Model order that gives a good representation of customers' behaviour for data considered. It requires paths that do not lead to conversion as input. Minimum order is found maximizing a penalized area under ROC curve. 10 | } 11 | 12 | 13 | \usage{ 14 | 15 | choose_order(Data, var_path, var_conv, var_null, max_order=10, sep=">", 16 | ncore=1, roc_npt=100, plot=TRUE, flg_pro=TRUE) 17 | 18 | } 19 | 20 | 21 | \arguments{ 22 | \item{Data}{data.frame containing customer journeys.} 23 | \item{var_path}{column name of Data containing paths.} 24 | \item{var_conv}{column name of Data containing total conversions.} 25 | \item{var_null}{column name of Data containing total paths that do not lead to conversion.} 26 | \item{max_order}{maximum Markov Model order considered.} 27 | \item{sep}{separator between channels.} 28 | \item{ncore}{number of threads used in computation.} 29 | \item{roc_npt}{number of points used for approximating roc and auc.} 30 | \item{plot}{if TRUE, a plot with penalized auc with respect to order will be displayed.} 31 | \item{flg_pro}{if TRUE, ChannelAttribution Pro banner is printed.} 32 | } 33 | 34 | 35 | \value{An object of \code{class} \code{List} with the estimated roc, auc and penalized auc.} 36 | 37 | \author{ 38 | Davide Altomare (\email{info@channelattribution.io}). 39 | } 40 | 41 | 42 | \examples{ 43 | 44 | \dontrun{ 45 | 46 | library(ChannelAttribution) 47 | 48 | data(PathData) 49 | 50 | res=choose_order(Data, var_path="path", var_conv="total_conversions", 51 | var_null="total_null") 52 | 53 | #plot auc and penalized auc 54 | 55 | plot(res$auc$order,res$auc$auc,type="l",xlab="order",ylab="pauc",main="AUC") 56 | lines(res$auc$order,res$auc$pauc,col="red") 57 | legend("right", legend=c("auc","penalized auc"), 58 | col=c("black","red"),lty=1) 59 | 60 | } 61 | } 62 | 63 | \keyword{choose markov model order} 64 | \keyword{choose markov graph order} 65 | -------------------------------------------------------------------------------- /man/ChannelAttribution-package.Rd: -------------------------------------------------------------------------------- 1 | \name{ChannelAttribution-package} 2 | \alias{ChannelAttribution-package} 3 | \alias{ChannelAttribution} 4 | \docType{package} 5 | 6 | \title{ 7 | Markov Model for Online Multi-Channel Attribution 8 | } 9 | 10 | \description{ 11 | Advertisers use a variety of online marketing channels to reach consumers and they want to know the degree each channel contributes to their marketing success. This is called online multi-channel attribution problem. In many cases, advertisers approach this problem through some simple heuristics methods that do not take into account any customer interactions and often tend to underestimate the importance of small channels in marketing contribution. 12 | This package provides a function that approaches the attribution problem in a probabilistic way. It uses a k-order Markov representation to identify structural correlations in the customer journey data. This would allow advertisers to give a more reliable assessment of the marketing contribution of each channel. 13 | The approach basically follows the one presented in Eva Anderl, Ingo Becker, Florian v. Wangenheim, Jan H. Schumann (2014). Differently for them, we solved the estimation process using stochastic simulations. In this way it is also possible to take into account conversion values and their variability in the computation of the channel importance. 14 | The package also contains a function that estimates three heuristic models (first-touch, last-touch and linear-touch approach) for the same problem. 15 | } 16 | 17 | \details{ 18 | \tabular{ll}{ 19 | Package: \tab ChannelAttribution\cr 20 | Type: \tab Package\cr 21 | Version: \tab 2.2.4 \cr 22 | Date: \tab 2025-11-19 \cr 23 | License: \tab GPL (>= 2)\cr 24 | } 25 | Package contains functions for channel attribution in web marketing. 26 | } 27 | 28 | \author{ 29 | Davide Altomare, David Loris 30 | 31 | Maintainer Davide Altomare 32 | } 33 | 34 | \references{ 35 | 36 | ChannelAttribution Official Website: \url{https://channelattribution.io} 37 | 38 | Eva Anderl, Ingo Becker, Florian v. Wangenheim, Jan H. Schumann: Mapping the Customer Journey, 2014, \doi{10.2139/ssrn.2343077} 39 | 40 | } 41 | 42 | \keyword{marketing attribution} 43 | \keyword{web marketing} 44 | \keyword{multi channel marketing} 45 | \keyword{web statistics} 46 | \keyword{channel marketing} 47 | \keyword{online attribution} 48 | \keyword{customer journey} 49 | \keyword{multi channel funnel} 50 | \keyword{channel attribution} 51 | 52 | -------------------------------------------------------------------------------- /man/auto_markov_model.Rd: -------------------------------------------------------------------------------- 1 | \name{auto_markov_model} 2 | 3 | 4 | \alias{auto_markov_model} 5 | 6 | 7 | \title{Automatic Markov Model.} 8 | 9 | \description{Estimate a Markov model from customer journey data after automatically choosing a suitable order. It requires paths that do not lead to conversion as input.} 10 | 11 | 12 | \usage{ 13 | auto_markov_model(Data, var_path, var_conv, var_null, var_value=NULL, 14 | max_order=10, roc_npt=100, plot=FALSE, nsim_start=1e5, 15 | max_step=NULL, out_more=FALSE, sep=">", 16 | ncore=1, nfold=10, seed=0, conv_par=0.05, rate_step_sim=1.5, 17 | verbose=TRUE, flg_pro=TRUE) 18 | } 19 | 20 | \arguments{ 21 | \item{Data}{data.frame containing customer journeys data.} 22 | \item{var_path}{column name containing paths.} 23 | \item{var_conv}{column name containing total conversions.} 24 | \item{var_null}{column name containing total paths that do not lead to conversions.} 25 | \item{var_value}{column name containing total conversion value.} 26 | \item{max_order}{maximum Markov Model order considered.} 27 | \item{roc_npt}{number of points used for approximating roc and auc.} 28 | \item{plot}{if TRUE, a plot with penalized auc with respect to order will be displayed.} 29 | \item{nsim_start}{minimum number of simulations used in computation.} 30 | \item{max_step}{maximum number of steps for a single simulated path. if NULL, it is the maximum number of steps found into Data.} 31 | \item{out_more}{if TRUE, transition probabilities between channels and removal effects will be shown.} 32 | \item{sep}{separator between the channels.} 33 | \item{ncore}{number of threads used in computation.} 34 | \item{nfold}{how many repetitions are used to verify if convergence is reached at each iteration.} 35 | \item{seed}{random seed. Giving this parameter the same value over different runs guarantees that results will not vary.} 36 | \item{conv_par}{convergence parameter for the algorithm. The estimation process ends when the percentage of variation of the results over different repetitions is less than convergence parameter.} 37 | \item{rate_step_sim}{number of simulations used at each iteration is equal to the number of simulations used at previous iteration multiplied by rate_step_sim.} 38 | \item{verbose}{if TRUE, additional information about process convergence will be shown.} 39 | \item{flg_pro}{if TRUE, ChannelAttribution Pro banner is printed.} 40 | } 41 | 42 | 43 | \value{An object of \code{class} \code{data.frame} with the estimated number of conversions and the estimated conversion value attributed to each channel.} 44 | 45 | \author{ 46 | Davide Altomare (\email{info@channelattribution.io}). 47 | } 48 | 49 | 50 | \examples{ 51 | 52 | \dontrun{ 53 | 54 | library(ChannelAttribution) 55 | 56 | data(PathData) 57 | 58 | auto_markov_model(Data, "path", "total_conversions", "total_null") 59 | 60 | } 61 | 62 | } 63 | 64 | \keyword{markov model} 65 | \keyword{markov graph} 66 | -------------------------------------------------------------------------------- /man/install_pro.Rd: -------------------------------------------------------------------------------- 1 | \name{install_pro} 2 | \alias{install_pro} 3 | \title{Install ChannelAttributionPro binary tailored to your OS/arch/R} 4 | 5 | \description{ 6 | Interactively installs the \pkg{ChannelAttributionPro} package by contacting the 7 | ChannelAttribution build service, which returns a platform-appropriate binary (or 8 | source) package URL for your current operating system, CPU architecture, and R 9 | version. At the prompt you can enter: 10 | \itemize{ 11 | \item a \emph{token} (recommended), or 12 | \item a work/university \emph{email} to request a token (the function sends the request and exits). 13 | } 14 | } 15 | 16 | \usage{ 17 | install_pro() 18 | } 19 | 20 | \details{ 21 | When called, the function asks you to enter your ChannelAttributionPro \emph{token}. 22 | If you don't have one, you can enter your work/university email; the function will 23 | request a token for that address and return invisibly, so you can re-run it once the 24 | token arrives. 25 | 26 | Processing steps: 27 | \enumerate{ 28 | \item Detect your platform (\code{os}, \code{os_vers}, \code{arch}) and R minor version. 29 | \item Send a form-encoded request to the ChannelAttribution builder endpoint to obtain a package URL. 30 | \item Install the returned package via \code{\link[utils]{install.packages}} with \code{repos = NULL}. 31 | \item On exit, send a small status message to a monitoring endpoint with minimal text (no local files). 32 | } 33 | 34 | \strong{Dependencies:} 35 | This function requires \pkg{curl} (for HTTPS) and \pkg{jsonlite} (for JSON parsing). If either is missing, 36 | the function aborts with a friendly message. 37 | 38 | \strong{Networking and privacy:} 39 | The function performs outbound HTTPS requests to \url{https://app.channelattribution.io}, sending only the minimal 40 | parameters needed to resolve the correct binary and (on exit) a small status string for diagnostics. No local files 41 | or datasets are uploaded. 42 | 43 | \strong{Interactive prompt:} 44 | Input is collected via base R \code{readline()} (no masking). In some environments the input may be visible. 45 | } 46 | 47 | \value{ 48 | Invisibly returns \code{NULL}. Progress and outcome messages are printed. 49 | } 50 | 51 | \section{Errors and messages}{ 52 | \itemize{ 53 | \item \emph{Invalid or expired token (HTTP 401)}: You will see a message inviting you to contact 54 | \email{info@channelattribution.io} or request a new token. 55 | \item \emph{Network/SSL issues}: Reported as a concise \code{network_or_ssl_error: ...}. 56 | \item \emph{Unexpected builder response}: The function prints diagnostic information (platform and R version) 57 | that you can forward to support. 58 | } 59 | } 60 | 61 | \seealso{ 62 | \code{\link[utils]{install.packages}} 63 | } 64 | 65 | \examples{ 66 | \dontrun{ 67 | ## Typical interactive use: 68 | install_pro() 69 | 70 | ## If you don't have a token at prompt: 71 | ## - enter your work/university email to request one 72 | ## - check your inbox (including spam) 73 | ## - re-run install_pro() with the token 74 | } 75 | } 76 | 77 | \author{ChannelAttribution Team} 78 | \encoding{UTF-8} 79 | \keyword{utilities} 80 | 81 | 82 | -------------------------------------------------------------------------------- /man/markov_model.Rd: -------------------------------------------------------------------------------- 1 | \name{markov_model} 2 | 3 | 4 | \alias{markov_model} 5 | 6 | 7 | \title{Markov model for the online attribution problem.} 8 | 9 | \description{Estimate a k-order Markov model from customer journey data. Differently from markov_model, this function iterates estimation until convergence is reached and enables multiprocessing.} 10 | 11 | 12 | 13 | \usage{ 14 | markov_model(Data, var_path, var_conv, var_value=NULL, var_null=NULL, 15 | order=1, nsim_start=1e5, max_step=NULL, out_more=FALSE, sep=">", 16 | ncore=1, nfold=10, seed=0, conv_par=0.05, rate_step_sim=1.5, 17 | verbose=TRUE, flg_pro=TRUE) 18 | } 19 | 20 | 21 | \arguments{ 22 | \item{Data}{data.frame containing customer journeys data.} 23 | \item{var_path}{column name containing paths.} 24 | \item{var_conv}{column name containing total conversions.} 25 | \item{var_value}{column name containing total conversion value.} 26 | \item{var_null}{column name containing total paths that do not lead to conversions.} 27 | \item{order}{Markov Model order.} 28 | \item{nsim_start}{minimum number of simulations used in computation.} 29 | \item{max_step}{maximum number of steps for a single simulated path. if NULL, it is the maximum number of steps found into Data.} 30 | \item{out_more}{if TRUE, transition probabilities between channels and removal effects will be returned.} 31 | \item{sep}{separator between the channels.} 32 | \item{ncore}{number of threads used in computation.} 33 | \item{nfold}{how many repetitions are used to verify if convergence has been reached at each iteration.} 34 | \item{seed}{random seed. Giving this parameter the same value over different runs guarantees that results will not vary.} 35 | \item{conv_par}{convergence parameter for the algorithm. The estimation process ends when the percentage of variation of the results over different repetions is less than convergence parameter.} 36 | \item{rate_step_sim}{number of simulations used at each iteration is equal to the number of simulations used at previous iteration multiplied by rate_step_sim.} 37 | \item{verbose}{if TRUE, additional information about process convergence will be shown.} 38 | \item{flg_pro}{if TRUE, ChannelAttribution Pro banner is printed.} 39 | } 40 | 41 | 42 | \value{An object of \code{class} \code{data.frame} with the estimated number of conversions and the estimated conversion value attributed to each channel.} 43 | 44 | \author{ 45 | Davide Altomare (\email{info@channelattribution.io}). 46 | } 47 | 48 | 49 | \examples{ 50 | 51 | \dontrun{ 52 | 53 | library(ChannelAttribution) 54 | 55 | data(PathData) 56 | 57 | #Estimate a Makov model using total conversions 58 | markov_model(Data, var_path="path", "total_conversions") 59 | 60 | #Estimate a Makov model using total conversions and revenues 61 | markov_model(Data, "path", "total_conversions", 62 | var_value="total_conversion_value") 63 | 64 | #Estimate a Makov model using total conversions, revenues and paths that do not lead to conversions 65 | markov_model(Data, "path", "total_conversions", 66 | var_value="total_conversion_value", var_null="total_null") 67 | 68 | #Estimate a Makov model returning transition matrix and removal effects 69 | markov_model(Data, "path", "total_conversions", 70 | var_value="total_conversion_value", var_null="total_null", out_more=TRUE) 71 | 72 | #Estimate a Markov model using 4 threads 73 | markov_model(Data, "path", "total_conversions", 74 | var_value="total_conversion_value", ncore=4) 75 | 76 | } 77 | 78 | } 79 | 80 | \keyword{markov model} 81 | \keyword{markov graph} 82 | -------------------------------------------------------------------------------- /R/ChannelAttribution.R: -------------------------------------------------------------------------------- 1 | # ChannelAttribution: Markov model for online multi-channel attribution 2 | # Copyright (C) 2015 - Davide Altomare and David Loris 3 | 4 | # ChannelAttribution is free software: you can redistribute it and/or modify 5 | # it under the terms of the GNU General Public License as published by 6 | # the Free Software Foundation, either version 3 of the License, or 7 | # (at your option) any later version. 8 | 9 | # ChannelAttribution is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | 14 | # You should have received a copy of the GNU General Public License 15 | # along with ChannelAttribution. If not, see . 16 | 17 | .v=packageVersion("ChannelAttribution") 18 | 19 | .onAttach = function(libname, pkgname) { 20 | 21 | packageStartupMessage(paste0("ChannelAttribution ",.v)) 22 | packageStartupMessage("*** Looking to run more advanced attribution? Install ChannelAttribution Pro for free running install_pro(). Visit https://channelattribution.io for more info.") 23 | 24 | } 25 | 26 | .message_pro="*** Install ChannelAttribution Pro for free running install_pro(). Visit https://channelattribution.io for more info. Set flg_pro=FALSE to hide this message." 27 | 28 | heuristic_models=function(Data, var_path, var_conv, var_value=NULL, sep=">", flg_pro=TRUE){ 29 | 30 | if(!("data.frame"%in%class(Data)|"data.table"%in%class(Data))){ 31 | print("Data must be a data.frame or a data.table") 32 | } 33 | 34 | if(is.character(var_path)){ 35 | if(!var_path%in%names(Data)){ 36 | print("var_path must be a column of Data") 37 | } 38 | }else{ 39 | print("var_path must be a string") 40 | } 41 | if(is.character(var_conv)){ 42 | if(!var_conv%in%names(Data)){ 43 | print("var_conv must be a column of Data") 44 | } 45 | }else{ 46 | print("var_conv must be a string") 47 | } 48 | 49 | if(!is.null(var_value)){ 50 | if(!var_value%in%names(Data)){ 51 | print("var_value must be a column of Data") 52 | } 53 | } 54 | 55 | if(length(sep)>1){stop("Separator must have length 1")} 56 | 57 | if(is.null(var_value)){var_value="0"} 58 | 59 | res=.Call("heuristic_models_cpp", Data, var_path, var_conv, var_value, sep) 60 | 61 | if(flg_pro==TRUE){ 62 | print("*** Install ChannelAttribution Pro for free! Run install_pro(). Set flg_pro=FALSE to hide this message.") 63 | } 64 | 65 | return(as.data.frame(res)) 66 | 67 | } 68 | 69 | choose_order=function(Data, var_path, var_conv, var_null, max_order=10, sep=">", ncore=1, roc_npt=100, plot=TRUE, flg_pro=TRUE){ 70 | 71 | if(!("data.frame"%in%class(Data)|"data.table"%in%class(Data))){ 72 | print("Data must be a data.frame or a data.table") 73 | } 74 | 75 | if(is.character(var_path)){ 76 | if(!var_path%in%names(Data)){ 77 | print("var_path must be a column of Data") 78 | } 79 | }else{ 80 | print("var_path must be a string") 81 | } 82 | 83 | if(is.character(var_conv)){ 84 | if(!var_conv%in%names(Data)){ 85 | print("var_conv must be a column of Data") 86 | } 87 | }else{ 88 | print("var_conv must be a string") 89 | } 90 | 91 | if(!is.null(var_null)){ 92 | if(!var_null%in%names(Data)){ 93 | print("var_null must be a column of Data") 94 | } 95 | } 96 | 97 | if(length(sep)>1){stop("sep must have length 1")} 98 | if(ncore<1){stop("ncore must be >= 1")} 99 | if(roc_npt<10){stop("roc_npt must be >= 10")} 100 | if(!plot%in%c(0,1)){stop("plot must be FALSE or TRUE")} 101 | 102 | res=.Call("choose_order_cpp", Data, var_path, var_conv, var_null, max_order, sep, ncore, roc_npt) 103 | 104 | ck=res$auc$order[res$auc$order!=0] 105 | res$auc$order=res$auc$order[ck] 106 | res$auc$auc=res$auc$auc[ck] 107 | res$auc$pauc=res$auc$pauc[ck] 108 | 109 | best_order=res$auc$order[res$auc$pauc==max(res$auc$pauc)] 110 | 111 | if(best_order==max_order){ 112 | print(paste0("Suggested order not found. Try increasing max_order.")) 113 | }else{ 114 | print(paste0("Suggested order: ", res$auc$order[res$auc$pauc==max(res$auc$pauc)])) 115 | } 116 | 117 | if(plot=="TRUE"){ 118 | plot(res$auc$order,res$auc$pauc,type="l",xlab="order",ylab="penalized auc",main="PENALIZED AUC") 119 | } 120 | 121 | res[['suggested_order']]=best_order 122 | 123 | if(flg_pro==TRUE){ 124 | print(.message_pro) 125 | } 126 | 127 | return(res) 128 | 129 | } 130 | 131 | markov_model=function(Data, var_path, var_conv, var_value=NULL, var_null=NULL, order=1, nsim_start=1e5, max_step=NULL, out_more=FALSE, sep=">", ncore=1, nfold=10, seed=0, conv_par=0.05, rate_step_sim=1.5, verbose=TRUE, flg_pro=TRUE){ 132 | 133 | 134 | if(!("data.frame"%in%class(Data)|"data.table"%in%class(Data))){ 135 | print("Data must be a data.frame or a data.table") 136 | } 137 | 138 | if(is.character(var_path)){ 139 | if(!var_path%in%names(Data)){ 140 | print("var_path must be a column of Data") 141 | } 142 | }else{ 143 | print("var_path must be a string") 144 | } 145 | 146 | if(is.character(var_conv)){ 147 | if(!var_conv%in%names(Data)){ 148 | print("var_conv must be a column of Data") 149 | } 150 | }else{ 151 | print("var_conv must be a string") 152 | } 153 | 154 | if(!is.null(var_value)){ 155 | if(!var_value%in%names(Data)){ 156 | print("var_value must be a column of Data") 157 | } 158 | } 159 | 160 | if(!is.null(var_null)){ 161 | if(!var_null%in%names(Data)){ 162 | print("var_null must be a column of Data") 163 | } 164 | } 165 | 166 | if(order<1){stop("order must be >= 1")} 167 | if(nsim_start<1){stop("nsim_start must be >= 1")} 168 | if(!is.null(max_step)){if(max_step<1){stop("max_step must be >= 1")}} 169 | if(!out_more%in%c(0,1)){stop("out_more must be FALSE or TRUE")} 170 | if(length(sep)>1){stop("sep must have length 1")} 171 | if(ncore<1){stop("ncore must be >= 1")} 172 | if(nfold<1){stop("nfold must be >= 1")} 173 | if(seed<0){stop("seed must be >= 0")} 174 | if(conv_par<0){stop("conv_par must be > 0")} 175 | if(rate_step_sim<0){stop("rate_step_sim must be > 0")} 176 | if(!verbose%in%c(0,1)){stop("verbose must be FALSE or TRUE")} 177 | 178 | if(nrow(Data[which(Data[var_conv]!=0),])==0){stop("Data must have at least one converting path")} 179 | 180 | if(is.null(var_value)){var_value="0"} 181 | if(is.null(var_null)){var_null="0"} 182 | if(is.null(max_step)){max_step=0} 183 | if(!is.null(seed)){set.seed(seed)} 184 | 185 | res=.Call("markov_model_cpp", Data, var_path, var_conv, var_value, var_null, order, nsim_start, max_step, out_more, sep, ncore, nfold, seed, conv_par, rate_step_sim,verbose) 186 | 187 | if(flg_pro==TRUE){ 188 | print(.message_pro) 189 | } 190 | 191 | if(out_more==FALSE){ 192 | return(as.data.frame(res)) 193 | }else{ 194 | return(list(result=as.data.frame(res$result),transition_matrix=as.data.frame(res$transition_matrix),removal_effects=as.data.frame(res$removal_effects))) 195 | } 196 | 197 | } 198 | 199 | 200 | transition_matrix=function(Data, var_path, var_conv, var_null, order=1, sep=">", flg_equal=TRUE, flg_pro=TRUE){ 201 | 202 | if(!("data.frame"%in%class(Data)|"data.table"%in%class(Data))){ 203 | print("Data must be a data.frame or a data.table") 204 | } 205 | 206 | if(is.character(var_path)){ 207 | if(!var_path%in%names(Data)){ 208 | print("var_path must be a column of Data") 209 | } 210 | }else{ 211 | print("var_path must be a string") 212 | } 213 | if(is.character(var_conv)){ 214 | if(!var_conv%in%names(Data)){ 215 | print("var_conv must be a column of Data") 216 | } 217 | }else{ 218 | print("var_conv must be a string") 219 | } 220 | 221 | if(!is.null(var_null)){ 222 | if(!var_null%in%names(Data)){ 223 | print("var_null must be a column of Data") 224 | } 225 | } 226 | 227 | if(order<1){stop("order must be >= 1")} 228 | if(length(sep)>1){stop("sep must have length 1")} 229 | if(!flg_equal%in%c(0,1)){stop("flg_equal must be FALSE or TRUE")} 230 | 231 | if(is.null(var_null)){var_null="0"} 232 | 233 | res=.Call("transition_matrix_cpp", Data, var_path, var_conv, var_null, order, sep, flg_equal) 234 | 235 | if(flg_pro==TRUE){ 236 | print(.message_pro) 237 | } 238 | 239 | return(list(channels=data.frame(id=1:length(res$channels),channel_name=res$channels),transition_matrix=as.data.frame(res$transition_matrix))) 240 | 241 | } 242 | 243 | 244 | auto_markov_model=function(Data, var_path, var_conv, var_null, var_value=NULL, max_order=10, roc_npt=100, plot=FALSE, nsim_start=1e5, max_step=NULL, out_more=FALSE, sep=">", ncore=1, nfold=10, seed=0, conv_par=0.05, rate_step_sim=1.5, verbose=TRUE, flg_pro=TRUE){ 245 | 246 | if(!("data.frame"%in%class(Data)|"data.table"%in%class(Data))){ 247 | print("Data must be a data.frame or a data.table") 248 | } 249 | 250 | if(is.character(var_path)){ 251 | if(!var_path%in%names(Data)){ 252 | print("var_path must be a column of Data") 253 | } 254 | }else{ 255 | print("var_path must be a string") 256 | } 257 | if(is.character(var_conv)){ 258 | if(!var_conv%in%names(Data)){ 259 | print("var_conv must be a column of Data") 260 | } 261 | }else{ 262 | print("var_conv must be a string") 263 | } 264 | 265 | if(!is.null(var_value)){ 266 | if(!var_value%in%names(Data)){ 267 | print("var_value must be a column of Data") 268 | } 269 | } 270 | 271 | if(!is.null(var_null)){ 272 | if(!var_null%in%names(Data)){ 273 | print("var_null must be a column of Data") 274 | } 275 | } 276 | 277 | if(max_order<1){stop("max_order must be >= 1")} 278 | if(roc_npt<10){stop("roc_npt must be >= 10")} 279 | if(!plot%in%c(0,1)){stop("plot must be FALSE or TRUE")} 280 | if(nsim_start<1){stop("nsim_start must be >= 1")} 281 | if(!is.null(max_step)){if(max_step<1){stop("max_step must be >= 1")}} 282 | if(!out_more%in%c(0,1)){stop("out_more must be FALSE or TRUE")} 283 | if(length(sep)>1){stop("sep must have length 1")} 284 | if(ncore<1){stop("ncore must be >= 1")} 285 | if(nfold<1){stop("nfold must be >= 1")} 286 | if(seed<0){stop("seed must be >= 0")} 287 | if(conv_par<0){stop("conv_par must be > 0")} 288 | if(rate_step_sim<0){stop("rate_step_sim must be > 0")} 289 | if(!verbose%in%c(0,1)){stop("verbose must be FALSE or TRUE")} 290 | 291 | order=choose_order(Data, var_path, var_conv, var_null, max_order=max_order, sep=sep, ncore=ncore, roc_npt=roc_npt, plot=plot, flg_pro=FALSE) 292 | order=order[['suggested_order']] 293 | 294 | res=markov_model(Data, var_path, var_conv, var_value=var_value, var_null=var_null, order=order, nsim_start=nsim_start, max_step=max_step, out_more=out_more, sep=sep, ncore=ncore, nfold=nfold, seed=seed, conv_par=conv_par, rate_step_sim=rate_step_sim, verbose=verbose, flg_pro=FALSE) 295 | 296 | if(flg_pro==TRUE){ 297 | print(.message_pro) 298 | } 299 | 300 | if(out_more==FALSE){ 301 | return(as.data.frame(res)) 302 | }else{ 303 | return(list(result=as.data.frame(res$result),transition_matrix=as.data.frame(res$transition_matrix),removal_effects=as.data.frame(res$removal_effects))) 304 | } 305 | 306 | } 307 | 308 | 309 | .request_token_channelattributionpro = function( 310 | email, 311 | endpoint = "https://app.channelattribution.io/genpkg/generate_token.php", 312 | timeout = 60, 313 | verify_ssl = TRUE 314 | ) { 315 | 316 | # Basic email validation (simple but effective for most cases) 317 | is_valid_email = function(x) { 318 | is.character(x) && length(x) == 1L && nzchar(x) && 319 | grepl("^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$", x) 320 | } 321 | if (!is_valid_email(email)) { 322 | stop("Please enter a valid, non-empty email address (e.g., john.black@company.com).", call. = FALSE) 323 | } 324 | 325 | # Build a curl handle with common options 326 | make_handle = function(method = c("POST", "GET")) { 327 | method = match.arg(method) 328 | h = curl::new_handle() 329 | curl::handle_setheaders(h, 330 | "User-Agent" = "capro-token-client/1.0" 331 | ) 332 | curl::handle_setopt( 333 | h, 334 | followlocation = TRUE, 335 | # total timeout (seconds) 336 | timeout = as.numeric(timeout) 337 | ) 338 | # SSL verification knobs 339 | curl::handle_setopt( 340 | h, 341 | ssl_verifypeer = isTRUE(verify_ssl), 342 | ssl_verifyhost = if (isTRUE(verify_ssl)) 2L else 0L 343 | ) 344 | if (identical(method, "POST")) { 345 | curl::handle_setform(h, email = email) 346 | } 347 | h 348 | } 349 | 350 | # Helper to perform request and capture status + body safely 351 | safe_fetch = function(url, handle, query = NULL) { 352 | full_url = if (is.null(query)) url else { 353 | paste0(url, if (grepl("\\?", url, fixed = TRUE)) "&" else "?", curl::curl_escape(names(query)), "=", curl::curl_escape(query)) 354 | } 355 | out = tryCatch({ 356 | res = curl::curl_fetch_memory(full_url, handle) 357 | list( 358 | ok = TRUE, 359 | status = res$status_code, 360 | body = rawToChar(res$content %||% raw()) 361 | ) 362 | }, error = function(e) { 363 | # Distinguish likely TLS/timeout vs generic transport 364 | msg = conditionMessage(e) 365 | if (grepl("SSL|TLS|certificate|timeout|timed out", msg, ignore.case = TRUE)) { 366 | stop(paste0("network_or_ssl_error: ", msg), call. = FALSE) 367 | } else { 368 | stop(paste0("request_error: ", msg), call. = FALSE) 369 | } 370 | }) 371 | out$body = trimws(out$body %||% "") 372 | out 373 | } 374 | 375 | `%||%` = function(x, y) if (is.null(x)) y else x 376 | 377 | # 1) Try POST 378 | post_h = make_handle("POST") 379 | post_res = safe_fetch(endpoint, post_h) 380 | 381 | # 2) If 405/403 and body hints at method, retry with GET 382 | if (post_res$status %in% c(405L, 403L) && 383 | nzchar(post_res$body) && 384 | grepl("method", post_res$body, ignore.case = TRUE)) { 385 | 386 | get_h = make_handle("GET") 387 | get_res = safe_fetch(endpoint, get_h, query = c(email = email)) 388 | return(get_res$body) 389 | } 390 | 391 | # Return body regardless of HTTP status 392 | post_res$body 393 | } 394 | 395 | 396 | install_pro = function() { 397 | 398 | # Base-R secret prompt (no dependencies, works on Unix + Windows) 399 | .read_secret <- function(prompt = "Enter value: ") { 400 | trimws(readline(prompt)) 401 | } 402 | 403 | if (!requireNamespace("curl", quietly = TRUE) || !requireNamespace("jsonlite", quietly = TRUE)) { 404 | stop("This function requires 'curl' and 'jsonlite'. Please install them from CRAN.", call. = FALSE) 405 | } 406 | 407 | # ---------- early reachability check for app.channelattribution.io ---------- 408 | can_reach_app <- function(timeout = 60) { 409 | h <- curl::new_handle() 410 | curl::handle_setheaders(h, "User-Agent" = "capro-r-installer/1.1") 411 | curl::handle_setopt(h, 412 | timeout = as.numeric(timeout), 413 | followlocation = TRUE, 414 | ssl_verifypeer = TRUE, 415 | ssl_verifyhost = 2L 416 | ) 417 | out <- try(curl::curl_fetch_memory("https://app.channelattribution.io", handle = h), silent = TRUE) 418 | if (inherits(out, "try-error")) return(FALSE) 419 | status <- as.integer(out$status_code) 420 | !is.na(status) && status >= 200L && status < 400L 421 | } 422 | 423 | if (!can_reach_app()) { 424 | message( 425 | "It seems that app.channelattribution.io cannot be reached from this environment.\n", 426 | "To install ChannelAttribution Pro you need to reach app.channelattribution.io.\n", 427 | "If you can't reach it, please write us at info@channelattribution.io." 428 | ) 429 | return(invisible(NULL)) 430 | } 431 | 432 | # Prompt: token or email 433 | msg = "Enter your ChannelAttributionPro token. If you don't have one, enter your work/university email to request it: " 434 | token = .read_secret(msg) 435 | 436 | token = trimws(token) 437 | 438 | # If it looks like an email, trigger the token request and stop 439 | if (grepl("@", token, fixed = TRUE)) { 440 | email = token 441 | # simple email sanity check 442 | if (!grepl("^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$", token)) { 443 | stop("Please enter a valid email address or a token.", call. = FALSE) 444 | } 445 | message("Sending a token...") 446 | res_token = .request_token_channelattributionpro(email = email) 447 | message("*** We email the token to eligible work or university addresses - check your inbox and Spam/Junk; if you don't receive it, try a different work/university email, and if it still doesn't arrive, contact info@channelattribution.io.") 448 | return(invisible(NULL)) 449 | } 450 | 451 | if (!nzchar(token)) stop("A non-empty token or email is required.", call. = FALSE) 452 | 453 | `%||%` = function(x, y) if (is.null(x)) y else x 454 | 455 | urlencode_form = function(lst) { 456 | # Build application/x-www-form-urlencoded body 457 | if (length(lst) == 0) return("") 458 | keys = names(lst) 459 | if (is.null(keys)) stop("Form list must be named", call. = FALSE) 460 | kv = character(length(lst)) 461 | for (i in seq_along(lst)) { 462 | k = curl::curl_escape(keys[i]) 463 | v = curl::curl_escape(as.character(lst[[i]])) 464 | kv[i] = paste0(k, "=", v) 465 | } 466 | paste(kv, collapse = "&") 467 | } 468 | 469 | os_release_value = function(keys = c("ID_LIKE", "ID")) { 470 | path = "/etc/os-release" 471 | if (!file.exists(path)) return("") 472 | txt = try(readLines(path, warn = FALSE, encoding = "UTF-8"), silent = TRUE) 473 | if (inherits(txt, "try-error")) return("") 474 | vals = list() 475 | for (ln in txt) { 476 | if (!nzchar(ln) || grepl("^\\s*#", ln)) next 477 | kv = strsplit(ln, "=", fixed = TRUE)[[1]] 478 | if (length(kv) == 2L) { 479 | k = trimws(kv[1]); v = gsub('^"|"$', "", trimws(kv[2])); vals[[k]] = v 480 | } 481 | } 482 | for (k in keys) if (!is.null(vals[[k]])) return(vals[[k]]) 483 | "" 484 | } 485 | 486 | # ---- POST helpers now send x-www-form-urlencoded (not multipart) ---- 487 | http_post_form = function(url, form, timeout = 300) { 488 | h = curl::new_handle() 489 | curl::handle_setheaders(h, 490 | "User-Agent" = "capro-r-installer/1.1", 491 | "Accept" = "application/json,text/html;q=0.8,*/*;q=0.5", 492 | "Content-Type" = "application/x-www-form-urlencoded" 493 | ) 494 | curl::handle_setopt(h, timeout = as.numeric(timeout), followlocation = TRUE) 495 | curl::handle_setopt(h, ssl_verifypeer = TRUE, ssl_verifyhost = 2L) 496 | body = urlencode_form(form) 497 | curl::handle_setopt(h, postfields = body) 498 | out = try(curl::curl_fetch_memory(url, handle = h), silent = TRUE) 499 | if (inherits(out, "try-error")) { 500 | msg = conditionMessage(attr(out, "condition")) 501 | body = if (grepl("SSL|TLS|certificate|timeout|timed out", msg, ignore.case = TRUE)) { 502 | paste0("network_or_ssl_error: ", msg) 503 | } else paste0("request_error: ", msg) 504 | return(list(status = 0L, body = body, headers = list())) 505 | } 506 | list( 507 | status = as.integer(out$status_code), 508 | body = rawToChar(out$content %||% raw()), 509 | headers = out$headers %||% list() 510 | ) 511 | } 512 | 513 | http_get_text = function(url, timeout = 60) { 514 | h = curl::new_handle() 515 | curl::handle_setheaders(h, "User-Agent" = "capro-r-installer/1.1") 516 | curl::handle_setopt(h, timeout = as.numeric(timeout), followlocation = TRUE) 517 | curl::handle_setopt(h, ssl_verifypeer = TRUE, ssl_verifyhost = 2L) 518 | out = try(curl::curl_fetch_memory(url, handle = h), silent = TRUE) 519 | if (inherits(out, "try-error")) return(NULL) 520 | rawToChar(out$content %||% raw()) 521 | } 522 | 523 | extract_href_links = function(html_text) { 524 | if (is.null(html_text) || !nzchar(html_text)) return(character()) 525 | m = gregexpr("]+href\\s*=\\s*\"([^\"]+)\"", html_text, ignore.case = TRUE, perl = TRUE) 526 | hits = regmatches(html_text, m)[[1]] 527 | if (!length(hits)) return(character()) 528 | sub("\".*$", "", sub("^.*href\\s*=\\s*\"", "", hits, perl = TRUE), perl = TRUE) 529 | } 530 | 531 | resolve_pkg_url = function(pkg_value) { 532 | if (is.null(pkg_value)) return(NULL) 533 | pkg_value = trimws(as.character(pkg_value)) 534 | if (!nzchar(pkg_value)) return(NULL) 535 | if (grepl("\\.(zip|tgz|tar\\.gz)$", pkg_value, ignore.case = TRUE)) return(pkg_value) 536 | dir_url = paste0(sub("/+$", "", pkg_value), "/") 537 | listing = http_get_text(dir_url, timeout = 60) 538 | if (is.null(listing)) return(NULL) 539 | links = extract_href_links(listing) 540 | files = links[!grepl("/$", links) & !links %in% c("/", "../")] 541 | if (!length(files)) return(NULL) 542 | cand = files[grepl("\\.(zip|tgz|tar\\.gz)$", files, ignore.case = TRUE)] 543 | if (!length(cand)) cand = files 544 | cand = cand[order(cand)] 545 | paste0(dir_url, cand[length(cand)]) 546 | } 547 | 548 | install_from_url = function(url, os) { 549 | type = if (identical(os, "windows") || identical(os, "macos")) "binary" else "source" 550 | message("Installing from: ", url) 551 | out = try(utils::install.packages(url, repos = NULL, type = type, quiet = FALSE), silent = TRUE) 552 | !inherits(out, "try-error") 553 | } 554 | 555 | system_info_list = function() { 556 | sys = Sys.info() 557 | comp = NA_character_ 558 | try({ 559 | v = suppressWarnings(system("R CMD config CC", intern = TRUE)) 560 | if (length(v) && nzchar(v[1])) comp = v[1] 561 | }, silent = TRUE) 562 | if (!nzchar(comp)) { 563 | for (cmd in c("gcc --version", "clang --version")) { 564 | try({ 565 | v = suppressWarnings(system(cmd, intern = TRUE, ignore.stderr = TRUE)) 566 | if (length(v) && nzchar(v[1])) { comp = v[1]; break } 567 | }, silent = TRUE) 568 | } 569 | } 570 | list( 571 | os = sys[["sysname"]], 572 | release = sys[["release"]], 573 | machine = sys[["machine"]], 574 | r_version = paste0(R.version$major, ".", R.version$minor), 575 | compiler = if (nzchar(comp)) comp else "not found" 576 | ) 577 | } 578 | 579 | system_info_json = function() { 580 | jsonlite::toJSON(system_info_list(), auto_unbox = TRUE, pretty = TRUE) 581 | } 582 | 583 | notify_package_request_once = function(token, action, info, 584 | endpoint = "https://app.channelattribution.io/genpkg/build_check_email.php", 585 | timeout = 60) { 586 | if (!nzchar(token)) return("missing_token_param") 587 | send = function(payload) { 588 | h = curl::new_handle() 589 | curl::handle_setheaders(h, 590 | "User-Agent" = "capro-build-check-r/1.1", 591 | "Accept" = "text/plain, */*", 592 | "Content-Type" = "application/x-www-form-urlencoded" 593 | ) 594 | curl::handle_setopt(h, timeout = as.numeric(timeout), followlocation = TRUE) 595 | curl::handle_setopt(h, ssl_verifypeer = TRUE, ssl_verifyhost = 2L) 596 | body = urlencode_form(list(token = token, action = action, info = payload)) 597 | curl::handle_setopt(h, postfields = body) 598 | out = try(curl::curl_fetch_memory(endpoint, handle = h), silent = TRUE) 599 | if (inherits(out, "try-error")) { 600 | msg = conditionMessage(attr(out, "condition")) 601 | if (grepl("SSL|TLS|certificate|timeout|timed out", msg, ignore.case = TRUE)) { 602 | return(paste0("network_or_ssl_error: ", msg)) 603 | } 604 | return(paste0("request_error: ", msg)) 605 | } 606 | trimws(rawToChar(out$content %||% raw())) 607 | } 608 | resp = send(info) 609 | if (grepl("network_or_ssl_error:|request_error:", resp)) { 610 | small = if (nchar(info, type = "bytes") > 1024L) substr(info, 1L, 1024L) else info 611 | resp2 = send(small) 612 | return(paste0(resp, " | retry: ", resp2)) 613 | } 614 | resp 615 | } 616 | 617 | # ---------- final notifier ---------- 618 | action = "ERROR" 619 | info_blob = "" 620 | notifier_response = NULL 621 | on.exit({ 622 | tok = if (is.null(token)) Sys.getenv("CHPRO_TOKEN", "") else token 623 | info_to_send = info_blob 624 | if (nzchar(info_to_send) && nchar(info_to_send, type = "bytes") > 8192L) { 625 | info_to_send = paste0(substr(info_to_send, 1L, 8192L), "...(truncated)") 626 | } 627 | notifier_response <- try(notify_package_request_once(tok, action, info_to_send), silent = TRUE) 628 | msg = if (inherits(notifier_response, "try-error")) conditionMessage(attr(notifier_response, "condition")) else as.character(notifier_response) 629 | # message("[notifier] build_check_email.php response: ", msg) 630 | if (interactive()) try(utils::flush.console(), silent = TRUE) 631 | }, add = TRUE) 632 | 633 | # ---------- token ---------- 634 | if (is.null(token) || !nzchar(token)) token = Sys.getenv("CHPRO_TOKEN", "") 635 | if (!nzchar(token)) { 636 | message("Missing token. Pass token=... or set CHPRO_TOKEN in the environment.") 637 | return(invisible(NULL)) 638 | } 639 | 640 | # ---------- env detection ---------- 641 | sysname = Sys.info()[["sysname"]] 642 | machine = tolower(Sys.info()[["machine"]] %||% R.version$arch %||% "") 643 | arch = if (grepl("x86_64|amd64", machine)) "amd64" else if (grepl("aarch64|arm64", machine)) "arm64" else "amd64" 644 | 645 | os = NA_character_ 646 | os_vers = NA_character_ 647 | if (identical(sysname, "Darwin")) { 648 | os = "macos"; os_vers = if (identical(arch, "amd64")) "13" else "15" 649 | } else if (identical(sysname, "Windows")) { 650 | os = "windows"; os_vers = "11" 651 | } else { 652 | id_like = os_release_value(c("ID_LIKE", "ID")) 653 | if (grepl("rhel|fedora|centos|rocky|almalinux", id_like, ignore.case = TRUE)) { 654 | os = "rhel"; os_vers = "8" 655 | } else { 656 | os = "ubuntu"; os_vers = "20" 657 | } 658 | } 659 | 660 | r_version_minor = paste0(R.version$major, ".", strsplit(R.version$minor, "\\.")[[1]][1]) 661 | params = list( 662 | os = os, 663 | os_vers = os_vers, 664 | arch = arch, 665 | lang = "r", 666 | lang_vers = r_version_minor, 667 | replace = "0", 668 | uctr = "0", 669 | token = token 670 | ) 671 | params1 = list( 672 | os = os, 673 | os_vers = os_vers, 674 | arch = arch, 675 | lang = "r", 676 | lang_vers = r_version_minor, 677 | replace = "0", 678 | uctr = "0" 679 | ) 680 | 681 | message(sprintf("Detected -> os=%s os_vers=%s arch=%s R=%s", os, os_vers, arch, r_version_minor)) 682 | message("Building the package. Estimated time: 0-30 minutes. Please wait...") 683 | 684 | # ---------- call builder ---------- 685 | builder_url = "https://app.channelattribution.io/genpkg/genpkg.php" 686 | res = http_post_form(builder_url, params, timeout = 32*60) 687 | status = res$status 688 | text = res$body 689 | 690 | if (identical(status, 401L)) { 691 | message("*** Token non valid or expired. Write to info@channelattribution.io.") 692 | action = "ERROR" 693 | info_blob = jsonlite::toJSON(list( 694 | reason = "invalid_token", 695 | builder_status = status, 696 | system = system_info_list(), 697 | params = params1 698 | ), auto_unbox = TRUE, pretty = TRUE) 699 | return(invisible(NULL)) 700 | } 701 | 702 | data = NULL 703 | if (nzchar(text)) { 704 | tmp = try(jsonlite::fromJSON(text, simplifyVector = TRUE), silent = TRUE) 705 | if (!inherits(tmp, "try-error")) data = tmp 706 | } 707 | 708 | if (is.list(data)) { 709 | err = tolower(as.character(data$error %||% "")) 710 | stat = tolower(as.character(data$status %||% "")) 711 | if (grepl("invalid token", err) || (stat %in% c("fail", "error") && grepl("token", err))) { 712 | message("Token non valid or expired. Write to info@channelattribution.io.") 713 | action = "ERROR" 714 | info_blob = jsonlite::toJSON(list( 715 | reason = "invalid_token_in_body", 716 | builder_status = status, 717 | body = substr(text, 1L, 800L), 718 | system = system_info_list(), 719 | params = params1 720 | ), auto_unbox = TRUE, pretty = TRUE) 721 | return(invisible(NULL)) 722 | } 723 | } 724 | 725 | pkg_url = NULL 726 | ok_path = TRUE 727 | if (status %in% c(200L, 409L) && is.list(data) && !is.null(data$pkg)) { 728 | pkg_url = resolve_pkg_url(as.character(data$pkg)[1]) 729 | if (is.null(pkg_url)) ok_path = FALSE 730 | } else { 731 | ok_path = FALSE 732 | message("Installation failed. Builder response:") 733 | message(sprintf(" HTTP %s", status)) 734 | message(sprintf(" Body[0:800]: %s", substr(text %||% "", 1L, 800L))) 735 | message("Send the following information:") 736 | cat(system_info_json(), "\n\n") 737 | message("to info@channelattribution.io.") 738 | action = "ERROR" 739 | info_blob = jsonlite::toJSON(list( 740 | result = "builder_unexpected_response", 741 | builder_status = status, 742 | body = substr(text %||% "", 1L, 800L), 743 | system = system_info_list(), 744 | params = params1 745 | ), auto_unbox = TRUE, pretty = TRUE) 746 | } 747 | 748 | if (ok_path && nzchar(pkg_url)) { 749 | ok = isTRUE(install_from_url(pkg_url, os = os)) 750 | if (ok) { 751 | message("*** Package installed. Restart the session and try: library(ChannelAttributionPro)") 752 | action = "SUCCESS" 753 | info_blob = jsonlite::toJSON(list( 754 | result = "installed", 755 | package = pkg_url, 756 | system = system_info_list(), 757 | params = params1 758 | ), auto_unbox = TRUE, pretty = TRUE) 759 | return(invisible(NULL)) 760 | } else { 761 | message("Installation failed. Send the following information:") 762 | cat(system_info_json(), "\n\n") 763 | message("to info@channelattribution.io.") 764 | action = "ERROR" 765 | info_blob = jsonlite::toJSON(list( 766 | result = "install_failed", 767 | package = pkg_url, 768 | system = system_info_list(), 769 | params = params1 770 | ), auto_unbox = TRUE, pretty = TRUE) 771 | return(invisible(NULL)) 772 | } 773 | } 774 | 775 | invisible(NULL) 776 | } 777 | 778 | -------------------------------------------------------------------------------- /src/ChannelAttribution.cpp: -------------------------------------------------------------------------------- 1 | //ChannelAttribution: Markov model for online multi-channel attribution 2 | //Copyright (C) 2015 - Davide Altomare and David Loris 3 | // 4 | //ChannelAttribution is free software: you can redistribute it and/or modify 5 | //it under the terms of the GNU General Public License as published by 6 | //the Free Software Foundation, either version 3 of the License, or 7 | //(at your option) any later version. 8 | // 9 | //ChannelAttribution is distributed in the hope that it will be useful, 10 | //but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | //MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | //GNU General Public License for more details. 13 | // 14 | //You should have received a copy of the GNU General Public License 15 | //along with ChannelAttribution. If not, see . 16 | 17 | 18 | #define __GXX_EXPERIMENTAL_CXX0X__ 1 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | 29 | #define ARMA_USE_CXX11 30 | #define ARMA_64BIT_WORD 31 | 32 | #ifndef BEGIN_RCPP 33 | #define BEGIN_RCPP 34 | #endif 35 | 36 | #ifndef END_RCPP 37 | #define END_RCPP 38 | #endif 39 | 40 | using namespace std; 41 | using namespace Rcpp; 42 | //using namespace RcppThread; 43 | 44 | // [[Rcpp::depends(RcppArmadillo)]] 45 | using namespace arma; 46 | 47 | template 48 | string to_string(T pNumber) 49 | { 50 | ostringstream oOStrStream; 51 | oOStrStream << pNumber; 52 | return oOStrStream.str(); 53 | } 54 | 55 | 56 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 57 | 58 | RcppExport SEXP heuristic_models_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_value_p, SEXP sep_p) 59 | { 60 | 61 | BEGIN_RCPP 62 | 63 | //inp.a 64 | 65 | List Data(Data_p); 66 | 67 | CharacterVector var_path_0(var_path_p); 68 | string var_path = Rcpp::as(var_path_0); 69 | 70 | CharacterVector var_conv_0(var_conv_p); 71 | string var_conv = Rcpp::as(var_conv_0); 72 | 73 | CharacterVector var_value_0(var_value_p); 74 | string var_value = Rcpp::as(var_value_0); 75 | 76 | CharacterVector sep_0(sep_p); 77 | string sep = Rcpp::as(sep_0); 78 | 79 | //inp.b 80 | 81 | bool flg_var_value; 82 | flg_var_value=0; 83 | if(var_value.compare("0")!=0){ 84 | flg_var_value=1; 85 | } 86 | 87 | CharacterVector vy0 = Data[var_path]; 88 | vector vy = Rcpp::as >(vy0); 89 | 90 | NumericVector vc0 = Data[var_conv]; 91 | vector vc = Rcpp::as >(vc0); 92 | 93 | vector vv; 94 | if(flg_var_value==1){ 95 | NumericVector vv0 = Data[var_value]; 96 | vv = Rcpp::as >(vv0); 97 | } 98 | 99 | unsigned long int i,j,k,lvy,ssize; 100 | bool cfirst; 101 | unsigned long int start_pos,end_pos; 102 | unsigned long int nchannels; 103 | string s,channel,channel_first,channel_last; 104 | 105 | lvy=(unsigned long int) vy.size(); 106 | nchannels=0; 107 | 108 | map mp_channels; 109 | vector vchannels; 110 | 111 | map mp_first_conv; 112 | map mp_first_val; 113 | map mp_last_conv; 114 | map mp_last_val; 115 | map mp_linear_conv; 116 | map mp_linear_val; 117 | map mp0_linear_conv; 118 | map mp0_linear_val; 119 | 120 | vector vchannels_unique; 121 | double nchannels_unique; 122 | string kchannel; 123 | unsigned long int n_path_length; 124 | 125 | for(i=0;i vfirst_conv(nchannels); 229 | vector vlast_conv(nchannels); 230 | vector vlinear_conv(nchannels); 231 | 232 | vector vfirst_val(nchannels); 233 | vector vlast_val(nchannels); 234 | vector vlinear_val(nchannels); 235 | 236 | for(k=0;k S; 269 | SpMat S0; 270 | SpMat S1; 271 | vector lrS0; 272 | vector lrS; 273 | unsigned long int non_zeros,nrows,val0,lval0,i,j,k,s0,lrs0i; 274 | 275 | public: 276 | Fx(unsigned long int nrow0,unsigned long int ncol0): S(nrow0,ncol0), S0(nrow0,ncol0), S1(nrow0,ncol0), lrS0(nrow0,0), lrS(nrow0,0), non_zeros(0), nrows(nrow0) {} 277 | void init(unsigned long int, unsigned long int); 278 | void add(unsigned long int, unsigned long int,unsigned long int); 279 | void cum(); 280 | unsigned long int sim(unsigned long int, double); 281 | double pconv(unsigned long int, unsigned long int); 282 | List tran_matx(vector); 283 | }; 284 | 285 | void Fx::init(unsigned long int nrow1, unsigned long int ncol1) 286 | { 287 | S.reset(); 288 | S.set_size(nrow1,ncol1); 289 | 290 | S0.reset(); 291 | S0.set_size(nrow1,ncol1); 292 | 293 | S1.reset(); 294 | S1.set_size(nrow1,ncol1); 295 | 296 | lrS0.clear(); 297 | lrS0.resize(nrow1); 298 | 299 | lrS.clear(); 300 | lrS.resize(nrow1); 301 | 302 | non_zeros=0; 303 | nrows=nrow1; 304 | } 305 | 306 | 307 | void Fx::add(unsigned long int ichannel_old, unsigned long int ichannel, unsigned long int vxi) 308 | { 309 | 310 | val0=S(ichannel_old,ichannel); //riempire f.p. transizione con vxi 311 | if(val0==0){ 312 | lval0=lrS0[ichannel_old]; 313 | S0(ichannel_old,lval0)=ichannel; 314 | lrS0[ichannel_old]=lval0+1; 315 | ++non_zeros; 316 | } 317 | S(ichannel_old,ichannel)=val0+vxi; 318 | 319 | } 320 | 321 | void Fx::cum() 322 | { 323 | 324 | for(i=0;i0){ 327 | S1(i,0)=S(i,S0(i,0)); 328 | for(j=1;j=s0){return(S0(c,k));} 345 | } 346 | 347 | return 0; 348 | 349 | } 350 | 351 | 352 | List Fx::tran_matx(vector vchannels) 353 | { 354 | 355 | unsigned long int mij,sm3; 356 | vector vM1(non_zeros); 357 | vector vM2(non_zeros); 358 | vector vM3(non_zeros); 359 | vector vsm; 360 | vector vk; 361 | 362 | k=0; 363 | for(i=0;i0){ 368 | vM1[k]=vchannels[i]; 369 | vM2[k]=vchannels[S0(i,j)]; 370 | vM3[k]=mij; 371 | sm3=sm3+mij; 372 | ++k; 373 | } 374 | } 375 | 376 | vsm.push_back(sm3); 377 | vk.push_back(k); 378 | 379 | }//end for 380 | 381 | unsigned long int w=0; 382 | for(k=0;k0){ 403 | res=(double) S(ichannel,nchannels-2)/res; 404 | }else{ 405 | res=0; 406 | } 407 | 408 | return(res); 409 | 410 | } 411 | 412 | 413 | 414 | vector split_string(const string &s, unsigned long int order) { 415 | 416 | char delim=' '; 417 | vector result(order,-1); 418 | stringstream ss (s); 419 | string item; 420 | 421 | unsigned long int h=0; 422 | while (getline (ss, item, delim)) { 423 | result[h]=stoi(item); 424 | h=h+1; 425 | } 426 | 427 | return result; 428 | } 429 | 430 | 431 | 432 | // void print(auto &input) 433 | // { 434 | // for (unsigned long int i = 0; i < (unsigned long int) input.size(); i++) { 435 | // std::cout << input.at(i) << ' '; 436 | // } 437 | // } 438 | 439 | 440 | vector bounds(unsigned long int parts, unsigned long int mem) { 441 | vectorbnd; 442 | unsigned long int delta = mem / parts; 443 | unsigned long int reminder = mem % parts; 444 | unsigned long int N1 = 0, N2 = 0; 445 | bnd.push_back(N1); 446 | for (unsigned long int i = 0; i < parts; ++i) { 447 | N2 = N1 + delta; 448 | if (i == parts - 1) 449 | N2 += reminder; 450 | bnd.push_back(N2); 451 | N1 = N2; 452 | } 453 | return bnd; 454 | } 455 | 456 | 457 | void W_choose_order_1(vector vy, unsigned long int lvy, vector vc, vector vn, unsigned long int roc_npt, unsigned long int nchannels, vector &vorder, vector &vuroc, vector &vuroc_corr, List &L_roc, unsigned long int from_W, unsigned long int to_W) 458 | { 459 | 460 | 461 | for(unsigned long int order = (from_W+1); order < (to_W+1); order++){ 462 | 463 | string s,channel,path; 464 | unsigned long int nchannels_sim,i,ssize,ichannel_old,j,nc,start_pos,end_pos,start_pos_last,ichannel,npassi,vci,vni,vpi,k,h; 465 | vector vchannels_sim; 466 | map mp_channels_sim; 467 | vector vy2(lvy); 468 | bool flg_next_path,flg_next_null; 469 | vector vprev(lvy); 470 | double min_prev,max_prev,pauc,nnodes; 471 | 472 | vector vth(roc_npt); 473 | unsigned long int tp,fn,tn,fp; 474 | double th,tpr,fpr,tpr_old,fpr_old,auc; 475 | 476 | vector vlastc(lvy); 477 | 478 | //output 479 | 480 | vector vtpr(roc_npt+1); 481 | vector vfpr(roc_npt+1); 482 | 483 | nnodes=exp(lgamma(nchannels-3+order-1+1)-lgamma(order+1)-lgamma(nchannels-3-1+1)); 484 | 485 | unsigned long int np=0; 486 | for(i=0;i0){ //se ci sono conversion 610 | ichannel=nchannels_sim-2; 611 | S.add(ichannel_old,ichannel,vci); 612 | 613 | if(vni>0){ 614 | flg_next_null=1; 615 | }else{ 616 | flg_next_path=1; 617 | } 618 | 619 | } 620 | 621 | if(((vni>0) | (flg_next_null==1)) & (flg_next_path==0)){ //se non ci sono conversion 622 | ichannel=nchannels_sim-1; 623 | S.add(ichannel_old,ichannel,vni); 624 | 625 | flg_next_path=1; 626 | } 627 | 628 | }else{ //stato non finale 629 | 630 | if(vpi>0){ 631 | ichannel=atol(channel.c_str()); 632 | S.add(ichannel_old,ichannel,vpi); 633 | } 634 | 635 | } 636 | 637 | if(flg_next_path==0){ 638 | ++npassi; 639 | } 640 | }else{ //stato iniziale 641 | 642 | ichannel=0; 643 | 644 | } 645 | 646 | if(flg_next_path==0){ 647 | ichannel_old=ichannel; 648 | } 649 | 650 | if(flg_next_path==0){ 651 | channel=""; 652 | j=j+1; 653 | } 654 | 655 | }//end while j=0 && k=th) && (vc[i]>0)){ 695 | tp=tp+vc[i]; 696 | }else if((vprev[i]0)){ 697 | fn=fn+vc[i]; 698 | } 699 | 700 | vni=vn[i]; 701 | 702 | if((vprev[i]0)){ 703 | tn=tn+vni; 704 | }else if((vprev[i]>=th) & (vni>0)){ 705 | fp=fp+vni; 706 | } 707 | 708 | } 709 | 710 | tpr=(double)tp/(double)(tp+fn); 711 | fpr=(double)fp/(double)(fp+tn); 712 | 713 | auc=auc+((fpr-fpr_old)*tpr_old)+(((fpr-fpr_old)*(tpr-tpr_old))/2); 714 | 715 | vtpr[h]=tpr; 716 | vfpr[h]=fpr; 717 | 718 | tpr_old=tpr; 719 | fpr_old=fpr; 720 | 721 | h=h+1; 722 | 723 | }//end for k 724 | 725 | vtpr[roc_npt]=1; 726 | vfpr[roc_npt]=1; 727 | auc=auc+((1-fpr_old)*tpr_old)+(((1-fpr_old)*(1-tpr_old))/2); 728 | 729 | pauc=(double)(1-((1-auc)*((np-1)/(np-nnodes-1)))); 730 | if((pauc<0) | (pauc>1)){ 731 | pauc=0; 732 | } 733 | 734 | vuroc[order-1]=auc; 735 | vuroc_corr[order-1]=pauc; 736 | vorder[order-1]=order; 737 | 738 | L_roc[to_string(order)]=Rcpp::List::create(Rcpp::Named("fpr")=vfpr,Rcpp::Named("tpr")=vtpr); 739 | 740 | }//end if(nnodes(var_path_0); 758 | 759 | CharacterVector var_conv_0(var_conv_p); 760 | string var_conv = Rcpp::as(var_conv_0); 761 | 762 | CharacterVector var_null_0(var_null_p); 763 | string var_null = Rcpp::as(var_null_0); 764 | 765 | NumericVector max_order_0(max_order_p); 766 | unsigned long int max_order = Rcpp::as(max_order_0); 767 | 768 | CharacterVector sep_0(sep_p); 769 | string sep = Rcpp::as(sep_0); 770 | 771 | NumericVector ncore_0(ncore_p); 772 | unsigned long int ncore = Rcpp::as(ncore_0); 773 | 774 | 775 | NumericVector roc_npt_0(roc_npt_p); 776 | unsigned long int roc_npt = Rcpp::as(roc_npt_0); 777 | 778 | //inp.b 779 | 780 | CharacterVector vy0 = Data[var_path]; 781 | vector vy = Rcpp::as >(vy0); 782 | 783 | NumericVector vc0 = Data[var_conv]; 784 | vector vc = Rcpp::as >(vc0); 785 | 786 | vector vn; 787 | NumericVector vn0 = Data[var_null]; 788 | vn = Rcpp::as >(vn0); 789 | 790 | unsigned long int i,j,lvy,ssize; 791 | unsigned long int nchannels,npassi; 792 | bool cfirst; 793 | unsigned long int start_pos,end_pos; 794 | string s,channel,path; 795 | map mp_channels,mp_channels_sim; 796 | map mp_npassi; 797 | vector vnpassi; 798 | 799 | lvy=(unsigned long int) vy.size(); 800 | 801 | ////////////////////// 802 | //CODIFICA DA ONE STEP 803 | ////////////////////// 804 | 805 | string channel_test=""; 806 | string channel_old; 807 | 808 | //unsigned long int order; 809 | 810 | nchannels=0; 811 | 812 | mp_channels["(start)"]=0; 813 | vector vchannels; 814 | vchannels.push_back("(start)"); 815 | ++nchannels; 816 | 817 | //ricodifica nomi canale in interi 818 | 819 | for(i=0;i vuroc(max_order); 891 | vector vuroc_corr(max_order); 892 | vector vorder(max_order); 893 | 894 | vector limits = bounds(ncore, max_order); 895 | 896 | if(ncore==1){ 897 | 898 | W_choose_order_1(vy, lvy, vc, vn, roc_npt, nchannels, ref(vorder), ref(vuroc), ref(vuroc_corr), ref(L_roc), limits[0], limits[1]); 899 | 900 | }else{ 901 | 902 | vector threads(ncore); 903 | 904 | //Launch ncore threads: 905 | for(unsigned long int td=0; td v_vui, unsigned long int lvy, vector vc, unsigned long int nch0, vector vv, unsigned long int nfold, unsigned long int nsim_start, map< unsigned long int, vector > mp_channels_sim_inv, unsigned long int max_npassi, unsigned long int nchannels_sim, unsigned long int order, bool flg_var_value, unsigned long int nchannels, Fx S, Fx fV, vector &nconv, vector &ssval, vector< vector > &T, vector< vector > &TV, vector< vector > &V, vector< vector > &VV, vector &v_inc_path, unsigned long int from_W, unsigned long int to_W) 925 | { 926 | 927 | long int id0; 928 | unsigned long int i0,c,npassi0,k0,c_last=0,n_inc_path; 929 | vector C(nchannels,0); 930 | double sval0=0,sn,sm; 931 | bool flg_exit; 932 | 933 | for(unsigned long int run = from_W; run < to_W; run++){ 934 | 935 | mt19937 generator(seed+run); 936 | uniform_real_distribution distribution(0,1); 937 | 938 | n_inc_path=0; 939 | 940 | for(i0=0; i0<(unsigned long int) (nsim_start/nfold); i0++){ 941 | 942 | c=0; 943 | npassi0=0; 944 | 945 | for(k0=0; k0=0){ 970 | C[id0]=1; 971 | }else{ 972 | break; 973 | } 974 | } 975 | } 976 | 977 | c_last=c; //salvo il canale visitato 978 | ++npassi0; 979 | } 980 | 981 | }//end while npassi0 982 | 983 | 984 | if(c==nchannels_sim-2){ //solo se ho raggiunto la conversion assegno +1 ai canali interessati (se ho raggiunto il max numero di passi è come se fossi andato a null) 985 | 986 | nconv[run]=nconv[run]+1;//incremento le conversion 987 | 988 | //genero per il canale c_last un valore di conversion "sval0" 989 | if(flg_var_value==1){ 990 | sval0=v_vui[fV.sim(c_last,distribution(generator))]; 991 | } 992 | 993 | ssval[run]=ssval[run]+sval0; 994 | 995 | for (k0=0; k00){ 1030 | TV[run][k0-1]=(T[run][k0]/sm)*sn; 1031 | } 1032 | } 1033 | 1034 | if(flg_var_value==1){ 1035 | 1036 | V[run][0]=0; //pongo channel start = 0 1037 | V[run][nchannels-2]=0; //pongo channel conversion = 0 1038 | V[run][nchannels-1]=0; //pongo channel null = 0 1039 | 1040 | sn=0; 1041 | for(k0=0;k00){ 1052 | VV[run][k0-1]=(V[run][k0]/sm)*sn; 1053 | } 1054 | } 1055 | 1056 | } 1057 | 1058 | }//end for run 1059 | 1060 | } 1061 | 1062 | string f_print_perc(double num){ 1063 | 1064 | string res; 1065 | if(num>=1){ 1066 | res=to_string((double)(floor(num*10000)/100)).substr(0,6); 1067 | }else if(num>=0.1){ 1068 | res=to_string((double)(floor(num*10000)/100)).substr(0,5); 1069 | }else{ 1070 | res=to_string((double)(floor(num*10000)/100)).substr(0,4); 1071 | } 1072 | return(res); 1073 | } 1074 | 1075 | 1076 | RcppExport SEXP markov_model_cpp(SEXP Data_p, SEXP var_path_p, SEXP var_conv_p, SEXP var_value_p, SEXP var_null_p, SEXP order_p, SEXP nsim_start_p, SEXP max_step_p, SEXP out_more_p, SEXP sep_p, SEXP ncore_p, SEXP nfold_p, SEXP seed_p, SEXP conv_par_p, SEXP rate_step_sim_p, SEXP verbose_p) 1077 | { 1078 | 1079 | BEGIN_RCPP 1080 | 1081 | //inp.a 1082 | 1083 | List Data(Data_p); 1084 | 1085 | CharacterVector var_path_0(var_path_p); 1086 | string var_path = Rcpp::as(var_path_0); 1087 | 1088 | CharacterVector var_conv_0(var_conv_p); 1089 | string var_conv = Rcpp::as(var_conv_0); 1090 | 1091 | CharacterVector var_value_0(var_value_p); 1092 | string var_value = Rcpp::as(var_value_0); 1093 | 1094 | CharacterVector var_null_0(var_null_p); 1095 | string var_null = Rcpp::as(var_null_0); 1096 | 1097 | NumericVector order_0(order_p); 1098 | unsigned long int order = Rcpp::as(order_0); 1099 | 1100 | NumericVector nsim_start_0(nsim_start_p); 1101 | unsigned long int nsim_start = Rcpp::as(nsim_start_0); 1102 | 1103 | NumericVector max_step_0(max_step_p); 1104 | unsigned long int max_step = Rcpp::as(max_step_0); 1105 | 1106 | NumericVector out_more_0(out_more_p); 1107 | unsigned long int out_more = Rcpp::as(out_more_0); 1108 | 1109 | CharacterVector sep_0(sep_p); 1110 | string sep = Rcpp::as(sep_0); 1111 | 1112 | NumericVector ncore_0(ncore_p); 1113 | unsigned long int ncore = Rcpp::as(ncore_0); 1114 | 1115 | NumericVector nfold_0(nfold_p); 1116 | unsigned long int nfold = Rcpp::as(nfold_0); 1117 | 1118 | NumericVector seed_0(seed_p); 1119 | unsigned long int seed = Rcpp::as(seed_0); 1120 | 1121 | NumericVector conv_par_0(conv_par_p); 1122 | double conv_par = Rcpp::as(conv_par_0); 1123 | 1124 | NumericVector rate_step_sim_0(rate_step_sim_p); 1125 | double rate_step_sim = Rcpp::as(rate_step_sim_0); 1126 | 1127 | NumericVector verbose_0(verbose_p); 1128 | unsigned long int verbose = Rcpp::as(verbose_0); 1129 | 1130 | //inp.b 1131 | 1132 | bool flg_var_value; 1133 | flg_var_value=0; 1134 | if(var_value.compare("0")!=0){ 1135 | flg_var_value=1; 1136 | } 1137 | 1138 | bool flg_var_null; 1139 | flg_var_null=0; 1140 | if(var_null.compare("0")!=0){ 1141 | flg_var_null=1; 1142 | } 1143 | 1144 | CharacterVector vy0 = Data[var_path]; 1145 | vector vy = Rcpp::as >(vy0); 1146 | 1147 | NumericVector vc0 = Data[var_conv]; 1148 | vector vc = Rcpp::as >(vc0); 1149 | 1150 | vector vv; 1151 | if(flg_var_value==1){ 1152 | NumericVector vv0 = Data[var_value]; 1153 | vv = Rcpp::as >(vv0); 1154 | } 1155 | 1156 | vector vn; 1157 | if(flg_var_null==1){ 1158 | NumericVector vn0 = Data[var_null]; 1159 | vn = Rcpp::as >(vn0); 1160 | } 1161 | 1162 | unsigned long int i,j,k,lvy,ssize; 1163 | unsigned long int nchannels,nchannels_sim,npassi; 1164 | bool cfirst; 1165 | unsigned long int start_pos,end_pos; 1166 | string s,channel,path; 1167 | map mp_channels,mp_channels_sim; 1168 | map< unsigned long int, vector > mp_channels_sim_inv; 1169 | map mp_npassi; 1170 | vector vnpassi; 1171 | 1172 | lvy=(unsigned long int) vy.size(); 1173 | 1174 | ////////////////////// 1175 | //CODIFICA DA ONE STEP 1176 | ////////////////////// 1177 | 1178 | //mappa dei conversion value 1179 | unsigned long int l_vui=0; 1180 | map mp_vui; 1181 | vector v_vui; 1182 | double vui; 1183 | 1184 | vector rchannels; 1185 | string channel_j; 1186 | 1187 | nchannels=0; 1188 | nchannels_sim=0; 1189 | 1190 | vector vy2(lvy); 1191 | 1192 | mp_channels["(start)"]=0; 1193 | vector vchannels; 1194 | vchannels.push_back("(start)"); 1195 | ++nchannels; 1196 | 1197 | vector vchannels_sim; 1198 | 1199 | //definizione mappa conversion value 1200 | if(flg_var_value==1){ 1201 | for(i=0;i0){ 1203 | vui=vv[i]/vc[i]; 1204 | if(mp_vui.find(vui)==mp_vui.end()){ 1205 | mp_vui[vui]=l_vui; 1206 | v_vui.push_back(vui); 1207 | ++l_vui; 1208 | } 1209 | } 1210 | } 1211 | } 1212 | 1213 | //ricodifica nomi canale in interi 1214 | 1215 | for(i=0;i vtmp(order); 1296 | 1297 | for(i=0;i0){ //se ci sono conversion 1422 | ichannel=nchannels_sim-2; 1423 | S.add(ichannel_old,ichannel,vci); 1424 | 1425 | if(flg_var_value==1){ 1426 | vui=vv[i]/vci; 1427 | fV.add(ichannel_old,mp_vui[vui],vci); 1428 | } 1429 | 1430 | if(vni>0){ 1431 | flg_next_null=1; 1432 | }else{ 1433 | flg_next_path=1; 1434 | } 1435 | 1436 | } 1437 | 1438 | if(((vni>0) | (flg_next_null==1)) & (flg_next_path==0)){ //se non ci sono conversion 1439 | ichannel=nchannels_sim-1; 1440 | S.add(ichannel_old,ichannel,vni); 1441 | flg_next_path=1; 1442 | } 1443 | 1444 | }else{ //stato non finale 1445 | 1446 | if(vpi>0){ 1447 | ichannel=atol(channel.c_str()); 1448 | S.add(ichannel_old,ichannel,vpi); 1449 | } 1450 | 1451 | } 1452 | 1453 | if(flg_next_path==0){ 1454 | ++npassi; 1455 | } 1456 | }else{ //stato iniziale 1457 | 1458 | ichannel=0; 1459 | 1460 | } 1461 | 1462 | if(flg_next_path==0){ 1463 | ichannel_old=ichannel; 1464 | } 1465 | 1466 | if(flg_next_path==0){ 1467 | channel=""; 1468 | j=j+1; 1469 | } 1470 | 1471 | }//end while j0){ 1496 | max_npassi=max_step; 1497 | } 1498 | 1499 | if(nsim_start==0){ 1500 | nsim_start=1e5; 1501 | } 1502 | 1503 | unsigned long int run; 1504 | unsigned long int nch0; 1505 | double sn=0; 1506 | 1507 | vector< vector > T(nfold,vector(nchannels,0)); 1508 | vector< vector > V(nfold,vector(nchannels,0)); 1509 | 1510 | nch0=nchannels-3; 1511 | 1512 | vector< vector > TV(nfold,vector(nch0,0)); 1513 | vector rTV(nch0,0); 1514 | 1515 | vector< vector > VV(nfold,vector(nch0,0)); 1516 | vector rVV(nch0,0); 1517 | 1518 | vector nconv(nfold,0); 1519 | vector ssval(nfold,0); 1520 | 1521 | vector TV_fin(nch0); 1522 | vector VV_fin(nch0); 1523 | vector vtmp1(nfold); 1524 | vector v_res_conv(nfold); 1525 | 1526 | vector v_inc_path(nfold); 1527 | 1528 | double max_res_conv=numeric_limits::infinity(); 1529 | double min_res_conv; 1530 | double res_conv; 1531 | 1532 | unsigned long int id_mz,run_min_res_conv=0; 1533 | 1534 | while(max_res_conv>conv_par){ 1535 | 1536 | min_res_conv=numeric_limits::infinity(); 1537 | 1538 | vector limits = bounds(ncore, nfold); 1539 | 1540 | if(ncore==1){ 1541 | 1542 | W_markov_model_1(seed, v_vui, lvy, vc, nch0, vv, nfold, nsim_start, mp_channels_sim_inv, max_npassi, nchannels_sim, order, flg_var_value, nchannels, S, fV, ref(nconv), ref(ssval), ref(T), ref(TV), ref(V), ref(VV), ref(v_inc_path), limits[0], limits[1]); 1543 | 1544 | }else{ 1545 | 1546 | vector threads(ncore); 1547 | 1548 | //Launch ncore threads: 1549 | for(unsigned long int td=0; td()); 1570 | id_mz=(unsigned long int) (nfold/2); 1571 | if(nfold % 2 == 0){ 1572 | TV_fin[k]=(vtmp1[id_mz-1]+vtmp1[id_mz])/2; 1573 | }else{ 1574 | TV_fin[k]=vtmp1[id_mz]; 1575 | } 1576 | } 1577 | 1578 | if(flg_var_value==1){ 1579 | 1580 | for(k=0; k()); 1585 | id_mz=(unsigned long int) (nfold/2); 1586 | if(nfold % 2 == 0){ 1587 | VV_fin[k]=(vtmp1[id_mz-1]+vtmp1[id_mz])/2; 1588 | }else{ 1589 | VV_fin[k]=vtmp1[id_mz]; 1590 | } 1591 | } 1592 | 1593 | } 1594 | 1595 | max_res_conv=0; 1596 | for(run=0; runmax_res_conv){ 1604 | max_res_conv=res_conv; 1605 | } 1606 | if(res_convconv_par){ 1615 | Rcout << "Number of simulations: "+ to_string(nsim_start) + " - Reaching convergence (wait...): " + f_print_perc(max_res_conv) + "% > " + f_print_perc(conv_par) + "%" << endl; 1616 | }else{ 1617 | Rcout << endl; 1618 | Rcout << "Number of simulations: "+ to_string(nsim_start) + " - Convergence reached: " + f_print_perc(max_res_conv) + "% < " + f_print_perc(conv_par) + "%" << endl; 1619 | } 1620 | } 1621 | 1622 | nsim_start=nsim_start*rate_step_sim; 1623 | 1624 | }//end while(res_conv>conv_par) 1625 | 1626 | vector vchannels0(nch0); 1627 | for(k=1; k<(nch0+1); k++){ 1628 | vchannels0[k-1]=vchannels[k]; 1629 | } 1630 | 1631 | double succ_path=0; 1632 | for(k=0; k(var_path_0); 1704 | 1705 | CharacterVector var_conv_0(var_conv_p); 1706 | string var_conv = Rcpp::as(var_conv_0); 1707 | 1708 | CharacterVector var_null_0(var_null_p); 1709 | string var_null = Rcpp::as(var_null_0); 1710 | 1711 | NumericVector order_0(order_p); 1712 | unsigned long int order = Rcpp::as(order_0); 1713 | 1714 | CharacterVector sep_0(sep_p); 1715 | string sep = Rcpp::as(sep_0); 1716 | 1717 | NumericVector flg_equal_0(flg_equal_p); 1718 | unsigned long int flg_equal = Rcpp::as(flg_equal_0); 1719 | 1720 | //inp.b 1721 | 1722 | bool flg_var_null; 1723 | flg_var_null=0; 1724 | if(var_null.compare("0")!=0){ 1725 | flg_var_null=1; 1726 | } 1727 | 1728 | CharacterVector vy0 = Data[var_path]; 1729 | vector vy = Rcpp::as >(vy0); 1730 | 1731 | NumericVector vc0 = Data[var_conv]; 1732 | vector vc = Rcpp::as >(vc0); 1733 | 1734 | vector vn; 1735 | if(flg_var_null==1){ 1736 | NumericVector vn0 = Data[var_null]; 1737 | vn = Rcpp::as >(vn0); 1738 | } 1739 | 1740 | unsigned long int i,j,lvy,ssize; 1741 | unsigned long int nchannels,nchannels_sim,npassi; 1742 | bool cfirst; 1743 | unsigned long int start_pos,end_pos; 1744 | string s,channel,path; 1745 | map mp_channels,mp_channels_sim; 1746 | map< unsigned long int, vector > mp_channels_sim_inv; 1747 | map mp_npassi; 1748 | vector vnpassi; 1749 | 1750 | lvy=(unsigned long int) vy.size(); 1751 | 1752 | ////////////////////// 1753 | //CODIFICA DA ONE STEP 1754 | ////////////////////// 1755 | 1756 | //mappa dei conversion value 1757 | map mp_vui; 1758 | vector v_vui; 1759 | 1760 | vector rchannels; 1761 | string channel_j; 1762 | 1763 | nchannels=0; 1764 | nchannels_sim=0; 1765 | 1766 | vector vy2(lvy); 1767 | 1768 | mp_channels["(start)"]=0; 1769 | vector vchannels; 1770 | vchannels.push_back("(start)"); 1771 | ++nchannels; 1772 | 1773 | vector vchannels_sim; 1774 | 1775 | //ricodifica nomi canale in interi 1776 | 1777 | for(i=0;i vtmp(order); 1858 | 1859 | for(i=0;i0){ //se ci sono conversion 1984 | ichannel=nchannels_sim-2; 1985 | S.add(ichannel_old,ichannel,vci); 1986 | 1987 | if(vni>0){ 1988 | flg_next_null=1; 1989 | }else{ 1990 | flg_next_path=1; 1991 | } 1992 | 1993 | } 1994 | 1995 | if(((vni>0) | (flg_next_null==1)) & (flg_next_path==0)){ //se non ci sono conversion 1996 | ichannel=nchannels_sim-1; 1997 | S.add(ichannel_old,ichannel,vni); 1998 | flg_next_path=1; 1999 | } 2000 | 2001 | }else{ //stato non finale 2002 | 2003 | if(vpi>0){ 2004 | ichannel=atol(channel.c_str()); 2005 | S.add(ichannel_old,ichannel,vpi); 2006 | } 2007 | 2008 | } 2009 | 2010 | if(flg_next_path==0){ 2011 | ++npassi; 2012 | } 2013 | }else{ //stato iniziale 2014 | 2015 | ichannel=0; 2016 | 2017 | } 2018 | 2019 | if(flg_next_path==0){ 2020 | //channel_old=channel; 2021 | ichannel_old=ichannel; 2022 | } 2023 | 2024 | if(flg_next_path==0){ 2025 | channel=""; 2026 | j=j+1; 2027 | } 2028 | 2029 | }//end while j vchannels0(nch0); 2037 | for(k=1; k<(nch0+1); k++){ 2038 | vchannels0[k-1]=vchannels[k]; 2039 | } 2040 | 2041 | 2042 | List res=List::create(Named("transition_matrix")=S.tran_matx(vchannels_sim), Named("channels") = vchannels0); 2043 | 2044 | return(res); 2045 | 2046 | END_RCPP 2047 | 2048 | } 2049 | --------------------------------------------------------------------------------