├── LICENSE ├── OSSMETADATA ├── README.md ├── pom.xml ├── resources ├── R │ └── RAD │ │ ├── DESCRIPTION │ │ ├── NAMESPACE │ │ ├── R │ │ ├── RcppExports.R │ │ ├── anomaly_detection.R │ │ ├── anomaly_detection_ma.R │ │ └── multiplot.R │ │ ├── man │ │ ├── AnomalyDetection.rpca.Rd │ │ ├── RcppRPCA.Rd │ │ ├── RcppSVT.Rd │ │ └── ggplot_AnomalyDetection.rpca.Rd │ │ ├── src │ │ ├── RcppExports.cpp │ │ └── anomaly_detection.cpp │ │ └── tests │ │ ├── .DS_Store │ │ ├── test1.Rmd │ │ └── tests2.Rmd ├── examples │ ├── data │ │ ├── Audit.csv │ │ ├── Elnino.csv │ │ └── Iris.csv │ ├── models │ │ ├── elnino_linearReg.xml │ │ ├── ensemble_audit_dectree.xml │ │ ├── example.randomForest.xml │ │ ├── single_iris_dectree.xml │ │ └── single_iris_mlp.xml │ └── pig │ │ └── rad.pig └── images │ ├── surus-notext.png │ └── surus-text.png └── src ├── main └── java │ └── org │ └── surus │ ├── math │ ├── AugmentedDickeyFuller.java │ ├── RPCA.java │ └── RidgeRegression.java │ └── pig │ ├── RAD.java │ └── ScorePMML.java └── test └── java └── org └── surus ├── math ├── AugmentedDickeyFuller_Test.java └── RPCA_Test.java └── pig ├── RAD_Test.java ├── ScorePMML_AuditTest.java ├── ScorePMML_ElNinoTest.java └── ScorePMML_IrisTest.java /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2014 Netflix, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | =============================================================================== 205 | 206 | Third-party dependencies that are released under the BSD 3-Clause License: 207 | * JPMML/jpmml (v.1.0.19) - https://github.com/jpmml/jpmml/tree/1.0.19 208 | 209 | Copyright (c) 2009, University of Tartu 210 | All rights reserved. 211 | 212 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 213 | 214 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 215 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 216 | * Neither the name of the University of Tartu nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
217 | 218 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 219 | 220 | =============================================================================== 221 | 222 | Third-party dependencies that are released under the Apache License, Version 2.0: 223 | * Commons Math - http://commons.apache.org/proper/commons-math/ 224 | * Guava - http://guava-libraries.googlecode.com 225 | * Joda-Time - http://joda-time.sourceforge.net/index.html 226 | 227 | -------------------------------------------------------------------------------- /OSSMETADATA: -------------------------------------------------------------------------------- 1 | osslifecycle=archived 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![NetflixOSS Lifecycle](https://img.shields.io/osslifecycle/Netflix/Surus.svg)]() 2 | 3 | # Surus 4 | 5 | A collection of tools for analysis in Pig and Hive. 6 | 7 | ## Description 8 | 9 | Over the next year we plan to release a handful of our internal user defined functions (UDFs) that have broad adoption across Netflix. The use 10 | cases for these functions are varied in nature (e.g. scoring predictive models, outlier detection, pattern matching, etc.) 
and together extend 11 | the analytical capabilities of big data. 12 | 13 | ## Functions 14 | * ScorePMML - A tool for scoring predictive models in the cloud. 15 | * Robust Anomaly Detection (RAD) - An implementation of Robust PCA. 16 | 17 | ## Building Surus 18 | 19 | Surus is a standard Maven project. After cloning the git repository you can simply run the following command from the project root directory: 20 | 21 | mvn clean package 22 | 23 | On the first build, Maven will download all the dependencies from the internet and cache them in the local repository (`~/.m2/repository`), which 24 | can take a considerable amount of time. Subsequent builds will be faster. 25 | 26 | ## Using Surus 27 | 28 | After building Surus you will need to move it to your Hive/Pig instance and register the JAR in your environment. For those 29 | unfamiliar with this process see the [Apache Pig UDF](https://pig.apache.org/docs/r0.14.0/udf.html) 30 | and [Hive Plugin](https://cwiki.apache.org/confluence/display/Hive/HivePlugins) documentation.
31 | 32 | You can also install the anomaly detection R package trivially with this code 33 | 34 | library(devtools) 35 | install_github(repo = "Surus", username = "Netflix", subdir = "resources/R/RAD") 36 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 4.0.0 7 | 8 | netflix 9 | surus 10 | 0.1.4 11 | jar 12 | 13 | surus 14 | http://maven.apache.org 15 | 16 | 17 | UTF-8 18 | 19 | 20 | 21 | 22 | org.jpmml 23 | pmml-evaluator 24 | 1.0.19 25 | 26 | 27 | junit 28 | junit-dep 29 | 4.8.2 30 | test 31 | 32 | 33 | org.apache.hadoop 34 | hadoop-core 35 | 1.0.3 36 | provided 37 | 38 | 39 | org.apache.pig 40 | pig 41 | 0.14.0 42 | provided 43 | 44 | 45 | 46 | 47 | 48 | org.apache.maven.plugins 49 | maven-shade-plugin 50 | 2.3 51 | 52 | 53 | package 54 | 55 | shade 56 | 57 | 58 | 59 | 60 | com.google.common.collect 61 | com.shaded.common.collect 62 | 63 | com.google.common.collect.* 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /resources/R/RAD/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: RAD 2 | Type: Package 3 | Title: Robust Anomaly Detection 4 | Version: 1.0 5 | Date: 2015-02-19 6 | Authors@R: person("Jeffrey", "Wong", email = "jeffreyw@netflix.com", 7 | role = c("aut", "cre")) 8 | Description: Anomaly detection in time series data. Contains fast and easy to 9 | use C++ implementation of Robust Principal Component Pursuit, which is used 10 | to decompose a time series into a smooth component, a noise component, and 11 | a sparse component of anomalies. This package will grow to serve other 12 | types of anomalies, such as anomalies in histograms and curves. 
13 | License: Apache License version 2.0 14 | Imports: 15 | Rcpp (>= 0.11.2), 16 | RcppEigen, 17 | tseries, 18 | ggplot2 19 | LinkingTo: Rcpp, RcppEigen 20 | NeedsCompilation: yes 21 | -------------------------------------------------------------------------------- /resources/R/RAD/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2 (4.0.2): do not edit by hand 2 | 3 | export(AnomalyDetection.rpca) 4 | export(RcppRPCA) 5 | export(RcppSVT) 6 | export(ggplot_AnomalyDetection.rpca) 7 | import(ggplot2) 8 | importFrom(Rcpp,evalCpp) 9 | importFrom(tseries,adf.test) 10 | useDynLib(RAD) 11 | -------------------------------------------------------------------------------- /resources/R/RAD/R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # This file was generated by Rcpp::compileAttributes 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Singular Value Thresholding on a numeric matrix 5 | #' 6 | #' @param X numeric matrix 7 | #' @param penalty scalar to penalize singular values 8 | #' @return a list with 2 components, the singular value thresholded matrix and its thresholded singular values 9 | #' @export 10 | RcppSVT <- function(X, penalty) { 11 | .Call('RAD_RcppSVT', PACKAGE = 'RAD', X, penalty) 12 | } 13 | 14 | RcppSoftThresholdScalar <- function(x, penalty) { 15 | .Call('RAD_RcppSoftThresholdScalar', PACKAGE = 'RAD', x, penalty) 16 | } 17 | 18 | RcppSoftThresholdVector <- function(x, penalty) { 19 | .Call('RAD_RcppSoftThresholdVector', PACKAGE = 'RAD', x, penalty) 20 | } 21 | 22 | RcppSoftThresholdMatrix <- function(x, penalty) { 23 | .Call('RAD_RcppSoftThresholdMatrix', PACKAGE = 'RAD', x, penalty) 24 | } 25 | 26 | #' Robust Principal Component Pursuit 27 | #' 28 | #' @param X numeric matrix 29 | #' @param Lpenalty scalar to penalize singular values. 30 | #' A default of -1 is used as a sentinel value. 
Under this sentinel value 31 | #' a smart thresholding algorithm sets the value of Lpenalty, so the user does not need to. 32 | #' @param Spenalty scalar to penalize remainder matrix to find anomalous values. 33 | #' A default of -1 is used as a sentinel value. Under this sentinel value 34 | #' a smart thresholding algorithm sets the value of Spenalty, so the user does not need to. 35 | #' @return a list with 4 matrices. X is decomposed into L + S + E where L is low rank, 36 | #' S is sparse and E is the remainder matrix of noise 37 | #' @importFrom Rcpp evalCpp 38 | #' @export 39 | RcppRPCA <- function(X, Lpenalty = -1, Spenalty = -1, verbose = FALSE) { 40 | .Call('RAD_RcppRPCA', PACKAGE = 'RAD', X, Lpenalty, Spenalty, verbose) 41 | } 42 | 43 | -------------------------------------------------------------------------------- /resources/R/RAD/R/anomaly_detection.R: -------------------------------------------------------------------------------- 1 | #' Time Series Anomaly Detection 2 | #' 3 | #' Fast C++ implementation of time series anomaly detection using Robust Principal Component Pursuit 4 | #' @param X a vector representing a time series, or a data frame where columns are time series. 5 | #' The length of this vector should be divisible by frequency. 6 | #' If X is a vector it will be cast to a matrix of dimension frequency by length(X)/frequency 7 | #' @param frequency the frequency of the seasonality of X 8 | #' @param dates optional vector of dates to be used as a time index in the output 9 | #' @param autodiff boolean. If true, use the Augmented Dickey Fuller Test to determine 10 | #' if differencing is needed to make X stationary 11 | #' @param forcediff boolean. If true, always compute differences 12 | #' @param scale boolean. If true normalize the time series to zero mean and unit variance 13 | #' @param L.penalty a scalar for the amount of thresholding in determining the low rank approximation for X.
14 | #' The default values are chosen to correspond to the smart thresholding values described in Candes' 15 | #' Stable Principal Component Pursuit 16 | #' @param s.penalty a scalar for the amount of thresholding in determining the separation between noise and sparse outliers 17 | #' The default values are chosen to correspond to the smart thresholding values described in Zhou's 18 | #' Stable Principal Component Pursuit 19 | #' @param verbose boolean. If true print status updates while running optimization program 20 | #' @useDynLib RAD 21 | #' @importFrom tseries adf.test 22 | #' @details Robust Principal Component Pursuit is a matrix decomposition algorithm that seeks 23 | #' to separate a matrix X into the sum of three parts X = L + S + E. L is a low rank matrix representing 24 | #' a smooth X, S is a sparse matrix containing corrupted data, and E is noise. To convert a time series 25 | #' into the matrix X we take advantage of seasonality so that each column represents one full period, for 26 | #' example for weekly seasonality each row is a day of week and one column is one full week. 27 | #' 28 | #' While computing the low rank matrix L we take an SVD of X and soft threshold the singular values. 29 | #' This approach allows us to dampen all anomalies across the board simultaneously making the method 30 | #' robust to multiple anomalies. Most techniques such as time series regression and moving averages 31 | #' are not robust when there are two or more anomalies present. 32 | #' 33 | #' Empirical tests show that identifying anomalies is easier if X is stationary. 34 | #' The Augmented Dickey Fuller Test is used to test for stationarity - if X is not stationary 35 | #' then the time series is differenced before calling RPCP. While this test is abstracted away 36 | #' from the user differencing can be forced by setting the forcediff parameter. 
37 | #' 38 | #' The thresholding values can be tuned for different applications, however we strongly 39 | #' recommend using the defaults which were proposed by Zhou. 40 | #' For more details on the choice of L.penalty and s.penalty 41 | #' please refer to Zhou's 2010 paper on Stable Principal Component Pursuit. 42 | #' 43 | #' The implementation of RPCP is done in C++ for high performance through Rcpp. 44 | #' This function simply preprocesses the time series and calls RcppRPCA. 45 | #' @return 46 | #' \itemize{ 47 | #' \item X_transform. The transformation applied to the time series, 48 | #' can be the identity or could be differencing 49 | #' \item L_transform. The low rank component in the transformed space 50 | #' \item S_transform. The sparse outliers in the transformed space 51 | #' \item E_transform. The noise in the transformed space 52 | #' \item X_original. The original time series 53 | #' \item time. The time index 54 | #' \item name. The name of the time series if X was a named data frame 55 | #' } 56 | #' @references 57 | #' The following is recommended educational material: 58 | #' \itemize{ 59 | #' \item Candes' paper on RPCP \url{http://statweb.stanford.edu/~candes/papers/RobustPCA.pdf} 60 | #' \item Zhou's follow up paper on Stable PCP \url{http://arxiv.org/abs/1001.2363} 61 | #' \item Metamarkets Tech Blog on anomalies in time \url{https://metamarkets.com/2012/algorithmic-trendspotting-the-meaning-of-interesting/} 62 | #' } 63 | #' @export 64 | #' @examples 65 | #' frequency = 7 66 | #' numPeriods = 10 67 | #' ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 68 | #' ts = ts.sinusoidal 69 | #' ts = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 70 | #' ts[58:60] = 100 71 | #' ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + ggplot2::theme_grey(base_size = 25) 72 | AnomalyDetection.rpca = function(X, frequency=7, dates=NULL, 73 | autodiff = T, 74 | forcediff = F, 75 | scale = T, 76 | L.penalty = 1, 77 | 
s.penalty=1.4 / sqrt(max(frequency, ifelse(is.data.frame(X), nrow(X), length(X)) / frequency)), 78 | verbose=F) { 79 | if (is.vector(X) & !is.data.frame(X)) X = data.frame(y=X) 80 | time = if (is.null(dates)) 1:nrow(X) else dates 81 | 82 | #look through columns which are separate time series 83 | #transform each column vector into a matrix with nrow = observations per period 84 | #the number of columns will be equal to the number of periods 85 | rpca.ts = apply(X, 2, function(j) { 86 | j.init = j[1] 87 | useddiff = F 88 | if (forcediff) { 89 | useddiff = T 90 | j = c(0, diff(j)) 91 | } 92 | else if (autodiff) { 93 | adf = suppressWarnings(tseries::adf.test(j)) 94 | if (adf$p.value > .05) {useddiff = T; j = c(0, diff(j))} 95 | } 96 | 97 | if (scale) { 98 | j.global.mean = mean(j) 99 | j.global.sd = sd(j) 100 | j.matrix.standard.global = matrix((j - j.global.mean) / j.global.sd, nrow = frequency) 101 | j.matrix = j.matrix.standard.global 102 | } else { 103 | j.global.mean = 0 104 | j.global.sd = 1 105 | j.matrix = matrix(j, nrow = frequency) 106 | } 107 | 108 | list(rpca = RcppRPCA(j.matrix, 109 | Lpenalty = L.penalty, Spenalty = s.penalty, 110 | verbose=verbose), 111 | mean = j.global.mean, 112 | sd = j.global.sd, 113 | diff = useddiff, 114 | j.init = j.init 115 | ) 116 | }) 117 | rpca.ts.stacked = lapply(rpca.ts, function(i) { 118 | if (i$diff) { 119 | X.orig = c(i$j.init + cumsum((as.vector(i$rpca$X)) * i$sd + i$mean)) 120 | X.transform = (as.vector(i$rpca$X)) * i$sd + i$mean 121 | L.transform = (as.vector(i$rpca$L)) * i$sd + i$mean 122 | S.transform = (as.vector(i$rpca$S)) * i$sd 123 | E.transform = (as.vector(i$rpca$E)) * i$sd 124 | 125 | L.orig = cumsum(L.transform) + i$j.init 126 | X.rough = X.orig - L.orig 127 | 128 | #S.orig = cumsum(S.transform) 129 | #E.orig = X.orig - L.orig - S.orig 130 | 131 | ### 132 | # 133 | #X.rough.rpca = RcppRPCA(matrix(X.rough, nrow(i$rpca$X), ncol(i$rpca$X)), 134 | # Lpenalty = 10, 135 | # Spenalty = 2 / sqrt(10)) 136 | #S.orig 
= as.numeric(X.rough.rpca$S) 137 | #E.orig = X.orig - L.orig - S.orig 138 | 139 | ### 140 | S.orig = softThreshold(X.rough, 3 * (1/sqrt(2)) * sd(E.transform)) 141 | E.orig = X.orig - (L.orig) - S.orig 142 | 143 | data.frame(X.transform = X.transform, 144 | L.transform = L.transform, 145 | S.transform = S.transform, 146 | E.transform = E.transform, 147 | X.orig = X.orig, 148 | time = time)[-1,] 149 | } 150 | else { 151 | data.frame(X.transform = (as.vector(i$rpca$X)) * i$sd + i$mean, 152 | L.transform = (as.vector(i$rpca$L)) * i$sd + i$mean, 153 | S.transform = (as.vector(i$rpca$S)) * i$sd, 154 | E.transform = (as.vector(i$rpca$E)) * i$sd, 155 | X.orig = (as.vector(i$rpca$X)) * i$sd + i$mean, 156 | time = time) 157 | } 158 | }) 159 | names = unlist((mapply(function(df, name) { rep(name, nrow(df)) }, rpca.ts.stacked, names(rpca.ts)))) 160 | #build a report containing anomaly data for all the columns found in X 161 | rpca.ts.stacked = cbind(do.call('rbind', rpca.ts.stacked), name = as.vector(names)) 162 | names(rpca.ts.stacked) = c("X_transform", "L_transform", "S_transform", "E_transform", 163 | "X_original", 164 | "time", "name") 165 | 166 | return (rpca.ts.stacked) 167 | } 168 | 169 | #' ggplot for AnomalyDetection 170 | #' 171 | #' ggplot function which shows the low rank signal in blue, the random noise in green, 172 | #' and any outliers in red. 
If a transformation was applied, these signals will be plotted 173 | #' in the transformed space, along with the original time series 174 | #' @param anomalyDetection output from AnomalyDetection.rpca 175 | #' @import ggplot2 176 | #' @export 177 | #' @examples 178 | #' frequency = 7 179 | #' numPeriods = 10 180 | #' ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 181 | #' ts = ts.sinusoidal 182 | #' ts = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 183 | #' ts[58:60] = 100 184 | #' ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + ggplot2::theme_grey(base_size = 25) 185 | ggplot_AnomalyDetection.rpca = function(anomalyDetection) { 186 | ggplot2::ggplot(anomalyDetection, ggplot2::aes(time, X_original)) + 187 | ggplot2::geom_line(size = 1) + 188 | ggplot2::geom_line(ggplot2::aes(y = X_transform), size = 1, color = "black", linetype = 'dashed') + 189 | ggplot2::geom_line(ggplot2::aes(y = L_transform), size = .5, color = "blue") + 190 | ggplot2::geom_line(ggplot2::aes(y = E_transform), size = .5, color = "green") + 191 | ggplot2::geom_point(data = subset(anomalyDetection, abs(S_transform) > 0), color = "red", 192 | ggplot2::aes(size = abs(S_transform))) + 193 | ggplot2::scale_size_continuous(range=c(4,6)) + 194 | ggplot2::facet_wrap(~name, scale = "free") 195 | } 196 | 197 | softThreshold = function(x, penalty) { 198 | sign(x) * pmax(abs(x) - penalty,0) 199 | } 200 | -------------------------------------------------------------------------------- /resources/R/RAD/R/anomaly_detection_ma.R: -------------------------------------------------------------------------------- 1 | AnomalyDetection.ma = function(X, frequency=7) { 2 | if (is.vector(X) & !is.data.frame(X)) X = data.frame(y=X) 3 | 4 | ma.ts = do.call('rbind', apply(X, 2, function(j) { 5 | j.matrix = matrix(j, nrow= frequency) 6 | means = apply(j.matrix[,1:(ncol(j.matrix)-1)], 1, mean) 7 | sds = apply(j.matrix[,1:(ncol(j.matrix)-1)],1,sd) 8 | upperbounds = means + 1.6*sds 
9 | lowerbounds = means - 1.6*sds 10 | anomalous = t(apply(cbind(upperbounds, lowerbounds, j.matrix), 1, function(i) { 11 | i[-(1:2)] > i[1] | i[-(1:2)] < i[2] 12 | })) 13 | data.frame(X = j, 14 | time = 1:length(j), 15 | anomaly = as.vector(anomalous)) 16 | })) 17 | ma.ts = cbind(ma.ts, name = rep(names(X), each = nrow(X))) 18 | 19 | return (ma.ts) 20 | } 21 | 22 | ggplot_AnomalyDetection.ma = function(anomalyDetection) { 23 | ggplot(anomalyDetection, 24 | aes(x = time, y=X)) + 25 | geom_line(size = 1) + 26 | geom_point(data = subset(anomalyDetection, anomaly == T), color = 'red', size = 6) + 27 | facet_wrap(~name, scale = 'free') 28 | } -------------------------------------------------------------------------------- /resources/R/RAD/R/multiplot.R: -------------------------------------------------------------------------------- 1 | # Multiple plot function 2 | # 3 | # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) 4 | # - cols: Number of columns in layout 5 | # - layout: A matrix specifying the layout. If present, 'cols' is ignored. 6 | # 7 | # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), 8 | # then plot 1 will go in the upper left, 2 will go in the upper right, and 9 | # 3 will go all the way across the bottom. 10 | # 11 | multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { 12 | if (!require(grid)) stop("This function requires the R package 'grid'") 13 | 14 | # Make a list from the ... 
arguments and plotlist 15 | plots <- c(list(...), plotlist) 16 | 17 | numPlots = length(plots) 18 | 19 | # If layout is NULL, then use 'cols' to determine layout 20 | if (is.null(layout)) { 21 | # Make the panel 22 | # ncol: Number of columns of plots 23 | # nrow: Number of rows needed, calculated from # of cols 24 | layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), 25 | ncol = cols, nrow = ceiling(numPlots/cols)) 26 | } 27 | 28 | if (numPlots==1) { 29 | print(plots[[1]]) 30 | 31 | } else { 32 | # Set up the page 33 | grid.newpage() 34 | pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) 35 | 36 | # Make each plot, in the correct location 37 | for (i in 1:numPlots) { 38 | # Get the i,j matrix positions of the regions that contain this subplot 39 | matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) 40 | 41 | print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, 42 | layout.pos.col = matchidx$col)) 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /resources/R/RAD/man/AnomalyDetection.rpca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.2): do not edit by hand 2 | \name{AnomalyDetection.rpca} 3 | \alias{AnomalyDetection.rpca} 4 | \title{Time Series Anomaly Detection} 5 | \usage{ 6 | AnomalyDetection.rpca(X, frequency = 7, dates = NULL, autodiff = T, 7 | forcediff = F, scale = T, L.penalty = 1, 8 | s.penalty = 1.4/sqrt(max(frequency, ifelse(is.data.frame(X), nrow(X), 9 | length(X))/frequency)), verbose = F) 10 | } 11 | \arguments{ 12 | \item{X}{a vector representing a time series, or a data frame where columns are time series. 13 | The length of this vector should be divisible by frequency. 
14 | If X is a vector it will be cast to a matrix of dimension frequency by length(X)/frequency} 15 | 16 | \item{frequency}{the frequency of the seasonality of X} 17 | 18 | \item{dates}{optional vector of dates to be used as a time index in the output} 19 | 20 | \item{autodiff}{boolean. If true, use the Augmented Dickey Fuller Test to determine 21 | if differencing is needed to make X stationary} 22 | 23 | \item{forcediff}{boolean. If true, always compute differences} 24 | 25 | \item{scale}{boolean. If true normalize the time series to zero mean and unit variance} 26 | 27 | \item{L.penalty}{a scalar for the amount of thresholding in determining the low rank approximation for X. 28 | The default values are chosen to correspond to the smart thresholding values described in Candes' 29 | Stable Principal Component Pursuit} 30 | 31 | \item{s.penalty}{a scalar for the amount of thresholding in determining the separation between noise and sparse outliers 32 | The default values are chosen to correspond to the smart thresholding values described in Zhou's 33 | Stable Principal Component Pursuit} 34 | 35 | \item{verbose}{boolean. If true print status updates while running optimization program} 36 | } 37 | \value{ 38 | \itemize{ 39 | \item X_transform. The transformation applied to the time series, 40 | can be the identity or could be differencing 41 | \item L_transform. The low rank component in the transformed space 42 | \item S_transform. The sparse outliers in the transformed space 43 | \item E_transform. The noise in the transformed space 44 | \item X_original. The original time series 45 | \item time. The time index 46 | \item name. 
The name of the time series if X was a named data frame 47 | } 48 | } 49 | \description{ 50 | Fast C++ implementation of time series anomaly detection using Robust Principal Component Pursuit 51 | } 52 | \details{ 53 | Robust Principal Component Pursuit is a matrix decomposition algorithm that seeks 54 | to separate a matrix X into the sum of three parts X = L + S + E. L is a low rank matrix representing 55 | a smooth X, S is a sparse matrix containing corrupted data, and E is noise. To convert a time series 56 | into the matrix X we take advantage of seasonality so that each column represents one full period, for 57 | example for weekly seasonality each row is a day of week and one column is one full week. 58 | 59 | While computing the low rank matrix L we take an SVD of X and soft threshold the singular values. 60 | This approach allows us to dampen all anomalies across the board simultaneously making the method 61 | robust to multiple anomalies. Most techniques such as time series regression and moving averages 62 | are not robust when there are two or more anomalies present. 63 | 64 | Empirical tests show that identifying anomalies is easier if X is stationary. 65 | The Augmented Dickey Fuller Test is used to test for stationarity - if X is not stationary 66 | then the time series is differenced before calling RPCP. While this test is abstracted away 67 | from the user differencing can be forced by setting the forcediff parameter. 68 | 69 | The thresholding values can be tuned for different applications, however we strongly 70 | recommend using the defaults which were proposed by Zhou. 71 | For more details on the choice of L.penalty and s.penalty 72 | please refer to Zhou's 2010 paper on Stable Principal Component Pursuit. 73 | 74 | The implementation of RPCP is done in C++ for high performance through RCpp. 75 | This function simply preprocesses the time series and calls RcppRPCP. 
76 | } 77 | \examples{ 78 | frequency = 7 79 | numPeriods = 10 80 | ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 81 | ts = ts.sinusoidal 82 | ts = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 83 | ts[58:60] = 100 84 | ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + ggplot2::theme_grey(base_size = 25) 85 | } 86 | \references{ 87 | The following are recommended educational material: 88 | \itemize{ 89 | \item Candes' paper on RPCP \url{http://statweb.stanford.edu/~candes/papers/RobustPCA.pdf} 90 | \item Zhou's follow up paper on Stable PCP \url{http://arxiv.org/abs/1001.2363} 91 | \item Metamarkets Tech Blog on anomalies in time \url{https://metamarkets.com/2012/algorithmic-trendspotting-the-meaning-of-interesting/} 92 | } 93 | } 94 | 95 | -------------------------------------------------------------------------------- /resources/R/RAD/man/RcppRPCA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.2): do not edit by hand 2 | \name{RcppRPCA} 3 | \alias{RcppRPCA} 4 | \title{Robust Principal Component Pursuit} 5 | \usage{ 6 | RcppRPCA(X, Lpenalty = -1, Spenalty = -1, verbose = FALSE) 7 | } 8 | \arguments{ 9 | \item{X}{numeric matrix} 10 | 11 | \item{Lpenalty}{scalar to penalize singular values. 12 | A default of -1 is used as a sentinel value. Under this sentinel value 13 | a smart thresholding algorithm sets the value of Lpenalty, so the user does not need to.} 14 | 15 | \item{Spenalty}{scalar to penalize remainder matrix to find anomalous values. 16 | A default of -1 is used as a sentinel value. Under this sentinel value 17 | a smart thresholding algorithm setes the value of Spenalty, so the user does not need to.} 18 | } 19 | \value{ 20 | a list with 4 matrices. 
X is decomposed into L + S + E where L is low rank, 21 | S is sparse and E is the remainder matrix of noise 22 | } 23 | \description{ 24 | Robust Principal Component Pursuit 25 | } 26 | 27 | -------------------------------------------------------------------------------- /resources/R/RAD/man/RcppSVT.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.2): do not edit by hand 2 | \name{RcppSVT} 3 | \alias{RcppSVT} 4 | \title{Singular Value Thresholding on a numeric matrix} 5 | \usage{ 6 | RcppSVT(X, penalty) 7 | } 8 | \arguments{ 9 | \item{X}{numeric matrix} 10 | 11 | \item{penalty}{scalar to penalize singular values} 12 | } 13 | \value{ 14 | a list with 2 components, the singular value thresholded matrix and its thresholded singular values 15 | } 16 | \description{ 17 | Singular Value Thresholding on a numeric matrix 18 | } 19 | 20 | -------------------------------------------------------------------------------- /resources/R/RAD/man/ggplot_AnomalyDetection.rpca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.2): do not edit by hand 2 | \name{ggplot_AnomalyDetection.rpca} 3 | \alias{ggplot_AnomalyDetection.rpca} 4 | \title{ggplot for AnomalyDetection} 5 | \usage{ 6 | ggplot_AnomalyDetection.rpca(anomalyDetection) 7 | } 8 | \arguments{ 9 | \item{anomalyDetection}{output from AnomalyDetection.rpca} 10 | } 11 | \description{ 12 | ggplot function which shows the low rank signal in blue, the random noise in green, 13 | and any outliers in red. 
If a transformation was applied, these signals will be plotted 14 | in the transformed space, along with the original time series 15 | } 16 | \examples{ 17 | frequency = 7 18 | numPeriods = 10 19 | ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 20 | ts = ts.sinusoidal 21 | ts = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 22 | ts[58:60] = 100 23 | ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + ggplot2::theme_grey(base_size = 25) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /resources/R/RAD/src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // This file was generated by Rcpp::compileAttributes 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | // RcppSVT 10 | List RcppSVT(const Eigen::MatrixXd& X, double penalty); 11 | RcppExport SEXP RAD_RcppSVT(SEXP XSEXP, SEXP penaltySEXP) { 12 | BEGIN_RCPP 13 | SEXP __sexp_result; 14 | { 15 | Rcpp::RNGScope __rngScope; 16 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type X(XSEXP ); 17 | Rcpp::traits::input_parameter< double >::type penalty(penaltySEXP ); 18 | List __result = RcppSVT(X, penalty); 19 | PROTECT(__sexp_result = Rcpp::wrap(__result)); 20 | } 21 | UNPROTECT(1); 22 | return __sexp_result; 23 | END_RCPP 24 | } 25 | // RcppSoftThresholdScalar 26 | double RcppSoftThresholdScalar(double x, double penalty); 27 | RcppExport SEXP RAD_RcppSoftThresholdScalar(SEXP xSEXP, SEXP penaltySEXP) { 28 | BEGIN_RCPP 29 | SEXP __sexp_result; 30 | { 31 | Rcpp::RNGScope __rngScope; 32 | Rcpp::traits::input_parameter< double >::type x(xSEXP ); 33 | Rcpp::traits::input_parameter< double >::type penalty(penaltySEXP ); 34 | double __result = RcppSoftThresholdScalar(x, penalty); 35 | PROTECT(__sexp_result = Rcpp::wrap(__result)); 36 | } 37 | UNPROTECT(1); 38 | return __sexp_result; 39 | END_RCPP 40 | } 
41 | // RcppSoftThresholdVector 42 | Eigen::ArrayXd RcppSoftThresholdVector(const Eigen::ArrayXd& x, double penalty); 43 | RcppExport SEXP RAD_RcppSoftThresholdVector(SEXP xSEXP, SEXP penaltySEXP) { 44 | BEGIN_RCPP 45 | SEXP __sexp_result; 46 | { 47 | Rcpp::RNGScope __rngScope; 48 | Rcpp::traits::input_parameter< const Eigen::ArrayXd& >::type x(xSEXP ); 49 | Rcpp::traits::input_parameter< double >::type penalty(penaltySEXP ); 50 | Eigen::ArrayXd __result = RcppSoftThresholdVector(x, penalty); 51 | PROTECT(__sexp_result = Rcpp::wrap(__result)); 52 | } 53 | UNPROTECT(1); 54 | return __sexp_result; 55 | END_RCPP 56 | } 57 | // RcppSoftThresholdMatrix 58 | Eigen::MatrixXd RcppSoftThresholdMatrix(const Eigen::MatrixXd& x, double penalty); 59 | RcppExport SEXP RAD_RcppSoftThresholdMatrix(SEXP xSEXP, SEXP penaltySEXP) { 60 | BEGIN_RCPP 61 | SEXP __sexp_result; 62 | { 63 | Rcpp::RNGScope __rngScope; 64 | Rcpp::traits::input_parameter< const Eigen::MatrixXd& >::type x(xSEXP ); 65 | Rcpp::traits::input_parameter< double >::type penalty(penaltySEXP ); 66 | Eigen::MatrixXd __result = RcppSoftThresholdMatrix(x, penalty); 67 | PROTECT(__sexp_result = Rcpp::wrap(__result)); 68 | } 69 | UNPROTECT(1); 70 | return __sexp_result; 71 | END_RCPP 72 | } 73 | // RcppRPCA 74 | List RcppRPCA(Eigen::MatrixXd X, double Lpenalty = -1, double Spenalty = -1, bool verbose = false); 75 | RcppExport SEXP RAD_RcppRPCA(SEXP XSEXP, SEXP LpenaltySEXP, SEXP SpenaltySEXP, SEXP verboseSEXP) { 76 | BEGIN_RCPP 77 | SEXP __sexp_result; 78 | { 79 | Rcpp::RNGScope __rngScope; 80 | Rcpp::traits::input_parameter< Eigen::MatrixXd >::type X(XSEXP ); 81 | Rcpp::traits::input_parameter< double >::type Lpenalty(LpenaltySEXP ); 82 | Rcpp::traits::input_parameter< double >::type Spenalty(SpenaltySEXP ); 83 | Rcpp::traits::input_parameter< bool >::type verbose(verboseSEXP ); 84 | List __result = RcppRPCA(X, Lpenalty, Spenalty, verbose); 85 | PROTECT(__sexp_result = Rcpp::wrap(__result)); 86 | } 87 | UNPROTECT(1); 88 | 
return __sexp_result; 89 | END_RCPP 90 | } 91 | -------------------------------------------------------------------------------- /resources/R/RAD/src/anomaly_detection.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppEigen)]] 2 | 3 | #include 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // soft-thresholds the singular values: subtracts the penalty and clamps at zero 9 | // takes advantage of the fact that singular values are never negative 10 | inline Eigen::ArrayXd Dsoft(const Eigen::ArrayXd& d, double penalty) { 11 | Eigen::ArrayXd di(d.size()); 12 | for (int j = 0; j < d.size(); ++j) { 13 | double penalized = d[j] - penalty; 14 | if (penalized < 0) { 15 | di[j] = 0; 16 | } else { 17 | di[j] = penalized; 18 | } 19 | } 20 | return di; 21 | } 22 | 23 | //' Singular Value Thresholding on a numeric matrix 24 | //' 25 | //' @param X numeric matrix 26 | //' @param penalty scalar to penalize singular values 27 | //' @return a list with 2 components, the singular value thresholded matrix and its thresholded singular values 28 | //' @export 29 | // [[Rcpp::export]] 30 | List RcppSVT(const Eigen::MatrixXd& X, double penalty) { 31 | const Eigen::JacobiSVD UDV(X.jacobiSvd(Eigen::ComputeThinU|Eigen::ComputeThinV)); 32 | const Eigen::ArrayXd Ds(Dsoft(UDV.singularValues(), penalty)); 33 | const Eigen::MatrixXd S(UDV.matrixU() * Ds.matrix().asDiagonal() * UDV.matrixV().adjoint()); 34 | 35 | return List::create(Named("Xhat") = S, 36 | Named("d.thresholded") = Ds); 37 | } 38 | 39 | // [[Rcpp::export]] 40 | double RcppSoftThresholdScalar(double x, double penalty) { 41 | //sign(x) * pmax(abs(x) - penalty,0) 42 | double penalized = std::abs(x) - penalty; 43 | if (penalized < 0) return 0; 44 | if (x > 0) return penalized; 45 | return -penalized; 46 | } 47 | 48 | // [[Rcpp::export]] 49 | Eigen::ArrayXd RcppSoftThresholdVector(const Eigen::ArrayXd& x, double penalty) { 50 | int n = x.size(); 51 | Eigen::ArrayXd out(n); 52 | for (int i = 0; i < n; i++) { 53 | out[i] = 
RcppSoftThresholdScalar(x[i], penalty); 54 | } 55 | return out; 56 | } 57 | 58 | // [[Rcpp::export]] 59 | Eigen::MatrixXd RcppSoftThresholdMatrix(const Eigen::MatrixXd& x, double penalty) { 60 | int m = x.rows(); 61 | int n = x.cols(); 62 | Eigen::MatrixXd out(m,n); 63 | for (int i = 0; i < m; i++) { 64 | for (int j = 0; j < n; j++) { 65 | out(i,j) = RcppSoftThresholdScalar(x(i,j), penalty); 66 | } 67 | } 68 | return out; 69 | } 70 | 71 | double median_rcpp(NumericVector x) { 72 | NumericVector y = clone(x); 73 | int n, half; 74 | double y1, y2; 75 | n = y.size(); 76 | half = n / 2; 77 | if(n % 2 == 1) { 78 | // median for odd length vector 79 | std::nth_element(y.begin(), y.begin()+half, y.end()); 80 | return y[half]; 81 | } else { 82 | // median for even length vector 83 | std::nth_element(y.begin(), y.begin()+half, y.end()); 84 | y1 = y[half]; 85 | std::nth_element(y.begin(), y.begin()+half-1, y.begin()+half); 86 | y2 = y[half-1]; 87 | return (y1 + y2) / 2.0; 88 | } 89 | } 90 | 91 | double mad_rcpp(NumericVector x, double scale_factor = 1.4826) { 92 | return median_rcpp(abs(x - median_rcpp(x))) * scale_factor; 93 | } 94 | 95 | double getDynamicMu(Eigen::MatrixXd E) { 96 | int m = E.rows(); 97 | int n = E.cols(); 98 | 99 | NumericVector Evec = wrap(E.array()); 100 | double E_sd = sd(Evec); 101 | double mu = 0; 102 | if (m > n) { 103 | mu = E_sd * sqrt(2 * m); 104 | } else { 105 | mu = E_sd * sqrt(2 * n); 106 | } 107 | if (mu < .01) return .01; 108 | return mu; 109 | } 110 | 111 | List getL(Eigen::MatrixXd X, Eigen::MatrixXd S, double mu, double L_penalty) { 112 | double L_penalty2 = L_penalty * mu; 113 | 114 | const Eigen::MatrixXd diff(X - S); 115 | 116 | List L = RcppSVT(diff, L_penalty2); 117 | double L_nuclearnorm = as(L[1]).sum(); 118 | return List::create(L[0], L_penalty2 * L_nuclearnorm); 119 | } 120 | 121 | List getS(Eigen::MatrixXd X, Eigen::MatrixXd L, double mu, double s_penalty) { 122 | double s_penalty2 = s_penalty * mu; 123 | 124 | const 
Eigen::MatrixXd diff(X - L); 125 | 126 | Eigen::MatrixXd S = RcppSoftThresholdMatrix(diff, s_penalty2); 127 | double S_l1norm = S.lpNorm<1>(); 128 | return List::create(S, s_penalty2 * S_l1norm); 129 | } 130 | 131 | List getE(Eigen::MatrixXd X, Eigen::MatrixXd L, Eigen::MatrixXd S) { 132 | const Eigen::MatrixXd E(X - L - S); 133 | return List::create(E, E.squaredNorm()); 134 | } 135 | 136 | double objective(double L, double S, double E) { 137 | return (.5*E) + L + S; 138 | } 139 | 140 | //' Robust Principal Component Pursuit 141 | //' 142 | //' @param X numeric matrix 143 | //' @param Lpenalty scalar to penalize singular values. 144 | //' A default of -1 is used as a sentinel value. Under this sentinel value 145 | //' a smart thresholding algorithm sets the value of Lpenalty, so the user does not need to. 146 | //' @param Spenalty scalar to penalize remainder matrix to find anomalous values. 147 | //' A default of -1 is used as a sentinel value. Under this sentinel value 148 | //' a smart thresholding algorithm sets the value of Spenalty, so the user does not need to. 149 | //' @return a list with 4 matrices. 
X is decomposed into L + S + E where L is low rank, 150 | //' S is sparse and E is the remainder matrix of noise 151 | //' @importFrom Rcpp evalCpp 152 | //' @export 153 | // [[Rcpp::export]] 154 | List RcppRPCA(Eigen::MatrixXd X, double Lpenalty = -1, double Spenalty = -1, bool verbose = false) { 155 | int m = X.rows(); 156 | int n = X.cols(); 157 | // -1 is the sentinel for "pick the default penalty": 1 for L, 1.4 / sqrt(max(m, n)) for S 158 | if (Lpenalty == -1) { 159 | Lpenalty = 1; 160 | } 161 | if (Spenalty == -1) { 162 | if (m > n) { 163 | Spenalty = 1.4 / sqrt(m); 164 | } else { 165 | Spenalty = 1.4 / sqrt(n); 166 | } 167 | } 168 | // iterate until the objective changes by no more than tol (1e-8 of 0.5 * X.squaredNorm()) or maxIter is reached 169 | int iter = 0; 170 | int maxIter = 1000; 171 | bool converged = false; 172 | double obj_prev = 0.5 * X.squaredNorm(); 173 | double tol = 1e-8 * obj_prev; 174 | double diff = 2 * tol; 175 | double mu = m*n / (4*X.lpNorm<1>()); 176 | // L, S and E all start at zero; each pass updates S, then L, then the residual E 177 | double obj; 178 | Eigen::MatrixXd L_matrix = Eigen::MatrixXd::Zero(m,n); 179 | Eigen::MatrixXd S_matrix = Eigen::MatrixXd::Zero(m,n); 180 | Eigen::MatrixXd E_matrix = Eigen::MatrixXd::Zero(m,n); 181 | while (iter < maxIter && diff > tol) { 182 | List S = getS(X, L_matrix, mu, Spenalty); 183 | S_matrix = S[0]; 184 | List L = getL(X, S_matrix, mu, Lpenalty); 185 | L_matrix = L[0]; 186 | List E = getE(X, L_matrix, S_matrix); 187 | E_matrix = E[0]; 188 | 189 | obj = objective(as<double>(L[1]), as<double>(S[1]), as<double>(E[1])); 190 | if (verbose) { 191 | Rcout << "Objective function: " << obj_prev << " on previous iteration " << iter-1 << std::endl; 192 | Rcout << "Objective function: " << obj << " on iteration " << iter << std::endl; 193 | } 194 | // always refresh diff: the loop condition depends on it whether or not verbose is set 195 | diff = std::abs(obj_prev - obj); 196 | obj_prev = obj; 197 | mu = getDynamicMu(E_matrix); 198 | iter++; 199 | if (diff <= tol) converged = true; 200 | } 201 | if (verbose) { 202 | if (!converged) Rcout << "Failed to converge within " << maxIter << " iterations" << std::endl; 203 | if (converged) Rcout << "Converged within " << iter << " iterations" << std::endl; 204 | } 205 | return List::create(Named("X") = X, Named("L") = L_matrix, Named("S") = 
S_matrix, Named("E") = E_matrix); 206 | } 207 | -------------------------------------------------------------------------------- /resources/R/RAD/tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Netflix/Surus/05659af43697f6dae5743fd0a1931c210757e1e7/resources/R/RAD/tests/.DS_Store -------------------------------------------------------------------------------- /resources/R/RAD/tests/test1.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | fig_width: 9 5 | --- 6 | Robust Anomaly Detection Test Suite 1: Single Anomalous Periods 7 | -------------------------------------------------------- 8 | 9 | The following is a comparison of Robust Anomaly Detection vs a moving average with standard deviation. A series of graphs are presented where red dots will flag the anomaly. The graphs are always arranged as follows: 10 | 11 | - Top left = moving average with standard deviation on raw data 12 | 13 | - Bottom left = moving average with standard deviation on day to day changes (differenced) data 14 | 15 | - Top right = Robust Anomaly Detection on raw data 16 | 17 | - Bottom right = Robust Anomaly Detection on day to day changes 18 | 19 | ```{r global_options, include=FALSE, echo=FALSE} 20 | library(knitr) 21 | opts_chunk$set(echo=FALSE) 22 | ``` 23 | 24 | ```{r, include=FALSE} 25 | require(RAD) 26 | require(ggplot2) 27 | source("../R/multiplot.R") 28 | source("../R/anomaly_detection_ma.R") 29 | ``` 30 | 31 | Setup a 10 week long signal with weekly periodicity. 32 | There are no trends or noise. 33 | An anomaly occurs on days 58, 59, and 60. 
34 | Both moving average and RPCA catch the anomaly 35 | ```{r} 36 | frequency = 7 37 | numPeriods = 10 38 | ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 39 | 40 | #Periodic data, three outliers next to each other 41 | #Both algorithms detect outlier 42 | ts = ts.sinusoidal 43 | ts = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 44 | ts[58:60] = 100 45 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 46 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff = F)) + theme_grey(base_size = 20) 47 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 48 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff = T)) + theme_grey(base_size = 20) 49 | multiplot(p1,p3,p2,p4,cols = 2) 50 | ``` 51 | 52 | Setup a 10 week long signal with weekly periodicity. 53 | There is a consistent increasing linear trend. 54 | An anomaly occurs on days 69 and 70 at the end of the window. 55 | Both moving average and RPCA catch the anomaly, although the 56 | moving average has extra false positives 57 | ```{r} 58 | ts = ts.sinusoidal + 1:length(ts.sinusoidal) + rnorm(70) 59 | ts[69:70] = 100 60 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size=25) 61 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=F)) + theme_grey(base_size = 20) 62 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 63 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=T)) + theme_grey(base_size = 20) 64 | multiplot(p1,p3,p2,p4,cols=2) 65 | ``` 66 | 67 | Setup a 10 week long signal with no periodicity. 68 | The signal is the cumulative sum of random noise. 69 | An anomaly occurs on days 68, 69, and 70 at the end of the window. 70 | RPCA can only detect the anomaly when using differences, because 71 | the accumulation of noise is like a linear trend. 
72 | The moving average cannot detect the anomaly 73 | ```{r} 74 | #Random process 75 | #RPCA cannot detect without autodiff 76 | #Average detects spurious anomaly 77 | set.seed(100) 78 | ts = cumsum(rnorm(70, 1, 1)) 79 | ts[68:70] = 100 80 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 81 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=F)) + theme_grey(base_size = 20) 82 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 83 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=T)) + theme_grey(base_size = 20) 84 | multiplot(p1,p3,p2,p4,cols=2) 85 | ``` 86 | 87 | Setup a 10 week long signal with no periodicity. 88 | The signal is the cumulative sum of the cumulative sum of random noise 89 | An anomaly occurs on days 50, 51, and 52. 90 | RPCA can only detect the anomaly when using differences, because 91 | the accumulation of noise is like a linear trend. 92 | The moving average cannot detect the anomaly 93 | ```{r} 94 | set.seed(100) 95 | ts = cumsum(cumsum(rnorm(70, 5, 5))) 96 | ts[50:52] = 100 97 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 98 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=F)) + theme_grey(base_size = 20) 99 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 100 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=T)) + theme_grey(base_size = 20) 101 | multiplot(p1,p3,p2,p4,cols=2) 102 | ``` 103 | -------------------------------------------------------------------------------- /resources/R/RAD/tests/tests2.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | pdf_document: 4 | fig_width: 9 5 | html_document: 6 | fig_width: 9 7 | --- 8 | Robust Anomaly Detection Test Suite 2: Multiple Anomalous Periods 9 | 
======================================================== 10 | 11 | ```{r global_options, include=FALSE} 12 | library(knitr) 13 | opts_chunk$set(echo=FALSE) 14 | ``` 15 | 16 | ```{r, include=FALSE} 17 | require(RAD) 18 | require(ggplot2) 19 | source("../R/multiplot.R") 20 | source("../R/anomaly_detection_ma.R") 21 | ``` 22 | 23 | Setup a 10 week long signal with weekly periodicity. 24 | There are no trends or noise. 25 | An anomaly occurs on days 45,46,47 and again on 69 and 70. 26 | The moving average detects a spurious anomaly 27 | ```{r} 28 | frequency = 7 29 | numPeriods = 10 30 | ts.sinusoidal = sin((2 * pi / frequency ) * 1:(numPeriods * frequency)) 31 | 32 | #Periodic 33 | #good 34 | ts = ts.sinusoidal 35 | ts[69:70] = 100 36 | ts[45:47] = 100 37 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 38 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + theme_grey(base_size = 20) 39 | multiplot(p1,p2,cols=2) 40 | ``` 41 | 42 | Setup a 10 week long signal with weekly periodicity. 43 | There is a consistent increasing linear trend. 44 | An anomaly occurs on days 45, 46, 47, and again on 69 and 70 at the end of the window. 45 | The moving average detects a spurious anomaly 46 | ```{r} 47 | #Periodic + trend 48 | #Bad 49 | #No anomaly detected unless removing trend line 50 | ts = ts.sinusoidal + 1:length(ts.sinusoidal) 51 | ts[69:70] = 100 52 | ts[45:47] = 100 53 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size=25) 54 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=F)) + theme_grey(base_size = 20) 55 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 56 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=T)) + theme_grey(base_size = 20) 57 | multiplot(p1,p3,p2,p4,cols=2) 58 | ``` 59 | 60 | Setup a 10 week long signal with no periodicity. 61 | The signal is the cumulative sum of random noise. 
62 | An anomaly occurs on days 47,48,49 and again on 68, 69 and 70 at the end of the window. 63 | RPCA can only detect the anomaly when using differences, because 64 | the accumulation of noise is like a linear trend. 65 | The moving average detects the anomalies correctly only when using 66 | the undifferenced series, but still has many false positives 67 | ```{r} 68 | #Random process 69 | #Bad 70 | #No anomaly detected unless removing trend line 71 | #MA technique confused here 72 | set.seed(100) 73 | ts = cumsum(rnorm(70, 1, 4)) 74 | ts[68:70] = 100 75 | ts[47:49] = 100 76 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 77 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=F)) + theme_grey(base_size = 20) 78 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 79 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff=T)) + theme_grey(base_size = 20) 80 | multiplot(p1,p3,p2,p4,cols=2) 81 | ``` 82 | 83 | Setup a 10 week long signal with no periodicity. 84 | The signal is the cumulative sum of the cumulative sum of random noise 85 | An anomaly occurs on days 10,11,12,13,14 and again on 50, 51, and 52. 86 | RPCA can only detect the anomaly when using differences, because 87 | the accumulation of noise is like a linear trend. 
88 | Both techniques pick up spurious anomalies, but Robust AD has fewer 89 | false positives 90 | ```{r} 91 | #Sum of two random processes 92 | #Bad 93 | #No anomaly detected unless removing trend line 94 | set.seed(100) 95 | ts = cumsum(rnorm(70, 1, 1) + rnorm(70, 5, 5)) 96 | ts[50:52] = 100 97 | ts[10:14] = 100 98 | p1 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(ts)) + theme_grey(base_size = 20) 99 | p2 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts)) + theme_grey(base_size = 20) 100 | p3 = ggplot_AnomalyDetection.ma(AnomalyDetection.ma(c(0, diff(ts)))) + theme_grey(base_size = 20) 101 | p4 = ggplot_AnomalyDetection.rpca(AnomalyDetection.rpca(ts, autodiff = T)) + theme_grey(base_size = 20) 102 | multiplot(p1,p3,p2,p4,cols=2) 103 | ``` 104 | -------------------------------------------------------------------------------- /resources/examples/data/Elnino.csv: -------------------------------------------------------------------------------- 1 | buoy_day_ID,buoy,day,latitude,longitude,zon_winds,mer_winds,humidity,airtemp,s_s_temp 2 | 1,1,1,8.96,-140.32,-6.3,-6.4,83.5,27.32,27.57 3 | 2,1,2,8.95,-140.32,-5.7,-3.6,86.4,26.7,27.62 4 | 3,1,3,8.96,-140.32,-6.2,-5.8,83,27.36,27.68 5 | 4,1,4,8.96,-140.34,-6.4,-5.3,82.2,27.32,27.7 6 | 5,1,5,8.96,-140.33,-4.9,-6.2,87.3,27.09,27.85 7 | 6,1,6,8.96,-140.33,-6.3,-4.9,91.5,26.82,27.98 8 | 7,1,7,8.97,-140.32,-6.7,-3.7,94.1,26.62,28.04 9 | 8,1,8,8.96,-140.33,-6.3,-4.8,92,26.89,27.98 10 | 9,1,9,8.97,-140.33,-6.3,-4.9,86.9,27.44,28.13 11 | 10,1,10,8.97,-140.32,-4.2,-2.5,87.3,26.62,28.14 12 | 11,1,11,8.96,-140.32,-6.8,-2.4,86,27.6,28.09 13 | 12,1,12,8.96,-140.33,-7.1,-3.2,82.2,27.87,28.15 14 | 13,1,13,8.96,-140.33,-6.7,-4.7,81.3,27.75,28.19 15 | 14,2,1,4.93,-139.87,-2.9,-1.2,87.6,26.84,28.35 16 | 15,2,2,4.92,-139.86,0.4,-1.6,82.3,27.3,28.48 17 | 16,2,3,4.92,-139.87,-4,-4.6,83,28,28.61 18 | 17,2,4,4.92,-139.88,-6.3,-3.2,87.6,27.61,28.55 19 | 18,2,5,4.93,-139.87,-4.1,3.3,84.6,27.69,28.35 20 | 
19,2,6,4.93,-139.87,-5.4,1.9,81.9,27.96,28.48 21 | 20,2,7,4.93,-139.86,-4.7,0.9,80.3,28.08,28.54 22 | 21,2,8,4.92,-139.87,-2.5,-0.9,84.2,27.61,28.72 23 | 22,2,9,4.93,-139.87,-5.1,2,84.6,27.42,28.66 24 | 23,2,10,4.93,-139.86,-1.3,0.7,80,27.65,28.73 25 | 24,2,11,4.93,-139.85,-5.4,-1.2,83.4,27.89,28.84 26 | 25,2,12,4.93,-139.86,-6.2,-4,82.7,27.77,28.63 27 | 26,2,13,4.93,-139.87,-4.8,2.6,82.3,27.42,28.48 28 | 27,3,1,2.00,-139.95,-0.3,3.2,86.8,27.1,27.85 29 | 28,3,2,2.00,-139.96,-1.6,1.1,83.6,27.42,28.06 30 | 29,3,3,1.99,-139.97,-3.9,-2.5,87.5,27.3,28.01 31 | 30,3,4,1.98,-139.96,-4.7,1.4,88.3,26.98,27.95 32 | 31,3,5,1.99,-139.97,-5.3,2.5,82.5,27.14,27.99 33 | 32,3,6,2.00,-139.97,-6.8,0.5,82.8,27.46,27.89 34 | 33,3,7,2.00,-139.97,-5.6,0.5,84,27.02,27.49 35 | 34,3,8,2.00,-139.96,-3.4,2,87.2,26.55,27.27 36 | 35,3,9,2.00,-139.97,-4.5,2.3,82.1,25.59,26.97 37 | 36,3,10,2.00,-139.96,-4.1,0.4,79.3,25.47,27.07 38 | 37,3,11,1.99,-139.97,-5.8,0.7,83.2,26.27,27.37 39 | 38,3,12,2.00,-139.97,-7.2,2.8,83.2,26.23,27.26 40 | 39,3,13,2.00,-139.98,-5.8,0.5,78.1,25.79,27.1 41 | 40,3,14,2.00,-139.99,-5.4,-0.4,77.8,25.91,26.85 42 | 41,4,1,-0.01,-139.94,0.7,0.3,93.1,26.54,26.42 43 | 42,4,2,-0.02,-139.93,-1.5,-0.4,92.3,26.51,26.49 44 | 43,4,3,-0.01,-139.94,-3.4,-1.4,92.3,26.53,26.42 45 | 44,4,4,-0.02,-139.94,-4.3,1.3,92.3,26.18,26.02 46 | 45,4,5,-0.02,-139.94,-3.7,0.4,92.3,25.79,25.77 47 | 46,4,6,-0.01,-139.94,-5.1,-2.5,92.3,26.03,25.7 48 | 47,4,7,-0.02,-139.94,-5.4,-2.1,88.1,25.5,25.15 49 | 48,4,8,-0.02,-139.94,-3.6,-0.6,90.4,25.04,24.82 50 | 49,4,9,-0.02,-139.94,-3.3,0.3,88.8,24.63,24.51 51 | 50,4,10,-0.02,-139.93,-3,-2.7,87.3,24.51,24.36 52 | 51,4,11,-0.02,-139.93,-4.7,-1,90.4,24.19,24.15 53 | 52,4,12,-0.02,-139.93,-5.2,0.2,87.3,24.02,24.1 54 | 53,4,13,-0.01,-139.94,-4.7,-1,88.4,24.21,24.15 55 | 54,5,1,-2.02,-139.96,1.7,1.2,87.3,27.36,28.71 56 | 55,5,2,-2.00,-139.96,-1.1,-1.9,86.9,27.28,28.43 57 | 56,5,3,-2.01,-139.97,-3.7,-2.7,84.8,27.43,28.47 58 | 
57,5,4,-2.01,-139.97,-4.4,-3,81.9,27.75,28.83 59 | 58,5,5,-2.02,-139.97,-3.9,-1.4,83.6,27.91,28.92 60 | 59,5,6,-2.02,-139.96,-5.8,-3.4,88.9,27.75,28.71 61 | 60,5,7,-2.03,-139.97,-4.5,-5.2,87.7,27.08,28.27 62 | 61,5,8,-2.02,-139.97,-3.6,-4.5,83.1,26.6,27.7 63 | 62,5,9,-2.02,-139.96,-2.6,-2.3,84.4,26.41,27.27 64 | 63,5,10,-2.02,-139.96,-1.2,-5.1,88.5,25.93,27 65 | 64,5,11,-2.01,-139.96,-4,-4.3,84.4,25.54,26.77 66 | 65,5,12,-2.01,-139.96,-4,-3.1,84.8,25.69,26.87 67 | 66,5,13,-2.02,-139.96,-2.8,-4.5,89.3,25.61,26.86 68 | 67,5,14,-2.02,-139.97,,,,25.26,26.55 69 | 68,6,1,-5.01,-139.92,-0.6,3.5,83.8,28.02,28.94 70 | 69,6,2,-5.01,-139.91,0.2,-2.5,85.4,27.13,28.85 71 | 70,6,3,-5.01,-139.92,-3.9,-3.4,79.8,28.38,29.03 72 | 71,6,4,-5.01,-139.92,-4.2,-2.5,79.4,28.18,28.98 73 | 72,6,5,-5.01,-139.92,-3,0.3,82.6,27.87,29.04 74 | 73,6,6,-5.01,-139.92,-7.9,1.1,78.6,28.45,29.08 75 | 74,6,7,-5.01,-139.92,-5.5,0.5,76.1,28.53,29.15 76 | 75,6,8,-5.01,-139.92,-3.1,-1.7,85.8,27.16,29.05 77 | 76,6,9,-5.01,-139.91,-3.5,-0.8,84.2,27.48,28.99 78 | 77,6,10,-5.01,-139.92,-3.3,-0.7,81.8,27.91,29.05 79 | 78,6,11,-5.00,-139.91,-2.8,-4.9,79.8,28.02,29 80 | 79,6,12,-5.01,-139.92,-4,-1.5,82.2,27.59,28.95 81 | 80,6,13,-5.01,-139.92,-7.8,1,76.1,28.34,28.88 82 | 81,6,14,-5.01,-139.92,,,,27.44,28.85 83 | 82,7,1,5.15,-124.87,-4.8,2.5,90.9,27.85,28.71 84 | 83,7,2,5.14,-124.86,-2.2,4,86.6,28.33,28.64 85 | 84,7,3,5.14,-124.87,-1.6,0.5,88.5,27.37,28.66 86 | 85,7,4,5.15,-124.87,-3,1.8,89.3,27.53,28.68 87 | 86,7,5,5.14,-124.87,-4.6,3.3,83.1,27.81,28.61 88 | 87,7,6,5.14,-124.87,-4.9,1.6,84.6,28.01,28.62 89 | 88,7,7,5.14,-124.87,-4.6,2.4,88.5,27.45,28.77 90 | 89,7,8,5.15,-124.87,-2.2,5.1,86.2,27.25,28.81 91 | 90,7,9,5.14,-124.87,-1.8,3.8,83.1,27.53,28.79 92 | 91,7,10,5.14,-124.87,-3.8,3.4,81.1,27.29,28.75 93 | 92,7,11,5.14,-124.87,-4.2,2.4,80.3,27.53,28.7 94 | 93,7,12,5.14,-124.87,-3.4,2.1,80.3,27.37,28.83 95 | 94,7,13,5.14,-124.87,-3.4,3.1,81.1,27.29,28.93 96 | 95,8,1,1.95,-125.07,-7.1,1.1,79.5,27.14,27.9 97 | 
96,8,2,1.96,-125.07,-3.4,1.6,79.1,26.94,27.12 98 | 97,8,3,1.94,-125.07,-3.5,0.8,84.9,26.62,27.03 99 | 98,8,4,1.95,-125.07,-5.8,1.5,84.9,25.89,26.18 100 | 99,8,5,1.96,-125.07,-4.9,2.2,84.1,25.29,25.82 101 | 100,8,6,1.95,-125.07,-5.9,0.7,81.6,25.21,25.56 102 | 101,8,7,1.94,-125.07,-6,1.4,81.2,24.85,25.55 103 | 102,8,8,1.95,-125.07,-4.7,2,82.4,24.45,25.34 104 | 103,8,9,1.95,-125.07,-4.7,2,82.8,24.41,25.05 105 | 104,8,10,1.96,-125.07,-4.7,1.2,81.2,24.49,24.91 106 | 105,8,11,1.95,-125.07,-4.6,0.7,82.8,24.49,24.84 107 | 106,8,12,1.94,-125.07,-4.7,1.2,80.3,24.08,24.92 108 | 107,8,13,1.96,-125.07,-4.5,1.1,79.9,23.84,25.26 109 | 108,9,1,-0.20,-124.37,-3.6,0,89.8,25.09,24.62 110 | 109,9,2,-0.21,-124.37,-0.3,-0.6,89.4,25.39,24.82 111 | 110,9,3,-0.22,-124.36,-1,-0.9,89.4,25.75,25.59 112 | 111,9,4,-0.21,-124.39,-4.2,1,85.6,25.54,25.21 113 | 112,9,5,-0.20,-124.39,-3.6,1.2,85.2,25.13,24.71 114 | 113,9,6,-0.20,-124.39,-4.3,-0.1,84.4,24.7,24.1 115 | 114,9,7,-0.20,-124.4,-3.8,0,85.6,24.03,23.72 116 | 115,9,8,-0.19,-124.39,-3.1,-0.7,84.4,23.94,23.82 117 | 116,9,9,-0.20,-124.39,-3.2,-0.3,84.4,23.87,23.7 118 | 117,9,10,-0.20,-124.38,-2.6,-1.2,86.5,23.59,23.28 119 | 118,9,11,-0.19,-124.38,-3.5,-0.6,84.8,23.28,22.79 120 | 119,9,12,-0.19,-124.39,-3.1,0.7,84.4,22.72,22.44 121 | 120,9,13,-0.19,-124.38,-3.6,0,84.8,22.74,22.56 122 | 121,10,1,-2.03,-124.89,-5.3,-1.8,87,27.06,27.32 123 | 122,10,2,-2.03,-124.89,-0.6,-2.4,89.9,26.51,27.18 124 | 123,10,3,-2.04,-124.88,-0.3,-3.2,91.2,26.12,26.87 125 | 124,10,4,-2.02,-124.89,-4,-1,89.5,26.43,26.82 126 | 125,10,5,-2.03,-124.89,-4.6,-1.3,86.2,26.67,27.19 127 | 126,10,6,-2.04,-124.9,-6.4,-2.8,82.5,26.71,27.13 128 | 127,10,7,-2.04,-124.9,-5.9,-2,81.3,26.31,26.91 129 | 128,10,8,-2.04,-124.9,-3.2,-2.7,85.4,25.92,26.7 130 | 129,10,9,-2.04,-124.89,-4.4,-1.3,84.6,25.76,26.17 131 | 130,10,10,-2.04,-124.89,-3.6,-2.2,87,25.57,25.98 132 | 131,10,11,-2.04,-124.89,-3,-3.5,87.9,25.06,25.81 133 | 132,10,12,-2.04,-124.89,-3.9,-1.8,83.3,25.06,25.41 134 | 
133,10,13,-2.03,-124.89,-2.7,-3.2,87.5,24.9,25.42 135 | 134,11,1,-5.02,-124.94,-7.2,-0.9,87.1,28.09,28.19 136 | 135,11,2,-5.01,-124.94,-6,0.1,85.6,28.36,28.23 137 | 136,11,3,-5.02,-124.94,-6.7,-0.2,83,28.32,28.32 138 | 137,11,4,-5.01,-124.94,-6,1.4,79.6,28.17,28.26 139 | 138,11,5,-5.01,-124.94,-5.3,0.6,84.5,27.51,28.41 140 | 139,11,6,-5.01,-124.94,-7.2,-2.8,82.2,28.05,28.46 141 | 140,11,7,-5.02,-124.94,-5.5,-3.5,80,27.89,28.42 142 | 141,11,8,-5.02,-124.95,-4.5,-2.2,83.4,27.82,28.25 143 | 142,11,9,-5.02,-124.93,-2.5,-3.8,81.9,27.7,28.22 144 | 143,11,10,-5.02,-124.94,-4.5,-3.7,80.7,27.97,28.17 145 | 144,11,11,-5.02,-124.94,-2.8,-1.3,79.2,27.74,28.32 146 | 145,11,12,-5.02,-124.93,-5.7,-2,85.2,27,28.21 147 | 146,11,13,-5.01,-124.94,-7.6,1.4,75.5,27.82,28.05 148 | 147,12,1,-7.97,-125.01,-8,0.7,79.6,28.29, 149 | 148,12,2,-7.97,-125.01,-6.1,1,79.6,28.25, 150 | 149,12,3,-7.97,-125.02,-6.2,1.8,76.6,28.05, 151 | 150,12,4,-7.97,-125.02,-5.1,1,71.5,27.81, 152 | 151,12,5,-7.97,-125.02,-6.2,0.4,76.2,27.97, 153 | 152,12,6,-7.97,-125.02,-7.9,0.9,78.3,27.85, 154 | 153,12,7,-7.97,-125.01,-6.6,0.8,83.5,27.18, 155 | 154,12,8,-7.95,-125.01,-6.3,1,75.8,28.05, 156 | 155,12,9,-7.96,-125.01,-6.2,1.6,82.6,27.38, 157 | 156,12,10,-7.97,-125.01,-8.5,-0.3,78.8,27.85, 158 | 157,12,11,-7.97,-125.01,-6.1,2.4,74.9,28.01, 159 | 158,12,12,-7.96,-125.02,-7.7,2.6,79.6,27.42, 160 | 159,12,13,-7.97,-125.02,-7.9,2.3,71.9,27.46, 161 | 160,13,1,8.05,-110.15,-3.9,-1.2,,28.84,29.66 162 | 161,13,2,8.05,-110.15,-0.4,4.2,,27.88,29.61 163 | 162,13,3,8.04,-110.15,4.8,2.5,,28.63,29.6 164 | 163,13,4,8.04,-110.16,2.3,-1.6,,27.87,29.58 165 | 164,13,5,8.04,-110.16,0.5,-2.4,,29.27,29.91 166 | 165,13,6,8.04,-110.15,0.6,-4,,29.26,29.99 167 | 166,13,7,8.04,-110.15,1.4,-5.1,,28.92,29.89 168 | 167,13,8,8.04,-110.15,1.2,-0.6,,28.9,29.9 169 | 168,13,9,8.04,-110.15,3.9,0,,28.77,29.92 170 | 169,13,10,8.05,-110.15,2.2,0.5,,28.2,29.82 171 | 170,13,11,8.04,-110.16,2.9,-2.5,,29.35,30.08 172 | 
171,13,12,8.05,-110.16,1.9,1.4,,28.34,30.04 173 | 172,13,13,8.05,-110.15,4.3,4.2,,27.92,29.87 174 | 173,14,1,4.99,-109.96,-3.4,5.9,83.2,27.98,28.89 175 | 174,14,2,4.98,-109.96,-3.1,5.3,79.5,28.02,28.84 176 | 175,14,3,4.98,-109.96,1.4,5.8,81.6,27.7,28.75 177 | 176,14,4,4.97,-109.96,3.1,3.5,80.7,27.51,28.6 178 | 177,14,5,4.98,-109.96,-0.1,4.9,81.1,27.51,28.67 179 | 178,14,6,4.97,-109.96,-0.5,4.7,80.7,27.55,28.74 180 | 179,14,7,4.97,-109.96,-0.7,6,80.7,27.35,28.54 181 | 180,14,8,4.97,-109.96,-1.3,6.3,80.3,27.11,28 182 | 181,14,9,4.98,-109.96,-1.5,4.9,78.7,27.15,27.74 183 | 182,14,10,4.98,-109.96,-2.3,4.2,79.9,27.19,27.6 184 | 183,14,11,4.97,-109.96,-1.4,4.7,80.7,27.19,27.6 185 | 184,14,12,4.98,-109.96,-0.3,6.1,81.1,26.91,27.63 186 | 185,14,13,4.98,-109.96,-0.3,7.1,81.1,26.75,27.7 187 | 186,15,1,2.08,-110.1,-6.1,3.5,86.9,26.35,26.89 188 | 187,15,2,2.08,-110.1,-5.4,3.2,87.3,26.12,26.88 189 | 188,15,3,2.08,-110.1,-1.2,3.7,89.8,25.53,26.3 190 | 189,15,4,2.08,-110.1,0.4,2.6,88.2,25.45,26.09 191 | 190,15,5,2.08,-110.11,-1.9,2.9,87.7,24.91,25.83 192 | 191,15,6,2.07,-110.14,-3.6,2.7,89,24.91,25.84 193 | 192,15,7,2.08,-110.11,-4.5,3.7,88.6,24.67,26.07 194 | 193,15,8,2.08,-110.11,-5.2,4,86.5,24.75,26.16 195 | 194,15,9,2.08,-110.1,-4.9,2.7,83.5,25.26,26.24 196 | 195,15,10,2.08,-110.11,-4.1,1.5,84.8,25.18,26.18 197 | 196,15,11,2.08,-110.1,-3.5,2.5,86.5,25.03,26.09 198 | 197,15,12,2.08,-110.1,-2.8,4.1,86,24.17,25.47 199 | 198,15,13,2.08,-110.1,-2.8,3.9,87.3,23.74,24.56 200 | 199,16,1,-1.98,-110.01,-4.9,0.8,92.2,25.96,25.63 201 | 200,16,2,-1.98,-110,-2.2,1.5,93,25.77,25.28 202 | 201,16,3,-1.98,-110,-2.8,2.8,91.8,25.05,24.85 203 | 202,16,4,-1.98,-110.01,-0.9,0.7,91.4,25.09,25.01 204 | 203,16,5,-1.98,-110.01,-3.1,2.2,89.1,25.21,25.42 205 | 204,16,6,-1.98,-110.01,-4.4,0.3,89.1,25.45,25.59 206 | 205,16,7,-1.98,-110.01,-4.8,-0.1,87.5,25.45,25.66 207 | 206,16,8,-1.98,-110,-4.5,0.7,88.3,25.45,25.34 208 | 207,16,9,-1.98,-110,-3.3,-0.3,89.5,25.33,25.27 209 | 
208,16,10,-1.98,-110,-3.5,-0.7,90.6,25.09,25.08 210 | 209,16,11,-1.98,-110,-2.9,-0.4,91,25.29,25.17 211 | 210,16,12,-1.98,-110,-2.7,0,88.3,24.82,25.11 212 | 211,16,13,-1.98,-110,-3.9,0.8,87.1,, 213 | 212,17,1,-4.98,-109.98,-7.1,2.8,86.2,27.28,27.31 214 | 213,17,2,-4.98,-109.99,-6,3.5,81.7,27.44,27.13 215 | 214,17,3,-4.97,-109.99,-4.3,4.7,80.5,27.36,27.19 216 | 215,17,4,-4.98,-109.99,-1.9,1.8,79.7,26.77,27.41 217 | 216,17,5,-4.98,-109.99,-4.5,1.4,79.7,26.93,27.46 218 | 217,17,6,-4.98,-109.98,-5.2,-0.9,82.1,26.62,27.19 219 | 218,17,7,-4.98,-109.99,-5,-0.7,83.3,26.58,26.95 220 | 219,17,8,-4.99,-109.99,-5.3,0,85,26.66,26.83 221 | 220,17,9,-4.98,-109.99,-4.6,-1,85,26.66,26.79 222 | 221,17,10,-4.98,-109.99,-5.4,-1,85,26.54,26.67 223 | 222,17,11,-4.98,-109.99,-4.6,0.7,84.1,26.27,26.56 224 | 223,17,12,-4.98,-109.99,-3.2,-1.2,88.2,25.88,26.64 225 | 224,17,13,-4.98,-109.99,-5.2,1.9,84.6,26.62,26.64 226 | 225,18,1,-8.05,-109.94,,,80.1,27.72,27.99 227 | 226,18,2,-8.05,-109.92,,,78.4,27.56,28.02 228 | 227,18,3,-8.05,-109.93,,,77.3,27.44,28.1 229 | 228,18,4,-8.05,-109.93,,,79.8,27.08,28.15 230 | 229,18,5,-8.05,-109.93,,,83.2,26.72,28.09 231 | 230,18,6,-8.10,-109.94,,,85.3,26.64,27.99 232 | 231,18,7,-8.05,-109.94,,,87.8,26.76,27.97 233 | 232,18,8,-8.05,-109.93,,,84.3,27.4,27.97 234 | 233,18,9,-8.05,-109.93,,,87.8,26.88,28.01 235 | 234,18,10,-8.05,-109.93,,,80.8,27.56,27.99 236 | 235,18,11,-8.05,-109.93,,,79.8,27.24,28.01 237 | 236,18,12,-8.05,-109.93,,,80.8,27.04,28.01 238 | 237,18,13,-8.05,-109.93,,,79.8,27.32,27.93 239 | 238,19,1,8.06,-94.96,-2.3,2.4,86.4,29.14, 240 | 239,19,2,8.06,-94.95,1.7,-0.3,83.7,29.24, 241 | 240,19,3,8.05,-94.96,0.9,1,87.1,28.53 242 | 241,19,4,8.05,-94.95,7,1.3,84.4,29.18 243 | 242,19,5,8.05,-94.96,3.3,2.8,83.7,29.32 244 | 243,19,6,8.06,-94.96,2.8,1.8,84.8,29.09 245 | 244,19,7,8.06,-94.95,0.4,1.1,83.7,28.97 246 | 245,19,8,8.06,-94.98,2,0.8,83.7,28.66 247 | 246,19,9,8.06,-94.95,2.5,1.3,84.1,28.67 248 | 247,19,10,8.05,-94.95,2.6,0.2,84.8,28.73 249 | 
248,19,11,8.05,-94.95,2,-1.2,79.8,29.32 250 | 249,19,12,8.05,-94.95,2.8,-2.1,85.2,28.75 251 | 250,19,13,8.06,-94.95,3.9,1.4,87.9,27.74 252 | 251,20,1,5.02,-94.95,,,88.6,27.5 253 | 252,20,2,5.02,-94.95,,,81.5,28.61 254 | 253,20,3,5.02,-94.95,,,88.2, 255 | 254,20,4,5.03,-94.95,,,,28.72 256 | 255,20,5,5.03,-94.95,,,81.1,28.91 257 | 256,20,6,5.02,-94.95,,,83.1,, 258 | 257,20,7,5.02,-94.95,,,85.4,27.96, 259 | 258,20,8,5.02,-94.95,,,27.58,, 260 | 259,20,9,5.02,-94.96,,,83.1,28.3, 261 | 260,20,10,5.02,-94.96,,,,28.19, 262 | 261,20,11,5.03,-94.95,,,85.4,28.57, 263 | 262,20,12,5.03,-94.95,,,87.8,26.96, 264 | 263,20,13,5.03,-94.95,,,85,27.69, 265 | 264,21,1,1.99,-95,-3.2,6.8,81.8,28.12,29.29 266 | 265,21,2,1.99,-95,-1.8,6.6,81.8,27.88,29.23 267 | 266,21,3,1.99,-95,0.1,6.7,82.6,27.73,29.18 268 | 267,21,4,1.99,-95.01,3,5.8,83,27.65,29.08 269 | 268,21,5,1.97,-95.01,0.5,4.5,81.3,27.65,29.14 270 | 269,21,6,1.98,-95,-1.4,5.6,81.8,27.77,29.3 271 | 270,21,7,1.98,-95,-3.4,6.4,80.9,27.65,29.36 272 | 271,21,8,2.00,-95.05,-3.3,4,,,29.3 273 | 272,21,9,2.00,-95.03,1,5.2,,,28.97 274 | 273,21,10,2.00,-95.04,0.5,5.5,,, 275 | 274,21,11,2.00,-95.04,,,,,27.99 276 | 275,21,12,2.00,-95.04,-1,6.3,,,28.21 277 | 276,21,13,2.00,-95.03,-1.4,6.3,,,28.27 278 | 277,22,1,0.11,-94.99,-4.9,4.8,88.3,25.67,27.63 279 | 278,22,2,0.12,-94.99,-4,4.2,91.2,25.15,27.14 280 | 279,22,3,0.12,-94.98,-1.9,4.2,87.9,25.03,26.56 281 | 280,22,4,0.10,-94.98,1.2,3.5,89,24.83,26.45 282 | 281,22,5,0.10,-94.98,-2.8,3,89.4,24.51,26.71 283 | 282,22,6,0.11,-94.98,,,,24.35,27.12 284 | 283,22,7,0.11,-95.2,-3.4,1.9,,,26.76 285 | 284,22,8,0.09,-95.19,-6.5,0.2,,,27.01 286 | 285,22,9,0.10,-95.21,-6.3,-1.1,,, 287 | 286,22,10,0.11,-95.2,-5.2,-0.7,,,25.57 288 | 287,22,11,0.11,-95.2,-5.2,-0.2,,,25.07 289 | 288,22,12,0.10,-95.19,-4.2,0.7,,,24.87 290 | 289,22,13,0.13,-95.19,-3.4,0.6,,,24.21 291 | 290,23,1,-1.97,-95.03,-3.4,2.2,90.8,25.83,25.5 292 | 291,23,2,-1.97,-95.03,-3.6,2.2,89.6,25.99,25.7 293 | 
292,23,3,-1.97,-95.03,-1.7,0.7,88.4,25.71,25.61 294 | 293,23,4,-1.98,-95.03,1,-0.2,89.6,25.24,25.42 295 | 294,23,5,-1.97,-95.03,-2.9,0.2,88.8,25,25.54 296 | 295,23,6,-1.97,-95.02,-3.6,1,90.8,, 297 | 296,23,7,-1.97,-95.02,-3.6,1.2,93.2,24.68,24.64 298 | 297,23,8,-1.96,-94.99,-3.5,0.1,92,24.92,24.99 299 | 298,23,9,-1.96,-94.99,-4.5,0.5,89.2,25.04,24.89 300 | 299,23,10,-1.97,-95,-3.8,1,90,25.16,25.16 301 | 300,23,11,-1.97,-95.02,-2.9,2.5,88,25.36,25.51 302 | 301,23,12,-1.98,-95.03,-3,2.3,90.4,25.4,25.51 303 | 302,23,13,-1.98,-95.03,-3.3,2.8,91.6,25.28,25.16 304 | 303,24,1,-5.01,-95.07,-6.6,4.9,77.3,27.63,27.64 305 | 304,24,2,-5.03,-95.07,-6.4,3.5,78.1,27.59,27.63 306 | 305,24,3,-5.02,-95.07,-4.1,2.9,78.9,27.4,27.24 307 | 306,24,4,-5.01,-95.07,-1.7,1.8,81.3,26.68,27.25 308 | 307,24,5,-5.02,-95.07,-3.5,1.4,82.1,26.64,27.26 309 | 308,24,6,-5.02,-95.07,-4.6,3.6,83.3,26.53,27.14 310 | 309,24,7,-5.02,-95.07,-6.1,2.8,79.3,27.04,27.18 311 | 310,24,8,-5.01,-95.06,,,,26.76,27.26 312 | 311,25,1,-8.02,-95.11,-7.8,3.5,75.8,27.38,27.71 313 | 312,25,2,-8.01,-95.1,-7,3.6,78.1,27.03,27.67 314 | 313,25,3,-8.01,-95.1,-6.1,3.9,83.4,26.01,27.65 315 | 314,25,4,-8.01,-95.1,-3.1,4.7,76.5,26.56,27.67 316 | 315,25,5,-8.01,-95.1,-4.6,2,78.1,26.6,27.69 317 | 316,25,6,-8.02,-95.1,-5.9,1.6,82.7,26.64,27.66 318 | 317,25,7,-8.01,-95.1,-7.7,4.6,80,26.79,27.55 319 | 318,25,8,-8.01,-95.1,-7.2,4.5,78.1,26.67,27.5 320 | 319,25,9,-8.02,-95.08,-6.1,2.7,73.9,26.71,27.53 321 | 320,25,10,-8.01,-95.1,-5.2,1.7,76.5,26.71,27.59 322 | 321,25,11,-8.01,-95.1,-3.9,2.1,77.3,26.67,27.61 323 | 322,25,12,-8.01,-95.1,-3.2,3.7,79.6,26.52,27.63 324 | 323,25,13,-8.01,-95.1,-3.4,3.9,78.8,26.48,27.59 325 | 324,26,1,8.00,-179.91,,,,27.78,28.33 326 | 325,26,2,7.99,-179.88,,,,28.22,28.36 327 | 326,26,3,8.00,-179.91,,,,28.02,28.37 328 | 327,26,4,8.00,-179.91,,,,27.94,28.39 329 | 328,26,5,8.00,-179.91,,,,27.21,28.39 330 | 329,26,6,8.00,-179.91,,,,27.98,28.38 331 | 330,26,6,8.00,-179.91,,,,27.86,28.41 332 | 
331,26,7,8.00,-179.91,,,,28.18,28.46 333 | 332,26,8,8.00,-179.91,,,,28.1,28.4 334 | 333,26,9,7.99,-179.91,,,,27.98,28.43 335 | 334,26,10,8.00,-179.91,,,,27.94,28.42 336 | 335,26,11,8.00,-179.91,,,,27.66,28.42 337 | 336,26,12,7.99,-179.9,,,,27.78,28.46 338 | 337,26,13,8.00,-179.91,,,,28.06,28.48 339 | 338,26,14,8.00,-179.91,,,,28.06,28.48 340 | 339,27,1,2.02,179.8,-6,-3.2,85.4,28.09,28.41 341 | 340,27,2,2.03,179.8,-6.5,-0.3,90.4,27.19,28.28 342 | 341,27,3,2.03,179.8,-5.6,-1.9,86.6,28.06,28.44 343 | 342,28,1,0.07,-179.87,-4.4,-2.5,91.9,28.06,28.09 344 | 343,28,2,0.08,-179.87,-4.5,-0.1,91.6,28.06,28.26 345 | 344,28,3,0.08,-179.87,-4.7,-1.7,92.6,28.06,28.56 346 | 345,28,4,0.08,-179.87,-4.8,-2.1,90.6,28.37,28.71 347 | 346,28,5,0.08,-179.87,-4.2,0.5,89.2,28.49,28.92 348 | 347,28,6,0.08,-179.87,-3.7,-3,87.5,28.53,29.06 349 | 348,28,7,0.08,-179.87,-1.7,-2.8,89.6,28.14,29.01 350 | 349,28,8,0.08,-179.87,-2.3,-1.7,89.9,28.14,29.06 351 | 350,28,9,0.08,-179.87,-4.4,-1.3,88.2,28.45,29.36 352 | 351,28,10,0.08,-179.87,-4.5,-1.7,87.5,28.45,29.36 353 | 352,28,11,0.08,-179.87,-5.2,-1.4,90.6,27.71,29.23 354 | 353,28,12,0.08,-179.87,-3.6,-2.5,89.6,28.29,28.97 355 | 354,28,13,0.08,-179.87,-4.7,-2.9,89.6,28.33,28.9 356 | 355,28,14,0.07,-179.87,-6.7,-1.7,89.9,28.33,28.49 357 | 356,29,1,-1.99,-179.87,-2.8,-1,88.3,27.61,29.09 358 | 357,29,2,-1.99,-179.86,-4.2,-2.5,87.9,28.04,28.99 359 | 358,29,3,-1.99,-179.86,-3.1,-2.9,87.1,28.39,29.32 360 | 359,29,4,-1.98,-179.85,-3,0.9,84.6,28.59,29.34 361 | 360,29,5,-1.98,-179.85,-2.6,-2.2,86.2,28.35,29.58 362 | 361,29,6,-1.99,-179.85,-2.8,-2.2,87.9,27.26,29.38 363 | 362,29,7,-1.99,-179.85,-2.9,-1.6,83.7,28.59,29.54 364 | 363,29,8,-1.99,-179.85,-4.2,-1.1,83.7,28.74,29.6 365 | 364,29,9,-1.99,-179.85,-2.2,-2.3,82.9,28.66,29.6 366 | 365,29,10,-1.99,-179.85,-4.7,-1.7,83.3,28.55,29.57 367 | 366,29,11,-1.98,-179.85,-2.8,-2,82.1,28.86,29.69 368 | 367,29,12,-1.99,-179.85,-3.2,-4.1,87.1,27.8,29.57 369 | 368,29,13,-1.99,-179.84,-6.6,-2.9,81.6,29.09,29.53 370 | 
369,29,14,-1.99,-179.84,-6.6,-2.9,81.6,29.09,29.53 371 | 370,30,1,-5.02,-179.96,,,,,29.34 372 | 371,30,2,-5.03,-179.96,,,,,29.26 373 | 372,30,3,-5.03,-179.95,,,,,29.45 374 | 373,30,4,-5.03,-179.97,,,,,29.44 375 | 374,30,5,-5.02,-179.96,,,,,29.35 376 | 375,30,6,-5.02,-179.96,,,,,29.44 377 | 376,30,7,-5.02,-179.96,,,,,29.25 378 | 377,30,8,-5.02,-179.96,,,,,29.26 379 | 378,30,9,-5.02,-179.96,-3.9,0,,,29.34 380 | 379,30,10,-5.02,-179.96,-2,0.8,,,29.57 381 | 380,30,11,-5.02,-179.96,-3.2,-0.3,,,29.61 382 | 381,30,12,-5.02,-179.96,-1.3,-1.3,,,29.58 383 | 382,30,13,-5.03,-179.96,-4.4,-3.2,,,29.59 384 | 383,30,14,-5.03,-179.96,-6.6,-3,,,29.47 385 | 384,31,1,-7.97,-179.86,-1.8,-1.1,,,29.53 386 | 385,31,2,-7.97,-179.87,-3,1.1,,,29.6 387 | 386,31,3,-7.97,-179.87,-6.5,-0.2,,,29.4 388 | 387,31,4,-7.97,-179.87,-5.6,2.1,,,29.35 389 | 388,31,5,-7.97,-179.86,-4.6,2.1,,,29.33 390 | 389,31,6,-7.97,-179.87,-4.3,1.5,,,29.3 391 | 390,31,7,-7.97,-179.87,-3.4,1.1,,,29.23 392 | 391,31,8,-7.97,-179.86,-3.4,0.1,,,29.24 393 | 392,31,9,-7.97,-179.86,-5.5,1.7,,,29.19 394 | 393,31,10,-7.97,-179.86,-4.7,0.3,,,29.26 395 | 394,31,11,-7.97,-179.87,-5.7,-0.9,,,29.18 396 | 395,31,12,-7.98,-179.86,-5.8,-1.5,,,29.11 397 | 396,31,13,-7.97,-179.87,-6,-1.8,,,29.12 398 | 397,31,14,-7.97,-179.87,-6.8,-1.9,,, 399 | 398,32,1,8.01,-170.06,-7.4,-5,82.6,27.96,28.22 400 | 399,32,2,8.01,-170.06,-7.5,-5.6,82,28,28.2 401 | 400,32,3,8.01,-170.06,-7.3,-5.7,84.7,27.93,28.19 402 | 401,32,4,8.01,-170.06,-7.4,-4.8,84,27.96,28.25 403 | 402,32,5,8.01,-170.06,-6.1,-5.7,82.3,27.93,28.26 404 | 403,32,6,8.00,-170.06,-6.2,-5,84.4,27.85,28.26 405 | 404,32,7,8.01,-170.05,-7,-4.7,84,27.89,28.24 406 | 405,32,8,8.00,-170.05,-5.9,-4.5,80.6,27.85,28.27 407 | 406,32,9,8.00,-170.05,-5.5,-4.3,79.9,27.73,28.29 408 | 407,32,10,8.00,-170.06,-5.3,-4.6,79.2,27.54,28.29 409 | 408,32,11,8.01,-170.06,-5.1,-2.6,77.2,27.61,28.3 410 | 409,32,12,8.00,-170.06,-3.7,-2.7,84.7,27.69,28.39 411 | 410,32,13,8.00,-170.05,-4.6,-2.1,87.4,28,28.41 412 | 
411,32,14,8.00,-170.07,-6.7,-1.4,86.4,27.81,28.52 413 | 412,33,1,4.98,-169.96,-7.9,-2.8,,27.83,28.47 414 | 413,33,2,4.97,-169.98,-7.9,-3,,27.97,28.44 415 | 414,33,3,4.97,-169.97,-8.9,-1.8,,27.47,28.39 416 | 415,33,4,4.98,-169.97,-7.8,-2,,27.21,28.33 417 | 416,33,5,4.98,-169.98,-7.6,-3.2,,27.72,28.34 418 | 417,33,6,4.97,-169.98,-7.7,-1,,27.61,28.32 419 | 418,33,7,4.97,-169.97,-8.4,-2.1,,27.81,28.36 420 | 419,33,8,4.97,-169.98,-6.7,-2.6,,27.43,28.31 421 | 420,33,9,4.97,-169.97,-6,-4.9,,27.85,28.27 422 | 421,33,10,4.97,-169.96,-6.2,-4.5,,27.72,28.31 423 | 422,33,11,4.97,-169.98,-5.7,-3.8,,27.71,28.33 424 | 423,33,12,4.97,-169.98,-4.9,-3.5,,27.84,28.39 425 | 424,33,13,4.97,-169.98,-4.2,-1.2,,27.61,28.53 426 | 425,33,14,4.97,-169.98,-6.2,2.2,,26.86,28.5 427 | 426,34,1,1.97,-170.02,-4.8,-3.5,94.9,27.85,28.36 428 | 427,34,2,1.98,-170.02,-5.7,-3.1,96.9,28.05,28.29 429 | 428,34,3,1.98,-170.01,-6.1,-0.4,95.7,27.97,28.22 430 | 429,34,4,1.98,-170.01,-4.9,-0.6,98.6,27.78,28.2 431 | 430,34,5,1.97,-170.01,-4.7,-0.3,99,27.85,28.19 432 | 431,34,6,1.98,-170.01,-4.4,-0.9,97.4,27.89,28.19 433 | 432,34,7,1.98,-170,-4.7,-3.1,96.9,28.01,28.04 434 | 433,34,8,1.98,-170.01,-3.5,-1.3,96.5,27.66,28.21 435 | 434,34,9,1.97,-170,-3.2,-0.9,99.4,26.99,28.34 436 | 435,34,10,1.97,-170,-3.7,-4.4,94.9,27.66,28.38 437 | 436,34,11,1.97,-170,-3.8,-3.4,94.9,27.74,28.43 438 | 437,34,12,1.97,-170,-3.7,-3.2,94.9,27.97,28.56 439 | 438,34,13,1.98,-170.01,-3.9,-1.8,92.5,28.2,28.58 440 | 439,34,14,1.97,-170,-6.3,0.4,89.7,28.24,28.65 441 | 440,35,1,-0.03,-170.02,-4.5,-2.6,90.4,27.72,27.7 442 | 441,35,2,-0.03,-170.03,-4.9,-2.7,89.6,28.15,28.25 443 | 442,35,3,-0.03,-170.02,-5,-1.9,88.3,28.23,28.38 444 | 443,35,4,-0.03,-170.03,-5.6,-1.8,87.9,28.15,28.17 445 | 444,35,5,-0.03,-170.02,-4.8,-0.5,88.7,28.07,28.1 446 | 445,35,6,-0.03,-170.02,-4.4,-2.2,88.3,28.15,28.55 447 | 446,35,7,-0.03,-170.02,-4.9,-2.3,86.2,28.34,28.8 448 | 447,35,8,-0.03,-170.02,-3.5,-1.3,85.3,28.38,28.99 449 | 
448,35,9,-0.03,-170.02,-3,-0.6,82.8,28.58,29.24 450 | 449,35,10,-0.03,-170.02,-3.2,-4.4,84.5,28.23,28.87 451 | 450,35,11,-0.03,-170.02,-3.4,-3.2,85.7,28.19,28.74 452 | 451,35,12,-0.03,-170.02,-3.9,-3.9,84.9,28.23,28.49 453 | 452,35,13,-0.04,-170.03,-4.3,-2.6,85.3,28.11,28.2 454 | 453,35,14,-0.03,-170.03,-6,-1.7,85.7,27.99,28.12 455 | 454,36,1,-2.15,-170.02,-4.3,-2.3,87.1,28.97,29.59 456 | 455,36,2,-2.16,-170.02,-4.3,-1.7,94.2,27.97,29.48 457 | 456,36,3,-2.15,-170.02,-4.1,-2.1,91.1,28.65,29.51 458 | 457,36,4,-2.15,-170.02,-4.2,-2.5,90.7,28.81,29.6 459 | 458,36,5,-2.15,-170.02,-3.2,-1.1,89.9,28.69,29.6 460 | 459,36,6,-2.15,-170.01,-4.7,-2,90.3,28.57,29.52 461 | 460,36,7,-2.16,-170.01,-3.7,-4.6,87.1,29.13,29.43 462 | 461,36,8,-2.16,-170.01,-0.6,-1.2,90.7,28.13,29.65 463 | 462,36,9,-2.16,-170.01,-0.7,-0.1,87.1,28.49,29.96 464 | 463,36,10,-2.16,-170.01,-2.8,-0.6,86.7,28.65,29.84 465 | 464,36,11,-2.16,-170.01,-3,-1.3,89.9,28.37,29.7 466 | 465,36,12,-2.16,-170.01,-4.5,-3.5,94.2,27.61,29.55 467 | 466,36,13,-2.16,-170.01,-4,-4.1,86.3,28.49,29.43 468 | 467,36,14,-2.16,-170.01,-7.1,-1,82.8,29.21,29.47 469 | 468,37,1,-5.01,-169.99,,,83.3,28.56,29.54 470 | 469,37,2,-5.01,-169.99,,,80.9,28.84,29.49 471 | 470,37,3,-5.01,-169.99,,,79.7,29.11,29.9 472 | 471,37,4,-5.01,-169.99,,,82.5,29.07,29.82 473 | 472,37,5,-5.01,-169.98,,,83.3,28.95,29.66 474 | 473,37,6,-5.01,-169.99,,,84.1,28.56,29.48 475 | 474,37,7,-5.02,-169.98,,,85.3,28.49,29.38 476 | 475,37,8,-5.01,-169.99,,,82.5,28.87,29.65 477 | 476,37,9,-5.01,-169.99,,,81.3,28.29,29.68 478 | 477,37,10,-5.01,-169.99,,,80.5,28.72,29.73 479 | 478,37,11,-5.01,-169.99,,,80.9,29.15,29.64 480 | 479,37,12,-5.01,-169.99,,,82.9,29.15,29.49 481 | 480,37,13,-5.02,-169.99,,,81.7,28.95,29.5 482 | 481,37,14,-5.01,-169.99,,,77.3,29.34,29.49 483 | 482,38,1,-7.98,-170.03,,,,28.18,29.58 484 | 483,38,2,-7.98,-170.03,,,,28.34,29.51 485 | 484,38,3,-7.98,-170.03,-6.1,1.8,82.5,28.06,29.45 486 | 485,38,4,-7.98,-170.03,,,,28.02,29.33 487 | 
486,38,5,-7.98,-170.03,,,,, 488 | 487,38,6,-7.98,-170.03,-6.2,0,81.7,28.7,29.39 489 | 488,38,7,-8.00,-170,,,,, 490 | 489,38,8,-8.00,-170,-3.1,1.1,82.9,28.3,29.38 491 | 490,38,9,-8.00,-170,-3.4,0.5,80,, 492 | 491,38,10,-8.00,-170,-2.7,0,79.1,28.46,29.61 493 | 492,38,11,-8.00,-170,,,,, 494 | 493,38,12,-8.00,-170,,,,, 495 | 494,38,13,-8.00,-170,,,,27.89,29.44 496 | 495,39,1,7.97,-155.01,,,,,27.88 497 | 496,39,2,7.97,-155,,,,,27.88 498 | 497,39,3,7.97,-155.01,,,,,27.88 499 | 498,39,4,7.98,-155,,,,,27.9 500 | 499,39,5,7.97,-155,,,,,27.93 501 | 500,39,6,7.98,-155.01,,,,,27.94 502 | 501,39,7,7.97,-155,,,,,27.94 503 | 502,39,8,7.97,-155,,,,,27.99 504 | 503,39,9,7.97,-155,,,,,28.04 505 | 504,39,10,7.98,-155,,,,,28.03 506 | 505,39,11,7.97,-155.01,,,,,28.1 507 | 506,39,12,7.97,-155,,,,,28.1 508 | 507,39,13,7.98,-155.01,,,,,28.08 509 | 508,39,14,7.98,-155,,,,,28.06 510 | 509,40,1,4.98,-154.94,-6.2,-1.8,89.2,26.67,28.2 511 | 510,40,2,4.97,-154.94,-7,-2.2,91.8,26.71,28.18 512 | 511,40,3,4.98,-154.93,-6.9,-4.1,90.9,27.39,28.2 513 | 512,40,4,4.98,-154.94,-6.9,-2.9,91.4,27.67,28.22 514 | 513,40,5,4.98,-154.93,-6.8,-1.9,91.4,27.47,28.19 515 | 514,40,6,4.98,-154.93,-5.8,-2.2,92.2,26.95,28.21 516 | 515,40,7,4.98,-154.94,-3.1,-3,87.5,27.27,28.14 517 | 516,40,8,4.98,-154.93,-3.3,-1.9,85,27.39,28.41 518 | 517,40,9,4.98,-154.93,0.5,0.3,81.6,28.07,29.12 519 | 518,40,10,4.99,-154.93,-1.6,0.5,85.4,27.51,29.3 520 | 519,40,11,4.99,-154.93,-4.5,0.1,85,27.87,29.17 521 | 520,40,12,4.97,-154.94,-6.2,-0.5,88.8,27.83,28.73 522 | 521,40,13,4.99,-154.93,-8.1,-0.4,90.9,27.59,28.59 523 | 522,40,14,4.98,-154.94,-5.9,-0.3,86.3,27.39,28.49 524 | 523,41,1,1.99,-154.96,-0.9,0.1,,,28.19 525 | 524,41,2,1.98,-154.95,-3.5,-1.8,,,28.27 526 | 525,41,3,1.99,-154.97,-4.5,-1.4,,,28.11 527 | 526,41,4,1.99,-154.96,-4.8,-0.5,,,27.88 528 | 527,41,5,1.99,-154.96,-5.2,1.9,,,27.92 529 | 528,41,6,1.99,-154.96,-4.5,1.4,,,27.84 530 | 529,41,7,1.98,-154.96,-3.7,-1.6,,,27.95 531 | 530,41,8,1.99,-154.96,-2.9,-1.5,,,28.11 532 | 
531,41,9,1.98,-154.96,-1.4,1.6,,,28.54 533 | 532,41,10,1.98,-154.95,-3.5,1.6,,,28.37 534 | 533,41,11,1.99,-154.95,-5.4,0.6,,,28.11 535 | 534,41,12,1.99,-154.95,-6.2,1.2,,,27.9 536 | 535,41,13,1.99,-154.95,-7.3,2.8,,,27.77 537 | 536,41,14,1.99,-154.95,-4.2,1.8,,,27.66 538 | 537,42,1,0.00,-154.96,0,-0.1,,25.94,27.12 539 | 538,42,2,-0.01,-154.96,-1.8,-1.8,,26.74,27.48 540 | 539,42,3,0.00,-154.95,-3.6,-2.8,,26.98,27.6 541 | 540,42,4,0.00,-154.96,-4.3,-1.9,,26.94,27.58 542 | 541,42,5,0.00,-154.96,-5.3,-0.9,,27.15,27.69 543 | 542,42,6,0.00,-154.96,-3.8,-1.6,,27.15,27.76 544 | 543,42,7,-0.01,-154.96,-3.4,-2.8,,27.02,28.04 545 | 544,42,8,0.00,-154.96,-2.6,-2.7,,26.98,28.02 546 | 545,42,9,0.00,-154.96,-2.5,0,,26.98,28.07 547 | 546,42,10,0.00,-154.96,-4.1,-1.1,,26.7,27.98 548 | 547,42,11,-0.01,-154.95,-4.9,-1.8,,26.46,27.57 549 | 548,42,12,0.00,-154.95,-6,-1.4,,26.02,27.01 550 | 549,42,13,0.00,-154.96,-6.3,-0.4,,25.82,26.57 551 | 550,42,14,0.00,-154.96,-3.6,-0.6,,25.7,26.49 552 | 551,43,1,-2.00,-154.98,-1.9,1.6,81.5,27.37,28.97 553 | 552,43,2,-2.00,-154.98,-1.3,-0.7,83.3,28.17,29.07 554 | 553,43,3,-1.99,-154.99,-2.2,-3.8,86.4,27.57,29.09 555 | 554,43,4,-2.00,-154.98,-4,-4.8,78.4,28.69,29.42 556 | 555,43,5,-2.00,-154.99,-5.9,-2.5,77.6,29.01,29.53 557 | 556,43,6,-2.00,-155,-5.3,0.2,79.8,29.09,29.56 558 | 557,43,7,-2.00,-154.99,-4.5,-1.3,81.1,28.69,29.58 559 | 558,43,8,-2.00,-154.98,-4.8,-3.7,77.6,28.57,29.49 560 | 559,43,9,-2.01,-154.99,-3.5,-2.7,76.7,28.49,29.55 561 | 560,43,10,-2.00,-154.98,-3.7,-2.7,74.5,28.33,29.5 562 | 561,43,11,-2.00,-154.99,-4.7,-3.7,75.8,28.13,29.51 563 | 562,43,12,-2.00,-154.99,-6.3,-3.2,72.3,28.01,29.39 564 | 563,43,13,-2.01,-154.99,-6.9,-1.5,74.5,28.53,29.3 565 | 564,43,14,-2.00,-154.99,-4.1,-2.6,81.1,27.89,29.2 566 | 565,44,1,-5.00,-154.99,3.1,1.2,83.4,28.08,29.44 567 | 566,44,2,-4.99,-154.99,-2.7,0.4,86.4,27.97,29.23 568 | 567,44,3,-4.99,-154.99,-4.4,-0.9,85.1,28.6,29.49 569 | 568,44,4,-5.00,-154.99,-4.5,-2.7,88.5,27.85,29.37 570 | 
569,44,5,-5.00,-154.99,-5.4,-2,82.6,28.56,29.36 571 | 570,44,6,-5.00,-154.99,-5.2,0.6,82.2,28.76,29.44 572 | 571,44,7,-5.00,-154.99,-5,-1,85.1,28.24,29.42 573 | 572,44,8,-5.00,-154.99,-5.2,-1.8,81.3,28.64,29.41 574 | 573,44,9,-5.00,-154.99,-3.2,-0.5,77.5,28.4,29.53 575 | 574,44,10,-5.00,-154.99,-3.5,-1.1,80.1,28.32,29.57 576 | 575,44,11,-5.01,-154.99,-5.7,0.9,86,27.89,29.5 577 | 576,44,12,-4.99,-154.99,-4.5,-2.7,86,27.77,29.38 578 | 577,44,13,-5.00,-154.99,-5.8,-0.9,77.1,28.84,29.39 579 | 578,44,14,-5.00,-154.99,-4.2,1.7,82.2,28.8,29.47 580 | 579,45,1,-8.28,-154.97,2,-0.1,81.8,27.89,29.26 581 | 580,45,2,-8.27,-154.97,-1.1,-1.2,82.1,28.24,29.1 582 | 581,45,3,-8.28,-154.97,-2.8,-1.3,80.3,28.99,29.36 583 | 582,45,4,-8.28,-154.97,-4.7,-0.5,81.4,29.07,29.45 584 | 583,45,5,-8.28,-154.97,-5.4,-0.8,82.1,29.03,29.44 585 | 584,45,6,-8.28,-154.97,-5.5,0,81.4,29.23,29.43 586 | 585,45,7,-8.28,-154.97,-6.2,-2.2,81.8,29.31,29.41 587 | 586,45,8,-8.28,-154.97,-6.2,-0.9,81.4,29.15,29.4 588 | 587,45,9,-8.28,-154.96,-2.9,-1.9,82.1,28.44,29.39 589 | 588,45,10,-8.28,-154.97,-4.2,-2.1,76.7,29.03,29.46 590 | 589,45,11,-8.28,-154.97,-6.2,0.8,75.6,29.11,29.47 591 | 590,45,12,-8.28,-154.96,-5.7,-1.6,77.8,29.03,29.48 592 | 591,45,13,-8.27,-154.97,-5.4,-1.2,82.8,28.48,29.5 593 | 592,45,14,-8.28,-154.97,-6.9,0.9,81.8,28.56,29.4 594 | 593,46,1,2.41,137.41,-3,0.4,84,28.86,29.85 595 | 594,46,2,2.42,137.41,-4.5,0,82,28.86,29.48 596 | 595,46,3,2.41,137.41,-3.9,1,82.8,29.18,29.54 597 | 596,46,4,2.41,137.41,-3.8,-0.6,82.4,29.18,29.65 598 | 597,46,5,2.42,137.41,-2.2,-1.4,78.8,29.25,29.98 599 | 598,46,6,2.41,137.41,-1.4,-1.9,78.8,29.14,30.03 600 | 599,46,7,2.41,137.41,-3.2,-1.3,78.1,29.41,30.15 601 | 600,46,8,2.42,137.42,-4.4,-1.6,79.6,29.53,30.03 602 | 601,46,9,2.42,137.41,-3.6,-1.3,82.8,28.74,29.99 603 | 602,46,10,2.42,137.41,-1.5,1.2,79.6,28.7,29.96 604 | 603,46,11,2.42,137.41,-1.8,0.1,77.7,, 605 | 604,46,12,2.42,137.41,-2.8,-2.2,86,27.78,30 606 | 605,46,13,2.42,137.41,-4.3,0.6,80.4,28.9,29.69 607 | 
606,46,14,2.42,137.41,0,0.4,83.6,28.42,29.88 608 | 607,47,1,4.97,147,-5.1,-0.4,80.1,29.04,29.34 609 | 608,47,2,4.97,147,-4.2,-2.9,83.4,28.8,29.35 610 | 609,47,3,4.97,146.99,-6.1,-3.2,80.9,29.12,29.29 611 | 610,47,4,4.97,147,-4.2,-2.3,82.6,28.8,29.38 612 | 611,47,5,4.97,147,-4.7,-0.5,85.1,27.99,29.35 613 | 612,47,6,4.97,147,-3.1,-0.8,81.3,28.03,29.19 614 | 613,47,7,4.97,147,-4,-2.2,83,28.52,29.38 615 | 614,47,8,4.97,147,-4.8,-1.9,80.5,28.64,29.23 616 | 615,47,9,4.97,147,-3.9,-0.7,79.7,28.64,29.33 617 | 616,47,10,4.97,147.01,-4.9,-1.5,81.3,28.39,29.43 618 | 617,47,11,4.97,147,-5.1,-4.1,84.3,28.72,29.3 619 | 618,47,12,4.97,147,-5.5,-3.6,82.6,29.12,29.35 620 | 619,47,13,4.97,147.01,-5.2,-1.6,83.9,28.64,29.44 621 | 620,47,14,4.97,146.99,,,,28.8,29.4 622 | 621,48,1,-0.01,146.99,-2.3,-0.3,80.5,29.16,29.75 623 | 622,48,2,0.01,146.99,-3,-2.6,82.8,29.05,29.55 624 | 623,48,3,0.00,146.99,-6,-0.4,87.5,28.48,29.52 625 | 624,48,4,0.00,146.99,-3.2,-1.8,86.7,28.44,29.34 626 | 625,48,5,0.00,146.99,-0.6,3.3,88.2,27,29.31 627 | 626,48,6,0.00,147,0.5,-0.3,83.6,28.52,29.5 628 | 627,48,7,0.00,147,0.1,-1.9,81.7,29.05,29.85 629 | 628,48,8,0.00,146.99,-0.3,-0.5,81.7,28.9,30.03 630 | 629,48,9,0.00,147,-2.7,0,80.1,29.32,30.34 631 | 630,48,10,0.00,147,-2.5,-1.3,81.3,29.2,30.12 632 | 631,48,11,0.00,147,-3,-3.8,84,29.05,29.92 633 | 632,48,12,0.00,147,-4.6,-0.5,86.7,28.48,29.57 634 | 633,48,13,0.00,147.01,-3,1.2,88.6,27.95,29.46 635 | 634,49,1,8.08,156.01,-6.6,-3,84.5,28.53,28.98 636 | 635,49,2,8.08,156.02,-5.5,-4.1,83.7,28.41,28.98 637 | 636,49,3,8.09,156.02,-6.8,-3.4,81.8,28.41,28.92 638 | 637,49,4,8.08,156.02,-5.1,-2.6,88,27.81,28.91 639 | 638,49,5,8.08,156.02,-6.8,-3.2,87.3,27.81,28.85 640 | 639,49,6,8.08,156.02,-5.1,-4.2,83.7,28.33,28.8 641 | 640,49,7,8.08,156.02,-5.2,-3.4,82.6,28.09,28.81 642 | 641,49,8,8.07,156.01,-5.2,-2,85.3,27.89,28.87 643 | 642,49,9,8.09,156.02,-5.2,-2,85.7,28.41,29 644 | 643,49,10,8.09,156.02,-4.8,-2.8,87.3,28.41,29.07 645 | 
644,49,11,8.07,156.02,-5.3,-3.8,86.9,28.41,29.03 646 | 645,49,12,8.09,156.02,-7.5,-2.5,84.9,28.57,28.95 647 | 646,49,13,8.09,156.01,-5.9,-2.5,87.3,28.29,28.94 648 | 647,49,14,8.08,156.02,-5.5,-0.7,94.3,27.29,28.89 649 | 648,50,1,4.98,156.06,-6.3,-3.5,83.7,28.89,29.04 650 | 649,50,2,4.98,156.07,-5.4,-3.4,79.6,28.97,28.9 651 | 650,50,3,4.98,156.07,-6.2,-2.7,82.5,28.78,28.98 652 | 651,50,4,4.99,156.06,-4.2,-1.3,85.7,28.07,29.05 653 | 652,50,5,4.99,156.06,-4.2,-3.3,86.1,27.99,29.15 654 | 653,50,6,4.99,156.06,-4,-4.4,81.2,28.97,29.08 655 | 654,50,7,4.99,156.06,-4.1,-3.5,78.4,28.85,29.04 656 | 655,50,8,4.98,156.06,-4.7,-0.4,78.4,28.85,29.19 657 | 656,50,9,4.99,156.06,-4.6,-2.5,78.8,29.01,29.13 658 | 657,50,10,4.99,156.07,-5.1,-1.8,81.2,28.89,29.23 659 | 658,50,11,4.99,156.07,-5.5,-4.2,80.8,29.17,29.14 660 | 659,50,12,4.99,156.07,-7.2,-1.7,81.2,29.01,29.1 661 | 660,50,13,4.99,156.07,-6.5,-1.8,82.5,28.97,29.08 662 | 661,50,14,4.99,156.08,-3.5,-0.6,85.7,28.23,29.03 663 | 662,51,1,2.08,156.21,-4.5,-3.2,83,28.57,29.33 664 | 663,51,2,2.08,156.21,-5.2,-3.5,81.8,28.76,29.04 665 | 664,51,3,2.08,156.2,-5.4,-2.2,84.6,27.93,29.05 666 | 665,51,4,2.08,156.21,-4.3,0.3,83.8,28.05,28.87 667 | 666,51,5,2.08,156.21,-3,-2,83,28.37,29.06 668 | 667,51,6,2.09,156.21,-2.3,-3.1,82.2,28.41,29.13 669 | 668,51,7,2.09,156.22,-3.4,-3.8,80.2,28.6,28.98 670 | 669,51,8,2.08,156.2,-4.1,-1.2,80.6,28.41,29.05 671 | 670,51,9,2.08,156.21,-3.6,0,85.4,27.26,29.09 672 | 671,51,10,2.09,156.21,-3.5,-2.5,82.2,28.25,28.94 673 | 672,51,11,2.08,156.2,-5.4,-5.1,83,28.64,28.96 674 | 673,51,12,2.08,156.21,-6.3,-2.9,82.6,28.64,28.97 675 | 674,51,13,2.08,156.2,-7,-1.4,82.2,28.72,28.96 676 | 675,51,14,2.07,156.2,-4.7,-0.5,80.2,28.64,28.97 677 | 676,52,1,0.06,156.16,-3,-2.8,86.7,28.26,29.23 678 | 677,52,2,0.06,156.17,-4.4,-1.9,87.8,27.94,29.34 679 | 678,52,3,0.06,156.16,-3.5,-1.2,86.7,27.98,29.48 680 | 679,52,4,0.06,156.16,-3.5,-0.8,86.3,28.22,29.5 681 | 680,52,5,0.06,156.16,-3,-2.2,83.9,28.42,29.52 682 | 
681,52,6,0.06,156.17,-0.6,-3.3,83.9,28.26,29.66 683 | 682,52,7,0.06,156.16,-2.4,-2.8,89.4,27.27,29.43 684 | 683,52,8,0.06,156.16,-2.8,1,88.6,27.35,29.38 685 | 684,52,9,0.06,156.16,0.2,-0.8,85.1,27.78,29.55 686 | 685,52,10,0.06,156.17,-3.1,-3.3,83.9,28.26,29.47 687 | 686,52,11,0.06,156.16,-5.3,-5,87.1,28.45,29.16 688 | 687,52,12,0.06,156.17,-6.1,-1.3,87.1,28.26,28.97 689 | 688,52,13,0.06,156.17,-6.7,-1.4,86.3,28.26,28.83 690 | 689,52,14,0.06,156.16,-3.9,-1.6,85.9,28.18,28.89 691 | 690,53,1,-1.98,155.94,-2.3,-2.6,78.3,28.68,29.75 692 | 691,53,2,-1.99,155.94,-3,-0.2,78.3,28.4,29.95 693 | 692,53,3,-1.99,155.94,-3.6,0.1,76.7,28.96,30.1 694 | 693,53,4,-1.99,155.93,-3.6,-0.1,76.3,29.08,30.15 695 | 694,53,5,-1.99,155.94,-2.2,-2,78.8,28.84,29.98 696 | 695,53,6,-1.99,155.94,-0.4,-2.7,77.1,28.76,30.05 697 | 696,53,7,-1.99,155.94,0.9,-1.7,82.9,27.65,29.94 698 | 697,53,8,-1.99,155.94,1.8,2.1,82.9,27.65,29.7 699 | 698,53,9,-1.99,155.94,-1.1,-1.2,81.3,28.16,29.67 700 | 699,53,10,-1.99,155.94,-1.3,-1.8,88.3,30.04,29.6 701 | 700,53,11,-1.99,155.94,-4.8,-4.7,,,29.59 702 | 701,53,12,-1.99,155.94,-5,-1.4,,,29.58 703 | 702,53,13,-1.99,155.94,-6.2,1.1,,,29.54 704 | 703,53,14,-2.00,155.94,-1.6,0,,,29.3 705 | 704,54,1,7.98,165.06,-5.8,-3.4,84.5,28.35,28.72 706 | 705,54,2,7.98,165.07,-5.8,-4.3,83.7,28.44,28.71 707 | 706,54,3,7.99,165.06,-7,-2.4,87.7,28.18,28.7 708 | 707,54,4,7.98,165.07,-7.2,-3.9,87.7,28.42,28.74 709 | 708,54,5,7.99,165.06,-5.8,-4.5,81.4,28.5,28.7 710 | 709,54,6,7.99,165.06,-4.4,-4.2,79.8,28.33,28.76 711 | 710,54,7,7.98,165.07,-4.9,-2.5,84.5,28.32,28.75 712 | 711,54,8,7.99,165.07,-4.1,1.2,92.4,27.67,28.77 713 | 712,54,9,7.98,165.06,-1.5,0.1,92.4,26.97,28.77 714 | 713,54,10,7.98,165.07,-6.1,-4.1,85.3,28.46,28.77 715 | 714,54,11,7.98,165.06,-5.8,-3.8,81.4,28.45,28.77 716 | 715,54,12,7.98,165.06,-6.8,-2.4,88.9,28.17,28.77 717 | 716,54,13,7.98,165.08,-6.7,-4.2,90,28.28,28.77 718 | 717,54,14,7.98,165.06,-6.2,-4.9,88.9,28.4,28.77 719 | 
718,55,1,5.03,165.02,-6.2,-3.5,84,28.5,28.68 720 | 719,55,2,5.02,165.03,-6.1,-3.9,84.4,28.44,28.65 721 | 720,55,3,5.02,165.03,-6.2,-0.2,89.3,27.65,28.63 722 | 721,55,4,5.02,165.02,-6.6,-1.8,88.5,28.12,28.77 723 | 722,55,5,5.02,165.02,-5.6,-4.4,86.5,28.13,28.62 724 | 723,55,6,5.02,165.02,-3.9,-5.3,81.2,28.34,28.66 725 | 724,55,7,5.03,165.02,-4,-2.9,84.4,28.24,28.68 726 | 725,55,8,5.03,165.03,-2,3.3,88.5,27.98,28.82 727 | 726,55,9,5.03,165.02,-3.2,-1,84.8,27.97,28.96 728 | 727,55,10,5.03,165.02,-7.2,-3.1,85.3,28.43,28.82 729 | 728,55,11,5.03,165.02,-5.9,-3.8,82.8,28.44,28.79 730 | 729,55,12,5.03,165.02,-6.8,-0.2,84.8,28.55,28.85 731 | 730,55,13,5.02,165.03,-6.6,-0.6,88.5,28.18,28.88 732 | 731,55,14,5.03,165.02,-6.4,-1.6,87.7,28.53,28.87 733 | 732,56,1,0.01,165.01,-1.7,-1.9,,, 734 | 733,56,2,0.00,165.02,-3.50,0.3,,, 735 | 734,56,3,0.00,165.01,-3.80,-0.2,,, 736 | 735,56,4,0.00,165.01,-4.30,-1.5,,, 737 | 736,56,5,0.00,165.01,-2.90,-1.9,,, 738 | 737,56,6,0.00,165.01,-1.10,-2.2,,, 739 | 738,56,7,0.00,165.02,-0.80,-0.9,,, 740 | 739,56,8,0.00,165.02,-2.90,1.5,,, 741 | 740,56,9,0.00,165.01,-3.00,-2.9,,, 742 | 741,56,10,0.00,165.02,-3.90,-5,,, 743 | 742,56,11,0.00,165.01,-4.10,-2.1,,, 744 | 743,56,12,0.00,165.01,-4.90,0,,, 745 | 744,56,13,0.00,165.01,-4.90,-1.6,,, 746 | 745,56,14,0.00,165.01,-4.00,-1.7,,, 747 | 746,57,1,-1.92,164.41,-1.60,-3.40,81.30,28.44, 748 | 747,57,2,-1.92,164.41,-2.00,-0.90,79.20,28.52, 749 | 748,57,3,-1.92,164.42,-3.30,-0.30,78.00,28.67, 750 | 749,57,4,-1.92,164.42,-3.60,-2.30,82.50,28.48, 751 | 750,57,5,-1.93,164.41,-2.30,-2.90,79.60,28.55, 752 | 751,57,6,-1.92,164.42,-0.20,-2.70,80.10,28.49, 753 | 752,57,7,-1.92,164.42,-3.30,1.10,80.90,28.26, 754 | 753,57,8,-1.92,164.42,-3.00,1.70,80.90,28.45, 755 | 754,57,9,-1.93,164.42,-4.40,-2.30,83.30,28.18, 756 | 755,58,1,-5.00,165.20,0.30,-4.40,88.10,26.82,29.35 757 | 756,58,2,-5.00,165.21,-0.40,-1.20,85.00,27.50,29.53 758 | 757,58,3,-4.99,165.21,-1.30,-0.70,75.90,28.72,29.70 759 | 
758,58,4,-5.00,165.21,-2.40,-0.20,79.80,28.67,29.84 760 | 759,58,5,-4.99,165.21,-0.30,-2.00,80.70,28.28,29.68 761 | 760,58,6,-5.00,165.21,3.40,1.70,84.10,27.49,29.56 762 | 761,58,7,-4.99,165.21,-3.40,-0.20,85.50,26.61,29.46 763 | 762,58,8,-4.99,165.20,-3.90,1.10,79.80,28.13,29.42 764 | 763,58,9,-4.99,165.21,-6.30,-0.10,85.90,27.34,29.33 765 | 764,58,10,-4.99,165.21,-2.40,-3.10,82.40,27.60,29.39 766 | 765,58,11,-5.00,165.20,-3.70,0.00,84.10,27.21,29.34 767 | 766,58,12,-5.00,165.20,-4.20,1.60,81.50,28.09,29.38 768 | 767,58,13,-5.00,165.20,-5.10,1.10,83.30,28.16,29.46 769 | 768,58,14,-5.00,165.20,-3.90,-1.00,85.90,27.89,29.42 770 | 769,59,1,-8.03,164.82,,,93.30,27.64,28.69 771 | 770,59,2,-8.03,164.81,,,91.80,28.48,28.94 772 | 771,59,3,-8.03,164.82,,,91.20,28.61,29.07 773 | 772,59,4,-8.03,164.82,,,93.60,28.47,28.90 774 | 773,59,5,-8.03,164.82,,,88.80,28.47,29.22 775 | 774,59,6,-8.03,164.82,,,90.60,27.97,29.11 776 | 775,59,7,-8.03,164.82,,,89.50,27.51,28.78 777 | 776,59,8,-8.04,164.82,,,85.90,27.65,28.65 778 | 777,59,9,-8.03,164.82,,,89.50,28.37,28.66 779 | 778,59,10,-8.04,164.82,,,93.60,26.89,28.52 780 | 779,59,11,-8.04,164.82,,,92.30,27.62,28.44 781 | 780,59,12,-8.03,164.81,,,93.20,28.33,28.43 782 | 781,59,13,-8.04,164.82,,,95.50,28.44,28.51 783 | 782,59,14,-8.04,164.81,,,93.40,28.67,28.61 784 | -------------------------------------------------------------------------------- /resources/examples/data/Iris.csv: -------------------------------------------------------------------------------- 1 | "sepal_length","sepal_width","petal_length","petal_width","species","randomfactor","setosa_ind" 2 | "1",5.1,3.5,1.4,0.2,"setosa","4",1 3 | "2",4.9,3,1.4,0.2,"setosa","6",1 4 | "3",4.7,3.2,1.3,0.2,"setosa","4",1 5 | "4",4.6,3.1,1.5,0.2,"setosa","3",1 6 | "5",5,3.6,1.4,0.2,"setosa","4",1 7 | "6",5.4,3.9,1.7,0.4,"setosa","10",1 8 | "7",4.6,3.4,1.4,0.3,"setosa","7",1 9 | "8",5,3.4,1.5,0.2,"setosa","9",1 10 | "9",4.4,2.9,1.4,0.2,"setosa","4",1 11 | "10",4.9,3.1,1.5,0.1,"setosa","5",1 
12 | "11",5.4,3.7,1.5,0.2,"setosa","2",1 13 | "12",4.8,3.4,1.6,0.2,"setosa","8",1 14 | "13",4.8,3,1.4,0.1,"setosa","2",1 15 | "14",4.3,3,1.1,0.1,"setosa","8",1 16 | "15",5.8,4,1.2,0.2,"setosa","9",1 17 | "16",5.7,4.4,1.5,0.4,"setosa","8",1 18 | "17",5.4,3.9,1.3,0.4,"setosa","2",1 19 | "18",5.1,3.5,1.4,0.3,"setosa","9",1 20 | "19",5.7,3.8,1.7,0.3,"setosa","9",1 21 | "20",5.1,3.8,1.5,0.3,"setosa","10",1 22 | "21",5.4,3.4,1.7,0.2,"setosa","7",1 23 | "22",5.1,3.7,1.5,0.4,"setosa","3",1 24 | "23",4.6,3.6,1,0.2,"setosa","8",1 25 | "24",5.1,3.3,1.7,0.5,"setosa","3",1 26 | "25",4.8,3.4,1.9,0.2,"setosa","2",1 27 | "26",5,3,1.6,0.2,"setosa","5",1 28 | "27",5,3.4,1.6,0.4,"setosa","5",1 29 | "28",5.2,3.5,1.5,0.2,"setosa","4",1 30 | "29",5.2,3.4,1.4,0.2,"setosa","3",1 31 | "30",4.7,3.2,1.6,0.2,"setosa","9",1 32 | "31",4.8,3.1,1.6,0.2,"setosa","1",1 33 | "32",5.4,3.4,1.5,0.4,"setosa","7",1 34 | "33",5.2,4.1,1.5,0.1,"setosa","8",1 35 | "34",5.5,4.2,1.4,0.2,"setosa","1",1 36 | "35",4.9,3.1,1.5,0.2,"setosa","8",1 37 | "36",5,3.2,1.2,0.2,"setosa","9",1 38 | "37",5.5,3.5,1.3,0.2,"setosa","5",1 39 | "38",4.9,3.6,1.4,0.1,"setosa","3",1 40 | "39",4.4,3,1.3,0.2,"setosa","5",1 41 | "40",5.1,3.4,1.5,0.2,"setosa","1",1 42 | "41",5,3.5,1.3,0.3,"setosa","4",1 43 | "42",4.5,2.3,1.3,0.3,"setosa","7",1 44 | "43",4.4,3.2,1.3,0.2,"setosa","1",1 45 | "44",5,3.5,1.6,0.6,"setosa","7",1 46 | "45",5.1,3.8,1.9,0.4,"setosa","5",1 47 | "46",4.8,3,1.4,0.3,"setosa","7",1 48 | "47",5.1,3.8,1.6,0.2,"setosa","6",1 49 | "48",4.6,3.2,1.4,0.2,"setosa","2",1 50 | "49",5.3,3.7,1.5,0.2,"setosa","1",1 51 | "50",5,3.3,1.4,0.2,"setosa","9",1 52 | "51",7,3.2,4.7,1.4,"versicolor","1",0 53 | "52",6.4,3.2,4.5,1.5,"versicolor","2",0 54 | "53",6.9,3.1,4.9,1.5,"versicolor","7",0 55 | "54",5.5,2.3,4,1.3,"versicolor","6",0 56 | "55",6.5,2.8,4.6,1.5,"versicolor","10",0 57 | "56",5.7,2.8,4.5,1.3,"versicolor","10",0 58 | "57",6.3,3.3,4.7,1.6,"versicolor","5",0 59 | "58",4.9,2.4,3.3,1,"versicolor","7",0 60 | 
"59",6.6,2.9,4.6,1.3,"versicolor","7",0 61 | "60",5.2,2.7,3.9,1.4,"versicolor","2",0 62 | "61",5,2,3.5,1,"versicolor","7",0 63 | "62",5.9,3,4.2,1.5,"versicolor","3",0 64 | "63",6,2.2,4,1,"versicolor","1",0 65 | "64",6.1,2.9,4.7,1.4,"versicolor","1",0 66 | "65",5.6,2.9,3.6,1.3,"versicolor","4",0 67 | "66",6.7,3.1,4.4,1.4,"versicolor","2",0 68 | "67",5.6,3,4.5,1.5,"versicolor","6",0 69 | "68",5.8,2.7,4.1,1,"versicolor","9",0 70 | "69",6.2,2.2,4.5,1.5,"versicolor","5",0 71 | "70",5.6,2.5,3.9,1.1,"versicolor","1",0 72 | "71",5.9,3.2,4.8,1.8,"versicolor","4",0 73 | "72",6.1,2.8,4,1.3,"versicolor","3",0 74 | "73",6.3,2.5,4.9,1.5,"versicolor","10",0 75 | "74",6.1,2.8,4.7,1.2,"versicolor","6",0 76 | "75",6.4,2.9,4.3,1.3,"versicolor","10",0 77 | "76",6.6,3,4.4,1.4,"versicolor","10",0 78 | "77",6.8,2.8,4.8,1.4,"versicolor","9",0 79 | "78",6.7,3,5,1.7,"versicolor","7",0 80 | "79",6,2.9,4.5,1.5,"versicolor","6",0 81 | "80",5.7,2.6,3.5,1,"versicolor","10",0 82 | "81",5.5,2.4,3.8,1.1,"versicolor","3",0 83 | "82",5.5,2.4,3.7,1,"versicolor","5",0 84 | "83",5.8,2.7,3.9,1.2,"versicolor","9",0 85 | "84",6,2.7,5.1,1.6,"versicolor","3",0 86 | "85",5.4,3,4.5,1.5,"versicolor","1",0 87 | "86",6,3.4,4.5,1.6,"versicolor","1",0 88 | "87",6.7,3.1,4.7,1.5,"versicolor","2",0 89 | "88",6.3,2.3,4.4,1.3,"versicolor","5",0 90 | "89",5.6,3,4.1,1.3,"versicolor","5",0 91 | "90",5.5,2.5,4,1.3,"versicolor","5",0 92 | "91",5.5,2.6,4.4,1.2,"versicolor","2",0 93 | "92",6.1,3,4.6,1.4,"versicolor","7",0 94 | "93",5.8,2.6,4,1.2,"versicolor","6",0 95 | "94",5,2.3,3.3,1,"versicolor","9",0 96 | "95",5.6,2.7,4.2,1.3,"versicolor","4",0 97 | "96",5.7,3,4.2,1.2,"versicolor","6",0 98 | "97",5.7,2.9,4.2,1.3,"versicolor","2",0 99 | "98",6.2,2.9,4.3,1.3,"versicolor","1",0 100 | "99",5.1,2.5,3,1.1,"versicolor","5",0 101 | "100",5.7,2.8,4.1,1.3,"versicolor","3",0 102 | "101",6.3,3.3,6,2.5,"virginica","2",0 103 | "102",5.8,2.7,5.1,1.9,"virginica","1",0 104 | "103",7.1,3,5.9,2.1,"virginica","10",0 105 | 
"104",6.3,2.9,5.6,1.8,"virginica","9",0 106 | "105",6.5,3,5.8,2.2,"virginica","9",0 107 | "106",7.6,3,6.6,2.1,"virginica","4",0 108 | "107",4.9,2.5,4.5,1.7,"virginica","5",0 109 | "108",7.3,2.9,6.3,1.8,"virginica","4",0 110 | "109",6.7,2.5,5.8,1.8,"virginica","4",0 111 | "110",7.2,3.6,6.1,2.5,"virginica","2",0 112 | "111",6.5,3.2,5.1,2,"virginica","10",0 113 | "112",6.4,2.7,5.3,1.9,"virginica","5",0 114 | "113",6.8,3,5.5,2.1,"virginica","9",0 115 | "114",5.7,2.5,5,2,"virginica","6",0 116 | "115",5.8,2.8,5.1,2.4,"virginica","10",0 117 | "116",6.4,3.2,5.3,2.3,"virginica","10",0 118 | "117",6.5,3,5.5,1.8,"virginica","6",0 119 | "118",7.7,3.8,6.7,2.2,"virginica","5",0 120 | "119",7.7,2.6,6.9,2.3,"virginica","2",0 121 | "120",6,2.2,5,1.5,"virginica","10",0 122 | "121",6.9,3.2,5.7,2.3,"virginica","10",0 123 | "122",5.6,2.8,4.9,2,"virginica","9",0 124 | "123",7.7,2.8,6.7,2,"virginica","6",0 125 | "124",6.3,2.7,4.9,1.8,"virginica","9",0 126 | "125",6.7,3.3,5.7,2.1,"virginica","9",0 127 | "126",7.2,3.2,6,1.8,"virginica","10",0 128 | "127",6.2,2.8,4.8,1.8,"virginica","10",0 129 | "128",6.1,3,4.9,1.8,"virginica","5",0 130 | "129",6.4,2.8,5.6,2.1,"virginica","10",0 131 | "130",7.2,3,5.8,1.6,"virginica","1",0 132 | "131",7.4,2.8,6.1,1.9,"virginica","5",0 133 | "132",7.9,3.8,6.4,2,"virginica","3",0 134 | "133",6.4,2.8,5.6,2.2,"virginica","3",0 135 | "134",6.3,2.8,5.1,1.5,"virginica","8",0 136 | "135",6.1,2.6,5.6,1.4,"virginica","5",0 137 | "136",7.7,3,6.1,2.3,"virginica","9",0 138 | "137",6.3,3.4,5.6,2.4,"virginica","5",0 139 | "138",6.4,3.1,5.5,1.8,"virginica","7",0 140 | "139",6,3,4.8,1.8,"virginica","9",0 141 | "140",6.9,3.1,5.4,2.1,"virginica","7",0 142 | "141",6.7,3.1,5.6,2.4,"virginica","7",0 143 | "142",6.9,3.1,5.1,2.3,"virginica","1",0 144 | "143",5.8,2.7,5.1,1.9,"virginica","9",0 145 | "144",6.8,3.2,5.9,2.3,"virginica","1",0 146 | "145",6.7,3.3,5.7,2.5,"virginica","3",0 147 | "146",6.7,3,5.2,2.3,"virginica","1",0 148 | "147",6.3,2.5,5,1.9,"virginica","2",0 149 | 
"148",6.5,3,5.2,2,"virginica","1",0 150 | "149",6.2,3.4,5.4,2.3,"virginica","8",0 151 | "150",5.9,3,5.1,1.8,"virginica","9",0 152 | -------------------------------------------------------------------------------- /resources/examples/models/elnino_linearReg.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 |
-------------------------------------------------------------------------------- /resources/examples/models/single_iris_dectree.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |
-------------------------------------------------------------------------------- /resources/examples/models/single_iris_mlp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 |
-- Register the JARs that provide the UDFs used below (org.surus.pig.RAD and the DataFu helpers).
REGISTER 's3n://path-to-surus/surus.jar';
REGISTER 's3n://path-to-datafu/datafu.jar';

DEFINE rpca_outliers_daily (inputBag, nWeeks, dateColumnName, metricColumnNames)
returns rpca_outliers_daily
/*
 * This macro takes a grouped bag and uses RPCA for daily outlier detection. It expects
 * that you have already aggregated the data at a daily grain, performed a "group by" on the
 * keys of interest, and ordered the inner_data_bag. As an example, imagine a bag with the
 * following structure:
 *
 * grunt> describe input_data_bag;
 * input_data_bag: {
 *     group: (
 *         .... (some high cardinality group) ....
 *     ),
 *     inner_data_bag: {
 *         (
 *             utc_dateint: chararray,
 *             metric_1: long,
 *             metric_2: long,
 *             ... (other columns?) ....
 *         )
 *     }
 * }
 *
 * The macro expects the "inner_data_bag" to be complete (no missing date values)
 * and ordered (e.g. 20141101,20141102,...), which is required by the RPCA function. With
 * this data structure you can simply use the macro rpca_outliers_daily to calculate the outliers.
 *
 * grunt> output_data_bag = rpca_outliers_daily(input_data_bag, nWeeks, 'utc_dateint', 'metric_1,metric_2')
 * grunt> describe output_data_bag;
 * output_data_bag: {
 *     flatten(group),
 *     metric: chararray, -- [metric_1, metric_2]
 *     com_netflix_dse_outlier_evalrpca: {
 *         (
 *             utc_dateint: chararray,
 *             value: long,
 *             x_transform: double,
 *             rsvd_l: double,
 *             rsvd_s: double,
 *             rsvd_e: double
 *         )
 *     }
 * }
 *
 * Parameters:
 *   inputBag          - relation whose first field ($0) is the group key and whose second
 *                       field ($1) is the inner bag of daily rows
 *   nWeeks            - forwarded as the third constructor argument of org.surus.pig.RAD
 *                       (the second argument is fixed to '7')
 *   dateColumnName    - name of the date column inside the inner bag
 *   metricColumnNames - metric column(s) handed to TransposeTupleToBag
 *
 * Example of the script being used can be found here:
 * --
 *
 */
{
    -- RPCA constructor: column name 'value', '7' as the second argument, $nWeeks as the third
    DEFINE RPCA org.surus.pig.RAD('value','7','$nWeeks');

    -- Required to process multiple metrics simultaneously
    DEFINE TransposeTupleToBag datafu.pig.util.TransposeTupleToBag();
    DEFINE BagGroupMacro datafu.pig.bags.BagGroup();
    DEFINE Coalesce datafu.pig.util.Coalesce('lazy');

    -- Performs the data transpose required to process multiple metrics simultaneously
    inputBag_exploded = foreach $inputBag {
        -- explode the inner bag ($1) into one row per (date, metric key, metric value)
        inputBag_temp = foreach $1 generate $dateColumnName, flatten(TransposeTupleToBag($metricColumnNames));

        -- in-memory group on the metric key; $0 is the original group key
        GENERATE $0 as input_group
               , BagGroupMacro(inputBag_temp, inputBag_temp.key) as backfilled_new;
    }

    -- Flatten the in-memory group. The original grain of inputBag was (group); it is now (group, key).
    inputBag_by_metric = foreach inputBag_exploded generate input_group, flatten(backfilled_new);

    -- coalesce values and process outliers
    $rpca_outliers_daily = foreach inputBag_by_metric {
        -- coalesce empty values: a null in the third column ($2) becomes 0L
        inputBag_clean = foreach inputBag_temp generate $dateColumnName, Coalesce($2,0L) as value;

        -- finally, we can process the outliers
        generate flatten(input_group), group as metric, RPCA(inputBag_clean);
    }

};
public AugmentedDickeyFuller(double[] ts, int lag) { 23 | this.ts = ts; 24 | this.lag = lag; 25 | computeADFStatistics(); 26 | } 27 | 28 | /** 29 | * Uses the Augmented Dickey Fuller test to determine 30 | * if ts is a stationary time series 31 | * @param ts 32 | */ 33 | public AugmentedDickeyFuller(double[] ts) { 34 | this.ts = ts; 35 | this.lag = (int) Math.floor(Math.cbrt((ts.length - 1))); 36 | computeADFStatistics(); 37 | } 38 | 39 | private void computeADFStatistics() { 40 | double[] y = diff(ts); 41 | RealMatrix designMatrix = null; 42 | int k = lag+1; 43 | int n = ts.length - 1; 44 | 45 | RealMatrix z = MatrixUtils.createRealMatrix(laggedMatrix(y, k)); //has rows length(ts) - 1 - k + 1 46 | RealVector zcol1 = z.getColumnVector(0); //has length length(ts) - 1 - k + 1 47 | double[] xt1 = subsetArray(ts, k-1, n-1); //ts[k:(length(ts) - 1)], has length length(ts) - 1 - k + 1 48 | double[] trend = sequence(k,n); //trend k:n, has length length(ts) - 1 - k + 1 49 | if (k > 1) { 50 | RealMatrix yt1 = z.getSubMatrix(0, ts.length - 1 - k, 1, k-1); //same as z but skips first column 51 | //build design matrix as cbind(xt1, 1, trend, yt1) 52 | designMatrix = MatrixUtils.createRealMatrix(ts.length - 1 - k + 1, 3 + k - 1); 53 | designMatrix.setColumn(0, xt1); 54 | designMatrix.setColumn(1, ones(ts.length - 1 - k + 1)); 55 | designMatrix.setColumn(2, trend); 56 | designMatrix.setSubMatrix(yt1.getData(), 0, 3); 57 | 58 | } else { 59 | //build design matrix as cbind(xt1, 1, tt) 60 | designMatrix = MatrixUtils.createRealMatrix(ts.length - 1 - k + 1, 3); 61 | designMatrix.setColumn(0, xt1); 62 | designMatrix.setColumn(1, ones(ts.length - 1 - k + 1)); 63 | designMatrix.setColumn(2, trend); 64 | } 65 | /*OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); 66 | regression.setNoIntercept(true); 67 | regression.newSampleData(zcol1.toArray(), designMatrix.getData()); 68 | double[] beta = regression.estimateRegressionParameters(); 69 | double[] sd = 
regression.estimateRegressionParametersStandardErrors(); 70 | */ 71 | RidgeRegression regression = new RidgeRegression(designMatrix.getData(), zcol1.toArray()); 72 | regression.updateCoefficients(.0001); 73 | double[] beta = regression.getCoefficients(); 74 | double[] sd = regression.getStandarderrors(); 75 | 76 | double t = beta[0] / sd[0]; 77 | if (t <= PVALUE_THRESHOLD) { 78 | this.needsDiff = true; 79 | } else { 80 | this.needsDiff = false; 81 | } 82 | } 83 | 84 | /** 85 | * Takes finite differences of x 86 | * @param x 87 | * @return Returns an array of length x.length-1 of 88 | * the first differences of x 89 | */ 90 | private double[] diff(double[] x) { 91 | double[] diff = new double[x.length - 1]; 92 | double[] zeroPaddedDiff = new double[x.length]; 93 | zeroPaddedDiff[0] = 0; 94 | for (int i = 0; i < diff.length; i++) { 95 | double diff_i = x[i+1] - x[i]; 96 | diff[i] = diff_i; 97 | zeroPaddedDiff[i+1] = diff_i; 98 | } 99 | this.zeroPaddedDiff = zeroPaddedDiff; 100 | return diff; 101 | } 102 | 103 | /** 104 | * Equivalent to matlab and python ones 105 | * @param n 106 | * @return an array of doubles of length n that are 107 | * initialized to 1 108 | */ 109 | private double[] ones(int n) { 110 | double[] ones = new double[n]; 111 | for (int i = 0; i < n; i++) { 112 | ones[i] = 1; 113 | } 114 | return ones; 115 | } 116 | 117 | /** 118 | * Equivalent to R's embed function 119 | * @param x time series vector 120 | * @param lag number of lags, where lag=1 is the same as no lags 121 | * @return a matrix that has x.length - lag + 1 rows by lag columns. 
122 | */ 123 | private double[][] laggedMatrix(double[]x, int lag) { 124 | double[][] laggedMatrix = new double[x.length - lag + 1][lag]; 125 | for (int j = 0; j < lag; j++) { //loop through columns 126 | for (int i = 0; i < laggedMatrix.length; i++) { 127 | laggedMatrix[i][j] = x[lag - j - 1 + i]; 128 | } 129 | } 130 | return laggedMatrix; 131 | } 132 | 133 | /** 134 | * Takes x[start] through x[end - 1] 135 | * @param x 136 | * @param start 137 | * @param end 138 | * @return 139 | */ 140 | private double[] subsetArray(double[] x, int start, int end) { 141 | double[] subset = new double[end - start + 1]; 142 | System.arraycopy(x, start, subset, 0, end - start + 1); 143 | return subset; 144 | } 145 | 146 | /** 147 | * Generates a sequence of ints [start, end] 148 | * @param start 149 | * @param end 150 | * @return 151 | */ 152 | private double[] sequence(int start, int end) { 153 | double[] sequence = new double[end - start + 1]; 154 | for (int i = start; i <= end; i++) { 155 | sequence[i - start] = i; 156 | } 157 | return sequence; 158 | } 159 | 160 | public boolean isNeedsDiff() { 161 | return needsDiff; 162 | } 163 | 164 | public double[] getZeroPaddedDiff() { 165 | return zeroPaddedDiff; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/main/java/org/surus/math/RPCA.java: -------------------------------------------------------------------------------- 1 | package org.surus.math; 2 | 3 | import org.apache.commons.math3.linear.MatrixUtils; 4 | import org.apache.commons.math3.linear.RealMatrix; 5 | import org.apache.commons.math3.linear.SingularValueDecomposition; 6 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 7 | 8 | public class RPCA { 9 | 10 | private RealMatrix X; 11 | private RealMatrix L; 12 | private RealMatrix S; 13 | private RealMatrix E; 14 | 15 | private double lpenalty; 16 | private double spenalty; 17 | 18 | private static final int MAX_ITERS = 228; 19 | 20 | public 
RPCA(double[][] data, double lpenalty, double spenalty) { 21 | this.X = MatrixUtils.createRealMatrix(data); 22 | this.lpenalty = lpenalty; 23 | this.spenalty = spenalty; 24 | initMatrices(); 25 | computeRSVD(); 26 | } 27 | 28 | public RPCA(RealMatrix X, double lpenalty, double spenalty) { 29 | this.X = X; 30 | this.lpenalty = lpenalty; 31 | this.spenalty = spenalty; 32 | initMatrices(); 33 | computeRSVD(); 34 | } 35 | 36 | private void initMatrices() { 37 | this.L = MatrixUtils.createRealMatrix(this.X.getRowDimension(), this.X.getColumnDimension()); 38 | this.S = MatrixUtils.createRealMatrix(this.X.getRowDimension(), this.X.getColumnDimension()); 39 | this.E = MatrixUtils.createRealMatrix(this.X.getRowDimension(), this.X.getColumnDimension()); 40 | } 41 | 42 | private void computeRSVD() { 43 | double mu = X.getColumnDimension() * X.getRowDimension() / (4 * l1norm(X.getData())); 44 | double objPrev = 0.5*Math.pow(X.getFrobeniusNorm(), 2); 45 | double obj = objPrev; 46 | double tol = 1e-8 * objPrev; 47 | double diff = 2 * tol; 48 | int iter = 0; 49 | 50 | while(diff > tol && iter < MAX_ITERS) { 51 | double nuclearNorm = computeS(mu); 52 | double l1Norm = computeL(mu); 53 | double l2Norm = computeE(); 54 | 55 | obj = computeObjective(nuclearNorm, l1Norm, l2Norm); 56 | diff = Math.abs(objPrev - obj); 57 | objPrev = obj; 58 | 59 | mu = computeDynamicMu(); 60 | 61 | iter = iter + 1; 62 | } 63 | } 64 | 65 | private double[] softThreshold(double[] x, double penalty) { 66 | for(int i = 0; i < x.length; i++) { 67 | x[i] = Math.signum(x[i]) * Math.max(Math.abs(x[i]) - penalty, 0); 68 | } 69 | return x; 70 | } 71 | 72 | private double[][] softThreshold(double[][] x, double penalty) { 73 | for(int i = 0; i < x.length; i++) { 74 | for(int j = 0; j < x[i].length; j++) { 75 | x[i][j] = Math.signum(x[i][j]) * Math.max(Math.abs(x[i][j]) - penalty, 0); 76 | } 77 | } 78 | return x; 79 | } 80 | 81 | private double sum(double[] x) { 82 | double sum = 0; 83 | for (int i = 0; i < 
x.length; i++) 84 | sum += x[i]; 85 | return (sum); 86 | } 87 | 88 | private double l1norm(double[][] x) { 89 | double l1norm = 0; 90 | for (int i = 0; i < x.length; i++) { 91 | for (int j = 0; j < x[i].length; j++) { 92 | l1norm += Math.abs(x[i][j]); 93 | } 94 | } 95 | return l1norm; 96 | } 97 | 98 | private double computeL(double mu) { 99 | double LPenalty = lpenalty * mu; 100 | SingularValueDecomposition svd = new SingularValueDecomposition(X.subtract(S)); 101 | double[] penalizedD = softThreshold(svd.getSingularValues(), LPenalty); 102 | RealMatrix D_matrix = MatrixUtils.createRealDiagonalMatrix(penalizedD); 103 | L = svd.getU().multiply(D_matrix).multiply(svd.getVT()); 104 | return sum(penalizedD) * LPenalty; 105 | } 106 | 107 | private double computeS(double mu) { 108 | double SPenalty = spenalty * mu; 109 | double[][] penalizedS = softThreshold(X.subtract(L).getData(), SPenalty); 110 | S = MatrixUtils.createRealMatrix(penalizedS); 111 | return l1norm(penalizedS) * SPenalty; 112 | } 113 | 114 | private double computeE() { 115 | E = X.subtract(L).subtract(S); 116 | double norm = E.getFrobeniusNorm(); 117 | return Math.pow(norm, 2); 118 | } 119 | 120 | private double computeObjective(double nuclearnorm, double l1norm, double l2norm) { 121 | return 0.5*l2norm + nuclearnorm + l1norm; 122 | } 123 | 124 | private double computeDynamicMu() { 125 | int m = E.getRowDimension(); 126 | int n = E.getColumnDimension(); 127 | 128 | double E_sd = standardDeviation(E.getData()); 129 | double mu = E_sd * Math.sqrt(2*Math.max(m,n)); 130 | 131 | return Math.max(.01, mu); 132 | } 133 | 134 | /*private double MedianAbsoluteDeviation(double[][] x) { 135 | DescriptiveStatistics stats = new DescriptiveStatistics(); 136 | for (int i = 0; i < x.length; i ++) 137 | for (int j = 0; j < x[i].length; j++) 138 | stats.addValue(x[i][j]); 139 | double median = stats.getPercentile(50); 140 | 141 | DescriptiveStatistics absoluteDeviationStats = new DescriptiveStatistics(); 142 | for (int i = 
0; i < x.length; i ++) 143 | for (int j = 0; j < x[i].length; j++) 144 | absoluteDeviationStats.addValue(Math.abs(x[i][j] - median)); 145 | 146 | return absoluteDeviationStats.getPercentile(50) * 1.4826; 147 | }*/ 148 | 149 | private double standardDeviation(double[][] x) { 150 | DescriptiveStatistics stats = new DescriptiveStatistics(); 151 | for (int i = 0; i < x.length; i ++) 152 | for (int j = 0; j < x[i].length; j++) 153 | stats.addValue(x[i][j]); 154 | return stats.getStandardDeviation(); 155 | } 156 | 157 | public RealMatrix getL() { 158 | return L; 159 | } 160 | 161 | public RealMatrix getS() { 162 | return S; 163 | } 164 | 165 | public RealMatrix getE() { 166 | return E; 167 | } 168 | 169 | 170 | 171 | } -------------------------------------------------------------------------------- /src/main/java/org/surus/math/RidgeRegression.java: -------------------------------------------------------------------------------- 1 | package org.surus.math; 2 | 3 | import org.apache.commons.math3.linear.MatrixUtils; 4 | import org.apache.commons.math3.linear.RealMatrix; 5 | import org.apache.commons.math3.linear.RealVector; 6 | import org.apache.commons.math3.linear.SingularValueDecomposition; 7 | 8 | public class RidgeRegression { 9 | 10 | private RealMatrix X; 11 | private SingularValueDecomposition X_svd = null; 12 | private double[] Y; 13 | private double l2penalty; 14 | private double[] coefficients; 15 | private double[] standarderrors; 16 | 17 | private double[] fitted; 18 | private double[] residuals; 19 | 20 | public RidgeRegression(double[][] x, double[] y) { 21 | this.X = MatrixUtils.createRealMatrix(x); 22 | this.X_svd = null; 23 | this.Y = y; 24 | this.l2penalty = 0; 25 | this.coefficients = null; 26 | 27 | this.fitted = new double[y.length]; 28 | this.residuals = new double[y.length]; 29 | } 30 | 31 | public void updateCoefficients(double l2penalty) { 32 | if (this.X_svd == null) { 33 | this.X_svd = new SingularValueDecomposition(X); 34 | } 35 | RealMatrix V 
= this.X_svd.getV(); 36 | double[] s = this.X_svd.getSingularValues(); 37 | RealMatrix U = this.X_svd.getU(); 38 | 39 | for (int i = 0; i < s.length; i++) { 40 | s[i] = s[i] / (s[i]*s[i] + l2penalty); 41 | } 42 | RealMatrix S = MatrixUtils.createRealDiagonalMatrix(s); 43 | 44 | RealMatrix Z = V.multiply(S).multiply(U.transpose()); 45 | 46 | this.coefficients = Z.operate(this.Y); 47 | 48 | this.fitted = this.X.operate(this.coefficients); 49 | double errorVariance = 0; 50 | for (int i = 0; i < residuals.length; i++) { 51 | this.residuals[i] = this.Y[i] - this.fitted[i]; 52 | errorVariance += this.residuals[i] * this.residuals[i]; 53 | } 54 | errorVariance = errorVariance / (X.getRowDimension() - X.getColumnDimension()); 55 | 56 | RealMatrix errorVarianceMatrix = MatrixUtils.createRealIdentityMatrix(this.Y.length).scalarMultiply(errorVariance); 57 | RealMatrix coefficientsCovarianceMatrix = Z.multiply(errorVarianceMatrix).multiply(Z.transpose()); 58 | this.standarderrors = getDiagonal(coefficientsCovarianceMatrix); 59 | } 60 | 61 | private double[] getDiagonal(RealMatrix X) { 62 | double[] diag = new double[X.getColumnDimension()]; 63 | for (int i = 0; i < diag.length; i++) { 64 | diag[i] = X.getEntry(i, i); 65 | } 66 | return diag; 67 | } 68 | 69 | public double getL2penalty() { 70 | return l2penalty; 71 | } 72 | 73 | public void setL2penalty(double l2penalty) { 74 | this.l2penalty = l2penalty; 75 | } 76 | 77 | public double[] getCoefficients() { 78 | return coefficients; 79 | } 80 | 81 | public double[] getStandarderrors() { 82 | return standarderrors; 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/main/java/org/surus/pig/RAD.java: -------------------------------------------------------------------------------- 1 | package org.surus.pig; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.Iterator; 7 | 8 | import org.apache.pig.EvalFunc; 9 | 
import org.apache.pig.data.BagFactory; 10 | import org.apache.pig.data.DataBag; 11 | import org.apache.pig.data.DataType; 12 | import org.apache.pig.data.Tuple; 13 | import org.apache.pig.data.TupleFactory; 14 | import org.apache.pig.impl.logicalLayer.schema.Schema; 15 | import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; 16 | import org.surus.math.AugmentedDickeyFuller; 17 | import org.surus.math.RPCA; 18 | 19 | public class RAD extends EvalFunc { 20 | 21 | private final double LPENALTY_DEFAULT_NO_DIFF = 1; 22 | private final double SPENALTY_DEFAULT_NO_DIFF = 1.4; 23 | private final double LPENALTY_DEFAULT_DIFF = 1; 24 | private final double SPENALTY_DEFAULT_DIFF = 1.4; 25 | 26 | private final String colName; 27 | private final Integer nRows; 28 | private final Integer nCols; 29 | private Double lpenalty; 30 | private Double spenalty; 31 | private Boolean isForceDiff; 32 | 33 | private Schema dataBagSchema; 34 | private final Integer minRecords; 35 | 36 | private final Double eps = 1e-12; 37 | 38 | // Constructor 39 | public RAD(String... 
parameters) { 40 | 41 | this.colName = parameters[0]; 42 | this.nCols = Integer.parseInt(parameters[1]); 43 | this.nRows = Integer.parseInt(parameters[2]); 44 | 45 | if (parameters.length == 4) { 46 | this.isForceDiff = Boolean.parseBoolean(parameters[3]); 47 | } else if (parameters.length != 3) { 48 | throw new RuntimeException("Invalid parameters list"); 49 | } 50 | 51 | // set other parameters 52 | this.minRecords = 2 * this.nRows; 53 | 54 | } 55 | 56 | // Define Output Schema 57 | @Override 58 | public Schema outputSchema(Schema input) { 59 | 60 | try { 61 | if (input.size() != 1) { 62 | throw new RuntimeException("Expected input to have only a single field"); 63 | } 64 | 65 | // Grab Bag Schema 66 | Schema.FieldSchema inputFieldSchema = input.getField(0); 67 | if (inputFieldSchema.type != DataType.BAG) { 68 | throw new RuntimeException("Expected a BAG as input"); 69 | } 70 | 71 | // Check Bag Schema 72 | Schema inputBagSchema = inputFieldSchema.schema; 73 | if (inputBagSchema.getField(0).type != DataType.TUPLE) { 74 | throw new RuntimeException(String.format("Expected input bag to contain a TUPLE, but instead found %s", 75 | DataType.findTypeName(inputBagSchema.getField(0).type))); 76 | } 77 | 78 | // Define Input Tuple Schema 79 | this.dataBagSchema = inputBagSchema.getField(0).schema; 80 | 81 | this.dataBagSchema.prettyPrint(); 82 | 83 | // Create List of Tuple Values 84 | List fieldSchemas = new ArrayList(); 85 | fieldSchemas.addAll(dataBagSchema.getFields()); 86 | fieldSchemas.add(new Schema.FieldSchema("x_transform", DataType.DOUBLE)); 87 | fieldSchemas.add(new Schema.FieldSchema("rsvd_l", DataType.DOUBLE)); 88 | fieldSchemas.add(new Schema.FieldSchema("rsvd_s", DataType.DOUBLE)); 89 | fieldSchemas.add(new Schema.FieldSchema("rsvd_e", DataType.DOUBLE)); 90 | 91 | // Build Tuple and Wrap in DataBag 92 | FieldSchema tupleFieldSchema = new FieldSchema(null, new Schema(fieldSchemas), DataType.TUPLE); 93 | FieldSchema bagFieldSchema = new 
FieldSchema(this.getClass().getName().toLowerCase().replace(".", "_"), new Schema(tupleFieldSchema), DataType.BAG); 94 | 95 | // Return Schema 96 | Schema outputSchema = new Schema(bagFieldSchema); 97 | return outputSchema; 98 | 99 | } catch (Throwable t) { 100 | throw new RuntimeException(t); 101 | } 102 | 103 | } 104 | 105 | // Helper Function 106 | public double[][] VectorToMatrix(double[] x, int rows, int cols) { 107 | double[][] input2DArray = new double[rows][cols]; 108 | for (int n= 0; n< x.length; n++) { 109 | int i = n % rows; 110 | int j = (int) Math.floor(n / rows); 111 | input2DArray[i][j] = x[n]; 112 | } 113 | return input2DArray; 114 | } 115 | 116 | // Define Exec 117 | @Override 118 | public DataBag exec(Tuple input) throws IOException { 119 | 120 | // Hack to get the InputSchema on the backend 121 | if (this.dataBagSchema == null) { 122 | this.dataBagSchema = getInputSchema().getField(0).schema.getField(0).schema; 123 | } 124 | 125 | // Check DataTypes 126 | if (!( 127 | (this.dataBagSchema.getField(this.colName).type == DataType.LONG ) || 128 | (this.dataBagSchema.getField(this.colName).type == DataType.INTEGER) || 129 | (this.dataBagSchema.getField(this.colName).type == DataType.DOUBLE ) || 130 | (this.dataBagSchema.getField(this.colName).type == DataType.FLOAT ) 131 | )) { 132 | throw new RuntimeException(String.format("Data type of %s (%s) is not supported,",this.colName, 133 | DataType.findTypeName(this.dataBagSchema.getField(this.colName).type))); 134 | } 135 | 136 | // Hardcode getting the bag 137 | DataBag inputBag = (DataBag) input.get(0); 138 | 139 | // Create TupleFactory for Output Bag Generation 140 | TupleFactory tupleFactory = TupleFactory.getInstance(); 141 | BagFactory bagFactory = BagFactory.getInstance(); 142 | 143 | // Read Data into Memory 144 | List tupleList = new ArrayList(); 145 | Iterator bagIter = inputBag.iterator(); 146 | while (bagIter.hasNext()) { 147 | Tuple tuple = bagIter.next(); 148 | tupleList.add(tuple); 149 | } 
150 | 151 | if (tupleList.size() != this.nRows*this.nCols) { 152 | throw new RuntimeException("ERROR: this.nRows * this.nCols != tupleList.size()"); 153 | } 154 | 155 | // Perform Dickey-Fuller Test 156 | double[] inputArray = new double[this.nRows*this.nCols]; 157 | Integer numNonZeroRecords = 0; 158 | for (int n=0; n< inputArray.length; n++) { 159 | if (this.dataBagSchema.getField(this.colName).type == DataType.DOUBLE) { 160 | inputArray[n] = (Double) tupleList.get(n).get(this.dataBagSchema.getPosition(this.colName)); 161 | } else if (this.dataBagSchema.getField(this.colName).type == DataType.FLOAT) { 162 | inputArray[n] = (Float) tupleList.get(n).get(this.dataBagSchema.getPosition(this.colName)); 163 | } else if (this.dataBagSchema.getField(this.colName).type == DataType.LONG ) { 164 | inputArray[n] = (Long) tupleList.get(n).get(this.dataBagSchema.getPosition(this.colName)); 165 | } else if (this.dataBagSchema.getField(this.colName).type == DataType.INTEGER ) { 166 | inputArray[n] = (Integer) tupleList.get(n).get(this.dataBagSchema.getPosition(this.colName)); 167 | } else { 168 | throw new RuntimeException(String.format("Data type of %s (%s) is not supported,",this.colName, 169 | DataType.findTypeName(this.dataBagSchema.getField(this.colName).type))); 170 | } 171 | 172 | if (Math.abs(inputArray[n]) > eps) numNonZeroRecords++; 173 | } 174 | 175 | if (numNonZeroRecords>=this.minRecords) { 176 | AugmentedDickeyFuller dickeyFullerTest = new AugmentedDickeyFuller(inputArray); 177 | double[] inputArrayTransformed = inputArray; 178 | if (this.isForceDiff == null && dickeyFullerTest.isNeedsDiff()) { 179 | // Auto Diff 180 | inputArrayTransformed = dickeyFullerTest.getZeroPaddedDiff(); 181 | } else if (this.isForceDiff) { 182 | // Force Diff 183 | inputArrayTransformed = dickeyFullerTest.getZeroPaddedDiff(); 184 | } 185 | 186 | if (this.spenalty == null) { 187 | this.lpenalty = this.LPENALTY_DEFAULT_NO_DIFF; 188 | this.spenalty = this.SPENALTY_DEFAULT_NO_DIFF / 
Math.sqrt(Math.max(this.nCols, this.nRows)); 189 | } 190 | 191 | 192 | // Calc Mean 193 | double mean = 0; 194 | for (int n=0; n < inputArrayTransformed.length; n++) { 195 | mean += inputArrayTransformed[n]; 196 | } 197 | mean /= inputArrayTransformed.length; 198 | 199 | // Calc STDEV 200 | double stdev = 0; 201 | for (int n=0; n < inputArrayTransformed.length; n++) { 202 | stdev += Math.pow(inputArrayTransformed[n] - mean,2) ; 203 | } 204 | stdev = Math.sqrt(stdev / (inputArrayTransformed.length - 1)); 205 | 206 | // Transformation: Zero Mean, Unit Variance 207 | for (int n=0; n < inputArrayTransformed.length; n++) { 208 | inputArrayTransformed[n] = (inputArrayTransformed[n]-mean)/stdev; 209 | } 210 | 211 | // Read Input Data into Array 212 | // Read Input Data into Array 213 | double[][] input2DArray = new double[this.nRows][this.nCols]; 214 | input2DArray = VectorToMatrix(inputArrayTransformed, this.nRows, this.nCols); 215 | 216 | RPCA rSVD = new RPCA(input2DArray, this.lpenalty, this.spenalty); 217 | 218 | double[][] outputE = rSVD.getE().getData(); 219 | double[][] outputS = rSVD.getS().getData(); 220 | double[][] outputL = rSVD.getL().getData(); 221 | 222 | // Loop through bag and build output 223 | DataBag outputBag = bagFactory.newDefaultBag(); 224 | for (int n=0; n< inputArray.length; n++) { 225 | 226 | int i = n % this.nRows; 227 | int j = (int) Math.floor(n / this.nRows); 228 | 229 | // Add all previous tuple values 230 | Tuple oldTuple = tupleList.get(n); 231 | Tuple newTuple = tupleFactory.newTuple(oldTuple.size() + 4); 232 | int tupleIndex = 0; 233 | for (int k = 0; k < oldTuple.size(); k++) { 234 | newTuple.set(tupleIndex++, oldTuple.get(k)); 235 | } 236 | 237 | // TODO: Add additional L,S,E matrices 238 | newTuple.set(tupleIndex++, inputArrayTransformed[n]); 239 | newTuple.set(tupleIndex++, outputL[i][j] * stdev + mean); 240 | newTuple.set(tupleIndex++, outputS[i][j] * stdev); 241 | newTuple.set(tupleIndex++, outputE[i][j] * stdev); 242 | 243 | // 
Add Tuple to DataBag 244 | outputBag.add(newTuple); 245 | 246 | } 247 | // Return Tuple 248 | return outputBag; 249 | 250 | } else { 251 | 252 | // Loop through bag and build output 253 | DataBag outputBag = bagFactory.newDefaultBag(); 254 | for (int n=0; n< inputArray.length; n++) { 255 | 256 | int i = n % this.nRows; 257 | int j = (int) Math.floor(n / this.nRows); 258 | 259 | // Add all previous tuple values 260 | Tuple oldTuple = tupleList.get(n); 261 | Tuple newTuple = tupleFactory.newTuple(oldTuple.size() + 4); 262 | int tupleIndex = 0; 263 | for (int k = 0; k < oldTuple.size(); k++) { 264 | newTuple.set(tupleIndex++, oldTuple.get(k)); 265 | } 266 | 267 | // Add Tuple to DataBag 268 | outputBag.add(newTuple); 269 | 270 | } 271 | // Return Tuple 272 | return outputBag; 273 | 274 | } 275 | 276 | } 277 | 278 | } 279 | -------------------------------------------------------------------------------- /src/main/java/org/surus/pig/ScorePMML.java: -------------------------------------------------------------------------------- 1 | package org.surus.pig; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.ArrayList; 6 | import java.util.HashMap; 7 | import java.util.HashSet; 8 | import java.util.LinkedHashMap; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | import javax.xml.bind.JAXBException; 13 | 14 | import org.apache.hadoop.conf.Configuration; 15 | import org.apache.hadoop.fs.FSDataInputStream; 16 | import org.apache.hadoop.fs.FileSystem; 17 | import org.apache.hadoop.fs.Path; 18 | import org.apache.pig.EvalFunc; 19 | import org.apache.pig.data.DataType; 20 | import org.apache.pig.data.Tuple; 21 | import org.apache.pig.data.TupleFactory; 22 | import org.apache.pig.impl.logicalLayer.schema.Schema; 23 | import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; 24 | import org.apache.pig.impl.util.UDFContext; 25 | import org.dmg.pmml.DataField; 26 | import org.dmg.pmml.FieldName; 27 | import org.dmg.pmml.IOUtil; 28 
| import org.dmg.pmml.OutputField; 29 | import org.dmg.pmml.PMML; 30 | import org.jpmml.evaluator.Evaluator; 31 | import org.jpmml.evaluator.EvaluatorUtil; 32 | import org.jpmml.evaluator.FieldValue; 33 | import org.jpmml.evaluator.ModelEvaluatorFactory; 34 | import org.jpmml.manager.ModelManager; 35 | import org.jpmml.manager.PMMLManager; 36 | import org.xml.sax.SAXException; 37 | 38 | public class ScorePMML extends EvalFunc { 39 | 40 | private Evaluator evaluator = null; 41 | private List activeFields = null; 42 | private List predictedFields = null; 43 | private List outputFields = null; 44 | private String modelPath = null; 45 | private String modelName = null; 46 | private Schema inputTupleSchema = null; 47 | private Map aliasMap = null; 48 | private Boolean failOnTypeMatching = true; 49 | Map preparedRow = new LinkedHashMap(); 50 | 51 | 52 | private static final TupleFactory tf = TupleFactory.getInstance(); 53 | 54 | private static final Map dataTypeMap = new HashMap(); 55 | static { 56 | dataTypeMap.put("STRING" , DataType.CHARARRAY); 57 | dataTypeMap.put("INTEGER", DataType.INTEGER); 58 | dataTypeMap.put("FLOAT" , DataType.DOUBLE); 59 | dataTypeMap.put("LONG" , DataType.DOUBLE); 60 | dataTypeMap.put("DOUBLE" , DataType.DOUBLE); 61 | dataTypeMap.put("BOOLEAN", DataType.DOUBLE); 62 | } 63 | 64 | // Constructor 65 | public ScorePMML(String... 
params) throws IOException, SAXException, JAXBException { 66 | 67 | // Call Nested Constructor 68 | this(params[0]); 69 | 70 | // Override default failure mode 71 | if (params.length == 2) { 72 | this.failOnTypeMatching = Boolean.parseBoolean(params[1]); 73 | } 74 | 75 | } 76 | 77 | // Constructor 78 | public ScorePMML(String modelPath) throws IOException, SAXException, JAXBException { 79 | 80 | // Set Default failure mode 81 | this.failOnTypeMatching = true; 82 | 83 | // Set Model Path 84 | this.modelPath = modelPath; 85 | System.err.println("modelPath: "+this.modelPath); 86 | 87 | // Set Distributed Cache 88 | int blah = this.modelPath.lastIndexOf("/") + 1; 89 | this.modelName = this.modelPath.substring(blah); 90 | System.err.println("modelName: "+this.modelName); 91 | 92 | } 93 | 94 | public List getCacheFiles() { 95 | String filePath = this.modelPath+"#"+this.modelName; 96 | List list = new ArrayList(1); 97 | list.add(filePath); 98 | System.err.println(filePath+": added to the distributed cache."); 99 | return list; 100 | } 101 | 102 | private void initialize(Schema inputSchema) throws IOException, SAXException, JAXBException { 103 | 104 | this.inputTupleSchema = inputSchema; 105 | 106 | // and, initialize aliasMap: 107 | if (this.aliasMap == null) { 108 | this.aliasMap = new HashMap(); 109 | for (String alias : this.inputTupleSchema.getAliases()) { 110 | this.aliasMap.put(alias,this.inputTupleSchema.getPosition(alias)); // something to cleanup 111 | } 112 | } 113 | 114 | // Get PMML Object 115 | PMML pmml = null; 116 | try { 117 | 118 | /* 119 | * TODO: Make this more robust. Specifically, Angela Ho wanted to refernce a file in the distributed 120 | * cache directly. Obviously, my code doesn't support this, because it would try to open 121 | * the file with the IOUtil Java object, as opposed to the hadoop.fs.Path object. 122 | * 123 | * TODO: This try/catch block is a hack for: 124 | * (1) checking if execution is being done on "back-end." 
A check for back-end can be done with 125 | * UDFContext.getUDFContext().isFrontend() BUT this does not resolve problems with local-mode. 126 | * (2) enables testing in local-mode without failing unit tests. 127 | */ 128 | 129 | // Try reading file from distributed cache. 130 | pmml = IOUtil.unmarshal(new File("./"+this.modelName)); 131 | System.err.println("Read model from distributed cache!"); 132 | 133 | } catch (Throwable t) { 134 | // If not on the back-end... (and distributed cache not available) ... 135 | 136 | if (this.modelPath.toLowerCase().startsWith("s3n://") || this.modelPath.toLowerCase().startsWith("s3://")) { 137 | // ... read from S3. 138 | Path path = new Path(this.modelPath); 139 | FileSystem fs = path.getFileSystem(new Configuration()); 140 | FSDataInputStream in = fs.open(path); 141 | try { pmml = IOUtil.unmarshal(in); } finally { in.close(); } // always release the stream, even when unmarshalling fails 142 | System.err.println("Read model from s3!"); 143 | 144 | } else { 145 | // ... read from local file. 146 | pmml = IOUtil.unmarshal(new File(this.modelPath)); 147 | System.err.println("Read model from local disk!"); 148 | } 149 | 150 | } 151 | 152 | // Initialize the pmmlManager 153 | PMMLManager pmmlManager = new PMMLManager(pmml); 154 | 155 | // Initialize the PMML Model Manager 156 | ModelManager modelManager = pmmlManager.getModelManager(null, ModelEvaluatorFactory.getInstance()); 157 | 158 | this.evaluator = (Evaluator)modelManager; // Model Evaluator 159 | this.activeFields = evaluator.getActiveFields(); // input columns 160 | this.predictedFields = evaluator.getPredictedFields(); // predicted columns 161 | this.outputFields = evaluator.getOutputFields(); // derived output columns (based on predicted columns) 162 | 163 | } 164 | 165 | // Define Output Schema 166 | @Override 167 | public Schema outputSchema(Schema input) { 168 | 169 | try { 170 | initialize(input); 171 | } catch (Throwable t) { 172 | throw new RuntimeException("Frontend: Unable to initialize PMML file: ",t); 173 | } 174 | 175 | // Define the output schema: 176
| try { 177 | 178 | // Define Input Tuple Schema 179 | this.inputTupleSchema = input; 180 | HashSet aliases = new HashSet(inputTupleSchema.getAliases()); 181 | Boolean isVerbose = false; 182 | 183 | for (FieldName activeField : this.activeFields) { 184 | 185 | // Check that all active fields are present in dataset: 186 | String activeFieldAlias = activeField.toString().toLowerCase(); 187 | if (!aliases.contains(activeFieldAlias)) { 188 | throw new RuntimeException("ERROR: "+activeFieldAlias+" is not in the input dataset!"); 189 | } 190 | 191 | // Check that all active fields have expected datatypes: 192 | Byte left = this.inputTupleSchema.getField(aliasMap.get(activeFieldAlias)).type; 193 | Byte right = dataTypeMap.get(this.evaluator.getDataField(activeField).getDataType().toString()); 194 | if (left != right) 195 | if (failOnTypeMatching) { 196 | throw new RuntimeException("ERROR: "+activeFieldAlias+" does not match expected type! (Expected: " 197 | +DataType.genTypeToNameMap().get(right)+" Observed: "+DataType.genTypeToNameMap().get(left)+")"); 198 | } else if (UDFContext.getUDFContext().isFrontend() && !isVerbose) { 199 | System.err.println("WARNING: active fields do not match expected type! Please run in strict mode to determine which fields are in violation"); 200 | isVerbose = true; 201 | // System.err.println("WARNING: "+activeFieldAlias+" does not match expected type! 
(Expected: " 202 | // +DataType.genTypeToNameMap().get(right)+" Observed: "+DataType.genTypeToNameMap().get(left)+")"); 203 | } 204 | } 205 | 206 | // Create List of Tuple Values 207 | List fieldSchemas = new ArrayList(); 208 | 209 | // Predicted Fields 210 | for (FieldName predictedField : this.predictedFields) { 211 | String predictedFieldAlias = "predictedField_" + predictedField.toString().toLowerCase(); 212 | 213 | // Create FieldName 214 | DataField dataField = this.evaluator.getDataField(predictedField); 215 | String dataType = dataField.getDataType().toString(); 216 | 217 | if (dataType == null) { 218 | throw new RuntimeException("Predicted Fields with unknown datatype are not supported! Column: "+predictedFieldAlias+", PMML DataType "+dataType+"."); 219 | } else if (!dataTypeMap.containsKey(dataType)) { 220 | throw new RuntimeException("Column: "+predictedFieldAlias+", PMML DataType "+dataType+" is not currently supported."); 221 | } else { 222 | fieldSchemas.add(new Schema.FieldSchema(predictedFieldAlias,dataTypeMap.get(dataType))); 223 | } 224 | } 225 | 226 | // Output Fields 227 | for (FieldName outputField : this.outputFields) { 228 | String outputFieldAlias = "outputField_" + outputField.toString().toLowerCase(); 229 | 230 | // Create FieldName 231 | OutputField dataField = this.evaluator.getOutputField(outputField); 232 | if (dataField.getDataType() == null) { 233 | fieldSchemas.add(new Schema.FieldSchema(outputFieldAlias,DataType.BYTEARRAY)); 234 | } else if (dataTypeMap.containsKey(dataField.getDataType().toString())) { 235 | fieldSchemas.add(new Schema.FieldSchema(outputFieldAlias,dataTypeMap.get(dataField.getDataType().toString()))); 236 | } else { 237 | throw new RuntimeException("Column: "+outputFieldAlias+", PMML DataType "+dataField.getDataType().toString()+" is not currently supported."); 238 | } 239 | } 240 | 241 | // Build Tuple and Wrap in DataBag 242 | FieldSchema tupleFieldSchema = new FieldSchema("EvalPMML", new Schema(fieldSchemas), 
DataType.TUPLE); 243 | 244 | // Return Schema 245 | Schema outputSchema = new Schema(tupleFieldSchema); 246 | return outputSchema; 247 | 248 | } catch (Throwable t) { 249 | System.err.println(t); 250 | throw new RuntimeException(t); 251 | } 252 | 253 | } 254 | 255 | // Define Exec 256 | @Override 257 | public Tuple exec(Tuple input) throws IOException { 258 | 259 | // check 260 | int dummy = 0; 261 | 262 | // Initialize Evaluator if null: 263 | if (this.evaluator == null) { 264 | try { 265 | System.out.println("Initializing: "+(dummy++)+" time"); 266 | Schema inputSchema = getInputSchema(); 267 | this.initialize(inputSchema); // something to check 268 | } catch (Throwable t) { 269 | throw new RuntimeException("Backend: Unable to initialize PMML file: ",t); 270 | } 271 | } 272 | 273 | // Initialize Output as Input 274 | Tuple outputTuple = tf.newTuple(this.predictedFields.size() + this.outputFields.size()); 275 | 276 | /* ************************ 277 | // BLOCK: Prepare Data 278 | ************************* */ 279 | 280 | for(FieldName inputField : this.activeFields){ 281 | 282 | // Get Object 283 | Object origBodyCell = (Object) input.get(aliasMap.get(inputField.getValue().toLowerCase())); 284 | 285 | Object bodyCell; 286 | if (origBodyCell instanceof Long) { 287 | bodyCell = ((Long) origBodyCell).doubleValue(); 288 | } else { 289 | bodyCell = origBodyCell; 290 | } 291 | 292 | // Prepare Object for Scoring 293 | this.preparedRow.put(inputField, EvaluatorUtil.prepare(this.evaluator, inputField, bodyCell)); 294 | 295 | // Prepare Object for Scoring 296 | // CC: Removed this b/c I think the "Long" check above resolves any issues. 
297 | /* 298 | try { 299 | this.preparedRow.put(inputField, EvaluatorUtil.prepare(this.evaluator, inputField, bodyCell)); 300 | } catch (Throwable t) { 301 | System.err.println("Unable to prepare record, Trouble Parsing: " + inputField.toString() + " (value="+ bodyCell+")"); 302 | System.err.println(t); 303 | throw new RuntimeException(t); 304 | } 305 | */ 306 | 307 | } 308 | 309 | // Score Data 310 | Map result = evaluator.evaluate(this.preparedRow); 311 | 312 | // Append Predicted Fields 313 | int i = 0; 314 | for(FieldName predictedField : this.predictedFields){ 315 | outputTuple.set(i++,EvaluatorUtil.decode(result.get(predictedField))); 316 | } 317 | 318 | for(FieldName outputField : this.outputFields){ 319 | outputTuple.set(i++,EvaluatorUtil.decode(result.get(outputField))); 320 | } 321 | 322 | // Return Tuple: 323 | return outputTuple; 324 | 325 | } 326 | 327 | } 328 | -------------------------------------------------------------------------------- /src/test/java/org/surus/math/AugmentedDickeyFuller_Test.java: -------------------------------------------------------------------------------- 1 | package org.surus.math; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import java.util.Random; 6 | 7 | import org.junit.Test; 8 | 9 | public class AugmentedDickeyFuller_Test { 10 | 11 | @Test 12 | public void testLinearTrend() { 13 | Random rand = new Random(); 14 | double[] x = new double[100]; 15 | for (int i = 0; i < x.length; i ++) { 16 | x[i] = (i+1) + 5*rand.nextDouble(); 17 | } 18 | AugmentedDickeyFuller adf = new AugmentedDickeyFuller(x); 19 | assertTrue(adf.isNeedsDiff() == true); 20 | } 21 | 22 | @Test 23 | public void testLinearTrendWithOutlier() { 24 | Random rand = new Random(); 25 | double[] x = new double[100]; 26 | for (int i = 0; i < x.length; i ++) { 27 | x[i] = (i+1) + 5*rand.nextDouble(); 28 | } 29 | x[50] = 100; 30 | AugmentedDickeyFuller adf = new AugmentedDickeyFuller(x); 31 | assertTrue(adf.isNeedsDiff() == true); 32 | } 33 | 34 | } 35 | 
-------------------------------------------------------------------------------- /src/test/java/org/surus/math/RPCA_Test.java: -------------------------------------------------------------------------------- 1 | package org.surus.math; 2 | 3 | import static org.junit.Assert.assertTrue; 4 | 5 | import org.junit.Test; 6 | 7 | public class RPCA_Test { 8 | 9 | public double[][] VectorToMatrix(double[] x, int rows, int cols) { 10 | double[][] input2DArray = new double[rows][cols]; 11 | for (int n= 0; n< x.length; n++) { 12 | int i = n % rows; 13 | int j = (int) Math.floor(n / rows); 14 | input2DArray[i][j] = x[n]; 15 | } 16 | return input2DArray; 17 | } 18 | 19 | public boolean MatrixApproximatelyEquals(double[][] X, double[][] Y, double epsilon) { 20 | boolean testOutput = true; 21 | int printCnt = 0; 22 | 23 | for (int j = 0; j < X[0].length; j++) { 24 | for (int i = 0; i < X.length; i++) { 25 | if (Math.abs(X[i][j] - Y[i][j]) > epsilon) { 26 | System.out.println("("+i+","+j+") Left: "+X[i][j] + " Right: "+Y[i][j]); 27 | printCnt++; 28 | testOutput = false; 29 | } 30 | } 31 | } 32 | return testOutput; 33 | } 34 | 35 | @Test 36 | public void testRSVD() { 37 | System.out.println("Running Test: testRSVD"); 38 | 39 | // X: 40 | double[] ts = new double[] 
{2.05407309078346,2.85886923211884,2.89728554463089,0.790480493540229,0.548595335194215,1.31367506547418,1.74407133897301,4.06071962679526,2.75651081738515,0.604658754735038,0.182607837501951,-1.262201503678,0.996560864201235,2.74637817075616,0.775004762296101,0.906823901472144,2.6839457174704,-0.0625841462071901,-1.09641353766956,0.00479165991036998,0.449351175604642,3.53152043857777,1.05206417605014,2.7864942275709,-0.691007430091048,-1.02038488026721,-1.35124486835257,0.0621976297222073,2.82421545538541,2.41312411015615,1.27711183784622,0.0988204592711682,1.50691474460298,0.272037685359444,1.9889742629239,3.33907184622517,3.68134545243902,0.751559686193563,0.679120355399832,0.428056866405207,0.351341204822829,1.33498418531095,3.04169869243666,1.22542459625713,1.35457091793328,0.567124649501233,-1.95560538335988,-1.09014280752067,1.80062291606412,0.588637569785287,1.89212604693897,1.38386740607786,0.356716316822486,-2.07161693692556,4,1.44451323393473,3.52551739267569,3.16481926426412,1.83839333727511,0.827646664705546,0.654351159135431,-0.00892931340717523,0.678082675364184}; 41 | double[][] X = VectorToMatrix(ts, 7, 9); 42 | 43 | // E, S, L: 44 | double[] E_r = new double[] 
{-0.0907627955303747,1.01938662397306,1.7153606207031,0.508734242238024,0.723048984114528,1.05744835689681,0.634974592796234,1.52144373899958,0.636387609902244,-0.816766677690375,-0.130107806055245,-0.998365425612053,0.744951709494425,1.46231154911581,-0.226797959197785,0.141398620170014,1.77717624827034,-0.160279457424966,-0.921736144683016,-0.0307375549137413,-0.0215231023010388,1.67109146682516,-0.344092782391524,1.68469787539411,-0.86328701822436,-0.670845951339157,-1.39451774017965,-0.799528103709266,0.889135246585203,0.737525584567534,0.216923473185421,-0.161161265909894,1.64839382264763,0.0264997493930041,0.980289570670967,1.0549440532891,1.71882828500543,-0.505750385484114,0.377784603637951,0.610288197122763,0.0752672973097475,0.15206212152394,1.19645607409238,-0.200471255130702,0.277569021928143,0.381376624759279,-1.64980949817604,-1.16998330599701,0.925239888962673,-0.656964349367174,0.843823792465116,0.689801114362373,0.200313968866586,-1.77717623601756,1.77717624450864,0.810454981413976,1.22657945137526,1.23085136920443,0.557077001335409,0.539281927359977,0.878791698921523,-0.2497479761408,-0.491748238542012}; 45 | double[] S_r = new double[] {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.318707767321735,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.173279535858878,2.09194305945145,0,0,0,0,0,0,0,0}; 46 | double[] L_r = new double[] 
{2.14483588631383,1.83948260814578,1.18192492392779,0.281746251302205,-0.174453648920312,0.256226708577366,1.10909674617677,2.53927588779568,2.12012320748291,1.42142543242541,0.312715643557195,-0.263836078065949,0.251609154706809,1.28406662164035,1.00180272149389,0.76542528130213,0.588061701878331,0.0976953112177755,-0.174677392986544,0.0355292148241113,0.470874277905681,1.86042897175261,1.39615695844167,1.10179635217679,0.172279588133313,-0.349538928928049,0.0432728718270843,0.861725733431473,1.93508020880021,1.67559852558861,1.0601883646608,0.259981725181062,-0.14147907804465,0.24553793596644,1.00868469225294,2.28412779293606,1.96251716743359,1.25731007167768,0.301335751761881,-0.182231330717556,0.276073907513082,1.18292206378701,1.84524261834428,1.42589585138783,1.07700189600514,0.185748024741955,-0.305795885183839,0.0798404984763351,0.875383027101449,1.24560191915246,1.04830225447385,0.694066291715486,0.1564023479559,-0.121161165049127,0.13088069603991,0.634058252520755,2.29893794130043,1.93396789505969,1.2813163359397,0.288364737345569,-0.224440539786092,0.240818662733625,1.1698309139062}; 47 | 48 | double[][] E_matrix_r = VectorToMatrix(E_r, 7, 9); 49 | double[][] S_matrix_r = VectorToMatrix(S_r, 7, 9); 50 | double[][] L_matrix_r = VectorToMatrix(L_r, 7, 9); 51 | 52 | RPCA rsvd = new RPCA(X, 1, 1.4/3); 53 | 54 | double[][] E = rsvd.getE().getData(); 55 | double[][] S = rsvd.getS().getData(); 56 | double[][] L = rsvd.getL().getData(); 57 | 58 | assertTrue(MatrixApproximatelyEquals(E_matrix_r, E, 0.0001)); 59 | assertTrue(MatrixApproximatelyEquals(S_matrix_r, S, 0.0001)); 60 | assertTrue(MatrixApproximatelyEquals(L_matrix_r, L, 0.0001)); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /src/test/java/org/surus/pig/RAD_Test.java: -------------------------------------------------------------------------------- 1 | package org.surus.pig; 2 | 3 | import static org.junit.Assert.*; 4 | 5 | import org.junit.Test; 
6 | 7 | import java.util.ArrayList; 8 | import java.util.Iterator; 9 | import java.util.List; 10 | 11 | import org.apache.pig.backend.executionengine.ExecException; 12 | import org.apache.pig.data.BagFactory; 13 | import org.apache.pig.data.DataBag; 14 | import org.apache.pig.data.Tuple; 15 | import org.apache.pig.data.TupleFactory; 16 | import org.apache.pig.data.DataType; 17 | import org.apache.pig.impl.logicalLayer.FrontendException; 18 | import org.apache.pig.impl.logicalLayer.schema.Schema; 19 | import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; 20 | 21 | import org.surus.pig.RAD; 22 | 23 | 24 | public class RAD_Test { 25 | 26 | private TupleFactory tf = TupleFactory.getInstance(); 27 | private BagFactory bf = BagFactory.getInstance(); 28 | 29 | private static final String[] argsDaily8 = new String[]{"metric","8","7"}; 30 | private static final String[] argsDaily9 = new String[]{"metric","9","7","False"}; 31 | 32 | 33 | @Test 34 | public void testNormal() throws Exception { 35 | System.out.println("testNormal"); 36 | 37 | double[] ts = new double[] 
{2.05407309078346,2.85886923211884,2.89728554463089,0.790480493540229,0.548595335194215,1.31367506547418,1.74407133897301,4.06071962679526,2.75651081738515,0.604658754735038,0.182607837501951,-1.262201503678,0.996560864201235,2.74637817075616,0.775004762296101,0.906823901472144,2.6839457174704,-0.0625841462071901,-1.09641353766956,0.00479165991036998,0.449351175604642,3.53152043857777,1.05206417605014,2.7864942275709,-0.691007430091048,-1.02038488026721,-1.35124486835257,0.0621976297222073,2.82421545538541,2.41312411015615,1.27711183784622,0.0988204592711682,1.50691474460298,0.272037685359444,1.9889742629239,3.33907184622517,3.68134545243902,0.751559686193563,0.679120355399832,0.428056866405207,0.351341204822829,1.33498418531095,3.04169869243666,1.22542459625713,1.35457091793328,0.567124649501233,-1.95560538335988,-1.09014280752067,1.80062291606412,0.588637569785287,1.89212604693897,1.38386740607786,0.356716316822486,-2.07161693692556,4,1.44451323393473,3.52551739267569,3.16481926426412,1.83839333727511,0.827646664705546,0.654351159135431,-0.00892931340717523,0.678082675364184}; 38 | double[] E_r = new double[] 
{0.3318797478729918,1.373638963651734,1.5863429313355741,-0.13690908975775629,-0.17341746498876717,0.45656608096044515,0.5029180391592517,1.6864361103335357,0.9041099905770569,-0.8601945846628597,-0.43797424973196464,-1.4306784687160095,0.5305755112030833,1.4332243957418884,-0.9225543720714464,-0.48968272112395295,1.2969905519062221,-0.936011207027195,-1.6967451093902703,-0.7685900450169054,-0.7342364348556424,1.1239395771496394,-0.7346252973511546,1.2214527991637296,-1.2219568417836726,-0.9997034788017629,-1.6861131664061504,-1.1927477447840469,1.0418155557468505,0.8807625994533953,-0.03357751903633732,-0.8118290678921689,0.8108046850909548,-0.5663706526498646,0.7314788056938822,1.2544903710465884,1.9742891069463693,-0.6254827173189841,-0.09333463299772303,-0.020202726976659584,-0.3118013823711032,0.04079223440640133,0.7231970417612443,-0.5343365497940273,-0.1640519436117994,-0.026079552263280893,-2.0141760086038945,-1.509009390294657,0.5384928439241734,-0.7732362655677173,0.6211082673104158,0.1455859735298013,-0.7302821616706046,-2.014175981890958,2.014175973413418,0.2514450496241806,1.4414575166495622,1.4769526331968026,0.44081750801343844,0.07149456117262622,0.24164508024661888,-0.6475184991073684,-0.6022063271601131}; 39 | double[] S_r = new double[] {0.0,0.0,0.0,-0.0,-0.0,0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,-0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,0.0,-0.0,0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.0,0.0,0.0,-0.0,-0.0,-0.0,-0.040615044404649886,-0.0,0.0,-0.0,0.0,0.0,-0.0,-1.0637357541633508,0.9275030757193699,0.0,0.0,0.0,0.0,0.0,0.0,-0.0,-0.0}; 40 | double[] L_r = new double[] 
{1.7221933429104683,1.4852302684671057,1.3109426132953157,0.9273895832979853,0.7220128001829822,0.8571089845137347,1.2411532998137584,2.3742835164617246,1.852400826808093,1.4648533393978977,0.6205820872339156,0.16847696503800913,0.4659853529981517,1.3131537750142714,1.6975591343675474,1.396506622596097,1.3869551655641779,0.8734270608200048,0.6003315717207102,0.7733817049272755,1.1835876104602845,2.407580861428131,1.7866894734012946,1.5650414284071705,0.5309494116926246,-0.020681401465446836,0.3348682980535801,1.2549453745062544,1.7823998996385595,1.5323615107027546,1.3106893568825573,0.910649527163337,0.6961100595120252,0.8384083380093086,1.2574954572300179,2.084581475178582,1.7070563454926502,1.377042403512547,0.772454988397555,0.4482595933818666,0.6631425871939323,1.2941919509045487,2.3185016506754152,1.7597611460511573,1.5186228615450794,0.5932042017645138,0.09918566964866415,0.41886658277398703,1.2621300721399467,1.3618738353530044,1.2710177796285542,1.2382814325480587,1.0869984784930906,1.0062947991287492,1.058320950867212,1.1930681843105493,2.0840598760261275,1.6878666310673174,1.3975758292616716,0.7561521035329198,0.41270607888881206,0.6385891857001931,1.280289002524297}; 41 | 42 | // Input/Output Bag 43 | // Finally wrap inputBag in tuple 44 | Tuple inputTuple = tf.newTuple(); 45 | DataBag expected = bf.newDefaultBag(); 46 | { 47 | // Build Input/Output 48 | inputTuple.append(buildDataBag(ts)); 49 | expected = buildDataBag(ts,L_r,S_r,E_r,false); 50 | 51 | } 52 | 53 | // Initialize Class 54 | RAD rsvd = new RAD(argsDaily9); 55 | Schema outputSchema = rsvd.outputSchema(buildInputSchema2()); 56 | DataBag observed = rsvd.exec(inputTuple); 57 | 58 | // Test 59 | if (approximateCompareBags(expected,observed)) { 60 | System.out.println("PASS"); 61 | } else { 62 | System.out.println("------- EPIC FAIL --------"); 63 | System.out.println("Expected: "+expected.toString()); 64 | System.out.println("Observed: "+observed.toString()); 65 | } 66 | 67 | 
assertTrue(approximateCompareBags(expected,observed)); 68 | 69 | } 70 | 71 | private Boolean approximateCompareBags(DataBag inputBag1, DataBag inputBag2) throws ExecException { 72 | 73 | // Hardcode Acceptable Error 74 | double errorLimit = 0.0000001; 75 | 76 | Iterator iter1 = inputBag1.iterator(); 77 | Iterator iter2 = inputBag2.iterator(); 78 | while (iter1.hasNext()) { 79 | Tuple tuple1 = iter1.next(); 80 | Tuple tuple2 = iter2.next(); 81 | 82 | // Check error 83 | if (Math.abs((Double) tuple1.get(0) - (Double) tuple2.get(0)) > errorLimit) return false; 84 | // TODO: Add unit test for differenced case 85 | //if (Math.abs((Double) tuple1.get(1) - (Double) tuple2.get(1)) > errorLimit) return false; 86 | if (Math.abs((Double) tuple1.get(2) - (Double) tuple2.get(2)) > errorLimit) return false; 87 | if (Math.abs((Double) tuple1.get(3) - (Double) tuple2.get(3)) > errorLimit) return false; 88 | if (Math.abs((Double) tuple1.get(4) - (Double) tuple2.get(4)) > errorLimit) return false; 89 | 90 | } 91 | 92 | return true; 93 | } 94 | 95 | private DataBag buildDataBag(double[] obj1) { 96 | 97 | DataBag dataBag = bf.newDefaultBag(); 98 | for (int n=0; n fieldSchemas = new ArrayList(); 136 | fieldSchemas.add(new Schema.FieldSchema("metric" , DataType.DOUBLE)); 137 | 138 | // Wrap Inner DataBag 139 | FieldSchema innerTupleFieldSchema = null; 140 | try { 141 | innerTupleFieldSchema = new FieldSchema(null, new Schema(fieldSchemas), DataType.TUPLE); 142 | } catch (FrontendException e) { 143 | e.printStackTrace(); 144 | } 145 | 146 | // Outer Tuple Schema 147 | List fieldSchemaFinal = new ArrayList(); 148 | try { 149 | fieldSchemaFinal.add(new Schema.FieldSchema("dummy_bag", new Schema(innerTupleFieldSchema), DataType.BAG)); 150 | } catch (FrontendException e1) { 151 | e1.printStackTrace(); 152 | } 153 | 154 | // Return Schema 155 | Schema outputSchema = new Schema(fieldSchemaFinal); 156 | return outputSchema; 157 | 158 | } 159 | 160 | 161 | 162 | } 163 | 
--------------------------------------------------------------------------------
/src/test/java/org/surus/pig/ScorePMML_AuditTest.java:
--------------------------------------------------------------------------------
package org.surus.pig;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.JAXBException;

import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.junit.*;
import org.surus.pig.ScorePMML;
import org.xml.sax.SAXException;

/**
 * Scores a single record of the Audit example through {@link ScorePMML} with the
 * ensemble decision-tree model and compares the result with a hard-coded prediction.
 */
public class ScorePMML_AuditTest {

    // Audit Models
    private final String ensembleAuditModelPath = "./resources/examples/models/ensemble_audit_dectree.xml";

    // Tuple Factory
    private final TupleFactory tf = TupleFactory.getInstance();

    // --------------------------
    // Audit Test Functions
    // --------------------------

    /**
     * Runs one Audit record through the ensemble model and expects the single-field
     * output tuple {@code ("0")}.
     */
    @Test
    public void ensembleScoringTest_Audit_1() throws IOException, SAXException, JAXBException {

        Schema inputSchema = buildAuditInputSchema();

        // Input record and the output tuple expected for it
        Tuple inputTuple = this.buildAuditInputEvent(1038288L,45,"Private","Bachelor","Married","Repair",27743.82,"Male",0,55,"UnitedStates",7298,1);
        Tuple expected = this.buildAuditOutputEvent(1038288L,45,"Private","Bachelor","Married","Repair",27743.82,"Male",0,55,"UnitedStates",7298,1,"0");

        // Initialize Class
        ScorePMML evalPMML = new ScorePMML(this.ensembleAuditModelPath);

        // The returned schema is not inspected; the call is kept ahead of exec() exactly
        // as before. NOTE(review): presumably the UDF records the input schema here -
        // confirm against ScorePMML before removing it.
        evalPMML.outputSchema(inputSchema);
        Tuple observed = evalPMML.exec(inputTuple);

        // Test
        if (expected.equals(observed)) {
            System.out.println("ensembleScoringTest_Audit_1: PASS");
        } else {
            // Plain concatenation (no toString()) so a null result is printed and then
            // reported by assertEquals instead of surfacing as a NullPointerException.
            System.out.println("---------- EPIC FAIL: ensembleScoringTest_Audit_1 ----------");
            System.out.println("Expected: " + expected);
            System.out.println("Observed: " + observed);
            System.out.println("-------- END EPIC FAIL --------");
        }

        assertEquals(expected,observed);
    }

    // --------------------------
    // Audit Helper Functions
    // --------------------------

    /**
     * Builds the flat, 13-field Pig schema describing one Audit input record.
     *
     * @return schema whose field order matches {@link #buildAuditInputEvent}
     * @throws FrontendException if Pig rejects a field definition
     */
    private Schema buildAuditInputSchema() throws FrontendException {

        // Build Field Schema
        List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
        fieldSchemas.add(new Schema.FieldSchema("id"             , DataType.LONG));
        fieldSchemas.add(new Schema.FieldSchema("age"            , DataType.INTEGER));
        fieldSchemas.add(new Schema.FieldSchema("employment"     , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("education"      , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("marital"        , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("occupation"     , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("income"         , DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("gender"         , DataType.CHARARRAY));
        // NOTE(review): declared DOUBLE, but buildAuditInputEvent appends an Integer for
        // this field - confirm the mismatch is intended.
        fieldSchemas.add(new Schema.FieldSchema("deductions"     , DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("hours"          , DataType.INTEGER));
        fieldSchemas.add(new Schema.FieldSchema("ignore_accounts", DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("risk_adjustment", DataType.INTEGER));
        fieldSchemas.add(new Schema.FieldSchema("target_adjusted", DataType.INTEGER));

        return new Schema(fieldSchemas);

    }

    /**
     * Packs the thirteen Audit fields into a tuple, in the order declared by
     * {@link #buildAuditInputSchema()}.
     *
     * @return a new tuple holding the arguments in parameter order
     */
    private Tuple buildAuditInputEvent( Long    ID
                                      , Integer Age
                                      , String  Employment
                                      , String  Education
                                      , String  Marital
                                      , String  Occupation
                                      , Double  Income
                                      , String  Gender
                                      , Integer Deductions
                                      , Integer Hours
                                      , String  IGNORE_Accounts
                                      , Integer RISK_Adjustment
                                      , Integer TARGET_Adjusted) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(ID             );
        newTuple.append(Age            );
        newTuple.append(Employment     );
        newTuple.append(Education      );
        newTuple.append(Marital        );
        newTuple.append(Occupation     );
        newTuple.append(Income         );
        newTuple.append(Gender         );
        newTuple.append(Deductions     );
        newTuple.append(Hours          );
        newTuple.append(IGNORE_Accounts);
        newTuple.append(RISK_Adjustment);
        newTuple.append(TARGET_Adjusted);

        return newTuple;
    }

    /**
     * Builds the expected output tuple. Only {@code TARGET_Adjusted_predicted} is
     * stored; the thirteen input fields are accepted so the call mirrors
     * {@link #buildAuditInputEvent} but are not used.
     *
     * @return a new single-field tuple holding the predicted value
     */
    private Tuple buildAuditOutputEvent( Long    ID
                                       , Integer Age
                                       , String  Employment
                                       , String  Education
                                       , String  Marital
                                       , String  Occupation
                                       , Double  Income
                                       , String  Gender
                                       , Integer Deductions
                                       , Integer Hours
                                       , String  IGNORE_Accounts
                                       , Integer RISK_Adjustment
                                       , Integer TARGET_Adjusted
                                       , String  TARGET_Adjusted_predicted) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(TARGET_Adjusted_predicted);

        return newTuple;
    }

}

--------------------------------------------------------------------------------
/src/test/java/org/surus/pig/ScorePMML_ElNinoTest.java:
--------------------------------------------------------------------------------
package org.surus.pig;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.JAXBException;

import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.junit.*;
import org.surus.pig.ScorePMML;
import org.xml.sax.SAXException;

/**
 * Scores a single record of the El Nino example through {@link ScorePMML} with the
 * linear-regression model and compares the result with a hard-coded prediction.
 */
public class ScorePMML_ElNinoTest {

    // ElNino Models
    private final String regressionElNinoModelPath = "./resources/examples/models/elnino_linearReg.xml";

    private final TupleFactory tf = TupleFactory.getInstance();

    // --------------------------
    // ElNino Test Functions
    // --------------------------

    /**
     * Runs one El Nino record through the regression model and expects the
     * single-field output tuple {@code (27.384241597858438)}.
     */
    @Test
    public void regressionScoringTest_ElNino_1() throws IOException, SAXException, JAXBException {

        Schema inputSchema = buildElNinoInputSchema();

        // Input record and the output tuple expected for it
        Tuple inputTuple = this.buildElNinoInputEvent("1","1","1","8.96","-140.32","-6.3","-6.4","83.5","27.32","27.57");
        Tuple expected = this.buildElNinoOutputEvent("1","1","1","8.96","-140.32","-6.3","-6.4","83.5","27.32","27.57",27.384241597858438);

        // Initialize Class
        ScorePMML evalPMML = new ScorePMML(this.regressionElNinoModelPath);

        // The returned schema is not inspected; the call is kept ahead of exec() exactly
        // as before. NOTE(review): presumably the UDF records the input schema here -
        // confirm against ScorePMML before removing it.
        evalPMML.outputSchema(inputSchema);
        Tuple observed = evalPMML.exec(inputTuple);

        // Test
        if (expected.equals(observed)) {
            System.out.println("regressionScoringTest_ElNino_1: PASS");
        } else {
            // Plain concatenation (no toString()) so a null result is printed and then
            // reported by assertEquals instead of surfacing as a NullPointerException.
            System.out.println("---------- EPIC FAIL: regressionScoringTest_ElNino_1 ----------");
            System.out.println("Expected: " + expected);
            System.out.println("Observed: " + observed);
            System.out.println("-------- END EPIC FAIL --------");
        }

        assertEquals(expected,observed);
    }

    // --------------------------
    // El Nino Helper Functions
    // --------------------------

    /**
     * Builds the flat, 10-field Pig schema describing one El Nino input record.
     *
     * @return schema whose field order matches {@link #buildElNinoInputEvent}
     * @throws FrontendException if Pig rejects a field definition
     */
    private Schema buildElNinoInputSchema() throws FrontendException {

        // Build Field Schema
        // NOTE(review): the seven measurement fields are declared DOUBLE, while
        // buildElNinoInputEvent appends them as Strings - confirm this is intended.
        List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
        fieldSchemas.add(new Schema.FieldSchema("buoy_day_ID", DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("buoy"       , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("day"        , DataType.CHARARRAY));
        fieldSchemas.add(new Schema.FieldSchema("latitude"   , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("longitude"  , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("zon_winds"  , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("mer_winds"  , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("humidity"   , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("airtemp"    , DataType.DOUBLE   ));
        fieldSchemas.add(new Schema.FieldSchema("s_s_temp"   , DataType.DOUBLE   ));

        return new Schema(fieldSchemas);

    }

    /**
     * Packs the ten El Nino fields into a tuple, in the order declared by
     * {@link #buildElNinoInputSchema()}. All values are stored as Strings.
     *
     * @return a new tuple holding the arguments in parameter order
     */
    private Tuple buildElNinoInputEvent( String buoy_day_ID, String buoy, String day, String latitude, String longitude, String zon_winds, String mer_winds, String humidity, String airtemp, String s_s_temp) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(buoy_day_ID);
        newTuple.append(buoy       );
        newTuple.append(day        );
        newTuple.append(latitude   );
        newTuple.append(longitude  );
        newTuple.append(zon_winds  );
        newTuple.append(mer_winds  );
        newTuple.append(humidity   );
        newTuple.append(airtemp    );
        newTuple.append(s_s_temp   );

        return newTuple;
    }

    /**
     * Builds the expected output tuple. Only {@code airtemp_predicted} is stored; the
     * ten input fields are accepted so the call mirrors {@link #buildElNinoInputEvent}
     * but are not used.
     *
     * @return a new single-field tuple holding the predicted value
     */
    private Tuple buildElNinoOutputEvent( String buoy_day_ID, String buoy, String day, String latitude, String longitude, String zon_winds, String mer_winds, String humidity, String airtemp, String s_s_temp, double airtemp_predicted) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(airtemp_predicted);

        return newTuple;
    }

}

--------------------------------------------------------------------------------
/src/test/java/org/surus/pig/ScorePMML_IrisTest.java:
--------------------------------------------------------------------------------
package org.surus.pig;

import static org.junit.Assert.*;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.bind.JAXBException;

import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.junit.*;
import org.surus.pig.ScorePMML;
import org.xml.sax.SAXException;

/**
 * Scores single Iris records through {@link ScorePMML} with three different example
 * models (decision tree, neural network, random forest) and compares each result with
 * a hard-coded expected output tuple.
 */
public class ScorePMML_IrisTest {

    // Iris Models
    private final String treeIrisModelPath = "./resources/examples/models/single_iris_dectree.xml";
    private final String nnIrisModelPath   = "./resources/examples/models/single_iris_mlp.xml";
    private final String rfIrisModelPath   = "./resources/examples/models/example.randomForest.xml";

    private final TupleFactory tf = TupleFactory.getInstance();

    // --------------------------
    // Iris Test Functions
    // --------------------------

    /** Decision tree: a record labelled "Iris-setosa" is expected to score as "Iris-setosa". */
    @Test
    public void treeScoringTest_Iris_1() throws IOException, SAXException, JAXBException {

        Tuple inputTuple = this.buildIrisInputEvent(5.1,3.5,1.4,0.2,"Iris-setosa");
        Tuple expected   = this.buildIrisOutputEvent("Iris-setosa");

        scoreAndCompare("treeScoringTest_Iris_1", this.treeIrisModelPath, inputTuple, expected);
    }

    /** Decision tree: a record labelled "Iris-versicolor" is expected to score as "Iris-virginica". */
    @Test
    public void treeScoringTest_Iris_2() throws IOException, SAXException, JAXBException {

        Tuple inputTuple = this.buildIrisInputEvent(5.9,3.2,4.8,1.8,"Iris-versicolor");
        Tuple expected   = this.buildIrisOutputEvent("Iris-virginica");

        scoreAndCompare("treeScoringTest_Iris_2", this.treeIrisModelPath, inputTuple, expected);
    }

    /** Neural network: the same "Iris-versicolor" record is expected to score as "Iris-versicolor". */
    @Test
    public void nnScoringTest_Iris_1() throws IOException, SAXException, JAXBException {

        Tuple inputTuple = this.buildIrisInputEvent(5.9,3.2,4.8,1.8,"Iris-versicolor");
        Tuple expected   = this.buildIrisOutputEvent("Iris-versicolor");

        scoreAndCompare("nnScoringTest_Iris_1", this.nnIrisModelPath, inputTuple, expected);
    }

    /**
     * Random forest: expects a five-field output tuple
     * {@code ("setosa", "setosa", 1.0, 0.0, 0.0)} for a record labelled "setosa".
     */
    @Test
    public void rfScoringTest_Iris_1() throws IOException, SAXException, JAXBException {

        Tuple inputTuple = this.buildIrisInputEvent(5.1,3.5,1.4,0.2,"setosa");
        Tuple expected   = this.buildIrisOutputEvent("setosa","setosa",1.0,0.0,0.0);

        scoreAndCompare("rfScoringTest_Iris_1", this.rfIrisModelPath, inputTuple, expected);
    }

    // --------------------------
    // Iris Helper Functions
    // --------------------------

    /**
     * Shared body of every Iris test: loads the model, scores {@code inputTuple},
     * prints a PASS / FAIL report to stdout, and asserts equality with {@code expected}.
     *
     * @param testName   name printed in the PASS / FAIL report
     * @param modelPath  path of the PMML model file handed to {@link ScorePMML}
     * @param inputTuple record to score, laid out as in {@link #buildIrisInputSchema()}
     * @param expected   output tuple the model is expected to produce
     */
    private void scoreAndCompare(String testName, String modelPath, Tuple inputTuple, Tuple expected)
            throws IOException, SAXException, JAXBException {

        // Build Input Schema
        Schema inputSchema = buildIrisInputSchema();

        // Initialize Class
        ScorePMML evalPMML = new ScorePMML(modelPath);

        // The returned schema is not inspected; the call is kept ahead of exec() exactly
        // as before. NOTE(review): presumably the UDF records the input schema here -
        // confirm against ScorePMML before removing it.
        evalPMML.outputSchema(inputSchema);
        Tuple observed = evalPMML.exec(inputTuple);

        // Test
        if (expected.equals(observed)) {
            System.out.println(testName + ": PASS");
        } else {
            // Plain concatenation (no toString()) so a null result is printed and then
            // reported by assertEquals instead of surfacing as a NullPointerException.
            System.out.println("---------- EPIC FAIL: " + testName + " ----------");
            System.out.println("Expected: " + expected);
            System.out.println("Observed: " + observed);
            System.out.println("-------- END EPIC FAIL --------");
        }

        assertEquals(expected,observed);
    }

    /**
     * Builds the flat, 5-field Pig schema describing one Iris input record.
     *
     * @return schema whose field order matches {@link #buildIrisInputEvent}
     * @throws FrontendException if Pig rejects a field definition
     */
    private Schema buildIrisInputSchema() throws FrontendException {

        // Build Field Schema
        List<FieldSchema> fieldSchemas = new ArrayList<FieldSchema>();
        fieldSchemas.add(new Schema.FieldSchema("sepal_length", DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("sepal_width" , DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("petal_length", DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("petal_width" , DataType.DOUBLE));
        fieldSchemas.add(new Schema.FieldSchema("species"     , DataType.CHARARRAY));

        return new Schema(fieldSchemas);
    }

    /**
     * Packs the four measurements and the class label into a tuple, in the order
     * declared by {@link #buildIrisInputSchema()}.
     *
     * @return a new five-field tuple
     */
    private Tuple buildIrisInputEvent(double sepal_length, double sepal_width, double petal_length, double petal_width, String inputClass) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(sepal_length);
        newTuple.append(sepal_width);
        newTuple.append(petal_length);
        newTuple.append(petal_width);
        newTuple.append(inputClass);

        return newTuple;
    }

    /**
     * Builds an expected output tuple holding only the predicted class.
     *
     * @return a new single-field tuple
     */
    private Tuple buildIrisOutputEvent(String predictedClass) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(predictedClass);

        return newTuple;
    }

    /**
     * Builds an expected output tuple holding the predicted class, the class output
     * field, and three numeric values, appended in parameter order.
     *
     * @return a new five-field tuple
     */
    private Tuple buildIrisOutputEvent(String predictedClass, String outputField_Class, double predictedClass1, double predictedClass2, double predictedClass3) {

        Tuple newTuple = tf.newTuple();
        newTuple.append(predictedClass);
        newTuple.append(outputField_Class);
        newTuple.append(predictedClass1);
        newTuple.append(predictedClass2);
        newTuple.append(predictedClass3);

        return newTuple;
    }

}

--------------------------------------------------------------------------------