├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── CONDUCT.md ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── AnomalyDetection-package.R ├── date_utils.R ├── detect_anoms.R ├── raw_data.R ├── ts_anom_detection.R └── vec_anom_detection.R ├── README-unnamed-chunk-4-1.png ├── README-unnamed-chunk-5-1.png ├── README-unnamed-chunk-6-1.png ├── README.Rmd ├── README.md ├── data └── raw_data.rda ├── figs ├── Fig1.png └── Fig2.png ├── inst └── extdata │ └── data.csv ├── man ├── AnomalyDetection.Rd ├── AnomalyDetectionTs.Rd ├── AnomalyDetectionVec.Rd └── raw_data.Rd └── tests ├── testthat.R └── testthat ├── test-NAs.R ├── test-edge.R ├── test-ts.R └── test-vec.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | LICENSE 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^figs$ 5 | ^\.travis\.yml$ 6 | ^README\.*Rmd$ 7 | ^README\.*html$ 8 | ^NOTES\.*Rmd$ 9 | ^NOTES\.*html$ 10 | ^\.codecov\.yml$ 11 | ^README_files$ 12 | ^doc$ 13 | ^CONDUCT\.md$ 14 | ^README\.Rmd$ 15 | ^README-.*\.png$ 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | 4 | # Example code in package build process 5 | *-Ex.R 6 | 7 | # R data files from past sessions 8 | .Rdata 9 | 10 | # RStudio files 11 | .Rproj.user/ 12 | .Rproj.user 13 | *.Rproj 14 | 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Code of Conduct 2 | 3 | As contributors and maintainers of this project, we pledge to respect all people who 4 | contribute through reporting issues, posting feature requests, updating documentation, 5 | submitting pull requests or patches, and other activities. 6 | 7 | We are committed to making participation in this project a harassment-free experience for 8 | everyone, regardless of level of experience, gender, gender identity and expression, 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion. 10 | 11 | Examples of unacceptable behavior by participants include the use of sexual language or 12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment, 13 | insults, or other unprofessional conduct. 14 | 15 | Project maintainers have the right and responsibility to remove, edit, or reject comments, 16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 18 | from the project team. 19 | 20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 21 | opening an issue or contacting one or more of the project maintainers. 
22 | 23 | This Code of Conduct is adapted from the Contributor Covenant 24 | (http://contributor-covenant.org), version 1.0.0, available at 25 | http://contributor-covenant.org/version/1/0/0/ 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: AnomalyDetection 2 | Type: Package 3 | Title: Anomaly Detection Using Seasonal Hybrid Extreme Studentized Deviate Test 4 | Version: 2.0.1 5 | Date: 2018-04-20 6 | Authors@R: c( 7 | person("Owen", "Vallis", email = "owensvallis@gmail.com", role = c("aut", "cre")), 8 | person("Jordan", "Hochenbaum", email = "jhochenbaum@gmail.com", role = "aut"), 9 | person("Arun", "Kejariwal", email = "zdkurtz@gmail.com", role = "aut"), 10 | person("Bob", "Rudis", email = "bob@rud.is", role = c("aut", "ctb"), 11 | comment = c(ORCID = "0000-0001-5670-2640")), 12 | person("Yuan", "Tang", role = c("aut", "cph"), 13 | email = "terrytangyuan@gmail.com", 14 | comment = c(ORCID = "0000-0001-5243-233X")), 15 | person("Chris", "Muir", email = "chrismuirrva@gmail.com", role = c("ctb")), 16 | person("Jun", "Cai", role = c("ctb"), 17 | email = "cai-j12@mails.tsinghua.edu.cn", 18 | comment = c(ORCID = "0000-0001-9495-1226")) 19 | ) 20 | Maintainer: 21 | Owen S. Vallis , 22 | Jordan Hochenbaum 23 | Description: A technique for detecting anomalies in seasonal univariate time series. 24 | The methods used are robust, from a statistical standpoint, in the presence of 25 | seasonality and an underlying trend. These methods can be used in a 26 | wide variety of contexts. For example, detecting anomalies in system metrics after 27 | a new software release, user engagement after an 'A/B' test, or for problems in 28 | econometrics, financial engineering, political and social sciences. 29 | ByteCompile: yes 30 | Encoding: UTF-8 31 | Imports: 32 | stringi, 33 | lubridate, 34 | stats 35 | Depends: R (>= 3.0.0) 36 | Suggests: testthat 37 | License: GPL-3 38 | LazyData: true 39 | RoxygenNote: 6.0.1 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price.
Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 
88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 
209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. 
This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. 
But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 
387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. 
You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. 
"Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 
564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 
628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 
675 | 676 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(AnomalyDetectionTs) 4 | export(AnomalyDetectionVec) 5 | export(ad_ts) 6 | export(ad_vec) 7 | importFrom(lubridate,days) 8 | importFrom(lubridate,floor_date) 9 | importFrom(lubridate,hours) 10 | importFrom(stats,aggregate) 11 | importFrom(stats,mad) 12 | importFrom(stats,median) 13 | importFrom(stats,na.omit) 14 | importFrom(stats,qt) 15 | importFrom(stats,quantile) 16 | importFrom(stats,stl) 17 | importFrom(stats,ts) 18 | importFrom(stringi,stri_detect_regex) 19 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # AnomalyDetection 2.0.1 2 | 3 | * Swapped out the `stringr` dependency for a `stringi` dependency since `stringr` also 4 | imports (and, hence, forces installation of) `glue` and `magrittr`, which are not needed. 5 | `lubridate` still requires `stringr`, but this is step one in the eventual removal 6 | of `lubridate`. The ultimate goal is to have this depend only on `stringi` 7 | and `stats`, and (possibly) just `stats`. 8 | * Added package `prefix::` to all imported functions to improve package 9 | safety & readability. 10 | * Added `stats` to the `Imports:` in `DESCRIPTION`. 11 | * Changed TeX formatting in comments to markdown and added `@md` roxygen tags 12 | where needed. 13 | * Trimmed more long lines and reformatted a number of sections to improve 14 | readability. 15 | * Added URL references for the cited USENIX and Technometrics papers. 16 | * Cleaned up some status, warning, and error messages. 17 | * Updated `Depends:` R version to require 3.0.0 given `lubridate`'s minimum R version. 18 | 19 | # AnomalyDetection 2.0.0 20 | 21 | * Added in PR (@gggodhwani) 22 | * Added in PR (@nujnimka) 23 | * Added in PR (@randakar) 24 | * Added in PR (@caijun) inherently resolved 26 | since we return `POSIXct` objects in the data frames now 27 | * Removed plotting code 28 | * Removed tests that tested plotting code 29 | * Updated tests since we only return data frames now 30 | * Updated package to conform to modern CRAN standards 31 | * Added a `NEWS.md` file to track changes to the package. 32 | -------------------------------------------------------------------------------- /R/AnomalyDetection-package.R: -------------------------------------------------------------------------------- 1 | #' Anomaly Detection Using Seasonal Hybrid Extreme Studentized Deviate Test 2 | #' 3 | #' A technique for detecting anomalies in seasonal univariate time series. 4 | #' The methods used are robust, from a statistical standpoint, in the presence of 5 | #' seasonality and an underlying trend. These methods can be used in a 6 | #' wide variety of contexts. For example, detecting anomalies in system metrics after 7 | #' a new software release, user engagement after an 'A/B' test, or for problems in 8 | #' econometrics, financial engineering, political and social sciences. 9 | #' 10 | #' @name AnomalyDetection 11 | #' @docType package 12 | #' @author Owen S.
Vallis, Jordan Hochenbaum, Arun Kejariwal; Modernization 13 | #' contributions by Bob Rudis 14 | #' @importFrom stats aggregate mad median na.omit qt quantile stl ts 15 | #' @importFrom stringi stri_detect_regex 16 | #' @importFrom lubridate days floor_date hours 17 | NULL 18 | 19 | -------------------------------------------------------------------------------- /R/date_utils.R: -------------------------------------------------------------------------------- 1 | # custom timestamp formatter 2 | format_timestamp <- function(indf, index = 1) { 3 | 4 | if (class(indf[[index]])[1] == "POSIXlt") return(indf) 5 | 6 | if (stri_detect_regex(indf[[index]][1], "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} \\+\\d{4}$")) { 7 | indf[[index]] <- strptime(indf[[index]], format = "%Y-%m-%d %H:%M:%S", tz = "UTC") 8 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$")) { 9 | indf[[index]] <- strptime(indf[[index]], format = "%Y-%m-%d %H:%M:%S", tz = "UTC") 10 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}$")) { 11 | indf[[index]] <- strptime(indf[[index]], format = "%Y-%m-%d %H:%M", tz = "UTC") 12 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{2}/\\d{2}/\\d{2}$")) { 13 | indf[[index]] <- strptime(indf[[index]], format = "%m/%d/%y", tz = "UTC") 14 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{2}/\\d{2}/\\d{4}$")) { 15 | indf[[index]] <- strptime(indf[[index]], format = "%m/%d/%Y", tz = "UTC") 16 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{4}\\d{2}\\d{2}$")) { 17 | indf[[index]] <- strptime(indf[[index]], format = "%Y%m%d", tz = "UTC") 18 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{4}/\\d{2}/\\d{2}/\\d{2}$")) { 19 | indf[[index]] <- strptime(indf[[index]], format = "%Y/%m/%d/%H", tz = "UTC") 20 | } else if (stri_detect_regex(indf[[index]][1], "^\\d{10}$")) { 21 | indf[[index]] <- as.POSIXlt(indf[[index]], origin = "1970-01-01", tz = "UTC") # Handle 10-digit Unix epoch timestamps (seconds since 1970-01-01) 22 | } 23 | 24 | return(indf) 25 | 26 | } 27 | 28 | # determine the granularity of the time series 29 | get_gran <- function(tsdf, index=1) { 30 | 31 | n <- length(tsdf[[index]]) 32 | 33 | # We calculate the granularity from the time difference between the last 2 entries (sorted) 34 | gran <- round(difftime(max(tsdf[[index]]), sort(tsdf[[index]], partial = n - 1)[n - 1], 35 | units = "secs" 36 | )) 37 | 38 | if (gran >= 86400) return("day") 39 | if (gran >= 3600) return("hr") 40 | if (gran >= 60) return("min") 41 | if (gran >= 1) return("sec") 42 | 43 | return("ms") 44 | 45 | } 46 | -------------------------------------------------------------------------------- /R/detect_anoms.R: -------------------------------------------------------------------------------- 1 | # Detects anomalies in a time series using S-H-ESD. 2 | # 3 | # Args: 4 | # data: Time series to perform anomaly detection on. 5 | # k: Maximum number of anomalies that S-H-ESD will detect as a percentage of the data. 6 | # alpha: The level of statistical significance with which to accept or reject anomalies. 7 | # num_obs_per_period: Defines the number of observations in a single period, 8 | # and is used during seasonal decomposition. 9 | # use_decomp: Use seasonal decomposition during anomaly detection. 10 | # use_esd: Uses regular ESD instead of hybrid-ESD. Note hybrid-ESD is more 11 | # statistically robust. 12 | # one_tail: If TRUE, only positive or negative going anomalies are detected, 13 | # depending on whether upper_tail is TRUE or FALSE.
14 | # upper_tail: If TRUE and one_tail is also TRUE, detect only positive going 15 | # (right-tailed) anomalies. If FALSE and one_tail is TRUE, only 16 | # detect negative (left-tailed) anomalies. 17 | # verbose: Additional printing for debugging. 18 | # Returns: 19 | # A list containing the anomalies (anoms) and decomposition components (stl). 20 | detect_anoms <- function(data, k = 0.49, alpha = 0.05, num_obs_per_period = NULL, 21 | use_decomp = TRUE, use_esd = FALSE, one_tail = TRUE, 22 | upper_tail = TRUE, verbose = FALSE) { 23 | 24 | if (is.null(num_obs_per_period)) { 25 | stop("must supply period length for time series decomposition") 26 | } 27 | 28 | num_obs <- nrow(data) 29 | 30 | # Check to make sure we have at least two periods' worth of data for anomaly context 31 | if (num_obs < num_obs_per_period * 2) { 32 | stop("Anom detection needs at least 2 periods worth of data") 33 | } 34 | 35 | # Check if our timestamps are posix 36 | posix_timestamp <- if (class(data[[1L]])[1L] == "POSIXlt") TRUE else FALSE 37 | 38 | # Handle NAs 39 | if (length(rle(is.na(c(NA, data[[2L]], NA)))$values) > 3) { 40 | stop( 41 | paste0( 42 | "Data contains non-leading NAs. We suggest replacing NAs with ", 43 | "interpolated values (see na.approx in Zoo package).", 44 | collapse = "") 45 | ) 46 | } else { 47 | data <- stats::na.omit(data) 48 | } 49 | 50 | # -- Step 1: Decompose data. This returns a univariate remainder which will be 51 | # used for anomaly detection. Optionally, we might NOT decompose. 52 | stats::stl( 53 | stats::ts(data[[2L]], frequency = num_obs_per_period), 54 | s.window = "periodic", 55 | robust = TRUE 56 | ) -> data_decomp 57 | 58 | # Remove the seasonal component, and the median of the data to create the univariate remainder 59 | data.frame( 60 | timestamp = data[[1L]], 61 | count = (data[[2L]] - data_decomp$time.series[, "seasonal"] - 62 | stats::median(data[[2L]])) 63 | ) -> data 64 | 65 | # Store the smoothed seasonal component, plus the trend component for use in 66 | # determining the "expected values" option 67 | data.frame( 68 | timestamp = data[[1L]], 69 | count = (as.numeric(trunc(data_decomp$time.series[, "trend"] + 70 | data_decomp$time.series[, "seasonal"]))) 71 | ) -> data_decomp 72 | 73 | if (posix_timestamp) data_decomp <- format_timestamp(data_decomp) 74 | 75 | # Maximum number of outliers that S-H-ESD can detect (e.g. 49% of data) 76 | max_outliers <- trunc(num_obs * k) 77 | 78 | if (max_outliers == 0) { 79 | stop(paste0( 80 | "With longterm=TRUE, AnomalyDetection splits the data into 2 week periods by default. You have ", 81 | num_obs, 82 | " observations in a period, which is too few. Set a higher piecewise_median_period_weeks.") 83 | ) 84 | } 85 | 86 | func_ma <- match.fun(stats::median) 87 | func_sigma <- match.fun(stats::mad) 88 | 89 | ## Define values and vectors. 90 | n <- length(data[[2L]]) 91 | if (posix_timestamp) { 92 | R_idx <- as.POSIXlt(data[[1L]][1L:max_outliers], tz = "UTC") 93 | } else { 94 | R_idx <- 1L:max_outliers 95 | } 96 | 97 | num_anoms <- 0L 98 | 99 | # Compute test statistic until r=max_outliers values have been 100 | # removed from the sample.
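# --- Editorial note: illustrative sketch, not part of the original package source. ---
# It restates the test performed by the loop below. Each pass computes the (hybrid)
# ESD test statistic
#   R_i = max_j |x_j - median(x)| / mad(x)
# (the one-tailed branches below drop the absolute value), then compares it to the
# critical value computed a few lines further down:
#   lambda_i = t_{p, n-i-1} * (n - i) / sqrt((n - i - 1 + t_{p, n-i-1}^2) * (n - i + 1))
# where t_{p, n-i-1} is the p quantile of a Student t distribution with n - i - 1
# degrees of freedom (Rosner, 1983). A standalone check of a single step, with
# hypothetical values of n, i, and alpha, would look like:
#   n <- 1000; i <- 1; alpha <- 0.05
#   p <- 1 - alpha / (2 * (n - i + 1))   # two-tailed form
#   t_crit <- stats::qt(p, n - i - 1)
#   t_crit * (n - i) / sqrt((n - i - 1 + t_crit^2) * (n - i + 1))
# The most extreme observation is removed on each pass, and num_anoms records the
# last pass whose statistic exceeded its critical value.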
101 | for (i in 1L:max_outliers) { 102 | 103 | if (verbose) message(paste(i, "/", max_outliers, "completed")) 104 | 105 | if (one_tail) { 106 | if (upper_tail) { 107 | ares <- data[[2L]] - func_ma(data[[2L]]) 108 | } else { 109 | ares <- func_ma(data[[2L]]) - data[[2L]] 110 | } 111 | } else { 112 | ares <- abs(data[[2L]] - func_ma(data[[2L]])) 113 | } 114 | 115 | # protect against constant time series 116 | data_sigma <- func_sigma(data[[2L]]) 117 | if (data_sigma == 0) break 118 | 119 | ares <- ares / data_sigma 120 | R <- max(ares) 121 | 122 | temp_max_idx <- which(ares == R)[1L] 123 | 124 | R_idx[i] <- data[[1L]][temp_max_idx] 125 | 126 | data <- data[-which(data[[1L]] == R_idx[i]), ] 127 | 128 | ## Compute critical value. 129 | if (one_tail) { 130 | p <- 1 - alpha / (n - i + 1) 131 | } else { 132 | p <- 1 - alpha / (2 * (n - i + 1)) 133 | } 134 | 135 | t <- stats::qt(p, (n - i - 1L)) 136 | lam <- t * (n - i) / sqrt((n - i - 1 + t**2) * (n - i + 1)) 137 | 138 | if (R > lam) num_anoms <- i 139 | 140 | } 141 | 142 | if (num_anoms > 0) { 143 | R_idx <- R_idx[1L:num_anoms] 144 | } else { 145 | R_idx <- NULL 146 | } 147 | 148 | return(list(anoms = R_idx, stl = data_decomp)) 149 | 150 | } 151 | -------------------------------------------------------------------------------- /R/raw_data.R: -------------------------------------------------------------------------------- 1 | #' @name raw_data 2 | #' @title raw_data 3 | #' @description A data frame containing a time series with headings timestamp and count. 4 | #' @docType data 5 | #' @usage data(raw_data) 6 | NULL -------------------------------------------------------------------------------- /R/ts_anom_detection.R: -------------------------------------------------------------------------------- 1 | #' Anomaly Detection Using Seasonal Hybrid ESD Test 2 | #' 3 | #' A technique for detecting anomalies in seasonal univariate time series where the input is a 4 | #' series of timestamp/count pairs. 5 | #' 6 | #' @md 7 | #' @name AnomalyDetectionTs 8 | #' @param x Time series as a two column data frame where the first column consists of the 9 | #' timestamps and the second column consists of the observations. 10 | #' @param max_anoms Maximum number of anomalies that S-H-ESD will detect as a percentage of the 11 | #' data. 12 | #' @param direction Directionality of the anomalies to be detected. One of: 13 | #' `pos`, `neg`, `both`. 14 | #' @param alpha The level of statistical significance with which to accept or reject anomalies. 15 | #' @param only_last Find and report anomalies only within the last day or hr in the time series. 16 | #' One of `NULL`, `day`, `hr`. 17 | #' @param threshold Only report positive going anoms above the threshold specified. One of: 18 | #' `None`, `med_max`, `p95`, `p99`. 19 | #' @param e_value Add an additional column to the anoms output containing the expected value. 20 | #' @param longterm Increase anom detection efficacy for time series that are longer than a month. 21 | #' See `Details` below. 22 | #' @param piecewise_median_period_weeks The piecewise median time window as described in Vallis, 23 | #' Hochenbaum, and Kejariwal (2014). Defaults to 2. 24 | #' @param verbose Enable debug messages. 25 | #' @param na.rm Remove any NAs in timestamps. (default: `FALSE`) 26 | #' @return The returned value is a data frame containing timestamps, values, 27 | #' and optionally expected values. 28 | #' @references 29 | #' - Vallis, O., Hochenbaum, J.
and Kejariwal, A., (2014) 30 | #' "A Novel Technique for Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA. 31 | #' () 32 | #' - Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier 33 | #' Procedure", Technometrics, 25(2), pp. 165-172. () 34 | #' @examples 35 | #' data(raw_data) 36 | #' 37 | #' ad_ts(raw_data, max_anoms=0.02, direction='both') 38 | #' 39 | #' # To detect only the anomalies on the last day, run the following: 40 | #' 41 | #' ad_ts(raw_data, max_anoms=0.02, direction='both', only_last="day") 42 | #' @seealso [ad_vec()] 43 | #' @export 44 | AnomalyDetectionTs <- function(x, max_anoms = 0.10, direction = "pos", 45 | alpha = 0.05, only_last = NULL, threshold = "None", 46 | e_value = FALSE, longterm = FALSE, 47 | piecewise_median_period_weeks = 2, 48 | verbose = FALSE, na.rm = FALSE) { 49 | 50 | # Check for supported inputs types 51 | if (!is.data.frame(x)) { 52 | stop("data must be a single data frame.") 53 | } else { 54 | if (ncol(x) != 2 || !is.numeric(x[[2]])) { 55 | stop(paste0("data must be a 2 column data.frame, with the first column being ", 56 | "a set of timestamps, and the second coloumn being numeric values.", 57 | collapse = "")) 58 | } 59 | # Format timestamps if necessary 60 | if (!(class(x[[1]])[1] == "POSIXlt")) x <- format_timestamp(x) 61 | } 62 | 63 | # Rename data frame columns if necessary 64 | if (any((names(x) == c("timestamp", "count")) == FALSE)) { 65 | colnames(x) <- c("timestamp", "count") 66 | } 67 | 68 | if (!is.logical(na.rm)) stop("na.rm must be either TRUE or FALSE") 69 | 70 | # Deal with NAs in timestamps 71 | if (any(is.na(x$timestamp))) { 72 | if (na.rm) { 73 | x <- x[-which(is.na(x$timestamp)), ] 74 | } else { 75 | stop("timestamp contains NAs, please set na.rm to TRUE or remove the NAs manually.") 76 | } 77 | } 78 | 79 | # Sanity check all input parameters 80 | if (max_anoms > .49) { 81 | stop(paste("max_anoms must be less than 50% of the data points (max_anoms =", 82 | round(max_anoms * length(x[[2]]), 0), " data_points =", length(x[[2]]), ").")) 83 | } else if (max_anoms < 0) { 84 | stop("max_anoms must be positive.") 85 | } else if (max_anoms == 0) { 86 | warning("0 max_anoms results in max_outliers being 0.") 87 | } 88 | 89 | if (!direction %in% c("pos", "neg", "both")) { 90 | stop("direction options are: pos | neg | both.") 91 | } 92 | 93 | if (!(0.01 <= alpha || alpha <= 0.1)) { 94 | if (verbose) message("Warning: alpha is the statistical signifigance, and is usually between 0.01 and 0.1") 95 | } 96 | 97 | if (!is.null(only_last) && !only_last %in% c("day", "hr")) { 98 | stop("only_last must be either 'day' or 'hr'") 99 | } 100 | 101 | if (!threshold %in% c("None", "med_max", "p95", "p99")) { 102 | stop("threshold options are: None | med_max | p95 | p99.") 103 | } 104 | 105 | if (!is.logical(e_value)) { 106 | stop("e_value must be either TRUE or FALSE") 107 | } 108 | 109 | if (!is.logical(longterm)) { 110 | stop("longterm must be either TRUE or FALSE") 111 | } 112 | 113 | if (piecewise_median_period_weeks < 2) { 114 | stop("piecewise_median_period_weeks must be at greater than 2 weeks") 115 | } 116 | 117 | # -- Main analysis: Perform S-H-ESD 118 | 119 | # Derive number of observations in a single day. 120 | # Although we derive this in S-H-ESD, we also need it to be minutley later on so we do it here first. 
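  # Granularity determines the seasonal period handed to detect_anoms():
  # minutely data uses 1440 observations per period (one day), hourly data uses
  # 24, and daily data is bumped to a weekly period of 7 so that stl() still
  # sees several full cycles. Secondly (or millisecond) data is aggregated up
  # to minutely first (see below).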
121 | gran <- get_gran(x, 1) 122 | 123 | if (gran == "day") { 124 | num_days_per_line <- 7 125 | if (is.character(only_last) && only_last == "hr") { 126 | only_last <- "day" 127 | } 128 | } else { 129 | num_days_per_line <- 1 130 | } 131 | 132 | # Aggregate data to minutely if secondly 133 | if (gran == "sec" || gran == "ms") { # ref: https://github.com/twitter/AnomalyDetection/pull/69/files 134 | x <- format_timestamp( 135 | stats::aggregate( 136 | x[2], 137 | format(x[1], "%Y-%m-%d %H:%M:00"), 138 | sum) 139 | ) # ref: https://github.com/twitter/AnomalyDetection/pull/44 140 | gran <- "min" # ref: https://github.com/twitter/AnomalyDetection/pull/98/files?diff=unified 141 | } 142 | 143 | period <- switch( 144 | gran, 145 | sec = 3600, # ref: https://github.com/twitter/AnomalyDetection/pull/93/files 146 | ms = 1000, # ref: https://github.com/twitter/AnomalyDetection/pull/69/files 147 | min = 1440, 148 | hr = 24, 149 | # if the data is daily, then we need to bump the period to weekly to get multiple examples 150 | day = 7 151 | ) 152 | num_obs <- length(x[[2]]) 153 | 154 | if (max_anoms < 1 / num_obs) max_anoms <- 1 / num_obs 155 | 156 | # -- Setup for longterm time series 157 | 158 | # If longterm is enabled, break the data into subset data frames and store in all_data 159 | if (longterm) { 160 | # Pre-allocate list with size equal to the number of piecewise_median_period_weeks chunks in x + any left over chunk 161 | # handle edge cases for daily and single column data period lengths 162 | if (gran == "day") { 163 | # STL needs 2*period + 1 observations 164 | num_obs_in_period <- period * piecewise_median_period_weeks + 1 165 | num_days_in_period <- (7 * piecewise_median_period_weeks) + 1 166 | } else { 167 | num_obs_in_period <- period * 7 * piecewise_median_period_weeks 168 | num_days_in_period <- (7 * piecewise_median_period_weeks) 169 | } 170 | 171 | # Store last date in time series 172 | last_date <- x[[1]][num_obs] 173 | 174 | all_data <- vector(mode = "list", length = ceiling(length(x[[1]]) / (num_obs_in_period))) 175 | 176 | # Subset x into piecewise_median_period_weeks chunks 177 | for (j in seq(1, length(x[[1]]), by = num_obs_in_period)) { 178 | 179 | start_date <- x[[1]][j] 180 | end_date <- min(start_date + lubridate::days(num_days_in_period), x[[1]][length(x[[1]])]) 181 | 182 | # if there is at least 14 days left, subset it, otherwise subset last_date - 14days 183 | if (difftime(end_date, start_date, units = "days") == as.difftime(num_days_in_period, units = "days")) { 184 | all_data[[ceiling(j / (num_obs_in_period))]] <- x[x[[1]] >= start_date & x[[1]] < end_date, ] 185 | } else { 186 | all_data[[ceiling(j / (num_obs_in_period))]] <- 187 | x[x[[1]] > (last_date - lubridate::days(num_days_in_period)) & x[[1]] <= last_date, ] 188 | } 189 | 190 | } 191 | 192 | } else { 193 | # If longterm is not enabled, then just overwrite all_data list with x as the only item 194 | all_data <- list(x) 195 | } 196 | 197 | # Create empty data frames to store all anoms and seasonal+trend component from decomposition 198 | all_anoms <- data.frame(timestamp = numeric(0), count = numeric(0)) 199 | seasonal_plus_trend <- data.frame(timestamp = numeric(0), count = numeric(0)) 200 | 201 | # Detect anomalies on all data (either entire data in one-pass, or in 2 week blocks if longterm=TRUE) 202 | for (i in 1:length(all_data)) { 203 | 204 | anomaly_direction <- switch( 205 | direction, 206 | "pos" = data.frame(one_tail = TRUE, upper_tail = TRUE), # upper-tail only (positive going anomalies) 207 | "neg" = 
data.frame(one_tail = TRUE, upper_tail = FALSE), # lower-tail only (negative going anomalies) 208 | "both" = data.frame(one_tail = FALSE, upper_tail = TRUE) 209 | ) # Both tails. Tail direction is not actually used. 210 | 211 | # detect_anoms actually performs the anomaly detection and returns the results in a list containing the anomalies 212 | # as well as the decomposed components of the time series for further analysis. 213 | s_h_esd_timestamps <- detect_anoms(all_data[[i]], 214 | k = max_anoms, alpha = alpha, num_obs_per_period = period, 215 | use_decomp = TRUE, use_esd = FALSE, 216 | one_tail = anomaly_direction$one_tail, upper_tail = anomaly_direction$upper_tail, 217 | verbose = verbose 218 | ) 219 | 220 | # store decomposed components in local variable and overwrite s_h_esd_timestamps to contain only the anom timestamps 221 | data_decomp <- s_h_esd_timestamps$stl 222 | s_h_esd_timestamps <- s_h_esd_timestamps$anoms 223 | 224 | # -- Step 3: Use detected anomaly timestamps to extract the actual anomalies (timestamp and value) from the data 225 | if (!is.null(s_h_esd_timestamps)) { 226 | anoms <- all_data[[i]][(all_data[[i]][[1]] %in% s_h_esd_timestamps), ] 227 | } else { 228 | anoms <- data.frame(timestamp = numeric(0), count = numeric(0)) 229 | } 230 | 231 | # Filter the anomalies using one of the thresholding functions if applicable 232 | if (threshold != "None") { 233 | 234 | # Calculate daily max values 235 | periodic_maxs <- tapply(x[[2]], as.Date(x[[1]]), FUN = max) 236 | 237 | # Calculate the threshold set by the user 238 | if (threshold == "med_max") { 239 | thresh <- stats::median(periodic_maxs) 240 | } else if (threshold == "p95") { 241 | thresh <- stats::quantile(periodic_maxs, .95) 242 | } else if (threshold == "p99") { 243 | thresh <- stats::quantile(periodic_maxs, .99) 244 | } 245 | # Remove any anoms below the threshold 246 | anoms <- anoms[anoms[[2]] >= thresh, ] 247 | 248 | } 249 | 250 | all_anoms <- rbind(all_anoms, anoms) 251 | seasonal_plus_trend <- rbind(seasonal_plus_trend, data_decomp) 252 | 253 | } 254 | 255 | # Cleanup potential duplicates 256 | all_anoms <- all_anoms[!duplicated(all_anoms[[1]]), ] 257 | seasonal_plus_trend <- seasonal_plus_trend[!duplicated(seasonal_plus_trend[[1]]), ] 258 | 259 | # -- If only_last was set by the user, create subset of the data that represent the most recent day 260 | if (!is.null(only_last)) { 261 | 262 | start_date <- x[[1]][num_obs] - lubridate::days(7) 263 | start_anoms <- x[[1]][num_obs] - lubridate::days(1) 264 | 265 | if (gran == "day") { 266 | # TODO: This might be better set up top at the gran check 267 | breaks <- 3 * 12 268 | num_days_per_line <- 7 269 | } else { 270 | if (only_last == "day") { 271 | breaks <- 12 272 | } else { 273 | # We need to change start_date and start_anoms for the hourly only_last option 274 | start_date <- lubridate::floor_date(x[[1]][num_obs] - lubridate::days(2), "day") 275 | start_anoms <- x[[1]][num_obs] - lubridate::hours(1) 276 | breaks <- 3 277 | } 278 | } 279 | 280 | # subset the last days worth of data 281 | x_subset_single_day <- x[x[[1]] > start_anoms, ] 282 | 283 | x_subset_week <- x[(x[[1]] <= start_anoms) & (x[[1]] > start_date), ] 284 | all_anoms <- all_anoms[all_anoms[[1]] >= x_subset_single_day[[1]][1], ] 285 | num_obs <- length(x_subset_single_day[[2]]) 286 | 287 | } 288 | 289 | # Calculate number of anomalies as a percentage 290 | anom_pct <- (length(all_anoms[[2]]) / num_obs) * 100 291 | 292 | # If there are no anoms, then let's exit 293 | if (anom_pct == 0) { 294 | if 
(verbose) message("No anomalies detected.") 295 | return(data.frame()) 296 | } 297 | 298 | # Fix to make sure date-time is correct and that we retain hms at midnight 299 | all_anoms[[1]] <- format(all_anoms[[1]], format = "%Y-%m-%d %H:%M:%S") 300 | 301 | # Store expected values if set by user 302 | if (e_value) { 303 | anoms <- data.frame( 304 | timestamp = all_anoms[[1]], anoms = all_anoms[[2]], 305 | expected_value = seasonal_plus_trend[[2]][as.character(as.POSIXlt(seasonal_plus_trend[[1]], tz = "UTC")) %in% all_anoms[[1]]], 306 | stringsAsFactors = FALSE 307 | ) 308 | } else { 309 | anoms <- data.frame(timestamp = all_anoms[[1]], anoms = all_anoms[[2]], 310 | stringsAsFactors = FALSE) 311 | } 312 | 313 | # Make sure we're still a valid POSIXct datetime. 314 | # TODO: Make sure we keep original datetime format and timezone. 315 | 316 | anoms$timestamp <- as.POSIXct(anoms$timestamp, tz = "UTC") 317 | 318 | class(anoms) <- c("tbl_df", "tbl", "data.frame") 319 | 320 | return(anoms) 321 | } 322 | 323 | #' @rdname AnomalyDetectionTs 324 | #' @export 325 | ad_ts <- AnomalyDetectionTs 326 | -------------------------------------------------------------------------------- /R/vec_anom_detection.R: -------------------------------------------------------------------------------- 1 | #' Anomaly Detection Using Seasonal Hybrid ESD Test 2 | #' 3 | #' A technique for detecting anomalies in seasonal univariate time series where the input is a 4 | #' series of observations. 5 | #' 6 | #' @md 7 | #' @name AnomalyDetectionVec 8 | #' @param x Time series as a column data frame, list, or vector, where the column consists of 9 | #' the observations. 10 | #' @param max_anoms Maximum number of anomalies that S-H-ESD will detect as a percentage of the 11 | #' data. 12 | #' @param direction Directionality of the anomalies to be detected. One of: 13 | #' `pos`, `neg`, `both`. 14 | #' @param alpha The level of statistical significance with which to accept or reject anomalies. 15 | #' @param period Defines the number of observations in a single period, and used during seasonal 16 | #' decomposition. 17 | #' @param only_last Find and report anomalies only within the last period in the time series. 18 | #' @param threshold Only report positive going anoms above the threshold specified. One of: 19 | #' `None`, `med_max`, `p95`, `p99`. 20 | #' @param e_value Add an additional column to the anoms output containing the expected value. 21 | #' @param longterm_period Defines the number of observations for which the trend can be considered 22 | #' flat. The value should be an integer multiple of the number of observations in a single period. 23 | #' This increases anom detection efficacy for time series that are greater than a month. 24 | #' @param verbose Enable debug messages 25 | #' @return The returned value is a list with the following components. 26 | #' @return Data frame containing index, values, and optionally expected values. 27 | #' @references 28 | #' - Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) 29 | #' "A Novel Technique for Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA. 30 | #' () 31 | #' - Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier 32 | #' Procedure", Technometrics, 25(2), pp. 165-172. 
() 33 | #' @examples 34 | #' data(raw_data) 35 | #' 36 | #' ad_vec(raw_data[,2], max_anoms=0.02, period=1440, direction='both') 37 | #' 38 | #' # To detect only the anomalies in the last period, run the following: 39 | #' 40 | #' ad_vec( 41 | #' raw_data[,2], max_anoms=0.02, period=1440, direction='both', only_last=TRUE 42 | #' ) 43 | #' @seealso [ad_ts()] 44 | #' @export 45 | AnomalyDetectionVec <- function(x, max_anoms = 0.10, direction = "pos", 46 | alpha = 0.05, period = NULL, only_last = FALSE, 47 | threshold = "None", e_value = FALSE, 48 | longterm_period = NULL, verbose = FALSE) { 49 | 50 | # Check for supported inputs types and add timestamps 51 | if (is.data.frame(x) && ncol(x) == 1 && is.numeric(x[[1]])) { 52 | x <- data.frame(timestamp = c(1:length(x[[1]])), count = x[[1]]) 53 | } else if (is.vector(x) || is.list(x) && is.numeric(x)) { 54 | x <- data.frame(timestamp = c(1:length(x)), count = x) 55 | } else { 56 | stop("data must be a single data frame, list, or vector that holds numeric values.") 57 | } 58 | 59 | # Sanity check all input parameterss 60 | if (max_anoms > .49) { 61 | stop(paste("max_anoms must be less than 50% of the data points (max_anoms =", 62 | round(max_anoms * length(x[[2]]), 0), " data_points =", length(x[[2]]), ").")) 63 | } 64 | 65 | if (!direction %in% c("pos", "neg", "both")) { 66 | stop("direction options are: pos | neg | both.") 67 | } 68 | 69 | if (!(0.01 <= alpha || alpha <= 0.1)) { 70 | if (verbose) { 71 | message( 72 | "Warning: alpha is the statistical signifigance, and is usually between 0.01 and 0.1" 73 | ) 74 | } 75 | } 76 | 77 | if (is.null(period)) { 78 | stop("Period must be set to the number of data points in a single period") 79 | } 80 | 81 | if (!is.logical(only_last)) { 82 | stop("only_last must be either TRUE or FALSE") 83 | } 84 | 85 | if (!threshold %in% c("None", "med_max", "p95", "p99")) { 86 | stop("threshold options are: None | med_max | p95 | p99.") 87 | } 88 | 89 | if (!is.logical(e_value)) { 90 | stop("e_value must be either TRUE or FALSE") 91 | } 92 | 93 | # -- Main analysis: Perform S-H-ESD 94 | 95 | num_obs <- length(x[[2]]) 96 | 97 | if (max_anoms < 1 / num_obs) max_anoms <- 1 / num_obs 98 | 99 | # -- Setup for longterm time series 100 | 101 | # If longterm is enabled, break the data into subset data frames and store in all_data, 102 | if (!is.null(longterm_period)) { 103 | 104 | all_data <- vector(mode = "list", length = ceiling(length(x[[1]]) / (longterm_period))) 105 | 106 | # Subset x into two week chunks 107 | for (j in seq(1, length(x[[1]]), by = longterm_period)) { 108 | 109 | start_index <- x[[1]][j] 110 | end_index <- min((start_index + longterm_period - 1), num_obs) 111 | 112 | # if there is at least longterm_period left, subset it, otherwise subset last_index - longterm_period 113 | if ((end_index - start_index + 1) == longterm_period) { 114 | all_data[[ceiling(j / (longterm_period))]] <- 115 | x[x[[1]] >= start_index & x[[1]] <= end_index, ] 116 | } else { 117 | all_data[[ceiling(j / (longterm_period))]] <- 118 | x[x[[1]] > (num_obs - longterm_period) & x[[1]] <= num_obs, ] 119 | } 120 | 121 | } 122 | 123 | } else { 124 | # If longterm is not enabled, then just overwrite all_data list with x as the only item 125 | all_data <- list(x) 126 | } 127 | 128 | # Create empty data frames to store all anoms and seasonal+trend component from decomposition 129 | all_anoms <- data.frame(timestamp = numeric(0), count = numeric(0)) 130 | seasonal_plus_trend <- data.frame(timestamp = numeric(0), count = numeric(0)) 131 | 132 
| # Detect anomalies on all data (either entire data in one-pass, or in 2 week blocks if longterm=TRUE) 133 | for (i in 1:length(all_data)) { 134 | anomaly_direction <- switch( 135 | direction, 136 | "pos" = data.frame(one_tail = TRUE, upper_tail = TRUE), # upper-tail only (positive going anomalies) 137 | "neg" = data.frame(one_tail = TRUE, upper_tail = FALSE), # lower-tail only (negative going anomalies) 138 | "both" = data.frame(one_tail = FALSE, upper_tail = TRUE) 139 | ) # Both tails. Tail direction is not actually used. 140 | 141 | # detect_anoms actually performs the anomaly detection and returns the results in a list containing the anomalies 142 | # as well as the decomposed components of the time series for further analysis. 143 | detect_anoms( 144 | all_data[[i]], 145 | k = max_anoms, alpha = alpha, 146 | num_obs_per_period = period, 147 | use_decomp = TRUE, use_esd = FALSE, 148 | one_tail = anomaly_direction$one_tail, 149 | upper_tail = anomaly_direction$upper_tail, 150 | verbose = verbose 151 | ) -> s_h_esd_timestamps 152 | 153 | # store decomposed components in local variable and overwrite s_h_esd_timestamps to contain only the anom timestamps 154 | data_decomp <- s_h_esd_timestamps$stl 155 | s_h_esd_timestamps <- s_h_esd_timestamps$anoms 156 | 157 | # -- Step 3: Use detected anomaly timestamps to extract the actual anomalies (timestamp and value) from the data 158 | if (!is.null(s_h_esd_timestamps)) { 159 | anoms <- all_data[[i]][all_data[[i]][[1]] %in% s_h_esd_timestamps, ] 160 | } else { 161 | anoms <- data.frame(timestamp = numeric(0), count = numeric(0)) 162 | } 163 | 164 | # Filter the anomalies using one of the thresholding functions if applicable 165 | if (threshold != "None") { 166 | 167 | # Calculate daily max values 168 | if (!is.null(longterm_period)) { 169 | periodic_maxs <- tapply(all_data[[i]][[2]], c(0:(longterm_period - 1)) %/% period, FUN = max) 170 | } else { 171 | periodic_maxs <- tapply(all_data[[i]][[2]], c(0:(num_obs - 1)) %/% period, FUN = max) 172 | } 173 | 174 | # Calculate the threshold set by the user 175 | if (threshold == "med_max") { 176 | thresh <- stats::median(periodic_maxs) 177 | } else if (threshold == "p95") { 178 | thresh <- stats::quantile(periodic_maxs, .95) 179 | } else if (threshold == "p99") { 180 | thresh <- stats::quantile(periodic_maxs, .99) 181 | } 182 | # Remove any anoms below the threshold 183 | anoms <- anoms[anoms[[2]] >= thresh, ] 184 | } 185 | 186 | all_anoms <- rbind(all_anoms, anoms) 187 | seasonal_plus_trend <- rbind(seasonal_plus_trend, data_decomp) 188 | 189 | } 190 | 191 | # Cleanup potential duplicates 192 | all_anoms <- all_anoms[!duplicated(all_anoms[[1]]), ] 193 | seasonal_plus_trend <- seasonal_plus_trend[!duplicated(seasonal_plus_trend[[1]]), ] 194 | 195 | # -- If only_last was set by the user, create subset of the data that represent the most recent period 196 | if (only_last) { 197 | x_subset_single_period <- data.frame(timestamp = x[[1]][(num_obs - period + 1):num_obs], 198 | count = x[[2]][(num_obs - period + 1):num_obs]) 199 | # Let's try and show 7 periods prior 200 | past_obs <- period * 7 201 | # If we don't have that much data, then show what we have - the last period 202 | if (num_obs < past_obs) past_obs <- num_obs - period 203 | 204 | x_subset_previous <- 205 | data.frame(timestamp = x[[1]][(num_obs - past_obs + 1):(num_obs - period + 1)], 206 | count = x[[2]][(num_obs - past_obs + 1):(num_obs - period + 1)]) 207 | 208 | all_anoms <- all_anoms[all_anoms[[1]] >= x_subset_single_period[[1]][1], ] 209 | 
num_obs <- length(x_subset_single_period[[2]]) 210 | } 211 | 212 | # Calculate number of anomalies as a percentage 213 | anom_pct <- (length(all_anoms[[2]]) / num_obs) * 100 214 | 215 | # If there are no anoms, then let's exit 216 | if (anom_pct == 0) { 217 | if (verbose) message("No anomalies detected.") 218 | return(data.frame()) 219 | } 220 | 221 | # Store expected values if set by user 222 | if (e_value) { 223 | anoms <- data.frame(index = all_anoms[[1]], anoms = all_anoms[[2]], 224 | expected_value = 225 | seasonal_plus_trend[[2]][seasonal_plus_trend[[1]] %in% all_anoms[[1]]]) 226 | } else { 227 | anoms <- data.frame(index = all_anoms[[1]], anoms = all_anoms[[2]]) 228 | } 229 | 230 | class(anoms) <- c("tbl_df", "tbl", "data.frame") 231 | 232 | return(anoms) 233 | 234 | } 235 | 236 | 237 | #' @rdname AnomalyDetectionVec 238 | #' @export 239 | ad_vec <- AnomalyDetectionVec -------------------------------------------------------------------------------- /README-unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/README-unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /README-unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/README-unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /README-unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/README-unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | editor_options: 4 | chunk_output_type: console 5 | --- 6 | 7 | [![Travis-CI Build Status](https://travis-ci.org/hrbrmstr/AnomalyDetection.svg?branch=master)](https://travis-ci.org/hrbrmstr/AnomalyDetection) 8 | 9 | 10 | 11 | ```{r, echo = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "##", 15 | fig.path = "README-", 16 | fig.width = 8, 17 | fig.height = 6, 18 | fig.retina = 2 19 | ) 20 | ``` 21 | 22 | # AnomalyDetection 23 | 24 | Anomaly Detection Using Seasonal Hybrid Extreme Studentized Deviate Test 25 | 26 | ## Description 27 | 28 | A technique for detecting anomalies in seasonal univariate time series. 29 | The methods uses are robust, from a statistical standpoint, in the presence of 30 | seasonality and an underlying trend. These methods can be used in 31 | wide variety of contexts. For example, detecting anomalies in system metrics after 32 | a new software release, user engagement post an 'A/B' test, or for problems in 33 | econometrics, financial engineering, political and social sciences. 34 | 35 | ## About This Fork 36 | 37 | Twitterfolks launched this package in 2014. Many coding and package standards 38 | have changed. The package now conforms to CRAN standards. 39 | 40 | The plots were nice and all but terribly unnecessary. The two core functions 41 | have been modified to only return tidy data frames (tibbles, actually). 
This 42 | makes it easier to chain them without having to deal 43 | with list element 44 | dereferencing. 45 | 46 | Shorter, snake-case aliases have also been provided: 47 | 48 | - `ad_ts` for `AnomalyDetectionTs` 49 | - `ad_vec` for `AnomalyDetectionVec` 50 | 51 | The original names are still in the package but the `README` and examples 52 | all use the newer, shorter versions. 53 | 54 | The following outstanding PRs from the original repo are included: 55 | 56 | - Added in PR [#98](https://github.com/twitter/AnomalyDetection/pull/98/) (@gggodhwani) 57 | - Added in PR [#93](https://github.com/twitter/AnomalyDetection/pull/93) (@nujnimka) 58 | - Added in PR [#69](https://github.com/twitter/AnomalyDetection/pull/69) (@randakar) 59 | - Added in PR [#44](https://github.com/twitter/AnomalyDetection/pull/44) (@nicolasmiller) 60 | - PR [#92](https://github.com/twitter/AnomalyDetection/pull/92) (@caijun) inherently resolved 61 | 62 | If those authors find this repo, please add yourselves to the `DESCRIPTION` as 63 | contributors. 64 | 65 | ## What's Inside The Tin 66 | 67 | The following functions are implemented: 68 | 69 | - `ad_ts`: Anomaly Detection Using Seasonal Hybrid ESD Test 70 | - `ad_vec`: Anomaly Detection Using Seasonal Hybrid ESD Test 71 | 72 | ## How the package works 73 | 74 | The underlying algorithm – referred to as Seasonal Hybrid ESD (S-H-ESD) – builds 75 | upon the Generalized ESD test for detecting anomalies. Note that S-H-ESD can 76 | be used to detect both global as well as local anomalies. This is achieved by 77 | employing time series decomposition and using robust statistical metrics, viz., 78 | median together with ESD. In addition, for long time series (say, 6 months of 79 | minutely data), the algorithm employs piecewise approximation - this is rooted 80 | in the fact that trend extraction in the presence of anomalies is non-trivial - 81 | for anomaly detection. 82 | 83 | Besides time series, the package can also be used to detect anomalies in a 84 | vector of numerical values. We have found this very useful as many times the 85 | corresponding timestamps are not available. The package provides rich 86 | visualization support. The user can specify the direction of anomalies, the 87 | window of interest (such as last day, last hour), enable/disable piecewise 88 | approximation; additionally, the x- and y-axis are annotated in a way to assist visual data analysis.
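
For intuition, here is a minimal sketch of that hybrid step, written against base R only and *not* part of the exported API (`ad_ts()`/`ad_vec()` add input checking, granularity handling, long-term chunking, and thresholding on top of it): the series is decomposed with `stl()`, the seasonal component and the series median are removed, and a generalized ESD test built on the median and MAD is run on the remainder. The function name `shesd_sketch()` and its arguments are illustrative only:

```{r shesd-sketch, eval=FALSE}
# Illustrative only: the core S-H-ESD idea on a numeric vector `x` whose
# seasonal period is `period` observations. Returns indices of detected anomalies.
shesd_sketch <- function(x, period, max_anoms = 0.10, alpha = 0.05) {

  # 1. Remove seasonality (robust STL) and the series median; keep the remainder
  fit <- stats::stl(stats::ts(x, frequency = period),
                    s.window = "periodic", robust = TRUE)
  resid <- x - as.numeric(fit$time.series[, "seasonal"]) - stats::median(x)

  n <- length(resid)
  idx <- seq_len(n)
  removed <- integer(0)
  num_anoms <- 0L

  # 2. Generalized ESD on the remainder, using median/MAD instead of mean/sd
  for (i in seq_len(trunc(n * max_anoms))) {

    sigma <- stats::mad(resid)
    if (sigma == 0) break                         # constant remainder

    ares <- abs(resid - stats::median(resid)) / sigma
    worst <- which.max(ares)
    removed <- c(removed, idx[worst])

    p <- 1 - alpha / (2 * (n - i + 1))            # two-tailed critical value
    t <- stats::qt(p, n - i - 1)
    lam <- t * (n - i) / sqrt((n - i - 1 + t^2) * (n - i + 1))
    if (ares[worst] > lam) num_anoms <- i

    resid <- resid[-worst]
    idx <- idx[-worst]
  }

  removed[seq_len(num_anoms)]
}
```

Using the median and MAD in place of the mean and standard deviation is what keeps the test statistic stable when a sizable fraction of the points are themselves anomalous.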
89 | 90 | ## Installation 91 | 92 | You can install AnomalyDetection from github with: 93 | 94 | ```{r gh-installation, eval = FALSE} 95 | # install.packages("devtools") 96 | devtools::install_github("hrbrmstr/AnomalyDetection") 97 | ``` 98 | 99 | ```{r message=FALSE, warning=FALSE, error=FALSE, include=FALSE} 100 | options(width=120) 101 | ``` 102 | 103 | ## How to get started 104 | 105 | ```{r message=FALSE, warning=FALSE, error=FALSE} 106 | library(AnomalyDetection) 107 | library(hrbrthemes) 108 | library(tidyverse) 109 | ``` 110 | 111 | ```{r message=FALSE, warning=FALSE, error=FALSE} 112 | data(raw_data) 113 | 114 | res <- ad_ts(raw_data, max_anoms=0.02, direction='both') 115 | 116 | glimpse(res) 117 | 118 | # for ggplot2 119 | raw_data$timestamp <- as.POSIXct(raw_data$timestamp) 120 | 121 | ggplot() + 122 | geom_line( 123 | data=raw_data, aes(timestamp, count), 124 | size=0.125, color="lightslategray" 125 | ) + 126 | geom_point( 127 | data=res, aes(timestamp, anoms), color="#cb181d", alpha=1/3 128 | ) + 129 | scale_x_datetime(date_labels="%b\n%Y") + 130 | scale_y_comma() + 131 | theme_ipsum_rc(grid="XY") 132 | ``` 133 | 134 | From the plot, we observe that the input time series experiences both positive 135 | and negative anomalies. Furthermore, many of the anomalies in the time series 136 | are local anomalies within the bounds of the time series’ seasonality (hence, 137 | cannot be detected using the traditional approaches). The anomalies detected 138 | using the proposed technique are annotated on the plot. In case the timestamps 139 | for the plot above were not available, anomaly detection could then carried 140 | out using the AnomalyDetectionVec function; specifically, one can use the 141 | `AnomalyDetectionVec()` method. The equivalent call to the above would be: 142 | 143 | ```{r eval=FALSE} 144 | ad_vec(raw_data[,2], max_anoms=0.02, period=1440, direction='both') 145 | ``` 146 | 147 | Often, anomaly detection is carried out on a periodic basis. For instance, at 148 | times, one may be interested in determining whether there was any anomaly 149 | yesterday. To this end, we support a flag only_last whereby one can subset the 150 | anomalies that occurred during the last day or last hour. 151 | 152 | ```{r message=FALSE, warning=FALSE, error=FALSE} 153 | data(raw_data) 154 | 155 | res <- ad_ts(raw_data, max_anoms=0.02, direction='both', only_last="day") 156 | 157 | glimpse(res) 158 | 159 | # for ggplot2 160 | raw_data$timestamp <- as.POSIXct(raw_data$timestamp) 161 | 162 | ggplot() + 163 | geom_line( 164 | data=raw_data, aes(timestamp, count), 165 | size=0.125, color="lightslategray" 166 | ) + 167 | geom_point( 168 | data=res, aes(timestamp, anoms), color="#cb181d", alpha=1/3 169 | ) + 170 | scale_x_datetime(date_labels="%b\n%Y") + 171 | scale_y_comma() + 172 | theme_ipsum_rc(grid="XY") 173 | ``` 174 | 175 | Anomaly detection for long duration time series can be carried out by setting 176 | the longterm argument to `TRUE`. 177 | 178 | ## Copyright & License 179 | 180 | Copyright © 2015 Twitter, Inc. and other contributors 181 | 182 | Licensed under the GPLv3 183 | 184 | ## Code of Conduct 185 | 186 | Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![Travis-CI Build 3 | Status](https://travis-ci.org/hrbrmstr/AnomalyDetection.svg?branch=master)](https://travis-ci.org/hrbrmstr/AnomalyDetection) 4 | 5 | 6 | 7 | # AnomalyDetection 8 | 9 | Anomaly Detection Using Seasonal Hybrid Extreme Studentized Deviate Test 10 | 11 | ## Description 12 | 13 | A technique for detecting anomalies in seasonal univariate time series. 14 | The methods uses are robust, from a statistical standpoint, in the 15 | presence of seasonality and an underlying trend. These methods can be 16 | used in wide variety of contexts. For example, detecting anomalies in 17 | system metrics after a new software release, user engagement post an 18 | ‘A/B’ test, or for problems in econometrics, financial engineering, 19 | political and social sciences. 20 | 21 | ## About This Fork 22 | 23 | Twitterfolks launched this package in 2014. Many coding and package 24 | standards have changed. The package now conforms to CRAN standards. 25 | 26 | The plots were nice and all but terribly unnecessary. The two core 27 | functions have been modified to only return tidy data frames (tibbles, 28 | actually). This makes it easier to chain them without having to deal 29 | with list element dereferencing. 30 | 31 | Shorter, snake-case aliases have also been provided: 32 | 33 | - `ad_ts` for `AnomalyDetectionTs` 34 | - `ad_vec` for `AnomalyDetectionVec` 35 | 36 | The original names are still in the package but the `README` and 37 | examples all use the newer, shorter versions. 38 | 39 | The following outstanding PRs from the original repo are included: 40 | 41 | - Added in PR 42 | [\#98](https://github.com/twitter/AnomalyDetection/pull/98/) 43 | (@gggodhwani) 44 | - Added in PR 45 | [\#93](https://github.com/twitter/AnomalyDetection/pull/93) 46 | (@nujnimka) 47 | - Added in PR 48 | [\#69](https://github.com/twitter/AnomalyDetection/pull/69) 49 | (@randakar) 50 | - Added in PR 51 | [\#44](https://github.com/twitter/AnomalyDetection/pull/44) 52 | (@nicolasmiller) 53 | - PR [\#92](https://github.com/twitter/AnomalyDetection/pull/92) 54 | (@caijun) inherently resolved 55 | 56 | If those authors find this repo, please add yourselves to the 57 | `DESCRIPTION` as contirbutors. 58 | 59 | ## What’s Inside The Tin 60 | 61 | The following functions are implemented: 62 | 63 | - `ad_ts`: Anomaly Detection Using Seasonal Hybrid ESD Test 64 | - `ad_vec`: Anomaly Detection Using Seasonal Hybrid ESD Test 65 | 66 | ## How the package works 67 | 68 | The underlying algorithm – referred to as Seasonal Hybrid ESD (S-H-ESD) 69 | builds upon the Generalized ESD test for detecting anomalies. Note that 70 | S-H-ESD can be used to detect both global as well as local anomalies. 71 | This is achieved by employing time series decomposition and using robust 72 | statistical metrics, viz., median together with ESD. In addition, for 73 | long time series (say, 6 months of minutely data), the algorithm employs 74 | piecewise approximation - this is rooted to the fact that trend 75 | extraction in the presence of anomalies in non-trivial - for anomaly 76 | detection. 77 | 78 | Besides time series, the package can also be used to detect anomalies in 79 | a vector of numerical values. We have found this very useful as many 80 | times the corresponding timestamps are not available. The package 81 | provides rich visualization support. 
The user can specify the direction 82 | of anomalies, the window of interest (such as last day, last hour), 83 | enable/disable piecewise approximation; additionally, the x- and y-axis 84 | are annotated in a way to assist visual data analysis. 85 | 86 | ## Installation 87 | 88 | You can install AnomalyDetection from github with: 89 | 90 | ``` r 91 | # install.packages("devtools") 92 | devtools::install_github("hrbrmstr/AnomalyDetection") 93 | ``` 94 | 95 | ## How to get started 96 | 97 | ``` r 98 | library(AnomalyDetection) 99 | library(hrbrthemes) 100 | library(tidyverse) 101 | ``` 102 | 103 | ``` r 104 | data(raw_data) 105 | 106 | res <- ad_ts(raw_data, max_anoms=0.02, direction='both') 107 | 108 | glimpse(res) 109 | ## Observations: 131 110 | ## Variables: 2 111 | ## $ timestamp 1980-09-25 16:05:00, 1980-09-29 06:40:00, 1980-09-29 21:44:00, 1980-09-30 17:46:00, 1980-09-30 1... 112 | ## $ anoms 21.3510, 193.1036, 148.1740, 52.7478, 49.6582, 35.6067, 32.5045, 30.0555, 31.2614, 30.2551, 27.38... 113 | 114 | # for ggplot2 115 | raw_data$timestamp <- as.POSIXct(raw_data$timestamp) 116 | 117 | ggplot() + 118 | geom_line( 119 | data=raw_data, aes(timestamp, count), 120 | size=0.125, color="lightslategray" 121 | ) + 122 | geom_point( 123 | data=res, aes(timestamp, anoms), color="#cb181d", alpha=1/3 124 | ) + 125 | scale_x_datetime(date_labels="%b\n%Y") + 126 | scale_y_comma() + 127 | theme_ipsum_rc(grid="XY") 128 | ``` 129 | 130 | 131 | 132 | From the plot, we observe that the input time series experiences both 133 | positive and negative anomalies. Furthermore, many of the anomalies in 134 | the time series are local anomalies within the bounds of the time 135 | series’ seasonality (hence, cannot be detected using the traditional 136 | approaches). The anomalies detected using the proposed technique are 137 | annotated on the plot. In case the timestamps for the plot above were 138 | not available, anomaly detection could then carried out using the 139 | AnomalyDetectionVec function; specifically, one can use the 140 | `AnomalyDetectionVec()` method. The equivalent call to the above would 141 | be: 142 | 143 | ``` r 144 | ad_vec(raw_data[,2], max_anoms=0.02, period=1440, direction='both') 145 | ``` 146 | 147 | Often, anomaly detection is carried out on a periodic basis. For 148 | instance, at times, one may be interested in determining whether there 149 | was any anomaly yesterday. To this end, we support a flag only\_last 150 | whereby one can subset the anomalies that occurred during the last day 151 | or last hour. 152 | 153 | ``` r 154 | data(raw_data) 155 | 156 | res <- ad_ts(raw_data, max_anoms=0.02, direction='both', only_last="day") 157 | 158 | glimpse(res) 159 | ## Observations: 25 160 | ## Variables: 2 161 | ## $ timestamp 1980-10-05 01:12:00, 1980-10-05 01:13:00, 1980-10-05 01:14:00, 1980-10-05 01:15:00, 1980-10-05 0... 162 | ## $ anoms 56.4691, 54.9415, 52.0359, 47.7313, 50.5876, 48.2846, 44.6438, 42.3077, 38.8363, 41.0145, 39.5523... 
163 | 164 | # for ggplot2 165 | raw_data$timestamp <- as.POSIXct(raw_data$timestamp) 166 | 167 | ggplot() + 168 | geom_line( 169 | data=raw_data, aes(timestamp, count), 170 | size=0.125, color="lightslategray" 171 | ) + 172 | geom_point( 173 | data=res, aes(timestamp, anoms), color="#cb181d", alpha=1/3 174 | ) + 175 | scale_x_datetime(date_labels="%b\n%Y") + 176 | scale_y_comma() + 177 | theme_ipsum_rc(grid="XY") 178 | ``` 179 | 180 | 181 | 182 | Anomaly detection for long duration time series can be carried out by 183 | setting the longterm argument to `TRUE`. 184 | 185 | ## Copyright & License 186 | 187 | Copyright © 2015 Twitter, Inc. and other contributors 188 | 189 | Licensed under the GPLv3 190 | 191 | ## Code of Conduct 192 | 193 | Please note that this project is released with a [Contributor Code of 194 | Conduct](CONDUCT.md). By participating in this project you agree to 195 | abide by its terms. 196 | -------------------------------------------------------------------------------- /data/raw_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/data/raw_data.rda -------------------------------------------------------------------------------- /figs/Fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/figs/Fig1.png -------------------------------------------------------------------------------- /figs/Fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hrbrmstr/AnomalyDetection/27bd9d02469d0e5702eff0b7decbdac3db04b7b3/figs/Fig2.png -------------------------------------------------------------------------------- /inst/extdata/data.csv: -------------------------------------------------------------------------------- 1 | "date","value" 2 | 2015-02-26 08:00:00,28.9189627228525 3 | 2015-02-26 09:00:00,32.5127691537306 4 | 2015-02-26 10:00:00,31.4729374671571 5 | 2015-02-26 11:00:00,33.9512711864407 6 | 2015-02-26 12:00:00,28.1587457128858 7 | 2015-02-26 13:00:00,29.9130028063611 8 | 2015-02-26 14:00:00,27.3270940570894 9 | 2015-02-26 15:00:00,23.5691126279863 10 | 2015-02-26 16:00:00,23.7694753577107 11 | 2015-02-26 17:00:00,22.1789638932496 12 | 2015-02-26 18:00:00,25.4970917225951 13 | 2015-02-26 19:00:00,22.2707993474715 14 | 2015-02-26 20:00:00,23.2640586797066 15 | 2015-02-26 21:00:00,21.1492753623188 16 | 2015-02-26 22:00:00,20.2434988179669 17 | 2015-02-26 23:00:00,26.5411255411255 18 | 2015-02-27 00:00:00,21.1485148514851 19 | 2015-02-27 01:00:00,34.7333333333333 20 | 2015-02-27 02:00:00,17.1758793969849 21 | 2015-02-27 03:00:00,22.7675276752768 22 | 2015-02-27 04:00:00,22.3108348134991 23 | 2015-02-27 05:00:00,25.1653386454183 24 | 2015-02-27 06:00:00,25.9285714285714 25 | 2015-02-27 07:00:00,31.3939202026599 26 | 2015-02-27 08:00:00,33.1833846153846 27 | 2015-02-27 09:00:00,30.0352112676056 28 | 2015-02-27 10:00:00,29.7779839208411 29 | 2015-02-27 11:00:00,29.9924480805538 30 | 2015-02-27 12:00:00,29.7201166180758 31 | 2015-02-27 13:00:00,26.1428571428571 32 | 2015-02-27 14:00:00,23.6824552924118 33 | 2015-02-27 15:00:00,22.2842696629213 34 | 2015-02-27 16:00:00,22.6301775147929 35 | 2015-02-27 17:00:00,22.5957554320364 36 | 2015-02-27 18:00:00,22.29815455594 37 | 2015-02-27 19:00:00,20.4932866316404 38 | 2015-02-27 
20:00:00,21.2279792746114 39 | 2015-02-27 21:00:00,18.9512437810945 40 | 2015-02-27 22:00:00,22.5922131147541 41 | 2015-02-27 23:00:00,27.8740157480315 42 | 2015-02-28 00:00:00,21.1547619047619 43 | 2015-02-28 01:00:00,22.3865030674847 44 | 2015-02-28 02:00:00,15.3265306122449 45 | 2015-02-28 03:00:00,20.4968944099379 46 | 2015-02-28 04:00:00,24.6292682926829 47 | 2015-02-28 05:00:00,29.7282099343955 48 | 2015-02-28 06:00:00,26.6695250659631 49 | 2015-02-28 07:00:00,24.4923884514436 50 | 2015-02-28 08:00:00,27.2545 51 | 2015-02-28 09:00:00,27.2625968992248 52 | 2015-02-28 10:00:00,26.2038980509745 53 | 2015-02-28 11:00:00,24.9839908519154 54 | 2015-02-28 12:00:00,27.8708487084871 55 | 2015-02-28 13:00:00,24.6541598694943 56 | 2015-02-28 14:00:00,24.9184149184149 57 | 2015-02-28 15:00:00,30.2276707530648 58 | 2015-02-28 16:00:00,20.6197333333333 59 | 2015-02-28 17:00:00,21.6258000984737 60 | 2015-02-28 18:00:00,19.1630076838639 61 | 2015-02-28 19:00:00,22.91 62 | 2015-02-28 20:00:00,23.6474056603774 63 | 2015-02-28 21:00:00,21.722480620155 64 | 2015-02-28 22:00:00,21.7886363636364 65 | 2015-02-28 23:00:00,24.007874015748 66 | 2015-03-01 00:00:00,21.3317535545024 67 | 2015-03-01 01:00:00,24.4630541871921 68 | 2015-03-01 02:00:00,21.7430167597765 69 | 2015-03-01 03:00:00,19.9725085910653 70 | 2015-03-01 04:00:00,19.8446215139442 71 | 2015-03-01 05:00:00,22.3408577878104 72 | 2015-03-01 06:00:00,24.1132075471698 73 | 2015-03-01 07:00:00,28.1435768261965 74 | 2015-03-01 08:00:00,29.395920502092 75 | 2015-03-01 09:00:00,27.1138755980861 76 | 2015-03-01 10:00:00,26.5910112359551 77 | 2015-03-01 11:00:00,27.5464632454924 78 | 2015-03-01 12:00:00,28.4250337685727 79 | 2015-03-01 13:00:00,31.5130434782609 80 | 2015-03-01 14:00:00,24.9224362311296 81 | 2015-03-01 15:00:00,24.7067520946279 82 | 2015-03-01 16:00:00,19.7013574660633 83 | 2015-03-01 17:00:00,20.2194625407166 84 | 2015-03-01 18:00:00,21.3144303797468 85 | 2015-03-01 19:00:00,24.3891850723534 86 | 2015-03-01 20:00:00,24.7505617977528 87 | 2015-03-01 21:00:00,21.6666666666667 88 | 2015-03-01 22:00:00,26.2348066298343 89 | 2015-03-01 23:00:00,22.0180995475113 90 | 2015-03-02 00:00:00,20.7349397590361 91 | 2015-03-02 01:00:00,17.5254237288136 92 | 2015-03-02 02:00:00,18.0229885057471 93 | 2015-03-02 03:00:00,18.1389728096677 94 | 2015-03-02 04:00:00,18.3296 95 | 2015-03-02 05:00:00,25.229537366548 96 | 2015-03-02 06:00:00,24.0457102672293 97 | 2015-03-02 07:00:00,29.1404624277457 98 | 2015-03-02 08:00:00,29.1745623069001 99 | 2015-03-02 09:00:00,28.9919759277834 100 | 2015-03-02 10:00:00,28.2323561346363 101 | 2015-03-02 11:00:00,26.2308868501529 102 | 2015-03-02 12:00:00,25.9711677078328 103 | 2015-03-02 13:00:00,27.6707317073171 104 | 2015-03-02 14:00:00,27.0718418514947 105 | 2015-03-02 15:00:00,28.7082524271845 106 | 2015-03-02 16:00:00,23.0007584376185 107 | 2015-03-02 17:00:00,21.5746656760773 108 | 2015-03-02 18:00:00,22.2506234413965 109 | 2015-03-02 19:00:00,23.4770922419059 110 | 2015-03-02 20:00:00,27.8022813688213 111 | 2015-03-02 21:00:00,24.4478063540091 112 | 2015-03-02 22:00:00,19.2445652173913 113 | 2015-03-02 23:00:00,22.7572815533981 114 | 2015-03-03 00:00:00,35.4133333333333 115 | 2015-03-03 01:00:00,18.6518987341772 116 | 2015-03-03 02:00:00,17.5462962962963 117 | 2015-03-03 03:00:00,18.0989847715736 118 | 2015-03-03 04:00:00,24.7644376899696 119 | 2015-03-03 05:00:00,26.1242990654206 120 | 2015-03-03 06:00:00,31.7241134751773 121 | 2015-03-03 07:00:00,28.7750582750583 122 | 2015-03-03 08:00:00,27.8094131319001 123 | 
2015-03-03 09:00:00,30.6963042313873 124 | 2015-03-03 10:00:00,31.3191870890616 125 | 2015-03-03 11:00:00,29.1392694063927 126 | 2015-03-03 12:00:00,27.4386554621849 127 | 2015-03-03 13:00:00,29.6066176470588 128 | 2015-03-03 14:00:00,25.8543209876543 129 | 2015-03-03 15:00:00,27.005081300813 130 | 2015-03-03 16:00:00,24.5 131 | 2015-03-03 17:00:00,22.9647741400746 132 | 2015-03-03 18:00:00,22.3932729624838 133 | 2015-03-03 19:00:00,21.9032418952618 134 | 2015-03-03 20:00:00,25.3410097431355 135 | 2015-03-03 21:00:00,26.7739837398374 136 | 2015-03-03 22:00:00,23.4526315789474 137 | 2015-03-03 23:00:00,32.3833333333333 138 | 2015-03-04 00:00:00,22.8783783783784 139 | 2015-03-04 01:00:00,19.2167832167832 140 | 2015-03-04 02:00:00,22.2557077625571 141 | 2015-03-04 03:00:00,19.6317280453258 142 | 2015-03-04 04:00:00,21.9059561128527 143 | 2015-03-04 05:00:00,26.7651933701657 144 | 2015-03-04 06:00:00,26.9214780600462 145 | 2015-03-04 07:00:00,31.4672955974843 146 | 2015-03-04 08:00:00,32.0051107325383 147 | 2015-03-04 09:00:00,29.1982942430704 148 | 2015-03-04 10:00:00,27.6325736520289 149 | 2015-03-04 11:00:00,29.2883147386964 150 | 2015-03-04 12:00:00,28.7355223880597 151 | 2015-03-04 13:00:00,29.0514829322888 152 | 2015-03-04 14:00:00,29.6065040650407 153 | 2015-03-04 15:00:00,27.4267291910903 154 | 2015-03-04 16:00:00,25.0244770805518 155 | 2015-03-04 17:00:00,22.4989447024061 156 | 2015-03-04 18:00:00,21.2194793536804 157 | 2015-03-04 19:00:00,22.2119901112485 158 | 2015-03-04 20:00:00,23.1895551257253 159 | 2015-03-04 21:00:00,26.5961844197138 160 | 2015-03-04 22:00:00,38.5714285714286 161 | 2015-03-04 23:00:00,24.7486910994764 162 | 2015-03-05 00:00:00,23.767955801105 163 | 2015-03-05 01:00:00,17.6857142857143 164 | 2015-03-05 02:00:00,19.392 165 | 2015-03-05 03:00:00,22.9975490196078 166 | 2015-03-05 04:00:00,29.4811188811189 167 | 2015-03-05 05:00:00,28.3926746166951 168 | 2015-03-05 06:00:00,24.0354698195395 169 | 2015-03-05 07:00:00,31.9466019417476 170 | 2015-03-05 08:00:00,33.9649474211317 171 | 2015-03-05 09:00:00,30.0051694428489 172 | 2015-03-05 10:00:00,27.4879081015719 173 | 2015-03-05 11:00:00,30.5145929339478 174 | 2015-03-05 12:00:00,29.0211586901763 175 | 2015-03-05 13:00:00,26.5287524366472 176 | 2015-03-05 14:00:00,28.6781091727631 177 | 2015-03-05 15:00:00,28.4192118226601 178 | 2015-03-05 16:00:00,23.6586294416244 179 | 2015-03-05 17:00:00,22.1656104773975 180 | 2015-03-05 18:00:00,20.3252148997135 181 | 2015-03-05 19:00:00,26.006671608599 182 | 2015-03-05 20:00:00,27.7519908987486 183 | 2015-03-05 21:00:00,24.1321428571429 184 | 2015-03-05 22:00:00,21.8695652173913 185 | 2015-03-05 23:00:00,17.9686098654708 186 | 2015-03-06 00:00:00,19.8554913294798 187 | 2015-03-06 01:00:00,18.3038674033149 188 | 2015-03-06 02:00:00,23.3404255319149 189 | 2015-03-06 03:00:00,21.8716049382716 190 | 2015-03-06 04:00:00,27.0773558368495 191 | 2015-03-06 05:00:00,26.8239051094891 192 | 2015-03-06 06:00:00,27.9043309631545 193 | 2015-03-06 07:00:00,30.0811287477954 194 | 2015-03-06 08:00:00,36.1550614394383 195 | 2015-03-06 09:00:00,28.3143695014663 196 | 2015-03-06 10:00:00,30.0440738534842 197 | 2015-03-06 11:00:00,24.9212368728121 198 | 2015-03-06 12:00:00,33.1934541203974 199 | 2015-03-06 13:00:00,30.1344116006693 200 | 2015-03-06 14:00:00,27.8360471645143 201 | 2015-03-06 15:00:00,27.408832807571 202 | 2015-03-06 16:00:00,21.9114611624117 203 | 2015-03-06 17:00:00,23.6295063782585 204 | 2015-03-06 18:00:00,19.0504484304933 205 | 2015-03-06 19:00:00,21.2039127163281 206 | 2015-03-06 
20:00:00,26.9337899543379 207 | 2015-03-06 21:00:00,28.1111111111111 208 | 2015-03-06 22:00:00,24.0568862275449 209 | 2015-03-06 23:00:00,32.8805309734513 210 | 2015-03-07 00:00:00,22.0341463414634 211 | 2015-03-07 01:00:00,18.7032967032967 212 | 2015-03-07 02:00:00,23.0352941176471 213 | 2015-03-07 03:00:00,21.95 214 | 2015-03-07 04:00:00,24.1791304347826 215 | 2015-03-07 05:00:00,26.5935214211076 216 | 2015-03-07 06:00:00,27.9241379310345 217 | 2015-03-07 07:00:00,30.812804453723 218 | 2015-03-07 08:00:00,32.7521423862887 219 | 2015-03-07 09:00:00,30.974761255116 220 | 2015-03-07 10:00:00,30.0966936993138 221 | 2015-03-07 11:00:00,27.5620347394541 222 | 2015-03-07 12:00:00,28.1627313337588 223 | 2015-03-07 13:00:00,26.6600241545894 224 | 2015-03-07 14:00:00,26.2295695570805 225 | 2015-03-07 15:00:00,22.2612099644128 226 | 2015-03-07 16:00:00,22.4991139988187 227 | 2015-03-07 17:00:00,21.0336943441637 228 | 2015-03-07 18:00:00,19.8835978835979 229 | 2015-03-07 19:00:00,24.0924229808493 230 | 2015-03-07 20:00:00,21.9704370179949 231 | 2015-03-07 21:00:00,26.9642184557439 232 | 2015-03-07 22:00:00,25.1060171919771 233 | 2015-03-07 23:00:00,21.4139534883721 234 | 2015-03-08 00:00:00,15.7566137566138 235 | 2015-03-08 01:00:00,17.9470198675497 236 | 2015-03-08 03:00:00,18.696335078534 237 | 2015-03-08 04:00:00,20.4647435897436 238 | 2015-03-08 05:00:00,23.7644787644788 239 | 2015-03-08 06:00:00,23.9364089775561 240 | 2015-03-08 07:00:00,29.3616029822926 241 | 2015-03-08 08:00:00,26.8673946957878 242 | 2015-03-08 09:00:00,28.571986970684 243 | 2015-03-08 10:00:00,28.2724885095207 244 | 2015-03-08 11:00:00,25.6255172413793 245 | 2015-03-08 12:00:00,30.0582582582583 246 | 2015-03-08 13:00:00,28.8484320557491 247 | 2015-03-08 14:00:00,27.3226993865031 248 | 2015-03-08 15:00:00,22.62416918429 249 | 2015-03-08 16:00:00,24.8451672010994 250 | 2015-03-08 17:00:00,23.638864628821 251 | 2015-03-08 18:00:00,27.6091370558376 252 | 2015-03-08 19:00:00,27.2916006339144 253 | 2015-03-08 20:00:00,23.8217213114754 254 | 2015-03-08 21:00:00,31.6496913580247 255 | 2015-03-08 22:00:00,24.1781914893617 256 | 2015-03-08 23:00:00,27.6934865900383 257 | 2015-03-09 00:00:00,23.9532710280374 258 | 2015-03-09 01:00:00,18.5704697986577 259 | 2015-03-09 02:00:00,15.2923076923077 260 | 2015-03-09 03:00:00,19.1059602649007 261 | 2015-03-09 04:00:00,21.1688311688312 262 | 2015-03-09 05:00:00,25.1989389920424 263 | 2015-03-09 06:00:00,27.2994923857868 264 | 2015-03-09 07:00:00,36.2478386167147 265 | 2015-03-09 08:00:00,35.6915769474351 266 | 2015-03-09 09:00:00,30.6126230457441 267 | 2015-03-09 10:00:00,28.24609375 268 | 2015-03-09 11:00:00,35.6420454545455 269 | 2015-03-09 12:00:00,28.5615468409586 270 | 2015-03-09 13:00:00,28.6932354483482 271 | 2015-03-09 14:00:00,30.2106625258799 272 | 2015-03-09 15:00:00,26.9251513483764 273 | 2015-03-09 16:00:00,24.2009446114212 274 | 2015-03-09 17:00:00,22.0852225020991 275 | 2015-03-09 18:00:00,22.1582075903064 276 | 2015-03-09 19:00:00,25.4601226993865 277 | 2015-03-09 20:00:00,26.0371859296482 278 | 2015-03-09 21:00:00,23.2621502209131 279 | 2015-03-09 22:00:00,19.9576059850374 280 | 2015-03-09 23:00:00,17.5936073059361 281 | 2015-03-10 00:00:00,22.3809523809524 282 | 2015-03-10 01:00:00,22.4728260869565 283 | 2015-03-10 02:00:00,17.5652173913043 284 | 2015-03-10 03:00:00,22.5100286532951 285 | 2015-03-10 04:00:00,21.1589648798521 286 | 2015-03-10 05:00:00,25.5180586907449 287 | 2015-03-10 06:00:00,28.0383411580595 288 | 2015-03-10 07:00:00,27.8206845238095 289 | 2015-03-10 
08:00:00,27.3412790697674 290 | 2015-03-10 09:00:00,30.670303030303 291 | 2015-03-10 10:00:00,31.4211480362538 292 | 2015-03-10 11:00:00,29.2047101449275 293 | 2015-03-10 12:00:00,28.9671760045274 294 | 2015-03-10 13:00:00,24.4802259887006 295 | 2015-03-10 14:00:00,26.904887020494 296 | 2015-03-10 15:00:00,26.2162162162162 297 | 2015-03-10 16:00:00,24.1177015755329 298 | 2015-03-10 17:00:00,21.7310405643739 299 | 2015-03-10 18:00:00,23.4462693571093 300 | 2015-03-10 19:00:00,24.4070351758794 301 | 2015-03-10 20:00:00,23.5047892720307 302 | 2015-03-10 21:00:00,25.0391822827939 303 | 2015-03-10 22:00:00,22.4735376044568 304 | 2015-03-10 23:00:00,21.7454545454545 305 | 2015-03-11 00:00:00,20.8316831683168 306 | 2015-03-11 01:00:00,20.3988095238095 307 | 2015-03-11 02:00:00,25.7205882352941 308 | 2015-03-11 03:00:00,19.3909574468085 309 | 2015-03-11 04:00:00,22.1775510204082 310 | 2015-03-11 05:00:00,24.4704463208685 311 | 2015-03-11 06:00:00,28.2923203963666 312 | 2015-03-11 07:00:00,32.9646978954515 313 | 2015-03-11 08:00:00,31.0298864315601 314 | 2015-03-11 09:00:00,31.1660539215686 315 | 2015-03-11 10:00:00,31.6036697247706 316 | 2015-03-11 11:00:00,29.3050147492625 317 | 2015-03-11 12:00:00,31.3047965998786 318 | 2015-03-11 13:00:00,29.5327650506127 319 | 2015-03-11 14:00:00,27.0792515134838 320 | 2015-03-11 15:00:00,27.8416763678696 321 | 2015-03-11 16:00:00,22.9827755905512 322 | 2015-03-11 17:00:00,21.6953818827709 323 | 2015-03-11 18:00:00,21.072183908046 324 | 2015-03-11 19:00:00,23.8263157894737 325 | 2015-03-11 20:00:00,23.379132231405 326 | 2015-03-11 21:00:00,24.7881219903692 327 | 2015-03-11 22:00:00,28.1360381861575 328 | 2015-03-11 23:00:00,18.3306772908367 329 | 2015-03-12 00:00:00,18.5050505050505 330 | 2015-03-12 01:00:00,19.5061728395062 331 | 2015-03-12 02:00:00,18.8347107438017 332 | 2015-03-12 03:00:00,22.6189111747851 333 | 2015-03-12 04:00:00,31.3786231884058 334 | 2015-03-12 05:00:00,26.8054830287206 335 | 2015-03-12 06:00:00,30.3881818181818 336 | 2015-03-12 07:00:00,34.9729015201586 337 | 2015-03-12 08:00:00,33.3316129032258 338 | 2015-03-12 09:00:00,30.5976261127596 339 | 2015-03-12 10:00:00,30.1004901960784 340 | 2015-03-12 11:00:00,28.2622139764997 341 | 2015-03-12 12:00:00,27.7289088863892 342 | 2015-03-12 13:00:00,26.9488448844885 343 | 2015-03-12 14:00:00,25.3577008928571 344 | 2015-03-12 15:00:00,28.875511396844 345 | 2015-03-12 16:00:00,25.0218905472637 346 | 2015-03-12 17:00:00,23.9646258503401 347 | 2015-03-12 18:00:00,21.2590252707581 348 | 2015-03-12 19:00:00,25.4467213114754 349 | 2015-03-12 20:00:00,27.2155172413793 350 | 2015-03-12 21:00:00,27.4224270353303 351 | 2015-03-12 22:00:00,22.8010335917313 352 | 2015-03-12 23:00:00,20.7558528428094 353 | 2015-03-13 00:00:00,21.1244444444444 354 | 2015-03-13 01:00:00,17.6878048780488 355 | 2015-03-13 02:00:00,17.9139784946237 356 | 2015-03-13 03:00:00,19.6694677871148 357 | 2015-03-13 04:00:00,26.6754385964912 358 | 2015-03-13 05:00:00,26.2011764705882 359 | 2015-03-13 06:00:00,27.7895569620253 360 | 2015-03-13 07:00:00,28.9003584229391 361 | 2015-03-13 08:00:00,26.3352308665402 362 | 2015-03-13 09:00:00,29.5382335506817 363 | 2015-03-13 10:00:00,30.7962732919255 364 | 2015-03-13 11:00:00,26.7798340778558 365 | 2015-03-13 12:00:00,28.118372379778 366 | 2015-03-13 13:00:00,25.3286794648051 367 | 2015-03-13 14:00:00,30.8412979351032 368 | 2015-03-13 15:00:00,23.639974779319 369 | 2015-03-13 16:00:00,23.7446921443737 370 | 2015-03-13 17:00:00,23.5460992907801 371 | 2015-03-13 18:00:00,20.5258215962441 372 | 
2015-03-13 19:00:00,20.3337739590218 373 | 2015-03-13 20:00:00,23.8793103448276 374 | 2015-03-13 21:00:00,23.2670807453416 375 | 2015-03-13 22:00:00,23.7845036319613 376 | 2015-03-13 23:00:00,25.0983606557377 377 | 2015-03-14 00:00:00,18.36 378 | 2015-03-14 01:00:00,17.6444444444444 379 | 2015-03-14 02:00:00,19.7090909090909 380 | 2015-03-14 03:00:00,24.988188976378 381 | 2015-03-14 04:00:00,20.0330396475771 382 | 2015-03-14 05:00:00,21.6910466582598 383 | 2015-03-14 06:00:00,28.1556145004421 384 | 2015-03-14 07:00:00,33.249297752809 385 | 2015-03-14 08:00:00,30.2438231469441 386 | 2015-03-14 09:00:00,27.5851334180432 387 | 2015-03-14 10:00:00,29.5948753462604 388 | 2015-03-14 11:00:00,29.5850847457627 389 | 2015-03-14 12:00:00,25.1485557083906 390 | 2015-03-14 13:00:00,25.8072131147541 391 | 2015-03-14 14:00:00,29.7215277777778 392 | 2015-03-14 15:00:00,26.3546831955923 393 | 2015-03-14 16:00:00,22.6961950059453 394 | 2015-03-14 17:00:00,22.1652593486128 395 | 2015-03-14 18:00:00,21.0424242424242 396 | 2015-03-14 19:00:00,23.3754538852578 397 | 2015-03-14 20:00:00,24.2013348164627 398 | 2015-03-14 21:00:00,25.0184331797235 399 | 2015-03-14 22:00:00,20.5744680851064 400 | 2015-03-14 23:00:00,26.706106870229 401 | 2015-03-15 00:00:00,22.8582995951417 402 | 2015-03-15 01:00:00,25.6543209876543 403 | 2015-03-15 02:00:00,21.1559139784946 404 | 2015-03-15 03:00:00,24.4009900990099 405 | 2015-03-15 04:00:00,19.7424657534247 406 | 2015-03-15 05:00:00,24.3513097072419 407 | 2015-03-15 06:00:00,28.3227272727273 408 | 2015-03-15 07:00:00,26.3229813664596 409 | 2015-03-15 08:00:00,27.8095644748079 410 | 2015-03-15 09:00:00,28.7878993881713 411 | 2015-03-15 10:00:00,26.8659003831418 412 | 2015-03-15 11:00:00,27.1098265895954 413 | 2015-03-15 12:00:00,24.7221217600964 414 | 2015-03-15 13:00:00,25.8681983071342 415 | 2015-03-15 14:00:00,27.3538561244329 416 | 2015-03-15 15:00:00,29.4213709677419 417 | 2015-03-15 16:00:00,23.4251405212059 418 | 2015-03-15 17:00:00,20.2107382550336 419 | 2015-03-15 18:00:00,21.0775114737379 420 | 2015-03-15 19:00:00,23.0591278640059 421 | 2015-03-15 20:00:00,25.1794569067296 422 | 2015-03-15 21:00:00,25.8614564831261 423 | 2015-03-15 22:00:00,25.7384196185286 424 | 2015-03-15 23:00:00,21.2105263157895 425 | 2015-03-16 00:00:00,15.9530201342282 426 | 2015-03-16 01:00:00,15.780303030303 427 | 2015-03-16 02:00:00,17.1368421052632 428 | 2015-03-16 03:00:00,22.7872340425532 429 | 2015-03-16 04:00:00,22.8194690265487 430 | 2015-03-16 05:00:00,22.9822222222222 431 | 2015-03-16 06:00:00,26.3947590870668 432 | 2015-03-16 07:00:00,29.5598866052445 433 | 2015-03-16 08:00:00,27.5420382165605 434 | 2015-03-16 09:00:00,31.1468369123622 435 | 2015-03-16 10:00:00,31.5921985815603 436 | 2015-03-16 11:00:00,30.4443005181347 437 | 2015-03-16 12:00:00,27.8133640552995 438 | 2015-03-16 13:00:00,27.444261394838 439 | 2015-03-16 14:00:00,26.5696793002915 440 | 2015-03-16 15:00:00,25.7782581840642 441 | 2015-03-16 16:00:00,26.0707570509649 442 | 2015-03-16 17:00:00,22.9822537710736 443 | 2015-03-16 18:00:00,21.4634489222118 444 | 2015-03-16 19:00:00,22.7584196891192 445 | 2015-03-16 20:00:00,28.5032967032967 446 | 2015-03-16 21:00:00,24.4952015355086 447 | 2015-03-16 22:00:00,20.3276836158192 448 | 2015-03-16 23:00:00,20.2119565217391 449 | 2015-03-17 00:00:00,18.4013605442177 450 | 2015-03-17 01:00:00,22.1764705882353 451 | 2015-03-17 02:00:00,20.093023255814 452 | 2015-03-17 03:00:00,21.4676470588235 453 | 2015-03-17 04:00:00,23.4280510018215 454 | 2015-03-17 05:00:00,27.0607902735562 455 | 
2015-03-17 06:00:00,24.5683661645423 456 | 2015-03-17 07:00:00,28.9464384318056 457 | 2015-03-17 08:00:00,27.6609571788413 458 | 2015-03-17 09:00:00,26.0159574468085 459 | 2015-03-17 10:00:00,29.4181818181818 460 | 2015-03-17 11:00:00,25.3056338028169 461 | 2015-03-17 12:00:00,27.4774487471526 462 | 2015-03-17 13:00:00,27.1627358490566 463 | 2015-03-17 14:00:00,27.9890267175572 464 | 2015-03-17 15:00:00,29.8141809290954 465 | 2015-03-17 16:00:00,22.0773955773956 466 | 2015-03-17 17:00:00,22.7342603321746 467 | 2015-03-17 18:00:00,24.2103336045566 468 | 2015-03-17 19:00:00,24.4427807486631 469 | 2015-03-17 20:00:00,28.2345309381238 470 | 2015-03-17 21:00:00,25.3312302839117 471 | 2015-03-17 22:00:00,24.772397094431 472 | 2015-03-17 23:00:00,22.0725190839695 473 | 2015-03-18 00:00:00,17.2985781990521 474 | 2015-03-18 01:00:00,19.555 475 | 2015-03-18 02:00:00,18.3414634146341 476 | 2015-03-18 03:00:00,17.84 477 | 2015-03-18 04:00:00,21.9386733416771 478 | 2015-03-18 05:00:00,24.3677521842732 479 | 2015-03-18 06:00:00,24.8710073710074 480 | 2015-03-18 07:00:00,26.7684729064039 481 | 2015-03-18 08:00:00,30.9969465648855 482 | 2015-03-18 09:00:00,24.9227010217681 483 | 2015-03-18 10:00:00,26.7915921288014 484 | 2015-03-18 11:00:00,26.7732276530163 485 | 2015-03-18 12:00:00,25.3327205882353 486 | 2015-03-18 13:00:00,26.8013513513514 487 | 2015-03-18 14:00:00,26.5617150281507 488 | 2015-03-18 15:00:00,27.6826789838337 489 | 2015-03-18 16:00:00,23.8123271434216 490 | 2015-03-18 17:00:00,21.4181600955794 491 | 2015-03-18 18:00:00,22.2619142971566 492 | 2015-03-18 19:00:00,19.6082926829268 493 | 2015-03-18 20:00:00,23.7971656333038 494 | 2015-03-18 21:00:00,22.2793696275072 495 | 2015-03-18 22:00:00,25.2553191489362 496 | 2015-03-18 23:00:00,23.3793103448276 497 | 2015-03-19 00:00:00,16.6595744680851 498 | 2015-03-19 01:00:00,19.5530303030303 499 | 2015-03-19 02:00:00,16.3501483679525 500 | 2015-03-19 03:00:00,24.1946721311475 501 | 2015-03-19 04:00:00,20.9445161290323 502 | 2015-03-19 05:00:00,22.2085889570552 503 | 2015-03-19 06:00:00,26.5666848121938 504 | 2015-03-19 07:00:00,24.9769094138544 505 | 2015-03-19 08:00:00,27.0605263157895 506 | 2015-03-19 09:00:00,28.5817307692308 507 | 2015-03-19 10:00:00,26.6025163094129 508 | 2015-03-19 11:00:00,25.7365988909427 509 | 2015-03-19 12:00:00,29.5469483568075 510 | 2015-03-19 13:00:00,24.523381294964 511 | 2015-03-19 14:00:00,25.2951464801049 512 | 2015-03-19 15:00:00,27.4358365019011 513 | 2015-03-19 16:00:00,26.2193521935219 514 | 2015-03-19 17:00:00,23.9175090982612 515 | 2015-03-19 18:00:00,21.3403343334762 516 | 2015-03-19 19:00:00,24.770223325062 517 | 2015-03-19 20:00:00,27.6247833622184 518 | 2015-03-19 21:00:00,20.925 519 | 2015-03-19 22:00:00,25.8976034858388 520 | 2015-03-19 23:00:00,21.9247311827957 521 | 2015-03-20 00:00:00,23.8823529411765 522 | 2015-03-20 01:00:00,16.9314285714286 523 | 2015-03-20 02:00:00,22.2440476190476 524 | 2015-03-20 03:00:00,19.3590308370044 525 | 2015-03-20 04:00:00,20.9258809234508 526 | 2015-03-20 05:00:00,22.1901840490798 527 | 2015-03-20 06:00:00,25.4012572027239 528 | 2015-03-20 07:00:00,26.0635481023831 529 | 2015-03-20 08:00:00,26.0008833922262 530 | 2015-03-20 09:00:00,28.3378136200717 531 | 2015-03-20 10:00:00,28.4029149036201 532 | 2015-03-20 11:00:00,26.2695443645084 533 | 2015-03-20 12:00:00,26.6776406035665 534 | 2015-03-20 13:00:00,26.750103950104 535 | 2015-03-20 14:00:00,24.9796015180266 536 | 2015-03-20 15:00:00,24.6272630457934 537 | 2015-03-20 16:00:00,23.064596895343 538 | 2015-03-20 
17:00:00,21.9794016674841 539 | 2015-03-20 18:00:00,20.7879417879418 540 | 2015-03-20 19:00:00,20.4537396121884 541 | 2015-03-20 20:00:00,19.0947054436987 542 | 2015-03-20 21:00:00,20.9384941675504 543 | 2015-03-20 22:00:00,20.7981859410431 544 | 2015-03-20 23:00:00,22.8518518518519 545 | 2015-03-21 00:00:00,17.5186721991701 546 | 2015-03-21 01:00:00,15.7032967032967 547 | 2015-03-21 02:00:00,15.7293577981651 548 | 2015-03-21 03:00:00,17.7588424437299 549 | 2015-03-21 04:00:00,24.3440514469453 550 | 2015-03-21 05:00:00,22.3546931407942 551 | 2015-03-21 06:00:00,24.2947692307692 552 | 2015-03-21 07:00:00,23.0253104106972 553 | 2015-03-21 08:00:00,26.0979020979021 554 | 2015-03-21 09:00:00,26.5299607072692 555 | 2015-03-21 10:00:00,26.6897605705553 556 | 2015-03-21 11:00:00,26.9560321715818 557 | 2015-03-21 12:00:00,26.9453870625663 558 | 2015-03-21 13:00:00,24.6965669988926 559 | 2015-03-21 14:00:00,24.6776315789474 560 | 2015-03-21 15:00:00,24.4035549703753 561 | 2015-03-21 16:00:00,22.531660011409 562 | 2015-03-21 17:00:00,22.00655379574 563 | 2015-03-21 18:00:00,19.8923976608187 564 | 2015-03-21 19:00:00,20.1047297297297 565 | 2015-03-21 20:00:00,17.3588979895756 566 | 2015-03-21 21:00:00,16.670626349892 567 | 2015-03-21 22:00:00,21.069387755102 568 | 2015-03-21 23:00:00,18.3180212014134 569 | 2015-03-22 00:00:00,18.2283105022831 570 | 2015-03-22 01:00:00,19.734375 571 | 2015-03-22 02:00:00,22.3967391304348 572 | 2015-03-22 03:00:00,17.3571428571429 573 | 2015-03-22 04:00:00,21.2325581395349 574 | 2015-03-22 05:00:00,23.4267912772586 575 | 2015-03-22 06:00:00,25.1833105335157 576 | 2015-03-22 07:00:00,25.8644432490586 577 | 2015-03-22 08:00:00,28.2544474393531 578 | 2015-03-22 09:00:00,27.1800900450225 579 | 2015-03-22 10:00:00,28.6986771190593 580 | 2015-03-22 11:00:00,29.2204646564508 581 | 2015-03-22 12:00:00,27.3603941811356 582 | 2015-03-22 13:00:00,23.4411187438665 583 | 2015-03-22 14:00:00,27.5479452054795 584 | 2015-03-22 15:00:00,24.367234744365 585 | 2015-03-22 16:00:00,25.631369073992 586 | 2015-03-22 17:00:00,21.7873767258383 587 | 2015-03-22 18:00:00,21.3076588337685 588 | 2015-03-22 19:00:00,18.9932104752667 589 | 2015-03-22 20:00:00,18.671809256662 590 | 2015-03-22 21:00:00,22.8163265306122 591 | 2015-03-22 22:00:00,25.5895316804408 592 | 2015-03-22 23:00:00,31.3552631578947 593 | 2015-03-23 00:00:00,22.4545454545455 594 | 2015-03-23 01:00:00,22.975845410628 595 | 2015-03-23 02:00:00,19.4713114754098 596 | 2015-03-23 03:00:00,18.3803418803419 597 | 2015-03-23 04:00:00,18.4460526315789 598 | 2015-03-23 05:00:00,23.2604248623131 599 | 2015-03-23 06:00:00,24.414367816092 600 | 2015-03-23 07:00:00,25.1814993423937 601 | 2015-03-23 08:00:00,27.236165577342 602 | 2015-03-23 09:00:00,30.8409090909091 603 | 2015-03-23 10:00:00,28.3339230429014 604 | 2015-03-23 11:00:00,24.1212401665895 605 | 2015-03-23 12:00:00,27.8032928942808 606 | 2015-03-23 13:00:00,26.2918770078017 607 | 2015-03-23 14:00:00,28.8463194145502 608 | 2015-03-23 15:00:00,27.6798917944094 609 | 2015-03-23 16:00:00,24.2913143735588 610 | 2015-03-23 17:00:00,22.5641124374278 611 | 2015-03-23 18:00:00,21.6708149337615 612 | 2015-03-23 19:00:00,19.7495088408644 613 | 2015-03-23 20:00:00,20.4964028776978 614 | 2015-03-23 21:00:00,22.4021352313167 615 | 2015-03-23 22:00:00,21.3492723492724 616 | 2015-03-23 23:00:00,17.975 617 | 2015-03-24 00:00:00,20.8951612903226 618 | 2015-03-24 01:00:00,19.2450980392157 619 | 2015-03-24 02:00:00,18.6706827309237 620 | 2015-03-24 03:00:00,16.2162790697674 621 | 2015-03-24 
04:00:00,18.4366028708134 622 | 2015-03-24 05:00:00,24.2303543913713 623 | 2015-03-24 06:00:00,23.3825352112676 624 | 2015-03-24 07:00:00,24.1516721620349 625 | 2015-03-24 08:00:00,26.2023201856149 626 | 2015-03-24 09:00:00,26.1901608325449 627 | 2015-03-24 10:00:00,27.2214217098943 628 | 2015-03-24 11:00:00,27.7117619711762 629 | 2015-03-24 12:00:00,25.2040368582712 630 | 2015-03-24 13:00:00,24.8909169926119 631 | 2015-03-24 14:00:00,28.3252336448598 632 | 2015-03-24 15:00:00,25.0474327628362 633 | 2015-03-24 16:00:00,24.1966116807847 634 | 2015-03-24 17:00:00,25.5402629416598 635 | 2015-03-24 18:00:00,21.4756825938567 636 | 2015-03-24 19:00:00,20.8928215353938 637 | 2015-03-24 20:00:00,20.8518024032043 638 | 2015-03-24 21:00:00,22 639 | 2015-03-24 22:00:00,22.1857451403888 640 | 2015-03-24 23:00:00,18.7272727272727 641 | 2015-03-25 00:00:00,18.4285714285714 642 | 2015-03-25 01:00:00,21.51 643 | 2015-03-25 02:00:00,15.8721804511278 644 | 2015-03-25 03:00:00,19.859649122807 645 | 2015-03-25 04:00:00,23.3865853658537 646 | 2015-03-25 05:00:00,20.8534072900158 647 | 2015-03-25 06:00:00,23.74617196702 648 | 2015-03-25 07:00:00,26.0441102756892 649 | 2015-03-25 08:00:00,26.6099603349493 650 | 2015-03-25 09:00:00,29.8168631006346 651 | 2015-03-25 10:00:00,28.4230944774076 652 | 2015-03-25 11:00:00,24.8701176470588 653 | 2015-03-25 12:00:00,28.9628780934922 654 | 2015-03-25 13:00:00,24.5961373390558 655 | 2015-03-25 14:00:00,25.3090047393365 656 | 2015-03-25 15:00:00,25.0683073832245 657 | 2015-03-25 16:00:00,23.8601784955376 658 | 2015-03-25 17:00:00,21.6362520458265 659 | 2015-03-25 18:00:00,22.6860413914575 660 | 2015-03-25 19:00:00,20.0084830339321 661 | 2015-03-25 20:00:00,19.3349120433018 662 | 2015-03-25 21:00:00,19.1688741721854 663 | 2015-03-25 22:00:00,16.9904458598726 664 | 2015-03-25 23:00:00,19.6778523489933 665 | 2015-03-26 00:00:00,17.9490909090909 666 | 2015-03-26 01:00:00,16.2067307692308 667 | 2015-03-26 02:00:00,20.0844594594595 668 | 2015-03-26 03:00:00,19.1165254237288 669 | 2015-03-26 04:00:00,22.1415384615385 670 | 2015-03-26 05:00:00,26.7084048027444 671 | 2015-03-26 06:00:00,24.1033415841584 672 | 2015-03-26 07:00:00,25.2360909530721 673 | 2015-03-26 08:00:00,27.3369928400955 674 | 2015-03-26 09:00:00,28.4140156754265 675 | 2015-03-26 10:00:00,24.100185528757 676 | 2015-03-26 11:00:00,28.0331065759637 677 | 2015-03-26 12:00:00,26.8276604911676 678 | 2015-03-26 13:00:00,24.8519332161687 679 | 2015-03-26 14:00:00,27.9479214253084 680 | 2015-03-26 15:00:00,25.4592881521209 681 | 2015-03-26 16:00:00,24.6757904672015 682 | 2015-03-26 17:00:00,23.2258628221931 683 | 2015-03-26 18:00:00,21.0138376383764 684 | 2015-03-26 19:00:00,18.9338624338624 685 | 2015-03-26 20:00:00,21.3146802325581 686 | 2015-03-26 21:00:00,20.2665941240479 687 | 2015-03-26 22:00:00,23.1523178807947 688 | 2015-03-26 23:00:00,27.9404255319149 689 | 2015-03-27 00:00:00,20.8920187793427 690 | 2015-03-27 01:00:00,17.0045662100457 691 | 2015-03-27 02:00:00,15.7035830618893 692 | 2015-03-27 03:00:00,17.4060606060606 693 | 2015-03-27 04:00:00,20.2689573459716 694 | 2015-03-27 05:00:00,24.8573596358118 695 | 2015-03-27 06:00:00,24.5608938547486 696 | 2015-03-27 07:00:00,24.0371251784864 697 | 2015-03-27 08:00:00,25.0064367816092 698 | 2015-03-27 09:00:00,30.5513253012048 699 | 2015-03-27 10:00:00,27.0618875709128 700 | 2015-03-27 11:00:00,27.2254452926209 701 | 2015-03-27 12:00:00,26.504873294347 702 | 2015-03-27 13:00:00,23.9334001001502 703 | 2015-03-27 14:00:00,24.4025330396476 704 | 2015-03-27 
15:00:00,24.6264336428181 705 | 2015-03-27 16:00:00,24.3900370959194 706 | 2015-03-27 17:00:00,22.7385218365062 707 | 2015-03-27 18:00:00,23.4178119846069 708 | 2015-03-27 19:00:00,20.4463311097635 709 | 2015-03-27 20:00:00,19.6402877697842 710 | 2015-03-27 21:00:00,17.4593301435407 711 | 2015-03-27 22:00:00,18.0579710144928 712 | 2015-03-27 23:00:00,22.7525252525253 713 | 2015-03-28 00:00:00,20.9364548494983 714 | 2015-03-28 01:00:00,15.0909090909091 715 | 2015-03-28 02:00:00,15.4363636363636 716 | 2015-03-28 03:00:00,19.2470588235294 717 | 2015-03-28 04:00:00,20.7334437086093 718 | 2015-03-28 05:00:00,20.779744346116 719 | 2015-03-28 06:00:00,23.7188146106134 720 | 2015-03-28 07:00:00,23.9590608067429 721 | 2015-03-28 08:00:00,23.6607422895975 722 | 2015-03-28 09:00:00,23.8080333854982 723 | 2015-03-28 10:00:00,25.1625329815303 724 | 2015-03-28 11:00:00,24.1467081389321 725 | 2015-03-28 12:00:00,27.2292011019284 726 | 2015-03-28 13:00:00,26.2145127118644 727 | 2015-03-28 14:00:00,22.1184558434691 728 | 2015-03-28 15:00:00,24.7711962833914 729 | 2015-03-28 16:00:00,21.2360069244085 730 | 2015-03-28 17:00:00,18.9884918231375 731 | 2015-03-28 18:00:00,19.8089552238806 732 | 2015-03-28 19:00:00,21.0963696369637 733 | 2015-03-28 20:00:00,16.9170274170274 734 | 2015-03-28 21:00:00,16.0174311926605 735 | 2015-03-28 22:00:00,18.3006872852234 736 | 2015-03-28 23:00:00,32.7559523809524 737 | 2015-03-29 00:00:00,27.5529953917051 738 | 2015-03-29 01:00:00,24.4081632653061 739 | 2015-03-29 02:00:00,23.734375 740 | 2015-03-29 03:00:00,21.4514767932489 741 | 2015-03-29 04:00:00,16.919587628866 742 | 2015-03-29 05:00:00,23.0262329485834 743 | 2015-03-29 06:00:00,22.6066763425254 744 | 2015-03-29 07:00:00,22.7418414918415 745 | 2015-03-29 08:00:00,23.4832251082251 746 | 2015-03-29 09:00:00,24.1176770249618 747 | 2015-03-29 10:00:00,25.9053921568627 748 | 2015-03-29 11:00:00,23.6878172588832 749 | 2015-03-29 12:00:00,22.6437768240343 750 | 2015-03-29 13:00:00,24.5553145336226 751 | 2015-03-29 14:00:00,27.0761501850873 752 | 2015-03-29 15:00:00,21.9830754352031 753 | 2015-03-29 16:00:00,22.2315837937385 754 | 2015-03-29 17:00:00,21.2672527472528 755 | 2015-03-29 18:00:00,21.3931902985075 756 | 2015-03-29 19:00:00,19.5539702233251 757 | 2015-03-29 20:00:00,23.5206124852768 758 | 2015-03-29 21:00:00,27.3165467625899 759 | 2015-03-29 22:00:00,21.8833333333333 760 | 2015-03-29 23:00:00,19.547263681592 761 | 2015-03-30 00:00:00,31.5057471264368 762 | 2015-03-30 01:00:00,22.9677419354839 763 | 2015-03-30 02:00:00,17.241935483871 764 | 2015-03-30 03:00:00,21.8156028368794 765 | 2015-03-30 04:00:00,19.6824512534819 766 | 2015-03-30 05:00:00,23.3267918088737 767 | 2015-03-30 06:00:00,23.4364381956649 768 | -------------------------------------------------------------------------------- /man/AnomalyDetection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AnomalyDetection-package.R 3 | \docType{package} 4 | \name{AnomalyDetection} 5 | \alias{AnomalyDetection} 6 | \alias{AnomalyDetection-package} 7 | \title{Anomaly Detection Using Seasonal Hybrid Extreme Studentized Deviate Test} 8 | \description{ 9 | A technique for detecting anomalies in seasonal univariate time series. 10 | The methods uses are robust, from a statistical standpoint, in the presence of 11 | seasonality and an underlying trend. These methods can be used in 12 | wide variety of contexts. 
For example, detecting anomalies in system metrics after 13 | a new software release, user engagement post an 'A/B' test, or for problems in 14 | econometrics, financial engineering, political and social sciences. 15 | } 16 | \author{ 17 | Owen S. Vallis, Jordan Hochenbaum, Arun Kejariwal; Modernization 18 | contributions by Bob Rudis 19 | } 20 | -------------------------------------------------------------------------------- /man/AnomalyDetectionTs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ts_anom_detection.R 3 | \name{AnomalyDetectionTs} 4 | \alias{AnomalyDetectionTs} 5 | \alias{ad_ts} 6 | \title{Anomaly Detection Using Seasonal Hybrid ESD Test} 7 | \usage{ 8 | AnomalyDetectionTs(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 9 | only_last = NULL, threshold = "None", e_value = FALSE, 10 | longterm = FALSE, piecewise_median_period_weeks = 2, verbose = FALSE, 11 | na.rm = FALSE) 12 | 13 | ad_ts(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 14 | only_last = NULL, threshold = "None", e_value = FALSE, 15 | longterm = FALSE, piecewise_median_period_weeks = 2, verbose = FALSE, 16 | na.rm = FALSE) 17 | } 18 | \arguments{ 19 | \item{x}{Time series as a two column data frame where the first column consists of the 20 | timestamps and the second column consists of the observations.} 21 | 22 | \item{max_anoms}{Maximum number of anomalies that S-H-ESD will detect as a percentage of the 23 | data.} 24 | 25 | \item{direction}{Directionality of the anomalies to be detected. One of: 26 | \code{pos}, \code{neg}, \code{both}.} 27 | 28 | \item{alpha}{The level of statistical significance with which to accept or reject anomalies.} 29 | 30 | \item{only_last}{Find and report anomalies only within the last day or hr in the time series. 31 | One of \code{NULL}, \code{day}, \code{hr}.} 32 | 33 | \item{threshold}{Only report positive-going anoms above the threshold specified. One of: 34 | \code{None}, \code{med_max}, \code{p95}, \code{p99}.} 35 | 36 | \item{e_value}{Add an additional column to the anoms output containing the expected value.} 37 | 38 | \item{longterm}{Increase anom detection efficacy for time series that are greater than a month. 39 | See `Details` below.} 40 | 41 | \item{piecewise_median_period_weeks}{The piecewise median time window as described in Vallis, 42 | Hochenbaum, and Kejariwal (2014). Defaults to 2.} 43 | 44 | \item{verbose}{Enable debug messages.} 45 | 46 | \item{na.rm}{Remove any NAs in timestamps. (default: \code{FALSE})} 47 | } 48 | \value{ 49 | The returned value is a data frame containing timestamps, values, 50 | and optionally expected values. 51 | } 52 | \description{ 53 | A technique for detecting anomalies in seasonal univariate time series where the input is a 54 | series of <timestamp, count> pairs. 55 | } 56 | \examples{ 57 | data(raw_data) 58 | 59 | ad_ts(raw_data, max_anoms=0.02, direction='both') 60 | 61 | # To detect only the anomalies on the last day, run the following: 62 | 63 | ad_ts(raw_data, max_anoms=0.02, direction='both', only_last="day") 64 | } 65 | \references{ 66 | \itemize{ 67 | \item Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) 68 | "A Novel Technique for Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA.
69 | (\url{https://www.usenix.org/system/files/conference/hotcloud14/hotcloud14-vallis.pdf}) 70 | \item Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier 71 | Procedure", Technometrics, 25(2), pp. 165-172. (\url{https://www.jstor.org/stable/1268549}) 72 | } 73 | } 74 | \seealso{ 75 | \code{\link[=ad_vec]{ad_vec()}} 76 | } 77 | -------------------------------------------------------------------------------- /man/AnomalyDetectionVec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vec_anom_detection.R 3 | \name{AnomalyDetectionVec} 4 | \alias{AnomalyDetectionVec} 5 | \alias{ad_vec} 6 | \title{Anomaly Detection Using Seasonal Hybrid ESD Test} 7 | \usage{ 8 | AnomalyDetectionVec(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 9 | period = NULL, only_last = FALSE, threshold = "None", e_value = FALSE, 10 | longterm_period = NULL, verbose = FALSE) 11 | 12 | ad_vec(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 13 | period = NULL, only_last = FALSE, threshold = "None", e_value = FALSE, 14 | longterm_period = NULL, verbose = FALSE) 15 | } 16 | \arguments{ 17 | \item{x}{Time series as a column data frame, list, or vector, where the column consists of 18 | the observations.} 19 | 20 | \item{max_anoms}{Maximum number of anomalies that S-H-ESD will detect as a percentage of the 21 | data.} 22 | 23 | \item{direction}{Directionality of the anomalies to be detected. One of: 24 | \code{pos}, \code{neg}, \code{both}.} 25 | 26 | \item{alpha}{The level of statistical significance with which to accept or reject anomalies.} 27 | 28 | \item{period}{Defines the number of observations in a single period, and is used during seasonal 29 | decomposition.} 30 | 31 | \item{only_last}{Find and report anomalies only within the last period in the time series.} 32 | 33 | \item{threshold}{Only report positive-going anoms above the threshold specified. One of: 34 | \code{None}, \code{med_max}, \code{p95}, \code{p99}.} 35 | 36 | \item{e_value}{Add an additional column to the anoms output containing the expected value.} 37 | 38 | \item{longterm_period}{Defines the number of observations for which the trend can be considered 39 | flat. The value should be an integer multiple of the number of observations in a single period. 40 | This increases anom detection efficacy for time series that are greater than a month.} 41 | 42 | \item{verbose}{Enable debug messages.} 43 | } 44 | \value{ 45 | The returned value is a data frame with the following columns: 46 | 47 | index, values, and optionally expected values. 48 | } 49 | \description{ 50 | A technique for detecting anomalies in seasonal univariate time series where the input is a 51 | series of observations. 52 | } 53 | \examples{ 54 | data(raw_data) 55 | 56 | ad_vec(raw_data[,2], max_anoms=0.02, period=1440, direction='both') 57 | 58 | # To detect only the anomalies in the last period, run the following: 59 | 60 | ad_vec( 61 | raw_data[,2], max_anoms=0.02, period=1440, direction='both', only_last=TRUE 62 | ) 63 | } 64 | \references{ 65 | \itemize{ 66 | \item Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) 67 | "A Novel Technique for Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA.
68 | (\url{https://www.usenix.org/system/files/conference/hotcloud14/hotcloud14-vallis.pdf}) 69 | \item Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier 70 | Procedure", Technometrics, 25(2), pp. 165-172. (\url{https://www.jstor.org/stable/1268549}) 71 | } 72 | } 73 | \seealso{ 74 | \code{\link[=ad_ts]{ad_ts()}} 75 | } 76 | -------------------------------------------------------------------------------- /man/raw_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/raw_data.R 3 | \docType{data} 4 | \name{raw_data} 5 | \alias{raw_data} 6 | \title{raw_data} 7 | \usage{ 8 | data(raw_data) 9 | } 10 | \description{ 11 | A data frame containing a time series with headings timestamp and count. 12 | } 13 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library('testthat') 2 | test_check("AnomalyDetection") 3 | -------------------------------------------------------------------------------- /tests/testthat/test-NAs.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Testing NAs") 4 | 5 | test_that("check handling of datasets with leading and trailing NAs", { 6 | data(raw_data) 7 | raw_data[1:10, "count"] <- NA 8 | raw_data[length(raw_data[[2L]]), "count"] <- NA 9 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both') 10 | expect_equal(length(results), 2) 11 | expect_equal(length(results[[2L]]), 131) 12 | }) 13 | 14 | test_that("check handling of datasets with NAs in the middle", { 15 | data(raw_data) 16 | raw_data[floor(length(raw_data[[2L]])/2), "count"] <- NA 17 | expect_error(AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both')) 18 | }) 19 | 20 | test_that("check handling of datasets with leading and trailing NAs", { 21 | data(raw_data) 22 | raw_data[1:10, "count"] <- NA 23 | raw_data[length(raw_data[[2L]]), "count"] <- NA 24 | results <- AnomalyDetectionVec(raw_data[[2]], max_anoms=0.02, period=1440, direction='both') 25 | expect_equal(length(results), 2) 26 | expect_equal(length(results[[2L]]), 131) 27 | }) 28 | 29 | test_that("check handling of datasets with NAs in the middle", { 30 | data(raw_data) 31 | raw_data[floor(length(raw_data[[2L]])/2), "count"] <- NA 32 | expect_error(AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, 33 | period=1440, direction='both')) 34 | }) 35 | -------------------------------------------------------------------------------- /tests/testthat/test-edge.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Testing edge cases") 4 | 5 | test_that("checking for errors if time series has constant value for all values", { 6 | data <- rep(1,1000) 7 | expect_true({AnomalyDetectionVec(data, period=14, direction='both'); TRUE}) 8 | 9 | }) 10 | 11 | test_that("checking that midnight dates get H%M%S format applied", { 12 | data_file <- system.file("extdata", "data.csv", package = "AnomalyDetection") 13 | data <- read.csv(data_file) 14 | data$date <- as.POSIXct(strptime(data$date, "%Y-%m-%d %H:%M", tz = "UTC")) 15 | anomalyDetectionResult <- AnomalyDetectionTs(data, max_anoms=0.2, threshold = "None", 16 | direction='both', 17 | only_last = "day", e_value = TRUE) 18 | 19 | 
expect_equal(length(anomalyDetectionResult$anoms), 20 | length(anomalyDetectionResult$expected_value)) 21 | }) 22 | -------------------------------------------------------------------------------- /tests/testthat/test-ts.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | context("Evaluation: AnomalyDetectionTs") 3 | 4 | test_that("last day, both directions", { 5 | data(raw_data) 6 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', only_last='day') 7 | expect_equal(length(results), 2) 8 | expect_equal(length(results[[2L]]), 25) 9 | }) 10 | 11 | test_that("both directions, e_value, with longterm", { 12 | data(raw_data) 13 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', longterm=TRUE, e_value=TRUE) 14 | expect_equal(length(results), 3) 15 | expect_equal(length(results[[2L]]), 131) 16 | }) 17 | 18 | test_that("both directions, e_value, threshold set to med_max", { 19 | data(raw_data) 20 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', threshold="med_max", e_value=TRUE) 21 | expect_equal(length(results), 3) 22 | expect_equal(length(results[[2L]]), 4) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-vec.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Evaluation: AnomalyDetectionVec") 4 | 5 | test_that("last period, both directions", { 6 | data(raw_data) 7 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, only_last=TRUE) 8 | expect_equal(length(results), 2) 9 | expect_equal(length(results[[2L]]), 25) 10 | }) 11 | 12 | test_that("both directions, e_value, with longterm", { 13 | data(raw_data) 14 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, longterm_period=1440*14, e_value=TRUE) 15 | expect_equal(length(results), 3) 16 | expect_equal(length(results[[2L]]), 131) 17 | }) 18 | 19 | test_that("both directions, e_value, threshold set to med_max", { 20 | data(raw_data) 21 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, threshold="med_max", e_value=TRUE) 22 | expect_equal(length(results), 3) 23 | expect_equal(length(results[[2L]]), 6) 24 | }) 25 | --------------------------------------------------------------------------------
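For quick reference, the two documented entry points above (the data-frame interface in man/AnomalyDetectionTs.Rd and the vector interface in man/AnomalyDetectionVec.Rd) can be combined into a short workflow. The R sketch below is illustrative only and is not a file in this repository: it assumes the package is installed, reuses the bundled raw_data dataset and the argument values shown in the examples and tests above (max_anoms = 0.02, period = 1440), and infers the result column names (anoms, expected_value) from tests/testthat/test-edge.R.

library(AnomalyDetection)

data(raw_data)   # bundled data frame with columns: timestamp, count

# Data-frame interface: first column holds timestamps, second the observations.
res_ts <- ad_ts(raw_data, max_anoms = 0.02, direction = "both", e_value = TRUE)
head(res_ts)     # anomalous timestamps with observed and expected values
nrow(res_ts)     # number of anomalies flagged

# Vector interface: pass the observations plus an explicit seasonal period,
# here reporting only anomalies that fall in the last period.
res_vec <- ad_vec(raw_data[[2L]], max_anoms = 0.02, period = 1440,
                  direction = "both", only_last = TRUE)
head(res_vec)    # anomalous indices and values

Both functions implement the Seasonal Hybrid ESD (S-H-ESD) procedure described in Vallis, Hochenbaum, and Kejariwal (2014), cited in the man pages; the legacy names AnomalyDetectionTs() and AnomalyDetectionVec() remain available as aliases for ad_ts() and ad_vec().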