├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── date_utils.R ├── detect_anoms.R ├── plot_utils.R ├── raw_data.R ├── ts_anom_detection.R └── vec_anom_detection.R ├── README.md ├── data └── raw_data.rda ├── figs ├── Fig1.png └── Fig2.png ├── inst └── extdata │ └── data.csv ├── man ├── AnomalyDetectionTs.Rd ├── AnomalyDetectionVec.Rd └── raw_data.Rd └── tests ├── testthat.R └── testthat ├── test-NAs.R ├── test-edge.R ├── test-ts.R └── test-vec.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | LICENSE 2 | .travis.yml 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | 4 | # Example code in package build process 5 | *-Ex.R 6 | 7 | # R data files from past sessions 8 | .Rdata 9 | 10 | # RStudio files 11 | .Rproj.user/ 12 | .Rproj.user 13 | *.Rproj 14 | 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Sample .travis.yml for R projects. 
2 | # 3 | # See https://github.com/craigcitro/r-travis 4 | # https://github.com/eddelbuettel/r-travis/ 5 | 6 | language: c 7 | 8 | sudo: true 9 | 10 | env: 11 | global: 12 | - _R_CHECK_FORCE_SUGGESTS_=FALSE 13 | 14 | before_install: 15 | - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh 16 | - chmod 755 ./travis-tool.sh 17 | - ./travis-tool.sh bootstrap 18 | 19 | install: 20 | - ./travis-tool.sh install_r Rcpp ggplot2 stringr lubridate testthat 21 | 22 | script: 23 | - ./travis-tool.sh run_tests 24 | 25 | notifications: 26 | email: 27 | on_success: change 28 | on_failure: change 29 | 30 | sudo: true 31 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: AnomalyDetection 2 | Type: Package 3 | Title: Anomaly Detection Using Seasonal Hybrid Extreme Studentized 4 | Deviate Test 5 | Version: 1.0 6 | Date: 2014-09-24 7 | Author: Owen S. Vallis, Jordan Hochenbaum, Arun Kejariwal 8 | Maintainer: Owen S. Vallis , Jordan Hochenbaum 9 | 10 | Description: A technique for detecting anomalies in seasonal univariate time 11 | series. 12 | ByteCompile: yes 13 | Imports: ggplot2, stringr, lubridate 14 | Depends: R (>= 2.10.0) 15 | Suggests: testthat 16 | License: GPL-3 17 | LazyData: true 18 | Packaged: 2014-12-08 07:54:07 UTC; owenvallis 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 
12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. 
For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 
83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 
117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 
146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 
180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 
246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 
format_timestamp <- function(indf, index = 1) {
  # Parse the timestamp column of a data frame into POSIXlt (UTC).
  #
  # The layout is inferred from the first non-missing entry of the column and
  # is then applied to the whole column.
  #
  # Args:
  #   indf: Data frame whose column `index` holds the timestamps.
  #   index: Position (or name) of the timestamp column.
  # Returns:
  #   indf with the timestamp column converted to POSIXlt. indf is returned
  #   unchanged when the column already is POSIXlt, holds no non-missing
  #   value, or matches none of the recognised layouts.
  stamps <- indf[[index]]
  if (inherits(stamps, "POSIXlt")) {
    return(indf)
  }

  # Factor columns carry the timestamp text in their labels.
  if (is.factor(stamps)) {
    stamps <- as.character(stamps)
  }

  # Probe the first non-missing entry: a leading NA must not abort parsing,
  # the caller decides later what to do with missing timestamps.
  present <- stamps[!is.na(stamps)]
  if (length(present) == 0L) {
    return(indf)
  }
  probe <- as.character(present[1L])

  # Recognised textual layouts: c(regular expression, strptime format).
  # A trailing "+dddd" offset is accepted but ignored by strptime.
  layouts <- list(
    c("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2} \\+\\d{4}$", "%Y-%m-%d %H:%M:%S"),
    c("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}$", "%Y-%m-%d %H:%M:%S"),
    c("^\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}$", "%Y-%m-%d %H:%M"),
    c("^\\d{2}/\\d{2}/\\d{2}$", "%m/%d/%y"),
    c("^\\d{2}/\\d{2}/\\d{4}$", "%m/%d/%Y"),
    c("^\\d{4}\\d{2}\\d{2}$", "%Y%m%d"),
    c("^\\d{4}/\\d{2}/\\d{2}/\\d{2}$", "%Y/%m/%d/%H")
  )
  for (layout in layouts) {
    if (grepl(layout[1L], probe, perl = TRUE)) {
      indf[[index]] <- strptime(stamps, format = layout[2L], tz = "UTC")
      return(indf)
    }
  }

  if (grepl("^\\d{10}$", probe, perl = TRUE)) {
    # Ten digits: Unix epoch in seconds. Convert to numeric first, because
    # as.POSIXlt() cannot apply `origin` to character input.
    indf[[index]] <- as.POSIXlt(as.numeric(stamps), origin = "1970-01-01", tz = "UTC")
  }

  indf
}
get_gran <- function(tsdf, index = 1) {
  # Infer the sampling granularity of a time series.
  #
  # The granularity is derived from the (rounded) gap, in seconds, between the
  # largest timestamp and the second-to-last entry of the sorted timestamps.
  #
  # Args:
  #   tsdf: Data frame (or list) whose element `index` holds date-time values.
  #   index: Position (or name) of the timestamp column.
  # Returns:
  #   One of "day", "hr", "min", "sec" or "ms".
  # Raises:
  #   An error when fewer than two timestamps are supplied or when the gap
  #   cannot be computed because of missing timestamps.
  stamps <- tsdf[[index]]
  n <- length(stamps)
  if (n < 2L) {
    stop("get_gran needs at least two timestamps to infer the granularity")
  }

  newest <- max(stamps)
  runner_up <- sort(stamps)[n - 1L]
  gap_secs <- as.numeric(round(difftime(newest, runner_up, units = "secs")))
  if (is.na(gap_secs)) {
    stop("get_gran cannot infer the granularity from missing timestamps")
  }

  # Lower bounds (in seconds) of "sec", "min", "hr" and "day"; anything
  # below one second is reported as "ms".
  labels <- c("ms", "sec", "min", "hr", "day")
  labels[findInterval(gap_secs, c(1, 60, 3600, 86400)) + 1L]
}
detect_anoms <- function(data, k = 0.49, alpha = 0.05, num_obs_per_period = NULL,
                         use_decomp = TRUE, use_esd = FALSE, one_tail = TRUE,
                         upper_tail = TRUE, verbose = FALSE) {
  # Detects anomalies in a time series using S-H-ESD.
  #
  # Args:
  #   data: Two column data frame: timestamps first, observations second.
  #   k: Maximum number of anomalies that S-H-ESD will detect, as a fraction
  #      of the number of rows in data.
  #   alpha: The level of statistical significance with which to accept or
  #          reject anomalies.
  #   num_obs_per_period: Number of observations in a single period; used as
  #                       the frequency of the seasonal decomposition.
  #   use_decomp: Accepted for interface compatibility. NOTE: not consulted by
  #               this implementation, the decomposition is always performed.
  #   use_esd: Accepted for interface compatibility. NOTE: not consulted by
  #            this implementation, the median/MAD statistic is always used.
  #   one_tail: If TRUE only positive or only negative going anomalies are
  #             detected, depending on upper_tail.
  #   upper_tail: With one_tail = TRUE, TRUE detects positive going anomalies
  #               and FALSE detects negative going anomalies.
  #   verbose: Emit a progress message per iteration.
  # Returns:
  #   A list with the timestamps of the anomalies (anoms, NULL when none were
  #   found) and a data frame holding the truncated trend + seasonal
  #   components (stl).

  if (is.null(num_obs_per_period)) {
    stop("must supply period length for time series decomposition")
  }

  num_obs <- nrow(data)

  # Check to make sure we have at least two periods worth of data for anomaly context
  if (num_obs < num_obs_per_period * 2) {
    stop("Anom detection needs at least 2 periods worth of data")
  }

  # Check if our timestamps are posix
  posix_timestamp <- inherits(data[[1L]], "POSIXlt")

  # Handle NAs: padding with NA on both sides means that more than three runs
  # of is.na() can only occur when an NA sits between observed values.
  if (length(rle(is.na(c(NA, data[[2L]], NA)))$values) > 3) {
    stop("Data contains non-leading NAs. We suggest replacing NAs with interpolated values (see na.approx in Zoo package).")
  }
  data <- na.omit(data)

  # -- Step 1: Decompose data. This yields a univariate remainder which is
  # used for anomaly detection.
  decomp <- stl(ts(data[[2L]], frequency = num_obs_per_period),
                s.window = "periodic", robust = TRUE)
  seasonal <- decomp$time.series[, "seasonal"]
  trend <- decomp$time.series[, "trend"]

  # Remove the seasonal component and the median of the data to create the
  # univariate remainder.
  data <- data.frame(timestamp = data[[1L]],
                     count = (data[[2L]] - seasonal - median(data[[2L]])))

  # Store the smoothed seasonal component plus the trend component for use in
  # determining the "expected values" option.
  data_decomp <- data.frame(timestamp = data[[1L]],
                            count = as.numeric(trunc(trend + seasonal)))

  if (posix_timestamp) {
    data_decomp <- format_timestamp(data_decomp)
  }

  # Maximum number of outliers that S-H-ESD can detect (e.g. 49% of data)
  max_outliers <- trunc(num_obs * k)

  if (max_outliers == 0) {
    stop(paste0("With longterm=TRUE, AnomalyDetection splits the data into 2 week periods by default. You have ", num_obs, " observations in a period, which is too few. Set a higher piecewise_median_period_weeks."))
  }

  # Work on plain vectors: each iteration removes exactly one observation by
  # position, so rows that share a timestamp are never dropped together and
  # no data frame has to be copied inside the loop.
  stamps <- data[[1L]]
  vals <- as.numeric(data[[2L]])
  n <- length(vals)

  if (posix_timestamp) {
    R_idx <- as.POSIXlt(stamps[seq_len(max_outliers)], tz = "UTC")
  } else {
    R_idx <- seq_len(max_outliers)
  }

  num_anoms <- 0L

  # Compute the test statistic until r = max_outliers values have been
  # removed from the sample.
  for (i in seq_len(max_outliers)) {
    if (verbose) message(paste(i, "/", max_outliers, "completed"))

    center <- median(vals)
    if (one_tail) {
      ares <- if (upper_tail) vals - center else center - vals
    } else {
      ares <- abs(vals - center)
    }

    # protect against constant time series
    data_sigma <- mad(vals)
    if (data_sigma == 0) {
      break
    }

    ares <- ares / data_sigma
    R <- max(ares)
    temp_max_idx <- which(ares == R)[1L]

    R_idx[i] <- stamps[temp_max_idx]
    vals <- vals[-temp_max_idx]
    stamps <- stamps[-temp_max_idx]

    ## Compute critical value.
    if (one_tail) {
      p <- 1 - alpha / (n - i + 1)
    } else {
      p <- 1 - alpha / (2 * (n - i + 1))
    }

    t <- qt(p, (n - i - 1L))
    lam <- t * (n - i) / sqrt((n - i - 1 + t**2) * (n - i + 1))

    # The number of anomalies is the largest i whose statistic exceeds the
    # critical value, so keep overwriting.
    if (R > lam) {
      num_anoms <- i
    }
  }

  if (num_anoms > 0) {
    R_idx <- R_idx[seq_len(num_anoms)]
  } else {
    R_idx <- NULL
  }

  return(list(anoms = R_idx, stl = data_decomp))
}
get_range <- function(dfs, index = 2, y_log = FALSE) {
  # Range of one column of a data frame, ignoring missing values.
  #
  # Args:
  #   dfs: Data frame (or list) holding the values.
  #   index: Position (or name) of the column to inspect.
  #   y_log: If TRUE only strictly positive values are considered, so that
  #          the range can be used on a log scaled axis.
  # Returns:
  #   Vector c(min, max).
  vals <- dfs[[index]]
  if (y_log) {
    vals <- vals[vals > 0]
  }
  range(vals, na.rm = TRUE)
}

add_formatted_y <- function(yrange, y_log = FALSE, expand = TRUE, digits = 1) {
  # Build a continuous y scale with six evenly spaced, abbreviated labels.
  #
  # Args:
  #   yrange: Vector c(min, max) of the values to display.
  #   y_log: If TRUE the scale uses a log10 transformation.
  #   expand: If TRUE the upper limit is raised by 20% of the range.
  #   digits: Number of decimals kept in the labels.
  # Returns:
  #   The object returned by ggplot2::scale_y_continuous().
  ymin <- yrange[1]
  ymax <- yrange[2]

  if (expand) {
    ymax <- ymax + (ymax - ymin) * .2
  }

  # Abbreviate labels in millions ("M") or thousands ("k") depending on the
  # magnitude of the upper limit.
  if (abs(ymax) > 1000000) {
    divisor <- 1000000
    unit <- "M"
  } else if (abs(ymax) > 1000) {
    divisor <- 1000
    unit <- "k"
  } else {
    divisor <- 1
    unit <- ""
  }

  transform <- if (y_log) "log10" else "identity"

  ggplot2::scale_y_continuous(
    breaks = seq(ymin, ymax, length.out = 6),
    limits = c(ymin, ymax),
    labels = function(x) paste0(round(x / divisor, digits = digits), unit),
    trans = transform
  )
}

add_day_labels_datetime <- function(tsplot, breaks = 6, start = NULL, end = NULL,
                                    days_per_line = 1) {
  # Add a date-time x scale and vertical day separators to a plot.
  #
  # Args:
  #   tsplot: Plot object; its `data` element must hold the timestamps in the
  #           first column and it must support `+` with ggplot2 layers.
  #   breaks: Spacing, in hours, between the labelled breaks.
  #   start: First date-time to label. Defaults to the earliest timestamp.
  #   end: Last date-time to label. Defaults to the latest timestamp.
  #   days_per_line: Spacing, in days, between the vertical separator lines.
  # Returns:
  #   tsplot with the x scale and, when any fall inside (start, end), the
  #   vertical lines added.
  if (is.null(start)) {
    start <- min(tsplot$data[[1]])
  }

  if (is.null(end)) {
    end <- max(tsplot$data[[1]])
  }

  start_breaks <- start
  attributes(start_breaks)$tzone <- "UTC"

  lines_start <- trunc(start_breaks, units = "days")
  attributes(lines_start)$tzone <- "UTC"

  lines_at <- seq(lines_start, end, as.difftime(days_per_line, units = "days"))
  lines_at <- lines_at[lines_at > start & lines_at < end]

  minor_breaks <- seq(trunc(start_breaks, units = "days"), end,
                      as.difftime(breaks, units = "hours"))
  minor_breaks <- minor_breaks[minor_breaks > start & minor_breaks <= end]

  # Label the start itself when it falls on a full hour. Going through
  # as.POSIXlt() keeps this working when start is POSIXct (e.g. the default
  # taken from the plot data), where `$min` is not available.
  if (as.POSIXlt(start)$min == 0) {
    minor_breaks <- as.POSIXct(c(start, minor_breaks))
  }

  outplot <- tsplot + ggplot2::scale_x_datetime(
    breaks = minor_breaks,
    # Midnight breaks show the date, all other breaks show the hour.
    labels = function(x) ifelse(as.POSIXlt(x, tz = "UTC")$hour != 0,
                                strftime(x, format = "%kh", tz = "UTC"),
                                strftime(x, format = "%b %e", tz = "UTC")),
    expand = c(0, 0)
  )

  if (length(lines_at) > 0) {
    outplot <- outplot + ggplot2::geom_vline(xintercept = as.numeric(lines_at), color = "gray60")
  }

  return(outplot)
}
10 | #' @param direction Directionality of the anomalies to be detected. Options are: 11 | #' \code{'pos' | 'neg' | 'both'}. 12 | #' @param alpha The level of statistical significance with which to accept or reject anomalies. 13 | #' @param only_last Find and report anomalies only within the last day or hr in the time series. 14 | #' \code{NULL | 'day' | 'hr'}. 15 | #' @param threshold Only report positive going anoms above the threshold specified. Options are: 16 | #' \code{'None' | 'med_max' | 'p95' | 'p99'}. 17 | #' @param e_value Add an additional column to the anoms output containing the expected value. 18 | #' @param longterm Increase anom detection efficacy for time series that are greater than a month. 19 | #' See Details below. 20 | #' @param piecewise_median_period_weeks The piecewise median time window as described in Vallis, Hochenbaum, and Kejariwal (2014). 21 | #' Defaults to 2. 22 | #' @param plot A flag indicating if a plot with both the time series and the estimated anoms, 23 | #' indicated by circles, should also be returned. 24 | #' @param y_log Apply log scaling to the y-axis. This helps with viewing plots that have extremely 25 | #' large positive anomalies relative to the rest of the data. 26 | #' @param xlabel X-axis label to be added to the output plot. 27 | #' @param ylabel Y-axis label to be added to the output plot. 28 | #' @details 29 | #' \code{longterm} This option should be set when the input time series is longer than a month. 30 | #' The option enables the approach described in Vallis, Hochenbaum, and Kejariwal (2014).\cr\cr 31 | #' \code{threshold} Filter all negative anomalies and those anomalies whose magnitude is smaller 32 | #' than one of the specified thresholds which include: the median 33 | #' of the daily max values (med_max), the 95th percentile of the daily max values (p95), and the 34 | #' 99th percentile of the daily max values (p99). 35 | #' @param title Title for the output plot. 
36 | #' @param verbose Enable debug messages. 37 | #' @param na.rm Remove any NAs in timestamps.(default: FALSE) 38 | #' @return The returned value is a list with the following components. 39 | #' @return \item{anoms}{Data frame containing timestamps, values, and optionally expected values.} 40 | #' @return \item{plot}{A graphical object if plotting was requested by the user. The plot contains 41 | #' the estimated anomalies annotated on the input time series.} 42 | #' @return One can save \code{anoms} to a file in the following fashion: 43 | #' \code{write.csv([["anoms"]], file=)} 44 | #' @return One can save \code{plot} to a file in the following fashion: 45 | #' \code{ggsave(, plot=[["plot"]])} 46 | #' @references Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) "A Novel Technique for 47 | #' Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA. 48 | #' @references Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier Procedure" 49 | #' , Technometrics, 25(2), pp. 165-172. 
50 | #' 51 | #' @docType data 52 | #' @keywords datasets 53 | #' @name raw_data 54 | #' 55 | #' @examples 56 | #' data(raw_data) 57 | #' AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', plot=TRUE) 58 | #' # To detect only the anomalies on the last day, run the following: 59 | #' AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', only_last="day", plot=TRUE) 60 | #' @seealso \code{\link{AnomalyDetectionVec}} 61 | #' @export 62 | #' 63 | AnomalyDetectionTs <- function(x, max_anoms = 0.10, direction = 'pos', 64 | alpha = 0.05, only_last = NULL, threshold = 'None', 65 | e_value = FALSE, longterm = FALSE, piecewise_median_period_weeks = 2, plot = FALSE, 66 | y_log = FALSE, xlabel = '', ylabel = 'count', 67 | title = NULL, verbose=FALSE, na.rm = FALSE){ 68 | 69 | # Check for supported inputs types 70 | if(!is.data.frame(x)){ 71 | stop("data must be a single data frame.") 72 | } else { 73 | if(ncol(x) != 2 || !is.numeric(x[[2]])){ 74 | stop("data must be a 2 column data.frame, with the first column being a set of timestamps, and the second coloumn being numeric values.") 75 | } 76 | # Format timestamps if necessary 77 | if (!(class(x[[1]])[1] == "POSIXlt")) { 78 | x <- format_timestamp(x) 79 | } 80 | } 81 | # Rename data frame columns if necessary 82 | if (any((names(x) == c("timestamp", "count")) == FALSE)) { 83 | colnames(x) <- c("timestamp", "count") 84 | } 85 | 86 | if(!is.logical(na.rm)){ 87 | stop("na.rm must be either TRUE (T) or FALSE (F)") 88 | } 89 | 90 | # Deal with NAs in timestamps 91 | if(any(is.na(x$timestamp))){ 92 | if(na.rm){ 93 | x <- x[-which(is.na(x$timestamp)), ] 94 | } else { 95 | stop("timestamp contains NAs, please set na.rm to TRUE or remove the NAs manually.") 96 | } 97 | } 98 | 99 | # Sanity check all input parameters 100 | if(max_anoms > .49){ 101 | stop(paste("max_anoms must be less than 50% of the data points (max_anoms =", round(max_anoms*length(x[[2]]), 0), " data_points =", length(x[[2]]),").")) 102 | } else 
if(max_anoms < 0){ 103 | stop("max_anoms must be positive.") 104 | } else if(max_anoms == 0){ 105 | warning("0 max_anoms results in max_outliers being 0.") 106 | } 107 | if(!direction %in% c('pos', 'neg', 'both')){ 108 | stop("direction options are: pos | neg | both.") 109 | } 110 | if(!(0.01 <= alpha || alpha <= 0.1)){ 111 | if(verbose) message("Warning: alpha is the statistical signifigance, and is usually between 0.01 and 0.1") 112 | } 113 | if(!is.null(only_last) && !only_last %in% c('day','hr')){ 114 | stop("only_last must be either 'day' or 'hr'") 115 | } 116 | if(!threshold %in% c('None','med_max','p95','p99')){ 117 | stop("threshold options are: None | med_max | p95 | p99.") 118 | } 119 | if(!is.logical(e_value)){ 120 | stop("e_value must be either TRUE (T) or FALSE (F)") 121 | } 122 | if(!is.logical(longterm)){ 123 | stop("longterm must be either TRUE (T) or FALSE (F)") 124 | } 125 | if(piecewise_median_period_weeks < 2){ 126 | stop("piecewise_median_period_weeks must be at greater than 2 weeks") 127 | } 128 | if(!is.logical(plot)){ 129 | stop("plot must be either TRUE (T) or FALSE (F)") 130 | } 131 | if(!is.logical(y_log)){ 132 | stop("y_log must be either TRUE (T) or FALSE (F)") 133 | } 134 | if(!is.character(xlabel)){ 135 | stop("xlabel must be a string") 136 | } 137 | if(!is.character(ylabel)){ 138 | stop("ylabel must be a string") 139 | } 140 | if(!is.character(title) && !is.null(title)){ 141 | stop("title must be a string") 142 | } 143 | if(is.null(title)){ 144 | title <- "" 145 | } else { 146 | title <- paste(title, " : ", sep="") 147 | } 148 | 149 | # -- Main analysis: Perform S-H-ESD 150 | 151 | # Derive number of observations in a single day. 152 | # Although we derive this in S-H-ESD, we also need it to be minutley later on so we do it here first. 
153 | gran <- get_gran(x, 1) 154 | 155 | if(gran == "day"){ 156 | num_days_per_line <- 7 157 | if(is.character(only_last) && only_last == 'hr'){ 158 | only_last <- 'day' 159 | } 160 | } else { 161 | num_days_per_line <- 1 162 | } 163 | 164 | # Aggregate data to minutely if secondly 165 | if(gran == "sec"){ 166 | x <- format_timestamp(aggregate(x[2], format(x[1], "%Y-%m-%d %H:%M:00"), eval(parse(text="sum")))) 167 | } 168 | 169 | period = switch(gran, 170 | min = 1440, 171 | hr = 24, 172 | # if the data is daily, then we need to bump the period to weekly to get multiple examples 173 | day = 7) 174 | num_obs <- length(x[[2]]) 175 | 176 | if(max_anoms < 1/num_obs){ 177 | max_anoms <- 1/num_obs 178 | } 179 | 180 | # -- Setup for longterm time series 181 | 182 | # If longterm is enabled, break the data into subset data frames and store in all_data 183 | if(longterm){ 184 | # Pre-allocate list with size equal to the number of piecewise_median_period_weeks chunks in x + any left over chunk 185 | # handle edge cases for daily and single column data period lengths 186 | if(gran == "day"){ 187 | # STL needs 2*period + 1 observations 188 | num_obs_in_period <- period*piecewise_median_period_weeks + 1 189 | num_days_in_period <- (7*piecewise_median_period_weeks) + 1 190 | } else { 191 | num_obs_in_period <- period*7*piecewise_median_period_weeks 192 | num_days_in_period <- (7*piecewise_median_period_weeks) 193 | } 194 | 195 | # Store last date in time series 196 | last_date <- x[[1]][num_obs] 197 | 198 | all_data <- vector(mode="list", length=ceiling(length(x[[1]])/(num_obs_in_period))) 199 | # Subset x into piecewise_median_period_weeks chunks 200 | for(j in seq(1,length(x[[1]]), by=num_obs_in_period)){ 201 | start_date <- x[[1]][j] 202 | end_date <- min(start_date + lubridate::days(num_days_in_period), x[[1]][length(x[[1]])]) 203 | # if there is at least 14 days left, subset it, otherwise subset last_date - 14days 204 | if(difftime(end_date, start_date, units = "days") == 
as.difftime(num_days_in_period, units="days")){ 205 | all_data[[ceiling(j/(num_obs_in_period))]] <- subset(x, x[[1]] >= start_date & x[[1]] < end_date) 206 | }else{ 207 | all_data[[ceiling(j/(num_obs_in_period))]] <- subset(x, x[[1]] > (last_date-lubridate::days(num_days_in_period)) & x[[1]] <= last_date) 208 | } 209 | } 210 | }else{ 211 | # If longterm is not enabled, then just overwrite all_data list with x as the only item 212 | all_data <- list(x) 213 | } 214 | 215 | # Create empty data frames to store all anoms and seasonal+trend component from decomposition 216 | all_anoms <- data.frame(timestamp=numeric(0), count=numeric(0)) 217 | seasonal_plus_trend <- data.frame(timestamp=numeric(0), count=numeric(0)) 218 | 219 | # Detect anomalies on all data (either entire data in one-pass, or in 2 week blocks if longterm=TRUE) 220 | for(i in 1:length(all_data)) { 221 | 222 | anomaly_direction = switch(direction, 223 | "pos" = data.frame(one_tail=TRUE, upper_tail=TRUE), # upper-tail only (positive going anomalies) 224 | "neg" = data.frame(one_tail=TRUE, upper_tail=FALSE), # lower-tail only (negative going anomalies) 225 | "both" = data.frame(one_tail=FALSE, upper_tail=TRUE)) # Both tails. Tail direction is not actually used. 226 | 227 | # detect_anoms actually performs the anomaly detection and returns the results in a list containing the anomalies 228 | # as well as the decomposed components of the time series for further analysis. 
229 | s_h_esd_timestamps <- detect_anoms(all_data[[i]], k=max_anoms, alpha=alpha, num_obs_per_period=period, use_decomp=TRUE, use_esd=FALSE, 230 | one_tail=anomaly_direction$one_tail, upper_tail=anomaly_direction$upper_tail, verbose=verbose) 231 | 232 | # store decomposed components in local variable and overwrite s_h_esd_timestamps to contain only the anom timestamps 233 | data_decomp <- s_h_esd_timestamps$stl 234 | s_h_esd_timestamps <- s_h_esd_timestamps$anoms 235 | 236 | # -- Step 3: Use detected anomaly timestamps to extract the actual anomalies (timestamp and value) from the data 237 | if(!is.null(s_h_esd_timestamps)){ 238 | anoms <- subset(all_data[[i]], (all_data[[i]][[1]] %in% s_h_esd_timestamps)) 239 | } else { 240 | anoms <- data.frame(timestamp=numeric(0), count=numeric(0)) 241 | } 242 | 243 | # Filter the anomalies using one of the thresholding functions if applicable 244 | if(threshold != "None"){ 245 | # Calculate daily max values 246 | periodic_maxs <- tapply(x[[2]],as.Date(x[[1]]),FUN=max) 247 | 248 | # Calculate the threshold set by the user 249 | if(threshold == 'med_max'){ 250 | thresh <- median(periodic_maxs) 251 | }else if (threshold == 'p95'){ 252 | thresh <- quantile(periodic_maxs, .95) 253 | }else if (threshold == 'p99'){ 254 | thresh <- quantile(periodic_maxs, .99) 255 | } 256 | # Remove any anoms below the threshold 257 | anoms <- subset(anoms, anoms[[2]] >= thresh) 258 | } 259 | all_anoms <- rbind(all_anoms, anoms) 260 | seasonal_plus_trend <- rbind(seasonal_plus_trend, data_decomp) 261 | } 262 | 263 | # Cleanup potential duplicates 264 | all_anoms <- all_anoms[!duplicated(all_anoms[[1]]), ] 265 | seasonal_plus_trend <- seasonal_plus_trend[!duplicated(seasonal_plus_trend[[1]]), ] 266 | 267 | # -- If only_last was set by the user, create subset of the data that represent the most recent day 268 | if(!is.null(only_last)){ 269 | start_date <- x[[1]][num_obs]-lubridate::days(7) 270 | start_anoms <- x[[1]][num_obs]-lubridate::days(1) 271 | 
if(gran == "day"){ 272 | #TODO: This might be better set up top at the gran check 273 | breaks <- 3*12 274 | num_days_per_line <- 7 275 | } else { 276 | if(only_last == 'day'){ 277 | breaks <- 12 278 | }else{ 279 | # We need to change start_date and start_anoms for the hourly only_last option 280 | start_date <- lubridate::floor_date(x[[1]][num_obs]-lubridate::days(2), "day") 281 | start_anoms <- x[[1]][num_obs]-lubridate::hours(1) 282 | breaks <- 3 283 | } 284 | } 285 | 286 | # subset the last days worth of data 287 | x_subset_single_day <- subset(x, (x[[1]] > start_anoms)) 288 | # When plotting anoms for the last day only we only show the previous weeks data 289 | x_subset_week <- subset(x, ((x[[1]] <= start_anoms) & (x[[1]] > start_date))) 290 | all_anoms <- subset(all_anoms, all_anoms[[1]] >= x_subset_single_day[[1]][1]) 291 | num_obs <- length(x_subset_single_day[[2]]) 292 | } 293 | 294 | # Calculate number of anomalies as a percentage 295 | anom_pct <- (length(all_anoms[[2]]) / num_obs) * 100 296 | 297 | # If there are no anoms, then let's exit 298 | if(anom_pct == 0){ 299 | if(verbose) message("No anomalies detected.") 300 | return (list("anoms"=data.frame(), "plot"=plot.new())) 301 | } 302 | 303 | if(plot){ 304 | # -- Build title for plots utilizing parameters set by user 305 | plot_title <- paste(title, round(anom_pct, digits=2), "% Anomalies (alpha=", alpha, ", direction=", direction,")", sep="") 306 | if(longterm){ 307 | plot_title <- paste(plot_title, ", longterm=T", sep="") 308 | } 309 | 310 | # -- Plot raw time series data 311 | color_name <- paste("\"", title, "\"", sep="") 312 | alpha <- 0.8 313 | if(!is.null(only_last)){ 314 | xgraph <- ggplot2::ggplot(x_subset_week, ggplot2::aes_string(x="timestamp", y="count")) + ggplot2::theme_bw() + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), text=ggplot2::element_text(size = 14)) 315 | xgraph <- xgraph + ggplot2::geom_line(data=x_subset_week, 
ggplot2::aes_string(colour=color_name), alpha=alpha*.33) + ggplot2::geom_line(data=x_subset_single_day, ggplot2::aes_string(color=color_name), alpha=alpha) 316 | week_rng = get_range(x_subset_week, index=2, y_log=y_log) 317 | day_rng = get_range(x_subset_single_day, index=2, y_log=y_log) 318 | yrange = c(min(week_rng[1],day_rng[1]), max(week_rng[2],day_rng[2])) 319 | xgraph <- add_day_labels_datetime(xgraph, breaks=breaks, start=as.POSIXlt(min(x_subset_week[[1]]), tz="UTC"), end=as.POSIXlt(max(x_subset_single_day[[1]]), tz="UTC"), days_per_line=num_days_per_line) 320 | xgraph <- xgraph + ggplot2::labs(x=xlabel, y=ylabel, title=plot_title) 321 | }else{ 322 | xgraph <- ggplot2::ggplot(x, ggplot2::aes_string(x="timestamp", y="count")) + ggplot2::theme_bw() + ggplot2::theme(panel.grid.major = ggplot2::element_line(colour = "gray60"), panel.grid.major.y = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), text=ggplot2::element_text(size = 14)) 323 | xgraph <- xgraph + ggplot2::geom_line(data=x, ggplot2::aes_string(colour=color_name), alpha=alpha) 324 | yrange <- get_range(x, index=2, y_log=y_log) 325 | xgraph <- xgraph + ggplot2::scale_x_datetime(labels=function(x) ifelse(as.POSIXlt(x, tz="UTC")$hour != 0,strftime(x, format="%kh", tz="UTC"), strftime(x, format="%b %e", tz="UTC")), 326 | expand=c(0,0)) 327 | xgraph <- xgraph + ggplot2::labs(x=xlabel, y=ylabel, title=plot_title) 328 | } 329 | 330 | # Add anoms to the plot as circles. 331 | # We add zzz_ to the start of the name to ensure that the anoms are listed after the data sets. 
332 | xgraph <- xgraph + ggplot2::geom_point(data=all_anoms, ggplot2::aes_string(color=paste("\"zzz_",title,"\"",sep="")), size = 3, shape = 1) 333 | 334 | # Hide legend 335 | xgraph <- xgraph + ggplot2::theme(legend.position="none") 336 | 337 | # Use log scaling if set by user 338 | xgraph <- xgraph + add_formatted_y(yrange, y_log=y_log) 339 | 340 | } 341 | 342 | # Fix to make sure date-time is correct and that we retain hms at midnight 343 | all_anoms[[1]] <- format(all_anoms[[1]], format="%Y-%m-%d %H:%M:%S") 344 | 345 | # Store expected values if set by user 346 | if(e_value) { 347 | anoms <- data.frame(timestamp=all_anoms[[1]], anoms=all_anoms[[2]], 348 | expected_value=subset(seasonal_plus_trend[[2]], as.POSIXlt(seasonal_plus_trend[[1]], tz="UTC") %in% all_anoms[[1]]), 349 | stringsAsFactors=FALSE) 350 | } else { 351 | anoms <- data.frame(timestamp=all_anoms[[1]], anoms=all_anoms[[2]], stringsAsFactors=FALSE) 352 | } 353 | 354 | # Make sure we're still a valid POSIXlt datetime. 355 | # TODO: Make sure we keep original datetime format and timezone. 356 | anoms$timestamp <- as.POSIXlt(anoms$timestamp, tz="UTC") 357 | 358 | # Lastly, return anoms and optionally the plot if requested by the user 359 | if(plot){ 360 | return (list(anoms = anoms, plot = xgraph)) 361 | } else { 362 | return (list(anoms = anoms, plot = plot.new())) 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /R/vec_anom_detection.R: --------------------------------------------------------------------------------
#' Anomaly Detection Using Seasonal Hybrid ESD Test
#'
#' A technique for detecting anomalies in seasonal univariate time series where the input is a
#' series of observations.
#' @name AnomalyDetectionVec
#' @param x Time series as a column data frame, list, or vector, where the column consists of
#' the observations. The observations must be numeric.
#' @param max_anoms Maximum number of anomalies that S-H-ESD will detect as a percentage of the
#' data.
#' @param direction Directionality of the anomalies to be detected. Options are:
#' \code{'pos' | 'neg' | 'both'}.
#' @param alpha The level of statistical significance with which to accept or reject anomalies.
#' @param period Defines the number of observations in a single period, and used during seasonal
#' decomposition.
#' @param only_last Find and report anomalies only within the last period in the time series.
#' @param threshold Only report positive going anoms above the threshold specified. Options are:
#' \code{'None' | 'med_max' | 'p95' | 'p99'}.
#' @param e_value Add an additional column to the anoms output containing the expected value.
#' @param longterm_period Defines the number of observations for which the trend can be considered
#' flat. The value should be an integer multiple of the number of observations in a single period.
#' This increases anom detection efficacy for time series that are greater than a month.
#' @param plot A flag indicating if a plot with both the time series and the estimated anoms,
#' indicated by circles, should also be returned.
#' @param y_log Apply log scaling to the y-axis. This helps with viewing plots that have extremely
#' large positive anomalies relative to the rest of the data.
#' @param xlabel X-axis label to be added to the output plot.
#' @param ylabel Y-axis label to be added to the output plot.
#' @param title Title for the output plot.
#' @param verbose Enable debug messages
#' @details
#' \code{longterm_period} This option should be set when the input time series is longer than a month.
#' The option enables the approach described in Vallis, Hochenbaum, and Kejariwal (2014).\cr\cr
#' \code{threshold} Filter all negative anomalies and those anomalies whose magnitude is smaller
#' than one of the specified thresholds which include: the median
#' of the periodic max values (med_max), the 95th percentile of the periodic max values (p95), and the
#' 99th percentile of the periodic max values (p99). The max values are computed over consecutive
#' blocks of \code{period} observations.
#' @return The returned value is a list with the following components.
#' @return \item{anoms}{Data frame containing index, values, and optionally expected values.
#' An empty data frame is returned when no anomalies are detected.}
#' @return \item{plot}{A graphical object if plotting was requested by the user, otherwise
#' \code{NULL}. The plot contains the estimated anomalies annotated on the input time series.}
#' @return One can save \code{anoms} to a file in the following fashion, where \code{res} is the
#' returned list: \code{write.csv(res[["anoms"]], file="anoms.csv")}
#' @return One can save \code{plot} to a file in the following fashion:
#' \code{ggsave("anoms.png", plot=res[["plot"]])}
#' @references Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) "A Novel Technique for
#' Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA.
#' @references Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier Procedure"
#' , Technometrics, 25(2), pp. 165-172.
#'
#' @examples
#' data(raw_data)
#' AnomalyDetectionVec(raw_data[,2], max_anoms=0.02, period=1440, direction='both', plot=TRUE)
#' # To detect only the anomalies in the last period, run the following:
#' AnomalyDetectionVec(raw_data[,2], max_anoms=0.02, period=1440, direction='both',
#' only_last=TRUE, plot=TRUE)
#' @seealso \code{\link{AnomalyDetectionTs}}
#' @export
AnomalyDetectionVec <- function(x, max_anoms=0.10, direction='pos',
                                alpha=0.05, period=NULL, only_last=FALSE,
                                threshold='None', e_value=FALSE, longterm_period=NULL,
                                plot=FALSE, y_log=FALSE, xlabel='', ylabel='count',
                                title=NULL, verbose=FALSE){

  # Check for supported input types and add timestamps.
  # The original test `is.vector(x) || is.list(x) && is.numeric(x)` parsed as
  # `is.vector(x) || (...)` and therefore let character vectors and lists through.
  if(is.data.frame(x)){
    valid_input <- ncol(x) == 1 && is.numeric(x[[1]])
    values <- if(valid_input) x[[1]] else NULL
  } else {
    # Flatten plain lists so list(1, 2, 3) and list(c(1, 2, 3)) both become one numeric column
    values <- if(is.list(x)) unlist(x, use.names=FALSE) else x
    valid_input <- is.vector(values) && is.numeric(values)
  }
  if(!valid_input || length(values) == 0){
    stop("data must be a single data frame, list, or vector that holds numeric values.")
  }
  x <- data.frame(timestamp=seq_along(values), count=values)

  # Sanity check all input parameters
  if(max_anoms > .49){
    stop(paste("max_anoms must be less than 50% of the data points (max_anoms =", round(max_anoms*length(x[[2]]), 0), " data_points =", length(x[[2]]),")."))
  }
  if(!direction %in% c('pos', 'neg', 'both')){
    stop("direction options are: pos | neg | both.")
  }
  # Both bounds must hold; with `||` the condition was always TRUE and the warning never fired
  if(!(0.01 <= alpha && alpha <= 0.1)){
    if(verbose) message("Warning: alpha is the statistical signifigance, and is usually between 0.01 and 0.1")
  }
  if(is.null(period)){
    stop("Period must be set to the number of data points in a single period")
  }
  if(!is.logical(only_last)){
    stop("only_last must be either TRUE (T) or FALSE (F)")
  }
  if(!threshold %in% c('None', 'med_max', 'p95', 'p99')){
    stop("threshold options are: None | med_max | p95 | p99.")
  }
  if(!is.logical(e_value)){
    stop("e_value must be either TRUE (T) or FALSE (F)")
  }
  if(!is.logical(plot)){
    stop("plot must be either TRUE (T) or FALSE (F)")
  }
  if(!is.logical(y_log)){
    stop("y_log must be either TRUE (T) or FALSE (F)")
  }
  if(!is.character(xlabel)){
    stop("xlabel must be a string")
  }
  if(!is.character(ylabel)){
    stop("ylabel must be a string")
  }
  if(!is.character(title) && !is.null(title)){
    stop("title must be a string")
  }
  if(is.null(title)){
    title <- ""
  } else {
    title <- paste(title, " : ", sep="")
  }

  # -- Main analysis: Perform S-H-ESD

  num_obs <- length(x[[2]])

  # Always allow at least one anomaly to be detected
  if(max_anoms < 1/num_obs){
    max_anoms <- 1/num_obs
  }

  # -- Setup for longterm time series

  if(!is.null(longterm_period)){
    # Break the data into chunks of longterm_period observations. A trailing partial
    # chunk is replaced by the last longterm_period observations, so it overlaps the
    # previous chunk; the resulting duplicates are removed after detection.
    chunk_starts <- seq(1, num_obs, by=longterm_period)
    all_data <- lapply(chunk_starts, function(start_index){
      end_index <- min(start_index + longterm_period - 1, num_obs)
      if((end_index - start_index + 1) == longterm_period){
        x[x[[1]] >= start_index & x[[1]] <= end_index, ]
      } else {
        x[x[[1]] > (num_obs - longterm_period) & x[[1]] <= num_obs, ]
      }
    })
  } else {
    # If longterm is not enabled, analyse x in a single pass
    all_data <- list(x)
  }

  # Create empty data frames to store all anoms and seasonal+trend component from decomposition
  all_anoms <- data.frame(timestamp=numeric(0), count=numeric(0))
  seasonal_plus_trend <- data.frame(timestamp=numeric(0), count=numeric(0))

  # The tail configuration depends only on `direction`, so compute it once
  anomaly_direction <- switch(direction,
                              "pos" = list(one_tail=TRUE, upper_tail=TRUE),   # upper-tail only (positive going anomalies)
                              "neg" = list(one_tail=TRUE, upper_tail=FALSE),  # lower-tail only (negative going anomalies)
                              "both" = list(one_tail=FALSE, upper_tail=TRUE)) # Both tails. Tail direction is not actually used.

  # Detect anomalies on all data (either entire data in one-pass, or chunk by chunk if longterm_period is set)
  for(i in seq_along(all_data)){
    chunk <- all_data[[i]]

    # detect_anoms performs the anomaly detection and returns a list containing the anomaly
    # timestamps ($anoms) as well as the decomposed components of the time series ($stl).
    detection <- detect_anoms(chunk, k=max_anoms, alpha=alpha, num_obs_per_period=period, use_decomp=TRUE, use_esd=FALSE,
                              one_tail=anomaly_direction$one_tail, upper_tail=anomaly_direction$upper_tail, verbose=verbose)
    data_decomp <- detection$stl
    anom_timestamps <- detection$anoms

    # Use detected anomaly timestamps to extract the actual anomalies (timestamp and value) from the data
    if(!is.null(anom_timestamps)){
      anoms <- chunk[chunk[[1]] %in% anom_timestamps, ]
    } else {
      anoms <- data.frame(timestamp=numeric(0), count=numeric(0))
    }

    # Filter the anomalies using one of the thresholding functions if applicable
    if(threshold != "None"){
      # Max value of each block of `period` observations. The grouping is sized from the
      # chunk itself so a chunk shorter than longterm_period no longer breaks tapply.
      period_id <- (seq_along(chunk[[2]]) - 1) %/% period
      periodic_maxs <- tapply(chunk[[2]], period_id, FUN=max)

      # Calculate the threshold set by the user
      thresh <- switch(threshold,
                       "med_max" = median(periodic_maxs),
                       "p95" = quantile(periodic_maxs, .95),
                       "p99" = quantile(periodic_maxs, .99))

      # Remove any anoms below the threshold
      anoms <- subset(anoms, anoms[[2]] >= thresh)
    }
    all_anoms <- rbind(all_anoms, anoms)
    seasonal_plus_trend <- rbind(seasonal_plus_trend, data_decomp)
  }

  # Cleanup potential duplicates (overlapping longterm chunks)
  all_anoms <- all_anoms[!duplicated(all_anoms[[1]]), ]
  seasonal_plus_trend <- seasonal_plus_trend[!duplicated(seasonal_plus_trend[[1]]), ]

  # -- If only_last was set by the user, create subset of the data that represent the most recent period
  if(only_last){
    last_rows <- (num_obs-period+1):num_obs
    x_subset_single_period <- data.frame(timestamp=x[[1]][last_rows], count=x[[2]][last_rows])

    # Show up to 7 periods (including the last one); with less data show everything we have.
    # The previous `num_obs - period` fallback silently dropped the oldest period.
    past_obs <- min(period*7, num_obs)

    # The history deliberately ends on the first point of the last period
    prev_rows <- (num_obs-past_obs+1):(num_obs-period+1)
    x_subset_previous <- data.frame(timestamp=x[[1]][prev_rows], count=x[[2]][prev_rows])

    all_anoms <- subset(all_anoms, all_anoms[[1]] >= x_subset_single_period[[1]][1])
    num_obs <- length(x_subset_single_period[[2]])
  }

  # Calculate number of anomalies as a percentage
  anom_pct <- (length(all_anoms[[2]]) / num_obs) * 100

  # If there are no anoms, then let's exit.
  # plot.new() evaluates to NULL but opens a graphics device; return NULL directly instead.
  if(anom_pct == 0){
    if(verbose) message("No anomalies detected.")
    return(list("anoms"=data.frame(), "plot"=NULL))
  }

  xgraph <- NULL
  if(plot){
    # -- Build title for plots utilizing parameters set by user
    plot_title <- paste(title, round(anom_pct, digits=2), "% Anomalies (alpha=", alpha, ", direction=", direction,")", sep="")
    if(!is.null(longterm_period)){
      plot_title <- paste(plot_title, ", longterm=T", sep="")
    }

    # -- Plot raw time series data
    color_name <- paste("\"", title, "\"", sep="")
    # Line transparency; kept separate from the statistical `alpha` parameter
    line_alpha <- 0.8
    if(only_last){
      plot_data <- rbind(x_subset_previous, x_subset_single_period)
      lines_at <- seq(1, length(plot_data[[2]]), period)+min(plot_data[[1]])
      xgraph <- ggplot2::ggplot(plot_data, ggplot2::aes_string(x="timestamp", y="count")) + ggplot2::theme_bw() + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), text=ggplot2::element_text(size = 14))
      xgraph <- xgraph + ggplot2::geom_line(data=x_subset_previous, ggplot2::aes_string(colour=color_name), alpha=line_alpha*.33) + ggplot2::geom_line(data=x_subset_single_period, ggplot2::aes_string(color=color_name), alpha=line_alpha)
      yrange <- get_range(plot_data, index=2, y_log=y_log)
    } else {
      num_periods <- num_obs/period
      lines_at <- seq(1, num_obs, period)

      # check to see that we don't have too many breaks
      inc <- 2
      while(num_periods > 14){
        num_periods <- num_obs/(period*inc)
        lines_at <- seq(1, num_obs, period*inc)
        inc <- inc + 1
      }
      xgraph <- ggplot2::ggplot(x, ggplot2::aes_string(x="timestamp", y="count")) + ggplot2::theme_bw() + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), text=ggplot2::element_text(size = 14))
      xgraph <- xgraph + ggplot2::geom_line(data=x, ggplot2::aes_string(colour=color_name), alpha=line_alpha)
      yrange <- get_range(x, index=2, y_log=y_log)
    }
    xgraph <- xgraph + ggplot2::scale_x_continuous(breaks=lines_at, expand=c(0,0))
    xgraph <- xgraph + ggplot2::geom_vline(xintercept=lines_at, color="gray60")
    xgraph <- xgraph + ggplot2::labs(x=xlabel, y=ylabel, title=plot_title)

    # Add anoms to the plot as circles.
    # We add zzz_ to the start of the name to ensure that the anoms are listed after the data sets.
    xgraph <- xgraph + ggplot2::geom_point(data=all_anoms, ggplot2::aes_string(color=paste("\"zzz_",title,"\"",sep="")), size = 3, shape = 1)

    # Hide legend and timestamps
    xgraph <- xgraph + ggplot2::theme(axis.text.x=ggplot2::element_blank()) + ggplot2::theme(legend.position="none")

    # Use log scaling if set by user
    xgraph <- xgraph + add_formatted_y(yrange, y_log=y_log)
  }

  # Store expected values if set by user
  if(e_value){
    # Align by index so each anomaly is paired with its own seasonal+trend value
    expected <- seasonal_plus_trend[[2]][match(all_anoms[[1]], seasonal_plus_trend[[1]])]
    anoms <- data.frame(index=all_anoms[[1]], anoms=all_anoms[[2]], expected_value=expected)
  } else {
    anoms <- data.frame(index=all_anoms[[1]], anoms=all_anoms[[2]])
  }

  # Lastly, return anoms and the plot (NULL unless requested by the user)
  list(anoms = anoms, plot = xgraph)
}
293 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AnomalyDetection R package 2 | 3 | [![Build Status](https://travis-ci.org/twitter/AnomalyDetection.png)](https://travis-ci.org/twitter/AnomalyDetection) 4 | [![Pending
Pull-Requests](http://githubbadges.herokuapp.com/twitter/AnomalyDetection/pulls.svg?style=flat)](https://github.com/twitter/AnomalyDetection/pulls) 5 | [![Github Issues](http://githubbadges.herokuapp.com/twitter/AnomalyDetection/issues.svg)](https://github.com/twitter/AnomalyDetection/issues) 6 | 7 | AnomalyDetection is an open-source R package to detect anomalies which is 8 | robust, from a statistical standpoint, in the presence of seasonality and an 9 | underlying trend. The AnomalyDetection package can be used in a wide variety of 10 | contexts. For example, detecting anomalies in system metrics after a new 11 | software release, user engagement post an A/B test, or for problems in 12 | econometrics, financial engineering, political and social sciences. 13 | 14 | ## How the package works 15 | 16 | The underlying algorithm – referred to as Seasonal Hybrid ESD (S-H-ESD) – builds 17 | upon the Generalized ESD test for detecting anomalies. Note that S-H-ESD can 18 | be used to detect both global and local anomalies. This is achieved by 19 | employing time series decomposition and using robust statistical metrics, viz., 20 | median together with ESD. In addition, for long time series (say, 6 months of 21 | minutely data), the algorithm employs piecewise approximation - this is rooted 22 | in the fact that trend extraction in the presence of anomalies is non-trivial - 23 | for anomaly detection. 24 | 25 | Besides time series, the package can also be used to detect anomalies in a 26 | vector of numerical values. We have found this very useful as many times the 27 | corresponding timestamps are not available. The package provides rich 28 | visualization support. The user can specify the direction of anomalies, the 29 | window of interest (such as last day, last hour), enable/disable piecewise 30 | approximation; additionally, the x- and y-axes are annotated in a way to assist 31 | visual data analysis. 
32 | 33 | ## How to get started 34 | 35 | Install the R package using the following commands on the R console: 36 | 37 | ``` 38 | install.packages("devtools") 39 | devtools::install_github("twitter/AnomalyDetection") 40 | library(AnomalyDetection) 41 | ``` 42 | 43 | The function AnomalyDetectionTs is called to detect one or more statistically 44 | significant anomalies in the input time series. The documentation of the 45 | function AnomalyDetectionTs, which can be seen by using the following command, 46 | details the input arguments and the output of the function AnomalyDetectionTs. 47 | 48 | ``` 49 | help(AnomalyDetectionTs) 50 | ``` 51 | 52 | The function AnomalyDetectionVec is called to detect one or more statistically 53 | significant anomalies in a vector of observations. The documentation of the 54 | function AnomalyDetectionVec, which can be seen by using the following command, 55 | details the input arguments and the output of the function AnomalyDetectionVec. 56 | 57 | ``` 58 | help(AnomalyDetectionVec) 59 | ``` 60 | 61 | ## A simple example 62 | 63 | To get started, the user is recommended to use the example dataset which comes 64 | with the package. Execute the following commands: 65 | 66 | ``` 67 | data(raw_data) 68 | res = AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', plot=TRUE) 69 | res$plot 70 | ``` 71 | 72 | ![Fig 1](https://github.com/twitter/AnomalyDetection/blob/master/figs/Fig1.png) 73 | 74 | From the plot, we observe that the input time series experiences both positive 75 | and negative anomalies. Furthermore, many of the anomalies in the time series 76 | are local anomalies within the bounds of the time series’ seasonality (hence, 77 | cannot be detected using the traditional approaches). The anomalies detected 78 | using the proposed technique are annotated on the plot. 
In case the timestamps 79 | for the plot above were not available, anomaly detection could then be carried 80 | out using the AnomalyDetectionVec function; specifically, one can use the 81 | following command: 82 | 83 | ``` 84 | AnomalyDetectionVec(raw_data[,2], max_anoms=0.02, period=1440, direction='both', only_last=FALSE, plot=TRUE) 85 | ``` 86 | 87 | Often, anomaly detection is carried out on a periodic basis. For instance, at 88 | times, one may be interested in determining whether there was any anomaly 89 | yesterday. To this end, we support a flag only_last whereby one can subset the 90 | anomalies that occurred during the last day or last hour. Execute the following 91 | command: 92 | 93 | ``` 94 | res = AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', only_last="day", plot=TRUE) 95 | res$plot 96 | ``` 97 | 98 | ![Fig 2](https://github.com/twitter/AnomalyDetection/blob/master/figs/Fig2.png) 99 | 100 | From the plot, we observe that only the anomalies that occurred during the last 101 | day have been annotated. Further, the prior six days are included to expose the 102 | seasonal nature of the time series but are put in the background as the window 103 | of prime interest is the last day. 104 | 105 | Anomaly detection for long duration time series can be carried out by setting 106 | the longterm argument to TRUE. 
107 | 108 | ## Copyright and License 109 | Copyright 2015 Twitter, Inc and other contributors 110 | 111 | Licensed under the GPLv3 112 | -------------------------------------------------------------------------------- /data/raw_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter/AnomalyDetection/1f5deaa1609f8f1964c1e905c7a8ad2d1d0dc718/data/raw_data.rda -------------------------------------------------------------------------------- /figs/Fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter/AnomalyDetection/1f5deaa1609f8f1964c1e905c7a8ad2d1d0dc718/figs/Fig1.png -------------------------------------------------------------------------------- /figs/Fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/twitter/AnomalyDetection/1f5deaa1609f8f1964c1e905c7a8ad2d1d0dc718/figs/Fig2.png -------------------------------------------------------------------------------- /inst/extdata/data.csv: -------------------------------------------------------------------------------- 1 | "date","value" 2 | 2015-02-26 08:00:00,28.9189627228525 3 | 2015-02-26 09:00:00,32.5127691537306 4 | 2015-02-26 10:00:00,31.4729374671571 5 | 2015-02-26 11:00:00,33.9512711864407 6 | 2015-02-26 12:00:00,28.1587457128858 7 | 2015-02-26 13:00:00,29.9130028063611 8 | 2015-02-26 14:00:00,27.3270940570894 9 | 2015-02-26 15:00:00,23.5691126279863 10 | 2015-02-26 16:00:00,23.7694753577107 11 | 2015-02-26 17:00:00,22.1789638932496 12 | 2015-02-26 18:00:00,25.4970917225951 13 | 2015-02-26 19:00:00,22.2707993474715 14 | 2015-02-26 20:00:00,23.2640586797066 15 | 2015-02-26 21:00:00,21.1492753623188 16 | 2015-02-26 22:00:00,20.2434988179669 17 | 2015-02-26 23:00:00,26.5411255411255 18 | 2015-02-27 00:00:00,21.1485148514851 19 | 2015-02-27 01:00:00,34.7333333333333 20 | 2015-02-27 
02:00:00,17.1758793969849 21 | 2015-02-27 03:00:00,22.7675276752768 22 | 2015-02-27 04:00:00,22.3108348134991 23 | 2015-02-27 05:00:00,25.1653386454183 24 | 2015-02-27 06:00:00,25.9285714285714 25 | 2015-02-27 07:00:00,31.3939202026599 26 | 2015-02-27 08:00:00,33.1833846153846 27 | 2015-02-27 09:00:00,30.0352112676056 28 | 2015-02-27 10:00:00,29.7779839208411 29 | 2015-02-27 11:00:00,29.9924480805538 30 | 2015-02-27 12:00:00,29.7201166180758 31 | 2015-02-27 13:00:00,26.1428571428571 32 | 2015-02-27 14:00:00,23.6824552924118 33 | 2015-02-27 15:00:00,22.2842696629213 34 | 2015-02-27 16:00:00,22.6301775147929 35 | 2015-02-27 17:00:00,22.5957554320364 36 | 2015-02-27 18:00:00,22.29815455594 37 | 2015-02-27 19:00:00,20.4932866316404 38 | 2015-02-27 20:00:00,21.2279792746114 39 | 2015-02-27 21:00:00,18.9512437810945 40 | 2015-02-27 22:00:00,22.5922131147541 41 | 2015-02-27 23:00:00,27.8740157480315 42 | 2015-02-28 00:00:00,21.1547619047619 43 | 2015-02-28 01:00:00,22.3865030674847 44 | 2015-02-28 02:00:00,15.3265306122449 45 | 2015-02-28 03:00:00,20.4968944099379 46 | 2015-02-28 04:00:00,24.6292682926829 47 | 2015-02-28 05:00:00,29.7282099343955 48 | 2015-02-28 06:00:00,26.6695250659631 49 | 2015-02-28 07:00:00,24.4923884514436 50 | 2015-02-28 08:00:00,27.2545 51 | 2015-02-28 09:00:00,27.2625968992248 52 | 2015-02-28 10:00:00,26.2038980509745 53 | 2015-02-28 11:00:00,24.9839908519154 54 | 2015-02-28 12:00:00,27.8708487084871 55 | 2015-02-28 13:00:00,24.6541598694943 56 | 2015-02-28 14:00:00,24.9184149184149 57 | 2015-02-28 15:00:00,30.2276707530648 58 | 2015-02-28 16:00:00,20.6197333333333 59 | 2015-02-28 17:00:00,21.6258000984737 60 | 2015-02-28 18:00:00,19.1630076838639 61 | 2015-02-28 19:00:00,22.91 62 | 2015-02-28 20:00:00,23.6474056603774 63 | 2015-02-28 21:00:00,21.722480620155 64 | 2015-02-28 22:00:00,21.7886363636364 65 | 2015-02-28 23:00:00,24.007874015748 66 | 2015-03-01 00:00:00,21.3317535545024 67 | 2015-03-01 01:00:00,24.4630541871921 68 | 2015-03-01 
02:00:00,21.7430167597765 69 | 2015-03-01 03:00:00,19.9725085910653 70 | 2015-03-01 04:00:00,19.8446215139442 71 | 2015-03-01 05:00:00,22.3408577878104 72 | 2015-03-01 06:00:00,24.1132075471698 73 | 2015-03-01 07:00:00,28.1435768261965 74 | 2015-03-01 08:00:00,29.395920502092 75 | 2015-03-01 09:00:00,27.1138755980861 76 | 2015-03-01 10:00:00,26.5910112359551 77 | 2015-03-01 11:00:00,27.5464632454924 78 | 2015-03-01 12:00:00,28.4250337685727 79 | 2015-03-01 13:00:00,31.5130434782609 80 | 2015-03-01 14:00:00,24.9224362311296 81 | 2015-03-01 15:00:00,24.7067520946279 82 | 2015-03-01 16:00:00,19.7013574660633 83 | 2015-03-01 17:00:00,20.2194625407166 84 | 2015-03-01 18:00:00,21.3144303797468 85 | 2015-03-01 19:00:00,24.3891850723534 86 | 2015-03-01 20:00:00,24.7505617977528 87 | 2015-03-01 21:00:00,21.6666666666667 88 | 2015-03-01 22:00:00,26.2348066298343 89 | 2015-03-01 23:00:00,22.0180995475113 90 | 2015-03-02 00:00:00,20.7349397590361 91 | 2015-03-02 01:00:00,17.5254237288136 92 | 2015-03-02 02:00:00,18.0229885057471 93 | 2015-03-02 03:00:00,18.1389728096677 94 | 2015-03-02 04:00:00,18.3296 95 | 2015-03-02 05:00:00,25.229537366548 96 | 2015-03-02 06:00:00,24.0457102672293 97 | 2015-03-02 07:00:00,29.1404624277457 98 | 2015-03-02 08:00:00,29.1745623069001 99 | 2015-03-02 09:00:00,28.9919759277834 100 | 2015-03-02 10:00:00,28.2323561346363 101 | 2015-03-02 11:00:00,26.2308868501529 102 | 2015-03-02 12:00:00,25.9711677078328 103 | 2015-03-02 13:00:00,27.6707317073171 104 | 2015-03-02 14:00:00,27.0718418514947 105 | 2015-03-02 15:00:00,28.7082524271845 106 | 2015-03-02 16:00:00,23.0007584376185 107 | 2015-03-02 17:00:00,21.5746656760773 108 | 2015-03-02 18:00:00,22.2506234413965 109 | 2015-03-02 19:00:00,23.4770922419059 110 | 2015-03-02 20:00:00,27.8022813688213 111 | 2015-03-02 21:00:00,24.4478063540091 112 | 2015-03-02 22:00:00,19.2445652173913 113 | 2015-03-02 23:00:00,22.7572815533981 114 | 2015-03-03 00:00:00,35.4133333333333 115 | 2015-03-03 
01:00:00,18.6518987341772 116 | 2015-03-03 02:00:00,17.5462962962963 117 | 2015-03-03 03:00:00,18.0989847715736 118 | 2015-03-03 04:00:00,24.7644376899696 119 | 2015-03-03 05:00:00,26.1242990654206 120 | 2015-03-03 06:00:00,31.7241134751773 121 | 2015-03-03 07:00:00,28.7750582750583 122 | 2015-03-03 08:00:00,27.8094131319001 123 | 2015-03-03 09:00:00,30.6963042313873 124 | 2015-03-03 10:00:00,31.3191870890616 125 | 2015-03-03 11:00:00,29.1392694063927 126 | 2015-03-03 12:00:00,27.4386554621849 127 | 2015-03-03 13:00:00,29.6066176470588 128 | 2015-03-03 14:00:00,25.8543209876543 129 | 2015-03-03 15:00:00,27.005081300813 130 | 2015-03-03 16:00:00,24.5 131 | 2015-03-03 17:00:00,22.9647741400746 132 | 2015-03-03 18:00:00,22.3932729624838 133 | 2015-03-03 19:00:00,21.9032418952618 134 | 2015-03-03 20:00:00,25.3410097431355 135 | 2015-03-03 21:00:00,26.7739837398374 136 | 2015-03-03 22:00:00,23.4526315789474 137 | 2015-03-03 23:00:00,32.3833333333333 138 | 2015-03-04 00:00:00,22.8783783783784 139 | 2015-03-04 01:00:00,19.2167832167832 140 | 2015-03-04 02:00:00,22.2557077625571 141 | 2015-03-04 03:00:00,19.6317280453258 142 | 2015-03-04 04:00:00,21.9059561128527 143 | 2015-03-04 05:00:00,26.7651933701657 144 | 2015-03-04 06:00:00,26.9214780600462 145 | 2015-03-04 07:00:00,31.4672955974843 146 | 2015-03-04 08:00:00,32.0051107325383 147 | 2015-03-04 09:00:00,29.1982942430704 148 | 2015-03-04 10:00:00,27.6325736520289 149 | 2015-03-04 11:00:00,29.2883147386964 150 | 2015-03-04 12:00:00,28.7355223880597 151 | 2015-03-04 13:00:00,29.0514829322888 152 | 2015-03-04 14:00:00,29.6065040650407 153 | 2015-03-04 15:00:00,27.4267291910903 154 | 2015-03-04 16:00:00,25.0244770805518 155 | 2015-03-04 17:00:00,22.4989447024061 156 | 2015-03-04 18:00:00,21.2194793536804 157 | 2015-03-04 19:00:00,22.2119901112485 158 | 2015-03-04 20:00:00,23.1895551257253 159 | 2015-03-04 21:00:00,26.5961844197138 160 | 2015-03-04 22:00:00,38.5714285714286 161 | 2015-03-04 23:00:00,24.7486910994764 162 | 
2015-03-05 00:00:00,23.767955801105 163 | 2015-03-05 01:00:00,17.6857142857143 164 | 2015-03-05 02:00:00,19.392 165 | 2015-03-05 03:00:00,22.9975490196078 166 | 2015-03-05 04:00:00,29.4811188811189 167 | 2015-03-05 05:00:00,28.3926746166951 168 | 2015-03-05 06:00:00,24.0354698195395 169 | 2015-03-05 07:00:00,31.9466019417476 170 | 2015-03-05 08:00:00,33.9649474211317 171 | 2015-03-05 09:00:00,30.0051694428489 172 | 2015-03-05 10:00:00,27.4879081015719 173 | 2015-03-05 11:00:00,30.5145929339478 174 | 2015-03-05 12:00:00,29.0211586901763 175 | 2015-03-05 13:00:00,26.5287524366472 176 | 2015-03-05 14:00:00,28.6781091727631 177 | 2015-03-05 15:00:00,28.4192118226601 178 | 2015-03-05 16:00:00,23.6586294416244 179 | 2015-03-05 17:00:00,22.1656104773975 180 | 2015-03-05 18:00:00,20.3252148997135 181 | 2015-03-05 19:00:00,26.006671608599 182 | 2015-03-05 20:00:00,27.7519908987486 183 | 2015-03-05 21:00:00,24.1321428571429 184 | 2015-03-05 22:00:00,21.8695652173913 185 | 2015-03-05 23:00:00,17.9686098654708 186 | 2015-03-06 00:00:00,19.8554913294798 187 | 2015-03-06 01:00:00,18.3038674033149 188 | 2015-03-06 02:00:00,23.3404255319149 189 | 2015-03-06 03:00:00,21.8716049382716 190 | 2015-03-06 04:00:00,27.0773558368495 191 | 2015-03-06 05:00:00,26.8239051094891 192 | 2015-03-06 06:00:00,27.9043309631545 193 | 2015-03-06 07:00:00,30.0811287477954 194 | 2015-03-06 08:00:00,36.1550614394383 195 | 2015-03-06 09:00:00,28.3143695014663 196 | 2015-03-06 10:00:00,30.0440738534842 197 | 2015-03-06 11:00:00,24.9212368728121 198 | 2015-03-06 12:00:00,33.1934541203974 199 | 2015-03-06 13:00:00,30.1344116006693 200 | 2015-03-06 14:00:00,27.8360471645143 201 | 2015-03-06 15:00:00,27.408832807571 202 | 2015-03-06 16:00:00,21.9114611624117 203 | 2015-03-06 17:00:00,23.6295063782585 204 | 2015-03-06 18:00:00,19.0504484304933 205 | 2015-03-06 19:00:00,21.2039127163281 206 | 2015-03-06 20:00:00,26.9337899543379 207 | 2015-03-06 21:00:00,28.1111111111111 208 | 2015-03-06 
22:00:00,24.0568862275449 209 | 2015-03-06 23:00:00,32.8805309734513 210 | 2015-03-07 00:00:00,22.0341463414634 211 | 2015-03-07 01:00:00,18.7032967032967 212 | 2015-03-07 02:00:00,23.0352941176471 213 | 2015-03-07 03:00:00,21.95 214 | 2015-03-07 04:00:00,24.1791304347826 215 | 2015-03-07 05:00:00,26.5935214211076 216 | 2015-03-07 06:00:00,27.9241379310345 217 | 2015-03-07 07:00:00,30.812804453723 218 | 2015-03-07 08:00:00,32.7521423862887 219 | 2015-03-07 09:00:00,30.974761255116 220 | 2015-03-07 10:00:00,30.0966936993138 221 | 2015-03-07 11:00:00,27.5620347394541 222 | 2015-03-07 12:00:00,28.1627313337588 223 | 2015-03-07 13:00:00,26.6600241545894 224 | 2015-03-07 14:00:00,26.2295695570805 225 | 2015-03-07 15:00:00,22.2612099644128 226 | 2015-03-07 16:00:00,22.4991139988187 227 | 2015-03-07 17:00:00,21.0336943441637 228 | 2015-03-07 18:00:00,19.8835978835979 229 | 2015-03-07 19:00:00,24.0924229808493 230 | 2015-03-07 20:00:00,21.9704370179949 231 | 2015-03-07 21:00:00,26.9642184557439 232 | 2015-03-07 22:00:00,25.1060171919771 233 | 2015-03-07 23:00:00,21.4139534883721 234 | 2015-03-08 00:00:00,15.7566137566138 235 | 2015-03-08 01:00:00,17.9470198675497 236 | 2015-03-08 03:00:00,18.696335078534 237 | 2015-03-08 04:00:00,20.4647435897436 238 | 2015-03-08 05:00:00,23.7644787644788 239 | 2015-03-08 06:00:00,23.9364089775561 240 | 2015-03-08 07:00:00,29.3616029822926 241 | 2015-03-08 08:00:00,26.8673946957878 242 | 2015-03-08 09:00:00,28.571986970684 243 | 2015-03-08 10:00:00,28.2724885095207 244 | 2015-03-08 11:00:00,25.6255172413793 245 | 2015-03-08 12:00:00,30.0582582582583 246 | 2015-03-08 13:00:00,28.8484320557491 247 | 2015-03-08 14:00:00,27.3226993865031 248 | 2015-03-08 15:00:00,22.62416918429 249 | 2015-03-08 16:00:00,24.8451672010994 250 | 2015-03-08 17:00:00,23.638864628821 251 | 2015-03-08 18:00:00,27.6091370558376 252 | 2015-03-08 19:00:00,27.2916006339144 253 | 2015-03-08 20:00:00,23.8217213114754 254 | 2015-03-08 21:00:00,31.6496913580247 255 | 
2015-03-08 22:00:00,24.1781914893617 256 | 2015-03-08 23:00:00,27.6934865900383 257 | 2015-03-09 00:00:00,23.9532710280374 258 | 2015-03-09 01:00:00,18.5704697986577 259 | 2015-03-09 02:00:00,15.2923076923077 260 | 2015-03-09 03:00:00,19.1059602649007 261 | 2015-03-09 04:00:00,21.1688311688312 262 | 2015-03-09 05:00:00,25.1989389920424 263 | 2015-03-09 06:00:00,27.2994923857868 264 | 2015-03-09 07:00:00,36.2478386167147 265 | 2015-03-09 08:00:00,35.6915769474351 266 | 2015-03-09 09:00:00,30.6126230457441 267 | 2015-03-09 10:00:00,28.24609375 268 | 2015-03-09 11:00:00,35.6420454545455 269 | 2015-03-09 12:00:00,28.5615468409586 270 | 2015-03-09 13:00:00,28.6932354483482 271 | 2015-03-09 14:00:00,30.2106625258799 272 | 2015-03-09 15:00:00,26.9251513483764 273 | 2015-03-09 16:00:00,24.2009446114212 274 | 2015-03-09 17:00:00,22.0852225020991 275 | 2015-03-09 18:00:00,22.1582075903064 276 | 2015-03-09 19:00:00,25.4601226993865 277 | 2015-03-09 20:00:00,26.0371859296482 278 | 2015-03-09 21:00:00,23.2621502209131 279 | 2015-03-09 22:00:00,19.9576059850374 280 | 2015-03-09 23:00:00,17.5936073059361 281 | 2015-03-10 00:00:00,22.3809523809524 282 | 2015-03-10 01:00:00,22.4728260869565 283 | 2015-03-10 02:00:00,17.5652173913043 284 | 2015-03-10 03:00:00,22.5100286532951 285 | 2015-03-10 04:00:00,21.1589648798521 286 | 2015-03-10 05:00:00,25.5180586907449 287 | 2015-03-10 06:00:00,28.0383411580595 288 | 2015-03-10 07:00:00,27.8206845238095 289 | 2015-03-10 08:00:00,27.3412790697674 290 | 2015-03-10 09:00:00,30.670303030303 291 | 2015-03-10 10:00:00,31.4211480362538 292 | 2015-03-10 11:00:00,29.2047101449275 293 | 2015-03-10 12:00:00,28.9671760045274 294 | 2015-03-10 13:00:00,24.4802259887006 295 | 2015-03-10 14:00:00,26.904887020494 296 | 2015-03-10 15:00:00,26.2162162162162 297 | 2015-03-10 16:00:00,24.1177015755329 298 | 2015-03-10 17:00:00,21.7310405643739 299 | 2015-03-10 18:00:00,23.4462693571093 300 | 2015-03-10 19:00:00,24.4070351758794 301 | 2015-03-10 
20:00:00,23.5047892720307 302 | 2015-03-10 21:00:00,25.0391822827939 303 | 2015-03-10 22:00:00,22.4735376044568 304 | 2015-03-10 23:00:00,21.7454545454545 305 | 2015-03-11 00:00:00,20.8316831683168 306 | 2015-03-11 01:00:00,20.3988095238095 307 | 2015-03-11 02:00:00,25.7205882352941 308 | 2015-03-11 03:00:00,19.3909574468085 309 | 2015-03-11 04:00:00,22.1775510204082 310 | 2015-03-11 05:00:00,24.4704463208685 311 | 2015-03-11 06:00:00,28.2923203963666 312 | 2015-03-11 07:00:00,32.9646978954515 313 | 2015-03-11 08:00:00,31.0298864315601 314 | 2015-03-11 09:00:00,31.1660539215686 315 | 2015-03-11 10:00:00,31.6036697247706 316 | 2015-03-11 11:00:00,29.3050147492625 317 | 2015-03-11 12:00:00,31.3047965998786 318 | 2015-03-11 13:00:00,29.5327650506127 319 | 2015-03-11 14:00:00,27.0792515134838 320 | 2015-03-11 15:00:00,27.8416763678696 321 | 2015-03-11 16:00:00,22.9827755905512 322 | 2015-03-11 17:00:00,21.6953818827709 323 | 2015-03-11 18:00:00,21.072183908046 324 | 2015-03-11 19:00:00,23.8263157894737 325 | 2015-03-11 20:00:00,23.379132231405 326 | 2015-03-11 21:00:00,24.7881219903692 327 | 2015-03-11 22:00:00,28.1360381861575 328 | 2015-03-11 23:00:00,18.3306772908367 329 | 2015-03-12 00:00:00,18.5050505050505 330 | 2015-03-12 01:00:00,19.5061728395062 331 | 2015-03-12 02:00:00,18.8347107438017 332 | 2015-03-12 03:00:00,22.6189111747851 333 | 2015-03-12 04:00:00,31.3786231884058 334 | 2015-03-12 05:00:00,26.8054830287206 335 | 2015-03-12 06:00:00,30.3881818181818 336 | 2015-03-12 07:00:00,34.9729015201586 337 | 2015-03-12 08:00:00,33.3316129032258 338 | 2015-03-12 09:00:00,30.5976261127596 339 | 2015-03-12 10:00:00,30.1004901960784 340 | 2015-03-12 11:00:00,28.2622139764997 341 | 2015-03-12 12:00:00,27.7289088863892 342 | 2015-03-12 13:00:00,26.9488448844885 343 | 2015-03-12 14:00:00,25.3577008928571 344 | 2015-03-12 15:00:00,28.875511396844 345 | 2015-03-12 16:00:00,25.0218905472637 346 | 2015-03-12 17:00:00,23.9646258503401 347 | 2015-03-12 
18:00:00,21.2590252707581 348 | 2015-03-12 19:00:00,25.4467213114754 349 | 2015-03-12 20:00:00,27.2155172413793 350 | 2015-03-12 21:00:00,27.4224270353303 351 | 2015-03-12 22:00:00,22.8010335917313 352 | 2015-03-12 23:00:00,20.7558528428094 353 | 2015-03-13 00:00:00,21.1244444444444 354 | 2015-03-13 01:00:00,17.6878048780488 355 | 2015-03-13 02:00:00,17.9139784946237 356 | 2015-03-13 03:00:00,19.6694677871148 357 | 2015-03-13 04:00:00,26.6754385964912 358 | 2015-03-13 05:00:00,26.2011764705882 359 | 2015-03-13 06:00:00,27.7895569620253 360 | 2015-03-13 07:00:00,28.9003584229391 361 | 2015-03-13 08:00:00,26.3352308665402 362 | 2015-03-13 09:00:00,29.5382335506817 363 | 2015-03-13 10:00:00,30.7962732919255 364 | 2015-03-13 11:00:00,26.7798340778558 365 | 2015-03-13 12:00:00,28.118372379778 366 | 2015-03-13 13:00:00,25.3286794648051 367 | 2015-03-13 14:00:00,30.8412979351032 368 | 2015-03-13 15:00:00,23.639974779319 369 | 2015-03-13 16:00:00,23.7446921443737 370 | 2015-03-13 17:00:00,23.5460992907801 371 | 2015-03-13 18:00:00,20.5258215962441 372 | 2015-03-13 19:00:00,20.3337739590218 373 | 2015-03-13 20:00:00,23.8793103448276 374 | 2015-03-13 21:00:00,23.2670807453416 375 | 2015-03-13 22:00:00,23.7845036319613 376 | 2015-03-13 23:00:00,25.0983606557377 377 | 2015-03-14 00:00:00,18.36 378 | 2015-03-14 01:00:00,17.6444444444444 379 | 2015-03-14 02:00:00,19.7090909090909 380 | 2015-03-14 03:00:00,24.988188976378 381 | 2015-03-14 04:00:00,20.0330396475771 382 | 2015-03-14 05:00:00,21.6910466582598 383 | 2015-03-14 06:00:00,28.1556145004421 384 | 2015-03-14 07:00:00,33.249297752809 385 | 2015-03-14 08:00:00,30.2438231469441 386 | 2015-03-14 09:00:00,27.5851334180432 387 | 2015-03-14 10:00:00,29.5948753462604 388 | 2015-03-14 11:00:00,29.5850847457627 389 | 2015-03-14 12:00:00,25.1485557083906 390 | 2015-03-14 13:00:00,25.8072131147541 391 | 2015-03-14 14:00:00,29.7215277777778 392 | 2015-03-14 15:00:00,26.3546831955923 393 | 2015-03-14 16:00:00,22.6961950059453 394 | 
2015-03-14 17:00:00,22.1652593486128 395 | 2015-03-14 18:00:00,21.0424242424242 396 | 2015-03-14 19:00:00,23.3754538852578 397 | 2015-03-14 20:00:00,24.2013348164627 398 | 2015-03-14 21:00:00,25.0184331797235 399 | 2015-03-14 22:00:00,20.5744680851064 400 | 2015-03-14 23:00:00,26.706106870229 401 | 2015-03-15 00:00:00,22.8582995951417 402 | 2015-03-15 01:00:00,25.6543209876543 403 | 2015-03-15 02:00:00,21.1559139784946 404 | 2015-03-15 03:00:00,24.4009900990099 405 | 2015-03-15 04:00:00,19.7424657534247 406 | 2015-03-15 05:00:00,24.3513097072419 407 | 2015-03-15 06:00:00,28.3227272727273 408 | 2015-03-15 07:00:00,26.3229813664596 409 | 2015-03-15 08:00:00,27.8095644748079 410 | 2015-03-15 09:00:00,28.7878993881713 411 | 2015-03-15 10:00:00,26.8659003831418 412 | 2015-03-15 11:00:00,27.1098265895954 413 | 2015-03-15 12:00:00,24.7221217600964 414 | 2015-03-15 13:00:00,25.8681983071342 415 | 2015-03-15 14:00:00,27.3538561244329 416 | 2015-03-15 15:00:00,29.4213709677419 417 | 2015-03-15 16:00:00,23.4251405212059 418 | 2015-03-15 17:00:00,20.2107382550336 419 | 2015-03-15 18:00:00,21.0775114737379 420 | 2015-03-15 19:00:00,23.0591278640059 421 | 2015-03-15 20:00:00,25.1794569067296 422 | 2015-03-15 21:00:00,25.8614564831261 423 | 2015-03-15 22:00:00,25.7384196185286 424 | 2015-03-15 23:00:00,21.2105263157895 425 | 2015-03-16 00:00:00,15.9530201342282 426 | 2015-03-16 01:00:00,15.780303030303 427 | 2015-03-16 02:00:00,17.1368421052632 428 | 2015-03-16 03:00:00,22.7872340425532 429 | 2015-03-16 04:00:00,22.8194690265487 430 | 2015-03-16 05:00:00,22.9822222222222 431 | 2015-03-16 06:00:00,26.3947590870668 432 | 2015-03-16 07:00:00,29.5598866052445 433 | 2015-03-16 08:00:00,27.5420382165605 434 | 2015-03-16 09:00:00,31.1468369123622 435 | 2015-03-16 10:00:00,31.5921985815603 436 | 2015-03-16 11:00:00,30.4443005181347 437 | 2015-03-16 12:00:00,27.8133640552995 438 | 2015-03-16 13:00:00,27.444261394838 439 | 2015-03-16 14:00:00,26.5696793002915 440 | 2015-03-16 
15:00:00,25.7782581840642 441 | 2015-03-16 16:00:00,26.0707570509649 442 | 2015-03-16 17:00:00,22.9822537710736 443 | 2015-03-16 18:00:00,21.4634489222118 444 | 2015-03-16 19:00:00,22.7584196891192 445 | 2015-03-16 20:00:00,28.5032967032967 446 | 2015-03-16 21:00:00,24.4952015355086 447 | 2015-03-16 22:00:00,20.3276836158192 448 | 2015-03-16 23:00:00,20.2119565217391 449 | 2015-03-17 00:00:00,18.4013605442177 450 | 2015-03-17 01:00:00,22.1764705882353 451 | 2015-03-17 02:00:00,20.093023255814 452 | 2015-03-17 03:00:00,21.4676470588235 453 | 2015-03-17 04:00:00,23.4280510018215 454 | 2015-03-17 05:00:00,27.0607902735562 455 | 2015-03-17 06:00:00,24.5683661645423 456 | 2015-03-17 07:00:00,28.9464384318056 457 | 2015-03-17 08:00:00,27.6609571788413 458 | 2015-03-17 09:00:00,26.0159574468085 459 | 2015-03-17 10:00:00,29.4181818181818 460 | 2015-03-17 11:00:00,25.3056338028169 461 | 2015-03-17 12:00:00,27.4774487471526 462 | 2015-03-17 13:00:00,27.1627358490566 463 | 2015-03-17 14:00:00,27.9890267175572 464 | 2015-03-17 15:00:00,29.8141809290954 465 | 2015-03-17 16:00:00,22.0773955773956 466 | 2015-03-17 17:00:00,22.7342603321746 467 | 2015-03-17 18:00:00,24.2103336045566 468 | 2015-03-17 19:00:00,24.4427807486631 469 | 2015-03-17 20:00:00,28.2345309381238 470 | 2015-03-17 21:00:00,25.3312302839117 471 | 2015-03-17 22:00:00,24.772397094431 472 | 2015-03-17 23:00:00,22.0725190839695 473 | 2015-03-18 00:00:00,17.2985781990521 474 | 2015-03-18 01:00:00,19.555 475 | 2015-03-18 02:00:00,18.3414634146341 476 | 2015-03-18 03:00:00,17.84 477 | 2015-03-18 04:00:00,21.9386733416771 478 | 2015-03-18 05:00:00,24.3677521842732 479 | 2015-03-18 06:00:00,24.8710073710074 480 | 2015-03-18 07:00:00,26.7684729064039 481 | 2015-03-18 08:00:00,30.9969465648855 482 | 2015-03-18 09:00:00,24.9227010217681 483 | 2015-03-18 10:00:00,26.7915921288014 484 | 2015-03-18 11:00:00,26.7732276530163 485 | 2015-03-18 12:00:00,25.3327205882353 486 | 2015-03-18 13:00:00,26.8013513513514 487 | 2015-03-18 
14:00:00,26.5617150281507 488 | 2015-03-18 15:00:00,27.6826789838337 489 | 2015-03-18 16:00:00,23.8123271434216 490 | 2015-03-18 17:00:00,21.4181600955794 491 | 2015-03-18 18:00:00,22.2619142971566 492 | 2015-03-18 19:00:00,19.6082926829268 493 | 2015-03-18 20:00:00,23.7971656333038 494 | 2015-03-18 21:00:00,22.2793696275072 495 | 2015-03-18 22:00:00,25.2553191489362 496 | 2015-03-18 23:00:00,23.3793103448276 497 | 2015-03-19 00:00:00,16.6595744680851 498 | 2015-03-19 01:00:00,19.5530303030303 499 | 2015-03-19 02:00:00,16.3501483679525 500 | 2015-03-19 03:00:00,24.1946721311475 501 | 2015-03-19 04:00:00,20.9445161290323 502 | 2015-03-19 05:00:00,22.2085889570552 503 | 2015-03-19 06:00:00,26.5666848121938 504 | 2015-03-19 07:00:00,24.9769094138544 505 | 2015-03-19 08:00:00,27.0605263157895 506 | 2015-03-19 09:00:00,28.5817307692308 507 | 2015-03-19 10:00:00,26.6025163094129 508 | 2015-03-19 11:00:00,25.7365988909427 509 | 2015-03-19 12:00:00,29.5469483568075 510 | 2015-03-19 13:00:00,24.523381294964 511 | 2015-03-19 14:00:00,25.2951464801049 512 | 2015-03-19 15:00:00,27.4358365019011 513 | 2015-03-19 16:00:00,26.2193521935219 514 | 2015-03-19 17:00:00,23.9175090982612 515 | 2015-03-19 18:00:00,21.3403343334762 516 | 2015-03-19 19:00:00,24.770223325062 517 | 2015-03-19 20:00:00,27.6247833622184 518 | 2015-03-19 21:00:00,20.925 519 | 2015-03-19 22:00:00,25.8976034858388 520 | 2015-03-19 23:00:00,21.9247311827957 521 | 2015-03-20 00:00:00,23.8823529411765 522 | 2015-03-20 01:00:00,16.9314285714286 523 | 2015-03-20 02:00:00,22.2440476190476 524 | 2015-03-20 03:00:00,19.3590308370044 525 | 2015-03-20 04:00:00,20.9258809234508 526 | 2015-03-20 05:00:00,22.1901840490798 527 | 2015-03-20 06:00:00,25.4012572027239 528 | 2015-03-20 07:00:00,26.0635481023831 529 | 2015-03-20 08:00:00,26.0008833922262 530 | 2015-03-20 09:00:00,28.3378136200717 531 | 2015-03-20 10:00:00,28.4029149036201 532 | 2015-03-20 11:00:00,26.2695443645084 533 | 2015-03-20 12:00:00,26.6776406035665 534 | 
2015-03-20 13:00:00,26.750103950104 535 | 2015-03-20 14:00:00,24.9796015180266 536 | 2015-03-20 15:00:00,24.6272630457934 537 | 2015-03-20 16:00:00,23.064596895343 538 | 2015-03-20 17:00:00,21.9794016674841 539 | 2015-03-20 18:00:00,20.7879417879418 540 | 2015-03-20 19:00:00,20.4537396121884 541 | 2015-03-20 20:00:00,19.0947054436987 542 | 2015-03-20 21:00:00,20.9384941675504 543 | 2015-03-20 22:00:00,20.7981859410431 544 | 2015-03-20 23:00:00,22.8518518518519 545 | 2015-03-21 00:00:00,17.5186721991701 546 | 2015-03-21 01:00:00,15.7032967032967 547 | 2015-03-21 02:00:00,15.7293577981651 548 | 2015-03-21 03:00:00,17.7588424437299 549 | 2015-03-21 04:00:00,24.3440514469453 550 | 2015-03-21 05:00:00,22.3546931407942 551 | 2015-03-21 06:00:00,24.2947692307692 552 | 2015-03-21 07:00:00,23.0253104106972 553 | 2015-03-21 08:00:00,26.0979020979021 554 | 2015-03-21 09:00:00,26.5299607072692 555 | 2015-03-21 10:00:00,26.6897605705553 556 | 2015-03-21 11:00:00,26.9560321715818 557 | 2015-03-21 12:00:00,26.9453870625663 558 | 2015-03-21 13:00:00,24.6965669988926 559 | 2015-03-21 14:00:00,24.6776315789474 560 | 2015-03-21 15:00:00,24.4035549703753 561 | 2015-03-21 16:00:00,22.531660011409 562 | 2015-03-21 17:00:00,22.00655379574 563 | 2015-03-21 18:00:00,19.8923976608187 564 | 2015-03-21 19:00:00,20.1047297297297 565 | 2015-03-21 20:00:00,17.3588979895756 566 | 2015-03-21 21:00:00,16.670626349892 567 | 2015-03-21 22:00:00,21.069387755102 568 | 2015-03-21 23:00:00,18.3180212014134 569 | 2015-03-22 00:00:00,18.2283105022831 570 | 2015-03-22 01:00:00,19.734375 571 | 2015-03-22 02:00:00,22.3967391304348 572 | 2015-03-22 03:00:00,17.3571428571429 573 | 2015-03-22 04:00:00,21.2325581395349 574 | 2015-03-22 05:00:00,23.4267912772586 575 | 2015-03-22 06:00:00,25.1833105335157 576 | 2015-03-22 07:00:00,25.8644432490586 577 | 2015-03-22 08:00:00,28.2544474393531 578 | 2015-03-22 09:00:00,27.1800900450225 579 | 2015-03-22 10:00:00,28.6986771190593 580 | 2015-03-22 
11:00:00,29.2204646564508 581 | 2015-03-22 12:00:00,27.3603941811356 582 | 2015-03-22 13:00:00,23.4411187438665 583 | 2015-03-22 14:00:00,27.5479452054795 584 | 2015-03-22 15:00:00,24.367234744365 585 | 2015-03-22 16:00:00,25.631369073992 586 | 2015-03-22 17:00:00,21.7873767258383 587 | 2015-03-22 18:00:00,21.3076588337685 588 | 2015-03-22 19:00:00,18.9932104752667 589 | 2015-03-22 20:00:00,18.671809256662 590 | 2015-03-22 21:00:00,22.8163265306122 591 | 2015-03-22 22:00:00,25.5895316804408 592 | 2015-03-22 23:00:00,31.3552631578947 593 | 2015-03-23 00:00:00,22.4545454545455 594 | 2015-03-23 01:00:00,22.975845410628 595 | 2015-03-23 02:00:00,19.4713114754098 596 | 2015-03-23 03:00:00,18.3803418803419 597 | 2015-03-23 04:00:00,18.4460526315789 598 | 2015-03-23 05:00:00,23.2604248623131 599 | 2015-03-23 06:00:00,24.414367816092 600 | 2015-03-23 07:00:00,25.1814993423937 601 | 2015-03-23 08:00:00,27.236165577342 602 | 2015-03-23 09:00:00,30.8409090909091 603 | 2015-03-23 10:00:00,28.3339230429014 604 | 2015-03-23 11:00:00,24.1212401665895 605 | 2015-03-23 12:00:00,27.8032928942808 606 | 2015-03-23 13:00:00,26.2918770078017 607 | 2015-03-23 14:00:00,28.8463194145502 608 | 2015-03-23 15:00:00,27.6798917944094 609 | 2015-03-23 16:00:00,24.2913143735588 610 | 2015-03-23 17:00:00,22.5641124374278 611 | 2015-03-23 18:00:00,21.6708149337615 612 | 2015-03-23 19:00:00,19.7495088408644 613 | 2015-03-23 20:00:00,20.4964028776978 614 | 2015-03-23 21:00:00,22.4021352313167 615 | 2015-03-23 22:00:00,21.3492723492724 616 | 2015-03-23 23:00:00,17.975 617 | 2015-03-24 00:00:00,20.8951612903226 618 | 2015-03-24 01:00:00,19.2450980392157 619 | 2015-03-24 02:00:00,18.6706827309237 620 | 2015-03-24 03:00:00,16.2162790697674 621 | 2015-03-24 04:00:00,18.4366028708134 622 | 2015-03-24 05:00:00,24.2303543913713 623 | 2015-03-24 06:00:00,23.3825352112676 624 | 2015-03-24 07:00:00,24.1516721620349 625 | 2015-03-24 08:00:00,26.2023201856149 626 | 2015-03-24 09:00:00,26.1901608325449 627 | 
2015-03-24 10:00:00,27.2214217098943 628 | 2015-03-24 11:00:00,27.7117619711762 629 | 2015-03-24 12:00:00,25.2040368582712 630 | 2015-03-24 13:00:00,24.8909169926119 631 | 2015-03-24 14:00:00,28.3252336448598 632 | 2015-03-24 15:00:00,25.0474327628362 633 | 2015-03-24 16:00:00,24.1966116807847 634 | 2015-03-24 17:00:00,25.5402629416598 635 | 2015-03-24 18:00:00,21.4756825938567 636 | 2015-03-24 19:00:00,20.8928215353938 637 | 2015-03-24 20:00:00,20.8518024032043 638 | 2015-03-24 21:00:00,22 639 | 2015-03-24 22:00:00,22.1857451403888 640 | 2015-03-24 23:00:00,18.7272727272727 641 | 2015-03-25 00:00:00,18.4285714285714 642 | 2015-03-25 01:00:00,21.51 643 | 2015-03-25 02:00:00,15.8721804511278 644 | 2015-03-25 03:00:00,19.859649122807 645 | 2015-03-25 04:00:00,23.3865853658537 646 | 2015-03-25 05:00:00,20.8534072900158 647 | 2015-03-25 06:00:00,23.74617196702 648 | 2015-03-25 07:00:00,26.0441102756892 649 | 2015-03-25 08:00:00,26.6099603349493 650 | 2015-03-25 09:00:00,29.8168631006346 651 | 2015-03-25 10:00:00,28.4230944774076 652 | 2015-03-25 11:00:00,24.8701176470588 653 | 2015-03-25 12:00:00,28.9628780934922 654 | 2015-03-25 13:00:00,24.5961373390558 655 | 2015-03-25 14:00:00,25.3090047393365 656 | 2015-03-25 15:00:00,25.0683073832245 657 | 2015-03-25 16:00:00,23.8601784955376 658 | 2015-03-25 17:00:00,21.6362520458265 659 | 2015-03-25 18:00:00,22.6860413914575 660 | 2015-03-25 19:00:00,20.0084830339321 661 | 2015-03-25 20:00:00,19.3349120433018 662 | 2015-03-25 21:00:00,19.1688741721854 663 | 2015-03-25 22:00:00,16.9904458598726 664 | 2015-03-25 23:00:00,19.6778523489933 665 | 2015-03-26 00:00:00,17.9490909090909 666 | 2015-03-26 01:00:00,16.2067307692308 667 | 2015-03-26 02:00:00,20.0844594594595 668 | 2015-03-26 03:00:00,19.1165254237288 669 | 2015-03-26 04:00:00,22.1415384615385 670 | 2015-03-26 05:00:00,26.7084048027444 671 | 2015-03-26 06:00:00,24.1033415841584 672 | 2015-03-26 07:00:00,25.2360909530721 673 | 2015-03-26 08:00:00,27.3369928400955 674 | 
2015-03-26 09:00:00,28.4140156754265 675 | 2015-03-26 10:00:00,24.100185528757 676 | 2015-03-26 11:00:00,28.0331065759637 677 | 2015-03-26 12:00:00,26.8276604911676 678 | 2015-03-26 13:00:00,24.8519332161687 679 | 2015-03-26 14:00:00,27.9479214253084 680 | 2015-03-26 15:00:00,25.4592881521209 681 | 2015-03-26 16:00:00,24.6757904672015 682 | 2015-03-26 17:00:00,23.2258628221931 683 | 2015-03-26 18:00:00,21.0138376383764 684 | 2015-03-26 19:00:00,18.9338624338624 685 | 2015-03-26 20:00:00,21.3146802325581 686 | 2015-03-26 21:00:00,20.2665941240479 687 | 2015-03-26 22:00:00,23.1523178807947 688 | 2015-03-26 23:00:00,27.9404255319149 689 | 2015-03-27 00:00:00,20.8920187793427 690 | 2015-03-27 01:00:00,17.0045662100457 691 | 2015-03-27 02:00:00,15.7035830618893 692 | 2015-03-27 03:00:00,17.4060606060606 693 | 2015-03-27 04:00:00,20.2689573459716 694 | 2015-03-27 05:00:00,24.8573596358118 695 | 2015-03-27 06:00:00,24.5608938547486 696 | 2015-03-27 07:00:00,24.0371251784864 697 | 2015-03-27 08:00:00,25.0064367816092 698 | 2015-03-27 09:00:00,30.5513253012048 699 | 2015-03-27 10:00:00,27.0618875709128 700 | 2015-03-27 11:00:00,27.2254452926209 701 | 2015-03-27 12:00:00,26.504873294347 702 | 2015-03-27 13:00:00,23.9334001001502 703 | 2015-03-27 14:00:00,24.4025330396476 704 | 2015-03-27 15:00:00,24.6264336428181 705 | 2015-03-27 16:00:00,24.3900370959194 706 | 2015-03-27 17:00:00,22.7385218365062 707 | 2015-03-27 18:00:00,23.4178119846069 708 | 2015-03-27 19:00:00,20.4463311097635 709 | 2015-03-27 20:00:00,19.6402877697842 710 | 2015-03-27 21:00:00,17.4593301435407 711 | 2015-03-27 22:00:00,18.0579710144928 712 | 2015-03-27 23:00:00,22.7525252525253 713 | 2015-03-28 00:00:00,20.9364548494983 714 | 2015-03-28 01:00:00,15.0909090909091 715 | 2015-03-28 02:00:00,15.4363636363636 716 | 2015-03-28 03:00:00,19.2470588235294 717 | 2015-03-28 04:00:00,20.7334437086093 718 | 2015-03-28 05:00:00,20.779744346116 719 | 2015-03-28 06:00:00,23.7188146106134 720 | 2015-03-28 
07:00:00,23.9590608067429 721 | 2015-03-28 08:00:00,23.6607422895975 722 | 2015-03-28 09:00:00,23.8080333854982 723 | 2015-03-28 10:00:00,25.1625329815303 724 | 2015-03-28 11:00:00,24.1467081389321 725 | 2015-03-28 12:00:00,27.2292011019284 726 | 2015-03-28 13:00:00,26.2145127118644 727 | 2015-03-28 14:00:00,22.1184558434691 728 | 2015-03-28 15:00:00,24.7711962833914 729 | 2015-03-28 16:00:00,21.2360069244085 730 | 2015-03-28 17:00:00,18.9884918231375 731 | 2015-03-28 18:00:00,19.8089552238806 732 | 2015-03-28 19:00:00,21.0963696369637 733 | 2015-03-28 20:00:00,16.9170274170274 734 | 2015-03-28 21:00:00,16.0174311926605 735 | 2015-03-28 22:00:00,18.3006872852234 736 | 2015-03-28 23:00:00,32.7559523809524 737 | 2015-03-29 00:00:00,27.5529953917051 738 | 2015-03-29 01:00:00,24.4081632653061 739 | 2015-03-29 02:00:00,23.734375 740 | 2015-03-29 03:00:00,21.4514767932489 741 | 2015-03-29 04:00:00,16.919587628866 742 | 2015-03-29 05:00:00,23.0262329485834 743 | 2015-03-29 06:00:00,22.6066763425254 744 | 2015-03-29 07:00:00,22.7418414918415 745 | 2015-03-29 08:00:00,23.4832251082251 746 | 2015-03-29 09:00:00,24.1176770249618 747 | 2015-03-29 10:00:00,25.9053921568627 748 | 2015-03-29 11:00:00,23.6878172588832 749 | 2015-03-29 12:00:00,22.6437768240343 750 | 2015-03-29 13:00:00,24.5553145336226 751 | 2015-03-29 14:00:00,27.0761501850873 752 | 2015-03-29 15:00:00,21.9830754352031 753 | 2015-03-29 16:00:00,22.2315837937385 754 | 2015-03-29 17:00:00,21.2672527472528 755 | 2015-03-29 18:00:00,21.3931902985075 756 | 2015-03-29 19:00:00,19.5539702233251 757 | 2015-03-29 20:00:00,23.5206124852768 758 | 2015-03-29 21:00:00,27.3165467625899 759 | 2015-03-29 22:00:00,21.8833333333333 760 | 2015-03-29 23:00:00,19.547263681592 761 | 2015-03-30 00:00:00,31.5057471264368 762 | 2015-03-30 01:00:00,22.9677419354839 763 | 2015-03-30 02:00:00,17.241935483871 764 | 2015-03-30 03:00:00,21.8156028368794 765 | 2015-03-30 04:00:00,19.6824512534819 766 | 2015-03-30 05:00:00,23.3267918088737 767 | 
2015-03-30 06:00:00,23.4364381956649 768 | -------------------------------------------------------------------------------- /man/AnomalyDetectionTs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.0): do not edit by hand 2 | % Please edit documentation in R/ts_anom_detection.R 3 | \docType{data} 4 | \name{AnomalyDetectionTs} 5 | \alias{AnomalyDetectionTs} 6 | \title{Anomaly Detection Using Seasonal Hybrid ESD Test} 7 | \usage{ 8 | AnomalyDetectionTs(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 9 | only_last = NULL, threshold = "None", e_value = FALSE, 10 | longterm = FALSE, piecewise_median_period_weeks = 2, plot = FALSE, 11 | y_log = FALSE, xlabel = "", ylabel = "count", title = NULL, 12 | verbose = FALSE) 13 | } 14 | \arguments{ 15 | \item{x}{Time series as a two column data frame where the first column consists of the 16 | timestamps and the second column consists of the observations.} 17 | 18 | \item{max_anoms}{Maximum number of anomalies that S-H-ESD will detect as a percentage of the 19 | data.} 20 | 21 | \item{direction}{Directionality of the anomalies to be detected. Options are: 22 | \code{'pos' | 'neg' | 'both'}.} 23 | 24 | \item{alpha}{The level of statistical significance with which to accept or reject anomalies.} 25 | 26 | \item{only_last}{Find and report anomalies only within the last day or hr in the time series. 27 | \code{NULL | 'day' | 'hr'}.} 28 | 29 | \item{threshold}{Only report positive going anoms above the threshold specified. Options are: 30 | \code{'None' | 'med_max' | 'p95' | 'p99'}.} 31 | 32 | \item{e_value}{Add an additional column to the anoms output containing the expected value.} 33 | 34 | \item{longterm}{Increase anom detection efficacy for time series that are greater than a month. 35 | See Details below.} 36 | 37 | \item{piecewise_median_period_weeks}{The piecewise median time window as described in Vallis, Hochenbaum, and Kejariwal (2014). 
38 | Defaults to 2.} 39 | 40 | \item{plot}{A flag indicating if a plot with both the time series and the estimated anoms, 41 | indicated by circles, should also be returned.} 42 | 43 | \item{y_log}{Apply log scaling to the y-axis. This helps with viewing plots that have extremely 44 | large positive anomalies relative to the rest of the data.} 45 | 46 | \item{xlabel}{X-axis label to be added to the output plot.} 47 | 48 | \item{ylabel}{Y-axis label to be added to the output plot.} 49 | 50 | \item{title}{Title for the output plot.} 51 | 52 | \item{verbose}{Enable debug messages} 53 | } 54 | \value{ 55 | The returned value is a list with the following components. 56 | 57 | \item{anoms}{Data frame containing timestamps, values, and optionally expected values.} 58 | 59 | \item{plot}{A graphical object if plotting was requested by the user. The plot contains 60 | the estimated anomalies annotated on the input time series.} 61 | 62 | One can save \code{anoms} to a file in the following fashion (with \code{res} holding the returned list): 63 | \code{write.csv(res[["anoms"]], file="anoms.csv")} 64 | 65 | One can save \code{plot} to a file in the following fashion (with \code{res} holding the returned list): 66 | \code{ggsave("anoms.png", plot=res[["plot"]])} 67 | } 68 | \description{ 69 | A technique for detecting anomalies in seasonal univariate time series where the input is a 70 | series of (timestamp, count) pairs. 71 | } 72 | \details{ 73 | \code{longterm} This option should be set when the input time series is longer than a month. 74 | The option enables the approach described in Vallis, Hochenbaum, and Kejariwal (2014).\cr\cr 75 | \code{threshold} Filter all negative anomalies and those anomalies whose magnitude is smaller 76 | than one of the specified thresholds which include: the median 77 | of the daily max values (med_max), the 95th percentile of the daily max values (p95), and the 78 | 99th percentile of the daily max values (p99). 
79 | } 80 | \examples{ 81 | data(raw_data) 82 | AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', plot=TRUE) 83 | # To detect only the anomalies on the last day, run the following: 84 | AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', only_last="day", plot=TRUE) 85 | } 86 | \references{ 87 | Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) "A Novel Technique for 88 | Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA. 89 | 90 | Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier Procedure" 91 | , Technometrics, 25(2), pp. 165-172. 92 | } 93 | \seealso{ 94 | \code{\link{AnomalyDetectionVec}} 95 | } 96 | \keyword{datasets} 97 | 98 | -------------------------------------------------------------------------------- /man/AnomalyDetectionVec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.0): do not edit by hand 2 | % Please edit documentation in R/vec_anom_detection.R 3 | \docType{data} 4 | \name{AnomalyDetectionVec} 5 | \alias{AnomalyDetectionVec} 6 | \title{Anomaly Detection Using Seasonal Hybrid ESD Test} 7 | \usage{ 8 | AnomalyDetectionVec(x, max_anoms = 0.1, direction = "pos", alpha = 0.05, 9 | period = NULL, only_last = F, threshold = "None", e_value = F, 10 | longterm_period = NULL, plot = F, y_log = F, xlabel = "", 11 | ylabel = "count", title = NULL, verbose = FALSE) 12 | } 13 | \arguments{ 14 | \item{x}{Time series as a column data frame, list, or vector, where the column consists of 15 | the observations.} 16 | 17 | \item{max_anoms}{Maximum number of anomalies that S-H-ESD will detect as a percentage of the 18 | data.} 19 | 20 | \item{direction}{Directionality of the anomalies to be detected. 
Options are: 21 | \code{'pos' | 'neg' | 'both'}.} 22 | 23 | \item{alpha}{The level of statistical significance with which to accept or reject anomalies.} 24 | 25 | \item{period}{Defines the number of observations in a single period, and used during seasonal 26 | decomposition.} 27 | 28 | \item{only_last}{Find and report anomalies only within the last period in the time series.} 29 | 30 | \item{threshold}{Only report positive going anoms above the threshold specified. Options are: 31 | \code{'None' | 'med_max' | 'p95' | 'p99'}.} 32 | 33 | \item{e_value}{Add an additional column to the anoms output containing the expected value.} 34 | 35 | \item{longterm_period}{Defines the number of observations for which the trend can be considered 36 | flat. The value should be an integer multiple of the number of observations in a single period. 37 | This increases anom detection efficacy for time series that are greater than a month.} 38 | 39 | \item{plot}{A flag indicating if a plot with both the time series and the estimated anoms, 40 | indicated by circles, should also be returned.} 41 | 42 | \item{y_log}{Apply log scaling to the y-axis. This helps with viewing plots that have extremely 43 | large positive anomalies relative to the rest of the data.} 44 | 45 | \item{xlabel}{X-axis label to be added to the output plot.} 46 | 47 | \item{ylabel}{Y-axis label to be added to the output plot.} 48 | 49 | \item{title}{Title for the output plot.} 50 | 51 | \item{verbose}{Enable debug messages} 52 | } 53 | \value{ 54 | The returned value is a list with the following components. 55 | 56 | \item{anoms}{Data frame containing index, values, and optionally expected values.} 57 | 58 | \item{plot}{A graphical object if plotting was requested by the user. 
The plot contains 59 | the estimated anomalies annotated on the input time series.} 60 | 61 | One can save \code{anoms} to a file in the following fashion (with \code{res} holding the returned list): 62 | \code{write.csv(res[["anoms"]], file="anoms.csv")} 63 | 64 | One can save \code{plot} to a file in the following fashion (with \code{res} holding the returned list): 65 | \code{ggsave("anoms.png", plot=res[["plot"]])} 66 | } 67 | \description{ 68 | A technique for detecting anomalies in seasonal univariate time series where the input is a 69 | series of observations. 70 | } 71 | \details{ 72 | \code{longterm_period} This option should be set when the input time series is longer than a month. 73 | The option enables the approach described in Vallis, Hochenbaum, and Kejariwal (2014).\cr\cr 74 | \code{threshold} Filter all negative anomalies and those anomalies whose magnitude is smaller 75 | than one of the specified thresholds which include: the median 76 | of the daily max values (med_max), the 95th percentile of the daily max values (p95), and the 77 | 99th percentile of the daily max values (p99). 78 | } 79 | \examples{ 80 | data(raw_data) 81 | AnomalyDetectionVec(raw_data[,2], max_anoms=0.02, period=1440, direction='both', plot=TRUE) 82 | # To detect only the anomalies in the last period, run the following: 83 | AnomalyDetectionVec(raw_data[,2], max_anoms=0.02, period=1440, direction='both', 84 | only_last=TRUE, plot=TRUE) 85 | } 86 | \references{ 87 | Vallis, O., Hochenbaum, J. and Kejariwal, A., (2014) "A Novel Technique for 88 | Long-Term Anomaly Detection in the Cloud", 6th USENIX, Philadelphia, PA. 89 | 90 | Rosner, B., (May 1983), "Percentage Points for a Generalized ESD Many-Outlier Procedure" 91 | , Technometrics, 25(2), pp. 165-172. 
92 | } 93 | \seealso{ 94 | \code{\link{AnomalyDetectionTs}} 95 | } 96 | \keyword{datasets} 97 | 98 | -------------------------------------------------------------------------------- /man/raw_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.1.0): do not edit by hand 2 | % Please edit documentation in R/raw_data.R 3 | \docType{data} 4 | \name{raw_data} 5 | \alias{raw_data} 6 | \title{raw_data} 7 | \usage{ 8 | data(raw_data) 9 | } 10 | \description{ 11 | A data frame containing a time series with headings timestamp and count. 12 | } 13 | 14 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library('testthat') 2 | test_check("AnomalyDetection") 3 | -------------------------------------------------------------------------------- /tests/testthat/test-NAs.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Testing NAs") 4 | 5 | test_that("check handling of datasets with leading and trailing NAs", { 6 | raw_data[1:10, "count"] <- NA 7 | raw_data[length(raw_data[[2L]]), "count"] <- NA 8 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', plot=T) 9 | expect_equal(length(results$anoms), 2) 10 | expect_equal(length(results$anoms[[2L]]), 131) 11 | expect_equal(class(results$plot), c("gg", "ggplot")) 12 | }) 13 | 14 | test_that("check handling of datasets with NAs in the middle", { 15 | raw_data[floor(length(raw_data[[2L]])/2), "count"] <- NA 16 | expect_error(AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both')) 17 | }) 18 | 19 | test_that("check handling of datasets with leading and trailing NAs", { 20 | raw_data[1:10, "count"] <- NA 21 | raw_data[length(raw_data[[2L]]), "count"] <- NA 22 | results <- AnomalyDetectionVec(raw_data[[2]], max_anoms=0.02, 
period=1440, direction='both', plot=T) 23 | expect_equal(length(results$anoms), 2) 24 | expect_equal(length(results$anoms[[2L]]), 131) 25 | expect_equal(class(results$plot), c("gg", "ggplot")) 26 | }) 27 | 28 | test_that("check handling of datasets with NAs in the middle", { 29 | raw_data[floor(length(raw_data[[2L]])/2), "count"] <- NA 30 | expect_error(AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, period=1440, direction='both')) 31 | }) 32 | -------------------------------------------------------------------------------- /tests/testthat/test-edge.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Testing edge cases") 4 | 5 | test_that("checking for errors if time series has constant value for all values", { 6 | data <- rep(1,1000) 7 | expect_true({AnomalyDetectionVec(data, period=14, plot=T, direction='both'); TRUE}) 8 | 9 | }) 10 | 11 | test_that("checking that midnight dates get H%M%S format applied", { 12 | data_file <- system.file("extdata", "data.csv", package="AnomalyDetection") 13 | data <- read.csv(data_file) 14 | data$date <- as.POSIXct(strptime(data$date, "%Y-%m-%d %H:%M", tz = "UTC")) 15 | anomalyDetectionResult <- AnomalyDetectionTs(data, max_anoms=0.2, threshold = "None", 16 | direction='both', plot=FALSE, 17 | only_last = "day", e_value = TRUE) 18 | 19 | expect_equal(length(anomalyDetectionResult$anoms$anoms), length(anomalyDetectionResult$anoms$expected_value)) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test-ts.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | context("Evaluation: AnomalyDetectionTs") 3 | 4 | test_that("last day, both directions, with plot", { 5 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', only_last='day', plot=T) 6 | expect_equal(length(results$anoms), 2) 7 | 
expect_equal(length(results$anoms[[2L]]), 25) 8 | expect_equal(class(results$plot), c("gg", "ggplot")) 9 | }) 10 | 11 | test_that("both directions, e_value, with longterm", { 12 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', longterm=TRUE, e_value=TRUE) 13 | expect_equal(length(results$anoms), 3) 14 | expect_equal(length(results$anoms[[2L]]), 131) 15 | expect_equal(results$plot, NULL) 16 | }) 17 | 18 | test_that("both directions, e_value, threshold set to med_max", { 19 | results <- AnomalyDetectionTs(raw_data, max_anoms=0.02, direction='both', threshold="med_max", e_value=TRUE) 20 | expect_equal(length(results$anoms), 3) 21 | expect_equal(length(results$anoms[[2L]]), 4) 22 | expect_equal(results$plot, NULL) 23 | }) 24 | -------------------------------------------------------------------------------- /tests/testthat/test-vec.R: -------------------------------------------------------------------------------- 1 | library(AnomalyDetection) 2 | 3 | context("Evaluation: AnomalyDetectionVec") 4 | 5 | test_that("last period, both directions, with plot", { 6 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, only_last=TRUE, plot=T) 7 | expect_equal(length(results$anoms), 2) 8 | expect_equal(length(results$anoms[[2L]]), 25) 9 | expect_equal(class(results$plot), c("gg", "ggplot")) 10 | }) 11 | 12 | test_that("both directions, e_value, with longterm", { 13 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, longterm_period=1440*14, e_value=TRUE) 14 | expect_equal(length(results$anoms), 3) 15 | expect_equal(length(results$anoms[[2L]]), 131) 16 | expect_equal(results$plot, NULL) 17 | }) 18 | 19 | test_that("both directions, e_value, threshold set to med_max", { 20 | results <- AnomalyDetectionVec(raw_data[[2L]], max_anoms=0.02, direction='both', period=1440, threshold="med_max", e_value=TRUE) 21 | expect_equal(length(results$anoms), 3) 22 | 
expect_equal(length(results$anoms[[2L]]), 6) 23 | expect_equal(results$plot, NULL) 24 | }) 25 | --------------------------------------------------------------------------------