├── .gitignore ├── LICENSE.txt ├── README.md ├── edgehash.go ├── edgehash_test.go ├── examples ├── README.md ├── auc.py └── main.go ├── midas.go ├── midasr.go ├── nodehash.go └── nodehash_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | ### Project specific ### 2 | 3 | examples/*.csv 4 | examples/scores.txt 5 | 6 | ### Go ### 7 | # Binaries for programs and plugins 8 | *.exe 9 | *.exe~ 10 | *.dll 11 | *.so 12 | *.dylib 13 | 14 | # Test binary, built with `go test -c` 15 | *.test 16 | 17 | # Output of the go coverage tool, specifically when used with LiteIDE 18 | *.out 19 | 20 | # Dependency directories (remove the comment below to include it) 21 | # vendor/ 22 | 23 | ### Go Patch ### 24 | /vendor/ 25 | /Godeps/ 26 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MIDAS 2 | 3 | This is an implementation of [MIDAS](https://github.com/bhatiasiddharth/MIDAS) - edge stream anomaly detection but implemented in Go. 4 | 5 | For more information about how it works, please checkout the [resources](#resources) section. 
6 | 7 | ## Usage and installation 8 | 9 | [Read the docs](https://godoc.org/github.com/steve0hh/midas) 10 | 11 | Check out the `examples` folder for detailed instructions. 12 | 13 | ```go 14 | import ( 15 | "github.com/steve0hh/midas" 16 | "fmt" 17 | ) 18 | 19 | func main () { 20 | src := []int{2,2,3,3,5,5,7,11,1,2} 21 | dst := []int{3,3,4,4,9,9,73,74,75,76} 22 | times := []int{1,1,2,2,2,2,2,2,2,2} 23 | 24 | 25 | // using the batch functions to score the edges 26 | midasAnormScore := midas.Midas(src, dst, times, 2, 769) 27 | midasRAnormScore := midas.MidasR(src, dst, times, 2, 769, 0.6) 28 | 29 | fmt.Println(midasAnormScore) 30 | fmt.Println(midasRAnormScore) 31 | 32 | // using the sklearn-style FitPredict API for midas 33 | m := midas.NewMidasModel(2, 769, 9460) 34 | fmt.Println(m.FitPredict(2,3,1)) 35 | fmt.Println(m.FitPredict(2,3,1)) 36 | fmt.Println(m.FitPredict(3,4,2)) 37 | 38 | // using the sklearn-style FitPredict API for midasR 39 | mr := midas.NewMidasRModel(2, 769, 9460, 0.6) 40 | fmt.Println(mr.FitPredict(2,3,1)) 41 | fmt.Println(mr.FitPredict(2,3,1)) 42 | fmt.Println(mr.FitPredict(3,4,2)) 43 | } 44 | ``` 45 | 46 | ## Resources 47 | 48 | - [Original implementation of MIDAS in C++](https://github.com/bhatiasiddharth/MIDAS) 49 | - [MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams](https://www.comp.nus.edu.sg/~sbhatia/assets/pdf/midas.pdf) 50 | 51 | 52 | ## Citation 53 | If you use this code for your research, please consider citing the original paper. 54 | 55 | ``` 56 | @article{bhatia2019midas, 57 | title={MIDAS: Microcluster-Based Detector of Anomalies in Edge Streams}, 58 | author={Bhatia, Siddharth and Hooi, Bryan and Yoon, Minji and Shin, Kijung and Faloutsos, Christos}, 59 | journal={arXiv preprint arXiv:1911.04464}, 60 | year={2019} 61 | } 62 | ``` 63 | 64 | ## Contributing 65 | 66 | Everyone is encouraged to help improve this project. 
Here are a few ways you can help: 67 | 68 | - Report bugs 69 | - Fix bugs and submit pull requests 70 | - Write, clarify, or fix documentation 71 | - Suggest or add new features 72 | 73 | ## TODOs 74 | 75 | - [ ] Godocs documentation 76 | - [x] Add sklearn/keras fit & predict API 77 | - [ ] More examples and tests 78 | - [ ] Make code more efficient 79 | -------------------------------------------------------------------------------- /edgehash.go: -------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import ( 4 | "math" 5 | "math/rand" 6 | ) 7 | /* EdgeHash is a numRows x numBuckets table of float64 counters keyed by an integer pair (a, b); each row has its own hashA/hashB coefficients and m is the multiplier Hash applies to b. */ 8 | type EdgeHash struct { 9 | numRows int 10 | numBuckets int 11 | m int 12 | hashA []int 13 | hashB []int 14 | count [][]float64 15 | } 16 | /* randomIntSlice returns size values, each computed as rand.Intn(max-min) + min. NOTE(review): rand.Intn panics when max-min <= 0, which NewEdgeHash triggers for numBuckets == 1. */ 17 | func randomIntSlice(min int, max int, size int) []int { 18 | a := make([]int, size) 19 | for i, _ := range a { 20 | a[i] = rand.Intn(max-min) + min 21 | } 22 | return a 23 | } 24 | /* NewEdgeHash allocates a zeroed numRows x numBuckets table and fills hashA via randomIntSlice(1, numBuckets, numRows) and hashB via randomIntSlice(0, numBuckets, numRows). */ 25 | func NewEdgeHash(numRows int, numBuckets int, m int) *EdgeHash { 26 | count := make([][]float64, numRows) 27 | for i, _ := range count { 28 | count[i] = make([]float64, numBuckets) 29 | } 30 | 31 | return &EdgeHash{ 32 | numRows: numRows, 33 | numBuckets: numBuckets, 34 | m: m, 35 | hashA: randomIntSlice(1, numBuckets, numRows), 36 | hashB: randomIntSlice(0, numBuckets, numRows), 37 | count: count, 38 | } 39 | } 40 | /* Hash returns the bucket of (a, b) in row i: ((a + m*b)*hashA[i] + hashB[i]) % numBuckets, with numBuckets added when the remainder is negative. */ 41 | func (e *EdgeHash) Hash(a int, b int, i int) int { 42 | resid := ((a+e.m*b)*e.hashA[i] + e.hashB[i]) % e.numBuckets 43 | if resid < 0 { 44 | return resid + e.numBuckets 45 | } else { 46 | return resid 47 | } 48 | } 49 | /* Insert adds weight to the (a, b) bucket of every row. */ 50 | func (e *EdgeHash) Insert(a int, b int, weight float64) { 51 | for i := 0; i < e.numRows; i++ { 52 | bucket := e.Hash(a, b, i) 53 | e.count[i][bucket] += weight 54 | } 55 | } 56 | /* GetCount returns the smallest (a, b) bucket value over all rows. */ 57 | func (e *EdgeHash) GetCount(a int, b int) float64 { 58 | bucket := e.Hash(a, b, 0) 59 | minCount := e.count[0][bucket] 60 | for i := 1; i < e.numRows; i++ { 61 | bucket = e.Hash(a, b, i) 62 | minCount = math.Min(minCount, 
e.count[i][bucket]) 63 | } 64 | 65 | return minCount 66 | } 67 | /* Clear sets every counter in the table to 0. */ 68 | func (e *EdgeHash) Clear() { 69 | for i, row := range e.count { 70 | for j, _ := range row { 71 | e.count[i][j] = 0 72 | } 73 | } 74 | } 75 | /* Lower multiplies every counter in the table by factor. */ 76 | func (e *EdgeHash) Lower(factor float64) { 77 | for i, row := range e.count { 78 | for j, _ := range row { 79 | e.count[i][j] *= factor 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /edgehash_test.go: -------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import ( 4 | "testing" 5 | ) 6 | /* TestEdgenodeClear checks that Clear zeroes a counter that was set directly. */ 7 | func TestEdgenodeClear(t *testing.T) { 8 | a := NewEdgeHash(10, 4, 0) 9 | a.count[0][0] = 2 10 | a.Clear() 11 | if a.count[0][0] != 0 { 12 | t.Error("table not cleared") 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ## Running main.go 2 | 3 | **Download data** 4 | 5 | ```bash 6 | $ # Download darpa dataset in MIDAS format 7 | $ curl -O https://www.comp.nus.edu.sg/~sbhatia/assets/datasets/darpa_midas.csv 8 | $ # Download original darpa dataset 9 | $ curl -O https://www.comp.nus.edu.sg/~sbhatia/assets/datasets/darpa_original.csv 10 | ``` 11 | 12 | ```bash 13 | $ go run main.go > scores.txt 14 | $ python auc.py # checking the AUC 15 | AUC: 0.9171735033552131 16 | 17 | $ go run main.go --norelations > scores.txt # use midas instead of midasR 18 | $ python auc.py # checking the AUC 19 | AUC: 0.9467124341220924 20 | ``` 21 | -------------------------------------------------------------------------------- /examples/auc.py: -------------------------------------------------------------------------------- 1 | from sklearn import metrics 2 | from sklearn.metrics import precision_recall_curve 3 | import pandas as pd 4 | 5 | data = pd.read_csv("darpa_original.csv", names=['src', 'dst', 'time', 'label']) 6 | 
is_anom = data.label != '-' 7 | 8 | scores = pd.read_csv("scores.txt", header=None).squeeze("columns") 9 | fpr, tpr, _ = metrics.roc_curve(is_anom, scores) 10 | auc = metrics.roc_auc_score(is_anom, scores) 11 | precision, recall, _ = metrics.precision_recall_curve(is_anom, scores) 12 | print("AUC: ", auc) 13 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "os" 6 | "log" 7 | "bufio" 8 | "io" 9 | "strconv" 10 | "github.com/steve0hh/midas" 11 | "fmt" 12 | "flag" 13 | ) 14 | /* main reads ./darpa_midas.csv as (src, dst, time) integer rows, scores every edge with midas.MidasR (or midas.Midas when -norelations is set) and prints one score per line. */ 15 | func main() { 16 | norelations := flag.Bool("norelations", false, "Run MIDAS instead of MIDAS-R") 17 | flag.Parse() 18 | 19 | f, err := os.Open("./darpa_midas.csv") 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | defer f.Close() /* release the input file on the normal return path */ 24 | buff := bufio.NewReader(f) 25 | r := csv.NewReader(buff) 26 | 27 | src := []int{} 28 | dst := []int{} 29 | times := []int{} 30 | 31 | for { 32 | record, err := r.Read() 33 | if err == io.EOF { 34 | break 35 | } 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | 40 | s, err := strconv.ParseInt(record[0], 10, 0) 41 | if err != nil { 42 | log.Fatal(err) 43 | } 44 | 45 | d, err := strconv.ParseInt(record[1], 10, 0) 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | 50 | t, err := strconv.ParseInt(record[2], 10, 0) 51 | if err != nil { 52 | log.Fatal(err) 53 | } 54 | 55 | src = append(src, int(s)) 56 | dst = append(dst, int(d)) 57 | times = append(times, int(t)) 58 | } 59 | 60 | var anormScore []float64 61 | if *norelations { 62 | anormScore = midas.Midas(src, dst, times, 2, 769) 63 | } else { 64 | anormScore = midas.MidasR(src, dst, times, 2, 769, 0.6) 65 | } 66 | 67 | for _, v := range anormScore { 68 | fmt.Println(v) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /midas.go: 
-------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import ( 4 | "math" 5 | ) 6 | // max returns the larger of a and b. 7 | func max(a float64, b float64) float64 { 8 | if a > b { 9 | return a 10 | } 11 | return b 12 | } 13 | // biggest returns the largest element of values, or 0 when values is empty. 14 | func biggest(values []int) int { 15 | var b int 16 | for i, v := range values { 17 | if i == 0 || b < v { 18 | b = v 19 | } 20 | } 21 | return b 22 | } 23 | 24 | // MidasModel holds the per-tick (curCount) and all-time (totalCount) edge tables and the current tick curT used by Fit and FitPredict. 25 | type MidasModel struct { 26 | curCount *EdgeHash 27 | totalCount *EdgeHash 28 | curT int 29 | } 30 | 31 | // NewMidasModel creates a MidasModel that enables the use of the 32 | // Fit and FitPredict API; the current tick starts at 1. 33 | func NewMidasModel(numRows int, numBuckets int, m int) *MidasModel { 34 | return &MidasModel{ 35 | totalCount: NewEdgeHash(numRows, numBuckets, m), 36 | curCount: NewEdgeHash(numRows, numBuckets, m), 37 | curT: 1, 38 | } 39 | } 40 | 41 | // Fit records the edge (src, dst) at the given time, similar to the sklearn API. 42 | // A time later than the current tick clears the per-tick counts first. 43 | func (m *MidasModel) Fit(src, dst, time int) { 44 | if time > m.curT { 45 | m.curCount.Clear() 46 | m.curT = time 47 | } 48 | m.curCount.Insert(src, dst, 1) 49 | m.totalCount.Insert(src, dst, 1) 50 | } 51 | 52 | // FitPredict fits the edge (src, dst, time) to the MidasModel and 53 | // returns its anomaly score, which is 0 while the current tick is 1. 54 | func (m *MidasModel) FitPredict(src, dst, time int) float64 { 55 | m.Fit(src, dst, time) 56 | curMean := m.totalCount.GetCount(src, dst) / float64(m.curT) 57 | sqerr := math.Pow(m.curCount.GetCount(src, dst)-curMean, 2) 58 | var curScore float64 59 | if m.curT == 1 { 60 | curScore = 0 61 | } else { 62 | curScore = sqerr/curMean + sqerr/(curMean*(float64(m.curT)-1)) 63 | } 64 | return curScore 65 | } 66 | 67 | // Midas takes parallel lists of sources, destinations and times and returns the anomaly score of each edge. 68 | // This function mirrors the implementation of https://github.com/bhatiasiddharth/MIDAS 69 | func Midas(src []int, dst []int, times []int, numRows int, numBuckets int) []float64 { 70 | m := biggest(src) 71 | curCount := 
NewEdgeHash(numRows, numBuckets, m) 72 | totalCount := NewEdgeHash(numRows, numBuckets, m) 73 | anomScore := make([]float64, len(src)) 74 | curT := 1 75 | for i, _ := range src { 76 | if i == 0 || times[i] > curT { 77 | curCount.Clear() 78 | curT = times[i] 79 | } 80 | 81 | curSrc := src[i] 82 | curDst := dst[i] 83 | curCount.Insert(curSrc, curDst, 1) 84 | totalCount.Insert(curSrc, curDst, 1) 85 | curMean := totalCount.GetCount(curSrc, curDst) / float64(curT) 86 | sqerr := math.Pow(curCount.GetCount(curSrc, curDst)-curMean, 2) 87 | 88 | var curScore float64 89 | if curT == 1 { 90 | curScore = 0 91 | } else { 92 | curScore = sqerr/curMean + sqerr/(curMean*(float64(curT)-1)) 93 | } 94 | anomScore[i] = curScore 95 | } 96 | return anomScore 97 | } 98 | -------------------------------------------------------------------------------- /midasr.go: -------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import ( 4 | "math" 5 | ) 6 | /* countsToAnom turns an all-time count tot and a current count cur at tick curT into an anomaly score. The expected count per tick is tot/curT (the same mean Midas uses); only a positive excess of cur over that mean contributes. */ 7 | func countsToAnom(tot float64, cur float64, curT int) float64 { 8 | curMean := tot / float64(curT) 9 | sqerr := math.Pow(max(0, cur-curMean), 2) 10 | return sqerr/curMean + sqerr/(curMean*max(1.0, float64(curT-1))) 11 | } 12 | /* MidasRModel holds the decayed tables (curCount, srcScore, dstScore), the all-time tables (totalCount, srcTotal, dstTotal), the current tick curT and the decay factor used by Fit and FitPredict. */ 13 | type MidasRModel struct { 14 | curCount *EdgeHash 15 | totalCount *EdgeHash 16 | srcScore *NodeHash 17 | dstScore *NodeHash 18 | srcTotal *NodeHash 19 | dstTotal *NodeHash 20 | curT int 21 | factor float64 22 | } 23 | /* NewMidasRModel creates a MidasRModel whose tables are numRows x numBuckets; the current tick starts at 1 and factor is the multiplier Fit applies to the decayed tables when the tick advances. */ 24 | func NewMidasRModel(numRows int, numBuckets int, m int, factor float64) *MidasRModel { 25 | return &MidasRModel{ 26 | curCount: NewEdgeHash(numRows, numBuckets, m), 27 | totalCount: NewEdgeHash(numRows, numBuckets, m), 28 | srcScore: NewNodeHash(numRows, numBuckets), 29 | dstScore: NewNodeHash(numRows, numBuckets), 30 | srcTotal: NewNodeHash(numRows, numBuckets), 31 | dstTotal: NewNodeHash(numRows, numBuckets), 32 | curT: 1, 33 | factor: factor, 34 | } 35 | } 36 | /* Fit records the edge (src, dst) at the given time; a time later than the current tick first scales the decayed tables by factor. */ 37 | func (m *MidasRModel) Fit(src, dst, time int) { 38 | if time > m.curT { 39 | 
m.curCount.Lower(m.factor) 40 | m.srcScore.Lower(m.factor) 41 | m.dstScore.Lower(m.factor) 42 | m.curT = time 43 | } 44 | m.curCount.Insert(src, dst, 1) 45 | m.totalCount.Insert(src, dst, 1) 46 | m.srcScore.Insert(src, 1) 47 | m.dstScore.Insert(dst, 1) /* keyed by dst, matching the dstScore.GetCount(dst) lookup in FitPredict */ 48 | m.srcTotal.Insert(src, 1) 49 | m.dstTotal.Insert(dst, 1) 50 | } 51 | /* FitPredict fits the edge (src, dst, time) and returns log(1 + s), where s is the largest of the edge, source and destination scores computed by countsToAnom at the model's current tick. */ 52 | func (m *MidasRModel) FitPredict(src, dst, time int) float64 { 53 | m.Fit(src, dst, time) 54 | score := countsToAnom(m.totalCount.GetCount(src, dst), m.curCount.GetCount(src, dst), m.curT) 55 | scoreSrc := countsToAnom(m.srcTotal.GetCount(src), m.srcScore.GetCount(src), m.curT) 56 | scoreDst := countsToAnom(m.dstTotal.GetCount(dst), m.dstScore.GetCount(dst), m.curT) 57 | combinedScore := max(max(scoreSrc, scoreDst), score) 58 | return math.Log(1 + combinedScore) 59 | } 60 | /* MidasR scores every edge of the parallel src, dst and times lists in order and returns log(1 + s) per edge, where s is the largest of the edge, source and destination scores; factor scales the decayed tables whenever the tick advances. */ 61 | func MidasR(src []int, dst []int, times []int, numRows int, numBuckets int, factor float64) []float64 { 62 | m := biggest(src) 63 | curCount := NewEdgeHash(numRows, numBuckets, m) 64 | totalCount := NewEdgeHash(numRows, numBuckets, m) 65 | 66 | srcScore := NewNodeHash(numRows, numBuckets) 67 | dstScore := NewNodeHash(numRows, numBuckets) 68 | srcTotal := NewNodeHash(numRows, numBuckets) 69 | dstTotal := NewNodeHash(numRows, numBuckets) 70 | 71 | anomScore := make([]float64, len(src)) 72 | var curT, curSrc, curDst int 73 | curT = 1 74 | var curScore, curScoreSrc, curScoreDst, combinedScore float64 75 | 76 | for i, _ := range src { 77 | if i == 0 || times[i] > curT { 78 | curCount.Lower(factor) 79 | srcScore.Lower(factor) 80 | dstScore.Lower(factor) 81 | curT = times[i] 82 | } 83 | 84 | curSrc = src[i] 85 | curDst = dst[i] 86 | curCount.Insert(curSrc, curDst, 1) 87 | totalCount.Insert(curSrc, curDst, 1) 88 | srcScore.Insert(curSrc, 1) 89 | dstScore.Insert(curDst, 1) 90 | srcTotal.Insert(curSrc, 1) 91 | dstTotal.Insert(curDst, 1) 92 | curScore = countsToAnom(totalCount.GetCount(curSrc, curDst), curCount.GetCount(curSrc, curDst), curT) 93 | curScoreSrc = 
countsToAnom(srcTotal.GetCount(curSrc), srcScore.GetCount(curSrc), curT) 94 | curScoreDst = countsToAnom(dstTotal.GetCount(curDst), dstScore.GetCount(curDst), curT) 95 | combinedScore = max(max(curScoreSrc, curScoreDst), curScore) 96 | anomScore[i] = math.Log(1 + combinedScore) 97 | } 98 | return anomScore 99 | } 100 | -------------------------------------------------------------------------------- /nodehash.go: -------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import "math" 4 | /* NodeHash is a numRows x numBuckets table of float64 counters keyed by a single integer; each row has its own hashA/hashB coefficients. */ 5 | type NodeHash struct { 6 | numRows int 7 | numBuckets int 8 | hashA []int 9 | hashB []int 10 | count [][]float64 11 | } 12 | /* NewNodeHash allocates a zeroed numRows x numBuckets table and fills hashA via randomIntSlice(1, numBuckets, numRows) and hashB via randomIntSlice(0, numBuckets, numRows). */ 13 | func NewNodeHash(numRows int, numBuckets int) *NodeHash { 14 | count := make([][]float64, numRows) 15 | 16 | for i, _ := range count { 17 | count[i] = make([]float64, numBuckets) 18 | } 19 | 20 | return &NodeHash{ 21 | numRows: numRows, 22 | numBuckets: numBuckets, 23 | hashA: randomIntSlice(1, numBuckets, numRows), 24 | hashB: randomIntSlice(0, numBuckets, numRows), 25 | count: count, 26 | } 27 | } 28 | /* Hash returns the bucket of a in row i: (a*hashA[i] + hashB[i]) % numBuckets, with numBuckets added when the remainder is negative. */ 29 | func (n *NodeHash) Hash(a int, i int) int { 30 | resid := (a*n.hashA[i] + n.hashB[i]) % n.numBuckets 31 | if resid < 0 { 32 | return resid + n.numBuckets 33 | } else { 34 | return resid 35 | } 36 | } 37 | /* Insert adds weight to the bucket of a in every row. */ 38 | func (n *NodeHash) Insert(a int, weight float64) { 39 | for i := 0; i < n.numRows; i++ { 40 | bucket := n.Hash(a, i) 41 | n.count[i][bucket] += weight 42 | } 43 | } 44 | /* GetCount returns the smallest bucket value of a over all rows. */ 45 | func (n *NodeHash) GetCount(a int) float64 { 46 | bucket := n.Hash(a, 0) 47 | minCount := n.count[0][bucket] 48 | for i := 1; i < n.numRows; i++ { 49 | bucket = n.Hash(a, i) 50 | minCount = math.Min(minCount, n.count[i][bucket]) 51 | } 52 | return minCount 53 | } 54 | /* Clear sets every counter in the table to 0. */ 55 | func (n *NodeHash) Clear() { 56 | for i, row := range n.count { 57 | for j, _ := range row { 58 | n.count[i][j] = 0 59 | } 60 | } 61 | } 62 | /* Lower multiplies every counter in the table by factor. */ 63 | func (n *NodeHash) Lower(factor float64) { 64 | for i, row := range n.count { 65 | for j, _ := range row 
{ 66 | n.count[i][j] *= factor 67 | } 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /nodehash_test.go: -------------------------------------------------------------------------------- 1 | package midas 2 | 3 | import ( 4 | "testing" 5 | ) 6 | /* TestNodehashClear checks that Clear zeroes a counter that was set directly. */ 7 | func TestNodehashClear(t *testing.T) { 8 | a := NewNodeHash(10, 2) 9 | a.count[0][0] = 2 10 | a.Clear() 11 | if a.count[0][0] != 0 { 12 | t.Error("table not cleared") 13 | } 14 | } 15 | --------------------------------------------------------------------------------