├── codes ├── Vector.h ├── Graph.cpp ├── Graph.h ├── Edgeset.h ├── Embedding.h ├── Edge.h ├── AnomalyDetection.h ├── main.cpp ├── Edgeset.cpp ├── Embedding.cpp ├── Vector.cpp └── AnomalyDetection.cpp ├── METIS ├── manual.pdf ├── metis.lib └── metis.h └── README.md /codes/Vector.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurenjun/EmbeddingAnomalyDetection/HEAD/codes/Vector.h -------------------------------------------------------------------------------- /METIS/manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurenjun/EmbeddingAnomalyDetection/HEAD/METIS/manual.pdf -------------------------------------------------------------------------------- /METIS/metis.lib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurenjun/EmbeddingAnomalyDetection/HEAD/METIS/metis.lib -------------------------------------------------------------------------------- /codes/Graph.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hurenjun/EmbeddingAnomalyDetection/HEAD/codes/Graph.cpp -------------------------------------------------------------------------------- /codes/Graph.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Graph class firstly load a graph from input file, 3 | * then give a community structure initialization according to type: 4 | * 0: random 5 | * 1: heuristic (graph partition) 6 | * 7 | * cmties[i] : the ith community 8 | * cmties[i][0]: size of ith community 9 | * cmties[i][1]--cmties[i][size]: nodes of ith community 10 | */ 11 | 12 | #pragma once 13 | #include "Edge.h" 14 | #include 15 | #include 16 | 17 | using namespace std; 18 | 19 | class Graph 20 | { 21 | public: 22 | Graph(int, string &, int); 23 | ~Graph(); 24 | void 
printCmties(); 25 | int getNodeCount() const; 26 | int getEdgeCount() const; 27 | int getCmtyCount() const; 28 | Edge* getEdges() const; 29 | int ** getCmties() const; 30 | 31 | private: 32 | int nodeCount, edgeCount, cmtyCount; 33 | Edge *edges; 34 | int **cmties; 35 | 36 | void loadGraph(string &); 37 | void randonPartition(); 38 | int findCmty(int, int[]); 39 | void heuristicPartition(); 40 | void metisPartititon(); 41 | void randomEdgeset(); 42 | int getInterGroupEdgeCount(int *); 43 | }; 44 | 45 | 46 | -------------------------------------------------------------------------------- /codes/Edgeset.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Edgeset: store a array of either edges or non-edges 3 | * and implement the stress computing method for corresponding 4 | * edgeset or non-edgeset 5 | */ 6 | 7 | #pragma once 8 | #include "Edge.h" 9 | #include "Graph.h" 10 | #include "Vector.h" 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | class Edgeset{ 17 | public: 18 | Edgeset(const Graph & ); 19 | Edgeset(const Edgeset *); 20 | Edgeset(const Edgeset &, int **, int, int, int); 21 | ~Edgeset(); 22 | void printSet(); 23 | Edge getEdge(int) const; 24 | int getSize() const; 25 | double getBalance() const; 26 | double getStress(Vector *) const; 27 | void printStress(Vector *, ofstream &); 28 | private: 29 | int type = 0; 30 | bool sampling = false; 31 | double balance = 0; 32 | Edge *set = NULL; 33 | // edgeCount: edge count of the original graph 34 | // Not Size Of Set 35 | int size, nodeCount, edgeCount, cmtyCount; 36 | 37 | void randomNonedgeSet(const Edgeset &, int **); 38 | void getFullNonedgeSet(const Edgeset &); 39 | bool containEdge(int, int) const; 40 | int getSamplingSize(); 41 | void copyEdge(Edge *); 42 | }; 43 | -------------------------------------------------------------------------------- /codes/Embedding.h: 
-------------------------------------------------------------------------------- 1 | // Class Embedding implement the main procedure of gradient descent 2 | // Things it do: 3 | // 1). give each node a vector 4 | // 2). optimize the objective function, whose input is all variables in vectors 5 | // Gradient Descent: line search, backtracking that holdd Armijo rule 6 | 7 | #pragma once 8 | #include "Edgeset.h" 9 | #include "Vector.h" 10 | class Embedding 11 | { 12 | public: 13 | Embedding(int, double, double, double, string &, int, int); 14 | Embedding(int, double, double, double, string &, int); 15 | ~Embedding(); 16 | void gradientDescent(); 17 | int getNodeCount() const; 18 | int getEdgeCount() const; 19 | int getCmtyCount() const; 20 | Edgeset* getEdges() const; 21 | Vector* getVectors() const; 22 | void PrintVectors() const; 23 | private: 24 | int nodeCount, edgeCount, cmntCount; 25 | int para_k; 26 | Edgeset *edges = NULL, *nonedges = NULL; 27 | Edge *doubleEdge, *doubleNonedge; 28 | Vector *vectors = NULL; 29 | int **cmnts = NULL; 30 | int * node_comm = NULL; 31 | int iteration = 0; 32 | double *tempDrct = NULL; 33 | double balance = 0; 34 | double alpha = 0.01, beta = 0.8, t = 1, eps = 0.001; 35 | 36 | void initialVectors(); 37 | double moveStepSize(int, double); 38 | void calDescentDirection(); 39 | void iterateVectors(double); 40 | double objectiveFunction(); 41 | void copyNextToCurrent(); 42 | void objectiveFunctionDetail(); 43 | }; 44 | 45 | -------------------------------------------------------------------------------- /codes/Edge.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Constructor of Edge will automaticly give both start and end -1. 4 | * Create an edge with start < end should use setPointWithSort, 5 | * while create an edge with no constrains should use a setPoint after constructor. 
/*
 * Edge: a pair of node ids.
 *
 * The default constructor yields (-1, -1). The two-argument constructor and
 * setPointWithSort() store the endpoints so that start <= end; setPoint()
 * stores them exactly as given.
 */
struct Edge
{
    int start, end;

    // Swaps the endpoints when they are out of order, so that start <= end.
    void intraSort() {
        if (end < start) {
            const int smaller = end;
            end = start;
            start = smaller;
        }
    }

    Edge() : start(-1), end(-1) {}

    Edge(const int s, const int t) : start(s), end(t) {
        intraSort();
    }

    int getStart() const { return start; }
    int getEnd() const { return end; }

    // Stores the endpoints in the order given (no sorting).
    void setPoint(const int start, const int end) {
        this->start = start;
        this->end = end;
    }

    // Stores the endpoints and then orders them.
    void setPointWithSort(const int start, const int end) {
        setPoint(start, end);
        intraSort();
    }

    // Lexicographic order on (start, end).
    bool operator < (const Edge & e) const {
        if (start != e.start) {
            return start < e.start;
        }
        return end < e.end;
    }

    bool operator == (const Edge & e) const {
        return start == e.start && end == e.end;
    }
};
// IID: an (int, int, double) record.
// operator< is deliberately reversed (it compares with '>'), so sorting a
// range of IID with the default comparison orders it by descending d.
struct IID {
    int s, t;
    double d;

    void SetValue(const int ss, const int tt, const double dd) {
        s = ss;
        t = tt;
        d = dd;
    }

    bool operator < (const IID & i_other) const { // > actually
        return i_other.d < d;
    }
};
7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | void printAnomaly(string & target, vector & anomalies); 16 | int main(int argc, char *argv[]) { 17 | if (argc != 7) { 18 | cout << "Usage: filename d thre eps pa rg" << endl; 19 | cout << "\tfilename: filename of network, (no '.txt' suffix);" << endl; 20 | cout << "\td: number of dimensions, n/500 by default;" << endl; 21 | cout << "\tthre: parameter thre, used in AScore for detecting anomalies;" << endl; 22 | cout << "\teps: parameter eps, stop condition of gradient descent, 0.001 by default;" << endl; 23 | cout << "\tpa: binary number, 1 print anomaly, 0 not;" << endl; 24 | cout << "\trg: binary number, 1 rewrite graph by deleting anomalies, 0 not;" << endl; 25 | exit(1); 26 | } 27 | 28 | cout << "filename: " << argv[1] << endl; 29 | string filename(argv[1]); 30 | int method = 2; 31 | cout << "d = " << argv[2] << endl; 32 | string dStr(argv[2]); 33 | int d = atoi(argv[2]); 34 | cout << "thre = " << argv[3] << endl; 35 | string thre_str(argv[3]); 36 | double thre = atof(argv[3]); 37 | cout << "eps = " << argv[4] << endl; 38 | string epsStr(argv[4]); 39 | double eps = atof(argv[4]); 40 | cout << "pa = " << argv[5] << endl; 41 | string paStr(argv[5]); 42 | double pa = atof(argv[5]); 43 | cout << "rg = " << argv[6] << endl; 44 | string rgStr(argv[6]); 45 | double rg = atof(argv[6]); 46 | 47 | srand((unsigned)time(NULL)); 48 | time_t start = clock(); 49 | try { 50 | Embedding embd(method, 0.04, 0.1, eps, filename + ".txt", d); 51 | embd.gradientDescent(); 52 | AnomalyDetection ad(&embd, 0.10); 53 | //ad.GetPreRecCurve(filename); 54 | cout << "F1: " << ad.GetF1(filename, thre) << endl; 55 | cout << "Total time: " << (clock() - start) / 1000 << endl; 56 | 57 | if (pa == 1) { // rewrite graph by removing edges with large stress 58 | string target = filename + "-" + dStr + '-' + thre_str + "-pa.txt"; 59 | ad.RemoveNode(filename + ".txt", target, thre); 60 | } 61 | 
if (rg == 1) { 62 | string target = filename + "-" + dStr + '-' + thre_str + "-rg.txt"; 63 | ad.GraphRewrite(filename + ".txt", target, thre); 64 | } 65 | cout << endl; 66 | } 67 | catch (exception e) { 68 | cerr << e.what() << endl; 69 | exit(1); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EmbeddingAnomalyDetection 2 | 3 | # Codes for paper "An Embedding Approach to Anomaly Detection.", ICDE, 2016 4 | 5 | ## Usage: command line parameters 6 | filename: filename of network (no '.txt' suffix), e.g., input 'network' for 'network.txt'; 7 | 8 | d: number of dimensions, n/500 by default; 9 | 10 | thre: parameter thre, used in AScore for detecting anomalies; 11 | 12 | eps: parameter eps, stop condition of gradient descent, 0.001 by default; 13 | 14 | pa: binary number, 1 print anomaly, 0 not; 15 | 16 | rg: binary number, 1 rewrite graph by deleting anomalies, 0 not; 17 | 18 | ## network file 19 | first line: n m (#nodes & #edges) 20 | 21 | following m lines: s t (end points of an edge) (The indices of nodes start with 0. Only one edge of each node pair needs to be included in the edge list.) 22 | 23 | ## ground-truth of anomalies 24 | If networks have ground-truth of anomalies, the filename of the ground-truth should by [filename]-anomaly.txt 25 | 26 | E.g., the network filename is 'network.txt', the ground-truth should be 'network-anomaly.txt' 27 | 28 | ## pa file (print anomaly) 29 | first line: #anomalies 30 | 31 | following k lines: node id of an anomaly 32 | 33 | ## rg file (rewrite graph) 34 | The format is the same to network file. 35 | 36 | The node indices are reordered, i.e., indices of anomalies are used by other nodes. 
37 | 38 | E.g., original network has 3 edges: <0,1> <0,2> <1,2> 39 | 40 | If 0 is detected as an anomaly, the rg network should only have 1 edge: <0,1>, where the remaining nodes are reordered. 41 | 42 | ## external library 43 | We use the METIS library for graph partitioning. 44 | 45 | The deployment of METIS for MS Visual Studio on the x64 platform under Release mode is as follows: 46 | 47 | 1. Open the project Property Page; 48 | 49 | 2. Configuration Properties -> VC++ Directories, add the directory containing 'metis.h' & 'metis.lib' into "Include Directories" and "Library Directories"; 50 | 51 | 3. Configuration Properties -> Linker -> Input, add metis.lib into "Additional Dependencies" 52 | 53 | If MSVS reports the unresolved external symbol error, please refer to the following page for help. 54 | https://stackoverflow.com/questions/30412951/unresolved-external-symbol-imp-fprintf-and-imp-iob-func-sdl2/36504365#36504365 55 | 56 | For Linux OS users, please follow the guides on the METIS homepage. 57 | http://glaros.dtc.umn.edu/gkhome/metis/metis/overview 58 | 59 | ## baselines 60 | 1. ABC: Adaptive Betweenness Centrality 61 | 62 | Reference: Yuichi Yoshida. Almost Linear-Time Algorithms for Adaptive Betweenness Centrality using Hypergraph Sketches. In KDD, 2014. 63 | 64 | 2. OddBall 65 | 66 | Reference: Leman Akoglu, Mary McGlohon, Christos Faloutsos. oddball: Spotting Anomalies in Weighted Graphs. In PAKDD, 2010. 67 | 68 | 3. MDS 69 | 70 | Reference: V. de Silva and J. B. Tenenbaum. Global versus local methods in nonlinear dimensionality reduction. In NIPS, 2002. 
71 | -------------------------------------------------------------------------------- /codes/Edgeset.cpp: -------------------------------------------------------------------------------- 1 | #include "Edgeset.h" 2 | #include "Vector.h" 3 | #include 4 | #include 5 | 6 | //constructor of edge set 7 | Edgeset::Edgeset(const Graph & g) { 8 | this->size = g.getEdgeCount(); 9 | this->nodeCount = g.getNodeCount(); 10 | this->cmtyCount = g.getCmtyCount(); 11 | this->edgeCount = g.getEdgeCount(); 12 | this->set = new Edge[this->size]; 13 | this->copyEdge(g.getEdges()); 14 | sort(set, set + size); 15 | } 16 | 17 | //constructor of double edge set 18 | Edgeset::Edgeset(const Edgeset *single) { 19 | /* 20 | * This constructor 21 | * regard the network as a directed one, so double edges and sort it 22 | * it will be quite convenient when extracting edges that connecting with some node 23 | * 24 | * we generate Edge e here by 'setPoint(s, t)' method which do not have a intra-sort process 25 | */ 26 | this->size = single->size * 2; 27 | this->nodeCount = single->nodeCount; 28 | this->cmtyCount = single->cmtyCount; 29 | this->edgeCount = single->edgeCount; 30 | this->set = new Edge[this->size]; 31 | 32 | for (int i = 0; i < single->size; i++) { 33 | this->set[i * 2].setPoint(single->set[i].getStart(), single->set[i].getEnd()); 34 | this->set[i * 2 + 1].setPoint(single->set[i].getEnd(), single->set[i].getStart()); 35 | } 36 | sort(set, set + size); 37 | } 38 | 39 | //constructor of non-edge set 40 | Edgeset::Edgeset(const Edgeset & edges, int ** comm, int nodeCount, int edgeCount, int cmtyCount) { 41 | this->type = 1; 42 | this->nodeCount = nodeCount; 43 | this->edgeCount = edgeCount; 44 | this->cmtyCount = cmtyCount; 45 | //this->sampling = (this->nodeCount <= 1000) ? 
false : true; 46 | this->sampling = true; 47 | if (!this->sampling) { 48 | this->size = this->nodeCount * (this->nodeCount - 1) / 2 - edges.size; 49 | } 50 | else { 51 | this->size = this->getSamplingSize(); 52 | } 53 | 54 | this->set = new Edge[this->size]; 55 | 56 | if (this->sampling) { 57 | this->randomNonedgeSet(edges, comm); 58 | } 59 | else { 60 | this->getFullNonedgeSet(edges); 61 | } 62 | 63 | this->balance = edges.size * 1.0 / this->size; 64 | } 65 | 66 | Edgeset::~Edgeset() { 67 | if (set != NULL) { 68 | delete[] set; 69 | set = NULL; 70 | } 71 | } 72 | 73 | double Edgeset::getStress(Vector *vectors) const { 74 | double result = 0, len; 75 | int s, t; 76 | int *x = NULL; double *y = NULL; 77 | 78 | switch (this->type) { 79 | case 0: 80 | for (int i = 0; i < this->size; i++) { 81 | s = this->set[i].getStart(); 82 | t = this->set[i].getEnd(); 83 | len = vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 84 | result += len * len; 85 | } 86 | break; 87 | case 1: 88 | for (int i = 0; i < this->size; i++) { 89 | s = this->set[i].getStart(); 90 | t = this->set[i].getEnd(); 91 | len = vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()) - 1; 92 | result += this->balance * len * len; 93 | } 94 | break; 95 | } 96 | 97 | return result; 98 | } 99 | 100 | void Edgeset::printStress(Vector *vectors, ofstream &oFile) { 101 | int s, t; 102 | double len; 103 | switch (this->type) { 104 | case 0: 105 | for (int i = 0; i < this->size; i++) { 106 | s = this->set[i].getStart(); 107 | t = this->set[i].getEnd(); 108 | //lvd: length of vectors Difference 109 | len = vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 110 | oFile << s << " " << t << " " << len * len << endl; 111 | if (i % 100 == 9) { 112 | oFile.flush(); 113 | } 114 | } 115 | break; 116 | case 1: 117 | for (int i = 0; i < this->size; i++) { 118 | s = this->set[i].getStart(); 119 | t = this->set[i].getEnd(); 120 | //lvd: length of vectors Difference 121 | double len = 
vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()) - 1; 122 | oFile << s << " " << t << " " << this->balance * len * len << endl; 123 | if (i % 100 == 9) { 124 | oFile.flush(); 125 | } 126 | } 127 | break; 128 | } 129 | } 130 | 131 | Edge Edgeset::getEdge(int k) const{ 132 | return this->set[k]; 133 | } 134 | 135 | int Edgeset::getSize() const{ 136 | return this->size; 137 | } 138 | 139 | double Edgeset::getBalance() const{ 140 | return this->balance; 141 | } 142 | 143 | void Edgeset::copyEdge(Edge *src) { 144 | for (int i = 0; i < size; i++) { 145 | this->set[i].setPointWithSort(src[i].getStart(), src[i].getEnd()); 146 | } 147 | } 148 | 149 | void Edgeset::randomNonedgeSet(const Edgeset & edges, int ** comm) { 150 | for (int i = 0; i < this->size;) { 151 | //int k = (rand() << 16 | rand()) % cmtyCount; 152 | //int s = (rand() << 16 | rand()) % comm[k][0]; s = comm[k][s + 1]; 153 | //int t = (rand() << 16 | rand()) % comm[k][0]; t = comm[k][t + 1]; 154 | int s = ((rand() << 16 | rand()) % nodeCount + nodeCount) % nodeCount; 155 | int t = ((rand() << 16 | rand()) % nodeCount + nodeCount) % nodeCount; 156 | if (s == t) { 157 | continue; 158 | } 159 | if (!edges.containEdge(s, t)) { 160 | this->set[i].setPointWithSort(s, t); 161 | i++; 162 | } 163 | } 164 | } 165 | 166 | void Edgeset::getFullNonedgeSet(const Edgeset & edges) { 167 | int nonedgeIndex = 0; 168 | for (int s = 0; s < this->nodeCount; s++) { 169 | for (int t = s + 1; t < this->nodeCount; t++) { 170 | if (!edges.containEdge(s, t)) { 171 | this->set[nonedgeIndex++].setPoint(s, t); 172 | } 173 | } 174 | } 175 | } 176 | 177 | bool Edgeset::containEdge(int s, int t) const{ 178 | int left = 0; 179 | int right = this->size - 1; 180 | int mid; 181 | Edge e(s, t); 182 | while (left <= right) { 183 | mid = (left + right) >> 1; 184 | if (this->set[mid] == e) { 185 | return true; 186 | } 187 | else if (this->set[mid] < e) { 188 | left = mid + 1; 189 | } 190 | else { 191 | right = mid - 1; 192 | } 193 | } 194 | 
return false; 195 | } 196 | 197 | int Edgeset::getSamplingSize() { 198 | return edgeCount; 199 | } 200 | 201 | void Edgeset::printSet() { 202 | for (int i = 0; i < size; i++) { 203 | std::cout << set[i].getStart() << " " << set[i].getEnd() << endl; 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /codes/Embedding.cpp: -------------------------------------------------------------------------------- 1 | #include "Embedding.h" 2 | #include "AnomalyDetection.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | Embedding::Embedding(int sign, double alpha, double beta, double eps, string &path, int dimension, int i_k) { 13 | this->alpha = alpha; 14 | this->beta = beta; 15 | this->eps = eps; 16 | this->para_k = i_k; 17 | /* 18 | * parameter is the method of graph partition 19 | * 0: random method 20 | * 1: method recommended in the article 21 | */ 22 | Graph g(sign % 3, path, dimension); 23 | 24 | this->nodeCount = g.getNodeCount(); 25 | this->edgeCount = g.getEdgeCount(); 26 | this->cmntCount = g.getCmtyCount(); 27 | cmnts = g.getCmties(); 28 | //int ** comm_temp = g.getCmties(); 29 | //cmnts = new int*[this->cmntCount]; 30 | //for (int i = 0; i < this->cmntCount; i++) { 31 | // cmnts[i] = new int[comm_temp[i][0] + 1]; 32 | // cmnts[i][0] = comm_temp[i][0]; 33 | // for (int j = 1; j <= comm_temp[i][0]; j++) { 34 | // cmnts[i][j] = comm_temp[i][j]; 35 | // } 36 | //} 37 | this->initialVectors(); 38 | 39 | this->edges = new Edgeset(g); 40 | this->nonedges = new Edgeset(*this->edges, this->cmnts, this->nodeCount, this->edgeCount, this->cmntCount); 41 | this->doubleEdge = new Edge[2 * edgeCount]; 42 | this->doubleNonedge = new Edge[2 * edgeCount]; 43 | for (int i = 0; i < edgeCount; i++) { 44 | doubleEdge[2 * i].setPoint(edges->getEdge(i).getStart(), edges->getEdge(i).getEnd()); 45 | doubleEdge[2 * i + 1].setPoint(edges->getEdge(i).getEnd(), 
edges->getEdge(i).getStart()); 46 | doubleNonedge[2 * i].setPoint(nonedges->getEdge(i).getStart(), nonedges->getEdge(i).getEnd()); 47 | doubleNonedge[2 * i + 1].setPoint(nonedges->getEdge(i).getEnd(), nonedges->getEdge(i).getStart()); 48 | } 49 | sort(doubleEdge, doubleEdge + 2 * edgeCount); 50 | sort(doubleNonedge, doubleNonedge + 2 * edgeCount); 51 | this->balance = this->nonedges->getBalance(); 52 | 53 | tempDrct = new double[cmntCount]; 54 | } 55 | 56 | Embedding::Embedding(int sign, double alpha, double beta, double eps, string &path, int dimension) { 57 | this->alpha = alpha; 58 | this->beta = beta; 59 | this->eps = eps; 60 | /* 61 | * parameter is the method of graph partition 62 | * 0: random method 63 | * 1: method recommended in the article 64 | */ 65 | Graph g(sign % 3, path, dimension); 66 | 67 | this->nodeCount = g.getNodeCount(); 68 | this->edgeCount = g.getEdgeCount(); 69 | this->cmntCount = g.getCmtyCount(); 70 | cmnts = g.getCmties(); 71 | this->initialVectors(); 72 | 73 | this->edges = new Edgeset(g); 74 | this->nonedges = new Edgeset(*this->edges, this->cmnts, this->nodeCount, this->edgeCount, this->cmntCount); 75 | this->doubleEdge = new Edge[2 * edgeCount]; 76 | this->doubleNonedge = new Edge[2 * edgeCount]; 77 | for (int i = 0; i < edgeCount; i++) { 78 | doubleEdge[2 * i].setPoint(edges->getEdge(i).getStart(), edges->getEdge(i).getEnd()); 79 | doubleEdge[2 * i + 1].setPoint(edges->getEdge(i).getEnd(), edges->getEdge(i).getStart()); 80 | doubleNonedge[2 * i].setPoint(nonedges->getEdge(i).getStart(), nonedges->getEdge(i).getEnd()); 81 | doubleNonedge[2 * i + 1].setPoint(nonedges->getEdge(i).getEnd(), nonedges->getEdge(i).getStart()); 82 | } 83 | sort(doubleEdge, doubleEdge + 2 * edgeCount); 84 | sort(doubleNonedge, doubleNonedge + 2 * edgeCount); 85 | this->balance = this->nonedges->getBalance(); 86 | 87 | tempDrct = new double[cmntCount]; 88 | } 89 | 90 | Embedding::~Embedding() { 91 | delete edges; edges = NULL; 92 | delete nonedges; nonedges 
= NULL; 93 | delete[] vectors; vectors = NULL; 94 | delete[] tempDrct; tempDrct = NULL; 95 | delete[] doubleEdge; doubleEdge = NULL; 96 | delete[] doubleNonedge; doubleNonedge = NULL; 97 | if (node_comm != NULL) { delete[] node_comm; node_comm = NULL; } 98 | //if (cmnts != NULL) { 99 | // for (int i = 0; i < cmntCount; i++) { delete[] cmnts[i]; cmnts[i] = NULL; } 100 | // delete[] cmnts; cmnts = NULL; 101 | //} 102 | } 103 | 104 | void Embedding::initialVectors() { 105 | this->vectors = new Vector[this->nodeCount]; 106 | node_comm = new int[this->nodeCount]; 107 | 108 | for (int i = 0; i < this->cmntCount; i++) { 109 | for (int j = 1; j <= this->cmnts[i][0]; j++) { 110 | node_comm[this->cmnts[i][j]] = i; 111 | } 112 | } 113 | 114 | //int p = min((int)ceil(log2(nodeCount * 1.0)), cmntCount); 115 | int p = min((int)ceil(edgeCount * 2.0 / nodeCount), cmntCount); // avg degree 116 | //int p = para_k; 117 | int q = min(p / 4, cmntCount - p); 118 | cout << p << " " << q << endl; 119 | for (int i = 0; i < this->nodeCount; i++) { 120 | this->vectors[i].initialVector(node_comm[i], p, q); 121 | //this->vectors[i].initialVector(nodeCmtyID[i], cmntCount / 20, cmntCount / 20 / 3); 122 | } 123 | } 124 | 125 | void Embedding::gradientDescent() { 126 | double currentOF = 0, lastOF = 0; 127 | iteration = 0; 128 | 129 | lastOF = this->objectiveFunction(); 130 | 131 | while (true) { 132 | iteration++; 133 | if (nonedges != NULL) { 134 | delete nonedges; nonedges = NULL; 135 | } 136 | this->nonedges = new Edgeset(*this->edges, this->cmnts, this->nodeCount, this->edgeCount, this->cmntCount); 137 | for (int i = 0; i < edgeCount; i++) { 138 | doubleNonedge[2 * i].setPoint(nonedges->getEdge(i).getStart(), nonedges->getEdge(i).getEnd()); 139 | doubleNonedge[2 * i + 1].setPoint(nonedges->getEdge(i).getEnd(), nonedges->getEdge(i).getStart()); 140 | } 141 | sort(doubleNonedge, doubleNonedge + 2 * edgeCount); 142 | this->balance = this->nonedges->getBalance(); 143 | 144 | 
this->calDescentDirection(); 145 | currentOF = this->moveStepSize(1, lastOF); 146 | 147 | this->copyNextToCurrent(); 148 | //cout << "Iteration " << iteration << ": " << currentOF << endl; 149 | if (fabs(lastOF - currentOF) / currentOF < eps || iteration == 50) break; 150 | else lastOF = currentOF; 151 | } 152 | 153 | cout << "Iteration Round Quantity: " << iteration << endl; 154 | //objectiveFunctionDetail(); 155 | } 156 | 157 | 158 | double Embedding::moveStepSize(int method, double lastOF) { 159 | //clock_t start = clock(); 160 | switch (method) { 161 | case 0: { 162 | // Constant Move, can not be used in practice for 0.01 will 163 | // break the descent condition in later iterations. 164 | this->iterateVectors(0.01); 165 | return this->objectiveFunction(); 166 | } 167 | case 1: { 168 | //backtracking line search holding Armijo rule 169 | double t = 1; //initial step size 170 | double pg = 0; 171 | for (int i = 0; i < this->nodeCount; i++) { 172 | pg += this->vectors[i].getVectorSquareSum(2); 173 | } 174 | this->iterateVectors(t); 175 | while (true) { 176 | double curOF = this->objectiveFunction(); 177 | if (curOF < lastOF - alpha * t * pg) { 178 | return curOF; 179 | } 180 | else { 181 | t *= beta; 182 | this->iterateVectors(t); 183 | if (t < 1e-12) { 184 | this->iterateVectors(0); 185 | return lastOF; 186 | } 187 | } 188 | } 189 | break; 190 | } 191 | default: { 192 | return 0; 193 | } 194 | } 195 | } 196 | 197 | void Embedding::calDescentDirection() { 198 | int edgeIndex = 0, nonedgeIndex = 0; 199 | int t; 200 | 201 | for (int i = 0; i < nodeCount; i++) { 202 | for (int j = 0; j < cmntCount; j++) 203 | tempDrct[j] = 0; 204 | while (edgeIndex < 2 * edgeCount && doubleEdge[edgeIndex].getStart() == i) { 205 | // for current edge: 206 | // direction = 2 * |Xs-Xt| * d(|Xs - Xt|) 207 | // |Xs - Xt| = sqrt((Xs1 - Xt1)^2 + ... 
+ (Xsd - Xtd)^2) 208 | // (vectors with d dimension) 209 | t = doubleEdge[edgeIndex].getEnd(); 210 | edgeIndex++; 211 | this->vectors[i].directionPlus(vectors[t].getVectorCurP(), 2, tempDrct); 212 | } 213 | while (nonedgeIndex < 2 * edgeCount && doubleNonedge[nonedgeIndex].getStart() == i) { 214 | // so we will sampling a set of m non - edge relationships 215 | // 216 | // for current non - edge 217 | // direction = 2 * (| Xs - Xt | -1) * d(| Xs - Xt | ) 218 | // | Xs - Xt | = sqrt((Xs1 - Xt1) ^ 2 + ... + (Xsd - Xtd) ^ 2) 219 | // (vectors with d dimension) 220 | 221 | t = doubleNonedge[nonedgeIndex].getEnd(); 222 | nonedgeIndex++; 223 | double lvd = vectors[i].getLengthOfMinus(vectors[t].getVectorNextP()); 224 | if (lvd < 1e-13) { 225 | continue; 226 | } 227 | this->vectors[i].directionPlus(vectors[t].getVectorCurP(), 2 * (lvd - 1) * this->balance / lvd, tempDrct); 228 | } 229 | vectors[i].directionFinal(tempDrct, cmntCount); 230 | } 231 | } 232 | 233 | void Embedding::iterateVectors(double stepSize) { 234 | for (int i = 0; i < this->nodeCount; i++) { 235 | this->vectors[i].iterateVector(stepSize); 236 | } 237 | } 238 | 239 | double Embedding::objectiveFunction() { 240 | double of = 0; 241 | of += this->edges->getStress(this->vectors); 242 | of += this->nonedges->getStress(this->vectors); 243 | return of; 244 | } 245 | 246 | void Embedding::copyNextToCurrent() { 247 | for (int i = 0; i < this->nodeCount; i++) { 248 | vectors[i].copyNextToCur(); 249 | } 250 | } 251 | 252 | void Embedding::objectiveFunctionDetail() { 253 | string writeFile = "Objective Function Detail.txt"; 254 | ofstream oFile; 255 | oFile.open(writeFile); 256 | oFile << "--------------Objective Function Deatils Start------------" << endl; 257 | this->edges->printStress(vectors, oFile); 258 | this->nonedges->printStress(vectors, oFile); 259 | 260 | oFile << "--------------Objective Function Deatils End------------" << endl; 261 | oFile.close(); 262 | } 263 | 264 | void Embedding::PrintVectors() 
const { 265 | string writeFile = "embedding.txt"; 266 | ofstream oFile; 267 | oFile.open(writeFile); 268 | for (int i = 0; i < nodeCount; i++) { 269 | oFile << i << ": "; 270 | vectors[i].printVector(oFile); 271 | } 272 | oFile.close(); 273 | } 274 | 275 | int Embedding::getNodeCount() const { 276 | return nodeCount; 277 | } 278 | 279 | int Embedding::getEdgeCount() const { 280 | return edgeCount; 281 | } 282 | 283 | int Embedding::getCmtyCount() const { 284 | return cmntCount; 285 | } 286 | 287 | Edgeset* Embedding::getEdges() const { 288 | return edges; 289 | } 290 | 291 | Vector* Embedding::getVectors() const { 292 | return vectors; 293 | } -------------------------------------------------------------------------------- /METIS/metis.h: -------------------------------------------------------------------------------- 1 | /*! 2 | \file metis.h 3 | \brief This file contains function prototypes and constant definitions for METIS 4 | * 5 | \author George 6 | \date Started 8/9/02 7 | \version\verbatim $Id$\endverbatim 8 | */ 9 | 10 | #ifndef _METIS_H_ 11 | #define _METIS_H_ 12 | 13 | /**************************************************************************** 14 | * A set of defines that can be modified by the user 15 | *****************************************************************************/ 16 | 17 | /*-------------------------------------------------------------------------- 18 | Specifies the width of the elementary data type that will hold information 19 | about vertices and their adjacency lists. 20 | 21 | Possible values: 22 | 32 : Use 32 bit signed integers 23 | 64 : Use 64 bit signed integers 24 | 25 | A width of 64 should be specified if the number of vertices or the total 26 | number of edges in the graph exceed the limits of a 32 bit signed integer 27 | i.e., 2^31-1. 28 | Proper use of 64 bit integers requires that the c99 standard datatypes 29 | int32_t and int64_t are supported by the compiler. 
30 | GCC does provides these definitions in stdint.h, but it may require some 31 | modifications on other architectures. 32 | --------------------------------------------------------------------------*/ 33 | #define IDXTYPEWIDTH 64 34 | 35 | 36 | /*-------------------------------------------------------------------------- 37 | Specifies the data type that will hold floating-point style information. 38 | 39 | Possible values: 40 | 32 : single precission floating point (float) 41 | 64 : double precission floating point (double) 42 | --------------------------------------------------------------------------*/ 43 | #define REALTYPEWIDTH 64 44 | 45 | 46 | 47 | /**************************************************************************** 48 | * In principle, nothing needs to be changed beyond this point, unless the 49 | * int32_t and int64_t cannot be found in the normal places. 50 | *****************************************************************************/ 51 | 52 | /* Uniform definitions for various compilers */ 53 | #if defined(_MSC_VER) 54 | #define COMPILER_MSC 55 | #endif 56 | #if defined(__ICC) 57 | #define COMPILER_ICC 58 | #endif 59 | #if defined(__GNUC__) 60 | #define COMPILER_GCC 61 | #endif 62 | 63 | /* Include c99 int definitions and need constants. 
When building the library, 64 | * these are already defined by GKlib; hence the test for _GKLIB_H_ */ 65 | #ifndef _GKLIB_H_ 66 | #ifdef COMPILER_MSC 67 | #include 68 | 69 | typedef __int32 int32_t; 70 | typedef __int64 int64_t; 71 | #define PRId32 "I32d" 72 | #define PRId64 "I64d" 73 | #define SCNd32 "ld" 74 | #define SCNd64 "I64d" 75 | #define INT32_MIN ((int32_t)_I32_MIN) 76 | #define INT32_MAX _I32_MAX 77 | #define INT64_MIN ((int64_t)_I64_MIN) 78 | #define INT64_MAX _I64_MAX 79 | #else 80 | #include 81 | #endif 82 | #endif 83 | 84 | 85 | /*------------------------------------------------------------------------ 86 | * Setup the basic datatypes 87 | *-------------------------------------------------------------------------*/ 88 | #if IDXTYPEWIDTH == 32 89 | typedef int32_t idx_t; 90 | 91 | #define IDX_MAX INT32_MAX 92 | #define IDX_MIN INT32_MIN 93 | 94 | #define SCIDX SCNd32 95 | #define PRIDX PRId32 96 | 97 | #define strtoidx strtol 98 | #define iabs abs 99 | #elif IDXTYPEWIDTH == 64 100 | typedef int64_t idx_t; 101 | 102 | #define IDX_MAX INT64_MAX 103 | #define IDX_MIN INT64_MIN 104 | 105 | #define SCIDX SCNd64 106 | #define PRIDX PRId64 107 | 108 | #ifdef COMPILER_MSC 109 | #define strtoidx _strtoi64 110 | #else 111 | #define strtoidx strtoll 112 | #endif 113 | #define iabs labs 114 | #else 115 | #error "Incorrect user-supplied value fo IDXTYPEWIDTH" 116 | #endif 117 | 118 | 119 | #if REALTYPEWIDTH == 32 120 | typedef float real_t; 121 | 122 | #define SCREAL "f" 123 | #define PRREAL "f" 124 | #define REAL_MAX FLT_MAX 125 | #define REAL_MIN FLT_MIN 126 | #define REAL_EPSILON FLT_EPSILON 127 | 128 | #define rabs fabsf 129 | #define REALEQ(x,y) ((rabs((x)-(y)) <= FLT_EPSILON)) 130 | 131 | #ifdef COMPILER_MSC 132 | #define strtoreal (float)strtod 133 | #else 134 | #define strtoreal strtof 135 | #endif 136 | #elif REALTYPEWIDTH == 64 137 | typedef double real_t; 138 | 139 | #define SCREAL "lf" 140 | #define PRREAL "lf" 141 | #define REAL_MAX DBL_MAX 142 | 
#define REAL_MIN DBL_MIN 143 | #define REAL_EPSILON DBL_EPSILON 144 | 145 | #define rabs fabs 146 | #define REALEQ(x,y) ((rabs((x)-(y)) <= DBL_EPSILON)) 147 | 148 | #define strtoreal strtod 149 | #else 150 | #error "Incorrect user-supplied value for REALTYPEWIDTH" 151 | #endif 152 | 153 | 154 | /*------------------------------------------------------------------------ 155 | * Constant definitions 156 | *-------------------------------------------------------------------------*/ 157 | /* Metis's version number */ 158 | #define METIS_VER_MAJOR 5 159 | #define METIS_VER_MINOR 1 160 | #define METIS_VER_SUBMINOR 0 161 | 162 | /* The maximum length of the options[] array */ 163 | #define METIS_NOPTIONS 40 164 | 165 | 166 | 167 | /*------------------------------------------------------------------------ 168 | * Function prototypes 169 | *-------------------------------------------------------------------------*/ 170 | 171 | #ifdef _WINDLL 172 | #define METIS_API(type) __declspec(dllexport) type __cdecl 173 | #elif defined(__cdecl) 174 | #define METIS_API(type) type __cdecl 175 | #else 176 | #define METIS_API(type) type 177 | #endif 178 | 179 | 180 | 181 | #ifdef __cplusplus 182 | extern "C" { 183 | #endif 184 | 185 | METIS_API(int) METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, 186 | idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, 187 | idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, 188 | idx_t *edgecut, idx_t *part); 189 | 190 | METIS_API(int) METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, 191 | idx_t *adjncy, idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, 192 | idx_t *nparts, real_t *tpwgts, real_t *ubvec, idx_t *options, 193 | idx_t *edgecut, idx_t *part); 194 | 195 | METIS_API(int) METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 196 | idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); 197 | 198 | METIS_API(int) METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 199 | 
idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); 200 | 201 | METIS_API(int) METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 202 | idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, 203 | idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart); 204 | 205 | METIS_API(int) METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, 206 | idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, 207 | real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, 208 | idx_t *npart); 209 | 210 | METIS_API(int) METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, 211 | idx_t *options, idx_t *perm, idx_t *iperm); 212 | 213 | METIS_API(int) METIS_Free(void *ptr); 214 | 215 | METIS_API(int) METIS_SetDefaultOptions(idx_t *options); 216 | 217 | 218 | /* These functions are used by ParMETIS */ 219 | 220 | METIS_API(int) METIS_NodeNDP(idx_t nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, 221 | idx_t npes, idx_t *options, idx_t *perm, idx_t *iperm, 222 | idx_t *sizes); 223 | 224 | METIS_API(int) METIS_ComputeVertexSeparator(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, 225 | idx_t *vwgt, idx_t *options, idx_t *sepsize, idx_t *part); 226 | 227 | METIS_API(int) METIS_NodeRefine(idx_t nvtxs, idx_t *xadj, idx_t *vwgt, idx_t *adjncy, 228 | idx_t *where, idx_t *hmarker, real_t ubfactor); 229 | 230 | 231 | #ifdef __cplusplus 232 | } 233 | #endif 234 | 235 | 236 | 237 | /*------------------------------------------------------------------------ 238 | * Enum type definitions 239 | *-------------------------------------------------------------------------*/ 240 | /*! Return codes */ 241 | typedef enum { 242 | METIS_OK = 1, /*!< Returned normally */ 243 | METIS_ERROR_INPUT = -2, /*!< Returned due to erroneous inputs and/or options */ 244 | METIS_ERROR_MEMORY = -3, /*!< Returned due to insufficient memory */ 245 | METIS_ERROR = -4 /*!< Some other errors */ 246 | } rstatus_et; 247 | 248 | 249 | /*! 
Operation type codes */ 250 | typedef enum { 251 | METIS_OP_PMETIS, 252 | METIS_OP_KMETIS, 253 | METIS_OP_OMETIS 254 | } moptype_et; 255 | 256 | 257 | /*! Options codes (i.e., options[]) */ 258 | typedef enum { 259 | METIS_OPTION_PTYPE, 260 | METIS_OPTION_OBJTYPE, 261 | METIS_OPTION_CTYPE, 262 | METIS_OPTION_IPTYPE, 263 | METIS_OPTION_RTYPE, 264 | METIS_OPTION_DBGLVL, 265 | METIS_OPTION_NITER, 266 | METIS_OPTION_NCUTS, 267 | METIS_OPTION_SEED, 268 | METIS_OPTION_NO2HOP, 269 | METIS_OPTION_MINCONN, 270 | METIS_OPTION_CONTIG, 271 | METIS_OPTION_COMPRESS, 272 | METIS_OPTION_CCORDER, 273 | METIS_OPTION_PFACTOR, 274 | METIS_OPTION_NSEPS, 275 | METIS_OPTION_UFACTOR, 276 | METIS_OPTION_NUMBERING, 277 | 278 | /* Used for command-line parameter purposes */ 279 | METIS_OPTION_HELP, 280 | METIS_OPTION_TPWGTS, 281 | METIS_OPTION_NCOMMON, 282 | METIS_OPTION_NOOUTPUT, 283 | METIS_OPTION_BALANCE, 284 | METIS_OPTION_GTYPE, 285 | METIS_OPTION_UBVEC 286 | } moptions_et; 287 | 288 | 289 | /*! Partitioning Schemes */ 290 | typedef enum { 291 | METIS_PTYPE_RB, 292 | METIS_PTYPE_KWAY 293 | } mptype_et; 294 | 295 | /*! Graph types for meshes */ 296 | typedef enum { 297 | METIS_GTYPE_DUAL, 298 | METIS_GTYPE_NODAL 299 | } mgtype_et; 300 | 301 | /*! Coarsening Schemes */ 302 | typedef enum { 303 | METIS_CTYPE_RM, 304 | METIS_CTYPE_SHEM 305 | } mctype_et; 306 | 307 | /*! Initial partitioning schemes */ 308 | typedef enum { 309 | METIS_IPTYPE_GROW, 310 | METIS_IPTYPE_RANDOM, 311 | METIS_IPTYPE_EDGE, 312 | METIS_IPTYPE_NODE, 313 | METIS_IPTYPE_METISRB 314 | } miptype_et; 315 | 316 | 317 | /*! Refinement schemes */ 318 | typedef enum { 319 | METIS_RTYPE_FM, 320 | METIS_RTYPE_GREEDY, 321 | METIS_RTYPE_SEP2SIDED, 322 | METIS_RTYPE_SEP1SIDED 323 | } mrtype_et; 324 | 325 | 326 | /*! 
Debug Levels */ 327 | typedef enum { 328 | METIS_DBG_INFO = 1, /*!< Shows various diagnostic messages */ 329 | METIS_DBG_TIME = 2, /*!< Perform timing analysis */ 330 | METIS_DBG_COARSEN = 4, /*!< Show the coarsening progress */ 331 | METIS_DBG_REFINE = 8, /*!< Show the refinement progress */ 332 | METIS_DBG_IPART = 16, /*!< Show info on initial partitioning */ 333 | METIS_DBG_MOVEINFO = 32, /*!< Show info on vertex moves during refinement */ 334 | METIS_DBG_SEPINFO = 64, /*!< Show info on vertex moves during sep refinement */ 335 | METIS_DBG_CONNINFO = 128, /*!< Show info on minimization of subdomain connectivity */ 336 | METIS_DBG_CONTIGINFO = 256, /*!< Show info on elimination of connected components */ 337 | METIS_DBG_MEMORY = 2048, /*!< Show info related to wspace allocation */ 338 | } mdbglvl_et; 339 | 340 | 341 | /* Types of objectives */ 342 | typedef enum { 343 | METIS_OBJTYPE_CUT, 344 | METIS_OBJTYPE_VOL, 345 | METIS_OBJTYPE_NODE 346 | } mobjtype_et; 347 | 348 | 349 | 350 | #endif /* _METIS_H_ */ 351 | -------------------------------------------------------------------------------- /codes/Vector.cpp: -------------------------------------------------------------------------------- 1 | #include "Vector.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | const double Vector::sqrt2d2 = sqrt(2) * 0.5; 8 | 9 | Vector::Vector() { 10 | } 11 | 12 | Vector::~Vector() { 13 | } 14 | 15 | void Vector::initialVector(int index, int p, int q) { 16 | this->p = p; 17 | this->q = q; 18 | vectorCurP[0].setValue(index, sqrt2d2); 19 | vectorCurP[1].index = -1; 20 | vectorNextP[0].setValue(index, sqrt2d2); 21 | vectorNextP[1].index = -1; 22 | vectorCurQ[0].index = -1; 23 | vectorNextQ[0].index = -1; 24 | } 25 | 26 | // vectorDrct += (x - y) * coe 27 | // x = vectorCur 28 | // y is also vectorCur of another node 29 | void Vector::directionPlus(VectorItem * y, double coe, double *tempDrct) { 30 | VectorItem *x = vectorCurP; 31 | for (int i = 0; i < p; i++) { 32 | if 
(x[i].index == -1) break; 33 | tempDrct[x[i].index] += x[i].weight * coe; 34 | } 35 | for (int i = 0; i < p; i++) { 36 | if (y[i].index == -1) break; 37 | tempDrct[y[i].index] -= y[i].weight * coe; 38 | } 39 | } 40 | 41 | void Vector::directionFinal(double *tempDrct, const int &cmtySize) { 42 | int i; 43 | for (i = 0; i < p + 1; i++) { 44 | if (vectorCurP[i].index == -1) break; 45 | vectorDrctP[i].setValue(vectorCurP[i].index, tempDrct[vectorCurP[i].index]); 46 | tempDrct[vectorCurP[i].index] = INT_MAX; 47 | } 48 | vectorDrctP[i].index = -1; 49 | for (i = 0; i < q + 1; i++) { 50 | if (vectorCurQ[i].index == -1) break; 51 | vectorDrctQ[i].setValue(vectorCurQ[i].index, tempDrct[vectorCurQ[i].index]); 52 | tempDrct[vectorCurQ[i].index] = INT_MAX; 53 | } 54 | vectorDrctQ[i].index = -1; 55 | VectorItem heap[MAX_HEAP_SIZE]; 56 | int heapSize = 0; 57 | for (i = 0; i < cmtySize; i++) { 58 | if (tempDrct[i] < 0) insertMaxHeap(heap, heapSize, i, tempDrct[i]); 59 | } 60 | if (heapSize == 0) vectorDrctOther = NULL; 61 | else { 62 | otherSet[0].vi.setValue(heap[0].index, heap[0].weight); 63 | otherSet[0].next = NULL; 64 | vectorDrctOther = &otherSet[0]; 65 | } 66 | for (i = 1; i < heapSize; i++) { 67 | otherSet[i].vi.setValue(heap[i].index, heap[i].weight); 68 | otherSet[i].next = NULL; 69 | otherSet[i - 1].next = &otherSet[i]; 70 | } 71 | } 72 | 73 | //vectorNext = vectorCur - VectorDrct * stepSize 74 | void Vector::iterateVector(double stepSize) { 75 | // get the top (p + q) max elememts 76 | // use a min heap 77 | VectorItem heap[MAX_HEAP_SIZE]; 78 | int heapSize = 0; 79 | int index; 80 | double weight; 81 | 82 | for (int i = 0; i < p; i++) { 83 | if (vectorCurP[i].index == -1) { 84 | break; 85 | } 86 | index = vectorCurP[i].index; 87 | weight = vectorCurP[i].weight - vectorDrctP[i].weight * stepSize; 88 | insertMinHeap(heap, heapSize, index, weight); 89 | } 90 | for (int i = 0; i < q; i++) { 91 | if (vectorCurQ[i].index == -1) { 92 | break; 93 | } 94 | index = 
vectorCurQ[i].index; 95 | weight = vectorCurQ[i].weight - vectorDrctQ[i].weight * stepSize; 96 | insertMinHeap(heap, heapSize, index, weight); 97 | } 98 | 99 | LinkList l = vectorDrctOther; 100 | while (l != NULL) { 101 | insertMinHeap(heap, heapSize, l->vi.index, -l->vi.weight * stepSize); 102 | l = l->next; 103 | } 104 | 105 | //for (int i = 0; i < otherSize; i++) { 106 | // insertMinHeap(heap, heapSize, vectorDrctOther[i].index, -vectorDrctOther[i].weight * stepSize); 107 | //} 108 | 109 | // sort by weight 110 | sort(heap, heap + heapSize); 111 | 112 | for (int i = 0; i < min(p, heapSize); i++) { 113 | for (int j = min(p, heapSize) - 1; j > i; j--) { 114 | if (heap[j - 1].index > heap[j].index) { 115 | heap[j].exchange(heap[j - 1]); 116 | } 117 | } 118 | } 119 | 120 | for (int i = p; i < heapSize; i++) { 121 | for (int j = heapSize - 1; j > i; j--) { 122 | if (heap[j - 1].index > heap[j].index) { 123 | heap[j].exchange(heap[j - 1]); 124 | } 125 | } 126 | } 127 | int i; 128 | for (i = 0; i < min(p, heapSize); i++) { 129 | vectorNextP[i].setValue(heap[i].index, heap[i].weight); 130 | } 131 | vectorNextP[i].index = -1; 132 | 133 | for (i = p; i < heapSize; i++) { 134 | vectorNextQ[i - p].setValue(heap[i].index, heap[i].weight); 135 | } 136 | vectorNextQ[i - p].index = -1; 137 | 138 | this->regularize(); 139 | } 140 | 141 | void Vector::copyNextToCur() { 142 | int i; 143 | for (i = 0; i < p; i++) { 144 | if (vectorNextP[i].index == -1) { 145 | break; 146 | } 147 | vectorCurP[i].setValue(vectorNextP[i].index, vectorNextP[i].weight); 148 | } 149 | vectorCurP[i].index = -1; 150 | for (i = 0; i < q; i++) { 151 | if (vectorNextQ[i].index == -1) { 152 | break; 153 | } 154 | vectorCurQ[i].setValue(vectorNextQ[i].index, vectorNextQ[i].weight); 155 | } 156 | vectorCurQ[i].index = -1; 157 | } 158 | 159 | void Vector::printVector(ofstream &oFile) { 160 | double avg = 0; 161 | int k = 0; 162 | for (int i = 0; i < p; i++) { 163 | if (vectorCurP[i].index == -1) { 164 | break; 
165 | } 166 | //oFile << '(' << vectorCurP[i].index << ", " << vectorCurP[i].weight << ") "; 167 | avg += vectorCurP[i].weight; 168 | k++; 169 | } 170 | avg /= k; 171 | oFile << "avg=" << avg << "\t"; 172 | for (int i = 0; i < p; i++) { 173 | if (vectorCurP[i].index == -1) break; 174 | if (vectorCurP[i].weight > avg) oFile << '(' << vectorCurP[i].index << ", " << vectorCurP[i].weight << ") "; 175 | } 176 | oFile << endl; 177 | oFile.flush(); 178 | } 179 | 180 | VectorItem* Vector::getVectorCurP() { 181 | return vectorCurP; 182 | } 183 | 184 | VectorItem * Vector::getVectorNextP() { 185 | return vectorNextP; 186 | } 187 | 188 | double Vector::getVectorSquareSum(int type) { 189 | double sum = 0; 190 | if (type == 0) { 191 | for (int i = 0; i < p; i++) { 192 | if (vectorNextP[i].index == -1) { 193 | break; 194 | } 195 | sum += vectorCurP[i].weight * vectorCurP[i].weight; 196 | } 197 | } 198 | else if (type == 1){ 199 | for (int i = 0; i < p; i++) { 200 | if (vectorNextP[i].index == -1) { 201 | break; 202 | } 203 | sum += vectorNextP[i].weight * vectorNextP[i].weight; 204 | } 205 | } 206 | else { 207 | for (int i = 0; i < p; i++) { 208 | if (vectorDrctP[i].index == -1) { 209 | break; 210 | } 211 | sum += vectorDrctP[i].weight * vectorDrctP[i].weight; 212 | } 213 | for (int i = 0; i < q; i++) { 214 | if (vectorDrctQ[i].index == -1) { 215 | break; 216 | } 217 | sum += vectorDrctQ[i].weight * vectorDrctQ[i].weight; 218 | } 219 | LinkList ll = vectorDrctOther; 220 | while (ll != NULL) { 221 | sum += ll->vi.weight * ll->vi.weight; 222 | ll = ll->next; 223 | } 224 | } 225 | return sum; 226 | } 227 | 228 | // return the length of vector(vextorNextP - y); 229 | double Vector::getLengthOfMinus(VectorItem *y) { 230 | double sum = 0; 231 | VectorItem *x = vectorNextP; 232 | int i, j; 233 | for (i = 0, j = 0; x[i].index != -1 && y[j].index != -1;) { 234 | if (x[i].index == y[j].index) { 235 | sum += (x[i].weight - y[j].weight) * (x[i].weight - y[j].weight); 236 | ++i; 237 | ++j; 
238 | } 239 | else if (x[i].index < y[j].index) { 240 | sum += (x[i].weight)* (x[i].weight); 241 | ++i; 242 | } 243 | else { 244 | sum += (y[j].weight)* (y[j].weight); 245 | ++j; 246 | } 247 | } 248 | for (; x[i].index != -1; ++i) { 249 | sum += (x[i].weight)* (x[i].weight); 250 | } 251 | for (; y[j].index != -1; ++j) { 252 | sum += (y[j].weight)* (y[j].weight); 253 | } 254 | 255 | return sqrt(sum); 256 | } 257 | 258 | // heap vectorDrctOther and DrctOtherIndex are corressponding ids 259 | // adjust heap upward from pos 260 | // para. pos: index of array, which has changed 261 | void Vector::adjustMaxHeapUpward(int pos, int size, VectorItem *heap) { 262 | int father = (pos - 1) / 2; 263 | while (pos != 0 && heap[pos].weight > heap[father].weight) { 264 | // need to adjust 265 | // exchange pos and father 266 | heap[pos].exchange(heap[father]); 267 | //swapOtherIndex(heap[pos].index, heap[father].index); 268 | pos = father; 269 | father = (pos - 1) / 2; 270 | } 271 | } 272 | 273 | // heap vectorDrctOther and DrctOtherIndex are corressponding ids 274 | // adjust heap download from pos 275 | // para. 
pos: index of array, which has changed 276 | void Vector::adjustMaxHeapDownward(int pos, int size, VectorItem *heap) { 277 | int son = findLargerSon(pos, size, heap); 278 | while (son != -1) { 279 | heap[pos].exchange(heap[son]); 280 | //swapOtherIndex(heap[pos].index, heap[son].index); 281 | pos = son; 282 | son = findLargerSon(pos, size, heap); 283 | } 284 | } 285 | 286 | void Vector::adjustMinHeapUpward(int pos, int size, VectorItem *heap) { 287 | int father = (pos - 1) / 2; 288 | while (pos != 0 && heap[pos].weight < heap[father].weight) { 289 | heap[pos].exchange(heap[father]); 290 | pos = father; 291 | father = (pos - 1) / 2; 292 | } 293 | } 294 | 295 | void Vector::adjustMinHeapDownward(int pos, int size, VectorItem *heap) { 296 | int son = findSmallerSon(pos, size, heap); 297 | while (son != -1) { 298 | heap[pos].exchange(heap[son]); 299 | pos = son; 300 | son = findSmallerSon(pos, size, heap); 301 | } 302 | } 303 | 304 | // exchange the value of follosing two keys 305 | // MyMap otherIndex 306 | void Vector::swapOtherIndex(int id1, int id2) { 307 | //int temp = otherIndex[id1]; 308 | //otherIndex[id1] = otherIndex[id2]; 309 | //otherIndex[id2] = temp; 310 | } 311 | 312 | // find a larger son of current node 313 | // return -1 of not exist 314 | // para. pos: current node 315 | int Vector::findLargerSon(int pos, int size, VectorItem *heap) { 316 | int leftSon = 2 * pos + 1; 317 | int rightSon = 2 * pos + 2; 318 | double leftSonWeight = leftSon heap[pos].weight && leftSonWeight >= rightSonWeight) { 321 | return leftSon; 322 | } 323 | if (rightSonWeight > heap[pos].weight && rightSonWeight > leftSonWeight) { 324 | return rightSon; 325 | } 326 | return -1; 327 | } 328 | 329 | int Vector::findSmallerSon(int pos, int size, VectorItem* heap) { 330 | int leftSon = 2 * pos + 1; 331 | int rightSon = 2 * pos + 2; 332 | double leftSonWeight = leftSon < size ? heap[leftSon].weight : INT_MAX; 333 | double rightSonWeight = rightSon < size ? 
heap[rightSon].weight : INT_MAX; 334 | if (leftSonWeight < heap[pos].weight && leftSonWeight <= rightSonWeight) { 335 | return leftSon; 336 | } 337 | if (rightSonWeight < heap[pos].weight && rightSonWeight < leftSonWeight) { 338 | return rightSon; 339 | } 340 | return -1; 341 | } 342 | 343 | // insert VectotItem(index, weight) into maxheap heap 344 | // size is the current size of maxheap 345 | // may do not insert 346 | void Vector::insertMinHeap(VectorItem* heap, int & size, int index, double weight) { 347 | if (weight <= 0) { // remove element that is less than 0 348 | return; 349 | } 350 | if (size < p + q) { // heap is not full, add in current VectorItem directly 351 | heap[size].index = index; 352 | heap[size].weight = weight; 353 | size++; 354 | adjustMinHeapUpward(size - 1, size, heap); 355 | } 356 | else if (weight > heap[0].weight){ //heap is full, and current VectorItem > min VectorItem in heap 357 | heap[0].index = index; 358 | heap[0].weight = weight; 359 | adjustMinHeapDownward(0, size, heap); 360 | } 361 | } 362 | 363 | void Vector::insertMaxHeap(VectorItem *heap, int &size, int index, double weight) { 364 | if (size < p + q) { 365 | heap[size].index = index; 366 | heap[size].weight = weight; 367 | size++; 368 | adjustMaxHeapUpward(size - 1, size, heap); 369 | } 370 | else if (weight < heap[0].weight) { 371 | heap[0].index = index; 372 | heap[0].weight = weight; 373 | adjustMaxHeapDownward(0, size, heap); 374 | } 375 | } 376 | 377 | // Regularize criteria: vector length no longer than sqrt(2) / 2 378 | // The vectot that will be regularized is NextP, not CurP 379 | void Vector::regularize() { 380 | double length = sqrt(getVectorSquareSum(1)); 381 | if (length > sqrt2d2) { 382 | double coe = sqrt2d2 / length; 383 | 384 | for (int i = 0; i < p; i++) { 385 | if (vectorNextP[i].index == -1) { 386 | break; 387 | } 388 | vectorNextP[i].weight *= coe; 389 | } 390 | for (int i = 0; i < q; i++) { 391 | if (vectorNextQ[i].index == -1) { 392 | break; 393 | } 
394 | vectorNextQ[i].weight *= coe; 395 | } 396 | } 397 | } 398 | 399 | int Vector::IndexOfMaxW() const { 400 | int index = vectorCurP[0].index; 401 | double maxW = vectorCurP[0].weight; 402 | 403 | for (int i = 1; i < p; i++) { 404 | if (vectorCurP[i].index == -1) break; 405 | if (vectorCurP[i].weight > maxW) { 406 | maxW = vectorCurP[i].weight; 407 | index = vectorCurP[i].index; 408 | } 409 | } 410 | return index; 411 | } 412 | 413 | int Vector::GetDomiDim(int * dim, double * w) const { 414 | double avg = 0; 415 | int k = 0; 416 | for (int i = 0; i < p; i++) { 417 | if (vectorCurP[i].index == -1) { 418 | break; 419 | } 420 | avg += vectorCurP[i].weight; 421 | k++; 422 | } 423 | avg /= k; 424 | int n_dim = 0; 425 | for (int i = 0; i < p; i++) { 426 | if (vectorCurP[i].index == -1) break; 427 | if (vectorCurP[i].weight > avg) { 428 | dim[n_dim] = vectorCurP[i].index; 429 | w[n_dim] = vectorCurP[i].weight; 430 | n_dim++; 431 | } 432 | } 433 | return n_dim; 434 | } -------------------------------------------------------------------------------- /codes/AnomalyDetection.cpp: -------------------------------------------------------------------------------- 1 | #include "AnomalyDetection.h" 2 | #include "Edge.h" 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | AnomalyDetection::AnomalyDetection(Embedding *embd, const double i_theta) { 14 | this->nodeCount = embd->getNodeCount(); 15 | this->edgeCount = embd->getEdgeCount() * 2; 16 | this->cmntCount = embd->getCmtyCount(); 17 | this->theta = i_theta; 18 | this->vectors = embd->getVectors(); 19 | this->edges = new Edgeset(embd->getEdges()); 20 | this->nodeStress = new double[this->nodeCount]; 21 | this->avg_node_stress = new double[this->nodeCount]; 22 | this->deg = new int[this->nodeCount]; 23 | for (int k = 0; k < nodeCount; k++) { 24 | int left = this->getEdgeIndex(k, 0); 25 | int right = this->getEdgeIndex(k, this->nodeCount - 1); 26 | 
deg[k] = right - left; 27 | } 28 | this->link_comm = new int[this->nodeCount]; 29 | this->larg_comm = new int[this->nodeCount]; 30 | this->anomaly_deg = new Sorted_Dou_Int_Pair[this->nodeCount]; 31 | this->detectAnomaly(); 32 | 33 | //this->printNodeStress(); 34 | } 35 | 36 | AnomalyDetection::~AnomalyDetection() { 37 | delete edges; edges = NULL; 38 | delete[] nodeStress; nodeStress = NULL; 39 | delete[] avg_node_stress; avg_node_stress = NULL; 40 | if (deg != NULL) { delete[] deg; deg = NULL; } 41 | if (link_comm != NULL) { delete[] link_comm; link_comm = NULL; } 42 | if (anomaly_deg != NULL) { delete[] anomaly_deg; anomaly_deg = NULL; } 43 | if (larg_comm != NULL) { delete[] larg_comm; larg_comm = NULL; } 44 | if (ncc != NULL) { delete[] ncc; ncc = NULL; } 45 | if (node_comm != NULL) { 46 | for (int i = 0; i < nodeCount; i++) { 47 | delete[] node_comm[i]; 48 | delete[] node_comm_w[i]; 49 | } 50 | delete[] node_comm; 51 | delete[] node_comm_w; 52 | } 53 | } 54 | 55 | void AnomalyDetection::detectAnomaly() { 56 | //double totalStress = 0; 57 | //memset(stress_dis, 0, sizeof(int)* 100); 58 | //for (int i = 0; i < this->nodeCount; i++) { 59 | // this->getNodeStress(i); 60 | // totalStress += this->nodeStress[i]; 61 | //} 62 | //avgStress = totalStress / this->nodeCount; 63 | 64 | //ofstream stre_dis_ofs("stress_dis.dat"); 65 | //double cum_prob = 0; 66 | //for (int i = 0; i < 101; i++) { 67 | // cum_prob += stress_dis[i] * 1.0 / edgeCount; 68 | // stre_dis_ofs << "[" << i * 1.0 / 100 << ", " << (i + 1) * 1.0 / 100 << "]: " << stress_dis[i] / 2 69 | // << "\tprob=" << stress_dis[i] * 1.0 / edgeCount << "\tcum_prob=" << cum_prob << endl; 70 | //} 71 | //stre_dis_ofs.close(); 72 | 73 | //stdDev = 0; 74 | //for (int i = 0; i < this->nodeCount; i++) { 75 | // stdDev += (this->nodeStress[i] - avgStress) * (this->nodeStress[i] - avgStress); 76 | //} 77 | //stdDev = sqrt(stdDev / this->nodeCount); 78 | 79 | GetNodeComm(); 80 | for (int i = 0; i < this->nodeCount; i++) { 81 
| //if (nodeStress[i] > 1.5) 82 | // //anomaly_deg[i].SetValue(i, abs(nodeStress[i] - avgStress) / stdDev); 83 | // anomaly_deg[i].SetValue(i, avg_node_stress[i]); 84 | //else anomaly_deg[i].SetValue(i, 0); 85 | //anomaly_deg[i].SetValue(i, larg_comm[i] * 1.0 / deg[i]); 86 | anomaly_deg[i].SetValue(i, LargNeiComm(i)); 87 | } 88 | sort(anomaly_deg, anomaly_deg + nodeCount); 89 | 90 | //double totalStress = 0; 91 | //for (int i = 0; i < nodeCount; i++) { 92 | // totalStress += anomaly_deg[i].d; 93 | //} 94 | //avgStress = totalStress / this->nodeCount; 95 | //stdDev = 0; 96 | //for (int i = 0; i < this->nodeCount; i++) { 97 | // stdDev += (anomaly_deg[i].d - avgStress) * (anomaly_deg[i].d - avgStress); 98 | //} 99 | //stdDev = sqrt(stdDev / this->nodeCount); 100 | //cout << "AVG. " << avgStress << endl; 101 | //cout << "STD DEV " << stdDev << endl; 102 | } 103 | 104 | double AnomalyDetection::getNodeStress(int k) { 105 | double ns = 0; 106 | int left = this->getEdgeIndex(k, 0); 107 | int right = this->getEdgeIndex(k, this->nodeCount - 1); 108 | int s, t; 109 | double len; 110 | map nb_comm; 111 | for (int i = left; i < right; i++) { 112 | s = this->edges->getEdge(i).getStart(); 113 | t = this->edges->getEdge(i).getEnd(); 114 | len = vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 115 | //if (len * len > 0.1) 116 | stress_dis[(int)floor(len * len * 100)]++; 117 | ns += len * len; 118 | int comm = vectors[t].IndexOfMaxW(); 119 | if (nb_comm.find(comm) == nb_comm.end()) nb_comm.insert(pair(comm, 1)); 120 | else nb_comm.find(comm)->second++; 121 | } 122 | this->nodeStress[k] = ns; 123 | this->avg_node_stress[k] = ns / (right - left); 124 | this->link_comm[k] = nb_comm.size(); 125 | this->deg[k] = right - left; 126 | int max_comm = 0; 127 | for (map::iterator iter = nb_comm.begin(); iter != nb_comm.end(); ++iter) { 128 | if (iter->second > max_comm) max_comm = iter->second; 129 | } 130 | this->larg_comm[k] = max_comm; 131 | //return ns; 132 | return ns / (right 
- left); 133 | } 134 | 135 | void AnomalyDetection::PrintDetailNodeStress(const int k, ofstream & ofs) { 136 | ofs << k << ": "; 137 | double ns = 0; 138 | int left = this->getEdgeIndex(k, 0); 139 | int right = this->getEdgeIndex(k, this->nodeCount - 1); 140 | int s, t; 141 | double len; 142 | map nb_comm; 143 | for (int i = left; i < right; i++) { 144 | s = this->edges->getEdge(i).getStart(); 145 | t = this->edges->getEdge(i).getEnd(); 146 | len = vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 147 | //ofs << vectors[t].IndexOfMaxW() << " "; 148 | int index = vectors[t].IndexOfMaxW(); 149 | map::iterator iter = nb_comm.find(index); 150 | if (iter == nb_comm.end()) nb_comm.insert(pair(index, 1)); 151 | else iter->second++; 152 | //if (len * len > 0.1) { 153 | //ns += len * len; 154 | //ofs << "<" << t << "," << len * len << "> "; 155 | //} 156 | } 157 | 158 | //ofs << "avg=" << ns / (right - left) << " total=" << ns << endl; 159 | ofs << "total: " << nb_comm.size() << "\t"; 160 | for (map::iterator iter = nb_comm.begin(); iter != nb_comm.end(); ++iter) { 161 | ofs << "<" << iter->first << ", " << iter->second << "> "; 162 | } 163 | ofs << endl; 164 | min_total = min(ns, min_total); 165 | min_avg = min(ns / (right - left), min_avg); 166 | max_total = max(ns, max_total); 167 | max_avg = max(ns / (right - left), max_avg); 168 | } 169 | 170 | int AnomalyDetection::getEdgeIndex(int s, int t) const { 171 | Edge e(-1, -1); 172 | e.setPoint(s, t); 173 | if (e == this->edges->getEdge(this->edgeCount - 1)) { 174 | return this->edgeCount; 175 | } 176 | 177 | int left = 0; 178 | int right = this->edgeCount - 1; 179 | while (left < right) { 180 | int mid = (left + right) / 2; 181 | if (e == this->edges->getEdge(mid)) { 182 | return mid; 183 | } 184 | if (e < this->edges->getEdge(mid)) { 185 | right = mid; 186 | } 187 | else { 188 | left = mid + 1; 189 | } 190 | } 191 | return left; 192 | } 193 | 194 | void AnomalyDetection::GetPreRecCurve(const string filename) { 195 
| fstream anomaly(filename + "-anomaly.txt"); 196 | if (!anomaly) { 197 | cout << "can not open anomaly file: " + filename + "-anomaly.txt" << endl; 198 | for (int i = 0; i <= 10; i++) { 199 | cout << i << " " << anomaly_deg[i * 1000].d << endl; 200 | } 201 | return; 202 | } 203 | int anomaly_cnt = 0; 204 | set anml; 205 | string line; 206 | while (anomaly) { 207 | getline(anomaly, line); 208 | if (line.length() == 0) 209 | break; 210 | anomaly_cnt++; 211 | anml.insert(stoi(line)); 212 | } 213 | anomaly.close(); 214 | 215 | int found = 0; 216 | //double rec = 0.05; 217 | int rec = anml.size() / 20; 218 | double f1 = 0; 219 | for (int i = 0; i < nodeCount; i++) { 220 | if (anml.find(anomaly_deg[i].i) != anml.end()) { 221 | found++; 222 | //if (found >= anomaly_cnt * rec) { 223 | if (found >= rec) { 224 | double recall = rec * 1.0 / anomaly_cnt; 225 | double prec = found * 1.0 / (i + 1); 226 | cout << recall << " " << prec << " " 227 | << (anomaly_deg[i].d - avgStress) / stdDev << endl; 228 | rec += anml.size() / 20; 229 | f1 = max(f1, 2 * recall * prec / (recall + prec)); 230 | } 231 | } 232 | } 233 | cout << "F1 Score: " << f1 << endl; 234 | } 235 | 236 | void AnomalyDetection::TopAnomalies(const string filename, const double theta) { 237 | fstream anomaly(filename + "-anomaly.txt"); 238 | if (!anomaly) { 239 | cout << "can not open anomaly file: anomaly.txt" << endl; 240 | } 241 | int anomaly_cnt = 0; 242 | set anml; 243 | string line; 244 | while (anomaly) { 245 | getline(anomaly, line); 246 | if (line.length() == 0) 247 | break; 248 | anomaly_cnt++; 249 | anml.insert(stoi(line)); 250 | } 251 | anomaly.close(); 252 | 253 | ofstream stress_ofs(filename + "-stress.txt"); 254 | //stress_ofs << "hub nodes" << endl; 255 | //min_total = 10000; min_avg = 10000; 256 | //max_total = 0; max_avg = 0; 257 | //for (set::iterator iter = anml.begin(); iter != anml.end(); ++iter) { 258 | // if (*iter < 100) { 259 | // //PrintDetailNodeStress(*iter, stress_ofs); 260 | // 
PrintLargNeiComm(*iter, stress_ofs); 261 | // } 262 | //} 263 | ////stress_ofs << "min_total=" << min_total << " min_avg=" << min_avg << endl; 264 | ////stress_ofs << "max_total=" << max_total << " max_avg=" << max_avg << endl; 265 | //stress_ofs << "outlies" << endl; 266 | //min_total = 10000; min_avg = 10000; 267 | //max_total = 0; max_avg = 0; 268 | //for (set::iterator iter = anml.begin(); iter != anml.end(); ++iter) { 269 | // if (*iter > 100) { 270 | // //PrintDetailNodeStress(*iter, stress_ofs); 271 | // PrintLargNeiComm(*iter, stress_ofs); 272 | // } 273 | //} 274 | //stress_ofs << "min_total=" << min_total << " min_avg=" << min_avg << endl; 275 | //stress_ofs << "max_total=" << max_total << " max_avg=" << max_avg << endl; 276 | int k = 0; 277 | stress_ofs << "top 5000" << endl; 278 | min_total = 10000; min_avg = 10000; 279 | max_total = 0; max_avg = 0; 280 | for (int i = 0; i < nodeCount; i++) { 281 | if (anml.find(anomaly_deg[i].i) != anml.end()) { 282 | stress_ofs << "yes "; 283 | } 284 | k++; 285 | //PrintDetailNodeStress(anomaly_deg[i].i, stress_ofs); 286 | stress_ofs << anomaly_deg[i].d << " " << deg[anomaly_deg[i].i] << "; "; 287 | PrintLargNeiComm(anomaly_deg[i].i, stress_ofs); 288 | if (anomaly_deg[i].d < theta) break; 289 | 290 | } 291 | //stress_ofs << "min_total=" << min_total << " min_avg=" << min_avg << endl; 292 | //stress_ofs << "max_total=" << max_total << " max_avg=" << max_avg << endl; 293 | stress_ofs.close(); 294 | } 295 | 296 | void AnomalyDetection::TopK(const int k) { 297 | ofstream top_ofs("topk.txt"); 298 | for (int i = 0; i < k; i++) { 299 | top_ofs << anomaly_deg[i].i << " " << anomaly_deg[i].d << " "; // << endl; 300 | PrintLargNeiComm(anomaly_deg[i].i, top_ofs); 301 | } 302 | top_ofs.close(); 303 | } 304 | 305 | void AnomalyDetection::GetNodeComm() { 306 | int * domi_dim = new int[cmntCount]; 307 | double * domi_w = new double[cmntCount]; 308 | ncc = new int[nodeCount]; 309 | node_comm = new int*[nodeCount]; 310 | node_comm_w = 
new double*[nodeCount]; 311 | for (int i = 0; i < nodeCount; i++) { 312 | ncc[i] = vectors[i].GetDomiDim(domi_dim, domi_w); 313 | node_comm[i] = new int[ncc[i]]; 314 | node_comm_w[i] = new double[ncc[i]]; 315 | for (int j = 0; j < ncc[i]; j++) { 316 | node_comm[i][j] = domi_dim[j]; 317 | node_comm_w[i][j] = domi_w[j]; 318 | } 319 | } 320 | 321 | delete[] domi_dim; 322 | delete[] domi_w; 323 | } 324 | 325 | double AnomalyDetection::LargNeiComm(const int s) const { 326 | int left = this->getEdgeIndex(s, 0); 327 | int right = this->getEdgeIndex(s, this->nodeCount - 1); 328 | //map comm_pow; 329 | double * comm_pow = new double[cmntCount]; 330 | //for (int i = 0; i < cmntCount; i++) { comm_pow[i] = 0; } 331 | memset(comm_pow, 0, cmntCount * sizeof(double)); 332 | int t, tar_comm; double stren; 333 | //map::iterator iter; 334 | for (int i = left; i < right; i++) { 335 | t = edges->getEdge(i).getEnd(); 336 | //stren = 1 - vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 337 | stren = 1 - LenOfVD(s, t); 338 | double total_w = 0; 339 | for (int j = 0; j < ncc[t]; j++) { 340 | total_w += node_comm_w[t][j]; 341 | } 342 | for (int j = 0; j < ncc[t]; j++) { 343 | tar_comm = node_comm[t][j]; 344 | comm_pow[tar_comm] += stren * node_comm_w[t][j] / total_w; 345 | //comm_pow[tar_comm] += stren * node_comm_w[t][j]; 346 | //iter = comm_pow.find(tar_comm); 347 | //if (iter == comm_pow.end()) comm_pow.insert(pair(tar_comm, stren * node_comm_w[t][j] / total_w)); 348 | //else iter->second += stren * node_comm_w[t][j] / total_w; 349 | } 350 | } 351 | //double avg_pow = 0; 352 | //for (iter = comm_pow.begin(); iter != comm_pow.end(); ++iter) { avg_pow += iter->second; } 353 | //avg_pow /= comm_pow.size(); 354 | //int valid_comm = 0; 355 | //for (iter = comm_pow.begin(); iter != comm_pow.end(); ++iter) { 356 | // if (iter->second > avg_pow) valid_comm++; 357 | //} 358 | //return valid_comm; 359 | sort(comm_pow, comm_pow + cmntCount); 360 | //double base = comm_pow[cmntCount - 1]; 
361 | //double avg_pow = 0; 362 | //int valid = 0; 363 | //for (int i = 0; i < cmntCount; i++) { 364 | // avg_pow += comm_pow[i]; 365 | // valid += (comm_pow[i] > 0) ? 1 : 0; 366 | //} 367 | //avg_pow /= valid; 368 | //double score = 0; 369 | //for (int i = cmntCount - 1; i >= 0; i--) { 370 | // //if (comm_pow[i] >= base / 7) 371 | // if (comm_pow[i] >= avg_pow) 372 | // //score += log(1 + comm_pow[i]) / log(1 + base); 373 | // score += (comm_pow[i]) / base; 374 | //} 375 | double max_comm = comm_pow[cmntCount - 1]; 376 | double base = max_comm * theta; 377 | double score = 0; 378 | for (int i = cmntCount - 1; i >= 0; i--) { 379 | if (comm_pow[i] >= base) score += comm_pow[i] / max_comm; 380 | } 381 | delete[] comm_pow; 382 | return score; 383 | } 384 | 385 | void AnomalyDetection::PrintLargNeiComm(const int s, ofstream & ofs) const { 386 | ofs << s << ": "; 387 | int left = this->getEdgeIndex(s, 0); 388 | int right = this->getEdgeIndex(s, this->nodeCount - 1); 389 | map comm_pow; 390 | int t, tar_comm; double stren; map::iterator iter; 391 | for (int i = left; i < right; i++) { 392 | t = edges->getEdge(i).getEnd(); 393 | //stren = 1 - vectors[s].getLengthOfMinus(vectors[t].getVectorNextP()); 394 | stren = 1 - LenOfVD(s, t); 395 | double total_w = 0; 396 | for (int j = 0; j < ncc[t]; j++) { 397 | total_w += node_comm_w[t][j]; 398 | } 399 | for (int j = 0; j < ncc[t]; j++) { 400 | tar_comm = node_comm[t][j]; 401 | iter = comm_pow.find(tar_comm); 402 | if (iter == comm_pow.end()) comm_pow.insert(pair(tar_comm, stren * node_comm_w[t][j] / total_w)); 403 | else iter->second += stren * node_comm_w[t][j] / total_w; 404 | //if (iter == comm_pow.end()) comm_pow.insert(pair(tar_comm, stren * node_comm_w[t][j])); 405 | //else iter->second += stren * node_comm_w[t][j]; 406 | } 407 | } 408 | double max_pow = 0, avg_pow = 0;; 409 | for (iter = comm_pow.begin(); iter != comm_pow.end(); ++iter) { 410 | if (iter->second > max_pow) max_pow = iter->second; 411 | avg_pow += 
iter->second; 412 | } 413 | avg_pow /= comm_pow.size(); 414 | double base = max_pow * theta; 415 | ofs << max_pow << " " << base << "; "; 416 | for (iter = comm_pow.begin(); iter != comm_pow.end(); ++iter) { 417 | //if (iter->second >= max_pow / 7) 418 | //if (iter->second >= avg_pow) 419 | if (iter->second >= base) 420 | ofs << "<" << iter->first << ", " << iter->second << ">\t"; 421 | } 422 | ofs << endl; 423 | } 424 | 425 | double AnomalyDetection::LenOfVD(const int s, const int t) const { 426 | double sum = 0; 427 | int i, j; 428 | for (i = 0, j = 0; i < ncc[s] && j < ncc[t];) { 429 | if (node_comm[s][i] == node_comm[t][j]) { 430 | sum += (node_comm_w[s][i] - node_comm_w[t][j]) * (node_comm_w[s][i] - node_comm_w[t][j]); 431 | ++i; 432 | ++j; 433 | } 434 | else if (node_comm[s][i] < node_comm[t][j]) { 435 | sum += (node_comm_w[s][i])* (node_comm_w[s][i]); 436 | ++i; 437 | } 438 | else { 439 | sum += (node_comm_w[t][j])* (node_comm_w[t][j]); 440 | ++j; 441 | } 442 | } 443 | for (; i < ncc[s]; ++i) { 444 | sum += (node_comm_w[s][i])* (node_comm_w[s][i]); 445 | } 446 | for (; j < ncc[t]; ++j) { 447 | sum += (node_comm_w[t][j])* (node_comm_w[t][j]); 448 | } 449 | 450 | return sqrt(sum); 451 | } 452 | 453 | void AnomalyDetection::RemoveEdge(const string source, const string target) { 454 | ifstream in(source); 455 | int n, m, s, t, mm; 456 | in >> n >> m; 457 | IID * edges = new IID[m]; 458 | for (int i = 0; i < m; i++) { 459 | in >> s >> t; 460 | edges[i].SetValue(s, t, LenOfVD(s, t)); 461 | } 462 | in.close(); 463 | sort(edges, edges + m); 464 | double avg = 0; 465 | for (int i = 0; i < m; i++) { avg += edges[i].d; } 466 | avg /= m; 467 | double std_dev = 0; 468 | for (int i = 0; i < m; i++) { std_dev += (edges[i].d - avg) * (edges[i].d - avg); } 469 | std_dev = sqrt(std_dev / m); 470 | cout << "avg & std_dev: " << avg << " " << std_dev << endl; 471 | int start = 0; 472 | //for (int i = 0; i < 100; i++) { 473 | // cout << edges[i].s << " " << edges[i].t << " " << 
edges[i].d << "\t"; 474 | //} 475 | cout << endl; 476 | while ((edges[start].d - avg) / std_dev > 1.5) { start++; } 477 | cout << "start: " << start << endl; 478 | ofstream out(target); 479 | in >> n >> m; 480 | out << n << " " << m - start << endl; 481 | for (start; start < m; start++) { 482 | out << edges[start].s << " " << edges[start].t << endl; 483 | } 484 | out.close(); 485 | delete[] edges; edges = NULL; 486 | } 487 | 488 | void AnomalyDetection::RemoveNode(const string source, const string target, const double thre) { 489 | //int * mapping = new int[nodeCount]; 490 | //fill(mapping, mapping + nodeCount, 0); 491 | //for (int i = 0; i < nodeCount; i++) { 492 | // if (anomaly_deg[i].d >= thre) mapping[anomaly_deg[i].i] = -1; 493 | //} 494 | //int valid = 0; 495 | //for (int i = 0; i < nodeCount; i++) { 496 | // if (mapping[i] != -1) { 497 | // mapping[i] = valid; 498 | // valid++; 499 | // } 500 | //} 501 | 502 | //ifstream in(source); 503 | //ofstream out(target); 504 | //int n, m, s, t; 505 | //in >> n >> m; 506 | //out << valid << endl; 507 | //cout << "valid node: " << valid << endl; 508 | //for (int i = 0; i < m; i++) { 509 | // in >> s >> t; 510 | // if (mapping[s] >= 0 && mapping[t] >= 0) out << mapping[s] << " " << mapping[t] << endl; 511 | //} 512 | //in.close(); 513 | //out.close(); 514 | //delete[] mapping; mapping = NULL; 515 | 516 | ofstream out(target); 517 | int anomaly = 0, valid = 0; 518 | for (int i = 0; i < nodeCount; i++) { 519 | if (anomaly_deg[i].d >= thre) anomaly++; 520 | else valid++; 521 | } 522 | cout << "valid node: " << valid << endl; 523 | out << anomaly << endl; 524 | for (int i = 0; i < nodeCount; i++) { 525 | if (anomaly_deg[i].d >= thre) out << anomaly_deg[i].i << endl; 526 | } 527 | out.close(); 528 | } 529 | 530 | void AnomalyDetection::GraphRewrite(const string source, const string target, const double thre) { 531 | ifstream source_ifs(source); 532 | int n, m, s, t; 533 | source_ifs >> n >> m; 534 | int * mapping = new 
int[n]; 535 | fill(mapping, mapping + n, 0); 536 | for (int i = 0; i < n; i++) { 537 | if (anomaly_deg[i].d >= thre) mapping[anomaly_deg[i].i] = -1; 538 | } 539 | int valid = 0; 540 | for (int i = 0; i < n; i++) { 541 | if (mapping[i] == 0) { 542 | mapping[i] = valid; 543 | valid++; 544 | } 545 | } 546 | int validEdge = 0; 547 | for (int i = 0; i < m; i++) { 548 | source_ifs >> s >> t; 549 | if (mapping[s] != -1 && mapping[t] != -1) 550 | validEdge++; 551 | } 552 | source_ifs.close(); 553 | source_ifs.open(source); 554 | source_ifs >> n >> m; 555 | ofstream target_ifs(target); 556 | target_ifs << valid << " " << validEdge << endl; 557 | for (int i = 0; i < m; i++) { 558 | source_ifs >> s >> t; 559 | if (mapping[s] != -1 && mapping[t] != -1) 560 | target_ifs << mapping[s] << " " << mapping[t] << endl; 561 | } 562 | source_ifs.close(); 563 | target_ifs.close(); 564 | 565 | delete[] mapping; mapping = NULL; 566 | } 567 | 568 | double AnomalyDetection::GetF1(const string filename, const double thre) { 569 | fstream anomaly(filename + "-anomaly.txt"); 570 | if (!anomaly) { 571 | cout << "can not open anomaly file: anomaly.txt" << endl; 572 | return -1; 573 | } 574 | int anomaly_cnt = 0; 575 | set anml; 576 | string line; 577 | while (anomaly) { 578 | getline(anomaly, line); 579 | if (line.length() == 0) 580 | break; 581 | anomaly_cnt++; 582 | anml.insert(stoi(line)); 583 | } 584 | anomaly.close(); 585 | 586 | int found = 0, detect = 0; 587 | double f1 = 0; double best_sep = -1; 588 | for (int i = 0; i < nodeCount; i++) { 589 | if (anomaly_deg[i].d >= thre) { 590 | detect++; 591 | if (anml.find(anomaly_deg[i].i) != anml.end()) found++; 592 | } 593 | } 594 | double prec = found * 1.0 / detect; 595 | double recall = found * 1.0 / anomaly_cnt; 596 | //if (prec + recall > 0 && 2 * prec * recall / (prec + recall) > f1) { 597 | // f1 = 2 * prec * recall / (prec + recall); 598 | // best_sep = anomaly_deg[i].d; 599 | //} 600 | //cout << "best sep line: " << best_sep << endl; 601 
| f1 = 2 * prec * recall / (prec + recall); 602 | return f1; 603 | //cout << "detect count: " << detect << endl; 604 | //return 2 * prec * recall / (prec + recall); 605 | } --------------------------------------------------------------------------------