├── data └── toy │ ├── GraphName.csv │ ├── NodeName.csv │ └── GraphData.csv ├── output ├── coherent.out_1.png ├── coherent.out_2.png ├── coherent.out_bi.png ├── contrast.out_1.png ├── contrast.out_2.png ├── contrast.out_bi.png ├── coherent.out └── contrast.out ├── run.sh ├── .gitignore ├── Makefile ├── src ├── analyze.cpp ├── utils │ └── utils.h └── data │ ├── coherentCoreMining.h │ ├── contrastingSubgraph.h │ └── graph.h ├── README.md ├── post_processing_src └── visualize.py └── LICENSE /data/toy/GraphName.csv: -------------------------------------------------------------------------------- 1 | 0,Graph 1 2 | 1,Graph 2 -------------------------------------------------------------------------------- /output/coherent.out_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/coherent.out_1.png -------------------------------------------------------------------------------- /output/coherent.out_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/coherent.out_2.png -------------------------------------------------------------------------------- /output/coherent.out_bi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/coherent.out_bi.png -------------------------------------------------------------------------------- /output/contrast.out_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/contrast.out_1.png -------------------------------------------------------------------------------- /output/contrast.out_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/contrast.out_2.png -------------------------------------------------------------------------------- /output/contrast.out_bi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shangjingbo1226/ContrastSubgraphMining/HEAD/output/contrast.out_bi.png -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | ./bin/analyze.exe toy 0 1 1 1 1 1 1 1 4 | 5 | #without core 6 | #./bin/analyze.exe toy 0 1 0 0 1 1 -1 1 7 | 8 | #with no neighbor restriction 9 | #./bin/analyze.exe toy 0 1 1 0 1 1 -1 1 10 | 11 | python post_processing_src/visualize.py 12 | -------------------------------------------------------------------------------- /data/toy/NodeName.csv: -------------------------------------------------------------------------------- 1 | 0,node_0 2 | 1,node_1 3 | 2,node_2 4 | 3,node_3 5 | 4,node_4 6 | 5,node_5 7 | 6,node_6 8 | 7,node_7 9 | 8,node_8 10 | 9,node_9 11 | 10,node_10 12 | 11,node_11 13 | 12,node_12 14 | 13,node_13 15 | 14,node_14 16 | 15,node_15 17 | 16,node_16 18 | 17,node_17 19 | -------------------------------------------------------------------------------- /output/coherent.out: -------------------------------------------------------------------------------- 1 | Graph 1,Graph 2 2 | 5 3 | seeds:node_1, 4 | node_1,node_2,node_3,node_4,node_5, 5 | 
0,2.09861,1,1.69315,2.09861, 6 | 2.09861,0,2.09861,1.69315,0, 7 | 1,2.09861,0,0,2.38629, 8 | 1.69315,1.69315,0,0,1.69315, 9 | 2.09861,0,2.38629,1.69315,0, 10 | 0,2.09861,1,1.69315,2.38629, 11 | 2.09861,0,2.09861,1.69315,0, 12 | 1,2.09861,0,0,2.38629, 13 | 1.69315,1.69315,0,0,1.69315, 14 | 2.38629,0,2.38629,1.69315,0, 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CXX = g++ 2 | CFLAGS = -std=c++11 -Wall -O3 -msse2 -fopenmp -I.. 3 | 4 | BIN = ./bin/analyze 5 | .PHONY: clean all 6 | 7 | all: ./bin $(BIN) 8 | 9 | ./bin/analyze: ./src/analyze.cpp ./src/utils/*.h ./src/data/*.h 10 | 11 | ./bin: 12 | mkdir -p bin 13 | 14 | LDFLAGS= -pthread -lm -Wno-unused-result -Wno-sign-compare -Wno-unused-variable -Wno-parentheses -Wno-format 15 | $(BIN) : 16 | $(CXX) $(CFLAGS) $(LDFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) 17 | $(OBJ) : 18 | $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) 19 | 20 | clean : 21 | rm -rf bin 22 | -------------------------------------------------------------------------------- /data/toy/GraphData.csv: -------------------------------------------------------------------------------- 1 | 0,1,2,3 2 | 0,1,3,1 3 | 0,1,4,2 4 | 0,1,5,3 5 | 0,2,3,3 6 | 0,2,4,2 7 | 0,3,5,4 8 | 0,4,5,2 9 | 0,0,1,2 10 | 0,2,6,3 11 | 0,4,8,2 12 | 0,4,9,1 13 | 0,6,8,2 14 | 0,6,9,3 15 | 0,8,9,1 16 | 0,10,11,10 17 | 0,10,12,9 18 | 0,10,13,9 19 | 0,11,12,16 20 | 0,11,13,4 21 | 0,11,14,8 22 | 0,11,15,6 23 | 0,11,16,9 24 | 0,12,13,3 25 | 0,14,15,9 26 | 0,14,16,10 27 | 0,14,17,8 28 | 0,15,16,6 29 | 0,15,17,10 30 | 0,16,17,11 31 | 1,1,2,3 32 | 1,1,3,1 33 | 1,1,4,2 34 | 1,1,5,4 35 | 1,2,3,3 36 | 1,2,4,2 37 | 1,3,5,4 38 | 1,4,5,2 39 | 1,3,6,4 40 | 1,3,7,3 41 | 1,5,8,4 42 | 1,5,9,6 43 | 1,6,7,2 44 | 1,7,8,1 45 | 1,7,9,3 46 | -------------------------------------------------------------------------------- /output/contrast.out: -------------------------------------------------------------------------------- 1 | Graph 1,Graph 2 2 | 9 3 | seeds:node_1,node_2,node_3,node_4,node_5, 4 | node_1,node_2,node_3,node_4,node_5,node_6,node_7,node_8,node_9, 5 | 0,2.09861,1,1.69315,2.09861,0,0,0,0, 6 | 2.09861,0,2.09861,1.69315,0,2.09861,0,0,0, 7 | 1,2.09861,0,0,2.38629,0,0,0,0, 8 | 1.69315,1.69315,0,0,1.69315,0,0,1.69315,1, 9 | 2.09861,0,2.38629,1.69315,0,0,0,0,0, 10 | 0,2.09861,0,0,0,0,0,1.69315,2.09861, 11 | 0,0,0,0,0,0,0,0,0, 12 | 0,0,0,1.69315,0,1.69315,0,0,1, 13 | 0,0,0,1,0,2.09861,0,1,0, 14 | 0,2.09861,1,1.69315,2.38629,0,0,0,0, 15 | 2.09861,0,2.09861,1.69315,0,0,0,0,0, 16 | 1,2.09861,0,0,2.38629,2.38629,2.09861,0,0, 17 | 1.69315,1.69315,0,0,1.69315,0,0,0,0, 18 | 2.38629,0,2.38629,1.69315,0,0,0,2.38629,2.79176, 19 | 0,0,2.38629,0,0,0,1.69315,0,0, 20 | 0,0,2.09861,0,0,1.69315,0,1,2.09861, 21 | 0,0,0,0,2.38629,0,1,0,0, 22 | 0,0,0,0,2.79176,0,2.09861,0,0, 23 | 
-------------------------------------------------------------------------------- /src/analyze.cpp: -------------------------------------------------------------------------------- 1 | #include "data/contrastingSubgraph.h" 2 | #include "data/coherentCoreMining.h" 3 | const int iter = 200; 4 | 5 | bool loadArgs(int argc, char * argv[], int & graph_num_A, int & graph_num_B, bool & core, bool & grow, set<int> & seeds, int & step_core, int & step_contrast){ 6 | if ( argc != 10 && argc != 9 ) { 7 | cerr << "Please give the command as follows " << endl; 8 | cerr << "./bin/analyze data_folder_name(under data folder) Graph_A_num(int) Graph_B_num(int) use_core(bool) use_neighbor(bool) use_log(bool) step_core(int) step_contrast(int) seed(list of int)" << endl; 9 | cerr << "e.g. ./analyze Test 0 1 1 1 1 5 5 1,2,3,4,5,6" << endl; 10 | cerr << "For general contrast without seeds " << endl; 11 | cerr << "e.g. ./analyze Test 0 1 1 0 1 0 0" << endl; 12 | return false; 13 | } 14 | 15 | DIR_PREFIX = argv[1]; 16 | GRAPH_DIR = "data/" + DIR_PREFIX + "/"; 17 | 18 | graph_num_A = atoi(argv[2]), graph_num_B = atoi(argv[3]); 19 | core = (bool)atoi(argv[4]); 20 | grow = (bool)atoi(argv[5]); 21 | LOG_EDGE = (bool)atoi(argv[6]); 22 | step_core = atoi(argv[7]); 23 | step_contrast = atoi(argv[8]); 24 | cerr << "=== Graph Information ===" << endl; 25 | loadGraphs(); 26 | int i = 0; 27 | if ( argc == 10 ){ 28 | string s = argv[9]; 29 | while ( i < s.length() ) { 30 | int c = 0; 31 | while ( i < s.length() && s[i] != ',' ){ 32 | c *= 10; 33 | c += s[i] - '0'; 34 | i ++; 35 | } 36 | seeds.insert(c); 37 | i++; 38 | } 39 | } 40 | return true; 41 | } 42 | 43 | int main(int argc, char* argv[]) { 44 | 45 | int graph_num_A, graph_num_B, step_core, step_contrast; 46 | bool core, grow; 47 | set<int> seeds; 48 | if ( ! loadArgs(argc, argv, graph_num_A, graph_num_B, core, grow, seeds, step_core, step_contrast) ) 49 | return 0; 50 | 51 | cerr << "=== Settings ===" << endl; 52 | cerr << " Contrasting graph " << graph_names[graph_num_A] << " and " << graph_names[graph_num_B] << endl; 53 | if ( core ) cerr << " Coherent core is ON" << endl; 54 | else cerr << " Coherent core is OFF" << endl; 55 | if ( grow ) cerr << " Neighbor constraint ON, " << "with core neighbor r = " << step_core << " and contrast neighbor r = " << step_contrast << endl; 56 | else cerr << " Neighbor constraint OFF" << endl; 57 | if ( seeds.size() ) { 58 | cerr << " Seeds are given:" << endl; 59 | cerr << " "; 60 | for ( auto m = seeds.begin() ; m != seeds.end() ; ++m ) if ( m != seeds.begin() ) cerr << ',' << node_id2label[*m]; else cerr << node_id2label[*m]; 61 | cerr << endl; 62 | } 63 | 64 | if ( ! 
core ){ 65 | cerr << "=== Contrast Subgraph ===" << endl; 66 | find_contrast_graph_with_seeds(graph_num_A, graph_num_B, seeds, step_contrast); 67 | } 68 | else { 69 | cerr << "=== Coherent Core ===" << endl; 70 | set<int> core = find_core_graph_with_seeds(graph_num_A, graph_num_B, seeds, step_core); 71 | if ( grow ) { 72 | cerr << "=== Contrast Subgraph ===" << endl; 73 | find_contrast_graph_with_seeds(graph_num_A, graph_num_B, core, step_contrast); 74 | } 75 | else { 76 | cerr << "=== Contrast Subgraph ===" << endl; 77 | find_contrast_graph_with_seeds(graph_num_A, graph_num_B, core, -1); 78 | } 79 | } 80 | cerr << "=== Done ===" << endl; 81 | } 82 | 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Contrast Subgraph Mining from Coherent Cores 2 | 3 | ## Publication 4 | 5 | Jingbo Shang, Xiyao Shi, Meng Jiang, Liyuan Liu, Timothy Hanratty, Jiawei Han, "[Contrast Subgraph Mining from Coherent Cores](https://arxiv.org/abs/1802.06189)", submitted to SIGKDD 18, under review. arXiv:1802.06189 [cs.SI] 6 | 7 | ## Requirements 8 | 9 | Linux or MacOS with g++ and Python 2.7 installed. 10 | 11 | Ubuntu: 12 | 13 | * g++ 4.8 `$ sudo apt-get install g++-4.8` 14 | * Python 2.7 `$ sudo apt-get install python2.7` 15 | 16 | MacOS: 17 | 18 | * g++ 6 `$ brew install gcc6` 19 | * Python 2.7 `$ brew install python` 20 | 21 | Python 2.7 package requirements : 22 | 23 | * PIL 24 | * glob 25 | * numpy 26 | * matplotlib 27 | * seaborn 28 | * pandas 29 | 30 | ## Run Command 31 | 32 | Each run generates a file containing the contrast subgraph information and, if `use_core` is enabled, a file containing the coherent core information. A post-processing toolkit is also provided to generate heatmaps of both the contrast subgraph and the coherent core. 33 | 34 | #### Run Toy Example 35 | `$ ./run.sh` 36 | 37 | The default run executes the experiment on a toy example we created. The graph contains `18` nodes and `45` edges in total (i.e. in both layers). We use node `1` as the seed. 38 | 39 | #### Custom Run 40 | ``` 41 | $ ./bin/analyze <data_folder_name> <graph_A_num> <graph_B_num> <use_core> <use_neighbor> <use_log> <step_core> <step_contrast> <seeds> 42 | $ python post_processing_src/visualize.py 43 | ``` 44 | 45 | Parameter explanation: 46 | 47 | ``` 48 | <data_folder_name>: string, data folder as described below 49 | <graph_A_num>: int, index of the first graph/layer 50 | <graph_B_num>: int, index of the second graph/layer 51 | <use_core>: bool, 1 to enable coherent core 52 | <use_neighbor>: bool, 1 to enable neighbor constraint on contrast subgraph 53 | <use_log>: bool, 1 to enable log edge weight 54 | <step_core>: int, neighbor step length for coherent core 55 | <step_contrast>: int, neighbor step length for contrast subgraph 56 | <seeds>: optional, comma separated integers, list of node indices as seeds 57 | ``` 58 | 59 | To run on your own data, place a folder containing the graph data under the `data` folder. The graph data consists of three parts: 60 | 61 | * `GraphName.csv` comma separated `int`, `string` pair, corresponding to `graph index, graph name`. 62 | 63 | Example file : 64 | ``` 65 | $ head data/toy/GraphName.csv 66 | 0,Graph 1 67 | 1,Graph 2 68 | ``` 69 | 70 | * `NodeName.csv` comma separated `int`, `string` pair, corresponding to `node index, node name`. 71 | 72 | Example file : 73 | ``` 74 | $ head data/toy/NodeName.csv 75 | 0,node_0 76 | 1,node_1 77 | 2,node_2 78 | 3,node_3 79 | 4,node_4 80 | 5,node_5 81 | 6,node_6 82 | 7,node_7 83 | 8,node_8 84 | 9,node_9 85 | ``` 86 | 87 | * `GraphData.csv` comma separated `list` of `int`, corresponding to graph `index, node u, node v, weight(u,v)`. 
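Each row describes one undirected weighted edge in the layer given by the leading graph index; rows with `u == v` or zero weight are skipped by the loader, and weights are rescaled to `1 + log(w)` when `use_log` is `1`. Below is a minimal loading sketch (an illustrative, hypothetical snippet, not part of the toolkit; the actual loader is `loadEdgeList()` in `src/data/graph.h`):

```
# Hypothetical helper for illustration: read GraphData.csv into one edge list per layer.
from collections import defaultdict

edges = defaultdict(list)  # graph index -> list of (u, v, weight)
with open("data/toy/GraphData.csv") as f:
    for row in f:
        if not row.strip():
            continue
        g, u, v, w = row.strip().split(",")
        edges[int(g)].append((int(u), int(v), float(w)))

print("%d edges in total" % sum(len(e) for e in edges.values()))  # 45 for the toy data
```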
88 | 89 | Example file : 90 | ``` 91 | $ head data/toy/GraphData.csv 92 | 0,1,2,3 93 | 0,1,3,1 94 | 0,1,4,2 95 | 0,1,5,3 96 | 0,2,3,3 97 | 0,2,4,2 98 | 0,3,5,4 99 | 0,4,5,2 100 | 0,0,1,2 101 | 0,2,6,3 102 | ``` 103 | 104 | ## Result 105 | 106 | After each run of the script `run.sh`, the corresponding raw data and heatmaps for the contrast subgraph (and, if enabled, the coherent core) can be found in the `output` folder. There are three diagrams for each core/contrast subgraph: one for the first graph, one for the second graph, and one combining the two. 107 | 108 | Find the core and contrast subgraph heatmaps for our toy example below : 109 | ![alt text][toy_core] 110 | 111 | ![alt text][toy_contrast] 112 | 113 | [toy_core]: https://github.com/shangjingbo1226/ContrastSubgraphMining/blob/master/output/coherent.out_bi.png "Heatmap for coherent core in toy data" 114 | 115 | [toy_contrast]: https://github.com/shangjingbo1226/ContrastSubgraphMining/blob/master/output/contrast.out_bi.png "Heatmap for contrast subgraph in toy data" 116 | -------------------------------------------------------------------------------- /src/utils/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __UTILS_H__ 2 | #define __UTILS_H__ 3 | 4 | #include "omp.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | using namespace std; 27 | 28 | #define FOR(i,a) for (__typeof((a).begin()) i = (a).begin(); i != (a).end(); ++ i) 29 | 30 | #define PATTERN_CHUNK_SIZE 10 31 | #define SENTENCE_CHUNK_SIZE 5 32 | #define POINT_CHUNK_SIZE 5 33 | 34 | const int SUFFIX_MASK = (1 << 20) - 1; // should be 2^k - 1 35 | mutex separateMutex[SUFFIX_MASK + 1]; 36 | 37 | const double EPS = 1e-8; 38 | 39 | /*! \brief return a real number uniform in (0,1) */ 40 | inline double next_double2(){ 41 | return (static_cast<double>( rand() ) + 1.0 ) / (static_cast<double>(RAND_MAX) + 2.0); 42 | } 43 | 44 | //padding a sentence to length l 45 | string padding_sentence(string a, size_t l){ 46 | int pad = l - a.length(); 47 | for ( int i = 0 ; i < pad ; i ++ ) { 48 | a = a + " " ; 49 | } 50 | return a; 51 | } 52 | 53 | /*! \brief return x~N(0,1) */ 54 | inline double sample_normal(){ 55 | double x,y,s; 56 | do{ 57 | x = 2 * next_double2() - 1.0; 58 | y = 2 * next_double2() - 1.0; 59 | s = x*x + y*y; 60 | }while( s >= 1.0 || s == 0.0 ); 61 | 62 | return x * sqrt( -2.0 * log(s) / s ) ; 63 | } 64 | 65 | inline bool myAssert(bool flg, string msg) 66 | { 67 | if (!flg) { 68 | cerr << msg << endl; 69 | // exit(-1); 70 | } 71 | return flg; 72 | } 73 | 74 | inline int sign(double x) 75 | { 76 | return x < -EPS ? 
-1 : x > EPS; 77 | } 78 | 79 | inline string replaceAll(const string &s, const string &from, const string &to) 80 | { 81 | string ret = ""; 82 | for (size_t i = 0; i < s.size(); ++ i) { 83 | bool found = true; 84 | for (size_t offset = 0; offset < from.size() && found; ++ offset) { 85 | found &= i + offset < s.size() && s[i + offset] == from[offset]; 86 | } 87 | if (found) { 88 | ret += to; 89 | i += from.size() - 1; 90 | } else { 91 | ret += s[i]; 92 | } 93 | } 94 | return ret; 95 | } 96 | 97 | inline double sqr(double x) 98 | { 99 | return x * x; 100 | } 101 | 102 | template 103 | inline void fromString(const string &s, T &x) 104 | { 105 | stringstream in(s); 106 | in >> x; 107 | } 108 | 109 | inline string tolower(const string &a) 110 | { 111 | string ret = a; 112 | for (size_t i = 0; i < ret.size(); ++ i) { 113 | ret[i] = tolower(ret[i]); 114 | } 115 | return ret; 116 | } 117 | 118 | const int MAX_LENGTH = 100000000; 119 | 120 | char line[MAX_LENGTH + 1]; 121 | 122 | inline bool getLine(FILE* in) 123 | { 124 | bool hasNext = fgets(line, MAX_LENGTH, in); 125 | int length = strlen(line); 126 | while (length > 0 && (line[length - 1] == '\n' || line[length - 1] == '\r')) { 127 | -- length; 128 | } 129 | line[length] = 0; 130 | return hasNext; 131 | } 132 | 133 | inline FILE* tryOpen(const string &filename, const string ¶m) 134 | { 135 | FILE* ret = fopen(filename.c_str(), param.c_str()); 136 | if (ret == NULL) { 137 | cerr << "[Warning] failed to open " << filename << " under parameters = " << param << endl; 138 | cerr << "[Warning] trying to open ../" << filename << " instead" << endl; 139 | string temp = "../" + filename; 140 | ret = fopen(temp.c_str(), param.c_str()); 141 | if ( ret == NULL ) { 142 | cerr << "[Warning] alternative address failed" << endl; 143 | } 144 | } 145 | return ret; 146 | } 147 | 148 | inline vector splitBy(const string &line, char sep) 149 | { 150 | vector tokens; 151 | string token = ""; 152 | for (size_t i = 0; i < line.size(); ++ i) { 153 | if (line[i] == sep) { 154 | if (token != "") { 155 | tokens.push_back(token); 156 | } 157 | token = ""; 158 | } else { 159 | token += line[i]; 160 | } 161 | } 162 | if (token != "") { 163 | tokens.push_back(token); 164 | } 165 | return tokens; 166 | } 167 | 168 | inline string strip(string s) 169 | { 170 | while (s.size() > 0 && iswspace(s.back())) { 171 | s.pop_back(); 172 | } 173 | int ptr = 0; 174 | while (ptr < s.size() && iswspace(s[ptr])) { 175 | ++ ptr; 176 | } 177 | s = s.substr(ptr, s.size() - ptr); 178 | return s; 179 | } 180 | 181 | namespace Binary 182 | { 183 | template 184 | inline void write(FILE* out, const T& x) { 185 | fwrite(&x, sizeof(x), 1, out); 186 | } 187 | 188 | template 189 | inline void read(FILE* in, T &size) { 190 | fread(&size, sizeof(size), 1, in); 191 | } 192 | 193 | inline void write(FILE* out, const string &s) { 194 | write(out, s.size()); 195 | if (s.size() > 0) { 196 | fwrite(&s[0], sizeof(char), s.size(), out); 197 | } 198 | } 199 | 200 | inline void read(FILE* in, string &s) { 201 | size_t size; 202 | read(in, size); 203 | s.resize(size); 204 | if (size > 0) { 205 | fread(&s[0], sizeof(char), size, in); 206 | } 207 | } 208 | } 209 | 210 | #endif 211 | -------------------------------------------------------------------------------- /src/data/coherentCoreMining.h: -------------------------------------------------------------------------------- 1 | #ifndef __COHERENTCOREMINING__ 2 | #define __COHERENTCOREMINING__ 3 | 4 | #include "graph.h" 5 | 6 | using namespace std; 7 | 8 | void 
core_init(){ 9 | sharingEdges.clear(); 10 | sharingEdges.resize(node_id2label.size()); 11 | for ( auto & m : sharingEdges){ 12 | m.clear(); m.resize(0); 13 | } 14 | sharingNodeWeights.clear(); 15 | sharingNodeWeights.resize(node_id2label.size()); 16 | sharingNodeDegrees.clear(); 17 | sharingNodeDegrees.resize(node_id2label.size()); 18 | for ( auto & m : sharingNodeWeights ) m = 0; 19 | for ( auto & m : sharingNodeDegrees ) m = 0; 20 | } 21 | 22 | //A B are the graphs we are interested in 23 | //seeds are the nodes that we want to include in our solution 24 | set find_core_graph_with_seeds(int graph_A, int graph_B, set seed, int max_d){ 25 | //clear all global variables 26 | core_init(); 27 | 28 | using namespace denseSubgraph; 29 | init(); 30 | //The two list represented graphs A and B 31 | auto & A = graph_edges[graph_A]; 32 | auto & B = graph_edges[graph_B]; 33 | 34 | 35 | set connect; 36 | if ( max_d == -1 || !seed.size() ) { 37 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 38 | connect.insert(i); 39 | } 40 | } 41 | else { 42 | vector visited; 43 | visited.resize(node_id2label.size()); 44 | set c1,c2; 45 | for ( auto & m : seed ) { 46 | for ( auto & n : visited ) n = 0; 47 | search(c1, A, visited, m, max_d); 48 | for ( auto & n : visited ) n = 0; 49 | search(c2, B, visited, m, max_d); 50 | } 51 | for ( auto & m : c1 ) connect.insert(m); 52 | for ( auto & m : c2 ) connect.insert(m); 53 | for ( auto & m : seed ) connect.insert(m); 54 | } 55 | 56 | edge_count = 0; 57 | node_count = 0; 58 | double epsContrast = 1e100, epsPenalty = 1e100; 59 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 60 | if ( !mfind(connect, i) ) continue; 61 | int ptr1, ptr2; 62 | ptr1 = ptr2 = 0; 63 | sort(A[i].begin(), A[i].end()); 64 | sort(B[i].begin(), B[i].end()); 65 | while ( ptr1 < A[i].size() && ptr2 < B[i].size() ){ 66 | if ( A[i][ptr1].first < B[i][ptr2].first ) { 67 | ptr1 ++; 68 | } 69 | else if ( A[i][ptr1].first == B[i][ptr2].first){ 70 | if ( mfind(connect, A[i][ptr1].fi) ) { 71 | epsContrast = min(epsContrast, min(A[i][ptr1].se , B[i][ptr2].se)); 72 | sharingEdges[i].pb(mp(A[i][ptr1].fi, min(A[i][ptr1].se , B[i][ptr2].se))); 73 | } 74 | ptr1 ++; ptr2 ++; 75 | } 76 | else { 77 | ptr2 ++; 78 | } 79 | } 80 | double cnt = 0; 81 | for ( auto & m : A[i] ) 82 | if ( mfind(connect, m.fi) ) 83 | cnt += m.se; 84 | for ( auto & m : B[i] ) 85 | if ( mfind(connect, m.fi) ) 86 | cnt += m.se; 87 | cnt = abs(cnt); 88 | sharingNodeWeights[i] = cnt; 89 | cnt = 0.0; 90 | for ( auto & m : sharingEdges[i] ) { 91 | cnt += m.se; 92 | } 93 | sharingNodeDegrees[i] = cnt; 94 | } 95 | for ( int i = 0 ; i < sharingEdges.size() ; i ++ ){ 96 | for ( auto & m : sharingEdges[i]){ 97 | edge_count += m.se; 98 | } 99 | } 100 | 101 | s = 0; t = node_id2label.size() + 1; 102 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 103 | dup[i + 1] = pow(sharingNodeWeights[i], NORM_CONST); 104 | node_count += dup[i + 1]; 105 | epsPenalty = min(epsPenalty, dup[i + 1]); 106 | du[i + 1] = sharingNodeDegrees[i]; 107 | } 108 | 109 | l = epsContrast / node_count ; r = edge_count / epsPenalty ; delta = epsContrast / node_count /node_count; 110 | while ( r - l > delta ) { 111 | mid = (l + r ) /2; 112 | solve(mid, seed, sharingEdges); 113 | // dfs(s); 114 | // int cnt_node = 0; 115 | // for ( int i = 1 ; i <= node_count ; i ++ ){ 116 | // cnt_node += v[i]; 117 | // } 118 | if (( (double)edge_count * node_count - maxflow) > EPSILON_1 /*&& cnt_node > seed.size()*/) l = mid; else r = mid; 119 | 120 | } 121 | cerr << " Final coherent 
score " << l << endl; 122 | solve(l, seed, sharingEdges); 123 | dfs(s); 124 | 125 | set sharingSubgraph; 126 | int ret_cnt = 0; 127 | for ( int i = 1 ; i <= node_count ; i ++ ) { 128 | if ( v[i] ) { ret_cnt += 1; sharingSubgraph.insert(i - 1); } 129 | } 130 | cerr << " Found " << ret_cnt << " nodes" << endl; 131 | stringstream filebuf; 132 | filebuf << "output/coherent.out"; 133 | FILE * matrixOut = tryOpen(filebuf.str(), "w"); 134 | fprintf(matrixOut, (graph_names[graph_A] + "," + graph_names[graph_B] + "\n").c_str()); 135 | fprintf(matrixOut, "%lld\n", sharingSubgraph.size()); 136 | fprintf(matrixOut, "seeds:"); 137 | for ( auto & i : seed ) { 138 | // fprintf(matrixOut, "%d,", i); 139 | fprintf(matrixOut, "%s,", (node_id2label[i]).c_str()); 140 | } 141 | fprintf(matrixOut, "\n"); 142 | for ( auto & i : sharingSubgraph ) { 143 | // fprintf(matrixOut, "%d,", i); 144 | fprintf(matrixOut, "%s,", (node_id2label[i]).c_str()); 145 | } 146 | fprintf(matrixOut, "\n"); 147 | printMatrix(A, sharingSubgraph, matrixOut); 148 | printMatrix(B, sharingSubgraph, matrixOut); 149 | return sharingSubgraph; 150 | } 151 | 152 | #endif 153 | 154 | -------------------------------------------------------------------------------- /src/data/contrastingSubgraph.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONTRASTING_SUBGRAPH__ 2 | #define __CONTRASTING_SUBGRAPH__ 3 | 4 | #include "graph.h" 5 | 6 | using namespace std; 7 | 8 | void contrast_init(){ 9 | contrast_graph_edges.clear(); 10 | contrast_graph_edges.resize(node_id2label.size()); 11 | for ( auto & m : contrast_graph_edges){ 12 | m.clear(); m.resize(0); 13 | } 14 | contrast_graph_node_weights.clear(); 15 | contrast_graph_node_weights.resize(node_id2label.size()); 16 | contrast_graph_node_degrees.clear(); 17 | contrast_graph_node_degrees.resize(node_id2label.size()); 18 | for ( auto & m : contrast_graph_node_weights ) m = 0; 19 | for ( auto & m : contrast_graph_node_degrees ) m = 0; 20 | } 21 | 22 | //A B are the graphs we are interested in 23 | //seeds are the nodes that we want to include in our solution 24 | set find_contrast_graph_with_seeds(int graph_A, int graph_B, set seed, int max_d){ 25 | //clear all global variables 26 | denseSubgraph::init(); 27 | contrast_init(); 28 | 29 | auto & A = graph_edges[graph_A]; 30 | auto & B = graph_edges[graph_B]; 31 | 32 | using namespace denseSubgraph; 33 | //edge count m node count n 34 | edge_count = 0; 35 | node_count = 0; 36 | double epsContrast = 1e100, epsPenalty = 1e100; 37 | set connect; 38 | connect.clear(); 39 | vector visited; 40 | visited.resize(node_id2label.size()); 41 | set c1, c2; 42 | if ( max_d != -1 && seed.size() ) { 43 | for ( auto & m : seed ) { 44 | for ( auto & m : visited ) m = 0; 45 | search(c1, A, visited, m, max_d); 46 | for ( auto & m : visited ) m = 0; 47 | search(c2, B, visited, m, max_d); 48 | } 49 | } 50 | else { 51 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 52 | connect.insert(i); 53 | } 54 | } 55 | for ( auto & m : c1 ) connect.insert(m); 56 | for ( auto & m : c2 ) connect.insert(m); 57 | for ( auto & m : seed ) connect.insert(m); 58 | 59 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 60 | if ( !mfind(connect, i) ) continue; 61 | int ptr1, ptr2; 62 | ptr1 = ptr2 = 0; 63 | //graph A minus graph B 64 | sort(A[i].begin(), A[i].end()); 65 | sort(B[i].begin(), B[i].end()); 66 | while ( ptr1 < A[i].size() && ptr2 < B[i].size() ){ 67 | if ( A[i][ptr1].first < B[i][ptr2].first ) { 68 | epsContrast = min(epsContrast, 
A[i][ptr1].se); 69 | contrast_graph_edges[i].pb(mp(A[i][ptr1].fi, A[i][ptr1].se )); 70 | ptr1 ++; 71 | } 72 | else if ( A[i][ptr1].first == B[i][ptr2].first){ 73 | if ( !mfind(connect, A[i][ptr1].fi) ) {ptr1 ++; ptr2 ++; continue;} 74 | if ( abs(A[i][ptr1].se - B[i][ptr2].se) == 0 ) { ptr1 ++; ptr2 ++; continue; } 75 | epsContrast = min(epsContrast, abs(A[i][ptr1].se - B[i][ptr2].se)); 76 | contrast_graph_edges[i].pb(mp(A[i][ptr1].fi, abs(A[i][ptr1].se - B[i][ptr2].se))); 77 | ptr1 ++; ptr2 ++; 78 | } 79 | else { 80 | if ( !mfind(connect, B[i][ptr2].fi) ){ptr2 ++; continue;} 81 | epsContrast = min(epsContrast, B[i][ptr2].se); 82 | contrast_graph_edges[i].pb(mp(B[i][ptr2].fi, B[i][ptr2].se )); 83 | ptr2 ++; 84 | } 85 | } 86 | while ( ptr1 < A[i].size() ) { 87 | if ( !mfind(connect, A[i][ptr1].fi) ) {ptr1 ++; continue;} 88 | epsContrast = min(epsContrast, A[i][ptr1].se); 89 | contrast_graph_edges[i].pb(mp(A[i][ptr1].fi, A[i][ptr1].se )); 90 | ptr1 ++; 91 | } 92 | while ( ptr2 < B[i].size() ){ 93 | if ( !mfind(connect, B[i][ptr2].fi) ) {ptr2 ++; continue;} 94 | epsContrast = min(epsContrast, B[i][ptr2].se); 95 | contrast_graph_edges[i].pb(mp(B[i][ptr2].fi, B[i][ptr2].se )); 96 | ptr2 ++; 97 | } 98 | //node weight by adding degrees in graph A and B 99 | double cnt = 0; 100 | for ( auto & m : A[i] ) 101 | if ( mfind(connect, m.fi) ) 102 | cnt += m.se; 103 | for ( auto & m : B[i] ) 104 | if ( mfind(connect, m.fi) ) 105 | cnt += m.se; 106 | cnt = abs(cnt); 107 | contrast_graph_node_weights[i] = cnt; 108 | //new graph node degree 109 | cnt = 0.0; 110 | for ( auto & m : contrast_graph_edges[i] ) { 111 | cnt += m.se; 112 | } 113 | contrast_graph_node_degrees[i] = cnt; 114 | } 115 | for ( int i = 0 ; i < contrast_graph_edges.size() ; i ++ ){ 116 | for ( auto & m : contrast_graph_edges[i]){ 117 | edge_count += m.se; 118 | } 119 | } 120 | //mincut graph weight on edge (i,t) : 2*g*dup[i] - du[v] 121 | 122 | s = 0; t = node_id2label.size() + 1; 123 | for ( int i = 0 ; i < node_id2label.size() ; i ++ ) { 124 | dup[i + 1] = pow(contrast_graph_node_weights[i], NORM_CONST); 125 | node_count += dup[i + 1]; 126 | epsPenalty = min(epsPenalty, dup[i + 1]); 127 | du[i + 1] = contrast_graph_node_degrees[i]; 128 | } 129 | l = epsContrast / node_count ; r = edge_count / epsPenalty ; delta = epsContrast / node_count /node_count; 130 | while ( r - l > delta ) { 131 | mid = (l + r ) /2; 132 | solve(mid, seed, contrast_graph_edges); 133 | // dfs(s); 134 | // int cnt_node = 0; 135 | // for ( int i = 1 ; i <= node_count ; i ++ ){ 136 | // cnt_node += v[i]; 137 | // } 138 | if (( (double)edge_count * node_count - maxflow) > EPSILON_1) l = mid; else r = mid; 139 | 140 | } 141 | cerr << " Final contrast score " << l << endl; 142 | solve(l, seed, contrast_graph_edges); 143 | dfs(s); 144 | 145 | 146 | int ret_cnt = 0; 147 | set contrast_set; 148 | for ( int i = 1 ; i <= node_count ; i ++ ) { 149 | if ( v[i] ){ ret_cnt ++; contrast_set.insert(i - 1);} 150 | } 151 | cerr << " Found " << ret_cnt << " nodes" << endl; 152 | for (int u : seed) { 153 | myAssert(contrast_set.count(u), "[Error] Seeds are not in the contrast set!"); 154 | } 155 | 156 | 157 | stringstream filebuf; 158 | filebuf << "output/contrast.out"; 159 | FILE * matrixOut = tryOpen(filebuf.str(), "w"); 160 | fprintf(matrixOut, (graph_names[graph_A] + "," + graph_names[graph_B] + "\n").c_str()); 161 | fprintf(matrixOut, "%lld\n", contrast_set.size()); 162 | fprintf(matrixOut, "seeds:"); 163 | for ( auto & i : seed ) { 164 | // fprintf(matrixOut, "%d,", i); 165 | 
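// descriptive note: seed nodes are written as comma-terminated labels on the "seeds:" line; visualize.py reads this line to group the seed rows/columns in the heatmaps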
fprintf(matrixOut, "%s,", (node_id2label[i]).c_str()); 166 | } 167 | fprintf(matrixOut, "\n"); 168 | for ( auto & i : contrast_set ) { 169 | // fprintf(matrixOut, "%d,", i); 170 | fprintf(matrixOut, "%s,", (node_id2label[i]).c_str()); 171 | } 172 | fprintf(matrixOut, "\n"); 173 | printMatrix(A, contrast_set, matrixOut); 174 | printMatrix(B, contrast_set, matrixOut); 175 | 176 | return contrast_set; 177 | } 178 | 179 | #endif 180 | 181 | -------------------------------------------------------------------------------- /src/data/graph.h: -------------------------------------------------------------------------------- 1 | #ifndef __GRAPH_H__ 2 | #define __GRAPH_H__ 3 | 4 | #define ll long long 5 | #define MSET(x) memset((x), 0, sizeof((x))) 6 | #define MMSET(x) memset((x), -1, sizeof((x))) 7 | #define rep(i, a, b) for ( int (i) = a; (i) < (b) ; (i) ++ ) 8 | #define repA(i, a, b) for ( int (i) = a ; (i) <= (b) ; (i) ++ ) 9 | #define repD(i, a, b) for ( int (i) = a; (i) >= (b) ; (i) -- ) 10 | #define fi first 11 | #define se second 12 | #define pb push_back 13 | #define mfind(m, item) ((m).find((item)) != (m).end()) 14 | #define mp(a,b) make_pair((a),(b)) 15 | #define mt(a,b,c) make_pair(make_pair((a),(b)), (c)) 16 | #define tfi first.first 17 | #define tse first.second 18 | #define tth second 19 | #define tri pair, int> 20 | #include "../utils/utils.h" 21 | 22 | double NORM_CONST = 0.0; 23 | bool LOG_EDGE = false; 24 | const double epsilon = 1e-10; 25 | const double EPSILON_1 = 1e-4; 26 | 27 | vector>> contrast_graph_edges; 28 | vector contrast_graph_node_weights; 29 | vector contrast_graph_node_degrees; 30 | vector contrasting_subgraph; 31 | 32 | vector>> sharingEdges; 33 | vector sharingNodeWeights; 34 | vector sharingNodeDegrees; 35 | 36 | string DIR_PREFIX; 37 | string GRAPH_DIR; 38 | vector node_id2label; 39 | vector graph_names; 40 | vector>>> graph_edges; 41 | 42 | using namespace std; 43 | 44 | namespace denseSubgraph{ 45 | const int inf=0x3fffffff,u=500000, w=10000000; 46 | int head[u],ver[w],next[w],q[u],d[u],v[u]; 47 | int s,t,tot,i,ans; 48 | double node_count,edge_count; 49 | double edge[w],maxflow,k,l,r,mid,delta, dup[u], du[u]; 50 | 51 | void init(){ 52 | MSET(dup); MSET(du); MSET(head); MSET(ver); MSET(next); MSET(q); MSET(d); MSET(v); MSET(edge); 53 | node_count = edge_count = s = t = tot = i = ans = 0; 54 | maxflow = k = l = r = mid = delta = 0.0; 55 | } 56 | 57 | void add(int x,int y,double z) 58 | { 59 | ver[++tot]=y; 60 | edge[tot]=z; 61 | next[tot]=head[x]; 62 | head[x]=tot; 63 | ver[++tot]=x; 64 | edge[tot]=0; 65 | next[tot]=head[y]; 66 | head[y]=tot; 67 | } 68 | 69 | bool bfs() 70 | { 71 | memset(d,0,sizeof(d)); 72 | int l,r; 73 | l=r=1; q[1]=s; d[s]=1; 74 | while(l<=r) 75 | { 76 | for(int i=head[q[l]];i;i=next[i]) 77 | if(edge[i]>epsilon && !d[ver[i]]) 78 | { 79 | q[++r]=ver[i]; 80 | d[ver[i]]=d[q[l]]+1; 81 | if(ver[i]==t) return 1; 82 | } 83 | l++; 84 | } 85 | return 0; 86 | } 87 | 88 | double dinic(int x,double f) 89 | { 90 | if(x==t) return f; 91 | double temp=f,k; 92 | for(int i=head[x];i;i=next[i]) 93 | if(edge[i]>epsilon && temp>epsilon && d[ver[i]]==d[x]+1) 94 | { 95 | k=dinic(ver[i],min(temp,edge[i])); 96 | if(k & seeds, vector>> & edges) 106 | { 107 | memset(head,0,sizeof(head)); 108 | tot=1; maxflow=0; 109 | for(i=1;i<=node_count;i++) 110 | { 111 | if ( !mfind(seeds, i - 1) ) 112 | add(s,i,edge_count); 113 | else add(s, i, std::numeric_limits::max()); 114 | add(i,t,edge_count+ 2*g * dup[i]-du[i]); 115 | } 116 | for ( int i = 0 ; i < edges.size() ; i ++ ) { 117 
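// descriptive note: node ids are shifted by +1 because 0 is the source s; edges[] already stores both directions of every undirected edge, so each arc receives its weight as forward capacity (add() also creates the zero-capacity residual arc)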
| int u = i + 1; 118 | for ( int j = 0 ; j < edges[i].size() ; j ++ ) { 119 | auto & m = edges[i][j]; 120 | int v = m.fi + 1; 121 | add(u,v,m.se); 122 | } 123 | } 124 | while(bfs()) 125 | while((k=dinic(s,inf))>epsilon) maxflow+=k; 126 | } 127 | 128 | void dfs(int x) 129 | { 130 | v[x]=1; ans++; 131 | for(int i=head[x];i;i=next[i]) 132 | if(!v[ver[i]] && edge[i]>epsilon) dfs(ver[i]); 133 | } 134 | 135 | 136 | } 137 | 138 | void printMatrix(vector > > & A, set contrast_set, FILE * matrixOut){ 139 | 140 | // cerr << "Adj Matrix :" << endl; 141 | // cerr << "--------------------------" << endl; 142 | size_t max_l = 0; 143 | for ( auto & i : contrast_set ) { 144 | max_l = max(max_l, node_id2label[i].length()); 145 | } 146 | 147 | // cerr << padding_sentence("", max_l) << " | "; 148 | // for ( auto & i : contrast_set ) { 149 | // cerr << padding_sentence(node_id2label[i], max_l) << " | "; 150 | // } 151 | 152 | // cerr << endl; 153 | for ( auto & i : contrast_set ) { 154 | // cerr << padding_sentence(node_id2label[i], max_l) << " | "; 155 | for ( auto & j : contrast_set ) { 156 | stringstream buf; 157 | double cnt = 0; 158 | for ( auto & m : A[i] ) { 159 | if ( m.fi == j ){ 160 | cnt = m.se; 161 | } 162 | } 163 | buf << cnt; 164 | // cerr << padding_sentence(buf.str(), max_l) << " | "; 165 | fprintf(matrixOut, "%s,", buf.str().c_str()); 166 | } 167 | fprintf(matrixOut, "\n"); 168 | // cerr << endl; 169 | } 170 | // cerr << endl; 171 | } 172 | 173 | void loadNodeLabels() 174 | { 175 | node_id2label.resize(0); 176 | FILE* in = tryOpen(GRAPH_DIR + "NodeName.csv", "r"); 177 | while (getLine(in)) { 178 | vector tokens = splitBy(line, ','); 179 | int id; 180 | string label; 181 | fromString(tokens[0], id); 182 | label = strip(tokens[1]); 183 | node_id2label.push_back(label); 184 | } 185 | fclose(in); 186 | 187 | cerr << " # of total nodes = " << node_id2label.size() << endl; 188 | } 189 | 190 | void loadEdgeList(){ 191 | graph_edges.resize(graph_names.size()); 192 | for ( auto & m : graph_edges ) { 193 | m.resize(node_id2label.size()); 194 | } 195 | int count = 0; 196 | FILE* in = tryOpen(GRAPH_DIR + "GraphData.csv", "r"); 197 | while ( getLine(in)) { 198 | vector tokens = splitBy(line, ','); 199 | int graph_id, u, v; 200 | double w; 201 | fromString(tokens[0], graph_id); 202 | fromString(tokens[1], u); 203 | fromString(tokens[2], v); 204 | fromString(tokens[3], w); 205 | // w = (int)(w / 100.0); 206 | if ( u == v ) continue; 207 | if ( w == 0 ) continue; 208 | if ( LOG_EDGE ) w = 1.0 + log(w); 209 | graph_edges[graph_id][u].push_back(make_pair(v,w)); 210 | graph_edges[graph_id][v].push_back(make_pair(u,w)); 211 | count += 1; 212 | } 213 | fclose(in); 214 | cerr << " # of edges in total = " << count << endl; 215 | } 216 | 217 | void loadEdges() 218 | { 219 | graph_names.resize(0); 220 | FILE* in = tryOpen(GRAPH_DIR + "GraphName.csv", "r"); 221 | while (getLine(in)) { 222 | vector tokens = splitBy(line, ','); 223 | int id; 224 | string name; 225 | fromString(tokens[0], id); 226 | name = tokens[1]; 227 | graph_names.push_back(name); 228 | } 229 | fclose(in); 230 | cerr << " # of graphs = " << graph_names.size() << endl; 231 | 232 | loadEdgeList(); 233 | } 234 | 235 | void loadGraphs() 236 | { 237 | srand(1); 238 | loadNodeLabels(); 239 | loadEdges(); 240 | } 241 | 242 | void search(set & c, vector > > & e, vector & v, int cur, int max_d){ 243 | queue > q; 244 | q.push(mp(cur,0)); 245 | while (!q.empty() ) { 246 | auto n = q.front(); 247 | q.pop(); 248 | if ( n.se > max_d ) continue; 249 | 
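// descriptive note: the node is within max_d hops of the seed, so keep it; neighbors are expanded only on the first visit (the v[] flag prevents re-expansion)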
c.insert(n.fi); 250 | if ( v[n.fi] ) { 251 | continue; 252 | } 253 | v[n.fi] = 1; 254 | for ( auto & m : e[n.fi] ) { 255 | q.push(mp(m.fi,n.se + 1 ));; 256 | } 257 | } 258 | } 259 | 260 | 261 | #endif 262 | -------------------------------------------------------------------------------- /post_processing_src/visualize.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | reload(sys) 4 | sys.setdefaultencoding('utf-8') 5 | from PIL import Image 6 | 7 | import glob 8 | import numpy as np 9 | import matplotlib as mpl 10 | mpl.use('Agg') 11 | import matplotlib.pyplot as plt 12 | import seaborn as sn 13 | import pandas as pd 14 | 15 | prefix = "output/" 16 | fileList = glob.glob(prefix + "*.out") 17 | 18 | print "Generating Image" 19 | for fi in fileList : 20 | with open(fi, "r") as f : 21 | graph = f.readline() 22 | if graph[0:4] == "con " : 23 | graph = graph[4:] 24 | graph = (' '.join((graph[:-1]).split())).split(',') 25 | gA = graph[0] 26 | gB = graph[1] 27 | terms = (f.readline()[:-2]).split(',') 28 | terms = [ ' '.join(i.split()) for i in terms] 29 | authors = (f.readline()[:-2]).split(',') 30 | authors = [' '.join(i.split()) for i in authors] 31 | n1 = len(authors) 32 | n2 = len(terms) 33 | mat1 = {} 34 | mat2 = {} 35 | minV = 65536.0 36 | maxV = -1.0 37 | for i in range(n1): 38 | l = f.readline() 39 | l = l[:-2] 40 | l = l.split(',') 41 | l = [float(it) for it in l] 42 | for j, a in enumerate(l) : 43 | mat1[(i,j)] = a 44 | if a < minV : minV = a 45 | if a > maxV : maxV = a 46 | for i in range(n1): 47 | l = f.readline() 48 | l = l[:-2] 49 | l = l.split(',') 50 | l = [float(it) for it in l] 51 | for j, a in enumerate(l) : 52 | mat2[(i,j)] = a 53 | if a < minV : minV = a 54 | if a > maxV : maxV = a 55 | magR = [[0.0, i] for i in range(n1)] 56 | magC = [[0.0, i] for i in range(n2)] 57 | for i in range(n1): 58 | for j in range(n2): 59 | magR[i][0] += mat1[(i,j)] 60 | magC[j][0] += mat1[(i,j)] 61 | magR = sorted(magR, reverse = True) 62 | magC = sorted(magC, reverse = True) 63 | tempM1 = [[0.0 for i in range(n2)] for j in range(n1)] 64 | tempM2 = [[0.0 for i in range(n2)] for j in range(n1)] 65 | for i,it in enumerate(magR): 66 | for j,jt in enumerate(magC): 67 | tempM1[i][j] = mat1[(it[1], jt[1])] 68 | tempM2[i][j] = mat2[(it[1], jt[1])] 69 | mat1 = tempM1 70 | mat2 = tempM2 71 | 72 | newAuthors = ["" for i in range(n1)] 73 | newTerms = ["" for i in range(n2)] 74 | for i,it in enumerate(magR): 75 | newAuthors[i] = authors[it[1]] 76 | for i,it in enumerate(magC): 77 | newTerms[i] = terms[it[1]] 78 | authors = newAuthors 79 | terms = newTerms 80 | df_cm = pd.DataFrame(mat1, index = authors, columns = terms) 81 | 82 | plt.figure(figsize = (24,20)) 83 | sn.heatmap(df_cm, annot=False, vmin = minV, vmax = maxV, cmap="Greens") 84 | plt.title(gA, fontsize = 40) 85 | plt.xticks(rotation = 90, fontsize = 10) 86 | plt.yticks(rotation = 0, fontsize = 18) 87 | plt.savefig(fi + "_1.png") 88 | plt.clf() 89 | plt.close() 90 | 91 | df_cm = pd.DataFrame(mat2, index = authors, columns = terms) 92 | plt.figure(figsize = (24,20)) 93 | sn.heatmap(df_cm, annot=False, vmin = minV, vmax = maxV, cmap="Greens") 94 | plt.title(gB, fontsize = 40) 95 | plt.xticks(rotation = 90, fontsize = 10) 96 | plt.yticks(rotation = 0, fontsize = 18) 97 | plt.savefig(fi + "_2.png") 98 | plt.clf() 99 | plt.close() 100 | 101 | 102 | images = map(Image.open, [fi+ "_1.png", fi + "_2.png"]) 103 | w,h = zip(*(i.size for i in images)) 104 | 105 | t_w = w[0] + int(3.8 / 5.0 * w[0]) 106 | 
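# combined canvas: the second heatmap is pasted at a ~3.8/5-width horizontal offset, so the two panels overlap slightly and form one side-by-side figure (empirically chosen ratio)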
t_h = max(h) 107 | 108 | new_im = Image.new('RGB', (t_w, t_h)) 109 | 110 | x_off = 0 111 | for im in images: 112 | new_im.paste(im, (x_off, 0)) 113 | x_off += int(3.8 / 5.0 * im.size[0]) 114 | 115 | new_im.save(fi + "_bi.png") 116 | 117 | else : 118 | graph = (' '.join((graph[:-1]).split())).split(',') 119 | gA = graph[0] 120 | gB = graph[1] 121 | num = int(f.readline()) 122 | if num == 0 : 123 | continue 124 | seeds = set((f.readline()[6:-2]).split(',')) 125 | authors = (f.readline()[:-2]).split(',') 126 | table = [auth in seeds for auth in authors] 127 | authors = [' '.join(i.split()) for i in authors] 128 | mat1 = {} 129 | mat2 = {} 130 | minV = 65536.0 131 | maxV = -1.0 132 | for i in range(num): 133 | l = f.readline() 134 | l = l[:-2] 135 | l = l.split(',') 136 | l = [float(it) for it in l] 137 | for j, a in enumerate(l) : 138 | mat1[(i,j)] = a 139 | if a < minV : minV = a 140 | if a > maxV : maxV = a 141 | for i in range(num): 142 | l = f.readline() 143 | l = l[:-2] 144 | l = l.split(',') 145 | l = [float(it) for it in l] 146 | for j, a in enumerate(l) : 147 | mat2[(i,j)] = a 148 | if a < minV : minV = a 149 | if a > maxV : maxV = a 150 | magR = [[0.0, 0.0, i] for i in range(num)] 151 | magC = [[0.0, 0.0, i] for i in range(num)] 152 | for i in range(num): 153 | for j in range(num): 154 | magR[i][0] += mat1[(i,j)] 155 | magC[j][0] += mat1[(i,j)] 156 | for i in range(num): 157 | for j in range(num): 158 | magR[i][1] += mat2[(i,j)] 159 | magC[j][1] += mat2[(i,j)] 160 | magR = sorted(magR, reverse = True, key = lambda t : t[1]) 161 | magC = sorted(magC, reverse = True, key = lambda t : t[1]) 162 | magR = sorted(magR, reverse = True, key = lambda t : t[0]) 163 | magC = sorted(magC, reverse = True, key = lambda t : t[0]) 164 | magR = sorted(magR, key = lambda t : table[t[2]]) 165 | magC = sorted(magC, key = lambda t : table[t[2]]) 166 | tempM1 = [[0.0 for i in range(num)] for j in range(num)] 167 | tempM2 = [[0.0 for i in range(num)] for j in range(num)] 168 | for i,it in enumerate(magR): 169 | for j, jt in enumerate(magC): 170 | tempM1[i][j] = mat1[(it[2], jt[2])] 171 | tempM2[i][j] = mat2[(it[2], jt[2])] 172 | mat1 = tempM1 173 | mat2 = tempM2 174 | 175 | 176 | newAuthors = ["" for i in range(num)] 177 | 178 | for i, it in enumerate(magR): 179 | newAuthors[i] = authors[it[2]] 180 | authors = newAuthors 181 | empty = ["" for auth in authors] 182 | 183 | df_cm = pd.DataFrame(mat1, index = authors, columns = authors) 184 | plt.figure(figsize = (24,20)) 185 | sn.heatmap(df_cm, annot=False, vmin = minV, vmax = maxV, cmap="Greens") 186 | plt.title(gA, fontsize = 40) 187 | plt.xticks(rotation = 90) 188 | plt.yticks(rotation = 0) 189 | plt.savefig(fi + "_1.png") 190 | plt.clf() 191 | plt.close() 192 | 193 | df_cm = pd.DataFrame(mat2, index = empty, columns = authors) 194 | plt.figure(figsize = (24,20)) 195 | sn.heatmap(df_cm, annot=False, vmin = minV, vmax = maxV, cmap="Greens") 196 | plt.title(gB, fontsize = 40) 197 | plt.xticks(rotation = 90) 198 | plt.yticks(rotation = 0) 199 | plt.savefig(fi + "_2.png") 200 | plt.clf() 201 | plt.close() 202 | 203 | 204 | images = map(Image.open, [fi+ "_1.png", fi + "_2.png"]) 205 | w,h = zip(*(i.size for i in images)) 206 | 207 | t_w = w[0] + int(2.70 / 5.0 * w[0]) 208 | t_h = max(h) 209 | 210 | new_im = Image.new('RGB', (t_w, t_h)) 211 | 212 | images[1] = images[1].crop((int(0.50 / 5.0 * w[0]), 0, w[0], h[0])) 213 | 214 | x_off = 0 215 | for im in images: 216 | new_im.paste(im, (x_off, 0)) 217 | x_off += int(3.80 / 5.0 * im.size[0]) 218 | 219 | 
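# write the combined two-panel heatmap (e.g. output/contrast.out_bi.png) alongside the per-graph PNGs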
new_im.save(fi + "_bi.png") 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------