├── tools ├── communityAnalyzer │ ├── CMakeLists.txt │ └── source │ │ └── main.cpp ├── f1score │ ├── CMakeLists.txt │ └── source │ │ └── main.cpp ├── cc │ ├── CMakeLists.txt │ └── source │ │ └── main.cpp ├── wcc │ ├── CMakeLists.txt │ └── source │ │ └── main.cpp └── selector │ ├── CMakeLists.txt │ └── source │ └── main.cpp ├── source ├── common │ └── time.cpp ├── main.cpp ├── wcc │ └── wcc.cpp ├── graph │ └── graph.cpp └── communities │ └── communities.cpp ├── include ├── common │ ├── time.h │ └── types.h ├── wcc │ └── wcc.h ├── communities │ └── communities.h └── graph │ └── graph.h ├── CMakeLists.txt ├── README.md └── LICENSE /tools/communityAnalyzer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -pg -fopenmp") 3 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp -DNDEBUG") 4 | 5 | INCLUDE_DIRECTORIES(../../include) 6 | AUX_SOURCE_DIRECTORY(./source SOURCE_FILES) 7 | AUX_SOURCE_DIRECTORY(../../source/communities SOURCE_FILES) 8 | AUX_SOURCE_DIRECTORY(../../source/wcc SOURCE_FILES) 9 | AUX_SOURCE_DIRECTORY(../../source/graph SOURCE_FILES) 10 | AUX_SOURCE_DIRECTORY(../../source/common SOURCE_FILES) 11 | ADD_EXECUTABLE(communityAnalyzer ${SOURCE_FILES}) 12 | -------------------------------------------------------------------------------- /tools/f1score/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #SCD is free software: you can redistribute it and/or modify 2 | #it under the terms of the GNU General Public License as published by 3 | #the Free Software Foundation, either version 3 of the License, or 4 | #(at your option) any later version. 5 | # 6 | #SCD is distributed in the hope that it will be useful, 7 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | #GNU General Public License for more details. 
10 | # 11 | #You should have received a copy of the GNU General Public License 12 | #along with this program. If not, see . 13 | 14 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -std=c++11 -g3 -fno-rtti") 15 | SET(CMAKE_CXX_FLAGS_RELEASE "-O2 -std=c++11") 16 | 17 | AUX_SOURCE_DIRECTORY(./source SOURCE_FILES) 18 | ADD_EXECUTABLE(f1score ${SOURCE_FILES}) 19 | -------------------------------------------------------------------------------- /tools/cc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #SCD is free software: you can redistribute it and/or modify 2 | #it under the terms of the GNU General Public License as published by 3 | #the Free Software Foundation, either version 3 of the License, or 4 | #(at your option) any later version. 5 | # 6 | #SCD is distributed in the hope that it will be useful, 7 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | #GNU General Public License for more details. 10 | # 11 | #You should have received a copy of the GNU General Public License 12 | #along with this program. If not, see . 
13 | 14 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -pg -fopenmp") 15 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp -DNDEBUG") 16 | 17 | INCLUDE_DIRECTORIES(../../include) 18 | AUX_SOURCE_DIRECTORY(./source WCC_SOURCE_FILES) 19 | AUX_SOURCE_DIRECTORY(../../source/communities WCC_SOURCE_FILES) 20 | AUX_SOURCE_DIRECTORY(../../source/wcc WCC_SOURCE_FILES) 21 | AUX_SOURCE_DIRECTORY(../../source/graph WCC_SOURCE_FILES) 22 | AUX_SOURCE_DIRECTORY(../../source/common WCC_SOURCE_FILES) 23 | ADD_EXECUTABLE(cc ${WCC_SOURCE_FILES}) 24 | -------------------------------------------------------------------------------- /tools/wcc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #SCD is free software: you can redistribute it and/or modify 2 | #it under the terms of the GNU General Public License as published by 3 | #the Free Software Foundation, either version 3 of the License, or 4 | #(at your option) any later version. 5 | # 6 | #SCD is distributed in the hope that it will be useful, 7 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | #GNU General Public License for more details. 10 | # 11 | #You should have received a copy of the GNU General Public License 12 | #along with this program. If not, see . 
13 | 14 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -pg -fopenmp") 15 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp -DNDEBUG") 16 | 17 | INCLUDE_DIRECTORIES(../../include) 18 | AUX_SOURCE_DIRECTORY(./source WCC_SOURCE_FILES) 19 | AUX_SOURCE_DIRECTORY(../../source/communities WCC_SOURCE_FILES) 20 | AUX_SOURCE_DIRECTORY(../../source/wcc WCC_SOURCE_FILES) 21 | AUX_SOURCE_DIRECTORY(../../source/graph WCC_SOURCE_FILES) 22 | AUX_SOURCE_DIRECTORY(../../source/common WCC_SOURCE_FILES) 23 | ADD_EXECUTABLE(wcc ${WCC_SOURCE_FILES}) 24 | -------------------------------------------------------------------------------- /tools/selector/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #SCD is free software: you can redistribute it and/or modify 2 | #it under the terms of the GNU General Public License as published by 3 | #the Free Software Foundation, either version 3 of the License, or 4 | #(at your option) any later version. 5 | # 6 | #SCD is distributed in the hope that it will be useful, 7 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | #GNU General Public License for more details. 10 | # 11 | #You should have received a copy of the GNU General Public License 12 | #along with this program. If not, see . 
13 | 14 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -pg -fopenmp") 15 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp -DNDEBUG") 16 | 17 | INCLUDE_DIRECTORIES(../../include) 18 | AUX_SOURCE_DIRECTORY(./source WCC_SOURCE_FILES) 19 | AUX_SOURCE_DIRECTORY(../../source/communities WCC_SOURCE_FILES) 20 | AUX_SOURCE_DIRECTORY(../../source/wcc WCC_SOURCE_FILES) 21 | AUX_SOURCE_DIRECTORY(../../source/graph WCC_SOURCE_FILES) 22 | AUX_SOURCE_DIRECTORY(../../source/common WCC_SOURCE_FILES) 23 | ADD_EXECUTABLE(selector ${WCC_SOURCE_FILES}) 24 | -------------------------------------------------------------------------------- /source/common/time.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 
13 | */ 14 | 15 | #include 16 | 17 | namespace scd { 18 | 19 | uint64_t StartClock() { 20 | timeval time; 21 | gettimeofday(&time, NULL); 22 | uint64_t initTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 23 | return initTime; 24 | } 25 | 26 | uint64_t StopClock(uint64_t initTime) { 27 | timeval time; 28 | gettimeofday(&time, NULL); 29 | uint64_t endTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 30 | return endTime - initTime; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /include/common/time.h: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #ifndef SCD_TIME_H 16 | #define SCD_TIME_H 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | namespace scd { 23 | 24 | /** @brief Gets the current time in miliseconds. 25 | * @return The time in miliseconds.**/ 26 | uint64_t StartClock(); 27 | 28 | /** @brief Gets the time elapsed from a given moment. 
29 | * @param[in] initTime The time specifying the moment from which to compute the elapsed time, in milliseconds. 30 | * @return The time elapsed since the specified time, in milliseconds.**/ 31 | uint64_t StopClock(uint64_t initTime); 32 | 33 | } 34 | #endif 35 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | #SCD is free software: you can redistribute it and/or modify 2 | #it under the terms of the GNU General Public License as published by 3 | #the Free Software Foundation, either version 3 of the License, or 4 | #(at your option) any later version. 5 | # 6 | #SCD is distributed in the hope that it will be useful, 7 | #but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | #GNU General Public License for more details. 10 | # 11 | #You should have received a copy of the GNU General Public License 12 | #along with this program. If not, see . 
13 | 14 | CMAKE_MINIMUM_REQUIRED(VERSION 2.8.2) 15 | PROJECT(SCD CXX) 16 | set( CMAKE_EXPORT_COMPILE_COMMANDS 1 ) 17 | SET(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -pg -fopenmp -DPROFILE ") 18 | SET(CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp -DNDEBUG") 19 | #SET(CMAKE_VERBOSE_MAKEFILE ON) 20 | 21 | 22 | if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) 23 | message(STATUS "Setting build type to 'Release' as none was specified.") 24 | set(CMAKE_BUILD_TYPE Release) 25 | endif() 26 | 27 | ADD_SUBDIRECTORY(./tools/wcc) 28 | ADD_SUBDIRECTORY(./tools/f1score) 29 | ADD_SUBDIRECTORY(./tools/selector) 30 | ADD_SUBDIRECTORY(./tools/cc) 31 | ADD_SUBDIRECTORY(./tools/communityAnalyzer) 32 | 33 | INCLUDE_DIRECTORIES(./include) 34 | FILE( GLOB_RECURSE SOURCE_FILES "source/*" ) 35 | ADD_EXECUTABLE(scd ${SOURCE_FILES}) 36 | -------------------------------------------------------------------------------- /include/common/types.h: -------------------------------------------------------------------------------- 1 | 2 | /*SCD is free software: you can redistribute it and/or modify 3 | it under the terms of the GNU General Public License as published by 4 | the Free Software Foundation, either version 3 of the License, or 5 | (at your option) any later version. 6 | 7 | SCD is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | GNU General Public License for more details. 11 | 12 | You should have received a copy of the GNU General Public License 13 | along with this program. If not, see . 
14 | */ 15 | 16 | #ifndef TYPES_H 17 | #define TYPES_H 18 | 19 | namespace scd { 20 | #ifndef SCD_THREAD_BLOCK_SIZE 21 | #define SCD_THREAD_BLOCK_SIZE 32 22 | #endif 23 | 24 | #ifndef SCD_SCHEDULING 25 | #define SCD_SCHEDULING dynamic 26 | #endif 27 | 28 | typedef bool bool_t; 29 | typedef unsigned char uchar_t; 30 | typedef char char_t; 31 | typedef short int uint16_t; 32 | typedef unsigned int uint32_t; 33 | typedef int int32_t; 34 | typedef long unsigned uint64_t; 35 | typedef float float32_t; 36 | typedef double double64_t; 37 | 38 | /** @brief This struct represents a node in the graph.*/ 39 | struct Node { 40 | uint32_t m_Degree; /**< @brief The degree of the node.*/ 41 | uint32_t m_AdjacencyIndex; /**< @brief The index into the adjacency vector where the adjacencies lay*/ 42 | }; 43 | 44 | 45 | 46 | }; 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /include/wcc/wcc.h: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | 16 | #ifndef WCC_H 17 | #define WCC_H 18 | 19 | 20 | #include 21 | #include 22 | #include 23 | namespace scd 24 | { 25 | 26 | /** @brief Computes the size of the intersection between two arrays. 27 | * @param[in] list1 The first array. 28 | * @param[in] size1 The size of the first array. 29 | * @param[in] list2 The second array. 
30 | * @param[in] size2 The size of the second array. 31 | * @return The size of the intersection.*/ 32 | uint32_t Intersect( /*const*/ uint32_t* list1, const uint32_t size1, /*const*/ uint32_t* list2, const uint32_t size2 ); 33 | 34 | /** @brief Computes the WCC of a node against a community. 35 | * @param[in] graph The graph. 36 | * @param[in] alfa The alfa parameter controling the cohesiveness of the communities. 37 | * @param[in] communities The assignment of nodes to communities. 38 | * @param[in] labelsIndices The array of indexes of labels into the community inverse index. 39 | * @param[in] communitiesInvIndex The community inverse index. 40 | * @param[in] wccs An array where the WCCs of the nodes will be stored. 41 | * @return The WCC of the node against the community.*/ 42 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, const uint32_t * communities, const uint32_t numCommunities, const uint32_t* labelsIndices, const uint32_t * communitiesInvIndex, double64_t* wccs); 43 | 44 | /** @brief Computes the WCC of a node against a community. 45 | * @param[in] graph The graph. 46 | * @param[in] alfa The alfa parameter controling the cohesiveness of the communities. 47 | * @param[in] node The node. 48 | * @param[in] communityLabel The label of the community to test against. 49 | * @param[in] communities The assignment of nodes to communities. 50 | * @param[in] numCommunities The number of communities. 51 | * @param[in] labelsIndices The array of indexes of labels into the community inverse index. 52 | * @param[in] communitiesInvIndex The community inverse index. 
53 | * @return The WCC of the node against the community.*/ 54 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, uint32_t node, uint32_t communityLabel, const uint32_t * communities, uint32_t communitySize ); 55 | 56 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, std::set& community ); 57 | } 58 | 59 | #endif 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | SCD 2 | === 3 | 4 | This program is an implementation of the community detection algorithm described in the papers titled 5 | 6 | [High quality, scalable and parallel community detection for large real graphs.](http://www.dama.upc.edu/en/publications/fp546prat.pdf) Arnau Prat-Pérez, David Dominguez-Sal, Josep-Lluis Larriba-Pey - WWW 2014. 7 | 8 | [Put Three and Three Together: Triangle-Driven Community Detection.](http://dl.acm.org/citation.cfm?id=2775108) Arnau Prat-Pérez, David Dominguez-Sal, Josep-M. Brunat, Josep-Lluis Larriba-Pey - TKDD. 9 | 10 | 11 | Compile 12 | === 13 | 14 | SCD uses CMake 2.8.2 or greater to compile. In order to build SCD, move to the SCD directory and type: 15 | 16 | ``` 17 | mkdir -p build && cd build 18 | cmake -DCMAKE_BUILD_TYPE=Release .. 19 | make 20 | ``` 21 | 22 | This will create a build directory inside the SCD folder tree, and configure and build SCD in Release mode. 23 | In order to compile SCD in Debug mode, please replace the last two lines of the snippet above with: 24 | 25 | ``` 26 | cmake -DCMAKE_BUILD_TYPE=Debug .. 27 | make 28 | ``` 29 | 30 | Execution 31 | === 32 | 33 | To execute SCD, move to the build folder and type: 34 | 35 | ``` 36 | ./scd -f [network file name] 37 | ``` 38 | 39 | where [network file name] is a file containing the network with one edge per line, and each edge is represented as a pair of numeric identifiers. 
40 | IMPORTANT: each edge is interpreted as an undirected edge and can only appear once. 41 | For example, if "1 2" appears in the file, then "2 1" must not appear as well. The next snippet shows a valid network file: 42 | 43 | ``` 44 | 1 2 45 | 4 5 46 | 3 4 47 | 1 3 48 | ``` 49 | 50 | As an example, type: 51 | 52 | ``` 53 | ./scd -f ./network.dat 54 | ``` 55 | 56 | This will run the program, and will output the communities found in network.dat to "./communities.dat", which contains 57 | a community per line, represented as a list of identifiers. network.dat contains a network composed of two cliques of size 5 linked by a single edge. Therefore, communities.dat contains: 58 | 59 | ``` 60 | 1 2 3 4 5 61 | 6 7 8 9 10 62 | ``` 63 | 64 | Now, we summarize the options of the program: 65 | 66 | * -f [network file name] : Specifies the network file. 67 | * -o [output file name] : Specifies the output file name (DEFAULT="communities.dat"). 68 | * -n [number of threads]: Specifies the number of threads to run the algorithm (DEFAULT=maximum available cores). 69 | * -l [lookahead]: Specifies the number of lookahead iterations to look before terminating the optimization process (DEFAULT=5). 70 | * -p [partition file name]: Specifies the initial partition to refine from (Optional). 71 | 72 | 73 | Tools 74 | === 75 | 76 | Inside the build folder, a folder named tools is automatically created, containing useful tools. The tools currently available are the following: 77 | * wcc: Computes, given a graph and a partition, the WCC of the partition. 78 | * Usage: wcc -f [graph file name] -p [partition file name] 79 | * f1score: Computes the f1score between two partitions. 
80 | * Usage: f1score [partition file name 1] [partition file name 2] 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /tools/cc/source/main.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #define CHECK_ARGUMENT_STRING(index, option,variable,setVariable) \ 26 | if( strcmp(argv[index],option) == 0 ){ \ 27 | setVariable = true; \ 28 | if( (index+1) < argc ) { \ 29 | variable = argv[index+1]; \ 30 | } else { \ 31 | printf( "Invalid options.\n" ); \ 32 | return 1;\ 33 | }\ 34 | } 35 | 36 | #define CHECK_ARGUMENT_FLOAT(index, option,variable,setVariable) \ 37 | if( strcmp(argv[index],option) == 0 ){ \ 38 | setVariable = true; \ 39 | if( (index+1) < argc ) { \ 40 | variable = atof(argv[index+1]); \ 41 | } else { \ 42 | printf( "Invalid options.\n" ); \ 43 | return 1;\ 44 | }\ 45 | } 46 | 47 | #define CHECK_ARGUMENT_INT(index, option,variable,setVariable) \ 48 | if( strcmp(argv[index],option) == 0 ){ \ 49 | setVariable = true; \ 50 | if( (index+1) < argc ) { \ 51 | variable = atoi(argv[index+1]); \ 52 | } else { \ 53 | printf( "Invalid options.\n" ); \ 54 | return 1;\ 55 | }\ 56 | } 57 | 58 | #define 
CHECK_FLAG(index, option,setVariable) \ 59 | if( strcmp(argv[index],option) == 0 ){ \ 60 | setVariable = true; \ 61 | } 62 | 63 | using namespace scd; 64 | 65 | static void PrintUsage() { 66 | printf("Usage: wcc \n"); 67 | printf("Availaible flags:\n"); 68 | printf("\t-f [network file name] : Specifies the network file.\n"); 69 | printf("\t-p [partition file name] : Specifies the partition file name.\n"); 70 | } 71 | 72 | 73 | int main(int argc, char ** argv) { 74 | 75 | bool graphFileNameSet = false; 76 | bool partitionFileNameSet = false; 77 | bool numThreadsSet = false; 78 | bool alphaSet = false; 79 | char_t * graphFileName = NULL; 80 | char_t * partitionFileName = NULL; 81 | uint32_t numThreads = omp_get_num_procs(); 82 | double alpha = 1.0; 83 | 84 | for (uint32_t i = 1; i < argc; i++) { 85 | CHECK_ARGUMENT_STRING(i, "-f", graphFileName, graphFileNameSet) 86 | } 87 | 88 | if (!graphFileNameSet) { 89 | printf("Graph filename not set\n"); 90 | PrintUsage(); 91 | return 1; 92 | } 93 | 94 | CGraph graph; 95 | 96 | //==================== LOAD THE GRAPH ================================== 97 | printf("Graph: %s\n", graphFileName); 98 | graph.Load(graphFileName, numThreads); 99 | 100 | double64_t cc = 0.0; 101 | for( int i = 0; i < graph.GetNumNodes(); ++i ) { 102 | uint32_t degree = graph.GetDegree(i); 103 | uint32_t numTriangles = 0; 104 | const uint32_t* adjacencies = graph.GetNeighbors(i); 105 | for( int j = 0; j < degree; ++j) { 106 | uint32_t neighbor = adjacencies[j]; 107 | const uint32_t* adjacencies2 = graph.GetNeighbors(neighbor); 108 | uint32_t degree2 = graph.GetDegree(neighbor); 109 | uint32_t intersect = Intersect((uint32_t*)adjacencies,degree,(uint32_t*)adjacencies2,degree2); 110 | numTriangles+=intersect; 111 | } 112 | cc+= degree > 1 ? 
numTriangles / (double64_t)(degree*(degree-1)) : 0; 113 | } 114 | cc /= graph.GetNumNodes(); 115 | printf("CC: %f\n", cc); 116 | //====================================================================== 117 | return 0; 118 | } 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /include/communities/communities.h: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 
13 | */ 14 | 15 | 16 | #ifndef COMMUNITIES_H 17 | #define COMMUNITIES_H 18 | 19 | #include 20 | #include 21 | 22 | namespace scd { 23 | 24 | #define SCD_SINGLETON 0xffffffff 25 | 26 | /** @brief This struct is a tuple formed by a node id and a clustering coefficient.*/ 27 | struct NodeClustering { 28 | uint32_t m_NodeId; 29 | double64_t m_CC; 30 | uint32_t m_Degree; 31 | }; 32 | 33 | struct CommunityPartition { 34 | uint32_t* m_NodeLabels; /**< @brief The labels of the communities each node belongs to.*/ 35 | uint32_t* m_CommunityIndices; /**< @brief The array of indices for each label into the community array.*/ 36 | uint32_t* m_Communities; /**< @brief The communities.*/ 37 | uint32_t* m_InternalEdges; /**< @brief The number of internal edges of each community.*/ 38 | uint32_t* m_ExternalEdges; /**< @brief The number of external edges of each community.*/ 39 | double64_t* m_NodeWCC; /**< @brief The WCC of the nodes.*/ 40 | uint32_t m_NumCommunities; /**< @brief The number of communities.*/ 41 | uint32_t m_NumNodes; /**< @brief The number of nodes.*/ 42 | double64_t m_WCC; /**< @brief The WCC of this partition.*/ 43 | }; 44 | 45 | 46 | 47 | /** @brief Initializes a partition structure with an initial partition. 48 | * @param[in] graph A pointer to the graph. 49 | * @param[out] partition The partition structure to initializes. 50 | * @param[in] partitionFileName The name of the partition file to load. 51 | * @param[in] alfa The alfa parameter controlling the cohesivness of the communities. 52 | * @return 0 if the computation was successful. 1 if there were errors.*/ 53 | uint32_t LoadPartition( const CGraph* graph, CommunityPartition* partition, const char_t* partitionFileName, const double64_t alfa ); 54 | 55 | 56 | /** @brief Initializes a partition structure with an initial partition. 57 | * @param[in] graph A pointer to the graph. 58 | * @param[out] partition The partition structure to initializes. 
59 | * @param[in] alfa The alfa parameter controlling the cohesiveness of the communities. 60 | * @return 0 if the computation was successful. 1 if there were errors.*/ 61 | uint32_t InitializeSimplePartition( const CGraph* graph, CommunityPartition* partition, const double64_t alfa ); 62 | 63 | /** @brief Frees the resources used by the partition. 64 | * @param[out] partition The partition to free.*/ 65 | void FreeResources( CommunityPartition* partition ); 66 | 67 | /** @brief Prints the communities into a file. 68 | * @param[in] graph The graph. 69 | * @param[in] partition The partition to print. 70 | * @param[in] fileName The name of the file where the communities will be printed. 71 | * @return 0 if the execution was successful. 1 otherwise.*/ 72 | uint32_t PrintPartition( const CGraph* graph, const CommunityPartition* partition, const char_t* fileName); 73 | 74 | 75 | /** @brief Improves the quality of the communities. 76 | * @param[in] graph The graph. 77 | * @param[out] partition The partition to improve. 78 | * @param[in] numThreads The number of threads. 79 | * @param[in] lookahead The number of lookahead iterations. 80 | * @param[in] alfa The alfa parameter controlling the cohesiveness of the communities. 81 | * @return 0 if the execution was successful. 1 otherwise.*/ 82 | uint32_t ImproveCommunities( const CGraph* graph, CommunityPartition* partition, uint32_t numThreads, uint32_t lookahead, const double64_t alfa ); 83 | 84 | /** @brief Copies a partition into another partition. 85 | * @param[out] destPartition The destination partition. 86 | * @param[in] sourcePartition The source partition. 87 | * @return 0 if the copy was successful. 
1 otherwise.*/ 88 | uint32_t CopyPartition( CommunityPartition* destPartition, const CommunityPartition* sourcePartition); 89 | } 90 | 91 | 92 | #endif 93 | 94 | 95 | -------------------------------------------------------------------------------- /include/graph/graph.h: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #ifndef CGRAPH_H 16 | #define CGRAPH_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | namespace scd { 29 | 30 | /** @brief This class represents a graph.*/ 31 | class CGraph { 32 | public: 33 | CGraph(); 34 | ~CGraph(); 35 | 36 | /** @brief Reads a graph from a file. The file must contain a list of 37 | * undirected edges (an edge cannot appear twice). 38 | * The identifiers of the nodes have to bee between 39 | * 0 and N-1. The edges must be sorted by the first identifier first, 40 | * and then the second. 41 | * @param[in] fileName The name of the file. 42 | * @return 0 if the load was successful. 1 if there were errors.*/ 43 | uint32_t Load(const char_t * fileName, uint32_t numThreads); 44 | 45 | /** @brief Gets the number of nodes in the graph. 
46 | * @return The number of nodes.*/ 47 | inline uint32_t GetNumNodes() const { 48 | return m_NumNodes; 49 | } 50 | 51 | /** @brief Gets the number of edges in the graph. 52 | * @return The number of edges.*/ 53 | inline uint32_t GetNumEdges() const { 54 | return m_NumEdges; 55 | } 56 | 57 | /** @brief Gets the degree of a node. 58 | * @param[in] nodeId The identifier of the node. 59 | * @return The degree of the node.*/ 60 | inline uint32_t GetDegree(uint32_t nodeId) const { 61 | assert(nodeId. 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define CHECK_ARGUMENT_STRING(index, option,variable,setVariable) \ 25 | if( strcmp(argv[index],option) == 0 ){ \ 26 | setVariable = true; \ 27 | if( (index+1) < argc ) { \ 28 | variable = argv[index+1]; \ 29 | } else { \ 30 | printf( "Invalid options.\n" ); \ 31 | return 1;\ 32 | }\ 33 | } 34 | 35 | #define CHECK_ARGUMENT_FLOAT(index, option,variable,setVariable) \ 36 | if( strcmp(argv[index],option) == 0 ){ \ 37 | setVariable = true; \ 38 | if( (index+1) < argc ) { \ 39 | variable = atof(argv[index+1]); \ 40 | } else { \ 41 | printf( "Invalid options.\n" ); \ 42 | return 1;\ 43 | }\ 44 | } 45 | 46 | #define CHECK_ARGUMENT_INT(index, option,variable,setVariable) \ 47 | if( strcmp(argv[index],option) == 0 ){ \ 48 | setVariable = true; \ 49 | if( (index+1) < argc ) { \ 50 | variable = atoi(argv[index+1]); \ 51 | } else { \ 52 | printf( "Invalid options.\n" ); \ 53 | return 1;\ 54 | }\ 55 | } 56 | 57 | #define CHECK_FLAG(index, option,setVariable) \ 58 | if( strcmp(argv[index],option) == 0 ){ \ 59 | setVariable = true; \ 60 | } 61 | 62 | using namespace scd; 63 | 64 | static void PrintUsage() { 65 | printf("Usage: wcc \n"); 66 | printf("Availaible flags:\n"); 67 | printf("\t-f [network file name] : Specifies the network file.\n"); 68 | printf("\t-p [partition file name] : Specifies the partition file name.\n"); 69 | } 70 | 71 | 72 | int 
main(int argc, char ** argv) { 73 | 74 | bool graphFileNameSet = false; 75 | bool partitionFileNameSet = false; 76 | bool numThreadsSet = false; 77 | bool alphaSet = false; 78 | char_t * graphFileName = NULL; 79 | char_t * partitionFileName = NULL; 80 | uint32_t numThreads = omp_get_num_procs(); 81 | double alpha = 1.0; 82 | 83 | for (uint32_t i = 1; i < argc; i++) { 84 | CHECK_ARGUMENT_STRING(i, "-f", graphFileName, graphFileNameSet) 85 | CHECK_ARGUMENT_STRING(i, "-p", partitionFileName, partitionFileNameSet) 86 | CHECK_ARGUMENT_FLOAT(i, "-a", alpha, alphaSet) 87 | } 88 | 89 | if (!graphFileNameSet) { 90 | printf("Graph filename not set\n"); 91 | PrintUsage(); 92 | return 1; 93 | } 94 | 95 | if (!partitionFileNameSet) { 96 | printf("Partition filename not set\n"); 97 | PrintUsage(); 98 | return 1; 99 | } 100 | 101 | CGraph graph; 102 | 103 | //==================== LOAD THE GRAPH ================================== 104 | printf("Graph: %s\n", graphFileName); 105 | graph.Load(graphFileName, numThreads); 106 | graph.RemoveEdgesNoTriangles(numThreads); 107 | //====================================================================== 108 | 109 | //=================== LOAD PARTITION ============================ 110 | printf("PartitionFile: %s\n", partitionFileName); 111 | CommunityPartition partition; 112 | LoadPartition(&graph,&partition,partitionFileName, alpha); 113 | //====================================================================== 114 | 115 | printf("*******************************************************\n"); 116 | printf("%-32s %-10d\n", "Number of Communities:", partition.m_NumCommunities); 117 | printf("%-32s %-10f\n", "WCC:", partition.m_WCC / (float32_t) graph.GetNumNodes()); 118 | printf("*******************************************************\n"); 119 | 120 | FreeResources(&partition); 121 | return 0; 122 | } 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /tools/selector/source/main.cpp: 
-------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #define CHECK_ARGUMENT_STRING(index, option,variable,setVariable) \ 26 | if( strcmp(argv[index],option) == 0 ){ \ 27 | setVariable = true; \ 28 | if( (index+1) < argc ) { \ 29 | variable = argv[index+1]; \ 30 | } else { \ 31 | printf( "Invalid options.\n" ); \ 32 | return 1;\ 33 | }\ 34 | } 35 | 36 | #define CHECK_ARGUMENT_FLOAT(index, option,variable,setVariable) \ 37 | if( strcmp(argv[index],option) == 0 ){ \ 38 | setVariable = true; \ 39 | if( (index+1) < argc ) { \ 40 | variable = atof(argv[index+1]); \ 41 | } else { \ 42 | printf( "Invalid options.\n" ); \ 43 | return 1;\ 44 | }\ 45 | } 46 | 47 | #define CHECK_ARGUMENT_INT(index, option,variable,setVariable) \ 48 | if( strcmp(argv[index],option) == 0 ){ \ 49 | setVariable = true; \ 50 | if( (index+1) < argc ) { \ 51 | variable = atoi(argv[index+1]); \ 52 | } else { \ 53 | printf( "Invalid options.\n" ); \ 54 | return 1;\ 55 | }\ 56 | } 57 | 58 | #define CHECK_FLAG(index, option,setVariable) \ 59 | if( strcmp(argv[index],option) == 0 ){ \ 60 | setVariable = true; \ 61 | } 62 | 63 | using namespace scd; 64 | 65 | static void PrintUsage() { 66 | printf("Usage: selector \n"); 67 | 
printf("Availaible flags:\n"); 68 | printf("\t-f [network file name] : Specifies the network file.\n"); 69 | printf("\t-o [output file name] : Specifies the output file.\n"); 70 | printf("\t-p [partition file name] : Specifies the partition file name.\n"); 71 | printf("\t-a [alpha value] : Specifies the alpha value.\n"); 72 | printf("\t-min_size [minimum size] : Specifies the minimum size of the communities to find.\n"); 73 | printf("\t-max_size [max size] : Specifies the maximum size of the communities to find.\n"); 74 | printf("\t-min_wcc [min wcc] : Specifies the minimum wcc of the communities to find.\n"); 75 | printf("\t-max_wcc [max wcc] : Specifies the maximum wcc of the communities to find.\n"); 76 | } 77 | 78 | 79 | int main(int argc, char ** argv) { 80 | 81 | bool graphFileNameSet = false; 82 | bool partitionFileNameSet = false; 83 | bool outputFileNameSet = false; 84 | bool numThreadsSet = false; 85 | bool alphaSet = false; 86 | bool minSizeSet = false; 87 | bool maxSizeSet = false; 88 | bool minWCCSet = false; 89 | bool maxWCCSet = false; 90 | uint32_t minSize= 1; 91 | uint32_t maxSize= 10; 92 | double minWCC = 0.0; 93 | double maxWCC = 1.0; 94 | double alpha = 1.0; 95 | char_t * graphFileName = NULL; 96 | char_t * partitionFileName = NULL; 97 | char_t * outputFileName = NULL; 98 | uint32_t numThreads = omp_get_num_procs(); 99 | 100 | for (uint32_t i = 1; i < argc; i++) { 101 | CHECK_ARGUMENT_STRING(i, "-f", graphFileName, graphFileNameSet) 102 | CHECK_ARGUMENT_STRING(i, "-p", partitionFileName, partitionFileNameSet) 103 | CHECK_ARGUMENT_STRING(i, "-o", outputFileName, outputFileNameSet) 104 | CHECK_ARGUMENT_FLOAT(i, "-a", alpha, alphaSet) 105 | CHECK_ARGUMENT_INT(i, "-min_size", minSize, minSizeSet) 106 | CHECK_ARGUMENT_INT(i, "-max_size", maxSize, maxSizeSet) 107 | CHECK_ARGUMENT_FLOAT(i, "-min_wcc", minWCC, minWCCSet) 108 | CHECK_ARGUMENT_FLOAT(i, "-max_wcc", maxWCC, maxWCCSet) 109 | } 110 | 111 | if (!graphFileNameSet || !minSize || !maxSize || 
!minWCC || !maxWCC || !outputFileNameSet) { 112 | printf("Graph filename not set\n"); 113 | PrintUsage(); 114 | return 1; 115 | } 116 | 117 | if (!partitionFileNameSet) { 118 | printf("Partition filename not set\n"); 119 | PrintUsage(); 120 | return 1; 121 | } 122 | 123 | CGraph graph; 124 | 125 | //==================== LOAD THE GRAPH ================================== 126 | printf("Graph: %s\n", graphFileName); 127 | graph.Load(graphFileName, numThreads); 128 | graph.RemoveEdgesNoTriangles(numThreads); 129 | //====================================================================== 130 | 131 | //=================== LOAD PARTITION =================================== 132 | printf("PartitionFile: %s\n", partitionFileName); 133 | CommunityPartition partition; 134 | LoadPartition(&graph,&partition,partitionFileName, alpha); 135 | //====================================================================== 136 | 137 | std::cout << alpha << " " << minSize << " " << maxSize << " " << minWCC << " " << maxWCC << std::endl; 138 | 139 | std::ofstream outputFile; 140 | outputFile.open(outputFileName); 141 | 142 | for( int i = 0; i < partition.m_NumCommunities; ++i ) { 143 | std::set community; 144 | uint32_t communitySize = partition.m_Communities[partition.m_CommunityIndices[i]]; 145 | if( communitySize < minSize || communitySize > maxSize ) continue; 146 | uint32_t * community_ptr = &partition.m_Communities[partition.m_CommunityIndices[i]+1]; 147 | for( int j = 0; j < communitySize; ++j ) { 148 | community.insert(community_ptr[j]); 149 | } 150 | double score = ComputeWCC(&graph,alpha,community); 151 | if( score < minWCC || score > maxWCC ) continue; 152 | for( std::set::iterator it = community.begin(); it != community.end(); ++it ) { 153 | outputFile << graph.ReMap(*it) << " "; 154 | } 155 | outputFile << std::endl; 156 | std::cout << score << std::endl; 157 | } 158 | 159 | outputFile.close(); 160 | FreeResources(&partition); 161 | return 0; 162 | } 163 | 
-------------------------------------------------------------------------------- /source/main.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define CHECK_ARGUMENT_STRING(index, option,variable,setVariable) \ 25 | if( strcmp(argv[index],option) == 0 ){ \ 26 | setVariable = true; \ 27 | if( (index+1) < argc ) { \ 28 | variable = argv[index+1]; \ 29 | } else { \ 30 | printf( "Invalid options.\n" ); \ 31 | return 1;\ 32 | }\ 33 | } 34 | 35 | #define CHECK_ARGUMENT_FLOAT(index, option,variable,setVariable) \ 36 | if( strcmp(argv[index],option) == 0 ){ \ 37 | setVariable = true; \ 38 | if( (index+1) < argc ) { \ 39 | variable = atof(argv[index+1]); \ 40 | } else { \ 41 | printf( "Invalid options.\n" ); \ 42 | return 1;\ 43 | }\ 44 | } 45 | 46 | #define CHECK_ARGUMENT_INT(index, option,variable,setVariable) \ 47 | if( strcmp(argv[index],option) == 0 ){ \ 48 | setVariable = true; \ 49 | if( (index+1) < argc ) { \ 50 | variable = atoi(argv[index+1]); \ 51 | } else { \ 52 | printf( "Invalid options.\n" ); \ 53 | return 1;\ 54 | }\ 55 | } 56 | 57 | #define CHECK_FLAG(index, option,setVariable) \ 58 | if( strcmp(argv[index],option) == 0 ){ \ 59 | setVariable = true; \ 60 | } 61 | 62 | using namespace 
scd; 63 | 64 | static void PrintUsage() { 65 | printf("Usage: scd \n"); 66 | printf("Availaible flags:\n"); 67 | printf("\t-f [network file name] : Specifies the network file.\n"); 68 | printf("\t-o [output file name] : Specifies the output file name.\n"); 69 | printf("\t-n [number of threads]: Specifies the number of threads to run the algorithm.\n"); 70 | printf("\t-l [lookahead size]: Sets the size of the lookahead iterations to look.\n"); 71 | printf("\t-p [partition file name]: Specifies the partition file name to start the refinement from.\n"); 72 | printf("\t-a [lookahead size]: Specifies the alfa parameter to control the level of cohesion of the communities. Default: 1.0.\n"); 73 | } 74 | 75 | 76 | int main(int argc, char ** argv) { 77 | 78 | bool graphFileNameSet = false; 79 | bool outputFileNameSet = false; 80 | bool partitionFileNameSet = false; 81 | bool numThreadsSet = false; 82 | bool alfaSet = false; 83 | char_t * graphFileName = NULL; 84 | char_t * outputFileName = NULL; 85 | char_t * partitionFileName = NULL; 86 | uint32_t numThreads = omp_get_num_procs(); 87 | uint32_t lookahead = 5; 88 | bool lookaheadSet = false; 89 | double64_t alfa = 1.0; 90 | 91 | for (uint32_t i = 1; i < argc; i++) { 92 | CHECK_ARGUMENT_STRING(i, "-f", graphFileName, graphFileNameSet) 93 | CHECK_ARGUMENT_STRING(i, "-o", outputFileName, outputFileNameSet) 94 | CHECK_ARGUMENT_STRING(i, "-p", partitionFileName, partitionFileNameSet) 95 | CHECK_ARGUMENT_INT(i, "-n", numThreads, numThreadsSet) 96 | CHECK_ARGUMENT_INT(i, "-l", lookahead, lookaheadSet) 97 | CHECK_ARGUMENT_FLOAT(i, "-a", alfa, alfaSet) 98 | } 99 | 100 | if (!graphFileNameSet) { 101 | printf("Graph filename not set\n"); 102 | PrintUsage(); 103 | return 1; 104 | } 105 | 106 | if (numThreads <= 0) { 107 | printf("Invalid number of threads\n"); 108 | PrintUsage(); 109 | return 2; 110 | } 111 | 112 | if (!outputFileNameSet) { 113 | outputFileName = new char_t[512]; 114 | sprintf(outputFileName, "communities.dat"); 115 | 
} 116 | 117 | CGraph graph; 118 | ::uint64_t totalTime = 0, 119 | initTime = 0, 120 | spentTime = 0, 121 | loadingTime = 0, 122 | algorithmTime = 0; 123 | 124 | 125 | //==================== LOAD THE GRAPH ================================== 126 | initTime = StartClock(); 127 | printf("Graph: %s\n", graphFileName); 128 | printf("OutputFile: %s\n", outputFileName); 129 | graph.Load(graphFileName, numThreads); 130 | spentTime = StopClock(initTime); 131 | loadingTime = spentTime; 132 | totalTime += spentTime; 133 | printf("Load time: %lu ms\n", spentTime); 134 | //====================================================================== 135 | 136 | 137 | //================ REMOVE EDGES WITHOUT TRIANGLES ====================== 138 | initTime = StartClock(); 139 | printf("Removing edges without triangles ...\n"); 140 | graph.RemoveEdgesNoTriangles(numThreads); 141 | spentTime = StopClock(initTime); 142 | algorithmTime += spentTime; 143 | totalTime += spentTime; 144 | printf("Removing edges without triangles time: %lu ms\n", spentTime); 145 | //====================================================================== 146 | 147 | 148 | //=================== INITIALIZE PARTITION ============================ 149 | initTime = StartClock(); 150 | CommunityPartition partition; 151 | if(partitionFileNameSet) { 152 | printf("Loading partition file %s ... \n", partitionFileName); 153 | if( LoadPartition(&graph,&partition,partitionFileName, alfa) ) { 154 | printf("Error loading partition\n"); 155 | return 1; 156 | } 157 | } else { 158 | printf("Initial partition file not set. 
Computing initial partition ...\n"); 159 | if (InitializeSimplePartition(&graph, &partition, alfa)) { 160 | printf("Error computing initial partition\n"); 161 | return 1; 162 | } 163 | } 164 | spentTime = StopClock(initTime); 165 | totalTime += spentTime; 166 | printf("Initial partition time: %lu ms\n", spentTime); 167 | //====================================================================== 168 | 169 | 170 | //================ TRANSFER NODES AMONG PARTITIONS ===================== 171 | initTime = StartClock(); 172 | if (ImproveCommunities(&graph, &partition, numThreads, lookahead, alfa)) { 173 | printf("Error while improving communities\n"); 174 | return 1; 175 | } 176 | spentTime = StopClock(initTime); 177 | algorithmTime += spentTime; 178 | totalTime += spentTime; 179 | printf("Improvement execution time: %lu ms\n", spentTime); 180 | //====================================================================== 181 | 182 | 183 | //======================== PRINT RESULTS =============================== 184 | initTime = StartClock(); 185 | PrintPartition(&graph, &partition, outputFileName); 186 | spentTime = StopClock(initTime); 187 | totalTime += spentTime; 188 | printf("Print partition time: %lu ms\n", spentTime); 189 | //====================================================================== 190 | 191 | 192 | printf("\n"); 193 | printf("\n"); 194 | printf("*******************************************************\n"); 195 | printf("%-32s %-10d\n", "Number of Communities:", partition.m_NumCommunities); 196 | printf("%-32s %-10f\n", "WCC:", partition.m_WCC / (float32_t) graph.GetNumNodes()); 197 | printf("%-32s %-10lu ms\n", "Loading time:", loadingTime); 198 | printf("%-32s %-10lu ms\n", "Algorithm time:", algorithmTime); 199 | printf("%-32s %-10lu ms\n", "Total execution time:", totalTime); 200 | printf("*******************************************************\n"); 201 | 202 | FreeResources(&partition); 203 | 204 | if (!outputFileNameSet) { 205 | delete [] 
outputFileName; 206 | } 207 | return 0; 208 | } 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /source/wcc/wcc.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | namespace scd { 20 | 21 | 22 | uint32_t Intersect(/*const*/ uint32_t* list1, const uint32_t size1, /*const*/ uint32_t* list2, const uint32_t size2) { 23 | uint32_t triangles = 0; 24 | 25 | uint32_t* endList1 = list1 + size1; 26 | uint32_t* endList2 = list2 + size2; 27 | 28 | 29 | // v2.0 30 | while (list1 != endList1 && list2 != endList2) { 31 | if (*list1 < *list2) { 32 | list1++; 33 | } else if (*list1 > *list2) { 34 | list2++; 35 | } else { //(*list1 == *list2){ //triangle found 36 | triangles++; 37 | list1++; 38 | list2++; 39 | } 40 | } 41 | 42 | return triangles; 43 | } 44 | 45 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, const uint32_t * communities, 46 | const uint32_t numCommunities, const uint32_t* labelsIndices, 47 | const uint32_t * communitiesInvIndex, double64_t* wccs) { 48 | double64_t globalWCC = 0; 49 | 50 | #pragma omp parallel for schedule(static, 8) reduction(+:globalWCC) 51 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 52 | uint32_t communitySize = 
communitiesInvIndex[labelsIndices[communities[i]]]; 53 | wccs[i] = ComputeWCC(graph, alfa, i, communities[i], communities, communitySize); 54 | globalWCC += wccs[i]; 55 | } 56 | return globalWCC; 57 | } 58 | 59 | 60 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, uint32_t node, uint32_t communityLabel, 61 | const uint32_t * communities, uint32_t communitySize) { 62 | 63 | uint32_t internalTriangles = 0; 64 | uint32_t internalTriangleDegree = 0; 65 | uint32_t triangleDegree = 0; 66 | uint32_t node1 = node; 67 | const uint32_t* adjacencies1 = graph->GetNeighbors(node1); 68 | uint32_t degree1 = graph->GetDegree(node1); 69 | 70 | if (communitySize <= 2 ||graph->GetTotalTriangles(node) == 0) { 71 | return 0.0; 72 | } 73 | 74 | 75 | //while(adjacencies1 < endList1){ 76 | for (uint32_t k = 0; k < degree1; k++) { 77 | uint32_t nodeId2 = adjacencies1[k]; 78 | uint32_t degree2 = graph->GetDegree(nodeId2); 79 | bool internal = (communities[nodeId2] == communityLabel); 80 | bool internalTriangleFound = false; 81 | bool triangleFound = false; 82 | const uint32_t* adjacencies2 = graph->GetNeighbors(nodeId2); 83 | 84 | uint32_t* currentNode1 = (uint32_t*) adjacencies1; 85 | uint32_t* currentNode2 = (uint32_t*) adjacencies2; 86 | uint32_t* endAdjacencies1 = (uint32_t*) adjacencies1 + degree1; 87 | uint32_t* endAdjacencies2 = (uint32_t*) adjacencies2 + degree2; 88 | 89 | while (currentNode1 != endAdjacencies1 && currentNode2 != endAdjacencies2){ 90 | if (*currentNode1 == *currentNode2){ 91 | uint32_t sharedNeighbor = *currentNode1; 92 | if (internal && communities[sharedNeighbor] == communityLabel) { 93 | internalTriangleFound = true; 94 | internalTriangles++; 95 | } 96 | triangleFound = true; 97 | currentNode1++; 98 | currentNode2++; 99 | }else if(*currentNode1 < *currentNode2){ 100 | while(*currentNode1 < *currentNode2 && currentNode1 < endAdjacencies1){ 101 | currentNode1++; 102 | } 103 | }else{ 104 | while(*currentNode1 > *currentNode2 && currentNode2 < 
endAdjacencies2){ 105 | currentNode2++; 106 | } 107 | } 108 | } 109 | 110 | if (internalTriangleFound) { 111 | internalTriangleDegree++; 112 | } 113 | if (triangleFound) { 114 | triangleDegree++; 115 | } 116 | } 117 | 118 | return ((internalTriangles / (double64_t) graph->GetTotalTriangles(node)) * 119 | (triangleDegree / (double64_t) (triangleDegree + alfa*(communitySize - 1 - internalTriangleDegree)))); 120 | } 121 | 122 | double64_t ComputeWCC(const CGraph * graph, const double64_t alfa, std::set& community ){ 123 | double64_t WCC = 0.0; 124 | for( std::set::iterator it = community.begin(); it != community.end(); ++it) { 125 | uint32_t nodeId1 = *it; 126 | uint32_t totalTriangles = graph->GetTotalTriangles(nodeId1); 127 | uint32_t internalTriangles = 0; 128 | uint32_t triangleDegree = 0; 129 | uint32_t internalTriangleDegree = 0; 130 | uint32_t degree1 = graph->GetDegree(nodeId1); 131 | const uint32_t* adjacencies1 = graph->GetNeighbors(nodeId1); 132 | for( int j = 0; j < degree1; ++j ) { 133 | uint32_t nodeId2 = adjacencies1[j]; 134 | uint32_t degree2 = graph->GetDegree(nodeId2); 135 | bool internal = (community.find(nodeId1) != community.end()) && 136 | (community.find(nodeId2) != community.end()); 137 | bool internalTriangleFound = false; 138 | bool triangleFound = false; 139 | const uint32_t* adjacencies2 = graph->GetNeighbors(nodeId2); 140 | 141 | uint32_t* currentNode1 = (uint32_t*) adjacencies1; 142 | uint32_t* currentNode2 = (uint32_t*) adjacencies2; 143 | uint32_t* endAdjacencies1 = (uint32_t*) adjacencies1 + degree1; 144 | uint32_t* endAdjacencies2 = (uint32_t*) adjacencies2 + degree2; 145 | 146 | while (currentNode1 != endAdjacencies1 && currentNode2 != endAdjacencies2){ 147 | if (*currentNode1 == *currentNode2){ 148 | uint32_t sharedNeighbor = *currentNode1; 149 | if (internal && (community.find(sharedNeighbor) != community.end())){ 150 | internalTriangleFound = true; 151 | internalTriangles++; 152 | } 153 | triangleFound = true; 154 | 
currentNode1++; 155 | currentNode2++; 156 | }else if(*currentNode1 < *currentNode2){ 157 | while(*currentNode1 < *currentNode2 && currentNode1 < endAdjacencies1){ 158 | currentNode1++; 159 | } 160 | }else{ 161 | while(*currentNode1 > *currentNode2 && currentNode2 < endAdjacencies2){ 162 | currentNode2++; 163 | } 164 | } 165 | } 166 | 167 | if (internalTriangleFound) { 168 | internalTriangleDegree++; 169 | } 170 | if (triangleFound) { 171 | triangleDegree++; 172 | } 173 | } 174 | 175 | double64_t denom = (double64_t)(triangleDegree + alfa*(community.size() - 1 - internalTriangleDegree)); 176 | if( denom != 0 ) { 177 | WCC+=((internalTriangles / (double64_t)totalTriangles) * (triangleDegree / denom)); 178 | } 179 | } 180 | return WCC; 181 | } 182 | } 183 | 184 | -------------------------------------------------------------------------------- /source/graph/graph.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | namespace scd { 27 | 28 | /** @brief Compares two unsigned integers. 29 | * @param e1 Void pointer to the first unsigned integer. 30 | * @param e2 Void pointer to the second unsigned integer. 31 | * @return -1 if e1 goes before e2. 1 if e1 goes after e2. 
0 if e1 and e2 are equal.*/ 32 | static int Compare_Ids(const void* e1, const void* e2) { 33 | uint32_t id1 = *(uint32_t*)e1; 34 | uint32_t id2 = *(uint32_t*)e2; 35 | if( id1 < id2 ) return -1; 36 | if( id2 < id1 ) return 1; 37 | return 0; 38 | } 39 | 40 | 41 | 42 | 43 | 44 | CGraph::CGraph() : 45 | m_NumNodes(0), 46 | m_NumEdges(0), 47 | m_Nodes(NULL), 48 | m_Adjacencies(NULL), 49 | m_Map(NULL), 50 | m_TotalTriangles(NULL) 51 | { 52 | 53 | } 54 | 55 | CGraph::~CGraph() { 56 | 57 | if( m_Nodes!=NULL ) { 58 | delete [] m_Nodes; 59 | m_Nodes = NULL; 60 | } 61 | 62 | if( m_Adjacencies!=NULL ) { 63 | delete [] m_Adjacencies; 64 | m_Adjacencies = NULL; 65 | } 66 | 67 | if( m_Map != NULL ) { 68 | delete [] m_Map; 69 | m_Map = NULL; 70 | } 71 | 72 | if( m_TotalTriangles != NULL ) { 73 | delete [] m_TotalTriangles; 74 | m_TotalTriangles = NULL; 75 | } 76 | } 77 | 78 | uint32_t CGraph::Load(const char_t * fileName, uint32_t numThreads) { 79 | 80 | printf( "Graph: Loading Graph\n" ); 81 | std::ifstream inFile; 82 | inFile.open((const char *)fileName); 83 | if(!inFile) { 84 | printf( "Graph: Error Openning Graph File\n" ); 85 | return 1; 86 | } 87 | 88 | 89 | printf( "Graph: Relabeling nodes ...\n" ); 90 | timeval time; 91 | gettimeofday(&time, NULL); 92 | uint64_t initTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 93 | std::map* mapa = new std::map(); 94 | if( !mapa ) { 95 | printf( "\t Graph: Error allocating mapa\n" ); 96 | return 1; 97 | } 98 | uint32_t index = 0; 99 | m_NumEdges = 0; 100 | uint32_t node1; 101 | while( inFile >> node1 ) { 102 | uint32_t node2; 103 | inFile >> node2; 104 | 105 | if(!mapa->count(node1)) { 106 | mapa->insert(std::pair(node1,index)); 107 | index++; 108 | } 109 | 110 | if(!mapa->count(node2)) { 111 | mapa->insert(std::pair(node2,index)); 112 | index++; 113 | } 114 | m_NumEdges++; 115 | } 116 | gettimeofday(&time, NULL); 117 | uint64_t endTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 118 | printf("Graph: Nodes relabeled in %lu 
ms\n", endTime - initTime); 119 | 120 | 121 | printf( "Graph: Reading degrees ...\n" ); 122 | //We set the file cursor to the beginning. 123 | inFile.close(); 124 | inFile.open((const char *)fileName); 125 | gettimeofday(&time, NULL); 126 | initTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 127 | //Allocate space for nodes and initialize de degree. 128 | m_NumNodes = index; 129 | m_Nodes = new Node[m_NumNodes]; 130 | if( !m_Nodes ) { 131 | printf( "Graph: Error allocating nodes\n" ); 132 | return 1; 133 | } 134 | for( uint32_t i = 0; i < m_NumNodes; i++ ) { 135 | m_Nodes[i].m_Degree = 0; 136 | } 137 | 138 | //Compute the degree of each node. 139 | while( inFile >> node1 ) { 140 | uint32_t node2; 141 | inFile >> node2; 142 | m_Nodes[(*mapa->find(node1)).second].m_Degree++; 143 | m_Nodes[(*mapa->find(node2)).second].m_Degree++; 144 | } 145 | 146 | //Computing the adjacency indices, average degree and maximum Degree. 147 | float32_t averageDegree = 0.0f; 148 | float32_t maxDegree = 0.0f; 149 | uint32_t currentAdjacencyIndex = 0; 150 | for( uint32_t i = 0; i < m_NumNodes; i++ ) { 151 | m_Nodes[i].m_AdjacencyIndex = currentAdjacencyIndex; 152 | currentAdjacencyIndex += m_Nodes[i].m_Degree; 153 | averageDegree += m_Nodes[i].m_Degree; 154 | if( m_Nodes[i].m_Degree > maxDegree ) { 155 | maxDegree = m_Nodes[i].m_Degree; 156 | } 157 | } 158 | averageDegree /= (m_NumNodes); 159 | gettimeofday(&time, NULL); 160 | endTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 161 | printf("\t Graph: Degrees read in %lu ms\n", endTime - initTime); 162 | 163 | printf( "Graph: Reading adjacencies ...\n" ); 164 | gettimeofday(&time, NULL); 165 | initTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 166 | //We set the file cursor to the beginning. 
167 | inFile.close(); 168 | inFile.open((const char *)fileName); 169 | 170 | m_Adjacencies = new uint32_t[m_NumEdges*2]; 171 | if( !m_Adjacencies ) { 172 | printf( "Graph: Error allocating adjacencies\n" ); 173 | return 1; 174 | } 175 | uint32_t* counters = new uint32_t[m_NumNodes]; 176 | for( uint32_t i = 0; i < m_NumNodes; i++ ) { 177 | counters[i] = 0; 178 | } 179 | 180 | //Filling adjacencies 181 | while( inFile >> node1 ) { 182 | uint32_t node2; 183 | inFile >> node2; 184 | uint32_t tail = (*mapa->find(node1)).second; 185 | uint32_t head = (*mapa->find(node2)).second; 186 | assert(counters[tail]::iterator it = mapa->begin();it!=mapa->end();it++) { 218 | m_Map[(*it).second] = (*it).first; 219 | } 220 | 221 | delete mapa; 222 | gettimeofday(&time, NULL); 223 | endTime = (time.tv_sec * 1000) + (time.tv_usec / 1000); 224 | printf("\t Graph: Map array filled in %lu ms\n", endTime - initTime); 225 | 226 | printf( "Graph: Graph Loaded\n" ); 227 | printf( "Graph: Number of Nodes: %u\n", m_NumNodes ); 228 | printf( "Graph: Number of Edges: %u\n", m_NumEdges ); 229 | //printf( "Graph: Clustering coefficient: %f\n", m_CC ); //Not available here 230 | printf( "Graph: Average Degree: %f\n", averageDegree ); 231 | printf( "Graph: Maximum Degree: %f\n", maxDegree ); 232 | printf( "Graph: Memory \n" ); 233 | printf( "..............\n" ); 234 | uint64_t memNodes = (char*)&m_Nodes[m_NumNodes] - (char*)&m_Nodes[0]; 235 | uint64_t memEdges = (char*)&m_Adjacencies[m_NumEdges*2] - (char*)&m_Adjacencies[0]; 236 | uint64_t memMap = (char*)&m_Map[m_NumNodes-1] - (char*)&m_Map[0]; 237 | uint64_t memTotalTriangles = (char*)&m_TotalTriangles[m_NumNodes-1] - (char*)&m_TotalTriangles[0]; 238 | printf( "%-16s %-10lu Bytes\n", "Nodes:", memNodes ); 239 | printf( "%-16s %-10lu Bytes\n", "Adjacencies:", memEdges ); 240 | printf( "%-16s %-10lu Bytes\n", "Map:", memMap ); 241 | printf( "%-16s %-10lu Bytes\n", "TotalTriangles:", memTotalTriangles ); 242 | printf( "%-16s %-10lu Bytes\n", "Total:", 
memNodes + memEdges + memMap + memTotalTriangles ); 243 | printf( "..............\n" ); 244 | return 0; 245 | } 246 | 247 | 248 | static int compareInt (const void * a, const void * b) 249 | { 250 | if ( *(int*)a > *(int*)b ) return 1; 251 | if ( *(int*)a < *(int*)b ) return -1; 252 | if ( *(int*)a == *(int*)b ) return 0; 253 | } 254 | 255 | 256 | uint32_t CGraph::RemoveEdgesNoTriangles( uint32_t numThreads) { 257 | omp_set_num_threads(numThreads); 258 | m_TotalTriangles = new uint32_t[m_NumNodes]; 259 | if( !m_TotalTriangles ) { 260 | printf("Error allocating total triangles\n"); 261 | return 1; 262 | } 263 | m_CC = 0; 264 | uint32_t numEdgesRemoved = 0; 265 | uint32_t newAdjacencyIndex = 0; 266 | uint32_t* edgesTriangles = new uint32_t[m_NumEdges*2]; 267 | 268 | #pragma omp parallel for schedule(dynamic, 32) 269 | for(uint32_t i = 0; i < m_NumNodes; i++ ) { 270 | uint32_t edgesTrianglesIndex = m_Nodes[i].m_AdjacencyIndex; 271 | m_TotalTriangles[i] = 0; 272 | uint32_t* adjacencyList1 = &m_Adjacencies[m_Nodes[i].m_AdjacencyIndex]; 273 | for(uint32_t j = 0; j < m_Nodes[i].m_Degree; j++) { 274 | uint32_t* adjacencyList2 = &m_Adjacencies[m_Nodes[adjacencyList1[j]].m_AdjacencyIndex]; 275 | if( i < adjacencyList1[j]) { 276 | uint32_t triangles = Intersect(adjacencyList1, m_Nodes[i].m_Degree, adjacencyList2, m_Nodes[adjacencyList1[j]].m_Degree); 277 | edgesTriangles[edgesTrianglesIndex] = triangles; 278 | 279 | uint32_t k = (uint32_t*) bsearch(&i, adjacencyList2, 280 | m_Nodes[adjacencyList1[j]].m_Degree, sizeof(uint32_t), compareInt) 281 | - adjacencyList2; 282 | assert(k != m_Nodes[adjacencyList1[j]].m_Degree); // "ERROR when computing triangles." 
283 | edgesTriangles[m_Nodes[adjacencyList1[j]].m_AdjacencyIndex + k ] = triangles; 284 | } 285 | edgesTrianglesIndex++; 286 | } 287 | } 288 | 289 | uint32_t edgesTrianglesIndex = 0; 290 | for(uint32_t i = 0; i < m_NumNodes; i++ ) { 291 | m_TotalTriangles[i] = 0; 292 | uint32_t newDegree = 0; 293 | uint32_t* tempAdjacencies = new uint32_t[m_Nodes[i].m_Degree]; 294 | uint32_t* adjacencyList1 = &m_Adjacencies[m_Nodes[i].m_AdjacencyIndex]; 295 | for(uint32_t j = 0; j < m_Nodes[i].m_Degree; j++) { 296 | uint32_t triangles = edgesTriangles[edgesTrianglesIndex++]; 297 | if( triangles > 0 ) { 298 | tempAdjacencies[newDegree] = adjacencyList1[j]; 299 | newDegree++; 300 | } else { 301 | numEdgesRemoved++; 302 | } 303 | m_TotalTriangles[i] += triangles; 304 | } 305 | 306 | memcpy(&m_Adjacencies[newAdjacencyIndex], tempAdjacencies, sizeof(uint32_t)*newDegree); 307 | m_Nodes[i].m_Degree = newDegree; 308 | m_Nodes[i].m_AdjacencyIndex = newAdjacencyIndex; 309 | newAdjacencyIndex+=newDegree; 310 | delete [] tempAdjacencies; 311 | uint32_t auxPossibleTriangles = m_Nodes[i].m_Degree*(m_Nodes[i].m_Degree - 1); 312 | if( auxPossibleTriangles > 0 ) { 313 | m_CC += m_TotalTriangles[i] / (double64_t)auxPossibleTriangles; 314 | } 315 | } 316 | 317 | delete [] edgesTriangles; 318 | m_CC /= m_NumNodes; 319 | // std::cout << "m_cc inicial: " << m_CC << std::endl; 320 | m_NumEdges-=(numEdgesRemoved/2); 321 | return 0; 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /tools/f1score/source/main.cpp: -------------------------------------------------------------------------------- 1 | /*F1Score is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 
5 | 6 | F1Score is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details. 10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | 27 | std::vector*> * partitionA; 28 | std::vector*> * partitionB; 29 | 30 | #define LOWER_THRESHOLD 2 31 | 32 | /** Parses the sets of a file **/ 33 | std::vector*>* ParseSets(std::ifstream& file) { 34 | std::list*> * auxSets = new std::list*>(); 35 | std::string line; 36 | std::string delimiter(" "); 37 | while(std::getline(file,line)) { 38 | std::set* set = new std::set(); 39 | size_t nextToken = 0; 40 | std::istringstream stream(line); 41 | unsigned int node; 42 | while( stream >> node ) { 43 | set->insert(node); 44 | } 45 | 46 | auxSets->push_back(set); 47 | } 48 | 49 | std::vector*> * returnSet = new std::vector*>(); 50 | std::list*>::iterator it; 51 | unsigned int i = 0; 52 | for(it = auxSets->begin();it!=auxSets->end();++it,++i) { 53 | returnSet->push_back(*it); 54 | } 55 | return returnSet; 56 | } 57 | 58 | 59 | double F1Score( std::set& com1, std::set& com2, double* precision, double* recall) { 60 | *precision = 0.0f; 61 | *recall = 0.0f; 62 | 63 | //computing intersection 64 | unsigned int counter = 0; 65 | if( com1.size() < com2.size() ) { 66 | for( std::set::iterator iterCom1 = com1.begin(); iterCom1 != com1.end(); iterCom1++ ) { 67 | if( com2.find( *iterCom1 ) != com2.end() ) { 68 | counter++; 69 | } 70 | } 71 | } else { 72 | for( std::set::iterator iterCom2 = com2.begin(); iterCom2 != com2.end(); iterCom2++ ) { 73 | if( com1.find( *iterCom2 ) != com1.end() ) { 74 | counter++; 75 | } 76 | } 77 | } 78 | *precision = counter 
/ (double) com1.size(); 79 | *recall = counter / (double) com2.size(); 80 | if( *precision + *recall > 0.0f ){ 81 | return 2*(*precision)*(*recall)/((*precision) + (*recall)); 82 | } 83 | return 0.0f; 84 | } 85 | 86 | static double Average( double array[], int size ) { 87 | double accum = 0.0; 88 | for( int i = 0; i < size; ++i ) { 89 | accum+= array[i]; 90 | } 91 | return accum / size; 92 | } 93 | 94 | int main(int argc, char ** argv) 95 | { 96 | if(argc < 4){ 97 | std::cout << "Wrong number of arguments. Usage: F1Score " << std::endl; 98 | exit(0); 99 | } 100 | 101 | std::ifstream inputFileA; 102 | inputFileA.open(argv[1]); 103 | if(!inputFileA.is_open()) { 104 | std::cout << "PARTITION FILE A NOT FOUND" << std::endl; 105 | exit(1); 106 | } 107 | std::ifstream inputFileB; 108 | inputFileB.open(argv[2]); 109 | if(!inputFileB.is_open()){ 110 | std::cout << "PARTITION FILE B NOT FOUND" << std::endl; 111 | exit(1); 112 | } 113 | 114 | std::cout << "Parsing Input Files"<< std::endl; 115 | partitionA = ParseSets(inputFileA); 116 | inputFileA.close(); 117 | 118 | partitionB = ParseSets(inputFileB); 119 | inputFileB.close(); 120 | 121 | std::map > nodePartitionA; 122 | std::map > nodePartitionB; 123 | 124 | std::cout << "Partition A size: " << partitionA->size() << std::endl; 125 | std::cout << "Partition B size: " << partitionB->size() << std::endl; 126 | double maxSetsF1Scores = 0.0f; 127 | double maxTagsF1Scores = 0.0f; 128 | 129 | std::cout << "Creating Indexes of Partition A" << std::endl; 130 | for( unsigned int i = 0; i < partitionA->size(); i++){ 131 | std::set* community = (*partitionA)[i]; 132 | for( auto it = community->begin(); it != community->end(); it++ ){ 133 | unsigned int node = *it; 134 | auto it2 = nodePartitionA.find(node); 135 | nodePartitionA[node].insert(i); 136 | } 137 | } 138 | 139 | std::cout << "Creating Indexes of Partition B" << std::endl; 140 | for( unsigned int i = 0; i < partitionB->size(); i++){ 141 | std::set* community = 
(*partitionB)[i]; 142 | for( auto it = community->begin(); it != community->end(); it++ ){ 143 | unsigned int node = *it; 144 | auto it2 = nodePartitionB.find(node); 145 | nodePartitionB[node].insert(i); 146 | } 147 | } 148 | 149 | double* f1ScorePartitionA = new double[partitionA->size()]; 150 | double* f1ScorePartitionB = new double[partitionB->size()]; 151 | 152 | double* f1ScorePrecisionPartitionA = new double[partitionA->size()]; 153 | double* f1ScorePrecisionPartitionB = new double[partitionB->size()]; 154 | 155 | double* f1ScoreRecallPartitionA = new double[partitionA->size()]; 156 | double* f1ScoreRecallPartitionB = new double[partitionB->size()]; 157 | 158 | int* bestIdPartitionA = new int[partitionA->size()]; 159 | int* bestIdPartitionB = new int[partitionB->size()]; 160 | 161 | double* precisionPartitionA = new double[partitionA->size()]; 162 | double* precisionPartitionB = new double[partitionB->size()]; 163 | 164 | double* recallPartitionA = new double[partitionA->size()]; 165 | double* recallPartitionB = new double[partitionB->size()]; 166 | 167 | int* coveragePartitionA = new int[partitionA->size()]; 168 | int* coveragePartitionB = new int[partitionB->size()]; 169 | 170 | 171 | /** Initializing arrays to 0 **/ 172 | std::memset(f1ScorePartitionA,0,sizeof(double)*partitionA->size()); 173 | std::memset(f1ScorePrecisionPartitionA,0,sizeof(double)*partitionA->size()); 174 | std::memset(f1ScoreRecallPartitionA,0,sizeof(double)*partitionA->size()); 175 | std::memset(precisionPartitionA,0,sizeof(double)*partitionA->size()); 176 | std::memset(recallPartitionA,0,sizeof(double)*partitionA->size()); 177 | std::memset(coveragePartitionA,0,sizeof(int)*partitionA->size()); 178 | std::memset(bestIdPartitionA,0,sizeof(int)*partitionA->size()); 179 | 180 | std::memset(f1ScorePartitionB,0,sizeof(double)*partitionB->size()); 181 | std::memset(f1ScorePrecisionPartitionB,0,sizeof(double)*partitionB->size()); 182 | 
std::memset(f1ScoreRecallPartitionB,0,sizeof(double)*partitionB->size()); 183 | std::memset(precisionPartitionB,0,sizeof(double)*partitionB->size()); 184 | std::memset(recallPartitionB,0,sizeof(double)*partitionB->size()); 185 | std::memset(coveragePartitionB,0,sizeof(int)*partitionB->size()); 186 | std::memset(bestIdPartitionB,0,sizeof(int)*partitionB->size()); 187 | 188 | std::cout << "Computing F1Score of partition A" << std::endl; 189 | for( unsigned int i = 0; i < partitionA->size(); i++){ 190 | std::set* community = (*partitionA)[i]; 191 | std::set communities; 192 | 193 | // Selecting candidate communities to compare with. 194 | for( auto it = community->begin(); it != community->end(); it++ ) { 195 | auto it2 = nodePartitionB.find(*it); 196 | if(it2 != nodePartitionB.end()) { 197 | for( auto it3 = (*it2).second.begin(); it3 != (*it2).second.end(); it3++ ) { 198 | communities.insert(*it3); 199 | } 200 | coveragePartitionA[i]++; 201 | } 202 | 203 | } 204 | 205 | for( auto it = communities.begin(); it != communities.end(); it++){ 206 | std::set* community2 = (*partitionB)[*it]; 207 | double precision; 208 | double recall; 209 | double f1Score = F1Score(*community,*community2, &precision, &recall ); 210 | if(f1Score >= f1ScorePartitionA[i]) { 211 | f1ScorePartitionA[i] = f1Score; 212 | f1ScorePrecisionPartitionA[i] = precision; 213 | f1ScoreRecallPartitionA[i] = recall; 214 | bestIdPartitionA[i]=*it; 215 | } 216 | precisionPartitionA[i] = precision > precisionPartitionA[i] ? precision : precisionPartitionA[i]; 217 | recallPartitionA[i] = recall > recallPartitionA[i] ? recall : recallPartitionA[i]; 218 | } 219 | } 220 | 221 | std::cout << "Computing F1Score of partition B" << std::endl; 222 | for( unsigned int i = 0; i < partitionB->size(); i++){ 223 | std::set* community = (*partitionB)[i]; 224 | std::set communities; 225 | 226 | // Selecting candidate communities to compare with. 
227 | for( auto it = community->begin(); it != community->end(); it++ ) { 228 | auto it2 = nodePartitionA.find(*it); 229 | if(it2 != nodePartitionA.end()) { 230 | for( auto it3 = (*it2).second.begin(); it3 != (*it2).second.end(); it3++ ) { 231 | communities.insert(*it3); 232 | } 233 | coveragePartitionB[i]++; 234 | } 235 | } 236 | 237 | for( auto it = communities.begin(); it != communities.end(); it++){ 238 | std::set* community2 = (*partitionA)[*it]; 239 | double precision; 240 | double recall; 241 | double f1Score = F1Score(*community,*community2, &precision, &recall ); 242 | if(f1Score >= f1ScorePartitionB[i]) { 243 | f1ScorePartitionB[i] = f1Score; 244 | f1ScorePrecisionPartitionB[i] = precision; 245 | f1ScoreRecallPartitionB[i] = recall; 246 | bestIdPartitionB[i]=*it; 247 | } 248 | precisionPartitionB[i] = precision > precisionPartitionB[i] ? precision : precisionPartitionB[i]; 249 | recallPartitionB[i] = recall > recallPartitionB[i] ? recall : recallPartitionB[i]; 250 | } 251 | } 252 | 253 | std::cout << "Average precision partition A: " << Average(precisionPartitionA, partitionA->size() ) << std::endl; 254 | std::cout << "Average recall partition A: " << Average(recallPartitionA, partitionA->size() ) << std::endl; 255 | 256 | std::cout << "Average precision partition B: " << Average(precisionPartitionB, partitionB->size() ) << std::endl; 257 | std::cout << "Average recall partition B: " << Average(recallPartitionB, partitionB->size() ) << std::endl; 258 | 259 | std::cout << "F1Score: " << (Average( f1ScorePartitionA, partitionA->size() ) + Average( f1ScorePartitionB, partitionB->size() )) / 2 << std::endl; 260 | 261 | std::ofstream outputFileA; 262 | outputFileA.open(std::string(argv[3]).append(".partA").c_str()); 263 | outputFileA << "id|size|precision|recall|f1score|f1scorePrecision|f1scoreRecall|coverage|id2|other_coverage\n"; 264 | for( int i = 0; i < partitionA->size(); ++i) { 265 | std::set* community = (*partitionA)[i]; 266 | outputFileA << i << "|" 
<< community->size() << "|" << precisionPartitionA[i] << "|" << recallPartitionA[i] << "|" << f1ScorePartitionA[i] << "|" << f1ScorePrecisionPartitionA[i] << "|" << f1ScoreRecallPartitionA[i] << "|" << coveragePartitionA[i] / (double) community->size() << "|" << bestIdPartitionA[i] << "|" << coveragePartitionB[bestIdPartitionA[i]] / (double)(*partitionB)[bestIdPartitionA[i]]->size() << "\n"; 267 | } 268 | outputFileA.close(); 269 | 270 | std::ofstream outputFileB; 271 | outputFileB.open(std::string(argv[3]).append(".partB").c_str()); 272 | outputFileB << "id|size|precision|recall|f1score|f1scorePrecision|f1scoreRecall|coverage|id2|other_coverage\n"; 273 | for( int i = 0; i < partitionB->size(); ++i) { 274 | std::set* community = (*partitionB)[i]; 275 | outputFileB << i << "|" << community->size() << "|" << precisionPartitionB[i] << "|" << recallPartitionB[i] << "|" << f1ScorePartitionB[i] << "|" << f1ScorePrecisionPartitionB[i] << "|" << f1ScoreRecallPartitionB[i] << "|" << coveragePartitionB[i] / (double) community->size() << "|" << bestIdPartitionB[i] << "|" << coveragePartitionA[bestIdPartitionB[i]] / (double)(*partitionA)[bestIdPartitionB[i]]->size() << "\n"; 276 | } 277 | outputFileB.close(); 278 | 279 | delete [] f1ScorePartitionA; 280 | delete [] f1ScorePartitionB; 281 | 282 | delete [] f1ScorePrecisionPartitionA; 283 | delete [] f1ScorePrecisionPartitionB; 284 | 285 | delete [] f1ScoreRecallPartitionA; 286 | delete [] f1ScoreRecallPartitionB; 287 | 288 | delete [] precisionPartitionA; 289 | delete [] precisionPartitionB; 290 | 291 | delete [] recallPartitionA; 292 | delete [] recallPartitionB; 293 | 294 | delete [] coveragePartitionA; 295 | delete [] coveragePartitionB; 296 | 297 | delete [] bestIdPartitionA; 298 | delete [] bestIdPartitionB; 299 | 300 | return 0; 301 | } 302 | -------------------------------------------------------------------------------- /tools/communityAnalyzer/source/main.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | using namespace scd; 21 | 22 | std::map id_map; 23 | 24 | struct Subgraph 25 | { 26 | unsigned int m_Size; 27 | std::set* m_Adjacencies; 28 | 29 | Subgraph() { 30 | m_Size = 0; 31 | m_Adjacencies = NULL; 32 | } 33 | 34 | ~Subgraph() { 35 | Clear(); 36 | } 37 | void Clear() { 38 | m_Size = 0; 39 | if(m_Adjacencies!=NULL) { 40 | delete [] m_Adjacencies; 41 | m_Adjacencies = NULL; 42 | } 43 | } 44 | }; 45 | 46 | 47 | struct Statistics { 48 | unsigned int m_InternalTriangles; 49 | unsigned int m_TotalTriangles; 50 | unsigned int m_InternalDegree; 51 | unsigned int m_Diameter; // el diametre de la comunitat. 52 | unsigned int m_Bridges; // nombre d'arestes pont dins la comunitat. 53 | unsigned int m_Size; // la mida de la comunitat. 54 | unsigned int m_ConnectedComponents; 55 | unsigned int m_TriangularComponents; 56 | double m_Wcc; 57 | double m_TriangleRatio; // trianglesIn / trianglesTotals. 58 | double m_TriangleDensity; // trianglesIn / possibles trianglesIn. 59 | double m_EdgeDensity; // per cada node, percentatge de nodes de la comunitat que son veins seus. 60 | double m_Conductance; // conductança de la comunitat. 61 | double m_Tpr; // triangle participation ratio. 62 | double m_BridgeRatio; // nombre d'arestes pont dins la comunitat. 
63 | double m_CC; 64 | double m_MinimumOverlapp; 65 | double m_MaximumOverlapp; 66 | 67 | Statistics() { 68 | Clear(); 69 | } 70 | 71 | void Clear() { 72 | m_InternalTriangles = 0; 73 | m_TotalTriangles = 0; 74 | m_InternalDegree = 0; 75 | m_Wcc = 0.0; 76 | m_TriangleRatio = 0.0; 77 | m_TriangleDensity = 0.0; 78 | m_TriangularComponents = 0; 79 | m_ConnectedComponents = 0; 80 | m_EdgeDensity = 0.0; 81 | m_Conductance = 0.0; 82 | m_Tpr = 0.0; 83 | m_Diameter = 0; 84 | m_Bridges = 0; 85 | m_BridgeRatio = 0.0; 86 | m_CC = 0.0; 87 | m_Size = 0; 88 | m_MinimumOverlapp = 0.0; 89 | m_MaximumOverlapp = 0.0; 90 | } 91 | }; 92 | 93 | struct Community { 94 | std::set m_Nodes; 95 | Statistics m_Statistics; 96 | Subgraph m_Subgraph; 97 | int m_Id; 98 | 99 | void Clear() { 100 | m_Nodes.clear(); 101 | m_Statistics.Clear(); 102 | m_Subgraph.Clear(); 103 | m_Id = -1; 104 | } 105 | }; 106 | 107 | std::vector communities; 108 | 109 | bool ParseCommunity(std::ifstream& file, const scd::CGraph& graph, Community& community, int id) { 110 | community.Clear(); 111 | std::string line; 112 | if(std::getline(file,line)) { 113 | community.m_Id = id; 114 | std::string delimiter(" "); 115 | size_t nextToken = 0; 116 | std::istringstream stream(line); 117 | unsigned int node; 118 | while( stream >> node ) { 119 | community.m_Nodes.insert(id_map.find(node)->second); 120 | } 121 | return true; 122 | } 123 | return false; 124 | } 125 | 126 | void ComputeSubgraph( const scd::CGraph& graph, Community& community ) { 127 | community.m_Subgraph.m_Size = community.m_Nodes.size(); 128 | community.m_Subgraph.m_Adjacencies = new std::set[community.m_Nodes.size()]; 129 | std::map mapa; 130 | unsigned int k = 0; 131 | for(std::set::iterator it = community.m_Nodes.begin();it!=community.m_Nodes.end();it++,k++) { 132 | mapa.insert(std::pair(*it,k)); 133 | } 134 | k = 0; 135 | for(std::set::iterator it = community.m_Nodes.begin();it!=community.m_Nodes.end();it++,k++) { 136 | unsigned int node = *it; 137 | 
unsigned int degree = graph.GetDegree(node); 138 | const uint32_t * adjacencies = graph.GetNeighbors(node); 139 | for(unsigned int j = 0; j < degree;j++) { 140 | uint32_t neighbor = adjacencies[j]; 141 | if(community.m_Nodes.find(neighbor) != community.m_Nodes.end()) { 142 | std::map::iterator it2 = mapa.find(neighbor); 143 | if(it2==mapa.end()) { 144 | std::cout << "ERROR contructing subgraph " << std::endl; 145 | } 146 | community.m_Subgraph.m_Adjacencies[k].insert((*it2).second); 147 | } 148 | } 149 | } 150 | } 151 | 152 | void ComputeConnectedComponents(const CGraph& graph, Community& community) { 153 | bool * visited = new bool[community.m_Subgraph.m_Size]; 154 | for(unsigned int j = 0; j < community.m_Subgraph.m_Size;++j) { 155 | visited[j] = false; 156 | } 157 | unsigned int connectedComponents = 0; 158 | for(unsigned int j = 0; j < community.m_Subgraph.m_Size;++j) { 159 | std::list bfsList; 160 | if(!visited[j]) { 161 | connectedComponents++; 162 | visited[j] = true; 163 | bfsList.push_back(j); 164 | while(!bfsList.empty()) { 165 | unsigned int nextNode = bfsList.front(); 166 | bfsList.pop_front(); 167 | for(std::set::iterator it = community.m_Subgraph.m_Adjacencies[nextNode].begin();it!=community.m_Subgraph.m_Adjacencies[nextNode].end();++it) { 168 | unsigned int nextNeighbor = *it; 169 | if(!visited[nextNeighbor]) { 170 | visited[nextNeighbor] = true; 171 | bfsList.push_back(nextNeighbor); 172 | } 173 | } 174 | } 175 | } 176 | 177 | } 178 | delete [] visited; 179 | community.m_Statistics.m_ConnectedComponents = connectedComponents; 180 | } 181 | 182 | 183 | 184 | void ComputeTriangles( const CGraph& graph, Community& community, 185 | const unsigned int nodeId1, 186 | const unsigned int nodeId2, 187 | unsigned int& totalTriangles, 188 | unsigned int& internalTriangles) { 189 | bool trianglePossible = true; 190 | if(community.m_Nodes.find(nodeId1)==community.m_Nodes.end() || community.m_Nodes.find(nodeId2)==community.m_Nodes.end()) { 191 | 
trianglePossible=false; 192 | } 193 | totalTriangles = 0; 194 | internalTriangles = 0; 195 | unsigned int i = 0; 196 | unsigned int j = 0; 197 | const uint32_t * adjacencies1 = graph.GetNeighbors(nodeId1); 198 | const scd::uint32_t * adjacencies2 = graph.GetNeighbors(nodeId2); 199 | while(i < graph.GetDegree(nodeId1) && j < graph.GetDegree(nodeId2)) { 200 | uint32_t node1 = adjacencies1[i]; 201 | uint32_t node2 = adjacencies2[j]; 202 | if( node1 == node2 ) { 203 | totalTriangles++; 204 | i++; 205 | j++; 206 | if(community.m_Nodes.find(node1)!=community.m_Nodes.end() && trianglePossible) { 207 | internalTriangles++; 208 | } 209 | } 210 | else { 211 | if(adjacencies1[i] < adjacencies2[j]) { 212 | i++; 213 | } 214 | else { 215 | j++; 216 | } 217 | } 218 | } 219 | } 220 | 221 | void ComputeTriangles( const CGraph& graph, Community& community) { 222 | std::set::iterator it = community.m_Nodes.begin(); 223 | for(;it!=community.m_Nodes.end();++it) { 224 | unsigned int node = *it; 225 | unsigned int degree=graph.GetDegree(node); 226 | const uint32_t * adjacencies = graph.GetNeighbors(node); 227 | for(unsigned int j = 0; j < degree;++j) { 228 | unsigned int totalTrianglesAux; 229 | unsigned int internalTrianglesAux; 230 | unsigned int neighbor = adjacencies[j]; 231 | ComputeTriangles(graph,community,node,neighbor,totalTrianglesAux,internalTrianglesAux); 232 | community.m_Statistics.m_TotalTriangles+=totalTrianglesAux; 233 | community.m_Statistics.m_InternalTriangles+=internalTrianglesAux; 234 | } 235 | } 236 | } 237 | 238 | 239 | void ComputeTriangleDensity( const CGraph& graph, Community& community ) { 240 | ComputeTriangles(graph, community); 241 | community.m_Statistics.m_TriangleDensity = (double)community.m_Statistics.m_InternalTriangles/(double)(community.m_Nodes.size()*(community.m_Nodes.size()-1)*(community.m_Nodes.size()-2)); 242 | } 243 | 244 | void ComputeTriangleRatio( const CGraph& graph, Community& community ) { 245 | community.m_Statistics.m_TriangleRatio = 
0; 246 | if(community.m_Statistics.m_TotalTriangles>0){ 247 | community.m_Statistics.m_TriangleRatio = (double)community.m_Statistics.m_InternalTriangles/(double)community.m_Statistics.m_TotalTriangles; 248 | } 249 | } 250 | 251 | void ComputeCC( const CGraph& graph, Community& community ) { 252 | std::set::iterator it = community.m_Nodes.begin(); 253 | unsigned int wEdges = 0;; 254 | for(;it!=community.m_Nodes.end();++it) { 255 | unsigned int node = *it; 256 | unsigned int degree=graph.GetDegree(node); 257 | unsigned int internalDegree = 0; 258 | const uint32_t * adjacencies = graph.GetNeighbors(node); 259 | for(unsigned int j = 0; j < degree;++j) { 260 | unsigned int neighbor = adjacencies[j]; 261 | if(community.m_Nodes.find(neighbor)!=community.m_Nodes.end()) ++internalDegree; 262 | } 263 | wEdges += internalDegree*(internalDegree-1); 264 | } 265 | community.m_Statistics.m_CC = community.m_Statistics.m_InternalTriangles / (double)(wEdges); 266 | } 267 | 268 | 269 | void ComputeEdgeDensity( const CGraph& graph, Community& community ) { 270 | std::set::iterator it = community.m_Nodes.begin(); 271 | for(;it!=community.m_Nodes.end();++it) { 272 | unsigned int node = *it; 273 | unsigned int degree=graph.GetDegree(node); 274 | const uint32_t * adjacencies = graph.GetNeighbors(node); 275 | for(unsigned int j = 0; j < degree;++j) { 276 | unsigned int neighbor = adjacencies[j]; 277 | if(community.m_Nodes.find(neighbor)!=community.m_Nodes.end()) community.m_Statistics.m_InternalDegree++; 278 | } 279 | } 280 | community.m_Statistics.m_EdgeDensity = community.m_Statistics.m_InternalDegree / (double)(community.m_Nodes.size()*(community.m_Nodes.size() - 1)); 281 | } 282 | 283 | void ComputeBridges( const CGraph& graph, Community& community ) { 284 | ComputeSubgraph(graph,community); 285 | std::vector visited(community.m_Subgraph.m_Size,false); 286 | std::list dfsList; 287 | std::list stack; 288 | std::list booleanStack; 289 | unsigned int noParent = 0xffffffff; 290 | unsigned 
int numEdges = 0; 291 | for(unsigned int j = 0; j < community.m_Subgraph.m_Size;j++) { 292 | if( !visited[j] ) { 293 | unsigned int counter = 0; 294 | std::vector labels(community.m_Subgraph.m_Size); 295 | std::vector lowestLabels(community.m_Subgraph.m_Size); 296 | std::vector parents(community.m_Subgraph.m_Size,noParent); 297 | dfsList.push_back(j); 298 | while(!dfsList.empty()){ 299 | unsigned int nextNode = dfsList.back(); 300 | dfsList.pop_back(); 301 | if( !visited[nextNode] ) { 302 | stack.push_back(nextNode); 303 | booleanStack.push_back(true); 304 | visited[nextNode] = true; 305 | labels[nextNode] = counter; 306 | lowestLabels[nextNode] = counter; 307 | counter++; 308 | for(std::set::iterator it = community.m_Subgraph.m_Adjacencies[nextNode].begin(); 309 | it!=community.m_Subgraph.m_Adjacencies[nextNode].end(); 310 | ++it) { 311 | dfsList.push_back(*it); 312 | if( !visited[*it] ){ 313 | parents[*it] = nextNode; 314 | } 315 | } 316 | } else if( parents[nextNode] != 0xffffffff && nextNode != parents[parents[nextNode]]) { 317 | stack.push_back(nextNode); 318 | booleanStack.push_back(false); 319 | } 320 | } 321 | while(!stack.empty()) { 322 | unsigned int nextNode = stack.back(); 323 | stack.pop_back(); 324 | bool action = booleanStack.back(); 325 | booleanStack.pop_back(); 326 | if( action ) { 327 | if( parents[nextNode] != 0xffffffff ) { 328 | lowestLabels[parents[nextNode]] = std::min(lowestLabels[parents[nextNode]], lowestLabels[nextNode]); 329 | if(lowestLabels[nextNode] > labels[parents[nextNode]]) { 330 | community.m_Statistics.m_Bridges++; 331 | } 332 | } 333 | } else { 334 | for(std::set::iterator it = community.m_Subgraph.m_Adjacencies[nextNode].begin(); 335 | it!=community.m_Subgraph.m_Adjacencies[nextNode].end(); 336 | ++it) { 337 | if( parents[nextNode] != 0xffffffff && parents[nextNode] != *it) { 338 | lowestLabels[nextNode] = std::min(lowestLabels[nextNode],labels[*it]); 339 | } 340 | } 341 | } 342 | } 343 | } 344 | 
numEdges+=community.m_Subgraph.m_Adjacencies[j].size(); 345 | } 346 | } 347 | 348 | 349 | void ComputeBridgeRatio( const CGraph& graph, Community& community ) { 350 | community.m_Statistics.m_BridgeRatio = (community.m_Statistics.m_Bridges*2) / (double)community.m_Statistics.m_InternalDegree; 351 | } 352 | 353 | void ComputeTPR( const CGraph& graph, Community& community ) { 354 | unsigned int numNodesWithTriangle = 0; 355 | for(std::set::iterator it = community.m_Nodes.begin();it!=community.m_Nodes.end();it++) { 356 | unsigned int node = *it; 357 | unsigned int degree = graph.GetDegree(node); 358 | unsigned int outDegree = 0; 359 | unsigned int numTriangles = 0; 360 | const uint32_t * adjacencies = graph.GetNeighbors(node); 361 | for(unsigned int j = 0; j < degree; j++) { 362 | if(community.m_Nodes.find(adjacencies[j]) != community.m_Nodes.end()) { 363 | unsigned int totalTriangles; 364 | unsigned int internalTriangles; 365 | ComputeTriangles(graph,community,node, adjacencies[j], totalTriangles, internalTriangles ); 366 | if( internalTriangles > 0 ) { 367 | numTriangles++; 368 | } 369 | } 370 | } 371 | if( numTriangles > 0 ) { 372 | numNodesWithTriangle++; 373 | } 374 | } 375 | community.m_Statistics.m_Tpr = numNodesWithTriangle/ (double)community.m_Nodes.size(); 376 | } 377 | 378 | static unsigned int SelectMax( const std::vector& distances ) { 379 | unsigned int size = distances.size(); 380 | assert(size > 0); 381 | int max = distances[0]; 382 | unsigned int maxNode = 0; 383 | for( unsigned int i = 1; i < size; ++i ) { 384 | if( distances[i] > max ) { 385 | maxNode = i; 386 | max = distances[i]; 387 | } 388 | } 389 | return maxNode; 390 | } 391 | 392 | void ComputeDiameter(const CGraph& graph, Community& community) { 393 | std::vector distances(community.m_Subgraph.m_Size, 0xffffffff); 394 | int maxExcentricity = 0; 395 | unsigned int numIterations = sqrt(community.m_Subgraph.m_Size); 396 | for( unsigned int k = 0; k < numIterations; k++ ) { 397 | std::vector 
visited(community.m_Subgraph.m_Size, false); 398 | std::list bfsList; 399 | unsigned int node; 400 | if( k == 0 ) { node = rand() % community.m_Subgraph.m_Size; } 401 | else { node = SelectMax(distances); } 402 | bfsList.push_back(node); 403 | std::list levelList; 404 | levelList.push_back(-1); 405 | while(!bfsList.empty()) { 406 | unsigned int nextNode = bfsList.front(); 407 | bfsList.pop_front(); 408 | int level = levelList.front() + 1; 409 | levelList.pop_front(); 410 | distances[nextNode] = std::min( distances[nextNode], level ); 411 | maxExcentricity = std::max(maxExcentricity, level); 412 | for(std::set::iterator it = community.m_Subgraph.m_Adjacencies[nextNode].begin();it!=community.m_Subgraph.m_Adjacencies[nextNode].end();it++) { 413 | unsigned int nextNeighbor = *it; 414 | if(!visited[nextNeighbor]) { 415 | visited[nextNeighbor] = true; 416 | bfsList.push_back(nextNeighbor); 417 | levelList.push_back(level); 418 | } 419 | } 420 | } 421 | } 422 | community.m_Statistics.m_Diameter = static_cast(maxExcentricity); 423 | } 424 | 425 | void ComputeConductance(const CGraph& graph, Community& community) { 426 | unsigned int outDegree = 0; 427 | unsigned int totalDegree = 0; 428 | std::set::iterator it = community.m_Nodes.begin(); 429 | for(;it!=community.m_Nodes.end();it++) { 430 | unsigned int node = *it; 431 | unsigned int degree = graph.GetDegree(node); 432 | const uint32_t * adjacencies = graph.GetNeighbors(node); 433 | totalDegree += degree; 434 | for(unsigned int j = 0; j < degree;j++) { 435 | unsigned int neighbor = adjacencies[j]; 436 | if(community.m_Nodes.find(neighbor) == community.m_Nodes.end()) { 437 | outDegree++; 438 | } 439 | } 440 | } 441 | community.m_Statistics.m_Conductance = 0; 442 | if(totalDegree>0) { 443 | community.m_Statistics.m_Conductance = outDegree/(double)totalDegree; 444 | } 445 | } 446 | 447 | void RemoveNoTriangles( const CGraph& graph, Community& community, Community& outCommunity ) { 448 | unsigned int numNodesWithTriangle = 0; 
449 | for(std::set::iterator it = community.m_Nodes.begin();it!=community.m_Nodes.end();it++) { 450 | unsigned int node = *it; 451 | unsigned int degree = graph.GetDegree(node); 452 | unsigned int outDegree = 0; 453 | unsigned int numTriangles = 0; 454 | const uint32_t * adjacencies = graph.GetNeighbors(node); 455 | for(unsigned int j = 0; j < degree; j++) { 456 | if(community.m_Nodes.find(adjacencies[j]) != community.m_Nodes.end()) { 457 | unsigned int totalTriangles; 458 | unsigned int internalTriangles; 459 | ComputeTriangles(graph,community,node, adjacencies[j], totalTriangles, internalTriangles ); 460 | if( internalTriangles > 0 ) { 461 | numTriangles++; 462 | } 463 | } 464 | } 465 | if( numTriangles > 0 ) { 466 | outCommunity.m_Nodes.insert(node); 467 | } 468 | } 469 | } 470 | 471 | void ComputeTriangularComponents( const CGraph& graph, Community& community) { 472 | Community auxCommunity; 473 | RemoveNoTriangles(graph,community,auxCommunity); 474 | ComputeSubgraph(graph,auxCommunity); 475 | ComputeConnectedComponents(graph,auxCommunity); 476 | community.m_Statistics.m_TriangularComponents = auxCommunity.m_Statistics.m_ConnectedComponents; 477 | } 478 | 479 | 480 | void ComputeOverlapp( const CGraph& graph, Community& community, std::map >& nodeCommunities ) { 481 | std::set alreadyChecked; 482 | int minOverlapp = community.m_Nodes.size(); 483 | int maxOverlapp = 0; 484 | for( std::set::iterator it = community.m_Nodes.begin(); it != community.m_Nodes.end(); ++it ) { 485 | const std::vector< int >& coms = nodeCommunities[*it]; 486 | for( std::vector< int >::const_iterator it2 = coms.begin(); it2 != coms.end(); ++it2 ) { 487 | if( (*it2) != community.m_Id && alreadyChecked.find(*it2) == alreadyChecked.end() ) { 488 | std::set aux; 489 | const std::set< unsigned int >& other = communities[*it2].m_Nodes; 490 | //std::cout << other.size() << std::endl; 491 | // std::cout << community.m_Nodes.size() << std::endl; 492 | std::set_intersection( other.begin(), 
other.end(), community.m_Nodes.begin(), community.m_Nodes.end(), std::inserter(aux,aux.begin())); 493 | minOverlapp = minOverlapp > aux.size() ? aux.size() : minOverlapp; 494 | maxOverlapp = maxOverlapp < aux.size() ? aux.size() : maxOverlapp; 495 | alreadyChecked.insert( *it2 ); 496 | } 497 | } 498 | } 499 | community.m_Statistics.m_MinimumOverlapp = minOverlapp / (double)community.m_Nodes.size(); 500 | community.m_Statistics.m_MaximumOverlapp = maxOverlapp / (double)community.m_Nodes.size(); 501 | } 502 | 503 | void ComputeStatistics( const CGraph& graph, Community& community ) { 504 | ComputeTriangleDensity(graph,community); 505 | ComputeTriangleRatio(graph,community); 506 | ComputeEdgeDensity(graph,community); 507 | ComputeCC(graph,community); 508 | ComputeBridges(graph,community); 509 | ComputeBridgeRatio(graph,community); 510 | ComputeTPR(graph,community); 511 | ComputeConductance(graph,community); 512 | ComputeDiameter(graph,community); 513 | ComputeTriangularComponents(graph,community); 514 | } 515 | 516 | #define CHECK_ARGUMENT_STRING(index, option,variable,setVariable) \ 517 | if( strcmp(argv[index],option) == 0 ){ \ 518 | setVariable = true; \ 519 | if( (index+1) < argc ) { \ 520 | variable = argv[index+1]; \ 521 | } else { \ 522 | std::cout << "Invalid options" << std::endl; \ 523 | return 1;\ 524 | }\ 525 | } 526 | 527 | #define CHECK_ARGUMENT_FLOAT(index, option,variable,setVariable) \ 528 | if( strcmp(argv[index],option) == 0 ){ \ 529 | setVariable = true; \ 530 | if( (index+1) < argc ) { \ 531 | variable = atof(argv[index+1]); \ 532 | } else { \ 533 | std::cout << "Invalid options" << std::endl; \ 534 | return 1;\ 535 | }\ 536 | } 537 | 538 | #define CHECK_ARGUMENT_INT(index, option,variable,setVariable) \ 539 | if( strcmp(argv[index],option) == 0 ){ \ 540 | setVariable = true; \ 541 | if( (index+1) < argc ) { \ 542 | variable = atoi(argv[index+1]); \ 543 | } else { \ 544 | std::cout << "Invalid options" << std::endl; \ 545 | return 1;\ 546 | }\ 547 
| } 548 | 549 | #define CHECK_FLAG(index, option,setVariable) \ 550 | if( strcmp(argv[index],option) == 0 ){ \ 551 | setVariable = true; \ 552 | } 553 | 554 | void PrintUsage() { 555 | std::cout << "communityAnalyzer -f -p -o " << std::endl; 556 | } 557 | 558 | void PrintCommunity( std::ofstream& file, const Community& community ) { 559 | file << community.m_Id << "\t" 560 | << community.m_Statistics.m_TriangleDensity << "\t" 561 | << community.m_Statistics.m_CC << "\t" 562 | << community.m_Statistics.m_EdgeDensity << "\t" 563 | << community.m_Statistics.m_TriangleRatio << "\t" 564 | << community.m_Nodes.size() << "\t" 565 | << community.m_Statistics.m_Diameter << "\t" 566 | << community.m_Statistics.m_Tpr << "\t" 567 | << community.m_Statistics.m_BridgeRatio << "\t" 568 | << community.m_Statistics.m_Conductance << "\t" 569 | << community.m_Statistics.m_TriangularComponents << "\t" 570 | << community.m_Statistics.m_MinimumOverlapp << "\t" 571 | << community.m_Statistics.m_MaximumOverlapp << "\t" 572 | << std::endl; 573 | } 574 | 575 | void PrintFileHeader( std::ofstream& file) { 576 | file << "Id\t" 577 | << "TriangleDensity\t" 578 | << "CC\t" 579 | << "EdgeDensity\t" 580 | << "TriangleRatio\t" 581 | << "Size\t" 582 | << "Diameter\t" 583 | << "Tpr\t" 584 | << "BridgeRatio\t" 585 | << "Conductance\t" 586 | << "TriangularComponents\t" 587 | << "MinimumOverlapp\t" 588 | << "MaximumOverlapp\t" 589 | << std::endl; 590 | } 591 | 592 | int main(int argc, char ** argv) { 593 | char* graphFileName = NULL; 594 | char* partitionFileName = NULL; 595 | char* outputFileName = NULL; 596 | bool graphFileNameSet = false; 597 | bool partitionFileNameSet = false; 598 | bool outputFileNameSet = false; 599 | 600 | for( unsigned int i = 1; i < argc; i++) { 601 | CHECK_ARGUMENT_STRING(i, "-f", graphFileName, graphFileNameSet) 602 | CHECK_ARGUMENT_STRING(i, "-o", outputFileName, outputFileNameSet) 603 | CHECK_ARGUMENT_STRING(i, "-p", partitionFileName, partitionFileNameSet) 604 | } 605 | 
606 | if( !graphFileNameSet || !outputFileNameSet || !partitionFileNameSet ) { 607 | PrintUsage(); 608 | exit(1); 609 | } 610 | 611 | CGraph graph; 612 | if(graph.Load(graphFileName,1)!=0) { 613 | std::cout << "ERROR: Unable to load graph" << std::endl; 614 | } 615 | 616 | const uint32_t* map_array = graph.GetMap(); 617 | for( uint32_t i = 0; i < graph.GetNumNodes(); ++i) { 618 | id_map[map_array[i]] = i; 619 | } 620 | 621 | std::ofstream outputFile; 622 | outputFile.open(outputFileName); 623 | PrintFileHeader(outputFile); 624 | std::ifstream partitionFile; 625 | partitionFile.open(partitionFileName); 626 | std::map< int, std::vector > nodeCommunityIndex; 627 | int numParsed = 0; 628 | Community community; 629 | while(ParseCommunity(partitionFile,graph,community, numParsed)) { 630 | communities.push_back(community); 631 | for( std::set< unsigned int >::iterator it = community.m_Nodes.begin(); it != community.m_Nodes.end(); ++it ) { 632 | nodeCommunityIndex[*it].push_back(community.m_Id); 633 | } 634 | ++numParsed; 635 | if( numParsed % 1000 == 0 ) { 636 | std::cout << "Parsed " << numParsed << " communities" << std::endl; 637 | } 638 | } 639 | 640 | int numComputed = 0; 641 | for( int i = 0; i < communities.size(); ++i ) { 642 | if(communities[i].m_Nodes.size() > 2) { 643 | ComputeStatistics(graph,communities[i]); 644 | ComputeOverlapp( graph, communities[i], nodeCommunityIndex ); 645 | PrintCommunity(outputFile,communities[i]); 646 | } 647 | ++numComputed; 648 | if( numComputed % 1000 == 0 ) { 649 | std::cout << "Computed statistics of " << numComputed << " communities" << std::endl; 650 | } 651 | } 652 | partitionFile.close(); 653 | outputFile.close(); 654 | return 0; 655 | } 656 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, 
Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 
39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 
76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 
113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. 
For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 
174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 
209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. 
Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. 
If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 
309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 
336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 
360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 
428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 
486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. 
If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 
578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 
613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail.
651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /source/communities/communities.cpp: -------------------------------------------------------------------------------- 1 | /*SCD is free software: you can redistribute it and/or modify 2 | it under the terms of the GNU General Public License as published by 3 | the Free Software Foundation, either version 3 of the License, or 4 | (at your option) any later version. 5 | 6 | SCD is distributed in the hope that it will be useful, 7 | but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | GNU General Public License for more details.
10 | 11 | You should have received a copy of the GNU General Public License 12 | along with this program. If not, see . 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | namespace scd { 33 | 34 | #define SCD_INVALID_COMMUNITY 0xffffffff 35 | 36 | uint32_t num_threads = 1; 37 | 38 | /** @brief Types of movements.*/ 39 | enum MovementType { 40 | E_REMOVE, 41 | E_REMOVE_AND_INSERT, 42 | E_NO_MOVEMENT 43 | }; 44 | 45 | /** @brief This struct represents a movement.*/ 46 | struct Movement { 47 | MovementType m_MovementType; 48 | uint32_t m_NodeId; 49 | uint32_t m_Community; 50 | double64_t m_Improvement; 51 | }; 52 | 53 | /** @brief Compares two node clustering. 54 | * @param e1 Void pointer to the first node clustering. 55 | * @param e2 Void pointer to the second node clustering. 56 | * @return -1 if e1 goes before e2. 1 if e1 goes after e2. 0 if e1 and e2 are equal.*/ 57 | static int Compare_NodeClusterings(const void* e1, const void* e2) { 58 | NodeClustering* nC1 = (NodeClustering*) e1; 59 | NodeClustering* nC2 = (NodeClustering*) e2; 60 | if (nC1->m_CC > nC2->m_CC) return -1; 61 | if (nC1->m_CC < nC2->m_CC) return 1; 62 | if (nC1->m_Degree > nC2->m_Degree) return -1; 63 | if (nC1->m_Degree < nC2->m_Degree) return 1; 64 | return 0; 65 | } 66 | 67 | /** @brief Compares two unsigned integers. 68 | * @param e1 Void pointer to the first unsigned integer. 69 | * @param e2 Void pointer to the second unsigned integer. 70 | * @return -1 if e1 goes before e2. 1 if e1 goes after e2. 
0 if e1 and e2 are equal.*/ 71 | static int Compare_Ids(const void* e1, const void* e2) { 72 | uint32_t id1 = *(uint32_t*) e1; 73 | uint32_t id2 = *(uint32_t*) e2; 74 | if (id1 < id2) return -1; 75 | if (id2 < id1) return 1; 76 | return 0; 77 | } 78 | 79 | /** @brief this function is used to compress the laberl space of a partition in order to be comprissed between 80 | * 0 and the actual number of communities - 1. 81 | * @param[in] graph The graph where the partition belongs. 82 | * @param[in] communities The array of current labels. 83 | * @param[out] destCommunities The array where the new labeling will be stored.*/ 84 | static uint32_t CompressCommunityLabels(const CGraph* graph, const uint32_t * communities, uint32_t * destCommunities) { 85 | std::map * map = new std::map(); 86 | uint32_t label = 0; 87 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 88 | if (!map->count(communities[i])) { 89 | map->insert(std::pair(communities[i], label)); 90 | label++; 91 | } 92 | } 93 | 94 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 95 | destCommunities[i] = (*(map->find(communities[i]))).second; 96 | } 97 | delete map; 98 | return label; 99 | } 100 | 101 | /** @brief Initializes a partition structure from a labels to communities array. 102 | * @param[in] graph The graph where the partition belongs. 103 | * @param[out] partition The partition structure where the partition will be stored. 104 | * @param[in] communities The array of nodes to community labels from which the partition is initialized. 
105 | * @param[in] alfa The alfa parameter controlling the cohesivness of the communities.*/ 106 | static uint32_t InitializeFromLabelsArray(const CGraph* graph, CommunityPartition* partition, const uint32_t* communities, const double64_t alfa) { 107 | 108 | //Initializing default values 109 | partition->m_NodeLabels = NULL; 110 | partition->m_CommunityIndices = NULL; 111 | partition->m_Communities = NULL; 112 | partition->m_InternalEdges = NULL; 113 | partition->m_ExternalEdges = NULL; 114 | partition->m_NodeWCC = NULL; 115 | partition->m_NumCommunities = 0; 116 | partition->m_WCC = 0; 117 | 118 | partition->m_NumNodes = graph->GetNumNodes(); 119 | partition->m_NodeLabels = new uint32_t[graph->GetNumNodes()]; 120 | if (!partition->m_NodeLabels) { 121 | printf("Error allocating node labels %u.\n", graph->GetNumNodes()); 122 | return 1; 123 | } 124 | 125 | uint32_t maxNumCommunities = 0; 126 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 127 | if (communities[i] > maxNumCommunities) { 128 | maxNumCommunities = communities[i]; 129 | } 130 | } 131 | maxNumCommunities++; 132 | 133 | partition->m_NumCommunities = CompressCommunityLabels(graph, communities, partition->m_NodeLabels); 134 | 135 | 136 | //Allocating space to store the communities 137 | partition->m_CommunityIndices = new uint32_t[partition->m_NumCommunities]; 138 | if (!partition->m_CommunityIndices) { 139 | printf("Error allocating labels indices.\n"); 140 | return 1; 141 | } 142 | partition->m_Communities = new uint32_t[partition->m_NumCommunities + graph->GetNumNodes()]; 143 | if (!partition->m_Communities) { 144 | printf("Error allocating inverted index.\n"); 145 | return 1; 146 | } 147 | 148 | partition->m_NodeWCC = new double64_t[graph->GetNumNodes()]; 149 | if (!partition->m_NodeWCC) { 150 | printf("Error allocating node labels %u.\n", graph->GetNumNodes()); 151 | return 1; 152 | } 153 | 154 | //Creating the counters the creation of the inverted index 155 | uint32_t* counters = new 
uint32_t[partition->m_NumCommunities]; 156 | if (!counters) { 157 | printf("Error allocating counters: %u\n", partition->m_NumCommunities); 158 | return 1; 159 | } 160 | 161 | #pragma omp parallel for schedule(SCD_SCHEDULING, SCD_THREAD_BLOCK_SIZE) 162 | for (uint32_t i = 0; i < partition->m_NumCommunities; i++) { 163 | counters[i] = 0; 164 | } 165 | //Computing community sizes; 166 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 167 | counters[partition->m_NodeLabels[i]]++; 168 | } 169 | //Initializing labels indices. 170 | uint32_t currentIndex = 0; 171 | for (uint32_t i = 0; i < partition->m_NumCommunities; i++) { 172 | if (counters[i] > 0) { 173 | partition->m_CommunityIndices[i] = currentIndex; 174 | partition->m_Communities[currentIndex] = counters[i]; 175 | currentIndex += counters[i] + 1; 176 | } else { 177 | partition->m_CommunityIndices[i] = SCD_INVALID_COMMUNITY; 178 | } 179 | counters[i] = 0; 180 | } 181 | //Initializing the inverted index. 182 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 183 | uint32_t lIndex = partition->m_CommunityIndices[partition->m_NodeLabels[i]]; 184 | assert(lIndex != SCD_INVALID_COMMUNITY); 185 | assert(counters[partition->m_NodeLabels[i]] < partition->m_Communities[lIndex]); 186 | partition->m_Communities[lIndex + counters[partition->m_NodeLabels[i]] + 1] = i; 187 | counters[partition->m_NodeLabels[i]]++; 188 | } 189 | 190 | for (uint32_t i = 0; i < partition->m_NumCommunities; i++) { 191 | if (partition->m_CommunityIndices[i] != SCD_INVALID_COMMUNITY) { 192 | uint32_t lIndex = partition->m_CommunityIndices[i]; 193 | qsort(&(partition->m_Communities[lIndex + 1]), partition->m_Communities[lIndex], sizeof (uint32_t), Compare_Ids); 194 | } 195 | } 196 | delete[] counters; 197 | 198 | partition->m_InternalEdges = new uint32_t[partition->m_NumCommunities]; 199 | if (!partition->m_InternalEdges) { 200 | printf("Error while allocating internal edges.\n"); 201 | return 1; 202 | } 203 | 204 | 
partition->m_ExternalEdges = new uint32_t[partition->m_NumCommunities]; 205 | if (!partition->m_ExternalEdges) { 206 | printf("Error while allocating external edges.\n"); 207 | return 1; 208 | } 209 | 210 | #pragma omp parallel for schedule(SCD_SCHEDULING, SCD_THREAD_BLOCK_SIZE) 211 | for (uint32_t i = 0; i < partition->m_NumCommunities; i++) { 212 | partition->m_InternalEdges[i] = 0; 213 | partition->m_ExternalEdges[i] = 0; 214 | } 215 | 216 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 217 | const uint32_t* adjacencies = graph->GetNeighbors(i); 218 | uint32_t degree = graph->GetDegree(i); 219 | for (uint32_t j = 0; j < degree; j++) { 220 | if (i < adjacencies[j]) { 221 | if (partition->m_NodeLabels[i] == partition->m_NodeLabels[adjacencies[j]]) { 222 | partition->m_InternalEdges[partition->m_NodeLabels[i]]++; 223 | } else { 224 | partition->m_ExternalEdges[partition->m_NodeLabels[i]]++; 225 | partition->m_ExternalEdges[partition->m_NodeLabels[adjacencies[j]]]++; 226 | } 227 | } 228 | } 229 | } 230 | 231 | partition->m_WCC = ComputeWCC(graph, alfa, partition->m_NodeLabels, partition->m_NumCommunities, partition->m_CommunityIndices, partition->m_Communities, partition->m_NodeWCC); 232 | return 0; 233 | } 234 | 235 | /** @brief Computes the increment on WCC for inserting a node into a community. 236 | @param[in] r The size of the community. 237 | @param[in] d_in The number of edges between the inserted vertex and the community. 238 | @param[in] d_out The number of edges between the inserted vertex and the rest of the graph. 239 | @param[in] c_out The number of edges leaving the community (note that this MUST include d_in). 240 | @param[in] p_in The probability that an edge inside of the community exists. 241 | @param[in] p_ext The probability that two edges leaving the community close a triangle. 
242 | @param[in] alfa The alfa parameter controlling the cohesivness of the communities.*/ 243 | static double64_t CheckForIncrement(int32_t r, int32_t d_in, int32_t d_out, uint32_t c_out, double64_t p_in, double64_t p_ext, const double64_t alfa) { 244 | /* double64_t t; 245 | if (r > 0) { 246 | t = (c_out - d_in) / (double64_t) r; 247 | } else { 248 | t = 0.0; 249 | } 250 | double64_t A = 0.0; 251 | double64_t denom = 0.0; 252 | denom = (d_in * (d_in - 1) * p_in + d_out * (d_out + d_in - 1) * p_ext); 253 | if (denom != 0.0 && ((r + d_out) > 0)) { 254 | A = ((d_in * (d_in - 1) * p_in) / denom) * (d_in + d_out) / (double64_t) (r + d_out); 255 | } 256 | double64_t BMinus = 0.0; 257 | denom = (r - 1)*(r - 2) * p_in * p_in * p_in + (d_in - 1) * p_in + t * (r - 1) * p_in * p_ext + t * (t - 1) * p_ext + (d_out) * p_ext; 258 | if (denom != 0.0 && ((r + t) > 0)) { 259 | BMinus = (((d_in - 1) * p_in) / denom) * ((r - 1) * p_in + 1 + t) / (r + t); 260 | } 261 | double64_t CMinus = 0.0; 262 | denom = (r - 1)*(r - 2) * p_in * p_in * p_in + t * (t - 1) * p_ext + t * (r - 1)*(p_in) * p_ext; 263 | if (denom != 0.0 && ((r + t) > 0) && ((r - 1 + t) > 0)) { 264 | CMinus = -(((r - 1)*(r - 2) * p_in * p_in * p_in) / denom) * ((r - 1) * p_in + t) / ((r + t)*(r - 1 + t)); 265 | } 266 | return (A + d_in * BMinus + (r - d_in) * CMinus); 267 | */ 268 | 269 | double64_t t; 270 | if (r > 0) { 271 | t = (c_out - d_in) / (double64_t) r; 272 | } else { 273 | t = 0.0; 274 | } 275 | // Node v 276 | double64_t A = 0.0; 277 | double64_t denom = 0.0; 278 | denom = (d_in * (d_in - 1) * p_in + 279 | d_out * (d_out - 1) * p_ext) + 280 | d_out * d_in * p_ext; 281 | denom *= d_in + d_out + alfa*(r-1-d_in); 282 | if (denom != 0.0) { 283 | A = ((d_in * (d_in - 1) * p_in) * (d_in + d_out)) / (denom); 284 | } 285 | 286 | // Nodes connected with v 287 | double64_t BMinus = 0.0; 288 | denom = (r - 1)*(r - 2) * p_in * p_in * p_in + 289 | 2*(d_in - 1) * p_in + 290 | t * (r - 1) * p_in * p_ext + 291 | t * (t - 1) 
* p_ext + 292 | (d_out) * p_ext; 293 | denom *= (r-1)*p_in + 1 + t + alfa*(r - (r-1)*p_in - 1); 294 | if (denom != 0.0) { 295 | BMinus = (2*(d_in - 1) * p_in) * ((r - 1) * p_in + 1 + t) / denom; 296 | } 297 | // Nodes not connected with v 298 | double64_t CMinus = 0.0; 299 | denom = (r - 1)*(r - 2) * p_in * p_in * p_in + 300 | t * (t - 1) * p_ext + 301 | t * (r - 1)*(p_in) * p_ext; 302 | denom *= (r-1)*p_in + t + alfa*(r - (r-1)*p_in); 303 | denom *= (r-1)*p_in + t + alfa*(r - (r-1)*p_in - 1); 304 | if (denom != 0.0 && ((r + t) > 0) && ((r - 1 + t) > 0)) { 305 | CMinus = -((r - 1)*(r - 2) * p_in * p_in * p_in) * ((r - 1) * p_in + t)*alfa / denom; 306 | } 307 | // Total 308 | return (A + d_in * BMinus + (r - d_in) * CMinus); 309 | } 310 | 311 | /** @brief Checks the best movement of a vertex. 312 | @param[in] graph The graph. 313 | @param[in] node The node to check the movement. 314 | @param[in] partition The current partition into communities. 315 | @return The movement to perform.*/ 316 | static Movement CheckForBestMovement(const CGraph* graph, uint32_t node, const CommunityPartition* partition, const double64_t alfa) { 317 | 318 | Movement movement; 319 | movement.m_MovementType = E_NO_MOVEMENT; 320 | movement.m_NodeId = node; 321 | 322 | std::map neighborsCommunity; 323 | neighborsCommunity.insert(std::pair(partition->m_NodeLabels[node], 0)); 324 | const uint32_t * adjacencies = graph->GetNeighbors(node); 325 | uint32_t degree = graph->GetDegree(node); 326 | for (uint32_t i = 0; i < degree; i++) { 327 | uint32_t neighbor = adjacencies[i]; 328 | if (partition->m_Communities[partition->m_CommunityIndices[partition->m_NodeLabels[neighbor]]] > 1) { 329 | std::map::iterator it = neighborsCommunity.find(partition->m_NodeLabels[neighbor]); 330 | if (it != neighborsCommunity.end()) { 331 | (*it).second++; 332 | } else { 333 | neighborsCommunity.insert(std::pair(partition->m_NodeLabels[neighbor], 1)); 334 | } 335 | } 336 | } 337 | 338 | bool removeCommunity = false; 339 
| uint32_t bestRemoveInternalEdges = 0; 340 | uint32_t auxInternalEdges = (*neighborsCommunity.find(partition->m_NodeLabels[node])).second; 341 | uint32_t community = partition->m_NodeLabels[node]; 342 | uint32_t communityIndex = partition->m_CommunityIndices[community]; 343 | uint32_t communitySize = partition->m_Communities[communityIndex]; 344 | double64_t p_in; 345 | if ((communitySize - 2) != 0 && (communitySize - 1) != 0) { 346 | p_in = (2 * partition->m_InternalEdges[community] - auxInternalEdges * 2) / ((double64_t) (communitySize - 1) * (communitySize - 2)); 347 | } else { 348 | p_in = 0.0f; 349 | } 350 | double64_t p_ext = graph->GetCC(); 351 | double64_t bestRemoveImprovement; 352 | bestRemoveImprovement = -CheckForIncrement(communitySize - 1, auxInternalEdges, 353 | graph->GetDegree(node) - auxInternalEdges, 354 | partition->m_ExternalEdges[community] + auxInternalEdges - (graph->GetDegree(node) - auxInternalEdges ), 355 | p_in, p_ext, alfa); 356 | bestRemoveInternalEdges = auxInternalEdges; 357 | if (bestRemoveImprovement > 0.0f) { 358 | removeCommunity = true; 359 | } 360 | 361 | 362 | uint32_t bestInsertCommunity; 363 | double64_t bestInsertImprovement = -10000000000000.0; 364 | uint32_t bestInsertInternalEdges = 0; 365 | bool insertCommunity = false; 366 | 367 | for (std::map::iterator it = neighborsCommunity.begin(); it != neighborsCommunity.end(); ++it) { 368 | uint32_t community = it->first; 369 | if (community != partition->m_NodeLabels[node]) { 370 | uint32_t auxInternalEdges = it->second; 371 | uint32_t communityIndex = partition->m_CommunityIndices[community]; 372 | uint32_t communitySize = partition->m_Communities[communityIndex]; 373 | double64_t p_in; 374 | if ((communitySize - 1) > 0 && (communitySize > 0)) { 375 | p_in = (2 * partition->m_InternalEdges[community]) / ((double64_t) (communitySize) * (communitySize - 1)); 376 | } else { 377 | p_in = 0.0; 378 | } 379 | 380 | double64_t p_ext = graph->GetCC(); 381 | double64_t auxImprovement 
= CheckForIncrement(communitySize, auxInternalEdges, graph->GetDegree(node) - auxInternalEdges, 382 | partition->m_ExternalEdges[community], p_in, p_ext, alfa); 383 | if (auxImprovement + bestRemoveImprovement > bestInsertImprovement) { 384 | insertCommunity = true; 385 | bestInsertImprovement = auxImprovement + bestRemoveImprovement; 386 | bestInsertCommunity = community; 387 | bestInsertInternalEdges = auxInternalEdges; 388 | } 389 | } 390 | } 391 | 392 | if (bestInsertImprovement > 0.0f && ((bestInsertImprovement) > bestRemoveImprovement)) { 393 | movement.m_MovementType = E_REMOVE_AND_INSERT; 394 | movement.m_Community = bestInsertCommunity; 395 | movement.m_Improvement = bestInsertImprovement; 396 | } else if (bestRemoveImprovement > 0.0f) { 397 | movement.m_MovementType = E_REMOVE; 398 | movement.m_Improvement = bestRemoveImprovement; 399 | } 400 | return movement; 401 | } 402 | 403 | 404 | /** @brief Performs an improvement step, that is, checks for movements for all the nodes and 405 | and computes the new partitions. 406 | @param[in] graph The graph. 407 | @param[out] partition The current partition. It will be modified with the new partition. 
408 | @param[in] alfa The alfa parameter controlling the cohesivness of the communities.*/ 409 | static uint32_t PerformImprovementStep(const CGraph* graph, CommunityPartition* partition, const double64_t alfa) { 410 | std::vector* movements = new std::vector[num_threads]; 411 | uint32_t N = graph->GetNumNodes(); 412 | 413 | #pragma omp parallel for schedule(SCD_SCHEDULING,SCD_THREAD_BLOCK_SIZE) 414 | for (uint32_t i = 0; i < N; i++) { 415 | int thread = omp_get_thread_num(); 416 | if (i % 100000 == 0) { 417 | printf("Thread %d: Checked movements of %d nodes.\n", thread, i); 418 | } 419 | Movement movement; 420 | movement = CheckForBestMovement(graph, i, partition, alfa); 421 | if (movement.m_MovementType != E_NO_MOVEMENT) { 422 | movements[thread].push_back(movement); 423 | } 424 | } 425 | printf("All movements checked\n"); 426 | 427 | uint32_t* tempNodeLabels = new uint32_t[partition->m_NumNodes]; 428 | memcpy(&tempNodeLabels[0], &partition->m_NodeLabels[0], sizeof (uint32_t) * partition->m_NumNodes); 429 | uint32_t totalMovements = 0; 430 | 431 | //uint32_t nextLabel = partition->m_NumCommunities; 432 | uint32_t removeMovements = 0; 433 | uint32_t removeAndInsertMovements = 0; 434 | uint32_t insertMovements = 0; 435 | 436 | 437 | #pragma omp parallel for schedule(static,1) 438 | for (uint32_t thread = 0; thread < num_threads; thread++) { 439 | uint32_t numMovements = movements[thread].size(); 440 | totalMovements += numMovements; 441 | uint32_t nextLabelThread = partition->m_NumCommunities + numMovements * thread; 442 | 443 | for (uint32_t i = 0; i < numMovements; i++) { 444 | Movement movement = (movements[thread])[i]; 445 | switch (movement.m_MovementType) { 446 | case E_REMOVE: 447 | tempNodeLabels[movement.m_NodeId] = nextLabelThread; 448 | removeMovements++; 449 | nextLabelThread++; 450 | break; 451 | case E_REMOVE_AND_INSERT: 452 | tempNodeLabels[movement.m_NodeId] = movement.m_Community; 453 | if 
(partition->m_Communities[partition->m_CommunityIndices[partition->m_NodeLabels[movement.m_NodeId]]] == 1) { 454 | insertMovements++; 455 | } else { 456 | removeAndInsertMovements++; 457 | } 458 | break; 459 | } 460 | } 461 | } 462 | delete [] movements; 463 | printf(" Number of removes performed: %d\n", removeMovements); 464 | printf(" Number of remove and insert performed: %d\n", removeAndInsertMovements); 465 | printf(" Number of insert performed: %d\n", insertMovements); 466 | FreeResources(partition); 467 | 468 | if (InitializeFromLabelsArray(graph, partition, tempNodeLabels, alfa)) { 469 | printf("Error initializing from label array.\n"); 470 | return 1; 471 | } 472 | delete [] tempNodeLabels; 473 | 474 | return 0; 475 | } 476 | 477 | 478 | /*********************** EXPERIMENTAL **************************************/ 479 | 480 | struct CommunityInteraction { 481 | uint32_t m_CommunityId1; 482 | uint32_t m_CommunityId2; 483 | uint32_t degree; 484 | double64_t m_Improvement; 485 | }; 486 | 487 | static bool CompareByImprovement( const CommunityInteraction& a, const CommunityInteraction& b ) { 488 | if( a.m_Improvement > b.m_Improvement ) return true; 489 | return false; 490 | } 491 | 492 | static bool CompareById( const CommunityInteraction& a, const CommunityInteraction& b ) { 493 | if( a.m_CommunityId1 < b.m_CommunityId1 ) return true; 494 | if( a.m_CommunityId1 > b.m_CommunityId1 ) return false; 495 | if( a.m_CommunityId2 < b.m_CommunityId2 ) return true; 496 | if( a.m_CommunityId2 > b.m_CommunityId2 ) return false; 497 | return false; 498 | } 499 | 500 | static void PrintCommunity(const CommunityPartition* partition, uint32_t communityId) { 501 | const uint32_t* nodes = &partition->m_Communities[partition->m_CommunityIndices[communityId]+1]; 502 | uint32_t size = partition->m_Communities[partition->m_CommunityIndices[communityId]]; 503 | for( uint32_t i = 0; i < size; ++i ) { 504 | printf("%d ", nodes[i]); 505 | } 506 | printf("\n"); 507 | } 508 | 509 | 
double64_t TestMerge(const CGraph* graph, const CommunityPartition* partition, const double64_t alfa, const CommunityInteraction& interaction) { 510 | // printf("Testing merge %d - %d\n", interaction.m_CommunityId1, interaction.m_CommunityId2); 511 | // PrintCommunity(partition, interaction.m_CommunityId1); 512 | // PrintCommunity(partition, interaction.m_CommunityId2); 513 | 514 | std::set community; 515 | const uint32_t* nodes = &partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId1]+1]; 516 | uint32_t size = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId1]]; 517 | double64_t before = 0.0; 518 | for( uint32_t i = 0; i < size; ++i ) { 519 | community.insert(nodes[i]); 520 | before += partition->m_NodeWCC[nodes[i]]; 521 | } 522 | 523 | nodes = &partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId2]+1]; 524 | size = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId2]]; 525 | for( uint32_t i = 0; i < size; ++i ) { 526 | community.insert(nodes[i]); 527 | before += partition->m_NodeWCC[nodes[i]]; 528 | } 529 | /* for( std::set::iterator it = community.begin(); it != community.end(); ++it){ 530 | printf("%d ",*it); 531 | } 532 | printf("\n");*/ 533 | double64_t after = ComputeWCC(graph,alfa,community); 534 | // printf("after: %f, before: %f\n", after, before); 535 | return (after - before); 536 | } 537 | 538 | 539 | void ComputeDegree( const CGraph* graph, const CommunityPartition* partition, const double64_t alfa, CommunityInteraction& interaction) { 540 | interaction.degree = 0; 541 | uint32_t communitySize1 = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId1]]; 542 | uint32_t communitySize2 = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId2]]; 543 | uint32_t* community1 = &partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId1]+1]; 544 | uint32_t* community2 
= &partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId2]+1]; 545 | for(int i = 0; i < communitySize1; ++i) { 546 | uint32_t node = community1[i]; 547 | const uint32_t* adjacencies = graph->GetNeighbors(node); 548 | uint32_t degree = graph->GetDegree(node); 549 | for( int j = 0; j < degree;++j){ 550 | uint32_t neighbor = adjacencies[j]; 551 | if( partition->m_NodeLabels[neighbor] == interaction.m_CommunityId2 ) { 552 | interaction.degree++; 553 | } 554 | } 555 | } 556 | } 557 | 558 | double ShouldMerge( const CGraph* graph, const CommunityPartition* partition, const double64_t alfa, CommunityInteraction& interaction) { 559 | ComputeDegree(graph,partition,alfa,interaction); 560 | uint32_t communitySize1 = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId1]]; 561 | uint32_t communitySize2 = partition->m_Communities[partition->m_CommunityIndices[interaction.m_CommunityId2]]; 562 | double64_t p_in1 = (2 * partition->m_InternalEdges[interaction.m_CommunityId1]) / ((double64_t) (communitySize1) * (communitySize1 - 1)); 563 | double64_t p_in2 = (2 * partition->m_InternalEdges[interaction.m_CommunityId2]) / ((double64_t) (communitySize2) * (communitySize2 - 1)); 564 | double64_t p_in = (p_in1*communitySize1 + p_in2*communitySize2) / (communitySize1+communitySize2); 565 | //double64_t p_in = std::max(p_in1,p_in2); 566 | double64_t p_out = interaction.degree / (double64_t)(communitySize1*communitySize2); 567 | double64_t threshold = - (-alfa*p_out + sqrt(alfa*alfa*p_out*p_out-2*alfa*alfa*p_out-4*alfa*p_out*p_out+2*alfa*alfa+2*alfa*p_out+p_out*p_out)+p_out)*p_out / alfa*(p_out-1.0); 568 | //std::cout << p_in << " " << p_out << " " << threshold << std::endl; 569 | return threshold - p_in; 570 | } 571 | 572 | typedef std::set InteractionsSet; 573 | static uint32_t MergeCommunities(const CGraph* graph, CommunityPartition* partition, const double64_t alfa) { 574 | // Look for community interactions. 
575 | InteractionsSet candidateMerges(CompareById); 576 | uint32_t N = graph->GetNumNodes(); 577 | for( uint32_t i = 0; i < N; ++i ) { 578 | uint32_t communityLabel1 = partition->m_NodeLabels[i]; 579 | uint32_t degree = graph->GetDegree(i); 580 | const uint32_t* adjacencies = graph->GetNeighbors(i); 581 | for( uint32_t j = 0; j < degree; ++j ) { 582 | uint32_t communityLabel2 = partition->m_NodeLabels[adjacencies[j]]; 583 | if( communityLabel1 != communityLabel2 ) { 584 | CommunityInteraction cI; 585 | cI.degree = 0; 586 | if( communityLabel1 < communityLabel2 ) { 587 | cI.m_CommunityId1 = communityLabel1; 588 | cI.m_CommunityId2 = communityLabel2; 589 | } else { 590 | cI.m_CommunityId2 = communityLabel1; 591 | cI.m_CommunityId1 = communityLabel2; 592 | } 593 | candidateMerges.insert(cI); 594 | } 595 | } 596 | } 597 | std::vector filteredInteractions; 598 | uint32_t earlyFilter = 0; 599 | // Test each community interaction and rank it. 600 | for( InteractionsSet::iterator it = candidateMerges.begin(); it != candidateMerges.end(); ++it ) { 601 | CommunityInteraction cI = *it; 602 | if( partition->m_Communities[partition->m_CommunityIndices[cI.m_CommunityId1]] > 2 && 603 | partition->m_Communities[partition->m_CommunityIndices[cI.m_CommunityId2]] > 2 ) { 604 | earlyFilter++; 605 | double64_t improvement = TestMerge(graph, partition, alfa, cI); 606 | if( improvement > 0.0 ) { 607 | cI.m_Improvement = improvement; 608 | filteredInteractions.push_back(cI); 609 | } 610 | } 611 | } 612 | std::cout << earlyFilter << " " << filteredInteractions.size() << " " << candidateMerges.size() << std::endl; 613 | // Sort community interactions by improvement. 
614 | uint32_t* tempNodeLabels = new uint32_t[partition->m_NumNodes]; 615 | memcpy(tempNodeLabels, partition->m_NodeLabels, sizeof(uint32_t)*partition->m_NumNodes); 616 | std::sort(filteredInteractions.begin(), filteredInteractions.end(), CompareByImprovement); 617 | std::set touched; 618 | uint32_t numInteractions = filteredInteractions.size(); 619 | for( uint32_t i = 0; i < numInteractions; ++i ) { 620 | if( (touched.find(filteredInteractions[i].m_CommunityId1) == touched.end()) && 621 | (touched.find(filteredInteractions[i].m_CommunityId2) == touched.end()) ) { 622 | uint32_t communitySize = partition->m_Communities[partition->m_CommunityIndices[filteredInteractions[i].m_CommunityId1]]; 623 | const uint32_t* community = &partition->m_Communities[partition->m_CommunityIndices[filteredInteractions[i].m_CommunityId1]+1]; 624 | for( uint32_t j = 0; j < communitySize; ++j ) { 625 | tempNodeLabels[community[j]] = filteredInteractions[i].m_CommunityId2; 626 | } 627 | touched.insert(filteredInteractions[i].m_CommunityId1); 628 | touched.insert(filteredInteractions[i].m_CommunityId2); 629 | } 630 | } 631 | 632 | // Perform interactions constrained by independence and create a new labels array to create a partition from. 
633 | FreeResources(partition); 634 | if (InitializeFromLabelsArray(graph, partition, tempNodeLabels, alfa)) { 635 | printf("Error initializing from label array.\n"); 636 | return 1; 637 | } 638 | delete [] tempNodeLabels; 639 | return 0; 640 | } 641 | /***************************************************************************/ 642 | 643 | /*****************************EXPERIMENTAL 2 ******************************/ 644 | 645 | bool CompareMovements(const Movement& a, const Movement& b) { 646 | if(a.m_Community < b.m_Community) return true; 647 | if(b.m_Community < a.m_Community) return false; 648 | if(a.m_Improvement > b.m_Improvement) return true; 649 | if(b.m_Improvement > a.m_Improvement) return false; 650 | return false; 651 | } 652 | 653 | static uint32_t PerformImprovementStep2(const CGraph* graph, CommunityPartition* partition, const double64_t alfa) { 654 | std::vector* movements = new std::vector[num_threads]; 655 | uint32_t N = graph->GetNumNodes(); 656 | 657 | #pragma omp parallel for schedule(SCD_SCHEDULING,SCD_THREAD_BLOCK_SIZE) 658 | for (uint32_t i = 0; i < N; i++) { 659 | int thread = omp_get_thread_num(); 660 | if (i % 100000 == 0) { 661 | printf("Thread %d: Checked movements of %d nodes.\n", thread, i); 662 | } 663 | Movement movement; 664 | movement = CheckForBestMovement(graph, i, partition, alfa); 665 | if (movement.m_MovementType != E_NO_MOVEMENT) { 666 | movements[thread].push_back(movement); 667 | } 668 | } 669 | printf("All movements checked\n"); 670 | 671 | for( uint32_t i = 0; i < N; i++) { 672 | std::sort((movements[i]).begin(), (movements[i]).end(),CompareMovements); 673 | } 674 | 675 | uint32_t* tempNodeLabels = new uint32_t[partition->m_NumNodes]; 676 | memcpy(&tempNodeLabels[0], &partition->m_NodeLabels[0], sizeof (uint32_t) * partition->m_NumNodes); 677 | uint32_t totalMovements = 0; 678 | 679 | //uint32_t nextLabel = partition->m_NumCommunities; 680 | uint32_t removeMovements = 0; 681 | uint32_t removeAndInsertMovements = 0; 682 | 
uint32_t insertMovements = 0; 683 | 684 | 685 | #pragma omp parallel for schedule(static,1) 686 | for (uint32_t thread = 0; thread < num_threads; thread++) { 687 | uint32_t numMovements = movements[thread].size(); 688 | totalMovements += numMovements; 689 | uint32_t nextLabelThread = partition->m_NumCommunities + numMovements * thread; 690 | 691 | uint32_t previousCommunity = 100000000; 692 | for (uint32_t i = 0; i < numMovements; i++) { 693 | Movement movement = (movements[thread])[i]; 694 | if(movement.m_Community != previousCommunity) { 695 | previousCommunity = movement.m_Community; 696 | switch (movement.m_MovementType) { 697 | case E_REMOVE: 698 | tempNodeLabels[movement.m_NodeId] = nextLabelThread; 699 | removeMovements++; 700 | nextLabelThread++; 701 | break; 702 | case E_REMOVE_AND_INSERT: 703 | tempNodeLabels[movement.m_NodeId] = movement.m_Community; 704 | if (partition->m_Communities[partition->m_CommunityIndices[partition->m_NodeLabels[movement.m_NodeId]]] == 1) { 705 | insertMovements++; 706 | } else { 707 | removeAndInsertMovements++; 708 | } 709 | break; 710 | } 711 | } 712 | } 713 | } 714 | delete [] movements; 715 | printf(" Number of removes performed: %d\n", removeMovements); 716 | printf(" Number of remove and insert performed: %d\n", removeAndInsertMovements); 717 | printf(" Number of insert performed: %d\n", insertMovements); 718 | FreeResources(partition); 719 | 720 | if (InitializeFromLabelsArray(graph, partition, tempNodeLabels, alfa)) { 721 | printf("Error initializing from label array.\n"); 722 | return 1; 723 | } 724 | delete [] tempNodeLabels; 725 | 726 | return 0; 727 | } 728 | 729 | /* Movement PerformCommunityCentricRefinement( const CGraph* graph, const CommunityPartition partition*, const double64_t alfa, uint32_t* communityId, uint32_t size ) { 730 | Movement bestMovement; 731 | bestMovement.m_Type = E_NO_MOVEMENT; 732 | for( int i = 0; i < size; ++i ) { 733 | uint32_t node = community[i]; 734 | const uint32_t* adjacencies = 
graph->GetNeighbors(node); 735 | const uint32_t degree = graph->GetDegree(node); 736 | for(uint32_t j = 0; j < degree; ++j ) { 737 | uint32_t neighbor = adjacencies[j]; 738 | if(partition->m_Communities[partition->m_CommunityIndices[partition->m_NodeLabels[neighbor]]] == 1 ) { 739 | 740 | } 741 | } 742 | } 743 | } 744 | */ 745 | 746 | 747 | 748 | /** @brief Measures the memory consumption of a partition. 749 | @param partition The partition to measure. 750 | @return The size in bytes of the structure.*/ 751 | static uint64_t MeasureMemoryConsumption(const CommunityPartition* partition) { 752 | uint64_t memoryConsumption = 0; 753 | memoryConsumption += sizeof (uint32_t) * partition->m_NumNodes; //Labels array consumption. 754 | memoryConsumption += sizeof (uint32_t) * partition->m_NumCommunities; //Community indices consumption. 755 | memoryConsumption += sizeof (uint32_t) *(partition->m_NumCommunities + partition->m_NumNodes); //Communities consumption. 756 | memoryConsumption += sizeof (uint32_t) * partition->m_NumCommunities; //Internal edges consumption. 757 | memoryConsumption += sizeof (uint32_t) * partition->m_NumCommunities; //External edges consumption. 758 | memoryConsumption += sizeof (double64_t)* partition->m_NumNodes; //WCCs consumption. 759 | memoryConsumption += sizeof (uint32_t); //NumNodes consumption. 760 | memoryConsumption += sizeof (uint32_t); //NumCommunities consumption. 761 | memoryConsumption += sizeof (double64_t); //WCC consumption. 
762 | return memoryConsumption; 763 | } 764 | 765 | uint32_t LoadPartition( const CGraph* graph, CommunityPartition* partition, const char_t* partitionFileName, const double64_t alfa ) { 766 | 767 | std::map oldToNew; 768 | const uint32_t* newToOld = graph->GetMap(); 769 | for( uint32_t i = 0; i < graph->GetNumNodes(); ++i ) { 770 | oldToNew.insert(std::pair(newToOld[i],i)); 771 | } 772 | 773 | uint32_t* communities = new uint32_t[graph->GetNumNodes()]; 774 | memset(communities,0xff,sizeof(uint32_t)*graph->GetNumNodes()); 775 | if (!communities) { 776 | printf("Unable to allocate partition\n"); 777 | return 1; 778 | } 779 | 780 | std::ifstream partitionFile; 781 | partitionFile.open(partitionFileName); 782 | if(!partitionFile.is_open()) { 783 | printf("Unable to load partition file.\n"); 784 | return 1; 785 | } 786 | std::string line; 787 | uint32_t nextLabel = 0; 788 | while(std::getline(partitionFile,line)) { 789 | std::istringstream stream(line); 790 | uint32_t node; 791 | while( stream >> node ) { 792 | communities[oldToNew[node]] = nextLabel; 793 | } 794 | ++nextLabel; 795 | } 796 | for( uint32_t i=0; iGetNumNodes(); ++i) { 797 | if( communities[i] == 0xffffffff ) { 798 | communities[i] = nextLabel++; 799 | } 800 | } 801 | partitionFile.close(); 802 | InitializeFromLabelsArray(graph,partition,communities, alfa); 803 | delete [] communities; 804 | return 0; 805 | } 806 | 807 | uint32_t InitializeSimplePartition(const CGraph* graph, CommunityPartition* partition, const double64_t alfa) { 808 | //Computing the clustering coefficient of each node of the graph. 
809 | NodeClustering* nC = new NodeClustering[graph->GetNumNodes()]; 810 | if (!nC) { 811 | printf("Error allocating node clustering array."); 812 | return 1; 813 | } 814 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 815 | nC[i].m_NodeId = i; 816 | nC[i].m_Degree = graph->GetDegree(i); 817 | nC[i].m_CC = graph->GetTotalTriangles(i) / (double64_t) (nC[i].m_Degree * (nC[i].m_Degree - 1)); 818 | } 819 | qsort(nC, graph->GetNumNodes(), sizeof (NodeClustering), Compare_NodeClusterings); 820 | //Creating a vector to track which nodes have already been visited. 821 | bool * visited = new bool[graph->GetNumNodes()]; 822 | if (!visited) { 823 | printf("Error allocating visited array."); 824 | return 1; 825 | } 826 | 827 | memset(visited, false, graph->GetNumNodes() ); 828 | 829 | uint32_t* communities = new uint32_t[graph->GetNumNodes()]; 830 | if (!communities) { 831 | printf("Error allocating communities array.\n"); 832 | return 1; 833 | } 834 | 835 | uint32_t nextLabel = 0; 836 | for (uint32_t i = 0; i < graph->GetNumNodes(); i++) { 837 | NodeClustering* nodeClustering = &nC[i]; 838 | // printf("%f\n", nodeClustering->m_CC); 839 | if (!visited[nodeClustering->m_NodeId]) { 840 | visited[nodeClustering->m_NodeId] = true; 841 | communities[nodeClustering->m_NodeId] = nextLabel; 842 | const uint32_t* adjacencies1 = graph->GetNeighbors(nodeClustering->m_NodeId); 843 | uint32_t degree = graph->GetDegree(nodeClustering->m_NodeId); 844 | 845 | for (uint32_t j = 0; j < degree; j++) { 846 | if (!visited[adjacencies1[j]]) { 847 | visited[adjacencies1[j]] = true; 848 | communities[adjacencies1[j]] = nextLabel; 849 | } 850 | } 851 | nextLabel++; 852 | } 853 | } 854 | delete [] visited; 855 | delete [] nC; 856 | 857 | InitializeFromLabelsArray(graph, partition, communities, alfa); 858 | delete [] communities; 859 | return 0; 860 | } 861 | 862 | uint32_t CopyPartition(CommunityPartition* destPartition, const CommunityPartition* sourcePartition) { 863 | 
destPartition->m_NodeLabels = new uint32_t[sourcePartition->m_NumNodes]; 864 | if (!destPartition->m_NodeLabels) { 865 | printf("Error while allocating node labels.\n"); 866 | return 1; 867 | } 868 | 869 | destPartition->m_CommunityIndices = new uint32_t[sourcePartition->m_NumCommunities]; 870 | if (!destPartition->m_CommunityIndices) { 871 | printf("Error while allocating community indices.\n"); 872 | return 1; 873 | } 874 | 875 | destPartition->m_Communities = new uint32_t[sourcePartition->m_NumCommunities + sourcePartition->m_NumNodes]; 876 | if (!destPartition->m_Communities) { 877 | printf("Error while allocating communities.\n"); 878 | return 1; 879 | } 880 | 881 | destPartition->m_InternalEdges = new uint32_t[sourcePartition->m_NumCommunities]; 882 | if (!destPartition->m_InternalEdges) { 883 | printf("Error while allocating InternalEdges.\n"); 884 | return 1; 885 | } 886 | 887 | destPartition->m_ExternalEdges = new uint32_t[sourcePartition->m_NumCommunities]; 888 | if (!destPartition->m_ExternalEdges) { 889 | printf("Error while allocating InternalEdges.\n"); 890 | return 1; 891 | } 892 | 893 | destPartition->m_NodeWCC = new double64_t[sourcePartition->m_NumNodes]; 894 | if (!destPartition->m_NodeWCC) { 895 | printf("Error while allocating WCCs.\n"); 896 | return 1; 897 | } 898 | 899 | memcpy(destPartition->m_NodeLabels, sourcePartition->m_NodeLabels, sizeof (uint32_t) * sourcePartition->m_NumNodes); 900 | memcpy(destPartition->m_CommunityIndices, sourcePartition->m_CommunityIndices, sizeof (uint32_t)*(sourcePartition->m_NumCommunities)); 901 | memcpy(destPartition->m_Communities, sourcePartition->m_Communities, sizeof (uint32_t)*(sourcePartition->m_NumCommunities + sourcePartition->m_NumNodes)); 902 | memcpy(destPartition->m_InternalEdges, sourcePartition->m_InternalEdges, sizeof (uint32_t)*(sourcePartition->m_NumCommunities)); 903 | memcpy(destPartition->m_ExternalEdges, sourcePartition->m_ExternalEdges, sizeof 
(uint32_t)*(sourcePartition->m_NumCommunities)); 904 | memcpy(destPartition->m_NodeWCC, sourcePartition->m_NodeWCC, sizeof (double64_t)*(sourcePartition->m_NumNodes)); 905 | destPartition->m_NumNodes = sourcePartition->m_NumNodes; 906 | destPartition->m_NumCommunities = sourcePartition->m_NumCommunities; 907 | destPartition->m_WCC = sourcePartition->m_WCC; 908 | return 0; 909 | } 910 | 911 | 912 | uint32_t PrintPartition(const CGraph* graph, const CommunityPartition* partition, const char_t* fileName) { 913 | 914 | std::ofstream outFile; 915 | outFile.open(fileName); 916 | 917 | for (uint32_t i = 0; i < partition->m_NumCommunities; i++) { 918 | if (partition->m_CommunityIndices[i] != SCD_INVALID_COMMUNITY) { 919 | uint32_t* community = &partition->m_Communities[partition->m_CommunityIndices[i] + 1]; 920 | uint32_t communitySize = partition->m_Communities[partition->m_CommunityIndices[i]]; 921 | for (uint32_t j = 0; j < communitySize - 1; j++) { 922 | outFile << graph->ReMap(community[j]) << " "; 923 | } 924 | outFile << graph->ReMap(community[communitySize - 1]) << std::endl; 925 | } 926 | } 927 | outFile.close(); 928 | return 0; 929 | } 930 | 931 | 932 | void FreeResources(CommunityPartition* partition) { 933 | if (partition->m_NodeLabels != NULL) { 934 | delete [] partition->m_NodeLabels; 935 | partition->m_NodeLabels = NULL; 936 | } 937 | 938 | if (partition->m_CommunityIndices != NULL) { 939 | delete [] partition->m_CommunityIndices; 940 | partition->m_CommunityIndices = NULL; 941 | } 942 | 943 | if (partition->m_Communities != NULL) { 944 | delete [] partition->m_Communities; 945 | partition->m_Communities = NULL; 946 | } 947 | 948 | if (partition->m_InternalEdges != NULL) { 949 | delete [] partition->m_InternalEdges; 950 | partition->m_InternalEdges = NULL; 951 | } 952 | 953 | if (partition->m_ExternalEdges != NULL) { 954 | delete [] partition->m_ExternalEdges; 955 | partition->m_ExternalEdges = NULL; 956 | } 957 | 958 | if (partition->m_NodeWCC != NULL) { 
959 | delete [] partition->m_NodeWCC; 960 | partition->m_NodeWCC = NULL; 961 | } 962 | partition->m_NumCommunities = 0; 963 | partition->m_NumNodes = 0; 964 | partition->m_WCC = 0.0; 965 | } 966 | 967 | 968 | uint32_t ImproveCommunities(const CGraph* graph, CommunityPartition* partition, uint32_t numThreads, uint32_t lookahead, const double64_t alfa ) { 969 | num_threads = numThreads; 970 | omp_set_num_threads(num_threads); 971 | printf("Maximum number of threads: %d\n", omp_get_max_threads()); 972 | printf("Starting improvement from a partition with WCC: %f\n", partition->m_WCC / graph->GetNumNodes()); 973 | CommunityPartition bestPartition; 974 | CopyPartition(&bestPartition, partition); 975 | 976 | uint32_t remainingTries = lookahead; 977 | bool improve = true; 978 | while(improve) { 979 | while (improve) { 980 | printf("\n"); 981 | uint64_t initTime = StartClock(); 982 | improve = false; 983 | printf("Starting improvement iteration ...\n"); 984 | if (PerformImprovementStep(graph, partition, alfa)) { 985 | printf("Error while performing an improvement step.\n"); 986 | return 1; 987 | } 988 | 989 | printf("New WCC: %f\n", partition->m_WCC / graph->GetNumNodes()); 990 | printf("Best WCC: %f\n", bestPartition.m_WCC / graph->GetNumNodes()); 991 | printf("Memory required by this iteration: %lu bytes \n", MeasureMemoryConsumption(partition) + MeasureMemoryConsumption(&bestPartition)); 992 | 993 | if (partition->m_WCC - bestPartition.m_WCC > 0.0f) { 994 | if (((partition->m_WCC - bestPartition.m_WCC) / bestPartition.m_WCC) > 0.01f) { 995 | remainingTries = lookahead; 996 | } 997 | FreeResources(&bestPartition); 998 | CopyPartition(&bestPartition, partition); 999 | } 1000 | 1001 | 1002 | printf("Iteration time: %lu ms\n", StopClock(initTime)); 1003 | if(remainingTries > 0) { 1004 | remainingTries--; 1005 | improve = true; 1006 | } 1007 | } 1008 | /*EXPERIMENTAL*/ 1009 | /* printf("Trying to merge communities\n"); 1010 | MergeCommunities(graph, partition, alfa); 1011 | 
printf("Merge: New WCC: %f\n", partition->m_WCC / graph->GetNumNodes()); 1012 | printf("Merge: Best WCC: %f\n", bestPartition.m_WCC / graph->GetNumNodes()); 1013 | if( partition->m_WCC - bestPartition.m_WCC > 0.0f ) { 1014 | printf("Merging communities improved the partition\n"); 1015 | if (((partition->m_WCC - bestPartition.m_WCC) / bestPartition.m_WCC) > 0.01f) { 1016 | remainingTries = lookahead; 1017 | } 1018 | FreeResources(&bestPartition); 1019 | CopyPartition(&bestPartition, partition); 1020 | improve = true; 1021 | } 1022 | */ 1023 | } 1024 | 1025 | FreeResources(partition); 1026 | CopyPartition(partition, &bestPartition); 1027 | FreeResources(&bestPartition); 1028 | return 0; 1029 | } 1030 | } 1031 | --------------------------------------------------------------------------------