├── Makefile ├── ap.h ├── main.cpp ├── README └── ap.cpp /Makefile: -------------------------------------------------------------------------------- 1 | ap: ap.cpp ap.h main.cpp 2 | g++ -O2 -Wall -o ap ap.cpp main.cpp 3 | -------------------------------------------------------------------------------- /ap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | std::vector affinityPropagation( 5 | FILE* input, 6 | int prefType = 1, 7 | double damping = 0.9, 8 | int maxit = 1000, 9 | int convit = 50 10 | ); 11 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "ap.h" 5 | using namespace std; 6 | 7 | // test 8 | int main(int argc, char** argv) 9 | { 10 | int prefType = 1; 11 | if (argc >= 2) { 12 | prefType = atoi(argv[1]); 13 | } 14 | vector examplar = affinityPropagation(stdin, prefType); 15 | for (size_t i = 0; i < examplar.size(); ++i) { 16 | printf("%d ", examplar[i]); 17 | } 18 | puts(""); 19 | return 0; 20 | } 21 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | ============= 2 | DESCRIPTION 3 | ============= 4 | 5 | This program is an implementation of affinity propagation, a clustering 6 | algorithm that works by passing messages between data points. 7 | This program is optimized for sparse similarities, where there are O(n) finite 8 | similarities and all other similarities are negative infinity. 
9 | You can find the details about affinity propagation at the following URL: 10 | http://www.psi.toronto.edu/affinitypropagation/ 11 | 12 | ============== 13 | MAKE AND RUN 14 | ============== 15 | 16 | First, prepare a similarity file like this: 17 | 18 | 272 19 | 0 4 -0.866169620986 20 | 8 15 -5.54097758238 21 | 3 221 -0.391640594901 22 | 10 0 -5.89035174724 23 | 77 102 -0.205104158914 24 | 135 248 -1.00319562134 25 | 98 40 -0.992369707296 26 | 68 132 -1.97837136257 27 | ... 28 | 29 | The input specification is as follows: 30 | The first line contains an integer giving the size of the matrix. 31 | Each following line contains two integers and a real number giving 32 | the row index i, the column index j and the similarity s(i,j) respectively. 33 | Note that i and j are 0-based. 34 | Input ends with an end-of-file. 35 | 36 | Then, make and run. (sim.txt is the similarity file described above.) 37 | 38 | $ make 39 | $ ./ap < sim.txt 40 | 41 | ========= 42 | LICENSE 43 | ========= 44 | 45 | The MIT License 46 | 47 | Copyright (c) 2010 NOJIMA Yusuke 48 | 49 | Permission is hereby granted, free of charge, to any person obtaining a copy 50 | of this software and associated documentation files (the "Software"), to deal 51 | in the Software without restriction, including without limitation the rights 52 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 53 | copies of the Software, and to permit persons to whom the Software is 54 | furnished to do so, subject to the following conditions: 55 | 56 | The above copyright notice and this permission notice shall be included in 57 | all copies or substantial portions of the Software. 58 | 59 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 60 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 61 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 62 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 63 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 64 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 65 | THE SOFTWARE. 66 | 67 | -------------------------------------------------------------------------------- /ap.cpp: -------------------------------------------------------------------------------- 1 | // An Implementation of Affinity Propagation 2 | // See: Clustering by Passing Messages Between Data Points 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "ap.h" 11 | using namespace std; 12 | 13 | namespace { 14 | struct Edge { 15 | int src; // index of source 16 | int dst; // index of destination 17 | double s; // similarity s(src, dst) 18 | double r; // responsibility r(src, dst) 19 | double a; // availability a(src, dst) 20 | 21 | Edge(int src, int dst, double s): src(src), dst(dst), s(s), r(0), a(0) {} 22 | bool operator<(const Edge& rhs) const { return s < rhs.s; } 23 | }; 24 | 25 | typedef vector Edges; 26 | 27 | struct Graph { 28 | int n; // the number of vertices 29 | Edges* outEdges; // array of out edges of corresponding vertices 30 | Edges* inEdges; // array of in edges of corresponding vertices 31 | vector edges; // all edges 32 | }; 33 | 34 | // Build graph from sparse similarity matrix stored in COO format. 35 | // The input specification is as follows: 36 | // First line contains an integer standing for the size of the matrix. 37 | // Following lines each contain two integers and a real number standing for 38 | // the row index i, the column index j and the similarity s(i,j) respectively. 39 | // Input ends with an end-of-file. 40 | // Note that this function does not check any errors in the given input. 41 | // Parameter: 42 | // input: Input file handle. 
43 | // prefType: 44 | // 1: use median of similarities as preference 45 | // 2: use minimum of similarities as preference 46 | // 3: use min - (max - min) of similarities as preference 47 | Graph* buildGraph(FILE* input, int prefType) 48 | { 49 | Graph* graph = new Graph; 50 | fscanf(input, "%d", &graph->n); 51 | graph->outEdges = new Edges[graph->n]; 52 | graph->inEdges = new Edges[graph->n]; 53 | vector& edges = graph->edges; 54 | 55 | // read similarity matrix 56 | int i, j; 57 | double s; 58 | while (fscanf(input, "%d%d%lf", &i, &j, &s) != EOF) { 59 | if (i == j) { continue; } 60 | edges.push_back(Edge(i, j, s)); 61 | } 62 | 63 | // calculate preferences 64 | double pref; 65 | if (prefType == 1) { 66 | sort(edges.begin(), edges.end()); 67 | int m = edges.size(); 68 | pref = (m % 2) ? edges[m/2].s : (edges[m/2 - 1].s + edges[m/2].s) / 2.0; 69 | } else if (prefType == 2) { 70 | pref = min_element(edges.begin(), edges.end())->s; 71 | } else if (prefType == 3) { 72 | double minValue = min_element(edges.begin(), edges.end())->s; 73 | double maxValue = max_element(edges.begin(), edges.end())->s; 74 | pref = 2*minValue - maxValue; 75 | } else { 76 | assert(false); // invalid prefType 77 | } 78 | for (int i = 0; i < graph->n; ++i) { 79 | edges.push_back(Edge(i, i, pref)); 80 | } 81 | 82 | for (size_t i = 0; i < edges.size(); ++i) { 83 | Edge* p = &edges[i]; 84 | // add small noise to avoid degeneracies 85 | p->s += (1e-16 * p->s + 1e-300) * (rand() / (RAND_MAX + 1.0)); 86 | // add out/in edges to vertices 87 | graph->outEdges[p->src].push_back(p); 88 | graph->inEdges[p->dst].push_back(p); 89 | } 90 | 91 | return graph; 92 | } 93 | 94 | void destroyGraph(Graph* graph) 95 | { 96 | delete [] graph->outEdges; 97 | delete [] graph->inEdges; 98 | delete graph; 99 | } 100 | 101 | inline void update(double& variable, double newValue, double damping) 102 | { 103 | variable = damping * variable + (1.0 - damping) * newValue; 104 | } 105 | 106 | void updateResponsibilities(Graph* 
graph, double damping) 107 | { 108 | for (int i = 0; i < graph->n; ++i) { 109 | Edges& edges = graph->outEdges[i]; 110 | int m = edges.size(); 111 | double max1 = -HUGE_VAL, max2 = -HUGE_VAL; 112 | double argmax1 = -1; 113 | for (int k = 0; k < m; ++k) { 114 | double value = edges[k]->s + edges[k]->a; 115 | if (value > max1) { swap(max1, value); argmax1 = k; } 116 | if (value > max2) { max2 = value; } 117 | } 118 | // update responsibilities 119 | for (int k = 0; k < m; ++k) { 120 | if (k != argmax1) { 121 | update(edges[k]->r, edges[k]->s - max1, damping); 122 | } else { 123 | update(edges[k]->r, edges[k]->s - max2, damping); 124 | } 125 | } 126 | } 127 | } 128 | 129 | void updateAvailabilities(Graph* graph, double damping) 130 | { 131 | for (int k = 0; k < graph->n; ++k) { 132 | Edges& edges = graph->inEdges[k]; 133 | int m = edges.size(); 134 | // calculate sum of positive responsibilities 135 | double sum = 0.0; 136 | for (int i = 0; i < m-1; ++i) { 137 | sum += max(0.0, edges[i]->r); 138 | } 139 | // calculate availabilities 140 | double rkk = edges[m-1]->r; 141 | for (int i = 0; i < m-1; ++i) { 142 | update(edges[i]->a, min(0.0, rkk + sum - max(0.0, edges[i]->r)), damping); 143 | } 144 | // calculate self-availability 145 | update(edges[m-1]->a, sum, damping); 146 | } 147 | } 148 | 149 | bool updateExamplars(Graph* graph, vector& examplar) 150 | { 151 | bool changed = false; 152 | for (int i = 0; i < graph->n; ++i) { 153 | Edges& edges = graph->outEdges[i]; 154 | int m = edges.size(); 155 | double maxValue = -HUGE_VAL; 156 | int argmax = i; 157 | for (int k = 0; k < m; ++k) { 158 | double value = edges[k]->a + edges[k]->r; 159 | if (value > maxValue) { 160 | maxValue = value; 161 | argmax = edges[k]->dst; 162 | } 163 | } 164 | if (examplar[i] != argmax) { 165 | examplar[i] = argmax; 166 | changed = true; 167 | } 168 | } 169 | return changed; 170 | } 171 | } 172 | 173 | // Cluster data points with Affinity Propagation. 
174 | // Parameters: 175 | // input: Input file which contains sparse similarity matrix. see buildGraph(). 176 | // prefType: Specify what kind of preference we use. see buildGraph(). 177 | // damping: The damping factor. (0.5 <= damping < 1.0) 178 | // maxit: The maximum number of iterations. 179 | // convit: Specify how many iterations this algorithm stops when examplars 180 | // did not change for. 181 | // Returns: 182 | // Array of examplars of corresponding data points. 183 | vector affinityPropagation(FILE* input, int prefType, double damping, int maxit, int convit) 184 | { 185 | assert(0.499 < damping && damping < 1.0); 186 | 187 | Graph* graph = buildGraph(input, prefType); 188 | vector examplar(graph->n, -1); 189 | 190 | for (int i = 0, nochange = 0; i < maxit && nochange < convit; ++i, ++nochange) { 191 | updateResponsibilities(graph, damping); 192 | updateAvailabilities(graph, damping); 193 | if (updateExamplars(graph, examplar)) { nochange = 0; } 194 | } 195 | 196 | destroyGraph(graph); 197 | return examplar; 198 | } 199 | --------------------------------------------------------------------------------