├── img.png ├── e-soinn.pdf ├── README.md ├── main.cpp ├── CMakeLists.txt ├── ESOINN.h └── ESOINN.cpp /img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bes-dev/ESOINN/HEAD/img.png -------------------------------------------------------------------------------- /e-soinn.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bes-dev/ESOINN/HEAD/e-soinn.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ESOINN 2 | ===== 3 | 4 | An enhanced self-organizing incremental neural network for online unsupervised learning 5 | 6 | author: Sergei Belousov aka BeS 7 | 8 | An implementation of ESOINN in C++ with Boost 9 | 10 | original paper: "An enhanced self-organizing incremental neural network for online unsupervised learning" 11 | Shen Furao, Tomotaka Ogura, Osamu Hasegawa, 2007 -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "ESOINN.h" 7 | #include 8 | 9 | int main() { 10 | soinn::ESOINN model(4, 4, 30); 11 | std::ifstream ifs; 12 | ifs.open("input.txt"); 13 | ifs.open("iris.txt"); 14 | boost::numeric::ublas::vector input(4); 15 | int num; 16 | int tmp; 17 | for(int t = 0; t < 150; t++) { 18 | for(int i = 0; i < 4; i++) { 19 | ifs>>tmp; 20 | input(i) = double(tmp); 21 | } 22 | model.process(input); 23 | std::cout< 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace soinn { 16 | 17 | struct VertexProperties 18 | { 19 | boost::numeric::ublas::vector weight; 20 | int classId; 21 | double density; 22 | int numberOfSignals; 23 | double S; 24 | friend class 
boost::serialization::access; 25 | template void serialize(Archive & ar, const unsigned int version) 26 | { 27 | ar & BOOST_SERIALIZATION_NVP(weight); 28 | ar & BOOST_SERIALIZATION_NVP(classId); 29 | ar & BOOST_SERIALIZATION_NVP(density); 30 | ar & BOOST_SERIALIZATION_NVP(numberOfSignals); 31 | ar & BOOST_SERIALIZATION_NVP(S); 32 | } 33 | }; 34 | 35 | struct EdgeProperties 36 | { 37 | int age; 38 | friend class boost::serialization::access; 39 | template void serialize(Archive & ar, const unsigned int version) 40 | { 41 | ar & BOOST_SERIALIZATION_NVP(age); 42 | } 43 | }; 44 | 45 | typedef boost::adjacency_list, EdgeProperties> Graph; 46 | typedef boost::graph_traits::vertex_descriptor Vertex; 47 | typedef boost::graph_traits::edge_descriptor Edge; 48 | typedef boost::graph_traits::vertex_iterator VertexIterator; 49 | typedef boost::graph_traits::edge_iterator EdgeIterator; 50 | typedef boost::graph_traits::out_edge_iterator OutEdgeIterator; 51 | typedef boost::graph_traits::adjacency_iterator AdjacencyIterator; 52 | typedef boost::graph_traits::vertices_size_type VerticesSizeType; 53 | typedef std::map ComponentMap; 54 | 55 | class ESOINNException 56 | { 57 | public: 58 | ESOINNException(std::string message):message(message) {} 59 | ~ESOINNException(){} 60 | inline std::string getMessage() {return message;} 61 | private: 62 | std::string message; 63 | }; 64 | 65 | class ESOINN 66 | { 67 | public: 68 | ESOINN(int dim = 2, int ageMax = 30, int iterationThreshold = 50, double c1 = 0.001, double c2 = 1.0); 69 | ~ESOINN(); 70 | void setParams(int dim = 2, int ageMax = 30, int iterationThreshold = 50, double c1 = 0.001, double c2 = 1.0); 71 | void process(const boost::numeric::ublas::vector& inputSignal); 72 | void classify(); 73 | Graph getGraph(); 74 | int getNumberOfClasses(); 75 | int getNumberOfVertices(); 76 | boost::numeric::ublas::vector getCenterOfCluster(int classId); 77 | VertexProperties getBestMatch(boost::numeric::ublas::vector& inputSignal); 78 | void 
save(std::string filename); 79 | void load(std::string filename); 80 | void clear(); 81 | 82 | private: 83 | int dim; 84 | Graph graph; 85 | int ageMax; 86 | int iterationCount; 87 | int iterationThreshold; 88 | int numberOfClasses; 89 | double c1, c2; 90 | 91 | void addSignal(const boost::numeric::ublas::vector& inputSignal); 92 | std::pair findWinners(const boost::numeric::ublas::vector& inputSignal); 93 | bool isWithinThreshold(const boost::numeric::ublas::vector& inputSignal, Vertex& firstWinner, Vertex& secondWinner); 94 | double getSimilarityThreshold(const Vertex& vertex); 95 | void incrementEdgesAge(Vertex& vertex); 96 | bool needAddEdge(Vertex& firstWinner, Vertex& secondWinner); 97 | bool needMergeClasses(Vertex &a, Vertex &b); 98 | void mergeClasses(int A, int B); 99 | double meanDensity(int classId); 100 | double maxDensity(int classId); 101 | double densityThershold(double mean, double max); 102 | double meanDistance(Vertex& vertex); 103 | void updateDensity(Vertex& vertex); 104 | void updateWeights(Vertex& firstWinner, const boost::numeric::ublas::vector &inputSignal); 105 | void deleteOldEdges(); 106 | void updateClassLabels(); 107 | void markClasses(); 108 | void partitionClasses(); 109 | void markAdjacentVertices(Vertex &vertex, int cID); 110 | void deleteNoiseVertex(); 111 | double distance(const boost::numeric::ublas::vector &x, const boost::numeric::ublas::vector &y); 112 | 113 | private: 114 | friend class boost::serialization::access; 115 | template void serialize(Archive & ar, const unsigned int version) { 116 | ar & BOOST_SERIALIZATION_NVP(dim); 117 | ar & BOOST_SERIALIZATION_NVP(ageMax); 118 | ar & BOOST_SERIALIZATION_NVP(iterationCount); 119 | ar & BOOST_SERIALIZATION_NVP(iterationThreshold); 120 | ar & BOOST_SERIALIZATION_NVP(numberOfClasses); 121 | ar & BOOST_SERIALIZATION_NVP(c1); 122 | ar & BOOST_SERIALIZATION_NVP(c2); 123 | ar & BOOST_SERIALIZATION_NVP(graph); 124 | } 125 | }; 126 | } 127 | 128 | #endif // ESOINN_H 129 | 
-------------------------------------------------------------------------------- /ESOINN.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * author: Sergei Belousov aka BeS 3 | * email: belbes122@yandex.ru 4 | */ 5 | #include "ESOINN.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #define E1(t) 1./t 15 | #define E2(t) 1./(100*t) 16 | 17 | using namespace soinn; 18 | using namespace boost::numeric; 19 | 20 | ESOINN::ESOINN(int dim, int ageMax, int iterationThreshold, double c1, double c2): 21 | dim(dim), 22 | ageMax(ageMax), 23 | iterationThreshold(iterationThreshold), 24 | c1(c1), 25 | c2(c2) 26 | { 27 | } 28 | 29 | ESOINN::~ESOINN() 30 | { 31 | } 32 | 33 | Graph ESOINN::getGraph() 34 | { 35 | return graph; 36 | } 37 | 38 | void ESOINN::process(const boost::numeric::ublas::vector &inputSignal) 39 | { 40 | if(inputSignal.size() != dim) 41 | { 42 | throw ESOINNException(std::string("Incorrect dimension of input signal in ESOINN::addSignal().")); 43 | } 44 | else 45 | { 46 | addSignal(inputSignal); 47 | } 48 | } 49 | 50 | void ESOINN::addSignal(const boost::numeric::ublas::vector &inputSignal) 51 | { 52 | if(boost::num_vertices(graph) < 2) 53 | { 54 | Vertex vertex = boost::add_vertex(graph); 55 | graph[vertex].weight = ublas::vector(inputSignal); 56 | graph[vertex].classId = -1; 57 | graph[vertex].density = 0.; 58 | graph[vertex].numberOfSignals = 0; 59 | graph[vertex].S = 0; 60 | return; 61 | } 62 | Vertex firstWinner, secondWinner; 63 | boost::tie(firstWinner, secondWinner) = findWinners(inputSignal); 64 | if(!isWithinThreshold(inputSignal, firstWinner, secondWinner)) 65 | { 66 | Vertex vertex = boost::add_vertex(graph); 67 | graph[vertex].weight = ublas::vector(inputSignal); 68 | graph[vertex].classId = -1; 69 | graph[vertex].density = 0.; 70 | graph[vertex].numberOfSignals = 0; 71 | graph[vertex].S = 0; 72 | return; 73 | } 74 | incrementEdgesAge(firstWinner); 75 | 
if(needAddEdge(firstWinner, secondWinner)) 76 | { 77 | Edge e = boost::add_edge(firstWinner, secondWinner, graph).first; 78 | graph[e].age = 0; 79 | } 80 | else 81 | { 82 | boost::remove_edge(firstWinner, secondWinner, graph); 83 | } 84 | updateDensity(firstWinner); 85 | updateWeights(firstWinner, inputSignal); 86 | deleteOldEdges(); 87 | if(iterationCount % iterationThreshold == 0) 88 | { 89 | updateClassLabels(); 90 | } 91 | iterationCount++; 92 | } 93 | 94 | double ESOINN::distance(const boost::numeric::ublas::vector &x, const boost::numeric::ublas::vector &y) 95 | { 96 | return ublas::norm_2( x - y ); 97 | } 98 | 99 | std::pair ESOINN::findWinners(const boost::numeric::ublas::vector &inputSignal) 100 | { 101 | Vertex firstWinner = NULL; 102 | Vertex secondWinner = NULL; 103 | double firstWinnerDistance = std::numeric_limits::max(); 104 | double secondWinnerDistance = std::numeric_limits::max(); 105 | VertexIterator current, end; 106 | boost::tie(current, end) = boost::vertices(graph); 107 | for(; current != end; current++) 108 | { 109 | double dist = distance(inputSignal, graph[*current].weight); 110 | if(dist < firstWinnerDistance) 111 | { 112 | secondWinner = firstWinner; 113 | secondWinnerDistance = firstWinnerDistance; 114 | firstWinner = *current; 115 | firstWinnerDistance = dist; 116 | } 117 | else if(dist < secondWinnerDistance) 118 | { 119 | secondWinner = *current; 120 | secondWinnerDistance = dist; 121 | } 122 | } 123 | return std::pair(firstWinner, secondWinner); 124 | } 125 | 126 | bool ESOINN::isWithinThreshold(const boost::numeric::ublas::vector& inputSignal, Vertex& firstWinner, Vertex& secondWinner) 127 | { 128 | if(distance(inputSignal, graph[firstWinner].weight) > getSimilarityThreshold(firstWinner)) 129 | { 130 | return false; 131 | } 132 | if(distance(inputSignal, graph[secondWinner].weight) > getSimilarityThreshold(secondWinner)) 133 | { 134 | return false; 135 | } 136 | return true; 137 | } 138 | 139 | double 
ESOINN::getSimilarityThreshold(const Vertex& vertex) 140 | { 141 | double dist = 0.0; 142 | if(!boost::out_degree(vertex, graph)) 143 | { 144 | dist = std::numeric_limits::max(); 145 | VertexIterator current, end; 146 | boost::tie(current, end) = boost::vertices(graph); 147 | for(; current != end; current++) 148 | { 149 | if(*current != vertex) 150 | { 151 | double distCurrent = distance(graph[vertex].weight, graph[*current].weight); 152 | if(distCurrent < dist) 153 | { 154 | dist = distCurrent; 155 | } 156 | } 157 | } 158 | } 159 | else 160 | { 161 | dist = std::numeric_limits::min(); 162 | AdjacencyIterator current, end; 163 | boost::tie(current, end) = boost::adjacent_vertices(vertex, graph); 164 | for(; current != end; current++) 165 | { 166 | double distCurrent = distance(graph[vertex].weight, graph[*current].weight); 167 | if(distCurrent > dist) 168 | { 169 | dist = distCurrent; 170 | } 171 | } 172 | } 173 | return dist; 174 | } 175 | 176 | void ESOINN::incrementEdgesAge(Vertex& vertex) 177 | { 178 | OutEdgeIterator current, end; 179 | boost::tie(current, end) = boost::out_edges(vertex, graph); 180 | for(; current != end; current++) 181 | { 182 | graph[*current].age++; 183 | } 184 | } 185 | 186 | bool ESOINN::needAddEdge(Vertex& firstWinner, Vertex &secondWinner) 187 | { 188 | if(graph[firstWinner].classId == -1 || graph[secondWinner].classId == -1) 189 | { 190 | return true; 191 | } 192 | else if(graph[firstWinner].classId == graph[secondWinner].classId) 193 | { 194 | return true; 195 | } 196 | else if(graph[firstWinner].classId != graph[secondWinner].classId && needMergeClasses(firstWinner, secondWinner)) 197 | { 198 | return true; 199 | } 200 | return false; 201 | } 202 | 203 | bool ESOINN::needMergeClasses(Vertex &a, Vertex &b) 204 | { 205 | int A = graph[a].classId; 206 | double meanA = meanDensity(A); 207 | double maxA = maxDensity(A); 208 | double thresholdA = densityThershold(meanA, maxA); 209 | int B = graph[b].classId; 210 | double meanB = 
meanDensity(B); 211 | double maxB = maxDensity(B); 212 | double thresholdB = densityThershold(meanB, maxB); 213 | double minAB = std::min(graph[a].density, graph[b].density); 214 | if(minAB > thresholdA * maxA && minAB > thresholdB * maxB) 215 | { 216 | return true; 217 | } 218 | return false; 219 | } 220 | 221 | void ESOINN::mergeClasses(int A, int B) 222 | { 223 | int classId = std::min(A, B); 224 | VertexIterator current, end; 225 | boost::tie(current, end) = boost::vertices(graph); 226 | for(; current != end; current++) 227 | { 228 | if(graph[*current].classId == A || graph[*current].classId == B) 229 | { 230 | graph[*current].classId = classId; 231 | } 232 | } 233 | } 234 | 235 | double ESOINN::meanDensity(int classId) 236 | { 237 | if(classId == -1) return 0.0; 238 | int n = 0; 239 | double density = 0.0; 240 | VertexIterator current, end; 241 | boost::tie(current, end) = boost::vertices(graph); 242 | for(; current != end; current++) 243 | { 244 | if(graph[*current].classId == classId) 245 | { 246 | n++; 247 | density += graph[*current].density; 248 | } 249 | } 250 | density *= 1./double(n); 251 | return density; 252 | } 253 | 254 | double ESOINN::maxDensity(int classId) 255 | { 256 | double density = std::numeric_limits::min(); 257 | VertexIterator current, end; 258 | boost::tie(current, end) = boost::vertices(graph); 259 | for(; current != end; current++) 260 | { 261 | if(graph[*current].density > density && graph[*current].classId == classId) 262 | { 263 | density = graph[*current].density; 264 | } 265 | } 266 | return density; 267 | } 268 | 269 | double ESOINN::densityThershold(double mean, double max) 270 | { 271 | double threshold; 272 | if(2.0 * mean >= max) 273 | { 274 | threshold = 0.0; 275 | } 276 | else if(3.0 * mean >= max && max > 2.0 * mean) 277 | { 278 | threshold = 0.5; 279 | } 280 | else 281 | { 282 | threshold = 1.0; 283 | } 284 | return threshold; 285 | } 286 | 287 | void ESOINN::updateDensity(Vertex& vertex) 288 | { 289 | double mDistance 
= meanDistance(vertex); 290 | graph[vertex].numberOfSignals++; 291 | graph[vertex].S += 1./((1 + mDistance)*(1 + mDistance)); 292 | graph[vertex].density = graph[vertex].S/double(graph[vertex].numberOfSignals); 293 | } 294 | 295 | void ESOINN::updateWeights(Vertex& firstWinner, const boost::numeric::ublas::vector &inputSignal) 296 | { 297 | graph[firstWinner].weight += E1(graph[firstWinner].numberOfSignals) * (inputSignal - graph[firstWinner].weight); 298 | AdjacencyIterator current, end; 299 | boost::tie(current, end) = boost::adjacent_vertices(firstWinner, graph); 300 | for(; current != end; current++) 301 | { 302 | graph[*current].weight += E2(graph[firstWinner].numberOfSignals) * (inputSignal - graph[*current].weight); 303 | } 304 | } 305 | 306 | double ESOINN::meanDistance(Vertex& vertex) 307 | { 308 | double mDistance = 0.0; 309 | int m = 0; 310 | VertexIterator current, end; 311 | boost::tie(current, end) = boost::vertices(graph); 312 | for(; current != end; current++) 313 | { 314 | if(graph[vertex].classId == graph[*current].classId) 315 | { 316 | mDistance += distance(graph[vertex].weight, graph[*current].weight); 317 | m++; 318 | } 319 | } 320 | mDistance *= 1./double(m); 321 | return mDistance; 322 | } 323 | 324 | void ESOINN::deleteOldEdges() 325 | { 326 | EdgeIterator current, end; 327 | boost::tie(current, end) = boost::edges(graph); 328 | EdgeIterator next = current; 329 | for(;current != end; current = next) 330 | { 331 | next ++; 332 | if(graph[*current].age > ageMax) 333 | { 334 | Vertex vertexS = boost::source(*current, graph); 335 | Vertex vertexT = boost::target(*current, graph); 336 | boost::remove_edge(*current, graph); 337 | } 338 | } 339 | } 340 | 341 | void ESOINN::updateClassLabels() 342 | { 343 | markClasses(); 344 | partitionClasses(); 345 | deleteNoiseVertex(); 346 | } 347 | 348 | void ESOINN::markClasses() 349 | { 350 | std::list vertexList; 351 | VertexIterator begin, end; 352 | boost::tie(begin, end) = boost::vertices(graph); 353 | 
for(VertexIterator current = begin; current != end; current++) 354 | { 355 | graph[*current].classId = -1; 356 | vertexList.push_back(current); 357 | } 358 | vertexList.sort([&](VertexIterator &a, VertexIterator &b) -> bool 359 | { 360 | if(graph[*a].density > graph[*b].density) return true; 361 | return false; 362 | }); 363 | int classCount = 0; 364 | for(std::list::iterator current = vertexList.begin(); current != vertexList.end(); current++) 365 | { 366 | if(graph[**current].classId == -1) 367 | { 368 | graph[**current].classId = classCount; 369 | markAdjacentVertices(**current, classCount++); 370 | } 371 | } 372 | } 373 | 374 | void ESOINN::partitionClasses() 375 | { 376 | EdgeIterator current, end; 377 | boost::tie(current, end) = boost::edges(graph); 378 | EdgeIterator next = current; 379 | for(;current != end; current = next) { 380 | next ++; 381 | Vertex vertexS = boost::source(*current, graph); 382 | Vertex vertexT = boost::target(*current, graph); 383 | if(graph[vertexS].classId != graph[vertexT].classId) 384 | { 385 | if(needMergeClasses(vertexS, vertexT)) 386 | { 387 | mergeClasses(graph[vertexS].classId, graph[vertexT].classId); 388 | } 389 | else 390 | { 391 | boost::remove_edge(*current, graph); 392 | } 393 | } 394 | } 395 | } 396 | 397 | void ESOINN::markAdjacentVertices(Vertex &vertex, int cID) 398 | { 399 | AdjacencyIterator current, end; 400 | boost::tie(current, end) = boost::adjacent_vertices(vertex, graph); 401 | for(; current != end; current++){ 402 | if(graph[*current].classId == -1 && graph[*current].density < graph[vertex].density) 403 | { 404 | graph[*current].classId = cID; 405 | Vertex v = *current; 406 | markAdjacentVertices(v, cID); 407 | } 408 | } 409 | } 410 | 411 | void ESOINN::deleteNoiseVertex() 412 | { 413 | VertexIterator begin, end; 414 | boost::tie(begin, end) = boost::vertices(graph); 415 | VertexIterator next = begin; 416 | for(VertexIterator current = begin; current != end; current = next) 417 | { 418 | next++; 419 | 
double mean = meanDensity(graph[*current].classId); 420 | if((boost::out_degree(*current, graph) == 2 && graph[*current].density < c1* mean) || 421 | (boost::out_degree(*current, graph) == 1 && graph[*current].density < c2* mean) || 422 | (boost::out_degree(*current, graph) == 0)) { 423 | boost::clear_vertex(*current, graph); 424 | boost::remove_vertex(*current, graph); 425 | } 426 | } 427 | } 428 | 429 | void ESOINN::classify() 430 | { 431 | deleteNoiseVertex(); 432 | size_t index = 0; 433 | BGL_FORALL_VERTICES(v, graph, Graph) 434 | { 435 | boost::put(boost::vertex_index, graph, v, index++); 436 | } 437 | ComponentMap component; 438 | boost::associative_property_map componentMap(component); 439 | numberOfClasses = connected_components(graph, componentMap); 440 | BGL_FORALL_VERTICES(v, graph, Graph) 441 | { 442 | graph[v].classId = boost::get(componentMap, v); 443 | } 444 | } 445 | 446 | void ESOINN::save(std::string filename) 447 | { 448 | std::ofstream ofs(filename.c_str()); 449 | boost::archive::xml_oarchive oa(ofs); 450 | oa << BOOST_SERIALIZATION_NVP(*this); 451 | } 452 | 453 | void ESOINN::load(std::string filename) 454 | { 455 | clear(); 456 | std::ifstream ifs(filename.c_str()); 457 | boost::archive::xml_iarchive ia(ifs); 458 | ia >> BOOST_SERIALIZATION_NVP(*const_cast(this)); 459 | } 460 | 461 | void ESOINN::clear() 462 | { 463 | graph.clear(); 464 | numberOfClasses = 0; 465 | } 466 | 467 | void ESOINN::setParams(int dim, int ageMax, int iterationThreshold, double c1, double c2) 468 | { 469 | this->dim = dim; 470 | this->ageMax = ageMax; 471 | this->iterationThreshold = iterationThreshold; 472 | this->c1 = c1; 473 | this->c2 = c2; 474 | } 475 | 476 | int ESOINN::getNumberOfClasses() 477 | { 478 | return numberOfClasses; 479 | } 480 | 481 | int ESOINN::getNumberOfVertices() 482 | { 483 | return boost::num_vertices(graph); 484 | } 485 | 486 | boost::numeric::ublas::vector ESOINN::getCenterOfCluster(int classId) 487 | { 488 | double density = -1; 489 | 
Vertex center; 490 | BGL_FORALL_VERTICES(v, graph, Graph) { 491 | if(graph[v].classId == classId && graph[v].density > density) { 492 | center = v; 493 | density = graph[center].density; 494 | } 495 | } 496 | return graph[center].weight; 497 | } 498 | 499 | VertexProperties ESOINN::getBestMatch(boost::numeric::ublas::vector& inputSignal) 500 | { 501 | Vertex firstWinner = NULL; 502 | double firstWinnerDistance = std::numeric_limits::max(); 503 | VertexIterator current, end; 504 | boost::tie(current, end) = boost::vertices(graph); 505 | for(; current != end; current++) 506 | { 507 | double dist = distance(inputSignal, graph[*current].weight); 508 | if(dist < firstWinnerDistance) 509 | { 510 | firstWinner = *current; 511 | firstWinnerDistance = dist; 512 | } 513 | } 514 | return graph[firstWinner]; 515 | } 516 | --------------------------------------------------------------------------------