├── .gitignore ├── LICENSE ├── README.md ├── requirements-to-freeze.txt ├── requirements.txt └── sbm ├── __init__.py ├── detect.py ├── generate.py ├── recover.py └── sbm.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Tyler Coyner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # stochastic-blockmodel 2 | 3 | ## Overview 4 | I wanted to have some practice implementing a stochastic block model, and some algorithms that deal with its detection and model recovery. This project will allow one to generate, detect, and recover them. 5 | 6 | ## Definition 7 | From [Wikipedia](https://en.wikipedia.org/wiki/Stochastic_block_model): 8 | 9 | The stochastic block model takes the following parameters: 10 | 11 | * The number *n* of vertices 12 | * a partition of the vertex set {1, ..., n} into disjoint subsets {C_1, ..., C_r} called communities 13 | * a symmetric r x r matrix P of edge probabilities. 14 | The edge set is then sampled at random as follows: any two vertices u in C_i and v in C_j are connected by an edge with probability P_ij. 15 | 16 | ## Generate 17 | One can generate an SBM by doing the following: 18 | 19 | ```python 20 | from sbm.sbm import SBM 21 | 22 | num_vertices = 5 # number of unique vertices 23 | num_communities = 3 # number of communities 24 | community_labels = [0, 1, 1, 0, 2] # community label assigned to each vertices 25 | p_matrix = [ 26 | [.5, .3, .2], 27 | [.6, .2, .2], 28 | [.2, .4, .4], 29 | ] 30 | 31 | model = SBM(num_vertices, num_communities, community_labels, p_matrix) 32 | 33 | print model.block_matrix 34 | ``` 35 | The SBM.block_matrix returned is a 2D numpy array representing the edges that are present (1), and not present (0). 36 | ```python 37 | array([[1, 1, 0, 0, 0], 38 | [1, 0, 0, 1, 0], 39 | [0, 0, 0, 1, 0], 40 | [1, 1, 0, 1, 0], 41 | [0, 1, 0, 1, 1]]) 42 | ``` 43 | 44 | ## Detection 45 | 46 | ## Recovery 47 | 48 | ## Papers 49 | Here are a list of papers that I have found resourceful (some overlapping topics): 50 | * http://tuvalu.santafe.edu/~aaronc/courses/5352/fall2013/csci5352_2013_L16.pdf 51 | * https://arxiv.org/abs/1503.00609v2 52 | * http://arxiv.org/abs/1512.09080v3 53 | * https://arxiv.org/abs/1405.3267v4 54 | * https://arxiv.org/abs/1506.03729v1 55 | * https://arxiv.org/abs/1202.1499v4 56 | -------------------------------------------------------------------------------- /requirements-to-freeze.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.11.1 2 | -------------------------------------------------------------------------------- /sbm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcoyze/stochastic-blockmodel/a29af9a78cda5dc153d0ef7a935d5f3cc0fa4382/sbm/__init__.py -------------------------------------------------------------------------------- /sbm/detect.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /sbm/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcoyze/stochastic-blockmodel/a29af9a78cda5dc153d0ef7a935d5f3cc0fa4382/sbm/generate.py -------------------------------------------------------------------------------- /sbm/recover.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /sbm/sbm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | import random 4 | 5 | import numpy as np 6 | 7 | 8 | class SBM(object): 9 | 10 | def __init__(self, num_vertices, communities, vertex_labels, p_matrix): 11 | logging.info('Initializing SBM Model ...') 12 | self.num_vertices = num_vertices 13 | self.communities = communities 14 | self.vertex_labels = vertex_labels 15 | self.p_matrix = p_matrix 16 | self.block_matrix = self.generate(self.num_vertices, self.communities, self.vertex_labels, self.p_matrix) 17 | 18 | def detect(self): 19 | logging.info('SBM detection ...') 20 | pass 21 | 22 | def generate(self, num_vertices, num_communities, vertex_labels, p_matrix): 23 | logging.info('Generating SBM (directed graph) ...') 24 | v_label_shape = (1, num_vertices) 25 | p_matrix_shape = (num_communities, num_communities) 26 | block_matrix_shape = (num_vertices, num_vertices) 27 | block_matrix = np.zeros(block_matrix_shape, dtype=int) 28 | 29 | for row, _row in enumerate(block_matrix): 30 | for col, _col in enumerate(block_matrix[row]): 31 | community_a = vertex_labels[row] 32 | community_b = vertex_labels[col] 33 | 34 | p = random.random() 35 | val = p_matrix[community_a][community_b] 36 | 37 | if p <= val: 38 | block_matrix[row][col] = 1 39 | 40 | return block_matrix 41 | 42 | def recover(self): 43 | logging.info('SBM recovery ...') 44 | pass 45 | --------------------------------------------------------------------------------