├── dynetlsm
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_lsm.py
│   │   └── test_hdp_lcpm.py
│   ├── __init__.py
│   ├── model_selection
│   │   ├── __init__.py
│   │   ├── train_test_split.py
│   │   ├── posterior_vi.py
│   │   └── approx_bic.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── raw_data
│   │   │   ├── sampson_groups.txt
│   │   │   ├── sampson_groups_waverers.txt
│   │   │   ├── sampson_names.txt
│   │   │   ├── sampson.R
│   │   │   ├── sampson_0.npy
│   │   │   ├── sampson_1.npy
│   │   │   ├── sampson_2.npy
│   │   │   ├── military_alliances
│   │   │   │   ├── cow_alliances.R
│   │   │   │   └── names.csv
│   │   │   ├── got
│   │   │   │   ├── got-s8-nodes.csv
│   │   │   │   ├── got-s7-nodes.csv
│   │   │   │   ├── got-s5-node.csv
│   │   │   │   ├── got-s3-nodes.csv
│   │   │   │   ├── got-s1-nodes.csv
│   │   │   │   ├── got-s2-nodes.csv
│   │   │   │   ├── got-s6-nodes.csv
│   │   │   │   ├── got-s4-nodes.csv
│   │   │   │   ├── got-s7-edges.csv
│   │   │   │   └── got-s5-edges.csv
│   │   │   └── sampson.npy
│   │   ├── load_alliances.py
│   │   ├── load_got.py
│   │   ├── load_monks.py
│   │   └── detection_limit.py
│   ├── version.py
│   ├── array_utils.py
│   ├── sample_concentration.py
│   ├── procrustes.py
│   ├── metrics.py
│   ├── gaussian_likelihood_fast.pyx
│   ├── sample_auxillary.py
│   ├── text_utils.py
│   ├── network_statistics.py
│   ├── label_utils.py
│   ├── network_likelihoods.py
│   ├── static_network_fast.pyx
│   ├── imputer.py
│   ├── trace_utils.py
│   ├── distributions.py
│   ├── forecast.pyx
│   ├── metropolis.py
│   ├── sample_coefficients.py
│   ├── latent_space.py
│   ├── sample_labels.py
│   ├── case_control_likelihood.py
│   └── sample_latent_positions.py
├── .gitattributes
├── test_requirements.txt
├── images
│   ├── hdp.png
│   ├── lpcm.png
│   ├── dynamic_lsm.png
│   ├── lsm_traces.png
│   ├── static_lsm.png
│   ├── dynamic_label.png
│   ├── alluvial_diagram.png
│   ├── dynamic_lpcm_rw.png
│   ├── dynamic_lsm_rw.png
│   ├── hdp_lpcm_traces.png
│   ├── lsm_latent_space.png
│   ├── static_lsm_prior.png
│   ├── dynamic_lpcm_initial.png
│   ├── dynamic_lsm_initial.png
│   └── hdp_lpcm_latent_space.png
├── requirements.txt
├── MANIFEST.in
├── Makefile
├── setup.cfg
├── ci_scripts
│   └── travis
│       ├── test.sh
│       ├── success.sh
│       └── install.sh
├── appveyor.yml
├── .travis.yml
├── LICENSE
├── .gitignore
├── examples
│   ├── military_alliances.py
│   ├── GoT.py
│   ├── sampson_monks.py
│   ├── homogeneous_dynsbm.R
│   ├── homogeneous_sbm.R
│   ├── inhomogeneous_sbm.R
│   ├── merging_communities.py
│   ├── detection_limit.py
│   ├── inhomogeneous_simulation.py
│   └── homogeneous_simulation.py
└── setup.py
/dynetlsm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-documentation 2 | --------------------------------------------------------------------------------
/test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-pep8 3 | pytest-cov --------------------------------------------------------------------------------
/images/hdp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp.png --------------------------------------------------------------------------------
/images/lpcm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lpcm.png --------------------------------------------------------------------------------
/dynetlsm/__init__.py: -------------------------------------------------------------------------------- 1 | from .lsm import * 2 | from .lpcm import * 3 | from 
.hdp_lpcm import * 4 | -------------------------------------------------------------------------------- /images/dynamic_lsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm.png -------------------------------------------------------------------------------- /images/lsm_traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lsm_traces.png -------------------------------------------------------------------------------- /images/static_lsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/static_lsm.png -------------------------------------------------------------------------------- /images/dynamic_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_label.png -------------------------------------------------------------------------------- /images/alluvial_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/alluvial_diagram.png -------------------------------------------------------------------------------- /images/dynamic_lpcm_rw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lpcm_rw.png -------------------------------------------------------------------------------- /images/dynamic_lsm_rw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm_rw.png -------------------------------------------------------------------------------- /images/hdp_lpcm_traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp_lpcm_traces.png -------------------------------------------------------------------------------- /images/lsm_latent_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lsm_latent_space.png -------------------------------------------------------------------------------- /images/static_lsm_prior.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/static_lsm_prior.png -------------------------------------------------------------------------------- /images/dynamic_lpcm_initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lpcm_initial.png -------------------------------------------------------------------------------- /images/dynamic_lsm_initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm_initial.png -------------------------------------------------------------------------------- /images/hdp_lpcm_latent_space.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp_lpcm_latent_space.png --------------------------------------------------------------------------------
/dynetlsm/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_bic import * 2 | from .posterior_vi import * 3 | from .train_test_split import * 4 | --------------------------------------------------------------------------------
/dynetlsm/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .samples_generator import * 2 | from .detection_limit import * 3 | from .load_monks import * 4 | from .load_got import * 5 | from .load_alliances import * 6 | --------------------------------------------------------------------------------
/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | numpy 4 | scikit-learn 5 | tqdm 6 | networkx 7 | pandas 8 | plac 9 | joblib 10 | matplotlib 11 | seaborn 12 | arviz 13 | pyvis 14 | statsmodels 15 | --------------------------------------------------------------------------------
/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | recursive-include examples * 3 | recursive-include dynetlsm/datasets *.csv *.npy *.txt 4 | include README.md 5 | include requirements.txt 6 | include test_requirements.txt 7 | --------------------------------------------------------------------------------
/dynetlsm/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | __all__ = ['__version__', 'VERSION'] 6 | 7 | __version__ = '0.1.0' 8 | VERSION = __version__ 9 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_groups.txt: -------------------------------------------------------------------------------- 1 | Turks 2 | Turks 3 | Outcasts 4 | Loyal 5 | Loyal 6 | Loyal 7 | Turks 8 | Loyal 9 | Loyal 10 | Loyal 11 | Loyal 12 | Turks 13 | Outcasts 14 | Turks 15 | Turks 16 | Turks 17 | Outcasts 18 | Outcasts 19 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_groups_waverers.txt: -------------------------------------------------------------------------------- 1 | Turks 2 | Turks 3 | Outcasts 4 | Loyal 5 | Loyal 6 | Loyal 7 | Turks 8 | Waverers 9 | Loyal 10 | Waverers 11 | Loyal 12 | Turks 13 | Waverers 14 | Turks 15 | Turks 16 | Turks 17 | Outcasts 18 | Outcasts 19 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_names.txt: -------------------------------------------------------------------------------- 1 | John Bosco 2 | Gregory 3 | Basil 4 | Peter 5 | Bonaventure 6 | Berthold 7 | Mark 8 | Victor 9 | Ambrose 10 | Romauld 11 | Louis 12 | Winfrid 13 | Amand 14 | Hugh 15 | Boniface 16 | Albert 17 | Elias 18 | Simplicius 19 | --------------------------------------------------------------------------------
/Makefile: -------------------------------------------------------------------------------- 1 | # makefile to simplify repetitive build env management tasks under posix 2 | 3 | PYTHON ?= python 4 | PYTEST ?= pytest 5 | 6 | clean: 7 | $(PYTHON) setup.py clean 8 | rm -rf dist 9 | 10 | install-dev: 11 | $(PYTHON) setup.py develop 12 | 13 | test-code: install-dev 14 | $(PYTEST) --showlocals -v dynetlsm 15 |
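# Editor's usage sketch (not part of the original Makefile): with the two
# targets above, a typical workflow is
#   make install-dev   # build the package (incl. its Cython extensions) in-place
#   make test-code     # run the pytest suite against that dev install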
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [pep8] 5 | # default is 79 6 | max-line-length=80 7 | 8 | [aliases] 9 | test=pytest 10 | 11 | [tool:pytest] 12 | addopts = 13 | --verbose 14 | --disable-pytest-warnings 15 | --doctest-modules dynetlsm 16 | pep8maxlinelength = 80 17 | 18 | [bdist_wheel] 19 | universal=1 20 | --------------------------------------------------------------------------------
/dynetlsm/tests/test_lsm.py: -------------------------------------------------------------------------------- 1 | from dynetlsm import DynamicNetworkLSM 2 | from dynetlsm.datasets import simple_splitting_dynamic_network 3 | 4 | 5 | def test_lsm_smoke(): 6 | Y, labels = simple_splitting_dynamic_network( 7 | n_nodes=50, n_time_steps=2, random_state=42) 8 | 9 | lsm = DynamicNetworkLSM(n_iter=250, burn=250, tune=250, 10 | n_features=2, random_state=123) 11 | lsm.fit(Y) 12 | 13 | assert lsm.X_.shape == (2, 50, 2) 14 | --------------------------------------------------------------------------------
/ci_scripts/travis/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Get into a temp directory to run the tests from the installed package and 4 | # check that we do not leave artifacts 5 | mkdir -p $TEST_DIR 6 | cp setup.cfg $TEST_DIR 7 | cd $TEST_DIR 8 | 9 | python --version 10 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 11 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 12 | 13 | if [[ "$COVERAGE" == "true" ]]; then 14 | pytest --cov=$MODULE --pyargs 15 | else 16 | pytest --pyargs 17 | fi 18 | --------------------------------------------------------------------------------
/ci_scripts/travis/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # Need to run coveralls from a git checkout, so we copy .coverage 5 | # from TEST_DIR where pytest has been run 6 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published. 
12 | coveralls || echo "Coveralls upload failed" 13 | fi -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson.R: -------------------------------------------------------------------------------- 1 | library(ergm) 2 | 3 | data(samplk) 4 | 5 | Y1 <- as.matrix(samplk1) 6 | print(colnames(Y1)) 7 | write.table(Y1, 8 | file=paste0('sampson_', 0, '.npy'), 9 | col.names=FALSE, row.names=FALSE) 10 | 11 | Y2 <- as.matrix(samplk2) 12 | write.table(Y2, 13 | file=paste0('sampson_', 1, '.npy'), 14 | col.names=FALSE, row.names=FALSE) 15 | 16 | Y3 <- as.matrix(samplk3) 17 | write.table(Y3, 18 | file=paste0('sampson_', 2, '.npy'), 19 | col.names=FALSE, row.names=FALSE) 20 | -------------------------------------------------------------------------------- /dynetlsm/tests/test_hdp_lcpm.py: -------------------------------------------------------------------------------- 1 | from dynetlsm import DynamicNetworkHDPLPCM 2 | from dynetlsm.datasets import simple_splitting_dynamic_network 3 | 4 | 5 | def test_hdp_lpcm_smoke(): 6 | Y, labels = simple_splitting_dynamic_network( 7 | n_nodes=50, n_time_steps=2, random_state=42) 8 | 9 | lpcm = DynamicNetworkHDPLPCM(n_iter=250, burn=250, tune=250, 10 | n_features=2, n_components=10, 11 | random_state=123) 12 | lpcm.fit(Y) 13 | 14 | assert lpcm.X_.shape == (2, 50, 2) 15 | assert lpcm.z_.shape == (2, 50) 16 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_0.npy: -------------------------------------------------------------------------------- 1 | 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 3 | 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 4 | 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 5 | 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 6 | 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 8 | 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 10 | 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 11 | 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 12 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 15 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 16 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 17 | 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 18 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 19 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_1.npy: -------------------------------------------------------------------------------- 1 | 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 3 | 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 4 | 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 5 | 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 6 | 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 8 | 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 10 | 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 11 | 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 12 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 15 | 1 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 16 | 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 17 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 18 | 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 19 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_2.npy: -------------------------------------------------------------------------------- 1 | 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 
0 0 1 0 0 0 0 0 0 3 | 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 4 | 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 5 | 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 6 | 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 8 | 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 10 | 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 11 | 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 12 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 15 | 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 16 | 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 17 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 18 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 19 | --------------------------------------------------------------------------------
/dynetlsm/array_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def triu_indices_from_3d(Y, k=0): 5 | return np.nonzero(~np.stack( 6 | [np.tri(Y.shape[1], Y.shape[2], k=k-1, dtype=bool) for 7 | t in range(Y.shape[0])] 8 | )) 9 | 10 | 11 | def diag_indices_from_3d(Y): 12 | return np.nonzero(np.stack( 13 | [np.eye(Y.shape[1], Y.shape[2], dtype=bool) for 14 | t in range(Y.shape[0])] 15 | )) 16 | 17 | 18 | def nondiag_indices_from_3d(Y): 19 | return np.nonzero(~np.stack( 20 | [np.eye(Y.shape[1], Y.shape[2], dtype=bool) for 21 | t in range(Y.shape[0])] 22 | )) 23 | 24 | 25 | def nondiag_indices_from(Y): 26 | return np.nonzero(~np.eye(Y.shape[0], Y.shape[1], dtype=bool)) 27 | --------------------------------------------------------------------------------
/appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON: "C:\\Miniconda36-x64" 6 | PYTHON_VERSION: "3.7.x" 7 | PYTHON_ARCH: "64" 8 | 9 | - PYTHON: "C:\\Miniconda36" 10 | PYTHON_VERSION: "3.7.x" 11 | PYTHON_ARCH: "32" 12 | 13 | matrix: 14 | fast_finish: true 15 | 16 | install: 17 | # Prepend miniconda installed Python to the PATH of this build 18 | # Add Library/bin directory to fix issue 19 | # https://github.com/conda/conda/issues/1753 20 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%" 21 | - conda install pip scipy numpy scikit-learn=0.22.1 cython -y -q 22 | - conda install pytest pytest-cov -y -q 23 | - pip install . 24 | 25 | test_script: 26 | - mkdir for_test 27 | - cd for_test 28 | - pytest --pyargs 29 | --------------------------------------------------------------------------------
/dynetlsm/sample_concentration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | 6 | def sample_concentration_param(alpha, n_clusters, n_samples, prior_shape=1.0, 7 | prior_rate=1.0, random_state=None): 8 | """Sample concentration parameters as in Escobar and West (1995)""" 9 | rng = check_random_state(random_state) 10 | 11 | # auxiliary variable sampler 12 | eta = rng.beta(alpha + 1, n_samples) 13 | 14 | m_shape = prior_shape + n_clusters - 1 15 | m_scale = prior_rate - np.log(eta) 16 | 17 | odds = (m_shape / m_scale) * (1 / n_samples) 18 | mix_indicator = rng.binomial(1, odds / (1 + odds)) 19 | m_shape = m_shape + 1 if mix_indicator else m_shape 20 | 21 | return rng.gamma(shape=m_shape, scale=1. 
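# (Editor's note: NumPy's Gamma sampler is parameterized by *scale*, so the
# posterior rate m_scale enters through its reciprocal below.)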
/ m_scale) 22 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/military_alliances/cow_alliances.R: -------------------------------------------------------------------------------- 1 | library(igraph) 2 | library(tidygraph) 3 | library(tidyverse) 4 | 5 | data <- read_csv('alliance_v4.1_by_dyad_yearly.csv') %>% 6 | rename(from = state_name1, to = state_name2) %>% 7 | select(from, to, year, defense) 8 | 9 | names <- as_tbl_graph(data) %>% 10 | activate(nodes) %>% 11 | as_tibble() 12 | 13 | write_csv(names, 'names.csv') 14 | 15 | step_size <- 5 16 | for (year_id in seq(1950, 1975, by = step_size)) { 17 | graph <- as_tbl_graph(data) %>% 18 | activate(edges) %>% 19 | filter(year >= year_id) %>% 20 | filter(year < (year_id + step_size)) 21 | 22 | Y <- as_adjacency_matrix(graph, sparse = FALSE) 23 | write.table(Y, file=paste0('network_', year_id, '.npy'), 24 | col.names = FALSE, row.names = FALSE) 25 | } 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | 4 | language: python 5 | notifications: 6 | email: false 7 | cache: 8 | apt: true 9 | # We use three different cache directory 10 | # to work around a Travis bug with multi-platform cache 11 | directories: 12 | - $HOME/.cache/pip 13 | - $HOME/download 14 | env: 15 | global: 16 | # Directory where tests are run from 17 | - TEST_DIR=/tmp/test_dir/ 18 | - MODULE=dynetlsm 19 | matrix: 20 | - DISTRIB="conda" PYTHON_VERSION="3.7" 21 | NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" CYTHON_VERSION="0.29.14" 22 | 23 | install: source ci_scripts/travis/install.sh 24 | script: bash ci_scripts/travis/test.sh 25 | after_success: source ci_scripts/travis/success.sh 26 | 27 | deploy: 28 | provider: pypi 29 | distributions: sdist bdist_wheel 30 | user: joshloyal 31 | password: 32 | secure: PLEASE_REPLACE_ME 33 | on: 34 | tags: true 35 | repo: joshloyal/dynetlsm 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020, Joshua D. Loyal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /ci_scripts/travis/install.sh: -------------------------------------------------------------------------------- 1 | # Deactivate the travis-provided virtual environment and setup a 2 | # conda-based environment instead 3 | deactivate 4 | 5 | # Use the miniconda installer for faster download / install of conda 6 | # itself 7 | pushd . 8 | cd 9 | mkdir -p download 10 | cd download 11 | echo "Cached in $HOME/download :" 12 | ls -l 13 | echo 14 | if [[ ! -f miniconda.sh ]] 15 | then 16 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 17 | -O miniconda.sh 18 | fi 19 | chmod +x miniconda.sh && ./miniconda.sh -b 20 | cd .. 21 | export PATH=/home/travis/miniconda/bin:$PATH 22 | conda update --yes conda 23 | popd 24 | 25 | # Configure the conda environment and put it in the path using the 26 | # provided versions 27 | conda create -n testenv --yes python=$PYTHON_VERSION pip 28 | source activate testenv 29 | 30 | # numeric libraries 31 | conda install --yes \ 32 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION 33 | 34 | # test libraries 35 | conda install --yes \ 36 | pep8 nose pytest pytest-pep8 pytest-xdist pytest-cov 37 | 38 | if [[ "$COVERAGE" == "true" ]]; then 39 | pip install coverage coveralls 40 | pip install python-coveralls 41 | fi 42 | 43 | python --version 44 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 45 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 46 | python setup.py develop 47 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s8-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALANNA,Alanna 5 | ALTON,Alton 6 | ALYS,Alys 7 | ARYA,Arya 8 | BERIC,Beric 9 | BRAN,Bran 10 | BRIENNE,Brienne 11 | BRONN,Bronn 12 | CATELYN,Catelyn 13 | CERSEI,Cersei 14 | CERSEIS_BABY,Cersei's Baby 15 | CRAYAH,Crayah 16 | DAENERYS,Daenerys 17 | DAVOS,Davos 18 | DICKON,Dickon 19 | DIRAH,Dirah 20 | DORNISH_PRINCE,Dornish Prince 21 | EDDISON_TOLLETT,Eddison 22 | EDMURE,Edmure 23 | EURON,Euron 24 | GENDRY,Gendry 25 | GILLY,Gilly 26 | GREY_WORM,Grey Worm 27 | HARRY,Harry 28 | HOUND,Sandor 29 | IRONBORN_LORD,Ironborn Lord 30 | JAIME,Jaime 31 | JOFFREY,Joffrey 32 | JON,Jon 33 | JORAH,Jorah 34 | LITTLE_SAM,Little Sam 35 | LITTLEFINGER,Petyr 36 | LYANNA,Lyanna 37 | LYANNA_MORMONT,Lyanna Mormont 38 | MAREI,Marei 39 | MARTHA,Martha 40 | MARWYN,Marwyn 41 | MELISANDRE,Melisandre 42 | MISSANDEI,Missandei 43 | MOUNTAIN,Gregor 44 | NED,Ned 45 | NED_UMBER,Ned Umber 46 | NIGHT_KING,Night King 47 | NORA,Nora 48 | OWEN,Owen 49 | PODRICK,Podrick 50 | QHONO,Qhono 51 | QYBURN,Qyburn 52 | RAMSAY,Ramsay 53 | RANDYLL,Randyll 54 | RHAEGAR,Rhaegar 55 | RIVERLANDS_LORD,Riverlands Lord 56 | ROBERT,Robert 57 | ROBIN,Robin 58 | SAM,Sam 59 | SANSA,Sansa 60 | SARRA,Sarra 61 | TEELA,Teela 62 | THEON,Theon 63 | TORMUND,Tormund 64 | TYRION,Tyrion 65 | TYWIN,Tywin 66 | UNSULLIED_CAPTAIN,Unsullied Captain 67 | VALE_LORD,Vale Lord 68 | VARYS,Varys 69 | VICKY,Vicky 70 | VISERYS,Viserys 71 | WILLA,Willa 72 | WILLIAM,William 73 | MAESTER_WOLKAN,Wolkan 74 | YARA,Yara 75 | YOHN_ROYCE,Yohn Royce -------------------------------------------------------------------------------- /dynetlsm/model_selection/train_test_split.py: -------------------------------------------------------------------------------- 1 | import numpy as 
np 2 | 3 | from math import ceil 4 | 5 | from sklearn.utils import check_random_state 6 | 7 | from ..array_utils import triu_indices_from_3d 8 | 9 | 10 | MAX_INT = np.iinfo(np.int32).max 11 | 12 | __all__ = ['train_test_split'] 13 | 14 | 15 | def train_test_split(Y, test_size=0.1, random_state=None): 16 | """Split dyads into training and testing subsets. 17 | 18 | Parameters 19 | ---------- 20 | Y : array-like, shape (n_time_steps, n_nodes, n_nodes) 21 | """ 22 | n_time_steps, n_nodes, _ = Y.shape 23 | 24 | random_state = check_random_state(random_state) 25 | 26 | # number of dyads in an undirected graph with n_nodes nodes 27 | n_dyads = int(0.5 * n_nodes * (n_nodes - 1)) 28 | test_size_type = np.asarray(test_size).dtype.kind 29 | if test_size_type == 'f': 30 | n_test = ceil(test_size * n_dyads) 31 | else: 32 | n_test = int(test_size) 33 | 34 | Y_new = np.zeros_like(Y) 35 | for t in range(n_time_steps): 36 | tril_indices = np.tril_indices_from(Y[t], k=-1) 37 | 38 | perm = random_state.choice( 39 | np.arange(n_dyads), size=n_test, replace=False) 40 | test_indices = perm 41 | 42 | Y_vec = Y[t][tril_indices] 43 | Y_vec[perm] = -1.0 44 | Y_new[t][tril_indices] = Y_vec 45 | Y_new[t] += Y_new[t].T 46 | 47 | 48 | triu_indices = triu_indices_from_3d(Y_new, k=1) 49 | test_indices = Y_new[triu_indices] == -1 50 | return Y_new, test_indices 51 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s7-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALYS,Alys 5 | ARYA,Arya 6 | BALERION,Balerion 7 | BENJEN,Benjen 8 | BERIC,Beric 9 | BRAN,Bran 10 | BRIENNE,Brienne 11 | BRONN,Bronn 12 | CATELYN,Catelyn 13 | CERSEI,Cersei 14 | DAENERYS,Daenerys 15 | DAVOS,Davos 16 | DICKON,Dickon 17 | EDDISON_TOLLETT,Eddison 18 | DROGO,Drogo 19 | ELLARIA,Ellaria 20 | EURON,Euron 21 | GENDRY,Gendry 22 | GILLY,Gilly 23 | GREY_WORM,Grey Worm 24 | HARRAG,Harrag 25 | HIGH_SEPTON,High Septon (Aerys) 26 | HOT_PIE,Hot Pie 27 | HOUND,Sandor 28 | HOWLAND,Howland 29 | JAIME,Jaime 30 | JEOR,Jeor 31 | JOANNA,Joanna 32 | JOFFREY,Joffrey 33 | JON,Jon 34 | JON_ARRYN,Jon Arryn 35 | JORAH,Jorah 36 | LITTLE_SAM,Little Sam 37 | LITTLEFINGER,Petyr 38 | LYANNA,Lyanna 39 | LYANNA_MORMONT,Lyanna Mormont 40 | LYSA,Lysa 41 | MARGAERY,Margaery 42 | MARWYN,Marwin 43 | MEERA,Meera 44 | MELISANDRE,Melisandre 45 | MISSANDEI,Missandei 46 | MOUNTAIN,Gregor 47 | MYRCELLA,Myrcella 48 | NED,Ned 49 | NED_UMBER,Ned Umber 50 | NIGHT_KING,Night King 51 | NYMERIA,Nymeria 52 | OBARA,Obara 53 | OBERYN,Oberyn 54 | OLENNA,Olenna 55 | PODRICK,Podrick 56 | QHONO,Qhono 57 | QYBURN,Qyburn 58 | RAMSAY,Ramsay 59 | RANDYLL,Randyll 60 | RHAEGAR,Rhaegar 61 | ROBB,Robb 62 | ROBERT,Robert 63 | ROBETT,Robett 64 | SAM,Sam 65 | SANSA,Sansa 66 | SHIREEN,Shireen 67 | STANNIS,Stannis 68 | THEON,Theon 69 | THOROS,Thoros 70 | TOMMEN,Tommen 71 | TORMUND,Tormund 72 | TYCHO,Tycho 73 | TYENE,Tyene 74 | TYRION,Tyrion 75 | TYWIN,Tywin 76 | VARYS,Varys 77 | VISERYS,Viserys 78 | WALDER,Walder 79 | WHITE_WALKER,White Walker 80 | MAESTER_WOLKAN,Wolkan 81 | YARA,Yara 82 | YOHN_ROYCE,Yohn Royce -------------------------------------------------------------------------------- /dynetlsm/datasets/load_alliances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import networkx as nx 4 | 5 | from os.path import dirname, join 6 | 7 | 8 | __all__ = ['load_alliances'] 
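# Editor's usage sketch (hedged, not part of the original module): the loader
# defined below returns one binary five-year window per slice for 1950-1975,
# together with the country names aligned to the node axis, e.g.
#   Y, names = load_alliances(min_degree=1)
#   assert Y.shape == (6, len(names), len(names))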
9 | 10 | 11 | def load_alliances(min_degree=1, directed=False, remove_periphery=True): 12 | module_path = dirname(__file__) 13 | file_path = join(module_path, 'raw_data', 'military_alliances') 14 | 15 | n_nodes = 180 16 | n_years = 6 17 | Y = np.zeros((n_years, n_nodes, n_nodes)) 18 | 19 | if directed: 20 | file_fmt = 'directed_network_{}.npy' 21 | else: 22 | file_fmt = 'network_{}.npy' 23 | 24 | for t, year in enumerate(range(1950, 1980, 5)): 25 | Y[t] = np.loadtxt(join(file_path, file_fmt.format(year))) 26 | 27 | # binarize network 28 | Y = (Y > 0).astype(np.float64) 29 | 30 | # symmetrize network 31 | if not directed: 32 | for t in range(Y.shape[0]): 33 | Y[t] = (Y[t] + Y[t].T) / 2. 34 | Y = (Y > 0).astype(np.float64) 35 | 36 | if remove_periphery: 37 | for t in range(Y.shape[0]): 38 | G = nx.from_numpy_array(Y[t]) 39 | core_id = np.asarray(list(nx.core_number(G).values())) 40 | mask = np.where(core_id <= 2)[0] 41 | Y[t, mask] = 0 42 | Y[t, :, mask] = 0 43 | 44 | # keep only countries whose total degree is at least min_degree 45 | active_ids = np.where( 46 | (Y.sum(axis=(0, 1)) + Y.sum(axis=(0, 2))) >= min_degree)[0] 47 | Y = np.ascontiguousarray(Y[:, active_ids][:, :, active_ids]) 48 | 49 | # load country names 50 | names = pd.read_csv(join(file_path, 'names.csv')) 51 | names = names.values.ravel()[active_ids] 52 | 53 | return np.ascontiguousarray(Y), names 54 | --------------------------------------------------------------------------------
/dynetlsm/procrustes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg as linalg 3 | from scipy.linalg import orthogonal_procrustes 4 | 5 | 6 | def flatten_array(X): 7 | return X.reshape(np.prod(X.shape[:-1]), -1) 8 | 9 | 10 | def compute_procrustes_rotation(X, Y): 11 | """X is the reference and Y is matching X""" 12 | X_center = X - np.mean(X, axis=0) 13 | Y_center = Y - np.mean(Y, axis=0) 14 | 15 | u, s, v = linalg.svd(np.dot(X_center.T, Y_center)) 16 | 17 | return np.dot(v.T, u.T)  # np.linalg.svd returns vh, so transpose it back 18 | 19 | 20 | def static_procrustes_rotation(X, Y): 21 | """Rotate Y to match X""" 22 | #A = compute_procrustes_rotation(X, Y) 23 | #return np.dot(Y - np.mean(Y, axis=0), A) 24 | R, _ = orthogonal_procrustes(Y, X) 25 | return np.dot(Y, R), R 26 | 27 | 28 | def longitudinal_procrustes_rotation(X_ref, X): 29 | """A single procrustes transformation applied across time.""" 30 | n_time_steps, n_nodes = X.shape[:-1] 31 | 32 | X_ref = flatten_array(X_ref) 33 | X = flatten_array(X) 34 | X, R = static_procrustes_rotation(X_ref, X) 35 | return X.reshape(n_time_steps, n_nodes, -1), R 36 | 37 | 38 | def longitudinal_procrustes_transform(X, means, copy=True): 39 | if copy: 40 | # copy data over 41 | X = X.copy() 42 | 43 | if means is not None: 44 | means = means.copy() 45 | 46 | # apply procrustes transformation to samples past the tuning phase 47 | n_samples = X.shape[0] 48 | X_ref = X[0] 49 | for i in range(1, n_samples): 50 | X_new = X[i] 51 | 52 | P = compute_procrustes_rotation(X_ref, X_new) 53 | X[i] = np.dot(X_new, P) 54 | 55 | if means is not None: 56 | mu_new = means[i] 57 | means[i] = np.dot(mu_new, P) 58 | 59 | return X, means 60 |
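# Editor's self-check sketch (hedged; not part of the original module):
# rotating a configuration by a known orthogonal matrix and then matching it
# back with the helper above should recover the original positions.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X_demo = rng.randn(10, 2)
    theta = 0.3
    R_demo = np.array([[np.cos(theta), -np.sin(theta)],
                       [np.sin(theta), np.cos(theta)]])
    X_matched, _ = static_procrustes_rotation(X_demo, X_demo.dot(R_demo))
    assert np.allclose(X_matched, X_demo)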
-------------------------------------------------------------------------------- /dynetlsm/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import roc_auc_score, mutual_info_score 4 | from sklearn.metrics.cluster import entropy 5 | 6 | from .array_utils import triu_indices_from_3d 7 | from .array_utils import nondiag_indices_from_3d 8 | 9 | 10 | def network_auc(Y_true, Y_pred, is_directed=False, nan_mask=None): 11 | if is_directed: 12 | indices = nondiag_indices_from_3d(Y_true) 13 | else: 14 | indices = triu_indices_from_3d(Y_true, 1) 15 | 16 | y_fit = Y_pred[indices] 17 | y_true = Y_true[indices] 18 | 19 | if nan_mask is not None: 20 | y_fit = y_fit[~nan_mask] 21 | y_true = y_true[~nan_mask] 22 | 23 | return roc_auc_score(y_true, y_fit) 24 | 25 | 26 | def out_of_sample_auc(y_true, y_pred, test_indices): 27 | indices = triu_indices_from_3d(y_true, k=1) 28 | return roc_auc_score(y_true[indices][test_indices], y_pred) 29 | 30 | 31 | def _network_auc_directed(Y_true, Y_pred): 32 | y_true, y_fit = [], [] 33 | 34 | indices = np.triu_indices_from(Y_true[0], 1) 35 | for t in range(Y_true.shape[0]): 36 | y_fit.append(Y_pred[t][indices]) 37 | y_true.append(Y_true[t][indices]) 38 | 39 | return roc_auc_score(np.hstack(y_true), np.hstack(y_fit)) 40 | 41 | 42 | def _network_auc_undirected(Y_true, Y_pred): 43 | indices = triu_indices_from_3d(Y_true, 1) 44 | 45 | y_true = Y_true[indices] 46 | y_fit = Y_pred[indices] 47 | 48 | return roc_auc_score(y_true, y_fit) 49 | 50 | 51 | def variation_of_information(labels_true, labels_pred): 52 | entropy_true = entropy(labels_true) 53 | entropy_pred = entropy(labels_pred) 54 | mutual_info = mutual_info_score(labels_true, labels_pred) 55 | 56 | return entropy_true + entropy_pred - 2 * mutual_info 57 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.c 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | doc/_build/ 67 | doc/generated/ 68 | doc/modules/ 69 | doc/auto_examples/ 70 | doc/notebooks/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /dynetlsm/gaussian_likelihood_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # cython: nonecheck=False 7 | # cython: initializedcheck=False 8 | from libc.math cimport log, exp, sqrt, M_PI 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | 14 | ctypedef np.npy_float64 DOUBLE 15 | 16 | 17 | cpdef DOUBLE spherical_normal_log_pdf(DOUBLE[:] x, 18 | DOUBLE[:] mean, 19 | double var) nogil: 20 | cdef int k = 0 21 | cdef int n_features = x.shape[0] 22 | cdef DOUBLE sum_sq = 0.0 23 | 24 | for k in range(n_features): 25 | sum_sq += (x[k] - mean[k]) ** 2 26 | sum_sq *= 0.5 * (1. 
/ var) 27 | return -0.5 * n_features * log(2 * M_PI * var) - sum_sq 28 | 29 | 30 | def compute_gaussian_likelihood(DOUBLE[:, :] X, 31 | DOUBLE[:, :] mu, 32 | DOUBLE[:] sigma, 33 | double lmbda, 34 | bint normalize=True): 35 | cdef int t, k, j = 0 36 | cdef int n_time_steps = X.shape[0] 37 | cdef int n_features = X.shape[1] 38 | cdef int n_components = sigma.shape[0] 39 | cdef DOUBLE[:] muk = np.zeros(n_features, dtype=np.float64) 40 | cdef DOUBLE[:, :] loglik = np.zeros((n_time_steps, n_components), dtype=np.float64) 41 | 42 | for t in range(n_time_steps): 43 | for k in range(n_components): 44 | if t == 0: 45 | loglik[t, k] = spherical_normal_log_pdf(X[t], mu[k], sigma[k]) 46 | else: 47 | for j in range(n_features): 48 | muk[j] = lmbda * mu[k, j] + (1 - lmbda) * X[t-1, j] 49 | loglik[t, k] = spherical_normal_log_pdf(X[t], muk, sigma[k]) 50 | 51 | if normalize: 52 | loglik -= np.max(loglik, axis=1).reshape(-1, 1) 53 | 54 | return np.exp(loglik) 55 | --------------------------------------------------------------------------------
/examples/military_alliances.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of the military alliances network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_alliances 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | alluvial_plot, 12 | plot_latent_space 13 | ) 14 | 15 | 16 | # Load military alliances networks 17 | Y, names = load_alliances() 18 | 19 | # Fit HDP-LPCM 20 | # NOTE: This will take days to sample! 21 | model = DynamicNetworkHDPLPCM(n_iter=400000, 22 | tune=50000, 23 | burn=50000, 24 | tune_interval=1000, 25 | random_state=42, 26 | n_components=25, 27 | selection_type='vi', 28 | is_directed=False).fit(Y) 29 | 30 | # Trace plots 31 | fig, ax = plot_traces(model, figsize=(10, 12)) 32 | fig.savefig('alliances_traces.png', dpi=300) 33 | 34 | # alluvial diagram 35 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 36 | fig.savefig('alliances_alluvial.png', dpi=300) 37 | 38 | # latent space visualizations 39 | for t in range(Y.shape[0]): 40 | fig, ax = plot_latent_space( 41 | model, figsize=(30, 30), t=t, 42 | textsize=30, 43 | node_size=500, 44 | mutation_scale=20, 45 | linewidth=1.0, 46 | connectionstyle='arc3,rad=0.2', 47 | title_text=None, 48 | plot_group_sigma=True, 49 | node_names=names, 50 | node_textsize=20, 51 | repel_strength=0.3, 52 | mask_groups=[1], # NOTE: this may not be background in other settings! 53 | only_show_connected=True, 54 | number_nodes=True, 55 | border=1.0) 56 | fig.savefig('alliances_latent_space_t{}.png'.format(t), dpi=300) 57 |
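# Editor's follow-up sketch (hedged): an in-sample link-prediction check could
# reuse dynetlsm.metrics.network_auc; the fitted-probability attribute name
# `model.probas_` is an assumption and may differ in the actual API:
#   from dynetlsm.metrics import network_auc
#   print(network_auc(Y, model.probas_))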
-------------------------------------------------------------------------------- /dynetlsm/sample_auxillary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | 6 | def sample_tables(n, beta, alpha_init, alpha, kappa, random_state=None): 7 | rng = check_random_state(random_state) 8 | 9 | n_time_steps, n_components, _ = n.shape 10 | m = np.zeros((n_time_steps, n_components, n_components), 11 | dtype=int) 12 | 13 | # t = 0 does not include a spike 14 | probas = alpha_init * beta 15 | for k in range(n_components): 16 | x = rng.binomial(1, probas[k] / (probas[k] + np.arange(n[0, 0, k]))) 17 | m[0, 0, k] = np.sum(x) 18 | 19 | # include spike for remaining time steps 20 | probas = alpha * beta + kappa * np.eye(n_components) 21 | for t in range(1, n_time_steps): 22 | for j in range(n_components): 23 | for k in range(n_components): 24 | x = rng.binomial( 25 | 1, probas[j, k] / (probas[j, k] + np.arange(n[t, j, k]))) 26 | m[t, j, k] = np.sum(x) 27 | 28 | return m 29 | 30 | 31 | def sample_mbar(m, beta, kappa=1.0, alpha=1.0, random_state=None): 32 | rng = check_random_state(random_state) 33 | n_time_steps, n_components, _ = m.shape 34 | 35 | # sample override variables for t = 1 ... T (do not include t = 0) 36 | w = np.zeros((n_time_steps - 1, n_components), dtype=np.float64) 37 | rho = kappa / (alpha + kappa) 38 | for t in range(n_time_steps - 1): 39 | for j in range(n_components): 40 | w[t, j] = rng.binomial(m[t + 1, j, j], 41 | rho / (rho + beta[j] * (1 - rho))) 42 | 43 | # mbar is determined by m and w 44 | m_bar = np.zeros((n_time_steps - 1, n_components, n_components), 45 | dtype=np.float64) 46 | for t in range(n_time_steps - 1): 47 | m_bar[t] = m[t + 1] - np.diag(w[t]) 48 | 49 | # NOTE: we have to add on the initial distribution transitions 50 | return np.sum(m_bar, axis=(0, 1)) + m[0, 0], w 51 | --------------------------------------------------------------------------------
/examples/GoT.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of the GoT character interaction network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_got 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | alluvial_plot, 12 | plot_latent_space 13 | ) 14 | 15 | 16 | # Load GoT character interaction networks 17 | Y, names = load_got(seasons=[1,2,3,4], weight_min=10) 18 | 19 | # Fit HDP-LPCM 20 | # NOTE: This will take days to sample! 
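# (Editor's aside, hedged: for a quick smoke test the same keyword arguments
# accept a far smaller budget, e.g.
#   DynamicNetworkHDPLPCM(n_iter=500, tune=100, burn=100,
#                         n_components=10, random_state=42).fit(Y)
# which finishes in minutes instead of days, at the cost of a poorly mixed
# posterior.)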
21 | model = DynamicNetworkHDPLPCM(n_iter=400000, 22 | tune=50000, 23 | burn=50000, 24 | tune_interval=1000, 25 | random_state=42, 26 | n_components=25, 27 | selection_type='vi', 28 | is_directed=False).fit(Y) 29 | 30 | # Trace plots 31 | fig, ax = plot_traces(model, figsize=(10, 12)) 32 | fig.savefig('GoT_traces.png', dpi=300) 33 | 34 | # alluvial diagram 35 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 36 | fig.savefig('GoT_alluvial.png', dpi=300) 37 | 38 | # latent space visualizations 39 | for t in range(Y.shape[0]): 40 | fig, ax = plot_latent_space( 41 | model, figsize=(30, 30), t=t, 42 | textsize=50, 43 | node_size=500, 44 | mutation_scale=20, 45 | linewidth=1.0, 46 | connectionstyle='arc3,rad=0.2', 47 | title_text=None, 48 | plot_group_sigma=True, 49 | node_names=names, 50 | node_textsize=15, 51 | repel_strength=0.3, 52 | mask_groups=[5], # NOTE: this may not be background in other settings! 53 | only_show_connected=True, 54 | size_cutoff=2, 55 | number_nodes=True, 56 | border=3.0) 57 | fig.savefig('GoT_latent_space_t{}.png'.format(t), dpi=300) 58 | --------------------------------------------------------------------------------
/dynetlsm/text_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | 4 | 5 | def repel_labels(X, node_names, datasize, k=1.0, textsize=10, mask=None, 6 | include_number=True, ax=None): 7 | G = nx.DiGraph() 8 | 9 | data_nodes = [] 10 | init_pos = {} 11 | data_fmt = 'data_{}' 12 | mask = [True for i in range(X.shape[0])] if mask is None else mask 13 | 14 | if include_number: 15 | label_fmt = '{} ({})' 16 | else: 17 | label_fmt = '{}' 18 | for i, (x, y) in enumerate(X): 19 | if mask[i]: 20 | data_str = data_fmt.format(i) 21 | if node_names is None: 22 | label_str = "{}".format(i) 23 | else: 24 | label_str = label_fmt.format(node_names[i], i) 25 | data_nodes.append(data_str) 26 | G.add_node(data_str) 27 | G.add_node(label_str) 28 | G.add_edge(label_str, data_str) 29 | init_pos[data_str] = (x, y) 30 | init_pos[label_str] = (x, y) 31 | 32 | pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes, k=k) 33 | 34 | # undo re-scaling 35 | pos_after = np.vstack([pos[d] for d in data_nodes]) 36 | pos_before = np.vstack([init_pos[d] for d in data_nodes]) 37 | scale_x, shift_x = np.polyfit(pos_after[:, 0], pos_before[:, 0], 1) 38 | scale_y, shift_y = np.polyfit(pos_after[:, 1], pos_before[:, 1], 1) 39 | scale, shift = np.array([scale_x, scale_y]), np.array([shift_x, shift_y]) 40 | for key, val in pos.items(): 41 | pos[key] = (val * scale) + shift 42 | 43 | for label, data_str in G.edges(): 44 | ax.annotate(label, 45 | xy=pos[data_str], 46 | xytext=pos[label], 47 | size=textsize, 48 | alpha=0.9, 49 | xycoords='data', 50 | textcoords='data', 51 | arrowprops=dict(arrowstyle='-|>', 52 | shrinkA=0, shrinkB=np.sqrt(datasize) / 2., 53 | connectionstyle='arc3', 54 | mutation_scale=10, 55 | color='black')) 56 |
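# Editor's usage sketch (hedged, not part of the original module): annotate an
# existing matplotlib Axes with non-overlapping labels for 2-D positions X of
# shape (n_nodes, 2); larger `k` pushes labels further from their points:
#   fig, ax = plt.subplots()
#   ax.scatter(X[:, 0], X[:, 1], s=100)
#   repel_labels(X, node_names=names, datasize=100, k=0.3, ax=ax)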
-------------------------------------------------------------------------------- /examples/sampson_monks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of Sampson's monastery network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_monks 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | plot_posterior_counts, 12 | alluvial_plot, 13 | plot_latent_space 14 | ) 15 | 16 | 17 | # Load Sampson's monastery network 18 | Y, labels, names = load_monks(dynamic=True, is_directed=False) 19 | 20 | # Fit HDP-LPCM 21 | model = DynamicNetworkHDPLPCM(n_iter=165000, 22 | tune=15000, 23 | burn=20000, 24 | tune_interval=1000, 25 | random_state=42, 26 | selection_type='vi', 27 | is_directed=False).fit(Y) 28 | 29 | # Trace plots 30 | fig, ax = plot_traces(model, figsize=(10, 12)) 31 | fig.savefig('sampson_monks_traces.png', dpi=300) 32 | 33 | # posterior group counts 34 | for t in range(Y.shape[0]): 35 | fig, ax = plot_posterior_counts(model, figsize=(8, 6), fontsize=18, 36 | ticksize=18, t=t, normalize=True, 37 | bar_width=0.25, include_title=False) 38 | ax.set_xticks(range(0, 10)) 39 | ax.set_xlim(0, 9) 40 | fig.savefig('sampson_monks_posterior_counts_t{}.png'.format(t), dpi=300) 41 | 42 | # alluvial diagram 43 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 44 | fig.savefig('sampson_monks_alluvial.png', dpi=300) 45 | 46 | # latent space visualizations 47 | for t in range(Y.shape[0]): 48 | fig, ax = plot_latent_space( 49 | model, figsize=(10, 12), t=t, 50 | node_size=100, 51 | linewidth=1.0, 52 | mutation_scale=30, 53 | connectionstyle='arc3,rad=0.2', 54 | title_text=None, 55 | plot_group_sigma=True, 56 | node_names=names, 57 | node_textsize=10, 58 | repel_strength=0.3, 59 | number_nodes=True, border=1.0) 60 | fig.savefig('sampson_monks_latent_space_t{}.png'.format(t), dpi=300) 61 |
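# Editor's follow-up sketch (hedged; attribute names as used above): the
# recovered partition can be scored against Sampson's reported factions with
# the variation of information (0 means the two partitions agree exactly):
#   from dynetlsm.metrics import variation_of_information
#   for t in range(Y.shape[0]):
#       print(variation_of_information(labels[t], model.z_[t]))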
-------------------------------------------------------------------------------- /dynetlsm/datasets/load_got.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import pandas as pd 6 | import scipy.sparse as sp 7 | 8 | from os.path import dirname, join 9 | 10 | from sklearn.preprocessing import LabelEncoder 11 | 12 | 13 | __all__ = ['load_got', 'load_got_edgelists'] 14 | 15 | 16 | def network_from_edgelist(edgelist, n_nodes): 17 | data = np.ones(edgelist.shape[0]) 18 | Y = sp.coo_matrix((data, (edgelist[:, 0], edgelist[:, 1])), 19 | shape=(n_nodes, n_nodes)).toarray() 20 | 21 | # symmetrize and binarize network 22 | Y += Y.T 23 | Y[Y > 0] = 1 24 | 25 | return Y 26 | 27 | 28 | def load_got_edgelists(): 29 | module_path = dirname(__file__) 30 | file_path = join(module_path, 'raw_data', 'got') 31 | 32 | # load edge-lists into one dataframe 33 | data = pd.concat([ 34 | pd.read_csv(file_name, 35 | names=['source', 'target', 'weight', 'season'], skiprows=1) 36 | for file_name in glob.glob(join(file_path, 'got-s*-edges.csv'))]) 37 | 38 | # aggregate multiple edges into a single edge with a weight 39 | data = data.groupby(['source', 'target', 'season'], 40 | as_index=False).agg({'weight': 'sum'}) 41 | 42 | return data 43 | 44 | 45 | def load_got(seasons=None, weight_min=None): 46 | data = load_got_edgelists() 47 | 48 | if seasons is not None: 49 | data.query('season == {}'.format(seasons), inplace=True) 50 | 51 | if weight_min is not None: 52 | data.query('weight >= {}'.format(weight_min), inplace=True) 53 | 54 | # assign integer label ids 55 | encoder = LabelEncoder().fit(data[['source', 'target']].values.ravel()) 56 | data.loc[:, 'source'] = encoder.transform(data['source']) 57 | data.loc[:, 'target'] = encoder.transform(data['target']) 58 | 59 | n_seasons = data['season'].unique().shape[0] 60 | n_nodes = encoder.classes_.shape[0] 61 | Y = np.zeros((n_seasons, n_nodes, n_nodes)) 62 | for t, season_id in enumerate(np.sort(data['season'].unique())): 63 | season_data = data[data['season'] == season_id] 64 | edgelist = season_data[['source', 'target']].values 65 | Y[t] = network_from_edgelist(edgelist, n_nodes=n_nodes) 66 | 67 | return Y, encoder.classes_ 68 | --------------------------------------------------------------------------------
/dynetlsm/network_statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.preprocessing import LabelEncoder 5 | 6 | from .array_utils import nondiag_indices_from 7 | 8 | 9 | def is_dynamic(Y): 10 | return Y.ndim == 3 11 | 12 | 13 | def num_edges(Y, is_directed=False): 14 | return np.sum(Y) if is_directed else 0.5 * np.sum(Y) 15 | 16 | 17 | def density(Y, is_directed=False): 18 | n_edges = num_edges(Y, is_directed=is_directed) 19 | n_nodes = Y.shape[1] if is_dynamic(Y) else Y.shape[0] 20 | 21 | n_possible = n_nodes * (n_nodes - 1) 22 | if is_dynamic(Y): 23 | n_possible *= Y.shape[0] 24 | 25 | if not is_directed: 26 | n_possible *= 0.5 27 | 28 | return n_edges / n_possible 29 | 30 | 31 | def modularity(Y, z, is_directed=False): 32 | if is_dynamic(Y): 33 | n_time_steps = Y.shape[0] 34 | mod_ave = 0 35 | for t in range(n_time_steps): 36 | mod_ave += static_modularity(Y[t], z[t], 37 | is_directed=is_directed) 38 | return mod_ave / n_time_steps 39 | 40 | return static_modularity(Y, z, is_directed=is_directed) 41 | 42 | 43 | def static_modularity(Y, z, is_directed=False): 44 | """modularity for a static network.""" 45 | if is_directed: 46 | n_edges = Y.sum() 47 | degree = 0.5 * (Y.sum(axis=0) + Y.sum(axis=1)) 48 | else: 49 | n_edges = Y.sum() / 2 50 | degree = Y.sum(axis=0) 51 | degree = degree.reshape(-1, 1) 52 | 53 | encoder = LabelEncoder().fit(z) 54 | groups = encoder.transform(z) 55 | n_groups = encoder.classes_.shape[0] 56 | 57 | A = 0.5 * (Y + Y.T) if is_directed else Y 58 | B = A - np.dot(degree, degree.T) / (2 * n_edges) 59 | S = np.eye(n_groups)[groups.astype(int)] 60 | 61 | return np.trace(S.T @ B @ S) / (2 * n_edges) 62 | 63 | 64 | def connected_nodes(Y, is_directed=False, size_cutoff=1): 65 | # NOTE: weak connections essentially treats the graph as undirected 66 | n_components, labels = csgraph.connected_components(Y, 67 | directed=is_directed, 68 | connection='weak') 69 | 70 | if n_components == 1: 71 | return np.arange(Y.shape[1]) 72 | 73 | component_sizes = np.bincount(labels) 74 | non_singletons = np.where(component_sizes > size_cutoff)[0] 75 | 76 | return np.in1d(labels, non_singletons) 77 |
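# Editor's self-check sketch (hedged; not part of the original module): two
# disconnected 3-node cliques labelled as two groups have modularity 1/2.
if __name__ == '__main__':
    Y_demo = np.kron(np.eye(2), 1 - np.eye(3))
    z_demo = np.repeat([0, 1], 3)
    assert np.isclose(static_modularity(Y_demo, z_demo), 0.5)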
-------------------------------------------------------------------------------- /dynetlsm/datasets/load_monks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from os.path import dirname, join 5 | from sklearn.preprocessing import LabelEncoder 6 | 7 | 8 | __all__ = ['load_monks'] 9 | 10 | 11 | def load_monks(dynamic=True, is_directed=True, include_waverers=False, 12 | encode_labels=True): 13 | """Loads Sampson's Monastery Network (1968).""" 14 | if dynamic: 15 | return load_dynamic_monks(encode_labels, include_waverers, 16 | is_directed=is_directed) 17 | else: 18 | return load_static_monks(encode_labels, include_waverers, 19 | is_directed=is_directed) 20 | 21 | 22 | def load_dynamic_monks(encode_labels=True, include_waverers=False, 23 | is_directed=True): 24 | module_path = dirname(__file__) 25 | 26 | n_time_steps = 3 27 | Y = np.empty((n_time_steps, 18, 18), dtype=np.float64) 28 | 29 | for t in range(n_time_steps): 30 | Y[t] = np.loadtxt(join(module_path, 'raw_data', 31 | 'sampson_{}.npy'.format(t))) 32 | # load groups 33 | file_name = ('sampson_groups_waverers.txt' if include_waverers else 34 | 'sampson_groups.txt') 35 | 36 | with open(join(module_path, 'raw_data', file_name)) as f: 37 | groups = np.array([l.rstrip('\n') for l in f.readlines()]) 38 | 39 | if encode_labels: 40 | groups = LabelEncoder().fit_transform(groups) 41 | 42 | with open(join(module_path, 'raw_data', 'sampson_names.txt')) as f: 43 | names = np.array([l.rstrip('\n') for l in f.readlines()]) 44 | 45 | if not is_directed: 46 | Y += Y.transpose((0, 2, 1)) 47 | Y = (Y > 0).astype(np.float64) 48 | 49 | return Y, np.repeat(groups.reshape(1, -1), n_time_steps, axis=0), names 50 | 51 | 52 | def load_static_monks(encode_labels=True, include_waverers=False, 53 | is_directed=True): 54 | module_path = dirname(__file__) 55 | 56 | Y = np.loadtxt(join(module_path, 'raw_data', 'sampson.npy')) 57 | 58 | # load groups 59 | file_name = ('sampson_groups_waverers.txt' if include_waverers else 60 | 'sampson_groups.txt') 61 | with open(join(module_path, 'raw_data', file_name)) as f: 62 | groups = np.array([l.rstrip('\n') for l in f.readlines()]) 63 | 64 | if encode_labels: 65 | groups = LabelEncoder().fit_transform(groups) 66 | 67 | if not is_directed: 68 | Y += Y.T 69 | Y = (Y > 0).astype(np.float64) 70 | 71 | return Y, groups 72 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/got/got-s5-node.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser 5 | AREO,Areo 6 | ARYA,Arya 7 | BARRISTAN,Barristan 8 | BLACK_HAIRED_PROSTITUTE,Strumpet 9 | BRAN,Bran 10 | BRAND,Brand 11 | BRIAN,Brian 12 | BRIENNE,Brienne 13 | BRONN,Bronn 14 | CATELYN,Catelyn 15 | CERSEI,Cersei 16 | CERWYN,Cerwyn 17 | DAARIO,Daario 18 | DAENERYS,Daenerys 19 | DAVOS,Davos 20 | DENYS,Denys 21 | DERRYK,Derryk 22 | DORAN,Doran 23 | DORNISH_RIDER,Dornish Rider 24 | EDDISON_TOLLETT,Eddison 25 | ELLARIA,Ellaria 26 | FALYSE,Falyse 27 | GILLY,Gilly 28 | GRENN,Grenn 29 | GREY_WORM,Grey Worm 30 | HIGH_SEPTON,High Septon 31 | HIGH_SPARROW,High Sparrow 32 | HIZDAHR,Hizdahr 33 | JAIME,Jaime 34 | JANOS,Janos 35 | JAQEN,Jaqen 36 | JEOR,Jeor 37 | JOFFREY,Joffrey 38 | JON,Jon 39 | JORAH,Jorah 40 | KARSI,Karsi 41 | KEVAN,Kevan 42 | LANCEL,Lancel 43 | LITTLE_SAM,Little Sam 44 | LITTLEFINGER,Petyr 45 | LOBODA,Loboda 46 | LOLLYS,Lollys 47 | LORAS,Loras 48 | LORD_OF_BONES,Lord of Bones 49 | LORD_WEEBLY,Lord Weebly 50 | LYANNA,Lynanna 51 | LYANNA_MORMONT,Lyanna Mormont 52 | LYSA,Lysa 53 | MACE,Mace 54 | MADAME,Madame 55 | MAESTER_AEMON,Aemon 56 | MAGGY,Maggy 57 | MAGNAR,Magnar 58 | MALKO,Malko 59 | MANCE,Mance 60 | MARGAERY,Margaery 61 | MELARA,Melara 62 | MELISANDRE,Melisandre 63 | MERYN_TRANT,Meryn 64 | MISSANDEI,Missandei 65 | MOSSADOR,Mossador 66 | MOUNTAIN,Mountain 67 | MYRANDA,Myranda 68 | MYRCELLA,Myrcella 69 | NED,Ned 70 | NIGHT_KING,Night King 71 | NYMERIA,Nymeria 72 | OBARA,Obara 73 | OBERYN,Oberyn 74 | OLENNA,Olenna 75 | OLLY,Olly 76 | OLYVAR,Olyvar 77 | OWNER,Owner 78 | PODRICK,Podrick 79 | PYCELLE,Pycelle 80 | PYP,Pyp 81 | QUICK,Quick 82 | QYBURN,Qyburn 83 | RAMSAY,Ramsay 84 | RANDYLL,Randyll 85 | RENLY,Renly 86 | RHAEGAR,Rhaegar 87 | RHAENYRA,Rhaenyra 88 | RICKON,Rickon 89 | ROBB,Robb 90 | ROBERT,Robert 91 | ROBIN,Robin 92 | ROOSE_BOLTON,Roose 93 | ROYCE,Royce 94 | SAM,Sam 95 | SANSA,Sansa 96 | 
SELWYN,Selwyn 97 | SELYSE,Selyse 98 | SEPTA_UNELLA,Septa Unella 99 | SHAE,Shae 100 | SHIREEN,Shireen 101 | STANNIS,Stannis 102 | STEFFON,Steffon 103 | STRONG,Strong 104 | TANDA,Tanda 105 | THEON,Theon 106 | THIN_MAN,Thin Man 107 | TOMMEN,Tommen 108 | TORMUND,Tormund 109 | TRYSTANE,Trystane 110 | TYCHO,Tycho 111 | TYENE,Tyene 112 | TYRION,Tyrion 113 | TYWIN,Tywin 114 | VARYS,Varys 115 | WAIF,Waif 116 | WALDA,Walda 117 | WALDER,Walder 118 | WOLKAN,Wolkan 119 | OTHELL_YARWYCK,Othell 120 | YOHN_ROYCE,Yohn -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s3-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ANGUY,Anguy 5 | ARYA,Arya 6 | LITTLE_SAM,Little Sam 7 | BALERION,Balerion 8 | BALON,Balon 9 | BARRISTAN,Barristan 10 | BERIC,Beric 11 | BERT,Bert 12 | BLACK_WALDER,Black Walder 13 | BOROS,Boros 14 | BRAN,Bran 15 | BRANDON_STARK,Brandon 16 | BRIENNE,Brienne 17 | BRONN,Bronn 18 | BRYNDEN,Brynden 19 | CATELYN,Catelyn 20 | CERSEI,Cersei 21 | CRASTER,Craster 22 | DAARIO,Daario 23 | DAENERYS,Daenerys 24 | DAREON,Dareon 25 | DAVOS,Davos 26 | DESMOND,Desmond 27 | DROGO,Drogo 28 | EDDISON_TOLLETT,Eddison 29 | EDMURE,Edmure 30 | ELDRICK,Eldrick 31 | FARMER,Farmer 32 | FREY_SEPTON,Frey Septon 33 | GENDRY,Gendry 34 | GILLY,Gilly 35 | GREIZHEN,Greizhen 36 | GRENN,Grenn 37 | GREY_WORM,Grey Worm 38 | HIGH_SEPTON,High Septon 39 | HOBB,Hobb 40 | HODOR,Hodor 41 | HOSTER,Hoster 42 | HOT_PIE,Hot Pie 43 | HOUND,Sandor 44 | ILYN_PAYNE,Ilyn 45 | JAIME,Jaime 46 | JEOR,Jeor 47 | JOANNA,Joanna 48 | JOFFREY,Joffrey 49 | JOJEN,Jojen 50 | JON,Jon 51 | JORAH,Jorah 52 | JOYEUSE,Joyeuse 53 | KARL_TANNER,Karl 54 | KRAZNYS_MO_NAKLOZ,Kraznys 55 | LITTLEFINGER,Petyr 56 | LOCKE,Locke 57 | LORAS,Loras 58 | LORD_OF_BONES,Lord of Bones 59 | LOTHAR,Lothar 60 | LYSA,Lysa 61 | MACE,Mace 62 | MAESTER_AEMON,Aemon 63 | MAESTER_LUWIN,Luwin 64 | MANCE,Mance 65 | MAREI,Marei 66 | MARGAERY,Margaery 67 | MARTYN,Martyn 68 | MATTHOS,Matthos 69 | MEERA,Meera 70 | MELISANDRE,Melisandre 71 | MERO,Mero 72 | MERRY,Merry 73 | MERYN_TRANT,Meryn 74 | MISSANDEI,Missandei 75 | MOUNTAIN,Gregor 76 | MYCAH,Mycah 77 | MYRANDA,Myranda 78 | NED,Ned 79 | OLENNA,Olenna 80 | OLYVAR,Olyvar 81 | ORELL,Orell 82 | OSHA,Osha 83 | PODRICK,Podrick 84 | PRENDAHL,Prendahl 85 | PYCELLE,Pycelle 86 | PYP,Pyp 87 | QHORIN_HALFHAND,Qhorin 88 | QYBURN,Qyburn 89 | RADZAI_MO_ERAZ,Radzai 90 | RAMSAY,Ramsay 91 | RANDYLL,Randyll 92 | RAST,Rast 93 | LORD_OF_BONES,Lord of Bones 94 | RENLY,Renly 95 | RICKARD_KARSTARK,Rickard Karstark 96 | RICKON,Rickon 97 | ROBB,Robb 98 | ROBERT,Robert 99 | ROOSE_BOLTON,Roose 100 | ROS,Ros 101 | ROSLIN,Roslin 102 | RYLENE,Rylene 103 | SALLADHOR,Salladhor 104 | SAM,Sam 105 | SANSA,Sansa 106 | SELWYN,Selwyn 107 | SELYSE,Selyse 108 | SHAE,Shae 109 | SHIREEN,Shireen 110 | SORCERER,Sorcerer 111 | STANNIS,Stannis 112 | STEELSHANKS_WALTON,Steelshanks Walton 113 | TALISA,Talisa 114 | TARYN_MANT,Taryn 115 | THEON,Theon 116 | THOROS,Thoros 117 | TORMUND,Tormund 118 | TORTURER,Torturer 119 | TYRION,Tyrion 120 | TYWIN,Tywin 121 | VARYS,Varys 122 | VIOLET,Violet 123 | WALDER,Walder 124 | WILLEM_LANNISTER,Willem 125 | YARA,Yara 126 | YGRITTE,Ygritte -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s1-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | ADDAM_MARBRAND,Addam 3 | AEGON,Aegon 
4 | AERYS,Aerys 5 | ALLISER_THORNE,Allister 6 | ARYA,Arya 7 | ASSASSIN,Assassin 8 | BAELOR,Baelor 9 | BALON,Balon 10 | BARRISTAN,Barristan 11 | BENJEN,Benjen 12 | BERIC,Beric 13 | BORCAS,Borcas 14 | BOWEN_MARSH,Bowen 15 | BRAN,Bran 16 | BRANDON_STARK,Brandon 17 | BRONN,Bronn 18 | CATELYN,Catelyn 19 | CERSEI,Cersei 20 | COHOLLO,Cohollo 21 | DAENERYS,Daenerys 22 | DAREON,Dareon 23 | DOREAH,Doreah 24 | DROGO,Drogo 25 | GALBART_GLOVER,Galbart 26 | GARED,Gared 27 | GENDRY,Gendry 28 | GREATJON_UMBER,Greatjon 29 | GRENN,Grenn 30 | HIGH_SEPTON,High Septon 31 | HODOR,Hodor 32 | HOSTER,Hoster 33 | HOT_PIE,Hot Pie 34 | HOUND,Sandor 35 | HUGH_OF_THE_VALE,Hugh of the Vale 36 | ILLYRIO,Illyrio 37 | ILYN_PAYNE,Ilyn 38 | IROGENIA,Irogenia 39 | IRRI,Irri 40 | JAIME,Jaime 41 | JANOS,Janos 42 | JAREMY_RYKKER,Jaremy 43 | JEOR,Jeor 44 | JHIQUI,Jhiqui 45 | JOANNA,Joanna 46 | JOFFREY,Joffrey 47 | JON,Jon 48 | JON_ARRYN,Jon Arryn 49 | JONOS_BRACKEN,Jonos 50 | JORAH,Jorah 51 | JORY_CASSEL,Jory 52 | JOYEUSE,Joyeuse 53 | KEVAN,Kevan 54 | LANCEL,Lancel 55 | LEO_LEFFORD,Leo 56 | LITTLE_BIRD,Little Bird 57 | LITTLEFINGER,Petyr 58 | LOMMY_GREENHANDS,Lommy 59 | LORAS,Loras 60 | LUKE,Luke 61 | LYANNA,Lyanna 62 | LYSA,Lysa 63 | MACE,Mac 64 | MAESTER_AEMON,Aemon 65 | MAESTER_LUWIN,Luwin 66 | MAGO,Mago 67 | MARILLION,Marillion 68 | MASHA_HEDDLE,Masha 69 | MELESSA,Melessa 70 | MERYN_TRANT,Meryn 71 | MHAEGEN,Mhaegen 72 | MIRRI_MAZ_DUUR,Mirri Maz Dur 73 | MORD,Mord 74 | MOUNTAIN,Gregor 75 | MYCAH,Mycah 76 | MYRCELLA,Myrcella 77 | NED,Ned 78 | OLD_NAN,Old Nan 79 | OSHA,Osha 80 | OTHELL_YARWYCK,Othell 81 | OTHOR,Othor 82 | PYCELLE,Pycelle 83 | PYP,Pyp 84 | QOTHO,Qotho 85 | RAKHARO,Rakharo 86 | RANDYLL,Randyll 87 | RAST,Rast 88 | RENLY,Renly 89 | RHAEGAR,Rhaegar 90 | RHAEGO,Rhaego 91 | RICKARD_KARSTARK,Rickard Karstark 92 | RICKARD_STARK,Rickard Stark 93 | RICKON,Rickon 94 | ROBB,Robb 95 | ROBERT,Robert 96 | ROBIN,Robin 97 | RODRIK,Rodrik 98 | ROOSE_BOLTON,Roose 99 | ROS,Ros 100 | ROYCE,Royce 101 | RYGER_RIVERS,Ryger 102 | SAM,Sam 103 | SANSA,Sansa 104 | SEPTA_MORDANE,Septa Mordane 105 | SHAE,Shae 106 | SHAGGA,Shagga 107 | STABLE_BOY,Stable Boy 108 | STANNIS,Stannis 109 | STEFFON,Steffon 110 | STEVRON_FREY,Stevron 111 | SYRIO_FOREL,Syrio 112 | THEON,Theon 113 | TOBHO_MOTT,Tobho Mott 114 | TOMARD,Tomard 115 | TOMMEN,Tommen 116 | TYRION,Tyrion 117 | TYSHA,Tysha 118 | TYWIN,Tywin 119 | VARDIS_EGEN,Vardis 120 | VARLY,Varly 121 | VARYS,Varys 122 | VISERYS,Viserys 123 | WALDER,Walder 124 | WAYMAR_ROYCE,Waymar 125 | WILL,Will 126 | WINE_MERCHANT,Wine Merchant 127 | YOREN,Yoren -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s2-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALTON,Alton 5 | AMORY,Amory 6 | ARYA,Arya 7 | BALON,Balon 8 | BARRA,Barra 9 | BENJEN,Benjen 10 | BILLY,Billy 11 | BITER,Biter 12 | BLACK_LORREN,Black Lorren 13 | BOROS,Boros 14 | BRAN,Bran 15 | BRIENNE,Brienne 16 | BRONN,Bronn 17 | CAPTAINS_DAUGHTER,Captain's Daughter 18 | CATELYN,Catelyn 19 | CERSEI,Cersei 20 | COLEN,Colen 21 | CRASTER,Craster 22 | CRESSEN,Cressen 23 | DAENERYS,Daenerys 24 | DAGMER,Dagmer 25 | DAISY,Daisy 26 | DAVOS,Davos 27 | DONTOS,Dontos 28 | DOREAH,Doreah 29 | DROGO,Drogo 30 | DROWNED_PRIEST,Drowned Priest 31 | EDDISON_TOLLETT,Eddison 32 | FARLEN,Farlen 33 | FATHER_SEAWORTH,Father Seaworth 34 | FREY_DAUGHTER,Frey Daughter 35 | GENDRY,Gendry 36 | GERARD,Gerard 37 | GILLY,Gilly 38 | 
GRENN,Grenn 39 | HARREN,Harren 40 | HAYLENE,Haylene 41 | HIGH_SEPTON,High Septon 42 | HODOR,Hodor 43 | HOT_PIE,Hot Pie 44 | HOUND,Sandor 45 | ILYN_PAYNE,Ilyn 46 | IRRI,Irri 47 | JACKS,Jacks 48 | JAIME,Jaime 49 | JANOS,Janos 50 | JAQEN,Jaqen 51 | JEOR,Jeor 52 | JOANNA,Joanna 53 | JOFFREY,Joffrey 54 | JON,Jon 55 | JON_ARRYN,Jon Arryn 56 | JORAH,Jorah 57 | KEVAN,Kevan 58 | KOVARRO,Kovarro 59 | LANCEL,Lancel 60 | LITTLEFINGER,Petyr 61 | LOMMY,Lommy 62 | LORAS,Loras 63 | LYSA,Lysa 64 | MAESTER_LUWIN,Luwin 65 | MALAKKO,Malakko 66 | MANCE,Mance 67 | MANDON,Mandon 68 | MARGAERY,Margaery 69 | MARYA,Marya 70 | MATTHOS,Matthos 71 | MELESSA,Melessa 72 | MELISANDRE,Melisandre 73 | MERYN_TRANT,Meryn 74 | MHAEGEN,Mhaegen 75 | MOUNTAIN,Gregor 76 | MYRCELLA,Myrcella 77 | NED,Ned 78 | OSHA,Osha 79 | PODRICK,Podrick 80 | POLLIVER,Polliver 81 | PORTAN,Portan 82 | PROTESTER,Protester 83 | PYATT_PREE,Pyatt Pree 84 | PYCELLE,Pycelle 85 | QHORIN,Quorin 86 | QUAITHE,Quaithe 87 | QUENT,Quent 88 | RAKHARO,Rakharo 89 | RAMSAY,Ramsay 90 | RANDYLL,Randyll 91 | LORD_OF_BONES,Lord of Bones 92 | REGINALD,Reginald 93 | RENLY,Renly 94 | RENNICK,Rennick 95 | RHAEGO,Rhaego 96 | RHAENYS,Rhaenys 97 | RICKARD_KARSTARK,Rickard Karstark 98 | RICKON,Rickon 99 | ROBB,Robb 100 | ROBERT,Robert 101 | ROBIN,Robin 102 | RODRIK,Rodrik 103 | ROOSE_BOLTON,Roose 104 | RORGE,Rorge 105 | ROS,Ros 106 | SALLADHOR,Salladhor 107 | SAM,Sam 108 | SANSA,Sansa 109 | SEPTON,Septon 110 | SHAE,Shae 111 | SILK_KING,Silk King 112 | SPICE_KING,Spice King 113 | STANNIS,Stannis 114 | SYRIO_FOREL,Syrio 115 | TALISA,Talisa 116 | THEON,Theon 117 | TICKLER,Tickler 118 | TIMETT,Timett 119 | TOMMEN,Tommen 120 | TORRHEN,Torrhen 121 | TRYSTANE,Trystane 122 | TYRION,Tyrion 123 | TYWIN,Tywin 124 | VARYS,Varys 125 | VISENYA,Visenya 126 | WINTERFELL_SHEPHERD,Shepherd 127 | XARO,Xaro 128 | YARA,Yara 129 | YGRITTE,Ygritte 130 | YOREN,Yoren -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/military_alliances/names.csv: -------------------------------------------------------------------------------- 1 | name 2 | United Kingdom 3 | Hanover 4 | Bavaria 5 | Germany 6 | Baden 7 | Saxony 8 | Wuerttemburg 9 | Hesse Electoral 10 | Hesse Grand Ducal 11 | Mecklenburg Schwerin 12 | Austria-Hungary 13 | Netherlands 14 | France 15 | Belgium 16 | Italy 17 | Russia 18 | Spain 19 | Paraguay 20 | Brazil 21 | Ecuador 22 | Peru 23 | Colombia 24 | Argentina 25 | Bolivia 26 | Korea 27 | Yugoslavia 28 | Guatemala 29 | Honduras 30 | El Salvador 31 | Nicaragua 32 | United States of America 33 | Greece 34 | Bulgaria 35 | Romania 36 | Czechoslovakia 37 | Turkey 38 | Poland 39 | Iran 40 | Austria 41 | Estonia 42 | Hungary 43 | Saudi Arabia 44 | Latvia 45 | Iraq 46 | Cuba 47 | Haiti 48 | Dominican Republic 49 | Mexico 50 | Costa Rica 51 | Panama 52 | Venezuela 53 | Chile 54 | Japan 55 | Australia 56 | Mauritania 57 | Somalia 58 | Djibouti 59 | Morocco 60 | Algeria 61 | Tunisia 62 | Libya 63 | Sudan 64 | Egypt 65 | Syria 66 | Lebanon 67 | Jordan 68 | Yemen Arab Republic 69 | Yemen 70 | Yemen People's Republic 71 | Kuwait 72 | Bahrain 73 | Qatar 74 | United Arab Emirates 75 | Albania 76 | Canada 77 | Bahamas 78 | Jamaica 79 | Trinidad and Tobago 80 | Barbados 81 | Dominica 82 | Grenada 83 | St. Lucia 84 | St. Vincent and the Grenadines 85 | Antigua & Barbuda 86 | St. 
Kitts and Nevis 87 | Belize 88 | Guyana 89 | Suriname 90 | Luxembourg 91 | Portugal 92 | German Federal Republic 93 | Czech Republic 94 | Norway 95 | Denmark 96 | Iceland 97 | Pakistan 98 | Thailand 99 | Philippines 100 | German Democratic Republic 101 | China 102 | Central African Republic 103 | Chad 104 | Cyprus 105 | Afghanistan 106 | Mali 107 | Guinea 108 | Senegal 109 | Benin 110 | Niger 111 | Ivory Coast 112 | Burkina Faso 113 | Togo 114 | Cameroon 115 | Gabon 116 | Congo 117 | Rwanda 118 | India 119 | Myanmar 120 | Cambodia 121 | Vietnam 122 | Kenya 123 | Democratic Republic of the Congo 124 | Burundi 125 | Uganda 126 | Cape Verde 127 | Guinea-Bissau 128 | Gambia 129 | Liberia 130 | Sierra Leone 131 | Ghana 132 | Angola 133 | Ethiopia 134 | South Africa 135 | Mozambique 136 | Malta 137 | North Korea 138 | Moldova 139 | Ukraine 140 | Belarus 141 | Armenia 142 | Georgia 143 | Azerbaijan 144 | Turkmenistan 145 | Tajikistan 146 | Kyrgyzstan 147 | Uzbekistan 148 | Croatia 149 | Slovakia 150 | Lithuania 151 | Indonesia 152 | Kazakhstan 153 | Zimbabwe 154 | Sao Tome and Principe 155 | Equatorial Guinea 156 | Tanzania 157 | Zambia 158 | Sweden 159 | Two Sicilies 160 | Tuscany 161 | Modena 162 | Parma 163 | Uruguay 164 | Finland 165 | Mongolia 166 | New Zealand 167 | Oman 168 | South Korea 169 | Taiwan 170 | Israel 171 | Malaysia 172 | Madagascar 173 | Republic of Vietnam 174 | Mauritius 175 | Bangladesh 176 | Nigeria 177 | Swaziland 178 | Bosnia and Herzegovina 179 | Namibia 180 | Eritrea 181 | South Sudan 182 | -------------------------------------------------------------------------------- /dynetlsm/label_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.cluster.hierarchy as hc 3 | 4 | from scipy.spatial.distance import squareform 5 | 6 | from .model_selection.approx_bic import calculate_cluster_counts 7 | from .model_selection.approx_bic import calculate_cluster_counts_t 8 | 9 | 10 | def renormalize_weights(model, sample_id): 11 | # re-normalize weights 12 | active_groups = np.unique(model.zs_[sample_id].ravel()) 13 | active_mask = np.in1d(np.arange(model.n_components), active_groups) 14 | n_groups = active_groups.shape[0] 15 | 16 | beta = model.betas_[sample_id, active_groups] 17 | beta /= beta.sum() 18 | 19 | weights = model.weights_[sample_id] 20 | init_w = weights[0, 0, active_groups] 21 | init_w /= init_w.sum() 22 | 23 | n_time_steps, n_nodes, _ = model.Y_fit_.shape 24 | trans_w = np.zeros((n_time_steps, n_groups, n_groups), dtype=np.float64) 25 | for t in range(1, n_time_steps): 26 | trans_w[t] = weights[t, active_groups][:, active_groups] 27 | trans_w[t] /= np.sum(trans_w[t], axis=1).reshape(-1, 1) 28 | 29 | # return_inverse relabels z to start at zero 30 | _, temp_z = np.unique(model.zs_[sample_id].ravel(), return_inverse=True) 31 | z = temp_z.reshape(n_time_steps, n_nodes) 32 | 33 | # relabel mu and sigma as well 34 | mu = model.mus_[sample_id, active_groups] 35 | sigma = model.sigmas_[sample_id, active_groups] 36 | 37 | return z, beta, init_w, trans_w, mu, sigma 38 | 39 | 40 | def calculate_cooccurrence_matrix(z, n_groups=None): 41 | if n_groups is None: 42 | n_groups = np.unique(z).shape[0] 43 | 44 | # dummy encode group membership 45 | indicator = np.eye(n_groups)[z] 46 | 47 | return np.dot(indicator, indicator.T) 48 | 49 | 50 | def calculate_posterior_cooccurrence(model, t=0): 51 | # determine burn in samples 52 | n_burn = model.n_burn_ 53 | 54 | n_nodes = model.Y_fit_.shape[1] 55 | 
cooccurrence_proba = np.zeros((n_nodes, n_nodes)) 56 | n_iter = 0 57 | for z in model.zs_[n_burn:, t]: 58 | n_iter += 1 59 | cooccurrence_proba += calculate_cooccurrence_matrix( 60 | z, n_groups=model.n_components) 61 | 62 | return cooccurrence_proba / n_iter 63 | 64 | 65 | def cluster_posterior_coocurrence(model, t=0, threshold=0.5): 66 | cooccurence_proba = model.cooccurrence_probas_[t] 67 | 68 | # hierarchical clustering with average linkage 69 | linkage = hc.linkage(squareform(1. - cooccurence_proba), method='average', 70 | optimal_ordering=True) 71 | 72 | return hc.fcluster(linkage, t=threshold, criterion='distance') - 1 73 | 74 | 75 | def calculate_posterior_group_counts(model, t=0): 76 | counts = calculate_cluster_counts_t(model)[t] 77 | 78 | freq = np.bincount(counts) 79 | index = np.where(freq != 0)[0] 80 | freq = freq[index] 81 | 82 | return index, freq 83 | -------------------------------------------------------------------------------- /dynetlsm/network_likelihoods.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .latent_space import calculate_distances 4 | from .gaussian_likelihood_fast import compute_gaussian_likelihood 5 | from .array_utils import triu_indices_from_3d, nondiag_indices_from_3d 6 | from .directed_likelihoods_fast import ( 7 | directed_network_loglikelihood_fast, 8 | directed_partial_loglikelihood, directed_intercept_grad, 9 | directed_network_probas, 10 | approx_directed_partial_loglikelihood, 11 | approx_directed_network_loglikelihood) 12 | from .static_network_fast import partial_loglikelihood 13 | 14 | 15 | # TODO: mask nan entries 16 | def dynamic_network_loglikelihood_directed(Y, X, 17 | intercept_in, intercept_out, radii, 18 | squared=False, dist=None): 19 | dist = calculate_distances(X, squared=squared) if dist is None else dist 20 | 21 | return directed_network_loglikelihood_fast(Y, dist, radii, 22 | intercept_in, intercept_out) 23 | 24 | 25 | # TODO: mask nan entries 26 | def dynamic_network_loglikelihood_undirected(Y, X, intercept, squared=False, 27 | dist=None): 28 | dist = calculate_distances(X, squared=squared) if dist is None else dist 29 | 30 | triu_indices = triu_indices_from_3d(dist, k=1) 31 | eta = intercept - dist[triu_indices] 32 | 33 | return np.sum(Y[triu_indices] * eta - np.log(1 + np.exp(eta))) 34 | 35 | 36 | def dynamic_network_loglikelihood(model, sample_id, dist=None): 37 | X = model.Xs_[sample_id] 38 | intercept = model.intercepts_[sample_id] 39 | radii = model.radiis_[sample_id] if model.is_directed else None 40 | if dist is None: 41 | dist = (None if model.case_control_sampler_ else 42 | calculate_distances(X, squared=False)) 43 | 44 | if model.is_directed: 45 | if model.case_control_sampler_ is not None: 46 | loglik = approx_directed_network_loglikelihood( 47 | X, 48 | radii=radii, 49 | in_edges=model.case_control_sampler_.in_edges_, 50 | out_edges=model.case_control_sampler_.out_edges_, 51 | degree=model.case_control_sampler_.degrees_, 52 | control_nodes=model.case_control_sampler_.control_nodes_out_, 53 | intercept_in=intercept[0], 54 | intercept_out=intercept[1], 55 | squared=False) 56 | else: 57 | loglik = dynamic_network_loglikelihood_directed( 58 | model.Y_fit_, X, 59 | intercept_in=intercept[0], 60 | intercept_out=intercept[1], 61 | radii=radii, dist=dist) 62 | else: 63 | loglik = dynamic_network_loglikelihood_undirected( 64 | model.Y_fit_, X, intercept, dist=dist) 65 | 66 | return loglik 67 | 
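# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original module): a minimal smoke test of
# the undirected likelihood above, using the array conventions assumed
# throughout this module -- Y is (n_time_steps, n_nodes, n_nodes) and X is
# (n_time_steps, n_nodes, n_features). The synthetic inputs and the printed
# value are illustrative only.
if __name__ == '__main__':
    _rng = np.random.RandomState(42)
    _X = _rng.randn(3, 10, 2)                              # 3 steps, 10 nodes
    _A = (_rng.rand(3, 10, 10) < 0.2).astype(np.float64)   # random digraph
    _Y = ((_A + _A.transpose((0, 2, 1))) > 0).astype(np.float64)  # symmetrize
    print(dynamic_network_loglikelihood_undirected(_Y, _X, intercept=1.0))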
-------------------------------------------------------------------------------- /dynetlsm/static_network_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | 7 | from libc.math cimport log, exp, sqrt 8 | 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | ctypedef np.npy_float64 DOUBLE 14 | ctypedef np.npy_int64 INT 15 | 16 | 17 | def partial_loglikelihood(DOUBLE[:, :] Y, 18 | DOUBLE[:, :] X, 19 | double intercept, 20 | int node_id, 21 | bint squared=False): 22 | cdef int i, d = 0 23 | cdef int n_nodes = Y.shape[0] 24 | cdef int n_features = X.shape[1] 25 | cdef double dist = 0 26 | cdef double eta = 0 27 | cdef double loglik = 0 28 | 29 | for i in range(n_nodes): 30 | dist = 0 31 | eta = 0 32 | if i != node_id: 33 | for d in range(n_features): 34 | dist += (X[i, d] - X[node_id, d]) ** 2 35 | if squared: 36 | eta = intercept - dist 37 | else: 38 | eta = intercept - sqrt(dist) 39 | 40 | # in-case the network is undirected 41 | loglik += Y[node_id, i] * eta 42 | loglik -= log(1 + exp(eta)) 43 | 44 | return loglik 45 | 46 | 47 | def approx_partial_loglikelihood(DOUBLE[:, :] X, 48 | double intercept, 49 | INT[:, :] edges, 50 | INT[:] degrees, 51 | INT[:, :] control_nodes, 52 | int node_id, 53 | bint squared=False): 54 | cdef int j, d = 0 55 | cdef int n_nodes = X.shape[0] 56 | cdef int n_features = X.shape[1] 57 | cdef int n_control = control_nodes.shape[1] 58 | cdef int node_degree = degrees[node_id] 59 | cdef double dist = 0 60 | cdef double eta = 0 61 | cdef double loglik = 0 62 | cdef double control = 0 63 | cdef double control_adj = ( (n_nodes - 1) / n_control) 64 | 65 | # edges 66 | for j in range(node_degree): 67 | dist = 0 68 | eta = 0 69 | for d in range(n_features): 70 | dist += (X[edges[node_id, j], d] - X[node_id, d]) ** 2 71 | if squared: 72 | eta = intercept - dist 73 | else: 74 | eta = intercept - sqrt(dist) 75 | 76 | loglik += eta 77 | 78 | # control estimate 79 | for j in range(n_control): 80 | dist = 0 81 | eta = 0 82 | for d in range(n_features): 83 | dist += (X[control_nodes[node_id, j], d] - X[node_id, d]) ** 2 84 | if squared: 85 | eta = intercept - dist 86 | else: 87 | eta = intercept - sqrt(dist) 88 | 89 | control += log(1 + exp(eta)) 90 | 91 | # add control estimate 92 | loglik -= control_adj * control 93 | 94 | return loglik 95 | -------------------------------------------------------------------------------- /examples/homogeneous_dynsbm.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(reticulate) 3 | library(dynsbm) 4 | library(matrixStats) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | dynetlsm <- import("dynetlsm") 9 | sklearn <- import("sklearn") 10 | 11 | 12 | # choose between easy and hard 13 | sim_type <- 'hard' 14 | # sim_type <- 'easy' 15 | out_dir <- paste0('results_dynsbm_', sim_type) 16 | if (!dir.exists(out_dir)) { 17 | dir.create(out_dir) 18 | } 19 | 20 | compute.icl <- function(dynsbm){ 21 | T <- ncol(dynsbm$membership) 22 | Q <- nrow(dynsbm$trans) 23 | N <- nrow(dynsbm$membership) 24 | pen <- 0.5*Q*log(N*(N-1)*T/2) + 0.25*Q*(Q-1)*T*log(N*(N-1)/2) # binary case 25 | if ("sigma" %in% names(dynsbm)) pen <- 2*pen # continuous case 26 | return(dynsbm$loglikelihood - ifelse(T>1,0.5*Q*(Q-1)*log(N*(T-1)),0) - pen) 27 | } 
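# Editor's note: compute.icl() above appears to follow the ICL heuristic used
# with dynsbm -- the fitted log-likelihood minus BIC-style penalties: `pen`
# charges for the per-time-step block-connectivity parameters (and is doubled
# for continuous edge models, which carry an extra variance parameter per
# block pair), while the ifelse() term adds a 0.5*Q*(Q-1)*log(N*(T-1)) penalty
# for the Q x Q transition matrix whenever T > 1. The loop below keeps the Q
# that maximizes this criterion.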
28 | 29 | 30 | for (seed in 0:49) { 31 | print(seed) 32 | res <- dynetlsm$datasets$homogeneous_simulation( 33 | n_time_steps=6L, n_nodes=120L, random_state=as.integer(seed), 34 | simulation_type = sim_type) 35 | Y <- res[[1]] 36 | z <- res[[3]] 37 | sim_res <- list() 38 | 39 | models <- select.dynsbm(Y, Qmin=1, Qmax=8, edge.type="binary", 40 | nstart=10, fixed.param = FALSE, nb.cores = 8) 41 | 42 | # num of clusters maximizing ICL 43 | icls <- sapply(models, compute.icl) 44 | sim_res['num_clusters'] <- which.max(icls) 45 | 46 | # estimates based on the true number of clusters (G = 6) 47 | sbm <- models[[6]] 48 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score( 49 | as.vector(t(z)), as.vector(sbm$membership)) 50 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information( 51 | as.vector(t(z)), as.vector(sbm$membership)) 52 | 53 | # average statistics 54 | avg_rand <- 0 55 | avg_vi <- 0 56 | for (t in 1:dim(Y)[1]) { 57 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(z[t,], sbm$membership[,t]) 58 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(z[t,], sbm$membership[,t]) 59 | } 60 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 61 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 62 | 63 | # in-sample AUC 64 | probas <- array(0, dim=dim(Y)) 65 | for (t in 1:dim(Y)[1]) { 66 | b <- sbm$beta[t,,] 67 | m <- sbm$membership[,t] 68 | Z <- array(0, dim=c(length(m), 6)) 69 | for (i in 1:length(m)) { 70 | if (m[i] == 0) { 71 | Z[i,] <- diag(6)[1,] 72 | } 73 | else{ 74 | Z[i,] <- diag(6)[m[i],] 75 | } 76 | } 77 | probas[t,,] <- Z %*% b %*% t(Z) 78 | } 79 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 80 | 81 | df <- as.data.frame(sim_res) 82 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 83 | write.csv(df, file_name, row.names = FALSE) 84 | } 85 | -------------------------------------------------------------------------------- /examples/homogeneous_sbm.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(blockmodels) 3 | library(label.switching) 4 | library(zeallot) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | 9 | #source_python('~/myworkspace/dynetlsm/examples/test.py') 10 | 11 | dynetlsm <- import("dynetlsm") 12 | sklearn <- import("sklearn") 13 | 14 | 15 | # choose between easy and hard 16 | sim_type <- 'hard' 17 | #sim_type <- 'easy' 18 | out_dir <- paste0('results_sbm_', sim_type) 19 | if (!dir.exists(out_dir)) { 20 | dir.create(out_dir) 21 | } 22 | 23 | 24 | for (seed in 0:49) { 25 | res <- dynetlsm$datasets$homogeneous_simulation( 26 | n_time_steps=6L, n_nodes=120L, random_state=as.integer(seed), 27 | simulation_type = sim_type) 28 | Y <- res[[1]] 29 | z <- res[[3]] 30 | sim_res <- list() 31 | 32 | # blockmodels 33 | n_time_steps <- dim(Y)[[1]] 34 | n_nodes <- dim(Y)[[2]] 35 | z_sbm <- matrix(0, nrow=n_time_steps, ncol=n_nodes) 36 | p_sbm <- array(0, dim = c(n_time_steps, n_nodes, 6)) 37 | probas <- array(0, dim=dim(Y)) 38 | avg_rand <- 0 39 | avg_vi <- 0 40 | for (t in 1:n_time_steps) { 41 | sbm_models <- BM_bernoulli('SBM_sym', Y[t,,], explore_min=8, 42 | exploration_factor=1., ncores=8, verbosity=0) 43 | sbm_models$estimate() 44 | 45 | p_sbm[t,,] <- sbm_models$memberships[[6]]$Z 46 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 47 | sim_res[paste0('num_clusters_', t)] <- which.max(sbm_models$ICL) 48 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(as.vector(z[t,]), 
z_sbm[t,]) 49 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(as.vector(z[t,]), z_sbm[t,]) 50 | 51 | b <- sbm_models$model_parameters[[6]]$pi 52 | m <- z_sbm[t,] 53 | Z <- array(0, dim=c(length(m), 6)) 54 | for (i in 1:length(m)) { 55 | if (m[i] == 0) { 56 | Z[i,] <- diag(6)[1,] 57 | } 58 | else{ 59 | Z[i,] <- diag(6)[m[i],] 60 | } 61 | } 62 | probas[t,,] <- Z %*% b %*% t(Z) 63 | } 64 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 65 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 66 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 67 | 68 | res <- label.switching("ECR", zpivot=z_sbm[1,], z=z_sbm, K=6) 69 | perm <- res$permutations$ECR 70 | for (t in 1:n_time_steps) { 71 | p_sbm[t,,] <- p_sbm[t,,perm[t,]] 72 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 73 | } 74 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score(as.vector(t(z)), as.vector(t(z_sbm))) 75 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information(as.vector(t(z)), as.vector(t(z_sbm))) 76 | 77 | df <- as.data.frame(sim_res) 78 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 79 | write.csv(df, file_name, row.names = FALSE) 80 | } 81 | -------------------------------------------------------------------------------- /dynetlsm/model_selection/posterior_vi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..network_likelihoods import dynamic_network_loglikelihood 4 | 5 | 6 | __all__ = ['posterior_expected_vi', 'time_averaged_posterior_expected_vi', 7 | 'minimize_posterior_expected_vi'] 8 | 9 | 10 | def nonvectorized_posterior_expected_vi(labels, cooccurrence_proba): 11 | """non-vectorized expected VI used for testing""" 12 | vi = 0. 13 | n_samples = labels.shape[0] 14 | for i in range(n_samples): 15 | ind = labels == labels[i] 16 | vi += np.log2(np.sum(ind)) 17 | vi -= 2 * np.log2(np.sum(ind * cooccurrence_proba[i, :])) 18 | vi += np.log2(np.sum(cooccurrence_proba[i, :])) 19 | 20 | return vi / n_samples 21 | 22 | 23 | def posterior_expected_vi(labels, cooccurrence_proba): 24 | """Lower-bound to the posterior expectation of the VI.""" 25 | vi = 0. 26 | 27 | # number of samples and groups 28 | n_samples = labels.shape[0] 29 | n_groups = labels.max() + 1 30 | 31 | # cluster counts and membership indicators 32 | resp = np.zeros((n_samples, n_groups)) 33 | resp[np.arange(n_samples), labels] = 1 34 | nk = np.sum(resp, axis=0) 35 | 36 | # VI calculation 37 | nonzero_mask = nk != 0 # (labels may be non-contiguous) 38 | vi += np.sum(nk[nonzero_mask] * np.log2(nk[nonzero_mask])) 39 | vi -= 2 * np.log2( 40 | np.sum(cooccurrence_proba * resp[:, labels].T, axis=1)).sum() 41 | vi += np.log2(np.sum(cooccurrence_proba, axis=1)).sum() 42 | 43 | return vi / n_samples 44 | 45 | 46 | def time_averaged_posterior_expected_vi(labels, cooccurrence_proba): 47 | """Lower-bound to the time averaged posterior expected VI.""" 48 | vi = 0. 
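    # (editor's note) the time-averaged bound evaluates the static lower
    # bound at each time step, with that step's labels and co-occurrence
    # matrix, and averages the results over the T slices: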
49 | n_time_steps = labels.shape[0] 50 | for t in range(n_time_steps): 51 | vi += posterior_expected_vi(labels[t], cooccurrence_proba[t]) 52 | 53 | return vi / n_time_steps 54 | 55 | 56 | def minimize_posterior_expected_vi(model): 57 | # determine how many samples to burn 58 | n_burn = model.n_burn_ 59 | 60 | # calculate the expected VI for the partitions explored by the Markov chain 61 | n_samples = model.zs_.shape[0] 62 | sample_ids = np.arange(n_burn, n_samples) 63 | vis = np.zeros(sample_ids.shape[0]) 64 | for i, idx in enumerate(sample_ids): 65 | vis[i] = time_averaged_posterior_expected_vi( 66 | model.zs_[idx], model.cooccurrence_probas_) 67 | 68 | # check for ties 69 | min_ids = np.where(vis == vis.min())[0] 70 | if min_ids.shape[0] > 1: 71 | # choose the configuration with the highest log-likelihood log(p(Y | X)) 72 | best_id, best_loglik = None, -np.inf 73 | for min_id in min_ids: 74 | loglik = dynamic_network_loglikelihood( 75 | model, sample_id=sample_ids[min_id]) 76 | if loglik > best_loglik: 77 | best_id = sample_ids[min_id] 78 | best_loglik = loglik 79 | else: 80 | best_id = sample_ids[min_ids[0]] 81 | 82 | return best_id 83 | -------------------------------------------------------------------------------- /dynetlsm/datasets/detection_limit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from functools import lru_cache 4 | from scipy.special import expit 5 | from sklearn.utils import check_random_state 6 | 7 | from .samples_generator import network_from_dynamic_latent_space 8 | 9 | 10 | __all__ = ['make_lookup_table', 'detection_limit_simulation'] 11 | 12 | 13 | @lru_cache() 14 | def make_lookup_table( 15 | n_samples=10000, low=0.1, high=2.5, n_bins=100, random_state=42): 16 | rng = check_random_state(random_state) 17 | 18 | sigma = 0.5 19 | intercept = 1.0 20 | ratio = np.zeros((n_bins, 4)) 21 | mu = np.linspace(low, high, n_bins) 22 | for b, m in enumerate(mu): 23 | mus = m * np.array([[1, 0], 24 | [-1, 0]]) 25 | X = np.sqrt(sigma) * rng.randn(n_samples, 8) 26 | p_in, p_out = 0, 0 27 | for i in range(n_samples): 28 | x = X[i, :2] + mus[0] 29 | y = X[i, 2:4] + mus[0] 30 | x0 = X[i, 4:6] + mus[0] 31 | x1 = X[i, 6:] + mus[1] 32 | p_in += expit(intercept - np.sqrt(((x - x0) ** 2).sum())) 33 | p_out += expit(intercept - np.sqrt(((y - x1) ** 2).sum())) 34 | 35 | ratio[b] = np.array([ 36 | p_out / p_in, m, p_in / n_samples, p_out / n_samples]) 37 | 38 | return ratio 39 | 40 | 41 | def detection_limit_simulation( 42 | n_nodes=120, n_time_steps=4, trans_proba=0.2, lmbda=0.8, r=0.5, 43 | random_state=42): 44 | rng = check_random_state(random_state) 45 | 46 | ratio = make_lookup_table() 47 | idx = np.argmin(np.abs(r - ratio[:, 0])) 48 | mu = ratio[idx, 1] 49 | sigma = 0.5 50 | intercept = 1.0 51 | mus = mu * np.array([[1, 0], 52 | [-1, 0]]) 53 | X, z = [], [] 54 | z0 = rng.choice([0, 1], p=[0.5, 0.5], size=n_nodes) 55 | X0 = sigma * rng.randn(n_nodes, 2) + mus[z0] 56 | X.append(X0) 57 | z.append(z0) 58 | 59 | wt = np.array([[1 - trans_proba, trans_proba], 60 | [trans_proba, 1 - trans_proba]]) 61 | for t in range(1, n_time_steps): 62 | zt = np.zeros(n_nodes, dtype=int) 63 | for group_id in range(2): 64 | group_mask = z[t - 1] == group_id 65 | zt[group_mask] = rng.choice(np.arange(2), p=wt[group_id, :], 66 | size=np.sum(group_mask)) 67 | 68 | Xt = np.zeros((n_nodes, 2), dtype=np.float64) 69 | for group_id in range(2): 70 | group_mask = zt == group_id 71 | group_count = np.sum(group_mask) 72 | Xt[group_mask, :] = ( 73 | 
sigma * rng.randn(group_count, 2) + ( 74 | lmbda * mus[group_id] + (1 - lmbda) * X[t-1][group_mask, :]) 75 | ) 76 | 77 | X.append(Xt) 78 | z.append(zt) 79 | 80 | X = np.stack(X, axis=0) 81 | z = np.vstack(z) 82 | 83 | Y, probas = network_from_dynamic_latent_space( 84 | X, intercept=intercept, random_state=rng) 85 | 86 | return Y, X, z, probas, ratio[idx, 0], mus 87 | -------------------------------------------------------------------------------- /examples/inhomogeneous_sbm.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(blockmodels) 3 | library(label.switching) 4 | library(zeallot) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | 9 | 10 | dynetlsm <- import("dynetlsm") 11 | sklearn <- import("sklearn") 12 | 13 | 14 | # choose between easy and hard 15 | sim_type <- 'hard' 16 | # sim_type <- 'easy' 17 | out_dir <- paste0('inhomo_results_sbm_', sim_type) 18 | if (!dir.exists(out_dir)) { 19 | dir.create(out_dir) 20 | } 21 | 22 | 23 | for (seed in 0:49) { 24 | res <- dynetlsm$datasets$inhomogeneous_simulation( 25 | n_time_steps=9L, n_nodes=120L, random_state=as.integer(seed), 26 | simulation_type = sim_type) 27 | Y <- res[[1]] 28 | Y <- Y[1:9,,] 29 | z <- res[[3]] 30 | z <- z[1:9,] 31 | sim_res <- list() 32 | 33 | # blockmodels 34 | n_time_steps <- dim(Y)[[1]] 35 | n_nodes <- dim(Y)[[2]] 36 | z_sbm <- matrix(0, nrow=n_time_steps, ncol=n_nodes) 37 | p_sbm <- array(0, dim = c(n_time_steps, n_nodes, 6)) 38 | probas <- array(0, dim=dim(Y)) 39 | avg_rand <- 0 40 | avg_vi <- 0 41 | n_clusters <- c(2, 2, 2, 6, 6, 6, 4, 4, 4) 42 | for (t in 1:n_time_steps) { 43 | sbm_models <- BM_bernoulli('SBM_sym', Y[t,,], explore_min=8, 44 | exploration_factor=1., ncores=8, verbosity=0) 45 | sbm_models$estimate() 46 | 47 | p_sbm[t,,1:n_clusters[t]] <- sbm_models$memberships[[n_clusters[t]]]$Z 48 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 49 | sim_res[paste0('num_clusters_', t)] <- which.max(sbm_models$ICL) 50 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(as.vector(z[t,]), z_sbm[t,]) 51 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(as.vector(z[t,]), z_sbm[t,]) 52 | 53 | b <- sbm_models$model_parameters[[n_clusters[t]]]$pi 54 | m <- z_sbm[t,] 55 | Z <- array(0, dim=c(length(m), n_clusters[t])) 56 | for (i in 1:length(m)) { 57 | if (m[i] == 0) { 58 | Z[i,] <- diag(n_clusters[t])[1,] 59 | } 60 | else{ 61 | Z[i,] <- diag(n_clusters[t])[m[i],] 62 | } 63 | } 64 | probas[t,,] <- Z %*% b %*% t(Z) 65 | } 66 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 67 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 68 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 69 | 70 | res <- label.switching("ECR", zpivot=z_sbm[1,], z=z_sbm, K=6) 71 | perm <- res$permutations$ECR 72 | for (t in 1:n_time_steps) { 73 | p_sbm[t,,] <- p_sbm[t,,perm[t,]] 74 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 75 | } 76 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score(as.vector(t(z)), as.vector(t(z_sbm))) 77 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information(as.vector(t(z)), as.vector(t(z_sbm))) 78 | 79 | df <- as.data.frame(sim_res) 80 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 81 | write.csv(df, file_name, row.names = FALSE) 82 | } 83 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s6-nodes.csv: 
-------------------------------------------------------------------------------- 1 | Id,Label 2 | AERON,Aeron 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser 5 | AREO,Areo 6 | ARTHUR,Arthur 7 | ARTHUR_CHILD,Arthur (child) 8 | ARYA,Arya 9 | BALON,Balon 10 | BELICHO,Belicho 11 | BENJEN,Benjen 12 | BERIC,Beric 13 | BIANCA,Bianca 14 | BLACK_WALDER,Black Walder 15 | BOBONO,Bobono 16 | BOWEN_MARSH,Bowen 17 | BRAN,Bran 18 | BRIENNE,Brienne 19 | BRONN,Bronn 20 | BRYNDEN,Brynden 21 | CAMELLO,Camello 22 | CATELYN,Catelyn 23 | CERSEI,Cersei 24 | CLARENZO,Clarenzo 25 | DAARIO,Daario 26 | DAENERYS,Daenerys 27 | DAVOS,Davos 28 | DICKON,Dickon 29 | DIM_DALBA,Dim Dalba 30 | EDDISON_TOLLETT,Eddison 31 | DOM,Dom 32 | DORAN,Doran 33 | DOSH_KHALEEN_LEADER,Dosh Khaleen Leader 34 | DROGO,Drogo 35 | EDMURE,Edmure 36 | ELLARIA,Ellaria 37 | EURON,Euron 38 | FLYNN,Flynn 39 | GATINS,Gatins 40 | GILLY,Gilly 41 | GLOVER,Glover 42 | GREY_WORM,Grey Worm 43 | GUARD_CAPTAIN,Guard Captain 44 | HARALD,Harald 45 | HIGH_SPARROW,High Sparrow 46 | HODOR,Hodor 47 | HOSTER,Hoster 48 | HOUND,Sandor 49 | HOWLAND,Howland 50 | IZEMBARO,Izembaro 51 | JAIME,Jaime 52 | JAQEN,Jaqen 53 | JEOR,Jeor 54 | JOFFREY,Joffrey 55 | JON,Jon 56 | JORAH,Jorah 57 | KEVAN,Kevan 58 | KHAL_MORO,Khal Moro 59 | KINVARA,Kinvara 60 | KRAZNYS_MO_NAKLOZ,Kraznys 61 | LADY_CRANE,Lady Crane 62 | LANCEL,Lancel 63 | LEAF,Leaf 64 | LEM,Lem 65 | LHAZAREEN_WOMAN,Lhazareen Woman 66 | LITTLE_SAM,Little Sam 67 | LITTLEFINGER,Petyr 68 | LORAS,Loras 69 | LOTHAR,Lothar 70 | LYANNA,Lyanna 71 | LYANNA_MORMONT,Lyanna Mormont 72 | MACE,Mace 73 | MAESTER_AEMON,Aemon 74 | MAESTER_CITADEL,Citadel Maester 75 | MAESTER_DORNE,Dorne Maester 76 | MAESTER_MORMONT,Mormont Maester 77 | MAESTER_WOLKAN,Wolkan 78 | MANCE,Mance 79 | MANDERLY,Manderly 80 | MARGAERY,Margaery 81 | MEERA,Meera 82 | MELESSA,Melessa 83 | MELISANDRE,Melisandre 84 | MINISA,Minisa 85 | MISSANDEI,Missandei 86 | MORGAN,Morgan 87 | MOUNTAIN,Gregor 88 | MYRANDA,Myranda 89 | MYRCELLA,Myrcella 90 | NED,Ned 91 | NIGHT_KING,Night King 92 | NYMERIA,Nymeria 93 | OBARA,Obara 94 | OBERYN,Oberyn 95 | OLD_NAN,Old Nan 96 | OLENNA,Olenna 97 | OLLY,Olly 98 | OSHA,Osha 99 | OTHELL_YARWYCK,Othell 100 | PODRICK,Podrick 101 | PYCELLE,Pycelle 102 | QYBURN,Qyburn 103 | RAMSAY,Ramsay 104 | RANDYLL,Randyll 105 | RAY,Ray 106 | RAZDAL,Razdal 107 | RED_PRIEST,Red Priest 108 | RICKARD_KARSTARK,Rickard Karstark 109 | RICKARD_STARK,Rickard Stark 110 | RICKON,Rickon 111 | ROBB,Robb 112 | ROBERT,Robert 113 | ROBETT,Robett 114 | ROBIN,Robin 115 | RODRIK,Rodrik 116 | ROOSE_BOLTON,Roose 117 | SAM,Sam 118 | SANSA,Sansa 119 | SEPTA_UNELLA,Septa Unella 120 | SHIREEN,Shireen 121 | SMALLJON,Smalljon 122 | SON_OF_EDMURE,Edmure's Son 123 | SON_OFWALDA,Roose's Son 124 | STANNIS,Stannis 125 | TALLA,Talla 126 | THEON,Theon 127 | THOROS,Thoros 128 | THREE_EYED_RAVEN,Three Eyed Raven 129 | TOMMEN,Tommen 130 | TORMUND,Tormund 131 | TRYSTANE,Trystane 132 | TYENE,Tyene 133 | TYRION,Tyrion 134 | TYWIN,Tywin 135 | VALA,Vala 136 | VARYS,Varys 137 | WAIF,Waif 138 | WALDA,Walda 139 | WALDER,Walder 140 | WUN_WUN,Wun Wun 141 | YARA,Yara 142 | YEZZAN,Yezzan 143 | YOHN_ROYCE,Yohn -------------------------------------------------------------------------------- /dynetlsm/imputer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | from sklearn.base import BaseEstimator, TransformerMixin 5 | from sklearn.utils.validation import check_is_fitted, check_array, check_random_state 6 | 7 | 8 | 
from dynetlsm.network_statistics import density 9 | 10 | 11 | class SimpleNetworkImputer(BaseEstimator, TransformerMixin): 12 | """Impute missing values in the network by the most frequent value over 13 | all time points and edges. 14 | """ 15 | def __init__(self, missing_value=-1, strategy='most_frequent', 16 | random_state=123, copy=True): 17 | self.missing_value = missing_value 18 | self.strategy = strategy 19 | self.copy = copy 20 | self.random_state = random_state 21 | 22 | def _validate_input(self, Y): 23 | allowed_strategies = {'most_frequent', 'random'} 24 | if self.strategy not in allowed_strategies: 25 | raise ValueError("Can only use these strategies: {0}, " 26 | "got strategy='{1}'".format(allowed_strategies, 27 | self.strategy)) 28 | Y = check_array(Y, dtype=np.float64, 29 | force_all_finite='allow-nan', 30 | ensure_2d=False, allow_nd=True, copy=self.copy) 31 | 32 | return Y 33 | 34 | def fit(self, Y): 35 | Y = self._validate_input(Y) 36 | 37 | # statistics are calculated per time point 38 | n_time_steps = Y.shape[0] 39 | self.statistics_ = np.empty(n_time_steps) 40 | for t in range(n_time_steps): 41 | nan_mask = Y[t] == self.missing_value 42 | if not np.any(nan_mask): 43 | self.statistics_[t] = 0.0 44 | else: 45 | if self.strategy == 'most_frequent': 46 | mode = stats.mode(Y[t][~nan_mask].ravel()) 47 | self.statistics_[t] = mode[0][0] 48 | elif self.strategy == 'random': 49 | n_nodes = Y.shape[1] 50 | self.statistics_[t] = ( 51 | Y[t][~nan_mask].sum() / (n_nodes * (n_nodes - 1))) 52 | 53 | return self 54 | 55 | def transform(self, Y): 56 | check_is_fitted(self, 'statistics_') 57 | 58 | Y = self._validate_input(Y) 59 | 60 | if Y.shape[0] != self.statistics_.shape[0]: 61 | raise ValueError("Y has %d time steps, expected %d" 62 | % (Y.shape[0], self.statistics_.shape[0])) 63 | 64 | rng = check_random_state(self.random_state) 65 | for t in range(Y.shape[0]): 66 | 67 | if self.strategy == 'random': 68 | indices = np.triu_indices(Y.shape[1], k=1) 69 | y_vec = Y[t][indices] 70 | nan_mask = y_vec == self.missing_value 71 | y_vec[nan_mask] = rng.choice([0, 1], 72 | p=[1 - self.statistics_[t], self.statistics_[t]], 73 | size=np.sum(nan_mask)) 74 | Y[t][indices] = y_vec 75 | indices = np.tril_indices(Y.shape[1], k=-1) 76 | Y[t][indices] = 0 77 | Y[t] += Y[t].T 78 | else: 79 | nan_mask = Y[t] == self.missing_value 80 | Y[t][nan_mask] = self.statistics_[t] 81 | 82 | return Y 83 | -------------------------------------------------------------------------------- /dynetlsm/trace_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | from math import floor, ceil 5 | 6 | from statsmodels.regression.linear_model import yule_walker 7 | 8 | 9 | def mean_detrend(x): 10 | return x - np.mean(x) 11 | 12 | 13 | def xcorr(x, y, normed=True, detrend=mean_detrend, maxlags=10): 14 | Nx = len(x) 15 | if Nx != len(y): 16 | raise ValueError('x and y must be equal length') 17 | 18 | x = detrend(np.asarray(x)) 19 | y = detrend(np.asarray(y)) 20 | 21 | correls = np.correlate(x, y, mode='full') 22 | 23 | if normed: 24 | correls /= np.sqrt(np.dot(x, x) * np.dot(y, y)) 25 | 26 | if maxlags is None: 27 | maxlags = Nx - 1 28 | 29 | if maxlags >= Nx or maxlags < 1: 30 | raise ValueError('maxlags must be None or strictly ' 31 | 'positive < %d' % Nx) 32 | 33 | lags = np.arange(-maxlags, maxlags + 1) 34 | correls = correls[Nx - 1 - maxlags:Nx + maxlags] 35 | 36 | return lags, correls 37 | 38 | 39 | def effective_n(x, 
lags=None, corr=None, maxlags=100): 40 | """Effective sample size.""" 41 | if lags is None or corr is None: 42 | lags, corr = xcorr(x, x, maxlags=maxlags, normed=True) 43 | 44 | n_samples = x.shape[0] 45 | return n_samples / (1 + 2 * np.sum(corr[lags >= 1])) 46 | 47 | 48 | def aic_ar(sigma, n, p): 49 | """AIC for an AR(p) model with n samples. 50 | Note: Assumes the series is de-meaned. 51 | """ 52 | return 2 * n * np.log(sigma) + 2 * (p + 1) 53 | 54 | 55 | def spec0_ar(sigma, coefs): 56 | return (sigma ** 2) / ((1 - np.sum(coefs)) ** 2) 57 | 58 | 59 | def spectrum0_ar(x, max_order='auto'): 60 | """Calculates f(0) of the spectrum of x using an AR fit.""" 61 | n_samples = x.shape[0] 62 | 63 | if np.allclose(np.var(x), 0.0): 64 | return 0., 0. 65 | 66 | if max_order == 'auto': 67 | max_order = floor(10 * np.log10(n_samples)) 68 | 69 | # calculate f(0) and AIC for each AR(p) model 70 | results = np.zeros((max_order, 3)) 71 | for p in range(1, max_order + 1): 72 | coefs, sigma = yule_walker(x, order=p, demean=True, method='adjusted') 73 | results[p-1] = [p, spec0_ar(sigma, coefs), aic_ar(sigma, n_samples, p)] 74 | 75 | # return result for model minimizing the AIC 76 | min_id = np.argmin(results[:, -1]) 77 | order, var0 = results[min_id, :2] 78 | 79 | return var0 / n_samples, order 80 | 81 | 82 | def geweke_corrected(x, first=0.1, last=0.5): 83 | """Calculate the z-score using Geweke's correction for autocorrelations.""" 84 | n_samples = x.shape[0] 85 | 86 | # extract start and end chains 87 | x1 = x[:ceil(first * n_samples)] 88 | x2 = x[n_samples - floor(last * n_samples):] 89 | 90 | # calculate means 91 | x1_mean = np.mean(x1) 92 | x2_mean = np.mean(x2) 93 | 94 | # calculate variances 95 | x1_var, _ = spectrum0_ar(x1) 96 | x2_var, _ = spectrum0_ar(x2) 97 | 98 | # z score 99 | return (x1_mean - x2_mean) / np.sqrt(x1_var + x2_var) 100 | 101 | 102 | def geweke_diag(x, first=0.1, last=0.5, n_burn=None, corrected=True): 103 | """Performs Geweke's diagnostic on a chain x. 104 | Note: ArviZ and PyMC3 do not correct for autocorrelation and use a naive 105 | z-score! 106 | """ 107 | if n_burn is not None: 108 | x = x[n_burn:] 109 | 110 | z_score = geweke_corrected(x, first=first, last=last) 111 | 112 | # calculate two-sided p-value 113 | p_val = 2 * (1 - stats.norm.cdf(np.abs(z_score))) 114 | 115 | return z_score, p_val 116 | -------------------------------------------------------------------------------- /dynetlsm/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.linalg as linalg 3 | import scipy.stats as stats 4 | 5 | from scipy.stats import truncnorm 6 | from scipy.special import gammaln 7 | from sklearn.utils import check_random_state 8 | 9 | 10 | SMALL_EPS = np.finfo('float64').tiny 11 | 12 | 13 | def sample_categorical(probas, rng): 14 | """ 15 | Sample from a Categorical(probas) distribution. 16 | """ 17 | cdf = probas.cumsum() 18 | u = rng.rand()  # a single uniform draw; one draw per category would bias the sampler 19 | return (u < cdf).argmax() 20 | 21 | 22 | def spherical_normal_log_pdf(x, mean, var): 23 | """Logarithm of the pdf of a spherical multivariate gaussian 24 | distribution.""" 25 | n_features = mean.shape[0] 26 | sum_sq = np.sum((x - mean) ** 2) 27 | sum_sq *= 0.5 * (1. / var) 28 | return -0.5 * n_features * np.log(2 * np.pi * var) - sum_sq 29 | 30 | 31 | def spherical_normal_pdf(x, mean, var): 32 | """Probability Density Function for a spherical multivariate 33 | gaussian distribution. 
Note that this is 10x faster than 34 | the more general scipy.stats.multivariate_normal.pdf 35 | """ 36 | n_features = mean.shape[0] 37 | sum_sq = np.sum((x - mean) ** 2) 38 | sum_sq *= 0.5 * (1. / var) 39 | return (1 / (2 * np.pi * var) ** (n_features / 2)) * np.exp(-sum_sq) 40 | 41 | 42 | def multivariate_t_log_pdf(x, df, mu0, S): 43 | """Logarithm of the pdf of a multivariate t distribution.""" 44 | x = np.atleast_1d(x) 45 | mu0 = np.atleast_1d(mu0) 46 | 47 | p = x.shape[0] 48 | if not isinstance(S, np.ndarray): 49 | rss = np.sum((x - mu0) ** 2) / S 50 | log_var = p * np.log(np.sqrt(S)) 51 | else: 52 | L = linalg.cholesky(S) 53 | LinvX = linalg.solve_triangular(L, x - mu0, trans=1) 54 | rss = np.sum(LinvX ** 2, axis=0) 55 | log_var = np.sum(np.log(np.diag(L))) 56 | logdt = (gammaln((p + df) / 2.) - ( 57 | gammaln(df / 2.) + log_var + 58 | (p / 2.) * np.log(df * np.pi)) - 59 | 0.5 * (df + p) * np.log1p(rss / df)) 60 | 61 | return logdt 62 | 63 | 64 | def multivariate_t_pdf(x, df, mu0, S): 65 | """ 66 | Probability Density Function of a multivariate t-distribution with 67 | df degrees of freedom, location parameter mu0, and scale matrix S. 68 | """ 69 | return np.exp(multivariate_t_log_pdf(x, df, mu0, S)) 70 | 71 | 72 | def truncated_normal(mean, var, lower=0, upper=1, size=1, random_state=None): 73 | std = np.sqrt(var) 74 | a = (lower - mean) / std 75 | b = (upper - mean) / std 76 | return truncnorm.rvs(a, b, size=size, loc=mean, scale=std, 77 | random_state=random_state) 78 | 79 | 80 | def truncated_normal_logpdf(x, mean, var, lower=0, upper=1): 81 | std = np.sqrt(var) 82 | a = (lower - mean) / std 83 | b = (upper - mean) / std 84 | return truncnorm.logpdf(x, a, b, loc=mean, scale=std) 85 | 86 | 87 | def sample_dirichlet(alphas, random_state=None): 88 | """The numpy dirichlet sampler is numerically unstable and produces samples 89 | with zero entries. Clip these values before using the sample. 
90 | """ 91 | rng = check_random_state(random_state) 92 | if np.any(alphas <= 0.): 93 | alphas = np.clip(alphas, a_min=SMALL_EPS, a_max=None) 94 | return rng.dirichlet(alphas) 95 | 96 | 97 | def dirichlet_logpdf(x, alphas): 98 | if np.any(alphas <= 0.): 99 | alphas = np.clip(alphas, a_min=SMALL_EPS, a_max=None) 100 | if np.any(x <= 0): 101 | x = np.clip(x, a_min=SMALL_EPS, a_max=None) 102 | return stats.dirichlet.logpdf(x, alphas) 103 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s4-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | ADRACK_HUMBLE,Adrack Humble 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser Thorne 5 | ANYA_WAYNWOOD,Anya Waynwood 6 | ARYA,Arya 7 | AXELL_FLORENT,Axell Florent 8 | BAELOR,Baelor 9 | BALON,Balon 10 | BALON_DWARF,Balon Dwarf 11 | BARRISTAN,Barristan 12 | BITER,Biter 13 | BLACK_JACK,Black Jack 14 | BOLTON_GUARD,Bolton Guard 15 | BOROS,Boros 16 | BRAN,Bran 17 | BRIENNE,Brienne 18 | BRONN,Bronn 19 | CATELYN,Catelyn 20 | CERSEI,Cersei 21 | COOPER,Cooper 22 | CRASTER,Craster 23 | CRASTERS_BABY,Baby 24 | DAARIO,Daario 25 | DAENERYS,Daenerys 26 | DAVOS,Davos 27 | EDDISON_TOLLETT,Eddison 28 | DONGO_THE_GIANT,Dongo the Giant 29 | DONNEL_HILL,Donnel Hill 30 | DONNEL_WAYNWOOD,Donnel Waynwood 31 | DONTOS,Dontos 32 | DORAN,Doran 33 | DORNISH_LORD,Dornish Lord 34 | DROGO,Drogo 35 | DYING_MAN,Dying Man 36 | ELDER_MEEREEN_SLAVE,Meereen Statesman 37 | ELIA,Elia 38 | ELLARIA,Ellaria 39 | ENDREW,Endrew 40 | FALYSE,Falyse 41 | FARMER_HAMLET,Hamlet Farmer 42 | FENNESZ,Fennesz 43 | FIRST_MATE,First Mate 44 | FOOL,Fool 45 | GILLY,Gilly 46 | GOATHERD,Goatherd 47 | GOATHERDS_SON,Goatherd's Son 48 | GRENN,Grenn 49 | GREY_WORM,Grey Worm 50 | GUYMON,Guymon 51 | HIGH_SEPTON,High Septon 52 | HIZDAHR,Hizdahr 53 | HIZDAHRS_FATHER,Hizdahr's Father 54 | HODOR,Hodor 55 | HOSTER,Hoster 56 | HOT_PIE,Hot Pie 57 | INNKEEPER,Innkeeper 58 | INNKEEPERS_DAUGHTER,Innkeeper's Daughter 59 | JAIME,Jaime 60 | JANOS,Janos 61 | JAQEN,Jaqen 62 | JEOR,Jeor 63 | JOANNA,Joanna 64 | JOFFREY,Joffrey 65 | JOFFREY_DWARF,Joffrey Dwarf 66 | JOJEN,Jojen 67 | JON,Jon 68 | JON_ARRYN,Jon Arryn 69 | JORAH,Jorah 70 | KARL_TANNER,Karl 71 | KEGS,Kegs 72 | LEAF,Leaf 73 | LHARA,Lhara 74 | LITTLE_SAM,Little Sam 75 | LITTLEFINGER,Petyr 76 | LOCKE,Locke 77 | LOLLYS,Lollys 78 | LOMMY,Lommy 79 | LORAS,Loras 80 | LUTHOR,Luthor 81 | LYSA,Lysa 82 | MACE,Mace 83 | MAESTER_AEMON,Aemon 84 | MAG_THE_MIGHTY,Mag the Mighty 85 | MANCE,Mance 86 | MANSERVANT,Manservant 87 | MAREI,Marei 88 | MARGAERY,Margaery 89 | MASTER_MIGHDAL,Master Mighdal 90 | MEERA,Meera 91 | MEEREEN_CHAMPION,Meereen Champion 92 | MEEREEN_SLAVE,Meereen Slave 93 | MELISANDRE,Melisandre 94 | MERYN_TRANT,Meryn 95 | MISSANDEI,Missandei 96 | MOLES_TOWN_MADAM,Mole's Town Madam 97 | MOLES_TOWN_WHORE,Mole's Town Whore 98 | MORAG,Morag 99 | MORGAN,Morgan 100 | MORGANS_FRIEND,Morgan's Friend 101 | MOSSADOR,Mossador 102 | MOUNTAIN,Gregor 103 | MULLY,Mully 104 | MUSICIAN,Musician 105 | MYRANDA,Myranda 106 | MYRCELLA,Myrcella 107 | NED,Ned 108 | NIGHT_KING,Night King 109 | OBERYN,Oberyn 110 | OLENNA,Olenna 111 | OLLY,Olly 112 | OLLYS_MOTHER,Olly's Mother 113 | OLYVAR,Olyvar 114 | ORSON,Orson 115 | ORYS,Orys 116 | ORYS_BROTHER,Orys's Brother 117 | OTHELL_YARWYCK,Othell 118 | PODRICK,Podrick 119 | POLLIVER,Polliver 120 | PYCELLE,Pycelle 121 | PYP,Pyp 122 | QHORIN,Qhorin 123 | QYBURN,Qyburn 124 | RALF,Ralf 125 | RAMSAY,Ramsay 126 | RANDYLL,Randyll 127 | RAST,Rast 128 
| RENLY,Renly 129 | RENLY_DWARF,Renly Dwarf 130 | RHAEGAR,Rhaegar 131 | RICKON,Rickon 132 | ROBB,Robb 133 | ROBB_DWARF,Robb Dwarf 134 | ROBERT,Robert 135 | ROBIN,Robin 136 | ROOSE_BOLTON,Roose 137 | RORGE,Rorge 138 | SALLADHOR,Salladhor 139 | SALLY,Sally 140 | SAM,Sam 141 | HOUND,Sandor 142 | SANSA,Sansa 143 | SELWYN,Selwyn 144 | SELYSE,Selyse 145 | SHAE,Shae 146 | SHIREEN,Shireen 147 | SISSY,Sissy 148 | STANNIS,Stannis 149 | STANNIS_DWARF,Stannis Dwarf 150 | STYR,Styr 151 | SYRIO_FOREL,Syrio Forel 152 | TANSY,Tansy 153 | TERNESIO_TERYS,Ternesio Terys 154 | THENN_WARG,Thenn Warg 155 | THEON,Theon 156 | THREE_EYED_RAVEN,Three Eyed Raven 157 | TOMMEN,Tommen 158 | TOMMY,Tommy 159 | TORMUND,Tormund 160 | TYCHO,Tycho 161 | TYRION,Tyrion 162 | TYWIN,Tywin 163 | VANCE_CORBRAY,Vance Corbray 164 | VARYS,Varys 165 | VIOLA,Viola 166 | VISERYS,Viserys 167 | WALDA,Walda 168 | WAYMAR_ROYCE,Waymar Royce 169 | WHITE_WALKER,White Walker 170 | YARA,Yara 171 | YGRITTE,Ygritte 172 | YOHN_ROYCE,Yohn Royce 173 | ZALA,Zala -------------------------------------------------------------------------------- /examples/merging_communities.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import networkx as nx 4 | 5 | from sklearn.metrics import adjusted_rand_score 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import merging_dynamic_network 9 | from dynetlsm.procrustes import longitudinal_procrustes_rotation 10 | from dynetlsm.plots import get_colors, normal_contour 11 | 12 | 13 | Y, X, z, intercept, probas, mus, sigmas = merging_dynamic_network( 14 | n_nodes=120, lmbda=0.6, n_time_steps=5, random_state=42) 15 | 16 | 17 | # plot adjacency matrices 18 | fig, ax = plt.subplots(figsize=(15, 4), ncols=Y.shape[0], 19 | nrows=1, sharex=True, sharey=True) 20 | ax = ax.flat 21 | order = np.argsort(z[0]) 22 | for t in range(Y.shape[0]): 23 | ax[t].imshow(Y[t][order, :][:, order], cmap='gray_r') 24 | ax[t].xaxis.set_visible(False) 25 | ax[t].yaxis.set_visible(False) 26 | ax[t].set_title('t = {}'.format(t+1)) 27 | 28 | fig.savefig('merging_adj.png', dpi=300, bbox_inches='tight') 29 | 30 | 31 | # run model 32 | model = DynamicNetworkHDPLPCM( 33 | n_components=10, n_iter=10000, burn=10000, tune=5000, tune_interval=1000, 34 | random_state=42).fit(Y) 35 | 36 | # procrustes rotate to match truth 37 | X_rot, R = longitudinal_procrustes_rotation(X, model.X_) 38 | mu_rot = np.dot(model.mu_, R) 39 | 40 | 41 | # plot results 42 | fig, ax = plt.subplots(figsize=(18, 8), ncols=5, nrows=3, 43 | sharey='row', sharex='row', subplot_kw=dict(aspect=1)) 44 | 45 | colors = get_colors(z[0]) 46 | order = np.argsort(z[0]) 47 | for t in range(Y.shape[0]): 48 | ax[0, t].imshow(Y[t][order, :][:, order], cmap='gray_r') 49 | ax[0, t].xaxis.set_visible(False) 50 | ax[0, t].yaxis.set_visible(False) 51 | ax[0, t].set_title('t = {}'.format(t+1), fontsize=18) 52 | 53 | for t in range(Y.shape[0]): 54 | pos = dict() 55 | for i in range(Y.shape[1]): 56 | pos[i] = X[t, i] 57 | 58 | G = nx.from_numpy_array(Y[t]) 59 | 60 | nx.draw(G, pos=pos, 61 | node_color=colors[z[t]], 62 | edge_color='k', edgecolors='k', 63 | node_size=50, 64 | width=0.75, ax=ax[1, t]) 65 | 66 | for k in np.unique(z[t]): 67 | normal_contour(mus[k], sigmas[k] * np.eye(2), ax=ax[1, t], n_std=[2], 68 | zorder=1, alpha=0.4, facecolor=colors[k], linestyle='--', 69 | linewidth=1, edgecolor='k') 70 | 71 | if t == 0: 72 | ax[1, t].axis('on') 73 | ax[1, 
t].spines['top'].set_visible(False) 74 | ax[1, t].spines['right'].set_visible(False) 75 | ax[1, t].spines['bottom'].set_visible(False) 76 | ax[1, t].spines['left'].set_visible(False) 77 | ax[1, t].set_ylabel('Truth', fontsize=20) 78 | 79 | ax[1, t].margins(y=0.1) 80 | 81 | for t in range(Y.shape[0]): 82 | pos = dict() 83 | for i in range(Y.shape[1]): 84 | pos[i] = X_rot[t, i] 85 | 86 | G = nx.from_numpy_array(Y[t]) 87 | 88 | nx.draw(G, pos=pos, 89 | node_color=colors[model.z_[t]], 90 | edge_color='k', edgecolors='k', 91 | node_size=50, 92 | width=0.75, ax=ax[2, t]) 93 | 94 | for k in np.unique(model.z_[t]): 95 | normal_contour(mu_rot[k], model.sigma_[k] * np.eye(2), ax=ax[2, t], n_std=[2], 96 | zorder=1, alpha=0.4, facecolor=colors[k], linestyle='--', 97 | linewidth=1, edgecolor='k') 98 | 99 | if t == 0: 100 | ax[2, t].axis('on') 101 | ax[2, t].spines['top'].set_visible(False) 102 | ax[2, t].spines['right'].set_visible(False) 103 | ax[2, t].spines['bottom'].set_visible(False) 104 | ax[2, t].spines['left'].set_visible(False) 105 | ax[2, t].set_ylabel('Estimated', fontsize=20) 106 | 107 | ax[2, t].margins(y=0.1) 108 | 109 | fig.savefig('merging_results.png', dpi=300, bbox_inches='tight') 110 | 111 | 112 | # inferred blending coefficient and ARI 113 | print('lambda 95%: ', np.quantile(model.lambdas_[model.n_burn_:], q=[0.025, 0.975])) 114 | 115 | rand_index = adjusted_rand_score(z.ravel(), model.z_.ravel()) 116 | print('ARI: ', rand_index) 117 | -------------------------------------------------------------------------------- /examples/detection_limit.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import plac 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | 10 | from sklearn.utils import check_random_state 11 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 12 | 13 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 14 | from dynetlsm.datasets import detection_limit_simulation 15 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 16 | from dynetlsm.model_selection import minimize_posterior_expected_vi 17 | from dynetlsm.model_selection import train_test_split 18 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 19 | from dynetlsm.network_statistics import density, modularity 20 | 21 | 22 | # group separation ratio 23 | ratio = 0.5 24 | # ratio = 0.1, 0.25, 0.5, 0.7, 0.8, 0.9 25 | 26 | # transition probability 27 | trans_proba = 0.1 28 | # trans_proba = 0.1, 0.2, 0.4 29 | 30 | out_dir = 'results_tp_{}_ratio_{}'.format(trans_proba, ratio)  # include both settings so runs do not collide 31 | 32 | 33 | # create a directory to store the results 34 | if not os.path.exists(out_dir): 35 | os.mkdir(out_dir) 36 | 37 | 38 | def counts_per_time_step(z): 39 | n_time_steps = z.shape[0] 40 | group_counts = np.zeros(n_time_steps, dtype=int) 41 | for t in range(n_time_steps): 42 | group_counts[t] = np.unique(z[t]).shape[0] 43 | 44 | return group_counts 45 | 46 | def posterior_per_time_step(model): 47 | n_time_steps = model.Y_fit_.shape[0] 48 | probas = np.zeros((n_time_steps, model.n_components + 1)) 49 | for t in range(n_time_steps): 50 | freq = model.posterior_group_counts_[t] 51 | index = model.posterior_group_ids_[t] 52 | probas[t, index] = freq / freq.sum() 53 | 54 | return probas 55 | 56 | 57 | def benchmark_single(n_iter=10000, burn=5000, tune=1000, 58 | outfile_name='benchmark', 59 | ratio=0.5, trans_proba=0.2, 60 | random_state=None): 
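    # (editor's summary) one simulation replicate: draw a two-group dynamic
    # network with separation ratio `ratio` and label-switching probability
    # `trans_proba`, fit the HDP-LPCM, then write the posterior group-count
    # probabilities along with VI and adjusted Rand summaries to
    # `outfile_name`.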
61 | random_state = check_random_state(random_state) 62 | 63 | # generate simulated networks 64 | Y, X, z, probas, r, _ = detection_limit_simulation( 65 | r=ratio, trans_proba=trans_proba, random_state=random_state) 66 | 67 | 68 | # fit HDP-LPCM 69 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 70 | burn=burn, 71 | tune=tune, 72 | tune_interval=1000, 73 | is_directed=False, 74 | selection_type='vi', 75 | n_components=5, 76 | random_state=random_state).fit(Y) 77 | 78 | # MAP: number of clusters per time point 79 | map_counts = counts_per_time_step(model.z_) 80 | 81 | # Posterior group count probabilities 82 | probas = posterior_per_time_step(model) 83 | results = pd.DataFrame(probas) 84 | 85 | # create dataframe of results 86 | results['map_counts'] = map_counts 87 | 88 | # Variation of Information 89 | results['vi'] = variation_of_information( 90 | z.ravel(), model.z_.ravel()) 91 | vi = 0. 92 | for t in range(Y.shape[0]): 93 | vi_t = variation_of_information(z[t], model.z_[t]) 94 | results['vi_{}'.format(t)] = vi_t 95 | vi += vi_t 96 | results['vi_avg'] = vi / Y.shape[0] 97 | 98 | 99 | # adjusted rand index 100 | results['rand_index'] = adjusted_rand_score( 101 | z.ravel(), model.z_.ravel()) 102 | adj_rand = 0. 103 | for t in range(Y.shape[0]): 104 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 105 | results['rand_{}'.format(t)] = adj_t 106 | adj_rand += adj_t 107 | results['rand_avg'] = adj_rand / Y.shape[0] 108 | 109 | # info about simulated networks 110 | results['ratio'] = r 111 | 112 | results.to_csv(outfile_name, index=False) 113 | 114 | 115 | # run for 20 different networks 116 | for i in range(20): 117 | benchmark_single( 118 | n_iter=35000, burn=10000, tune=5000, random_state=i, 119 | ratio=ratio, trans_proba=trans_proba, 120 | outfile_name=os.path.join( 121 | out_dir, 'benchmark_{}.csv'.format(i))) 122 | -------------------------------------------------------------------------------- /dynetlsm/forecast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # cython: nonecheck=False 7 | # cython: initializedcheck=False 8 | from libc.math cimport log, exp, sqrt, M_PI 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | 14 | 15 | ctypedef np.npy_float64 DOUBLE 16 | ctypedef np.npy_int64 INT 17 | 18 | cdef inline double expit(double z): 19 | return 1. / (1. + exp(-z)) 20 | 21 | 22 | 23 | cdef double normal_pdf(DOUBLE[:] x, 24 | DOUBLE[:] mean, 25 | double var) nogil: 26 | cdef int k 27 | cdef int n_features = x.shape[0] 28 | cdef double sum_sq = 0.0 29 | 30 | for k in range(n_features): 31 | sum_sq += (x[k] - mean[k]) ** 2 32 | sum_sq *= 0.5 * (1. 
/ var) 33 | 34 | return exp(-0.5 * n_features * log(2 * M_PI * var) - sum_sq) 35 | 36 | 37 | cdef double mixture_normal_pdf(DOUBLE[:] x, 38 | DOUBLE[:] x_prev, 39 | DOUBLE[:] weights, 40 | double lmbda, 41 | DOUBLE[:, :] mean, 42 | DOUBLE[:] sigma): 43 | cdef int k 44 | cdef int n_groups = mean.shape[0] 45 | cdef int n_features = mean.shape[1] 46 | cdef double res = 0 47 | cdef DOUBLE[:] mu = np.zeros(n_features, dtype=np.float64) 48 | 49 | for k in range(n_groups): 50 | for p in range(n_features): 51 | mu[p] = lmbda * mean[k, p] + (1 - lmbda) * x_prev[p] 52 | res += weights[k] * normal_pdf(x, mu, sigma[k]) 53 | 54 | return res 55 | 56 | 57 | def renormalize_weights(z, weights, means, sigmas): 58 | n_components = sigmas.shape[0] 59 | 60 | active_groups, z = np.unique(z, return_inverse=True) 61 | trans_w = weights[active_groups][:, active_groups] 62 | trans_w /= np.sum(trans_w, axis=1).reshape(-1, 1) 63 | 64 | mu = means[active_groups] 65 | sigma = sigmas[active_groups] 66 | 67 | return z, trans_w, mu, sigma 68 | 69 | 70 | cdef inline double euclidean_distance(DOUBLE[:] x, DOUBLE[:] y) nogil: 71 | cdef int n_features = x.shape[0] 72 | cdef double d = 0. 73 | for k in range(n_features): 74 | d += (x[k] - y[k]) ** 2 75 | 76 | return sqrt(d) 77 | 78 | 79 | def marginal_forecast(DOUBLE[:, :] x, 80 | DOUBLE[:, :, :] x_prev, 81 | np.ndarray[np.int64_t, ndim=2, mode='c'] z, 82 | np.ndarray[double, ndim=3, mode='c'] trans_weights, 83 | np.ndarray[double, ndim=3, mode='c'] mus, 84 | np.ndarray[double, ndim=2, mode='c'] sigmas, 85 | DOUBLE[:] intercepts, 86 | DOUBLE[:] lmbdas, 87 | bint renormalize=True): 88 | cdef int i, j, s = 0 89 | cdef int n_iter = x_prev.shape[0] 90 | cdef int n_nodes = x_prev.shape[1] 91 | 92 | cdef double dij, wij 93 | 94 | cdef np.ndarray[double, ndim=2, mode='c'] sum_w = np.zeros( 95 | (n_nodes, n_nodes)) 96 | cdef np.ndarray[double, ndim=2, mode='c'] probas = np.zeros( 97 | (n_nodes, n_nodes)) 98 | cdef np.ndarray[np.int64_t, ndim=1, mode='c'] zs 99 | cdef DOUBLE[:, :] weights, mean 100 | cdef DOUBLE[:] sigma 101 | 102 | for s in range(n_iter): 103 | if renormalize: 104 | zs, weights, mean, sigma = renormalize_weights( 105 | z[s], trans_weights[s], mus[s], sigmas[s]) 106 | else: 107 | weights = trans_weights[s] 108 | mean = mus[s] 109 | sigma = sigmas[s] 110 | zs = z[s] 111 | 112 | for i in range(n_nodes): 113 | for j in range(i): 114 | dij = euclidean_distance(x[i], x[j]) 115 | 116 | wij = mixture_normal_pdf( 117 | x[i], x_prev[s, i], weights[zs[i]], lmbdas[s], mean, sigma) 118 | wij *= mixture_normal_pdf( 119 | x[j], x_prev[s, j], weights[zs[j]], lmbdas[s], mean, sigma) 120 | probas[i, j] += wij * expit(intercepts[s] - dij) / n_iter 121 | sum_w[i, j] += wij / n_iter 122 | 123 | sum_w += sum_w.T 124 | sum_w[np.diag_indices(n_nodes)] = 1 125 | probas += probas.T 126 | probas /= sum_w 127 | 128 | return np.asarray(probas) 129 | -------------------------------------------------------------------------------- /dynetlsm/metropolis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | 5 | def tune_step_size_random_walk(step_size, acc_rate): 6 | """Keep acceptance rate within 25% - 40% acceptance.""" 7 | if acc_rate < 0.001: 8 | step_size *= 0.1 9 | elif acc_rate < 0.05: 10 | step_size *= 0.5 11 | elif acc_rate < 0.25: 12 | step_size *= 0.9 13 | elif acc_rate > 0.95: 14 | step_size *= 10.0 15 | elif acc_rate > 0.75: 16 | step_size *= 2.0 17 | elif acc_rate > 0.4: 18 | step_size *= 1.1 
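    # worked example: an observed acceptance rate of 0.15 falls in the
    # `< 0.25` branch above, so the step shrinks to 0.9 * step_size,
    # steering later proposals back toward the 25%-40% target band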
19 | 20 | return step_size 21 | 22 | 23 | def tune_step_size_dirichlet(step_size, acc_rate): 24 | if acc_rate < 0.001: 25 | step_size *= 10.0 26 | elif acc_rate < 0.05: 27 | step_size *= 2 28 | elif acc_rate < 0.25: 29 | step_size *= 1.1 30 | elif acc_rate > 0.95: 31 | step_size *= 0.1 32 | elif acc_rate > 0.75: 33 | step_size *= 0.5 34 | elif acc_rate > 0.4: 35 | step_size *= 0.9 36 | 37 | return step_size 38 | 39 | 40 | def random_walk_metropolis(x0, logp, step_size, random_state): 41 | n_features = x0.shape[0] 42 | 43 | # random walk proposal 44 | x = x0 + step_size * random_state.randn(n_features) 45 | 46 | # accept-reject 47 | accept_ratio = logp(x) - logp(x0) 48 | accepted = 1 49 | u = random_state.rand() 50 | if np.log(u) >= accept_ratio: 51 | x = x0 52 | accepted = 0 53 | 54 | return x, accepted, accept_ratio 55 | 56 | 57 | def dirichlet_metropolis(x0, logp, step_size, random_state, reg=1e-5): 58 | n_nodes = x0.shape[0] 59 | 60 | # scaled dirichlet proposal 61 | x = random_state.dirichlet(step_size * x0) 62 | 63 | # occasionally draws are zero due to precision issues 64 | # add some regularization and re-normalize 65 | if np.any(x == 0.): 66 | x += reg 67 | x /= np.sum(x) 68 | 69 | # accept-reject 70 | accept_ratio = logp(x) - logp(x0) 71 | 72 | # dirichlet proposal 73 | accept_ratio += (stats.dirichlet.logpdf(x0, step_size * x) - 74 | stats.dirichlet.logpdf(x, step_size * x0)) 75 | 76 | accepted = 1 77 | u = random_state.rand() 78 | if np.log(u) >= accept_ratio: 79 | x = x0 80 | accepted = 0 81 | 82 | return x, accepted, accept_ratio 83 | 84 | 85 | class Metropolis(object): 86 | def __init__(self, step_size=0.1, tune=500, tune_interval=100, 87 | proposal_type='random_walk'): 88 | self.step_size = step_size 89 | self.tune = tune 90 | self.tune_interval = tune_interval 91 | self.proposal_type = proposal_type 92 | self.steps_until_tune = tune_interval 93 | self.n_accepted = 0 94 | self.n_steps = 0 95 | 96 | def step(self, x, logp, random_state): 97 | if self.proposal_type == 'dirichlet': 98 | x_new, accepted, _ = dirichlet_metropolis(x, 99 | logp, 100 | self.step_size, 101 | random_state) 102 | elif self.proposal_type == 'random_walk': 103 | x_new, accepted, _ = random_walk_metropolis(x, 104 | logp, 105 | self.step_size, 106 | random_state) 107 | else: 108 | raise ValueError("`proposal_type` must be in " 109 | "{'random_walk', 'dirichlet'}, but got " 110 | "{}".format(self.proposal_type)) 111 | 112 | # track acceptance statistics for adaptation 113 | self.n_accepted += accepted 114 | self.n_steps += 1 115 | 116 | # tune step-sizes if necessary 117 | if self.tune is not None: 118 | self.tune_step_size() 119 | 120 | return x_new 121 | 122 | def tune_step_size(self): 123 | if (self.n_steps < self.tune and self.steps_until_tune == 0): 124 | # tune step size 125 | accept_rate = self.n_accepted / self.tune_interval 126 | 127 | if self.proposal_type == 'dirichlet': 128 | self.step_size = tune_step_size_dirichlet(self.step_size, 129 | accept_rate) 130 | else: 131 | self.step_size = tune_step_size_random_walk(self.step_size, 132 | accept_rate) 133 | self.n_accepted = 0 134 | self.steps_until_tune = self.tune_interval 135 | else: 136 | self.steps_until_tune -= 1 137 | -------------------------------------------------------------------------------- /dynetlsm/sample_coefficients.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | from .network_likelihoods import ( 6 | 
dynamic_network_loglikelihood_directed, 7 | dynamic_network_loglikelihood_undirected, 8 | approx_directed_network_loglikelihood, 9 | ) 10 | 11 | 12 | def sample_intercepts(Y, X, intercepts, intercept_prior, 13 | intercept_variance_prior, samplers, radii=None, 14 | dist=None, is_directed=False, case_control_sampler=None, 15 | squared=False, random_state=None): 16 | rng = check_random_state(random_state) 17 | 18 | if is_directed: 19 | # sample intercept_in 20 | def logp(x): 21 | if case_control_sampler is not None: 22 | # TODO: we do not cache distances here, decrease by 23 | # factor of 2 if we do this 24 | loglik = approx_directed_network_loglikelihood( 25 | X=X, 26 | radii=radii, 27 | in_edges=case_control_sampler.in_edges_, 28 | out_edges=case_control_sampler.out_edges_, 29 | degree=case_control_sampler.degrees_, 30 | control_nodes=case_control_sampler.control_nodes_out_, 31 | intercept_in=x[0], 32 | intercept_out=intercepts[1], 33 | squared=squared) 34 | else: 35 | loglik = dynamic_network_loglikelihood_directed( 36 | Y, X, 37 | intercept_in=x[0], intercept_out=intercepts[1], 38 | radii=radii, 39 | squared=squared, 40 | dist=dist) 41 | loglik -= ((x[0] - intercept_prior[0]) ** 2 / 42 | (2 * intercept_variance_prior)) 43 | return loglik 44 | 45 | intercepts[0] = samplers[0].step( 46 | np.array([intercepts[0]]), logp, rng)[0] 47 | 48 | # sample intercept_out 49 | def logp(x): 50 | if case_control_sampler is not None: 51 | # TODO: we do not cache distances here, decrease by 52 | # factor of 2 if we do this 53 | loglik = approx_directed_network_loglikelihood( 54 | X=X, 55 | radii=radii, 56 | in_edges=case_control_sampler.in_edges_, 57 | out_edges=case_control_sampler.out_edges_, 58 | degree=case_control_sampler.degrees_, 59 | control_nodes=case_control_sampler.control_nodes_out_, 60 | intercept_in=intercepts[0], 61 | intercept_out=x[0], 62 | squared=squared) 63 | else: 64 | loglik = dynamic_network_loglikelihood_directed( 65 | Y, X, 66 | intercept_in=intercepts[0], intercept_out=x[0], 67 | radii=radii, 68 | squared=squared, 69 | dist=dist) 70 | loglik -= ((x[0] - intercept_prior[1]) ** 2 / 71 | (2 * intercept_variance_prior)) 72 | return loglik 73 | 74 | intercepts[1] = samplers[1].step( 75 | np.array([intercepts[1]]), logp, rng)[0] 76 | else: 77 | def logp(x): 78 | loglik = dynamic_network_loglikelihood_undirected(Y, X, 79 | intercept=x, 80 | squared=squared, 81 | dist=dist) 82 | loglik -= ((x - intercept_prior) ** 2 / 83 | (2 * intercept_variance_prior)) 84 | return loglik 85 | 86 | intercepts = samplers[0].step(intercepts, logp, rng) 87 | 88 | return intercepts 89 | 90 | 91 | def sample_radii(Y, X, intercepts, radii, sampler, dist=None, 92 | case_control_sampler=None, squared=False, random_state=None): 93 | rng = check_random_state(random_state) 94 | 95 | def logp(x): 96 | # NOTE: dirichlet prior (this is constant for alpha = 1.0 97 | if case_control_sampler: 98 | # TODO: we do not cache distances here, decrease by 99 | # factor of 2 if we do this 100 | loglik = approx_directed_network_loglikelihood( 101 | X=X, 102 | radii=x, 103 | in_edges=case_control_sampler.in_edges_, 104 | out_edges=case_control_sampler.out_edges_, 105 | degree=case_control_sampler.degrees_, 106 | control_nodes=case_control_sampler.control_nodes_out_, 107 | intercept_in=intercepts[0], 108 | intercept_out=intercepts[1], 109 | squared=squared) 110 | else: 111 | loglik = dynamic_network_loglikelihood_directed( 112 | Y, X, 113 | intercept_in=intercepts[0], 114 | intercept_out=intercepts[1], 115 | radii=x, 116 | 
squared=squared,
117 |                 dist=dist)
118 |
119 |         return loglik
120 |
121 |     return sampler.step(radii, logp, rng)
122 | -------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import contextlib
6 | import subprocess
7 | import glob
8 |
9 | from setuptools import setup, find_packages
10 | from setuptools import Extension
11 |
12 |
13 | HERE = os.path.dirname(os.path.abspath(__file__))
14 |
15 | # import `__version__` from the code base
16 | exec(open(os.path.join(HERE, 'dynetlsm', 'version.py')).read())
17 |
18 |
19 | with open('requirements.txt') as f:
20 |     INSTALL_REQUIRES = [l.strip() for l in f.readlines() if l]
21 |
22 |
23 | with open('test_requirements.txt') as f:
24 |     TEST_REQUIRES = [l.strip() for l in f.readlines() if l]
25 |
26 |
27 | try:
28 |     import numpy
29 | except ImportError:
30 |     print('numpy is required during installation')
31 |     sys.exit(1)
32 |
33 |
34 | try:
35 |     import scipy
36 | except ImportError:
37 |     print('scipy is required during installation')
38 |     sys.exit(1)
39 |
40 |
41 | @contextlib.contextmanager
42 | def chdir(new_dir):
43 |     old_dir = os.getcwd()
44 |     try:
45 |         sys.path.insert(0, new_dir)
46 |         yield
47 |     finally:
48 |         del sys.path[0]
49 |         os.chdir(old_dir)
50 |
51 |
52 | def find_cython(dir, files=None):
53 |     if files is None:
54 |         files = []
55 |
56 |     for file in os.listdir(dir):
57 |         path = os.path.join(dir, file)
58 |         if os.path.isfile(path) and path.endswith(".pyx"):
59 |             files.append(path.replace(os.path.sep, ".")[:-4])
60 |         elif os.path.isdir(path):
61 |             find_cython(path, files)
62 |
63 |     return files
64 |
65 |
66 | def clean(path):
67 |     for name in find_cython(path):
68 |         name = name.replace('.', os.path.sep)
69 |         for ext in ['*.c', '*.so', '*.o', '*.html']:
70 |             file_path = glob.glob(os.path.join(path, name + ext))
71 |             if file_path and os.path.exists(file_path[0]):
72 |                 os.unlink(file_path[0])
73 |
74 |
75 | def get_include():
76 |     source_path = os.path.join(HERE, 'src')
77 |     return source_path if os.path.exists(source_path) else ''
78 |
79 |
80 | def get_sources():
81 |     files = []
82 |     source_path = get_include()
83 |     if source_path:
84 |         for name in os.listdir(source_path):
85 |             path = os.path.join(source_path, name)
86 |             if os.path.isfile(path) and path.endswith(".c"):
87 |                 files.append(os.path.relpath(path))
88 |
89 |     return files
90 |
91 |
92 | def generate_cython(cython_cov=False):
93 |     print("Cythonizing sources")
94 |     for source in find_cython(HERE):
95 |         source = source.replace('.', os.path.sep) + '.pyx'
96 |         cythonize_source(source, cython_cov)
97 |
98 |
99 | def cythonize_source(source, cython_cov=False):
100 |     print("Processing %s" % source)
101 |
102 |     flags = ['--fast-fail']
103 |     if cython_cov:
104 |         flags.extend(['--directive', 'linetrace=True'])
105 |
106 |     try:
107 |         p = subprocess.call(['cython'] + flags + [source])
108 |         if p != 0:
109 |             raise Exception('Cython failed')
110 |     except OSError:
111 |         raise OSError('Cython needs to be installed')
112 |
113 |
114 | def make_extension(ext_name, macros=[]):
115 |     ext_path = ext_name.replace('.', os.path.sep) + '.c'
116 |     mod_name = '.'.join(ext_name.split('.')[-2:])
117 |     include_dirs = [numpy.get_include(), "."]
118 |     if get_include():
119 |         include_dirs = [get_include()] + include_dirs
120 |     return Extension(
121 |         mod_name,
122 |         sources=[os.path.relpath(ext_path)] + get_sources(),
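        # include NumPy's C headers (required by the `cimport numpy` blocks
        # in the .pyx sources) plus the optional bundled src/ directory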
123 |         include_dirs=include_dirs,
124 |         extra_compile_args=["-O3", "-Wall", "-fPIC"],
125 |         define_macros=macros)
126 |
127 |
128 | def generate_extensions(macros=[]):
129 |     ext_modules = []
130 |     for mod_name in find_cython(HERE):
131 |         ext_modules.append(make_extension(mod_name, macros=macros))
132 |
133 |     return ext_modules
134 |
135 | DISTNAME = 'dynetlsm'
136 | DESCRIPTION = 'dynetlsm'
137 | with open('README.md') as f:
138 |     LONG_DESCRIPTION = f.read()
139 | MAINTAINER = 'Joshua D. Loyal'
140 | MAINTAINER_EMAIL = 'jloyal25@gmail.com'
141 | URL = 'https://joshloyal.github.io/dynetlsm'
142 | DOWNLOAD_URL = 'https://pypi.org/project/dynetlsm/#files'
143 | LICENSE = 'MIT'
144 | VERSION = __version__
145 | CLASSIFIERS = []
146 |
147 |
148 |
149 | def setup_package():
150 |     if len(sys.argv) > 1 and sys.argv[1] == 'clean':
151 |         return clean(HERE)
152 |
153 |     cython_cov = 'CYTHON_COV' in os.environ
154 |
155 |     macros = []
156 |     if cython_cov:
157 |         print("Adding coverage information to cythonized files.")
158 |         macros = [('CYTHON_TRACE_NOGIL', 1)]
159 |
160 |     with chdir(HERE):
161 |         generate_cython(cython_cov)
162 |         ext_modules = generate_extensions(macros=macros)
163 |         setup(
164 |             name=DISTNAME,
165 |             maintainer=MAINTAINER,
166 |             maintainer_email=MAINTAINER_EMAIL,
167 |             description=DESCRIPTION,
168 |             license=LICENSE,
169 |             url=URL,
170 |             version=VERSION,
171 |             download_url=DOWNLOAD_URL,
172 |             long_description=LONG_DESCRIPTION,
173 |             long_description_content_type='text/markdown',
174 |             zip_safe=False,
175 |             classifiers=CLASSIFIERS,
176 |             package_data={
177 |                 '': [
178 |                     'dynetlsm' + os.path.sep + '*.pyx',
179 |                     'dynetlsm' + os.path.sep + '*.pxd'
180 |                 ]
181 |             },
182 |             include_package_data=True,
183 |             packages=find_packages(),
184 |             install_requires=INSTALL_REQUIRES,
185 |             extras_require={'test': TEST_REQUIRES},
186 |             setup_requires=['pytest-runner'],
187 |             tests_require=TEST_REQUIRES,
188 |             ext_modules=ext_modules
189 |         )
190 | if __name__ == '__main__':
191 |     setup_package()
192 | -------------------------------------------------------------------------------- /dynetlsm/latent_space.py: --------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy.linalg as linalg
4 |
5 | from scipy.sparse import csgraph
6 | from scipy.optimize import minimize
7 |
8 | from sklearn.cluster import KMeans
9 | from sklearn.manifold import MDS
10 | from sklearn.metrics import pairwise_distances, euclidean_distances
11 |
12 | from .procrustes import static_procrustes_rotation
13 |
14 |
15 | __all__ = ['calculate_distances', 'generalized_mds', 'longitudinal_kmeans',
16 |            'initialize_radii']
17 |
18 |
19 | def calculate_distances(X, metric='euclidean', squared=False):
20 |     """Calculates the pairwise distances between latent positions X."""
21 |     if X.ndim == 2:
22 |         return pairwise_distances(X, metric=metric)
23 |
24 |     n_time_steps, n_nodes, _ = X.shape
25 |
26 |     dist = np.empty((n_time_steps, n_nodes, n_nodes))
27 |     for t in range(n_time_steps):
28 |         if metric == 'euclidean':
29 |             dist[t] = euclidean_distances(X[t], squared=squared)
30 |         else:
31 |             dist[t] = pairwise_distances(X[t], metric=metric)
32 |
33 |     return dist
34 |
35 |
36 | def shortest_path_dissimilarity(Y, unweighted=True):
37 |     """Calculate the shortest-path dissimilarity of a static graph."""
38 |     dist = csgraph.shortest_path(Y, directed=False, unweighted=unweighted)
39 |
40 |     # impute unconnected components with the largest distance plus 1
41 |     inf_mask = np.isinf(dist)
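    # (max + 1 keeps disconnected pairs strictly farther apart than any
    # connected pair while leaving the finite distances untouched)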
dist[inf_mask] = np.max(dist[~inf_mask]) + 1 43 | 44 | return dist 45 | 46 | 47 | def generalized_mds(Y, n_features=2, is_directed=False, unweighted=True, 48 | lmbda=10, random_state=None): 49 | """Generalized Multi-Dimension Scaling (Sarkar and Moore, 2005).""" 50 | is_dynamic_graph = Y.ndim == 3 51 | if not is_dynamic_graph: 52 | Y = np.expand_dims(Y, axis=0) 53 | 54 | n_time_steps, n_nodes, _ = Y.shape 55 | 56 | # calculate shortest-path dissimilarity for each time step 57 | D = np.empty((n_time_steps, n_nodes, n_nodes)) 58 | for t in range(Y.shape[0]): 59 | D[t] = shortest_path_dissimilarity(Y[t], unweighted=unweighted) 60 | 61 | # compute latent positions based on MDS 62 | X = np.empty((n_time_steps, n_nodes, n_features)) 63 | 64 | # classical multi-dimensional scaling for t = 1 65 | X[0] = MDS(dissimilarity='precomputed', 66 | n_components=n_features, 67 | random_state=random_state).fit_transform(D[0]) 68 | 69 | # minimize the objective function found in Sarkar and Moore 70 | H = np.eye(n_nodes) - (1. / n_nodes) * np.ones((n_nodes, n_nodes)) 71 | for t in range(1, n_time_steps): 72 | alpha = 1 / (1 + lmbda) 73 | beta = lmbda / (1 + lmbda) 74 | XXt = alpha * np.dot(H, np.dot(-0.5 * D[t] ** 2, H)) 75 | XXt = XXt + beta * (np.dot(X[t-1], X[t-1].T)) 76 | 77 | # the optimum is the eigen-decomposition of XXt 78 | evals, evecs = linalg.eigh(XXt) 79 | 80 | # flip so in descending order 81 | evecs = evecs[:, ::-1] 82 | evals = evals[::-1] 83 | 84 | # extract features (top n_features eigenvectors scaled by eigenvalue) 85 | X[t] = evecs[:, :n_features] * np.sqrt(evals[:n_features]) 86 | 87 | # procrustes transformation to fix rotation invariance 88 | X[t], _ = static_procrustes_rotation(X[t-1], X[t]) 89 | 90 | # the directed model scales the space so that it is roughly [-1, 1], 91 | # i.e. same scale as the radii 92 | if is_directed: 93 | X /= n_nodes 94 | 95 | return X if is_dynamic_graph else np.squeeze(X) 96 | 97 | 98 | def longitudinal_kmeans(X, n_clusters=5, var_reg=1e-3, 99 | fixed_clusters=True, random_state=None): 100 | """Longitudinal K-Means Algorithm (Genolini and Falissard, 2010)""" 101 | n_time_steps, n_nodes, n_features = X.shape 102 | 103 | # vectorize latent positions across time 104 | X_vec = np.moveaxis(X, 0, -1).reshape(n_nodes, n_time_steps * n_features) 105 | 106 | # perform normal k-means on the vectorized features 107 | kmeans = KMeans(n_clusters=n_clusters, 108 | random_state=random_state).fit(X_vec) 109 | 110 | # this method assigns a single cluster to each point across time. 111 | labels = kmeans.labels_.reshape(-1, 1) 112 | labels = np.hstack([labels] * n_time_steps).T 113 | 114 | # un-vectorize centers, shape (n_time_steps, n_centers, n_features) 115 | centers_vec = kmeans.cluster_centers_ 116 | if fixed_clusters: 117 | centers = np.empty((n_clusters, n_features)) 118 | for k in range(n_clusters): 119 | muk = centers_vec[k].reshape(-1, n_time_steps).T 120 | centers[k] = muk.mean(axis=0) # average position overtime 121 | else: 122 | centers = np.empty((n_time_steps, n_clusters, n_features)) 123 | for k in range(n_clusters): 124 | centers[:, k] = centers_vec[k].reshape(-1, n_time_steps).T 125 | 126 | # calculate cluster variances (assumed spherical and constant over-time) 127 | variances = np.zeros(n_clusters, dtype=np.float64) 128 | for k in range(n_clusters): 129 | for t in range(n_time_steps): 130 | variances[k] += np.var(X[t][labels[t] == k], axis=0).mean() 131 | variances[k] /= n_time_steps 132 | 133 | # clusters with a single data point will have zero-variance. 
134 | # assign a fudge factor in this case 135 | variances[variances == 0.] = var_reg 136 | 137 | return centers, variances, labels 138 | 139 | 140 | def initialize_radii(Y, reg=1e-5): 141 | """Initialize radii to normalized average of out-degree and in-degree 142 | over time. 143 | """ 144 | radii = 0.5 * (Y.sum(axis=(0, 1)) + Y.sum(axis=(0, 2))) 145 | radii /= Y.sum() 146 | 147 | # radii can be zero if no edges are present. Add a small amount 148 | # of social reach to each radii in this case. 149 | if np.any(radii == 0.): 150 | radii += reg 151 | radii /= np.sum(radii) 152 | 153 | return radii 154 | -------------------------------------------------------------------------------- /dynetlsm/model_selection/approx_bic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..network_likelihoods import compute_gaussian_likelihood 4 | from ..network_likelihoods import dynamic_network_loglikelihood_undirected 5 | from ..network_likelihoods import dynamic_network_loglikelihood_directed 6 | from ..array_utils import nondiag_indices_from_3d 7 | 8 | 9 | __all__ = ['select_bic'] 10 | 11 | 12 | class DynamicNetworkMixtureModel: 13 | def __init__(self, beta, init_weights, trans_weights, X, mu, sigma, lmbda, 14 | z, intercept, radii=None): 15 | self.beta = beta 16 | self.init_weights = init_weights 17 | self.trans_weights = trans_weights 18 | self.X = X 19 | self.mu = mu 20 | self.sigma = sigma 21 | self.lmbda = lmbda 22 | self.z = z 23 | self.intercept = intercept 24 | self.radii = radii 25 | 26 | 27 | def calculate_cluster_counts_t(model): 28 | n_burn = model.n_burn_ 29 | 30 | z = model.zs_ 31 | n_iter, n_time_steps, _ = z.shape 32 | n_burn = n_burn if n_burn is not None else 0 33 | 34 | counts = np.zeros((n_time_steps, int(n_iter - n_burn)), dtype=np.int) 35 | for t in range(n_time_steps): 36 | for i in range(n_iter - n_burn): 37 | n_clusters = np.unique(z[i + n_burn, t]).shape[0] 38 | counts[t, i] = n_clusters 39 | return counts 40 | 41 | 42 | def calculate_cluster_counts(model): 43 | n_burn = model.n_burn_ 44 | 45 | z = model.zs_ 46 | n_iter = z.shape[0] 47 | n_burn = n_burn if n_burn is not None else 0 48 | 49 | counts = np.zeros(int(n_iter - n_burn), dtype=np.int) 50 | for i in range(n_iter - n_burn): 51 | n_clusters = np.unique(z[i + n_burn].ravel()).shape[0] 52 | counts[i] = n_clusters 53 | return counts 54 | 55 | 56 | def latent_marginal_loglikelihood(X, init_w, trans_w, mu, sigma, lmbda): 57 | n_time_steps, n_nodes, _ = X.shape 58 | n_components = sigma.shape[0] 59 | 60 | loglik = 0.0 61 | for i in range(n_nodes): 62 | gauss_loglik = compute_gaussian_likelihood(X[:, i], mu, sigma, lmbda, 63 | normalize=False) 64 | fwds_msg = init_w * gauss_loglik[0] 65 | c = np.sum(fwds_msg) 66 | loglik += np.log(c) 67 | fwds_msg /= c 68 | 69 | for t in range(1, n_time_steps): 70 | fwds_msg = (gauss_loglik[t] * 71 | np.dot(trans_w[t].T, fwds_msg.reshape(-1, 1)).ravel()) 72 | c = np.sum(fwds_msg) 73 | loglik += np.log(c) 74 | fwds_msg /= c 75 | 76 | return loglik 77 | 78 | 79 | def select_bic(model): 80 | n_time_steps, n_nodes, _ = model.Y_fit_.shape 81 | n_burn = model.n_burn_ 82 | 83 | # determine model sizes available in the posterior samples 84 | counts = calculate_cluster_counts(model) 85 | 86 | bic = [] 87 | models = [] 88 | for k in np.unique(counts): 89 | # determine MAP for model size k 90 | mask = counts != k 91 | map_id = np.ma.array(model.logps_[n_burn:], mask=mask).argmax() + n_burn 92 | 93 | # extract MAP estimators 94 | 
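        # (map_id picks the post-burn-in draw with the highest stored
        # log-posterior among samples whose active cluster count equals k)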
intercept = model.intercepts_[map_id] 95 | X = model.Xs_[map_id] 96 | mu = model.mus_[map_id] 97 | sigma = model.sigmas_[map_id] 98 | beta = model.betas_[map_id] 99 | weights = model.weights_[map_id] 100 | lmbda = model.lambdas_[map_id] 101 | radii = model.radiis_[map_id] if model.is_directed else None 102 | 103 | # re-normalize weights 104 | active_clusters = np.unique(model.zs_[map_id].ravel()) 105 | active_mask = np.in1d(np.arange(model.n_components), active_clusters) 106 | 107 | beta = beta[active_clusters] 108 | beta /= beta.sum() 109 | 110 | init_w = weights[0, 0, active_clusters] 111 | init_w /= init_w.sum() 112 | 113 | trans_w = np.zeros((n_time_steps, k, k), dtype=np.float64) 114 | for t in range(1, n_time_steps): 115 | trans_w[t] = weights[t, active_clusters][:, active_clusters] 116 | trans_w[t] /= np.sum(trans_w[t], axis=1).reshape(-1, 1) 117 | 118 | # filter cluster components 119 | mu = mu[active_clusters] 120 | sigma = sigma[active_clusters] 121 | 122 | # BIC component for P(Y | X) 123 | if model.is_directed: 124 | loglik_k = dynamic_network_loglikelihood_directed( 125 | model.Y_fit_, X, 126 | intercept_in=intercept[0], 127 | intercept_out=intercept[1], 128 | radii=radii) 129 | bic_k = -2 * loglik_k 130 | 131 | n_params = 2 + n_nodes 132 | nondiag_indices = nondiag_indices_from_3d(model.Y_fit_) 133 | bic_k += n_params * np.log(np.sum(model.Y_fit_[nondiag_indices])) 134 | else: 135 | loglik_k = dynamic_network_loglikelihood_undirected( 136 | model.Y_fit_, X, intercept) 137 | bic_k = -2 * loglik_k 138 | bic_k += np.log(0.5 * ( 139 | np.sum(model.Y_fit_) - np.einsum('ikk', model.Y_fit_).sum())) 140 | 141 | # BIC component for P(X | G) = P(X | mu, sigma, w) 142 | bic_k -= 2 * latent_marginal_loglikelihood( 143 | X, init_w, trans_w, mu, sigma, lmbda) 144 | 145 | n_params = ((model.n_features + 1) * k + # cluster params 146 | (k - 1) + # beta 147 | (k - 1) + # init_weights 148 | (n_time_steps - 1) * k * (k - 1)) # trans_weights 149 | bic_k += n_params * np.log(n_nodes * n_time_steps) 150 | 151 | model_k = DynamicNetworkMixtureModel(init_weights=init_w, 152 | trans_weights=trans_w, 153 | beta=beta, 154 | X=X, mu=mu, sigma=sigma, 155 | lmbda=lmbda, 156 | z=model.zs_[map_id], 157 | intercept=intercept, 158 | radii=radii) 159 | bic.append([k, bic_k, loglik_k, map_id]) 160 | models.append(model_k) 161 | 162 | return np.array(bic), models, counts 163 | -------------------------------------------------------------------------------- /dynetlsm/sample_labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | from .gaussian_likelihood_fast import compute_gaussian_likelihood 5 | from .gaussian_likelihood_fast import spherical_normal_log_pdf 6 | 7 | 8 | def log_normalize(probas): 9 | max_proba = np.max(probas) 10 | probas -= max_proba 11 | np.exp(probas, probas) 12 | probas /= np.sum(probas) 13 | return probas 14 | 15 | 16 | def sample_categorical(probas, rng): 17 | cdf = np.cumsum(probas) 18 | u = rng.uniform(0, cdf[-1]) 19 | return np.sum(u > cdf) 20 | 21 | 22 | def sample_labels_gibbs(X, mu, sigma, lmbda, w0, w, random_state=None): 23 | random_state = check_random_state(random_state) 24 | 25 | n_time_steps, n_nodes, _ = X.shape 26 | n_components = sigma.shape[0] 27 | 28 | # initialize cluster statistics 29 | # NOTE: n[0, 0, k] stores transitions for initial distribution 30 | n = np.zeros((n_time_steps, n_components, n_components)) 31 | resp = np.zeros((n_time_steps, n_nodes, 
n_components), dtype=int)
32 |     nk = np.zeros((n_time_steps, n_components), dtype=int)
33 |
34 |     # initialize labels
35 |     z = np.zeros((n_time_steps, n_nodes), dtype=int)
36 |
37 |     # store sample probabilities
38 |     probas = np.zeros(n_components, dtype=np.float64)
39 |
40 |     # sample labels for each node
41 |     for t in range(n_time_steps):
42 |         for i in range(n_nodes):
43 |             # FIXME: using 1e-5 hack to avoid log of zero
44 |             if t == 0:
45 |                 for k in range(n_components):
46 |                     probas[k] = (np.log(w0[k] + 1e-5) +
47 |                                  spherical_normal_log_pdf(X[t, i],
48 |                                                           mu[k],
49 |                                                           sigma[k]))
50 |             else:
51 |                 for k in range(n_components):
52 |                     probas[k] = (np.log(w[z[t-1, i], k] + 1e-5) +
53 |                                  spherical_normal_log_pdf(
54 |                                      X[t, i],
55 |                                      lmbda * mu[k] + (1 - lmbda) * X[t-1, i],
56 |                                      sigma[k]))
57 |
58 |             # sample zt
59 |             probas = log_normalize(probas)
60 |             z[t, i] = sample_categorical(probas, random_state)
61 |
62 |             # update statistics
63 |             if t == 0:
64 |                 n[0, 0, z[t, i]] += 1
65 |             else:
66 |                 n[t, z[t-1, i], z[t, i]] += 1
67 |             resp[t, i, z[t, i]] = 1
68 |             nk[t, z[t, i]] += 1
69 |
70 |     return z, n, nk, resp
71 |
72 |
73 | def sample_labels_block_lpcm(
74 |         X, mu, sigma, lmbda, init_weights, trans_weights, random_state=None):
75 |     random_state = check_random_state(random_state)
76 |
77 |     n_time_steps, n_nodes, _ = X.shape
78 |     n_components = sigma.shape[0]
79 |
80 |     # initialize message passing variables
81 |     bwds_msg = np.ones((n_time_steps, n_components),
82 |                        dtype=np.float64)
83 |     partial_marg = np.zeros((n_time_steps, n_components),
84 |                             dtype=np.float64)
85 |
86 |     # initialize cluster statistics
87 |     # NOTE: n[0, 0, k] stores transitions for initial distribution
88 |     n = np.zeros((n_time_steps, n_components, n_components))
89 |     resp = np.zeros((n_time_steps, n_nodes, n_components), dtype=int)
90 |     nk = np.zeros((n_time_steps, n_components), dtype=int)
91 |
92 |     # initialize labels
93 |     z = np.zeros((n_time_steps, n_nodes), dtype=int)
94 |
95 |     # sample labels for each node
96 |     for i in range(n_nodes):
97 |         # calculate likelihood of X_t^i under all groups
98 |         # n_time_steps x n_components
99 |         likelihood = compute_gaussian_likelihood(X[:, i], mu, sigma,
100 |                                                  lmbda, normalize=False)
101 |
102 |         # calculate backwards messages and partial likelihoods
103 |         # (phi_k * m_k)
104 |         for t in range(n_time_steps - 1, 0, -1):
105 |             partial_marg[t] = likelihood[t] * bwds_msg[t]
106 |             bwds_msg[t-1] = np.dot(
107 |                 trans_weights, partial_marg[t].reshape(-1, 1)).ravel()
108 |
109 |             # helps with underflow (could also divide by maximum)
110 |             bwds_msg[t-1] /= np.sum(bwds_msg[t-1])
111 |         partial_marg[0] = likelihood[0] * bwds_msg[0]
112 |
113 |         # sample labels forward in time
114 |         for t in range(n_time_steps):
115 |             if t == 0:
116 |                 probas = init_weights * partial_marg[0]
117 |             else:
118 |                 probas = trans_weights[z[t-1, i]] * partial_marg[t]
119 |
120 |             # sample zt
121 |             z[t, i] = sample_categorical(probas, random_state)
122 |
123 |             # update statistics
124 |             if t == 0:
125 |                 n[0, 0, z[t, i]] += 1
126 |             else:
127 |                 n[t, z[t-1, i], z[t, i]] += 1
128 |             resp[t, i, z[t, i]] = 1
129 |             nk[t, z[t, i]] += 1
130 |
131 |     return z, n, nk, resp
132 |
133 |
134 | def sample_labels_block(X, mu, sigma, lmbda, w, random_state=None):
135 |     random_state = check_random_state(random_state)
136 |
137 |     n_time_steps, n_nodes, _ = X.shape
138 |     n_components = sigma.shape[0]
139 |
140 |     # initialize message passing variables
141 |     bwds_msg = np.ones((n_time_steps, n_components),
142 |                        dtype=np.float64)
143 |     partial_marg = 
np.zeros((n_time_steps, n_components), 144 | dtype=np.float64) 145 | 146 | # initialize cluster statistics 147 | # NOTE: n[0, 0, k] stores transitions for initial distribution 148 | n = np.zeros((n_time_steps, n_components, n_components)) 149 | resp = np.zeros((n_time_steps, n_nodes, n_components), dtype=np.int) 150 | nk = np.zeros((n_time_steps, n_components), dtype=np.int) 151 | 152 | # initialize labels 153 | z = np.zeros((n_time_steps, n_nodes), dtype=np.int) 154 | 155 | # sample labels for each node 156 | for i in range(n_nodes): 157 | # calculate likelihood of X_t^i under all groups 158 | # n_time_steps x n_components 159 | likelihood = compute_gaussian_likelihood(X[:, i], mu, sigma, 160 | lmbda, normalize=False) 161 | 162 | # calculate backwards messages and partial likelihoods 163 | # (phi_k * m_k) 164 | for t in range(n_time_steps - 1, 0, -1): 165 | partial_marg[t] = likelihood[t] * bwds_msg[t] 166 | bwds_msg[t-1] = np.dot(w[t], partial_marg[t].reshape(-1, 1)).ravel() 167 | 168 | # helps with underflow (could also divide by maximum) 169 | bwds_msg[t-1] /= np.sum(bwds_msg[t-1]) 170 | partial_marg[0] = likelihood[0] * bwds_msg[0] 171 | 172 | # sample labels forward in time 173 | for t in range(n_time_steps): 174 | if t == 0: 175 | probas = w[0, 0] * partial_marg[0] 176 | else: 177 | probas = w[t, z[t-1, i]] * partial_marg[t] 178 | 179 | # sample zt 180 | z[t, i] = sample_categorical(probas, random_state) 181 | 182 | # update statistics 183 | if t == 0: 184 | n[0, 0, z[t, i]] += 1 185 | else: 186 | n[t, z[t-1, i], z[t, i]] += 1 187 | resp[t, i, z[t, i]] = 1 188 | nk[t, z[t, i]] += 1 189 | 190 | return z, n, nk, resp 191 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson.npy: -------------------------------------------------------------------------------- 1 | 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 5 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 6 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 7 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 8 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 9 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 10 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 11 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 12 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 13 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 14 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 15 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 16 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 17 | 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 18 | 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 19 | -------------------------------------------------------------------------------- /dynetlsm/case_control_likelihood.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import numbers 3 | import numpy as np 4 | 5 | from sklearn.utils import check_random_state 6 | 7 | 8 | class CaseControlSampler(abc.ABC): 9 | def __init__(self, 10 | n_control=100, 11 | n_resample=100, 12 | random_state=None): 13 | self.n_control = n_control 14 | self.n_resample = n_resample 15 | self.random_state = random_state 16 | 17 | self.n_iter = 0 18 | 19 | 
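    # Subclasses implement init(Y), which precomputes degrees and edge lists
    # from an adjacency array Y of shape (n_time_steps, n_nodes, n_nodes),
    # and sample(), which redraws the control nodes used in the case-control
    # likelihood approximation. A sketch of the intended flow (argument
    # values are illustrative):
    #
    #   sampler = DirectedCaseControlSampler(n_control=100).init(Y)
    #   control_in, control_out = sampler.resample()
    #
    # resample() returns the current draw, refreshing it every `n_resample`
    # calls.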
@abc.abstractmethod 20 | def init(self, Y): 21 | pass 22 | 23 | @abc.abstractmethod 24 | def sample(self): 25 | pass 26 | 27 | def resample(self): 28 | if self.n_resample is not None and self.n_iter % self.n_resample == 0.: 29 | self.control_nodes_in_, self.control_nodes_out_ = self.sample() 30 | 31 | self.n_iter += 1 32 | 33 | return self.control_nodes_in_, self.control_nodes_out_ 34 | 35 | 36 | class DirectedCaseControlSampler(CaseControlSampler): 37 | def init(self, Y): 38 | n_time_steps, n_nodes, _ = Y.shape 39 | 40 | if isinstance(self.n_control, (numbers.Integral, np.integer)): 41 | self.n_control_ = self.n_control 42 | else: 43 | self.n_control_ = int(self.n_control * n_nodes) 44 | 45 | # compute in-degree / out-degree of each node 46 | self.degrees_ = np.zeros((n_time_steps, n_nodes, 2), dtype=np.int) 47 | for t in range(n_time_steps): 48 | self.degrees_[t, :, 0] = Y[t].sum(axis=0) # in-degree 49 | self.degrees_[t, :, 1] = Y[t].sum(axis=1) # out-degree 50 | 51 | # store indices of edges, i.e. Y_ijt = 1 52 | max_in_degree = int(np.max(self.degrees_[:, :, 0])) 53 | max_out_degree = int(np.max(self.degrees_[:, :, 1])) 54 | self.in_edges_ = np.zeros((n_time_steps, n_nodes, max_in_degree), 55 | dtype=np.int) 56 | self.out_edges_ = np.zeros((n_time_steps, n_nodes, max_out_degree), 57 | dtype=np.int) 58 | for t in range(n_time_steps): 59 | for i in range(n_nodes): 60 | indices = np.where(Y[t, i, :] == 1)[0] 61 | n_edges = indices.shape[0] 62 | if n_edges: 63 | self.out_edges_[t, i, :n_edges] = indices 64 | 65 | indices = np.where(Y[t, :, i] == 1)[0] 66 | n_edges = indices.shape[0] 67 | if n_edges: 68 | self.in_edges_[t, i, :n_edges] = indices 69 | 70 | self.control_nodes_in_, self.control_nodes_out_ = self.sample() 71 | self.n_iter += 1 72 | 73 | return self 74 | 75 | def sample(self): 76 | rng = check_random_state(self.random_state) 77 | n_time_steps, n_nodes, _ = self.out_edges_.shape 78 | 79 | control_nodes_out = np.full((n_time_steps, n_nodes, self.n_control_), 80 | -1.0, dtype=np.int) 81 | control_nodes_in = np.full((n_time_steps, n_nodes, self.n_control_), 82 | -1.0, dtype=np.int) 83 | for t in range(n_time_steps): 84 | for i in range(n_nodes): 85 | out_degree = self.degrees_[t, i, 1] 86 | n_zeros = n_nodes - out_degree - 1 87 | if n_zeros < self.n_control_: 88 | n_sample = n_zeros 89 | else: 90 | n_sample = self.n_control_ 91 | 92 | edges = set.difference( 93 | set(range(n_nodes)), 94 | self.out_edges_[t, i, :out_degree].tolist() + [i]) 95 | control_nodes_out[t, i, :n_sample] = rng.choice(list(edges), 96 | size=n_sample, 97 | replace=False) 98 | 99 | in_degree = self.degrees_[t, i, 0] 100 | n_zeros = n_nodes - in_degree - 1 101 | if n_zeros < self.n_control_: 102 | n_sample = n_zeros 103 | else: 104 | n_sample = self.n_control_ 105 | 106 | edges = set.difference( 107 | set(range(n_nodes)), 108 | self.in_edges_[t, i, :in_degree].tolist() + [i]) 109 | control_nodes_in[t, i, :n_sample] = rng.choice(list(edges), 110 | size=n_sample, 111 | replace=False) 112 | return control_nodes_in, control_nodes_out 113 | 114 | 115 | class MissingDirectedCaseControlSampler(CaseControlSampler): 116 | def init(self, Y): 117 | n_time_steps, n_nodes, _ = Y.shape 118 | 119 | if isinstance(self.n_control, (numbers.Integral, np.integer)): 120 | self.n_control_ = self.n_control 121 | else: 122 | self.n_control_ = int(self.n_control * n_nodes) 123 | 124 | # compute in-degree / out-degree of each node 125 | self.degrees_ = np.zeros((n_time_steps, n_nodes, 2), dtype=np.int) 126 | for t in range(n_time_steps): 
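            # Y[t] is read as Y[t, i, j] = 1 meaning an edge i -> j, so
            # summing over axis 0 (columns) counts edges into a node while
            # summing over axis 1 (rows) counts edges out of it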
127 | self.degrees_[t, :, 0] = Y[t].sum(axis=0) # in-degree 128 | self.degrees_[t, :, 1] = Y[t].sum(axis=1) # out-degree 129 | 130 | # store indices of edges, i.e. Y_ijt = 1 131 | max_in_degree = int(np.max(self.degrees_[:, :, 0])) 132 | max_out_degree = int(np.max(self.degrees_[:, :, 1])) 133 | self.in_edges_ = np.zeros((n_time_steps, n_nodes, max_in_degree), 134 | dtype=np.int) 135 | self.out_edges_ = np.zeros((n_time_steps, n_nodes, max_out_degree), 136 | dtype=np.int) 137 | for t in range(n_time_steps): 138 | for i in range(n_nodes): 139 | indices = np.where(Y[t, i, :] == 1)[0] 140 | n_edges = indices.shape[0] 141 | if n_edges: 142 | self.out_edges_[t, i, :n_edges] = indices 143 | 144 | indices = np.where(Y[t, :, i] == 1)[0] 145 | n_edges = indices.shape[0] 146 | if n_edges: 147 | self.in_edges_[t, i, :n_edges] = indices 148 | 149 | # determine edges (Y_ijt = 1 or Y_jit = 1 for at least one time step) 150 | self.edge_list_ = [] 151 | for i in range(n_nodes): 152 | mask = np.logical_or(Y[:, i, :] == 1, Y[:, :, i] == 1) 153 | mask = mask.sum(axis=0) 154 | self.edge_list_.append(np.unique(np.where(mask > 0)[0])) 155 | 156 | self.control_nodes_ = self.sample() 157 | self.n_iter += 1 158 | 159 | return self 160 | 161 | def sample(self): 162 | rng = check_random_state(self.random_state) 163 | n_nodes = len(self.edge_list_) 164 | 165 | # TODO: n_control_samples can be a fraction of total number of nodes 166 | 167 | control_nodes = np.zeros((n_nodes, self.n_control_), dtype=np.int) 168 | for i in range(n_nodes): 169 | # stratify sample based one connections vs. non-connections 170 | n_connected = int(self.edge_list_[i].shape[0] / n_nodes * 171 | self.n_control_) 172 | if self.edge_list_[i].shape[0] > 0: 173 | n_connected = max(n_connected, 1) 174 | 175 | control_nodes[i, :n_connected] = rng.choice(self.edge_list_[i], 176 | size=n_connected, 177 | replace=False) 178 | 179 | edges = set.difference( 180 | set(range(n_nodes)), self.edge_list_[i].tolist() + [i]) 181 | n_remaining = self.n_control_ - n_connected 182 | control_nodes[i, n_connected:] = rng.choice(list(edges), 183 | size=n_remaining, 184 | replace=False) 185 | 186 | return control_nodes 187 | -------------------------------------------------------------------------------- /examples/inhomogeneous_simulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the time-inhomogeneous simulations found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | import glob 7 | import os 8 | import plac 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | 15 | from sklearn.utils import check_random_state 16 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 17 | 18 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 19 | from dynetlsm.datasets import inhomogeneous_simulation 20 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 21 | from dynetlsm.model_selection import minimize_posterior_expected_vi 22 | from dynetlsm.model_selection import train_test_split 23 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 24 | from dynetlsm.network_statistics import density, modularity 25 | 26 | 27 | # NOTE: This is meant to be run in parallel on a computer cluster 28 | n_reps = 50 29 | out_dir = 'results' 30 | 31 | # choose between easy and hard 32 | 
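# ('hard' and 'easy' are the two settings passed to
# inhomogeneous_simulation's `simulation_type` argument in this script)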
sim_type = 'hard'
33 | # sim_type = 'easy'
34 |
35 | # Set to True for sensitivity analysis
36 | sample_hyperparameters = False
37 |
38 |
39 | def counts_per_time_step(z):
40 |     n_time_steps = z.shape[0]
41 |     group_counts = np.zeros(n_time_steps, dtype=int)
42 |     for t in range(n_time_steps):
43 |         group_counts[t] = np.unique(z[t]).shape[0]
44 |
45 |     return group_counts
46 |
47 |
48 | def posterior_per_time_step(model):
49 |     n_time_steps = model.Y_fit_.shape[0]
50 |     probas = np.zeros((n_time_steps, model.n_components + 1))
51 |     for t in range(n_time_steps):
52 |         freq = model.posterior_group_counts_[t]
53 |         index = model.posterior_group_ids_[t]
54 |         probas[t, index] = freq / freq.sum()
55 |
56 |     return probas
57 |
58 |
59 | def benchmark_single(n_iter=10000, burn=5000, tune=1000,
60 |                      outfile_name='benchmark',
61 |                      sim_type='hard', sample_hyperparameters=False,
62 |                      random_state=None):
63 |     random_state = check_random_state(random_state)
64 |
65 |     Y, X, z, intercept, _, _, _, _ = inhomogeneous_simulation(
66 |         n_nodes=120, random_state=random_state, simulation_type=sim_type)
67 |
68 |     Y, Y_ahead = Y[:9], Y[9]
69 |     Y_obs, test_indices = train_test_split(
70 |         Y, test_size=0.1, random_state=random_state)
71 |
72 |     # fit HDP-LPCM
73 |     if sample_hyperparameters:
74 |         lambda_prior = random_state.uniform(low=0.5)
75 |         alpha_kappa_rate = random_state.uniform(low=0.001, high=1)
76 |         gamma_prior_rate = random_state.uniform(low=0.001, high=1)
77 |         alpha_init_rate = random_state.uniform(low=0.001, high=1)
78 |         model = DynamicNetworkHDPLPCM(n_iter=n_iter,
79 |                                       burn=burn,
80 |                                       tune=tune,
81 |                                       tune_interval=1000,
82 |                                       is_directed=False,
83 |                                       selection_type='vi',
84 |                                       lambda_prior=lambda_prior,
85 |                                       lambda_variance_prior=1,
86 |                                       alpha_kappa_shape=1,
87 |                                       alpha_kappa_rate=alpha_kappa_rate,
88 |                                       gamma_prior_rate=gamma_prior_rate,
89 |                                       alpha_init_rate=alpha_init_rate,
90 |                                       n_components=10,
91 |                                       random_state=random_state).fit(Y_obs)
92 |     else:
93 |         model = DynamicNetworkHDPLPCM(n_iter=n_iter,
94 |                                       burn=burn,
95 |                                       tune=tune,
96 |                                       tune_interval=1000,
97 |                                       is_directed=False,
98 |                                       selection_type='vi',
99 |                                       n_components=10,
100 |                                       random_state=random_state).fit(Y_obs)
101 |
102 |     # MAP: number of clusters per time point
103 |     map_counts = counts_per_time_step(model.z_)
104 |
105 |     # Posterior group count probabilities
106 |     probas = posterior_per_time_step(model)
107 |     results = pd.DataFrame(probas)
108 |
109 |     # create dataframe of results
110 |     results['map_counts'] = map_counts
111 |
112 |     # goodness-of-fit metrics for MAP
113 |     results['insample_auc'] = model.auc_
114 |     results['outsample_auc'] = out_of_sample_auc(
115 |         Y, model.missings_, test_indices)
116 |
117 |     indices = np.tril_indices(Y.shape[1], k=-1)
118 |
119 |     pred_probas = model.forecast_probas_marginalized_[indices]
120 |     results['onestep_auc'] = roc_auc_score(
121 |         Y_ahead[indices], pred_probas)
122 |
123 |     # Variation of Information
124 |     results['vi'] = variation_of_information(
125 |         z[:9].ravel(), model.z_.ravel())
126 |     vi = 0.
127 |     for t in range(Y.shape[0]):
128 |         vi_t = variation_of_information(z[t], model.z_[t])
129 |         results['vi_{}'.format(t)] = vi_t
130 |         vi += vi_t
131 |     results['vi_avg'] = vi / Y.shape[0]
132 |
133 |     # adjusted rand index
134 |     results['rand_index'] = adjusted_rand_score(
135 |         z[:9].ravel(), model.z_.ravel())
136 |     adj_rand = 0.
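    # ARI is invariant to label permutations, e.g.
    # adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0]) == 1.0, so the
    # per-time-step scores below need no label alignment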
137 | for t in range(Y.shape[0]): 138 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 139 | results['rand_{}'.format(t)] = adj_t 140 | adj_rand += adj_t 141 | results['rand_avg'] = adj_rand / Y.shape[0] 142 | 143 | results.to_csv(outfile_name, index=False) 144 | 145 | 146 | # create a directory to store the results 147 | if not os.path.exists(out_dir): 148 | os.mkdir(out_dir) 149 | 150 | 151 | for i in range(n_reps): 152 | benchmark_single( 153 | n_iter=35000, burn=10000, tune=5000, random_state=i, 154 | sim_type=sim_type, sample_hyperparameters=sample_hyperparameters, 155 | outfile_name=os.path.join( 156 | out_dir, 'benchmark_{}.csv'.format(i))) 157 | 158 | 159 | # calculate median metric values 160 | n_time_steps = 9 161 | n_groups = 10 162 | 163 | n_files = len(glob.glob('results/*')) 164 | stat_names = ['insample_auc', 'outsample_auc', 'onestep_auc', 'vi', 165 | 'rand_index', 'vi_avg', 'rand_avg'] 166 | data = np.zeros((n_files, len(stat_names))) 167 | for i, file_name in enumerate(glob.glob('results/*')): 168 | df = pd.read_csv(file_name) 169 | data[i] = df.loc[0, stat_names].values 170 | 171 | data = pd.DataFrame(data, columns=stat_names) 172 | print('Median Metrics:') 173 | print(data.median(axis=0)) 174 | print('Metrics SD:') 175 | print(data.std(axis=0)) 176 | 177 | # plot posterior boxplots 178 | data = {'probas': [], 'cluster_number': [], 't': []} 179 | for file_name in glob.glob('results/*'): 180 | df = pd.read_csv(file_name) 181 | for t in range(n_time_steps): 182 | for i in range(1, n_groups): 183 | data['probas'].append(df.iloc[t, i]) 184 | data['cluster_number'].append(i) 185 | data['t'].append(t + 1) 186 | 187 | data = pd.DataFrame(data) 188 | 189 | plt.rc('font', family='sans-serif', size=16) 190 | g = sns.catplot(x='cluster_number', y='probas', col='t', 191 | col_wrap=3, kind='box', data=data) 192 | 193 | for ax in g.axes: 194 | ax.set_ylabel('posterior probability') 195 | ax.set_xlabel('# of groups') 196 | 197 | g.fig.tight_layout() 198 | 199 | plt.savefig('cluster_posterior.png', dpi=300) 200 | 201 | # clear figure 202 | plt.clf() 203 | 204 | # plot selected number of groups for each simulation 205 | data = np.zeros((n_time_steps, n_groups), dtype=np.int) 206 | for sim_id, file_name in enumerate(glob.glob('results/*')): 207 | df = pd.read_csv(file_name) 208 | for t in range(n_time_steps): 209 | data[t, df.iloc[t, n_groups + 1] - 1] +=1 210 | 211 | data = pd.DataFrame(data, columns=range(1, n_groups + 1), index=range(1, n_time_steps + 1)) 212 | mask = data.values == 0 213 | 214 | g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask) 215 | g.set_xlabel('# of groups') 216 | g.set_ylabel('t') 217 | plt.savefig('num_clusters.png', dpi=300) 218 | -------------------------------------------------------------------------------- /dynetlsm/sample_latent_positions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.utils import check_random_state 5 | 6 | from .network_likelihoods import ( 7 | partial_loglikelihood, 8 | directed_partial_loglikelihood, 9 | approx_directed_partial_loglikelihood 10 | ) 11 | 12 | 13 | def sample_control_nodes(edge_list, n_samples=100, random_state=None): 14 | rng = check_random_state(random_state) 15 | n_nodes = len(edge_list) 16 | 17 | # TODO: n_samples can be a fraction of total number of nodes 18 | 19 | control_nodes = np.zeros((n_nodes, n_samples), dtype=np.int) 20 | for i in range(n_nodes): 21 | # stratify sample 
/dynetlsm/sample_latent_positions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.utils import check_random_state 5 | 6 | from .network_likelihoods import ( 7 | partial_loglikelihood, 8 | directed_partial_loglikelihood, 9 | approx_directed_partial_loglikelihood 10 | ) 11 | 12 | 13 | def sample_control_nodes(edge_list, n_samples=100, random_state=None): 14 | rng = check_random_state(random_state) 15 | n_nodes = len(edge_list) 16 | 17 | # TODO: n_samples can be a fraction of total number of nodes 18 | 19 | control_nodes = np.zeros((n_nodes, n_samples), dtype=int) 20 | for i in range(n_nodes): 21 | # stratify sample based on connections vs. non-connections 22 | n_connected = int(edge_list[i].shape[0] / n_nodes * n_samples) 23 | if edge_list[i].shape[0] > 0: 24 | n_connected = max(n_connected, 1) 25 | 26 | control_nodes[i, :n_connected] = rng.choice(edge_list[i], 27 | size=n_connected, 28 | replace=False) 29 | 30 | non_neighbors = set.difference( 31 | set(range(n_nodes)), edge_list[i].tolist() + [i]) 32 | control_nodes[i, n_connected:] = rng.choice( 33 | list(non_neighbors), size=n_samples - n_connected, replace=False) 34 | 35 | return control_nodes 36 | 37 | 38 | def sample_control_edges(Y, n_samples=100, random_state=None): 39 | n_time_steps, n_nodes, _ = Y.shape 40 | 41 | n_edges = int(0.5 * n_nodes * (n_nodes + 1))  # triu_indices includes the diagonal 42 | edge_list = np.zeros((n_time_steps, n_edges, 2)) 43 | edges, non_edges = [], [] 44 | for t in range(n_time_steps): 45 | triu_indices = np.triu_indices_from(Y[t]) 46 | edge_list[t, :, 0] = triu_indices[0] 47 | edge_list[t, :, 1] = triu_indices[1] 48 | edges.append(np.where(Y[t][triu_indices] == 1)[0]) 49 | non_edges.append(np.where(Y[t][triu_indices] == 0)[0]) 50 | 51 | return edge_list, edges, non_edges 52 | 53 | 54 | def case_control_init(Y, is_directed=False, n_samples=100): 55 | n_time_steps, n_nodes, _ = Y.shape 56 | 57 | # compute in-degree / out-degree of each node 58 | degree = np.zeros((n_time_steps, n_nodes, 2), dtype=int) 59 | for t in range(n_time_steps): 60 | degree[t, :, 0] = Y[t].sum(axis=0) # in-degree 61 | degree[t, :, 1] = Y[t].sum(axis=1) # out-degree 62 | 63 | # store indices of edges, i.e. Y_ijt = 1 64 | max_in_degree = int(np.max(degree[:, :, 0])) 65 | max_out_degree = int(np.max(degree[:, :, 1])) 66 | in_edges = np.zeros((n_time_steps, n_nodes, max_in_degree), dtype=int) 67 | out_edges = np.zeros((n_time_steps, n_nodes, max_out_degree), dtype=int) 68 | for t in range(n_time_steps): 69 | for i in range(n_nodes): 70 | indices = np.where(Y[t, i, :] == 1)[0] 71 | n_edges = indices.shape[0] 72 | if n_edges: 73 | out_edges[t, i, :n_edges] = indices 74 | 75 | indices = np.where(Y[t, :, i] == 1)[0] 76 | n_edges = indices.shape[0] 77 | if n_edges: 78 | in_edges[t, i, :n_edges] = indices 79 | 80 | # determine edges (Y_ijt = 1 or Y_jit = 1 for at least one time step) 81 | edge_list = [] 82 | for i in range(n_nodes): 83 | mask = (np.logical_or(Y[:, i, :] == 1, Y[:, :, i] == 1)).astype(int) 84 | mask = mask.sum(axis=0) 85 | edge_list.append(np.unique(np.where(mask > 0)[0])) 86 | 87 | if is_directed: 88 | return degree, in_edges, out_edges, edge_list 89 | return degree[:, :, 0], in_edges, edge_list 90 | 91 |
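# Usage sketch for the case-control helpers above (illustrative only; the
# toy ring network is made up). Every node needs at least one neighbour so
# the stratified draw is well defined:
#
#     n_nodes = 10
#     Y_toy = np.zeros((n_nodes, n_nodes), dtype=int)
#     for i in range(n_nodes):
#         Y_toy[i, (i + 1) % n_nodes] = Y_toy[(i + 1) % n_nodes, i] = 1
#     edge_list = [np.where(Y_toy[i])[0] for i in range(n_nodes)]
#     controls = sample_control_nodes(edge_list, n_samples=5, random_state=0)
#     # each row holds one sampled neighbour followed by four non-neighbours
#     controls.shape  # (10, 5)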
92 | def sample_latent_positions(Y, X, intercept, tau_sq, sigma_sq, samplers, 93 | radii=None, is_directed=False, squared=False, 94 | case_control_sampler=None, random_state=None): 95 | rng = check_random_state(random_state) 96 | n_time_steps, n_nodes, _ = Y.shape 97 | 98 | for t in range(n_time_steps): 99 | for j in range(n_nodes): 100 | def logp(x): 101 | X[t, j] = x 102 | if is_directed: 103 | if case_control_sampler is not None: 104 | loglik = approx_directed_partial_loglikelihood( 105 | X[t], 106 | radii=radii, 107 | in_edges=case_control_sampler.in_edges_[t], 108 | out_edges=case_control_sampler.out_edges_[t], 109 | degree=case_control_sampler.degrees_[t], 110 | control_nodes_in=( 111 | case_control_sampler.control_nodes_in_[t]), 112 | control_nodes_out=( 113 | case_control_sampler.control_nodes_out_[t]), 114 | intercept_in=intercept[0], 115 | intercept_out=intercept[1], 116 | node_id=j, 117 | squared=squared) 118 | else: 119 | loglik = directed_partial_loglikelihood( 120 | Y[t], X[t], 121 | radii=radii, 122 | intercept_in=intercept[0], 123 | intercept_out=intercept[1], 124 | node_id=j, 125 | squared=squared) 126 | else: 127 | loglik = partial_loglikelihood(Y[t], X[t], 128 | intercept, j, 129 | squared=squared) 130 | 131 | # prior 132 | if t == 0: 133 | loglik -= 0.5 * np.sum(x * x) / tau_sq 134 | else: 135 | diff = x - X[t-1, j] 136 | loglik -= 0.5 * np.sum(diff * diff) / sigma_sq 137 | 138 | if t < (n_time_steps - 1): 139 | diff = X[t+1, j] - x 140 | loglik -= 0.5 * np.sum(diff * diff) / sigma_sq 141 | 142 | return loglik 143 | 144 | X[t, j] = samplers[t][j].step(X[t, j].copy(), logp, rng) 145 | 146 | return X 147 | 148 | 149 | def sample_latent_positions_mixture(Y, X, intercept, mu, sigma, lmbda, z, 150 | samplers, radii=None, is_directed=False, 151 | squared=False, case_control_sampler=None, 152 | random_state=None): 153 | rng = check_random_state(random_state) 154 | n_time_steps, n_nodes, _ = Y.shape 155 | 156 | for t in range(n_time_steps): 157 | for j in range(n_nodes): 158 | def logp(x): 159 | X[t, j] = x 160 | if is_directed: 161 | if case_control_sampler: 162 | loglik = approx_directed_partial_loglikelihood( 163 | X[t], 164 | radii=radii, 165 | in_edges=case_control_sampler.in_edges_[t], 166 | out_edges=case_control_sampler.out_edges_[t], 167 | degree=case_control_sampler.degrees_[t], 168 | control_nodes_in=( 169 | case_control_sampler.control_nodes_in_[t]), 170 | control_nodes_out=( 171 | case_control_sampler.control_nodes_out_[t]), 172 | intercept_in=intercept[0], 173 | intercept_out=intercept[1], 174 | node_id=j, 175 | squared=squared) 176 | else: 177 | loglik = directed_partial_loglikelihood( 178 | Y[t], X[t], 179 | radii=radii, 180 | intercept_in=intercept[0], 181 | intercept_out=intercept[1], 182 | node_id=j, squared=squared) 183 | else: 184 | loglik = partial_loglikelihood(Y[t], X[t], 185 | intercept, j, squared=squared) 186 | 187 | # prior P(X_t | X_{t-1}) 188 | if t == 0: 189 | diff = x - mu[z[t, j]] 190 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t, j]] 191 | else: 192 | diff = x - (1 - lmbda) * X[t-1, j] - lmbda * mu[z[t, j]] 193 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t, j]] 194 | 195 | # prior P(X_{t+1} | X_t) 196 | if t < (n_time_steps - 1): 197 | diff = (X[t+1, j] - (1 - lmbda) * x - 198 | lmbda * mu[z[t+1, j]]) 199 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t+1, j]] 200 | 201 | return loglik 202 | 203 | X[t, j] = samplers[t][j].step(X[t, j].copy(), 204 | logp, rng) 205 | 206 | return X 207 | --------------------------------------------------------------------------------
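The prior terms subtracted inside the logp closures above are Gaussian transition densities up to additive constants, which cancel in the Metropolis acceptance ratio. A minimal check of that identity for the random-walk prior X_t | X_{t-1} ~ N(X_{t-1}, sigma_sq * I); scipy is assumed available here even though the module itself does not import it:

import numpy as np
from scipy.stats import multivariate_normal

sigma_sq = 0.5
x_prev = np.array([0.3, -0.1])
x = np.array([0.5, 0.2])

diff = x - x_prev
penalty = -0.5 * np.sum(diff * diff) / sigma_sq   # term used inside logp()

logpdf = multivariate_normal.logpdf(x, mean=x_prev, cov=sigma_sq * np.eye(2))
const = -np.log(2 * np.pi * sigma_sq)             # additive normalizer for d = 2
assert np.isclose(penalty + const, logpdf)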
/examples/homogeneous_simulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the time-homogeneous simulations found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | import glob 7 | import os 8 | import plac 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | 15 | from sklearn.utils import check_random_state 16 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 17 | 18 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 19 | from dynetlsm.datasets import synthetic_static_community_dynamic_network 20 | from dynetlsm.datasets import homogeneous_simulation 21 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 22 | from dynetlsm.model_selection import minimize_posterior_expected_vi 23 | from dynetlsm.model_selection import train_test_split 24 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 25 | from dynetlsm.network_statistics import density, modularity 26 | 27 | 28 | # NOTE: This is meant to be run in parallel on a computer cluster 29 | n_reps = 50 30 | out_dir = 'results' 31 | 32 | # Choose between hdp_lpcm and lpcm 33 | model_type = 'hdp_lpcm' 34 | # model_type = 'lpcm' 35 | 36 | # Set to true for sensitivity analysis 37 | sample_hyperparameters = False 38 | 39 | # Choose between vi and map 40 | selection_type = 'vi' 41 | # selection_type = 'map' 42 | 43 | # Choose between easy and hard 44 | sim_type = 'hard' 45 | # sim_type = 'easy' 46 | 47 | 48 | def counts_per_time_step(z): 49 | n_time_steps = z.shape[0] 50 | group_counts = np.zeros(n_time_steps, dtype=int) 51 | for t in range(n_time_steps): 52 | group_counts[t] = np.unique(z[t]).shape[0] 53 | 54 | return group_counts 55 | 56 | def posterior_per_time_step(model): 57 | n_time_steps = model.Y_fit_.shape[0] 58 | probas = np.zeros((n_time_steps, model.n_components + 1)) 59 | for t in range(n_time_steps): 60 | freq = model.posterior_group_counts_[t] 61 | index = model.posterior_group_ids_[t] 62 | probas[t, index] = freq / freq.sum() 63 | 64 | return probas 65 | 66 | 67 | def benchmark_single(n_iter=10000, burn=5000, tune=1000, 68 | outfile_name='benchmark', 69 | model_type='hdp_lpcm', 70 | selection_type='map', 71 | sim_type='hard', 72 | sample_hyperparameters=False, 73 | random_state=None): 74 | random_state = check_random_state(random_state) 75 | 76 | Y, X, z, intercept, _, _, _, _ = homogeneous_simulation( 77 | n_time_steps=7, n_nodes=120, 78 | simulation_type=sim_type, 79 | random_state=random_state) 80 | 81 | Y, Y_ahead = Y[:6], Y[6] 82 | Y_obs, test_indices = train_test_split( 83 | Y, test_size=0.1, random_state=random_state) 84 | 85 | # fit the model (HDP-LPCM or LPCM) 86 | if model_type == 'hdp_lpcm': 87 | if sample_hyperparameters: 88 | lambda_prior = random_state.uniform(low=0.5) 89 | alpha_kappa_rate = random_state.uniform(low=0.001, high=1) 90 | gamma_prior_rate = random_state.uniform(low=0.001, high=1) 91 | alpha_init_rate = random_state.uniform(low=0.001, high=1) 92 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 93 | burn=burn, 94 | tune=tune, 95 | tune_interval=1000, 96 | is_directed=False, 97 | selection_type='vi', 98 | lambda_prior=lambda_prior, 99 | lambda_variance_prior=1, 100 | alpha_kappa_shape=1, 101 | alpha_kappa_rate=alpha_kappa_rate, 102 | gamma_prior_rate=gamma_prior_rate, 103 | alpha_init_rate=alpha_init_rate, 104 | n_components=10, 105 | random_state=random_state).fit(Y_obs) 106 | else: 107 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 108 | burn=burn, 109 | tune=tune, 110 | tune_interval=1000, 111 | is_directed=False, 112 | selection_type='vi', 113 | n_components=10, 114 | random_state=random_state).fit(Y_obs) 115 | else: 116 | model = DynamicNetworkLPCM(n_iter=n_iter, 117 | burn=burn, 118 | tune=tune, 119 | tune_interval=1000, 120 | is_directed=False, 121 | selection_type=selection_type, 122 | n_components=6, 123 | random_state=random_state).fit(Y_obs) 124 | 125 | # MAP: number of clusters per time point 126 | map_counts = counts_per_time_step(model.z_) 127 | 128 | # Posterior group count probabilities 129 | if model_type == 'hdp_lpcm': 130 | probas = posterior_per_time_step(model) 131 | results = pd.DataFrame(probas) 132 | else: 133 | results = pd.DataFrame() 134 | 135 | # create dataframe of results 136 | results['map_counts'] = map_counts 137 | 138 | # goodness-of-fit metrics for MAP 139 | results['insample_auc'] = model.auc_
140 | results['outsample_auc'] = out_of_sample_auc( 141 | Y, model.missings_, test_indices) 142 | 143 | # one-step ahead predictions 144 | indices = np.tril_indices(Y.shape[1], k=-1) 145 | 146 | pred_probas = model.forecast_probas_marginalized_[indices] 147 | results['onestep_auc'] = roc_auc_score( 148 | Y_ahead[indices], pred_probas) 149 | 150 | # Variation of Information 151 | results['vi'] = variation_of_information( 152 | z[:6].ravel(), model.z_[:6].ravel()) 153 | vi = 0. 154 | for t in range(Y.shape[0]): 155 | vi_t = variation_of_information(z[t], model.z_[t]) 156 | results['vi_{}'.format(t)] = vi_t 157 | vi += vi_t 158 | results['vi_avg'] = vi / Y.shape[0] 159 | 160 | 161 | # adjusted rand index 162 | results['rand_index'] = adjusted_rand_score( 163 | z[:6].ravel(), model.z_[:6].ravel()) 164 | adj_rand = 0. 165 | for t in range(Y.shape[0]): 166 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 167 | results['rand_{}'.format(t)] = adj_t 168 | adj_rand += adj_t 169 | results['rand_avg'] = adj_rand / Y.shape[0] 170 | 171 | results.to_csv(outfile_name, index=False) 172 | 173 | 174 | # create a directory to store the results 175 | if not os.path.exists(out_dir): 176 | os.mkdir(out_dir) 177 | 178 | for i in range(n_reps): 179 | benchmark_single( 180 | n_iter=35000, burn=10000, tune=5000, random_state=i, 181 | model_type=model_type, selection_type=selection_type, 182 | sim_type=sim_type, sample_hyperparameters=sample_hyperparameters, 183 | outfile_name=os.path.join( 184 | out_dir, 'benchmark_{}.csv'.format(i))) 185 | 186 | 187 | # calculate median metric values 188 | n_time_steps = 6 189 | 190 | if model_type == 'lpcm': 191 | n_groups = 6 192 | else: 193 | n_groups = 10 194 | 195 | n_files = len(glob.glob('results/*')) 196 | stat_names = ['insample_auc', 'outsample_auc', 'onestep_auc', 'vi', 197 | 'rand_index', 'vi_avg', 'rand_avg'] 198 | data = np.zeros((n_files, len(stat_names))) 199 | for i, file_name in enumerate(glob.glob('results/*')): 200 | df = pd.read_csv(file_name) 201 | data[i] = df.loc[0, stat_names].values 202 | 203 | data = pd.DataFrame(data, columns=stat_names) 204 | print('Median Metrics:') 205 | print(data.median(axis=0)) 206 | print('Metrics SD:') 207 | print(data.std(axis=0)) 208 | 209 | # plot posterior boxplots 210 | data = {'probas': [], 'cluster_number': [], 't': []} 211 | for file_name in glob.glob('results/*'): 212 | df = pd.read_csv(file_name) 213 | for t in range(n_time_steps): 214 | for i in range(1, n_groups): 215 | data['probas'].append(df.iloc[t, i]) 216 | data['cluster_number'].append(i) 217 | data['t'].append(t + 1) 218 | 219 | data = pd.DataFrame(data) 220 | 221 | plt.rc('font', family='sans-serif', size=16) 222 | g = sns.catplot(x='cluster_number', y='probas', col='t', 223 | col_wrap=3, kind='box', data=data) 224 | 225 | for ax in g.axes: 226 | ax.set_ylabel('posterior probability') 227 | ax.set_xlabel('# of groups') 228 | 229 | g.fig.tight_layout() 230 | 231 | plt.savefig('cluster_posterior.png', dpi=300) 232 | 233 | # clear figure 234 | plt.clf() 235 | 236 | # plot selected number of groups for each simulation 237 | data = np.zeros((n_time_steps, n_groups), dtype=int) 238 | for sim_id, file_name in enumerate(glob.glob('results/*')): 239 | df = pd.read_csv(file_name) 240 | for t in range(n_time_steps): 241 | if model_type == 'lpcm': 242 | data[t, df.iloc[t, 0] - 1] += 1 243 | else: 244 | data[t, df.iloc[t, n_groups + 1] - 1] += 1 245 | 246 | data = pd.DataFrame(data, columns=range(1, n_groups + 1), index=range(1, n_time_steps + 1)) 247 | mask
= data.values == 0 248 | 249 | g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask) 250 | g.set_xlabel('# of groups') 251 | g.set_ylabel('t') 252 | plt.savefig('num_clusters.png', dpi=300) 253 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s7-edges.csv: -------------------------------------------------------------------------------- 1 | Source,Target,Weight,Season 2 | DAENERYS,TYRION,179,7 3 | DAENERYS,JON,178,7 4 | CERSEI,JAIME,172,7 5 | ARYA,SANSA,156,7 6 | JON,TYRION,107,7 7 | LITTLEFINGER,SANSA,107,7 8 | DAVOS,JON,92,7 9 | BRONN,JAIME,85,7 10 | JON,SANSA,79,7 11 | MARWYN,SAM,75,7 12 | CERSEI,TYRION,74,7 13 | DAENERYS,VARYS,65,7 14 | BERIC,HOUND,63,7 15 | JON,JORAH,59,7 16 | HOUND,THOROS,57,7 17 | HOUND,TORMUND,57,7 18 | DAVOS,GENDRY,55,7 19 | JON,TORMUND,50,7 20 | DAVOS,TYRION,46,7 21 | TYRION,VARYS,45,7 22 | BRAN,SANSA,44,7 23 | CERSEI,QYBURN,44,7 24 | BERIC,JON,41,7 25 | CERSEI,EURON,38,7 26 | CERSEI,TYCHO,37,7 27 | JAIME,OLENNA,37,7 28 | JON,MISSANDEI,36,7 29 | DAENERYS,MISSANDEI,35,7 30 | JON,THEON,35,7 31 | DAENERYS,JORAH,34,7 32 | JAIME,TYRION,34,7 33 | ARYA,HOT_PIE,33,7 34 | CERSEI,DAENERYS,33,7 35 | CERSEI,MOUNTAIN,33,7 36 | DAENERYS,DAVOS,33,7 37 | JAIME,RANDYLL,33,7 38 | DAVOS,MISSANDEI,31,7 39 | GREY_WORM,MISSANDEI,31,7 40 | MISSANDEI,TYRION,31,7 41 | BERIC,THOROS,30,7 42 | GENDRY,JON,30,7 43 | BRIENNE,SANSA,29,7 44 | BERIC,TORMUND,27,7 45 | HOUND,JON,27,7 46 | JORAH,SAM,27,7 47 | EURON,JAIME,26,7 48 | HOUND,JORAH,26,7 49 | BRIENNE,PODRICK,25,7 50 | DICKON,RANDYLL,25,7 51 | ELLARIA,YARA,25,7 52 | JAIME,QYBURN,25,7 53 | ARYA,LITTLEFINGER,24,7 54 | CERSEI,JON,24,7 55 | DICKON,JAIME,24,7 56 | HARRAG,THEON,24,7 57 | JORAH,TORMUND,24,7 58 | THEON,YARA,24,7 59 | JAIME,MOUNTAIN,23,7 60 | SANSA,YOHN_ROYCE,23,7 61 | ARYA,BRAN,21,7 62 | BRAN,MEERA,21,7 63 | BRAN,SAM,21,7 64 | CERSEI,ELLARIA,21,7 65 | ELLARIA,TYENE,21,7 66 | JON,NED,21,7 67 | MELISANDRE,VARYS,21,7 68 | MISSANDEI,VARYS,21,7 69 | DAENERYS,JAIME,20,7 70 | GILLY,SAM,20,7 71 | JON,LITTLEFINGER,20,7 72 | ARYA,BRIENNE,19,7 73 | BRAN,LITTLEFINGER,19,7 74 | BRONN,TYRION,19,7 75 | EURON,YARA,19,7 76 | JORAH,THOROS,19,7 77 | BERIC,JORAH,18,7 78 | BRIENNE,HOUND,18,7 79 | EURON,THEON,18,7 80 | GENDRY,HOUND,18,7 81 | GREY_WORM,TYRION,18,7 82 | JON,VARYS,18,7 83 | JON,NIGHT_KING,17,7 84 | JORAH,TYRION,17,7 85 | SANSA,MAESTER_WOLKAN,17,7 86 | DAENERYS,OLENNA,16,7 87 | JORAH,MARWYN,16,7 88 | CERSEI,TYENE,15,7 89 | CERSEI,TYWIN,15,7 90 | GENDRY,TORMUND,15,7 91 | BRIENNE,LITTLEFINGER,14,7 92 | DAENERYS,RANDYLL,14,7 93 | DAVOS,JORAH,14,7 94 | GENDRY,THOROS,14,7 95 | GREY_WORM,VARYS,14,7 96 | MOUNTAIN,QYBURN,14,7 97 | NED,SANSA,14,7 98 | DAENERYS,GREY_WORM,13,7 99 | DAENERYS,MELISANDRE,13,7 100 | GENDRY,JORAH,13,7 101 | DAVOS,THEON,12,7 102 | JON,THOROS,12,7 103 | PODRICK,TYRION,12,7 104 | AERYS,DAENERYS,11,7 105 | BERIC,GENDRY,11,7 106 | BRONN,DICKON,11,7 107 | LITTLEFINGER,MAESTER_WOLKAN,11,7 108 | LITTLEFINGER,YOHN_ROYCE,11,7 109 | LYANNA_MORMONT,ROBETT,11,7 110 | THOROS,TORMUND,11,7 111 | BRIENNE,JAIME,10,7 112 | DAENERYS,DICKON,10,7 113 | DAENERYS,NIGHT_KING,10,7 114 | DAVOS,VARYS,10,7 115 | MISSANDEI,THEON,10,7 116 | MOUNTAIN,TYRION,10,7 117 | THEON,TYRION,10,7 118 | TYRION,TYWIN,10,7 119 | ARYA,JON,9,7 120 | BRIENNE,JON,9,7 121 | CERSEI,SANSA,9,7 122 | DAVOS,SANSA,9,7 123 | EURON,MOUNTAIN,9,7 124 | HOUND,TYRION,9,7 125 | NYMERIA,OBARA,9,7 126 | ARYA,NED,8,7 127 | BERIC,NIGHT_KING,8,7 128 | BRAN,JON,8,7 129 | BRONN,DAENERYS,8,7 130 | 
BRONN,PODRICK,8,7 131 | BRONN,RANDYLL,8,7 132 | ELLARIA,THEON,8,7 133 | EURON,TYRION,8,7 134 | JON,ROBETT,8,7 135 | ROBETT,YOHN_ROYCE,8,7 136 | BENJEN,JON,7,7 137 | BRAN,LYANNA,7,7 138 | BRAN,MAESTER_WOLKAN,7,7 139 | CERSEI,JOFFREY,7,7 140 | DAENERYS,EURON,7,7 141 | DAENERYS,THEON,7,7 142 | DAVOS,TORMUND,7,7 143 | ELLARIA,TYRION,7,7 144 | JORAH,MISSANDEI,7,7 145 | JORAH,THEON,7,7 146 | LYANNA,RHAEGAR,7,7 147 | LYANNA_MORMONT,SANSA,7,7 148 | OBARA,TYENE,7,7 149 | PODRICK,SANSA,7,7 150 | RANDYLL,TYRION,7,7 151 | ROBETT,SANSA,7,7 152 | ARYA,JOFFREY,6,7 153 | ARYA,YOHN_ROYCE,6,7 154 | BRIENNE,TORMUND,6,7 155 | CERSEI,RANDYLL,6,7 156 | DAENERYS,HOUND,6,7 157 | ELLARIA,MOUNTAIN,6,7 158 | ELLARIA,QYBURN,6,7 159 | EURON,TYENE,6,7 160 | GENDRY,ROBERT,6,7 161 | GILLY,LITTLE_SAM,6,7 162 | JAIME,JON,6,7 163 | JOFFREY,SANSA,6,7 164 | JON,LYANNA_MORMONT,6,7 165 | JORAH,VARYS,6,7 166 | LITTLE_SAM,SAM,6,7 167 | LITTLEFINGER,ROBETT,6,7 168 | LYANNA_MORMONT,YOHN_ROYCE,6,7 169 | NIGHT_KING,TORMUND,6,7 170 | THEON,VARYS,6,7 171 | TYENE,YARA,6,7 172 | AERYS,CERSEI,5,7 173 | AERYS,JON,5,7 174 | ALYS,NED_UMBER,5,7 175 | ARYA,CATELYN,5,7 176 | ARYA,CERSEI,5,7 177 | ARYA,MAESTER_WOLKAN,5,7 178 | ARYA,PODRICK,5,7 179 | BRIENNE,BRONN,5,7 180 | BRONN,CERSEI,5,7 181 | CERSEI,NED,5,7 182 | CERSEI,OLENNA,5,7 183 | DAENERYS,TORMUND,5,7 184 | DAVOS,HOUND,5,7 185 | ELLARIA,EURON,5,7 186 | EURON,JON,5,7 187 | HOUND,MOUNTAIN,5,7 188 | HOUND,NIGHT_KING,5,7 189 | JOFFREY,NED,5,7 190 | JOFFREY,TYWIN,5,7 191 | JON,MAESTER_WOLKAN,5,7 192 | JON,MOUNTAIN,5,7 193 | JON,PODRICK,5,7 194 | JON,YOHN_ROYCE,5,7 195 | LITTLEFINGER,PODRICK,5,7 196 | NED,ROBB,5,7 197 | QYBURN,TYENE,5,7 198 | AERYS,TYRION,4,7 199 | ARYA,ROBETT,4,7 200 | BERIC,DAENERYS,4,7 201 | BRAN,EDDISON_TOLLETT,4,7 202 | BRAN,NED,4,7 203 | BRAN,RHAEGAR,4,7 204 | BRIENNE,CERSEI,4,7 205 | BRIENNE,DAVOS,4,7 206 | BRIENNE,LYANNA_MORMONT,4,7 207 | BRIENNE,ROBETT,4,7 208 | BRIENNE,TYRION,4,7 209 | BRIENNE,YOHN_ROYCE,4,7 210 | CATELYN,LITTLEFINGER,4,7 211 | CERSEI,DAVOS,4,7 212 | CERSEI,HOUND,4,7 213 | CERSEI,MYRCELLA,4,7 214 | CERSEI,THEON,4,7 215 | DAVOS,LITTLEFINGER,4,7 216 | DAVOS,PODRICK,4,7 217 | DICKON,TYRION,4,7 218 | EDDISON_TOLLETT,MEERA,4,7 219 | ELLARIA,OBARA,4,7 220 | EURON,NYMERIA,4,7 221 | EURON,OBARA,4,7 222 | EURON,QYBURN,4,7 223 | GENDRY,TYRION,4,7 224 | GREY_WORM,THEON,4,7 225 | HOUND,THEON,4,7 226 | JAIME,TYWIN,4,7 227 | JEOR,JON,4,7 228 | JEOR,JORAH,4,7 229 | JEOR,SAM,4,7 230 | JOFFREY,TYRION,4,7 231 | JON,LYANNA,4,7 232 | JON,MELISANDRE,4,7 233 | JON,NED_UMBER,4,7 234 | JON,QYBURN,4,7 235 | JORAH,NIGHT_KING,4,7 236 | LITTLEFINGER,LYANNA_MORMONT,4,7 237 | LITTLEFINGER,NED,4,7 238 | LYANNA,NED,4,7 239 | LYSA,SANSA,4,7 240 | MAESTER_WOLKAN,YOHN_ROYCE,4,7 241 | MELISANDRE,TYRION,4,7 242 | MOUNTAIN,OBERYN,4,7 243 | MOUNTAIN,TYENE,4,7 244 | NYMERIA,TYENE,4,7 245 | NYMERIA,YARA,4,7 246 | OBARA,YARA,4,7 247 | OLENNA,RANDYLL,4,7 248 | OLENNA,TYRION,4,7 249 | PODRICK,TORMUND,4,7 250 | QYBURN,TYRION,4,7 251 | ROBB,SANSA,4,7 252 | AEGON,DAENERYS,3,7 253 | AERYS,VARYS,3,7 254 | ALYS,JON,3,7 255 | ARYA,LYANNA_MORMONT,3,7 256 | ARYA,ROBB,3,7 257 | ARYA,ROBERT,3,7 258 | BALERION,QYBURN,3,7 259 | BERIC,DAVOS,3,7 260 | BRAN,DAENERYS,3,7 261 | BRAN,YOHN_ROYCE,3,7 262 | BRIENNE,MOUNTAIN,3,7 263 | BRIENNE,QYBURN,3,7 264 | BRIENNE,THEON,3,7 265 | BRONN,DAVOS,3,7 266 | BRONN,HOUND,3,7 267 | BRONN,JON,3,7 268 | BRONN,VARYS,3,7 269 | CATELYN,SANSA,3,7 270 | CERSEI,OBERYN,3,7 271 | DAENERYS,ELLARIA,3,7 272 | DAENERYS,NED,3,7 273 | DAENERYS,VISERYS,3,7 274 | DAENERYS,YARA,3,7 275 | 
DAVOS,EURON,3,7 276 | DAVOS,JAIME,3,7 277 | DAVOS,LYANNA_MORMONT,3,7 278 | DAVOS,NIGHT_KING,3,7 279 | DAVOS,ROBERT,3,7 280 | ELLARIA,OLENNA,3,7 281 | ELLARIA,VARYS,3,7 282 | HARRAG,YARA,3,7 283 | HOUND,JAIME,3,7 284 | HOUND,PODRICK,3,7 285 | HOUND,QYBURN,3,7 286 | HOUND,VARYS,3,7 287 | JAIME,JOFFREY,3,7 288 | JAIME,TOMMEN,3,7 289 | JOFFREY,OLENNA,3,7 290 | JON,SAM,3,7 291 | LITTLEFINGER,MEERA,3,7 292 | LITTLEFINGER,TORMUND,3,7 293 | MOUNTAIN,THEON,3,7 294 | NED,ROBERT,3,7 295 | RAMSAY,SANSA,3,7 296 | ROBERT,VARYS,3,7 297 | SANSA,TORMUND,3,7 298 | SANSA,TYRION,3,7 299 | THEON,TYENE,3,7 300 | TYCHO,TYWIN,3,7 301 | TYRION,YARA,3,7 302 | AERYS,NED,2,7 303 | AERYS,OLENNA,2,7 304 | AERYS,SANSA,2,7 305 | ARYA,WALDER,2,7 306 | BALERION,CERSEI,2,7 307 | BERIC,NED,2,7 308 | BRAN,BRIENNE,2,7 309 | BRAN,NIGHT_KING,2,7 310 | BRAN,PODRICK,2,7 311 | BRAN,TYRION,2,7 312 | BRIENNE,EURON,2,7 313 | BRIENNE,HOT_PIE,2,7 314 | BRIENNE,JORAH,2,7 315 | BRIENNE,QHONO,2,7 316 | BRIENNE,VARYS,2,7 317 | BRONN,GREY_WORM,2,7 318 | BRONN,JORAH,2,7 319 | BRONN,OLENNA,2,7 320 | BRONN,THEON,2,7 321 | CATELYN,TYRION,2,7 322 | CERSEI,LITTLEFINGER,2,7 323 | CERSEI,NIGHT_KING,2,7 324 | CERSEI,ROBB,2,7 325 | CERSEI,VARYS,2,7 326 | CERSEI,YARA,2,7 327 | DAENERYS,DROGO,2,7 328 | DAENERYS,GENDRY,2,7 329 | DAENERYS,LITTLEFINGER,2,7 330 | DAENERYS,MOUNTAIN,2,7 331 | DAENERYS,QYBURN,2,7 332 | DAENERYS,ROBERT,2,7 333 | DAENERYS,TYWIN,2,7 334 | DAVOS,GREY_WORM,2,7 335 | DAVOS,MOUNTAIN,2,7 336 | DAVOS,QYBURN,2,7 337 | DAVOS,ROBETT,2,7 338 | DAVOS,YOHN_ROYCE,2,7 339 | DICKON,OLENNA,2,7 340 | DICKON,SAM,2,7 341 | DROGO,JON,2,7 342 | DROGO,JORAH,2,7 343 | ELLARIA,GREY_WORM,2,7 344 | ELLARIA,JAIME,2,7 345 | ELLARIA,MISSANDEI,2,7 346 | ELLARIA,NYMERIA,2,7 347 | ELLARIA,OBERYN,2,7 348 | EURON,HOUND,2,7 349 | GENDRY,MELISANDRE,2,7 350 | GREY_WORM,JAIME,2,7 351 | GREY_WORM,JON,2,7 352 | GREY_WORM,JORAH,2,7 353 | GREY_WORM,MELISANDRE,2,7 354 | GREY_WORM,OLENNA,2,7 355 | GREY_WORM,YARA,2,7 356 | HIGH_SEPTON,SAM,2,7 357 | HOT_PIE,JON,2,7 358 | HOUND,MISSANDEI,2,7 359 | HOUND,QHONO,2,7 360 | HOUND,WHITE_WALKER,2,7 361 | HOWLAND,MEERA,2,7 362 | JAIME,SANSA,2,7 363 | JAIME,THEON,2,7 364 | JAIME,TYENE,2,7 365 | JEOR,TYRION,2,7 366 | JOANNA,TYWIN,2,7 367 | JOFFREY,ROBB,2,7 368 | JOFFREY,ROBERT,2,7 369 | JOFFREY,TOMMEN,2,7 370 | JON,ROBB,2,7 371 | JON,WHITE_WALKER,2,7 372 | JON_ARRYN,LYSA,2,7 373 | JON_ARRYN,SANSA,2,7 374 | JORAH,PODRICK,2,7 375 | JORAH,WHITE_WALKER,2,7 376 | LYANNA_MORMONT,PODRICK,2,7 377 | LYANNA_MORMONT,TORMUND,2,7 378 | MARGAERY,OLENNA,2,7 379 | MARWYN,MAESTER_WOLKAN,2,7 380 | MARWYN,ROBERT,2,7 381 | MARWYN,SHIREEN,2,7 382 | MEERA,NED,2,7 383 | MELISANDRE,MISSANDEI,2,7 384 | MELISANDRE,THOROS,2,7 385 | MISSANDEI,OLENNA,2,7 386 | MISSANDEI,YARA,2,7 387 | MOUNTAIN,MYRCELLA,2,7 388 | MOUNTAIN,RANDYLL,2,7 389 | MYRCELLA,OBERYN,2,7 390 | NED,TYRION,2,7 391 | NIGHT_KING,TYRION,2,7 392 | NYMERIA,THEON,2,7 393 | OBARA,THEON,2,7 394 | OLENNA,THEON,2,7 395 | OLENNA,VARYS,2,7 396 | OLENNA,YARA,2,7 397 | PODRICK,QHONO,2,7 398 | PODRICK,ROBETT,2,7 399 | PODRICK,THEON,2,7 400 | PODRICK,VARYS,2,7 401 | PODRICK,YOHN_ROYCE,2,7 402 | QHONO,TYRION,2,7 403 | QYBURN,RANDYLL,2,7 404 | QYBURN,THEON,2,7 405 | RHAEGAR,ROBERT,2,7 406 | ROBB,ROBERT,2,7 407 | ROBERT,SAM,2,7 408 | ROBERT,SANSA,2,7 409 | ROBETT,TORMUND,2,7 410 | SAM,STANNIS,2,7 411 | TORMUND,WHITE_WALKER,2,7 412 | TORMUND,YOHN_ROYCE,2,7 413 | VARYS,YARA,2,7 -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s5-edges.csv: 
-------------------------------------------------------------------------------- 1 | Source,Target,Weight,Season 2 | ARYA,JAQEN,148,5 3 | JORAH,TYRION,141,5 4 | BRONN,JAIME,121,5 5 | TYRION,VARYS,110,5 6 | DAENERYS,TYRION,98,5 7 | CERSEI,HIGH_SPARROW,96,5 8 | BRIENNE,PODRICK,90,5 9 | JON,STANNIS,90,5 10 | LITTLEFINGER,SANSA,89,5 11 | JON,SAM,85,5 12 | GILLY,SAM,83,5 13 | DAARIO,DAENERYS,81,5 14 | MARGAERY,TOMMEN,79,5 15 | SANSA,THEON,77,5 16 | DAENERYS,HIZDAHR,69,5 17 | DAVOS,STANNIS,68,5 18 | JON,TORMUND,63,5 19 | CERSEI,TOMMEN,62,5 20 | JAIME,MYRCELLA,58,5 21 | MYRANDA,SANSA,58,5 22 | RAMSAY,THEON,58,5 23 | MELISANDRE,STANNIS,52,5 24 | RAMSAY,ROOSE_BOLTON,51,5 25 | RAMSAY,SANSA,51,5 26 | CERSEI,LITTLEFINGER,50,5 27 | ARYA,WAIF,48,5 28 | SHIREEN,STANNIS,47,5 29 | CERSEI,MARGAERY,46,5 30 | CERSEI,QYBURN,41,5 31 | JON,OLLY,41,5 32 | CERSEI,JAIME,40,5 33 | DAENERYS,JORAH,40,5 34 | JON,MELISANDRE,39,5 35 | BARRISTAN,DAENERYS,38,5 36 | DAVOS,JON,35,5 37 | DAVOS,SHIREEN,35,5 38 | BRONN,TYENE,34,5 39 | DORAN,ELLARIA,33,5 40 | JON,MANCE,33,5 41 | ALLISER_THORNE,JON,32,5 42 | JAQEN,WAIF,32,5 43 | HIGH_SPARROW,OLENNA,30,5 44 | LORAS,MARGAERY,30,5 45 | OLLY,SAM,30,5 46 | GREY_WORM,MISSANDEI,29,5 47 | CERSEI,MACE,28,5 48 | DAENERYS,MOSSADOR,28,5 49 | JON,KARSI,28,5 50 | MYRANDA,RAMSAY,28,5 51 | AREO,DORAN,27,5 52 | BRIENNE,LITTLEFINGER,27,5 53 | DAARIO,TYRION,27,5 54 | CERSEI,OLENNA,26,5 55 | DAARIO,HIZDAHR,24,5 56 | DORAN,JAIME,24,5 57 | MYRCELLA,TRYSTANE,24,5 58 | SAM,STANNIS,24,5 59 | DAENERYS,MISSANDEI,22,5 60 | LITTLEFINGER,ROOSE_BOLTON,22,5 61 | CERSEI,KEVAN,21,5 62 | HIGH_SPARROW,LORAS,21,5 63 | CERSEI,MAGGY,20,5 64 | DAARIO,GREY_WORM,20,5 65 | JANOS,JON,20,5 66 | SELYSE,SHIREEN,20,5 67 | AREO,BRONN,19,5 68 | ELLARIA,JAIME,19,5 69 | KARSI,TORMUND,19,5 70 | TYCHO,MACE,19,5 71 | AREO,JAIME,18,5 72 | CERSEI,LANCEL,18,5 73 | CERSEI,PYCELLE,18,5 74 | GILLY,SHIREEN,18,5 75 | JON,MAESTER_AEMON,18,5 76 | SELYSE,STANNIS,18,5 77 | BLACK_HAIRED_PROSTITUTE,TYRION,17,5 78 | DAARIO,JORAH,17,5 79 | HIGH_SPARROW,MARGAERY,17,5 80 | MAESTER_AEMON,SAM,17,5 81 | MALKO,TYRION,17,5 82 | ARYA,MERYN_TRANT,16,5 83 | DAARIO,MISSANDEI,16,5 84 | JAIME,TRYSTANE,16,5 85 | JON,LOBODA,16,5 86 | LITTLEFINGER,OLENNA,16,5 87 | MISSANDEI,TYRION,16,5 88 | ROOSE_BOLTON,SANSA,16,5 89 | AREO,ELLARIA,15,5 90 | AREO,MYRCELLA,15,5 91 | BRIENNE,SANSA,15,5 92 | CERSEI,LORAS,15,5 93 | LORAS,OLYVAR,15,5 94 | ALLISER_THORNE,SAM,14,5 95 | BRONN,LOLLYS,14,5 96 | CERSEI,SEPTA_UNELLA,14,5 97 | DAENERYS,GREY_WORM,14,5 98 | LANCEL,LITTLEFINGER,14,5 99 | LITTLEFINGER,RAMSAY,14,5 100 | MANCE,STANNIS,14,5 101 | MARGAERY,OLENNA,14,5 102 | NYMERIA,TYENE,14,5 103 | ARYA,THIN_MAN,13,5 104 | CERSEI,HIGH_SEPTON,13,5 105 | CERSEI,MERYN_TRANT,13,5 106 | DAVOS,MELISANDRE,13,5 107 | HIZDAHR,TYRION,13,5 108 | MELISANDRE,SELYSE,13,5 109 | MYRANDA,THEON,13,5 110 | BRONN,DORNISH_RIDER,12,5 111 | CERSEI,MELARA,12,5 112 | CERSEI,TYWIN,12,5 113 | EDDISON_TOLLETT,JON,12,5 114 | JORAH,MALKO,12,5 115 | LORD_OF_BONES,TORMUND,12,5 116 | BRAND,DERRYK,11,5 117 | ELLARIA,OBARA,11,5 118 | ELLARIA,TYENE,11,5 119 | HIGH_SEPTON,OLYVAR,11,5 120 | JAQEN,THIN_MAN,11,5 121 | DAVOS,OLLY,10,5 122 | ELLARIA,MYRCELLA,10,5 123 | OLLY,STANNIS,10,5 124 | PYCELLE,QYBURN,10,5 125 | BRIENNE,STANNIS,9,5 126 | BRONN,MYRCELLA,9,5 127 | DAENERYS,VARYS,9,5 128 | DAVOS,SELYSE,9,5 129 | DORAN,MYRCELLA,9,5 130 | DORAN,TRYSTANE,9,5 131 | HIGH_SPARROW,OLYVAR,9,5 132 | JORAH,MISSANDEI,9,5 133 | LITTLEFINGER,ROYCE,9,5 134 | LORAS,OLENNA,9,5 135 | LORAS,TOMMEN,9,5 136 | MACE,QYBURN,9,5 137 | 
MELISANDRE,SHIREEN,9,5 138 | RAMSAY,WALDA,9,5 139 | ALLISER_THORNE,OLLY,8,5 140 | AREO,TRYSTANE,8,5 141 | BRIENNE,RENLY,8,5 142 | GILLY,LITTLE_SAM,8,5 143 | HIGH_SPARROW,TOMMEN,8,5 144 | HIZDAHR,MISSANDEI,8,5 145 | JANOS,SAM,8,5 146 | KARSI,LOBODA,8,5 147 | LANCEL,LORAS,8,5 148 | ROOSE_BOLTON,THEON,8,5 149 | SANSA,WALDA,8,5 150 | TYRION,TYWIN,8,5 151 | AREO,OBARA,7,5 152 | BARRISTAN,DAARIO,7,5 153 | BRONN,OBARA,7,5 154 | CATELYN,SANSA,7,5 155 | CERSEI,MYRCELLA,7,5 156 | DERRYK,SAM,7,5 157 | GILLY,OLLY,7,5 158 | GREY_WORM,JORAH,7,5 159 | HIZDAHR,MOSSADOR,7,5 160 | MACE,MERYN_TRANT,7,5 161 | MACE,PYCELLE,7,5 162 | MARGAERY,OLYVAR,7,5 163 | OWNER,TYRION,7,5 164 | PODRICK,SANSA,7,5 165 | RAMSAY,STANNIS,7,5 166 | ALLISER_THORNE,JANOS,6,5 167 | BRAND,SAM,6,5 168 | BRONN,DORAN,6,5 169 | BRONN,TRYSTANE,6,5 170 | CERSEI,TYRION,6,5 171 | DAARIO,MOSSADOR,6,5 172 | EDDISON_TOLLETT,SAM,6,5 173 | ELLARIA,OBERYN,6,5 174 | GILLY,JON,6,5 175 | GILLY,SELYSE,6,5 176 | HIGH_SEPTON,LANCEL,6,5 177 | HIGH_SEPTON,QYBURN,6,5 178 | JAIME,TYWIN,6,5 179 | JON,SELYSE,6,5 180 | JORAH,VARYS,6,5 181 | LITTLE_SAM,SAM,6,5 182 | MADAME,MERYN_TRANT,6,5 183 | MANCE,TORMUND,6,5 184 | MYRANDA,WALDA,6,5 185 | MYRCELLA,OBARA,6,5 186 | NYMERIA,OBARA,6,5 187 | OBARA,TYENE,6,5 188 | ROOSE_BOLTON,WALDA,6,5 189 | ALLISER_THORNE,STANNIS,5,5 190 | BRAN,RICKON,5,5 191 | BRAN,THEON,5,5 192 | BRIENNE,CATELYN,5,5 193 | BRONN,NYMERIA,5,5 194 | CATELYN,LITTLEFINGER,5,5 195 | CERSEI,ROBERT,5,5 196 | DAENERYS,OWNER,5,5 197 | DERRYK,GILLY,5,5 198 | DORAN,OBERYN,5,5 199 | EDDISON_TOLLETT,OLLY,5,5 200 | ELLARIA,NYMERIA,5,5 201 | HIGH_SEPTON,MACE,5,5 202 | JON,LORD_OF_BONES,5,5 203 | KEVAN,TOMMEN,5,5 204 | LITTLEFINGER,STANNIS,5,5 205 | MELISANDRE,SAM,5,5 206 | NED,STANNIS,5,5 207 | PODRICK,STANNIS,5,5 208 | RICKON,THEON,5,5 209 | SAM,SELYSE,5,5 210 | ALLISER_THORNE,DAVOS,4,5 211 | ALLISER_THORNE,EDDISON_TOLLETT,4,5 212 | ALLISER_THORNE,MAESTER_AEMON,4,5 213 | ALLISER_THORNE,TORMUND,4,5 214 | AREO,NYMERIA,4,5 215 | AREO,TYENE,4,5 216 | ARYA,MACE,4,5 217 | BARRISTAN,MOSSADOR,4,5 218 | CERSEI,OLYVAR,4,5 219 | CERSEI,ROOSE_BOLTON,4,5 220 | CERSEI,VARYS,4,5 221 | DORNISH_RIDER,JAIME,4,5 222 | EDDISON_TOLLETT,STANNIS,4,5 223 | ELLARIA,TRYSTANE,4,5 224 | GILLY,MAESTER_AEMON,4,5 225 | GILLY,STANNIS,4,5 226 | HIGH_SEPTON,HIGH_SPARROW,4,5 227 | HIGH_SPARROW,RENLY,4,5 228 | HIGH_SPARROW,ROBERT,4,5 229 | HIZDAHR,JORAH,4,5 230 | JAIME,TYENE,4,5 231 | JON,NIGHT_KING,4,5 232 | JORAH,OWNER,4,5 233 | LITTLEFINGER,ROBIN,4,5 234 | LITTLEFINGER,THEON,4,5 235 | LOBODA,TORMUND,4,5 236 | MERYN_TRANT,QYBURN,4,5 237 | MYRANDA,ROOSE_BOLTON,4,5 238 | QUICK,STRONG,4,5 239 | ROBERT,STANNIS,4,5 240 | ROOSE_BOLTON,STANNIS,4,5 241 | ROOSE_BOLTON,TYWIN,4,5 242 | SAM,SHIREEN,4,5 243 | SANSA,YOHN_ROYCE,4,5 244 | THEON,WALDA,4,5 245 | AEGON,MAESTER_AEMON,3,5 246 | AERYS,DAENERYS,3,5 247 | ARYA,NED,3,5 248 | ARYA,TYCHO,3,5 249 | BARRISTAN,GREY_WORM,3,5 250 | BARRISTAN,RHAEGAR,3,5 251 | BRAN,SANSA,3,5 252 | CERSEI,JOFFREY,3,5 253 | CERSEI,OBERYN,3,5 254 | CERSEI,SANSA,3,5 255 | DORAN,TOMMEN,3,5 256 | EDDISON_TOLLETT,JANOS,3,5 257 | GILLY,JANOS,3,5 258 | GREY_WORM,MOSSADOR,3,5 259 | GREY_WORM,TYRION,3,5 260 | JAIME,LOLLYS,3,5 261 | JAIME,OBARA,3,5 262 | JAIME,TYRION,3,5 263 | JAIME,VARYS,3,5 264 | JANOS,OLLY,3,5 265 | JANOS,STANNIS,3,5 266 | JEOR,JON,3,5 267 | JOFFREY,MARGAERY,3,5 268 | JOFFREY,TOMMEN,3,5 269 | JON,OTHELL_YARWYCK,3,5 270 | JON,ROBB,3,5 271 | KEVAN,LANCEL,3,5 272 | KEVAN,PYCELLE,3,5 273 | KEVAN,QYBURN,3,5 274 | LITTLE_SAM,OLLY,3,5 275 | LITTLEFINGER,LYANNA,3,5 276 | 
LITTLEFINGER,LYSA,3,5 277 | LITTLEFINGER,NED,3,5 278 | LITTLEFINGER,PODRICK,3,5 279 | LITTLEFINGER,YOHN_ROYCE,3,5 280 | LORD_WEEBLY,SAM,3,5 281 | LYANNA,SANSA,3,5 282 | MANCE,MELISANDRE,3,5 283 | NED,ROBERT,3,5 284 | NED,SANSA,3,5 285 | OBARA,OBERYN,3,5 286 | PODRICK,RENLY,3,5 287 | PODRICK,TYRION,3,5 288 | RICKON,SANSA,3,5 289 | ROYCE,SANSA,3,5 290 | SANSA,STANNIS,3,5 291 | SHAE,TYRION,3,5 292 | SHAE,TYWIN,3,5 293 | STANNIS,TORMUND,3,5 294 | AEGON,RHAENYRA,2,5 295 | AERYS,BARRISTAN,2,5 296 | ALLISER_THORNE,DENYS,2,5 297 | ALLISER_THORNE,MANCE,2,5 298 | ARYA,MADAME,2,5 299 | ARYA,MOUNTAIN,2,5 300 | ARYA,SANSA,2,5 301 | ARYA,WALDER,2,5 302 | BARRISTAN,HIZDAHR,2,5 303 | BRIAN,JON,2,5 304 | BRIENNE,ROBERT,2,5 305 | BRIENNE,SELWYN,2,5 306 | BRONN,CERSEI,2,5 307 | BRONN,FALYSE,2,5 308 | CATELYN,NED,2,5 309 | CATELYN,ROBB,2,5 310 | CERSEI,ELLARIA,2,5 311 | CERSEI,JORAH,2,5 312 | CERWYN,RAMSAY,2,5 313 | DAARIO,QUICK,2,5 314 | DAARIO,STRONG,2,5 315 | DAENERYS,QUICK,2,5 316 | DAENERYS,ROBERT,2,5 317 | DAENERYS,STRONG,2,5 318 | DAVOS,EDDISON_TOLLETT,2,5 319 | DAVOS,SAM,2,5 320 | DORAN,OBARA,2,5 321 | DORAN,TYENE,2,5 322 | EDDISON_TOLLETT,GILLY,2,5 323 | EDDISON_TOLLETT,LITTLE_SAM,2,5 324 | EDDISON_TOLLETT,PYP,2,5 325 | EDDISON_TOLLETT,TORMUND,2,5 326 | GILLY,MELISANDRE,2,5 327 | GRENN,PYP,2,5 328 | HIGH_SEPTON,PYCELLE,2,5 329 | HIGH_SPARROW,MERYN_TRANT,2,5 330 | HIGH_SPARROW,SEPTA_UNELLA,2,5 331 | HIZDAHR,OWNER,2,5 332 | HIZDAHR,QUICK,2,5 333 | HIZDAHR,STRONG,2,5 334 | JAIME,LITTLEFINGER,2,5 335 | JAIME,NYMERIA,2,5 336 | JAIME,TOMMEN,2,5 337 | JAQEN,MACE,2,5 338 | JAQEN,MERYN_TRANT,2,5 339 | JEOR,SAM,2,5 340 | JOFFREY,RENLY,2,5 341 | JOFFREY,ROBERT,2,5 342 | JOFFREY,SANSA,2,5 343 | JOFFREY,TYRION,2,5 344 | JOFFREY,TYWIN,2,5 345 | JON,LITTLE_SAM,2,5 346 | JON,LORD_WEEBLY,2,5 347 | JON,ROOSE_BOLTON,2,5 348 | JON,SHIREEN,2,5 349 | JORAH,MOSSADOR,2,5 350 | KARSI,NIGHT_KING,2,5 351 | KEVAN,TYWIN,2,5 352 | LANCEL,OLYVAR,2,5 353 | LANCEL,PYCELLE,2,5 354 | LITTLE_SAM,MELISANDRE,2,5 355 | LITTLE_SAM,STANNIS,2,5 356 | LITTLEFINGER,LORAS,2,5 357 | LITTLEFINGER,MYRANDA,2,5 358 | LITTLEFINGER,OLYVAR,2,5 359 | LITTLEFINGER,RENLY,2,5 360 | LITTLEFINGER,ROBB,2,5 361 | LITTLEFINGER,TOMMEN,2,5 362 | LITTLEFINGER,WALDA,2,5 363 | LOLLYS,TANDA,2,5 364 | LORAS,TYWIN,2,5 365 | LYANNA_MORMONT,STANNIS,2,5 366 | MACE,MERYN_TRANT,2,5 367 | MACE,THIN_MAN,2,5 368 | MACE,TOMMEN,2,5 369 | MAESTER_WOLKAN,ROOSE_BOLTON,2,5 370 | MAGGY,MELARA,2,5 371 | MAGGY,ROBERT,2,5 372 | MAGNAR,SAM,2,5 373 | MANCE,SAM,2,5 374 | MARGAERY,ROBERT,2,5 375 | MELISANDRE,OLLY,2,5 376 | MERYN_TRANT,MOUNTAIN,2,5 377 | MERYN_TRANT,THIN_MAN,2,5 378 | MOSSADOR,TYRION,2,5 379 | NED,THEON,2,5 380 | OLENNA,OLYVAR,2,5 381 | OLYVAR,TOMMEN,2,5 382 | PYCELLE,VARYS,2,5 383 | QUICK,TYRION,2,5 384 | RANDYLL,SAM,2,5 385 | RHAEGAR,ROBERT,2,5 386 | ROBB,SANSA,2,5 387 | ROBB,THEON,2,5 388 | ROBERT,TYWIN,2,5 389 | ROBERT,VARYS,2,5 390 | ROBIN,ROYCE,2,5 391 | ROBIN,SANSA,2,5 392 | STANNIS,STEFFON,2,5 393 | STRONG,TYRION,2,5 394 | THIN_MAN,WAIF,2,5 395 | TOMMEN,TYWIN,2,5 396 | TYCHO,JAQEN,2,5 397 | TYCHO,MERYN_TRANT,2,5 398 | TYCHO,THIN_MAN,2,5 --------------------------------------------------------------------------------
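The raw Game of Thrones edge lists above all share the Source,Target,Weight,Season schema. A minimal sketch of turning one season's file into a weighted adjacency matrix (illustrative only; this is not the packaged dynetlsm.datasets loader):

import numpy as np
import pandas as pd

df = pd.read_csv('dynetlsm/datasets/raw_data/got/got-s7-edges.csv')
names = sorted(set(df['Source']) | set(df['Target']))
idx = {name: i for i, name in enumerate(names)}

# symmetric, weighted co-occurrence network for the season
Y = np.zeros((len(names), len(names)))
for row in df.itertuples(index=False):
    Y[idx[row.Source], idx[row.Target]] = row.Weight
    Y[idx[row.Target], idx[row.Source]] = row.Weight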