├── dynetlsm
│   ├── tests
│   │   ├── __init__.py
│   │   ├── test_lsm.py
│   │   └── test_hdp_lcpm.py
│   ├── __init__.py
│   ├── model_selection
│   │   ├── __init__.py
│   │   ├── train_test_split.py
│   │   ├── posterior_vi.py
│   │   └── approx_bic.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── raw_data
│   │   │   ├── sampson_groups.txt
│   │   │   ├── sampson_groups_waverers.txt
│   │   │   ├── sampson_names.txt
│   │   │   ├── sampson.R
│   │   │   ├── sampson_0.npy
│   │   │   ├── sampson_1.npy
│   │   │   ├── sampson_2.npy
│   │   │   ├── military_alliances
│   │   │   │   ├── cow_alliances.R
│   │   │   │   └── names.csv
│   │   │   ├── got
│   │   │   │   ├── got-s8-nodes.csv
│   │   │   │   ├── got-s7-nodes.csv
│   │   │   │   ├── got-s5-node.csv
│   │   │   │   ├── got-s3-nodes.csv
│   │   │   │   ├── got-s1-nodes.csv
│   │   │   │   ├── got-s2-nodes.csv
│   │   │   │   ├── got-s6-nodes.csv
│   │   │   │   ├── got-s4-nodes.csv
│   │   │   │   ├── got-s7-edges.csv
│   │   │   │   └── got-s5-edges.csv
│   │   │   └── sampson.npy
│   │   ├── load_alliances.py
│   │   ├── load_got.py
│   │   ├── load_monks.py
│   │   └── detection_limit.py
│   ├── version.py
│   ├── array_utils.py
│   ├── sample_concentration.py
│   ├── procrustes.py
│   ├── metrics.py
│   ├── gaussian_likelihood_fast.pyx
│   ├── sample_auxillary.py
│   ├── text_utils.py
│   ├── network_statistics.py
│   ├── label_utils.py
│   ├── network_likelihoods.py
│   ├── static_network_fast.pyx
│   ├── imputer.py
│   ├── trace_utils.py
│   ├── distributions.py
│   ├── forecast.pyx
│   ├── metropolis.py
│   ├── sample_coefficients.py
│   ├── latent_space.py
│   ├── sample_labels.py
│   ├── case_control_likelihood.py
│   └── sample_latent_positions.py
├── .gitattributes
├── test_requirements.txt
├── images
│   ├── hdp.png
│   ├── lpcm.png
│   ├── dynamic_lsm.png
│   ├── lsm_traces.png
│   ├── static_lsm.png
│   ├── dynamic_label.png
│   ├── alluvial_diagram.png
│   ├── dynamic_lpcm_rw.png
│   ├── dynamic_lsm_rw.png
│   ├── hdp_lpcm_traces.png
│   ├── lsm_latent_space.png
│   ├── static_lsm_prior.png
│   ├── dynamic_lpcm_initial.png
│   ├── dynamic_lsm_initial.png
│   └── hdp_lpcm_latent_space.png
├── requirements.txt
├── MANIFEST.in
├── Makefile
├── setup.cfg
├── ci_scripts
│   └── travis
│       ├── test.sh
│       ├── success.sh
│       └── install.sh
├── appveyor.yml
├── .travis.yml
├── LICENSE
├── .gitignore
├── examples
│   ├── military_alliances.py
│   ├── GoT.py
│   ├── sampson_monks.py
│   ├── homogeneous_dynsbm.R
│   ├── homogeneous_sbm.R
│   ├── inhomogeneous_sbm.R
│   ├── merging_communities.py
│   ├── detection_limit.py
│   ├── inhomogeneous_simulation.py
│   └── homogeneous_simulation.py
└── setup.py
/dynetlsm/tests/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------
/.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-documentation 2 | --------------------------------------------------------------------------------
/test_requirements.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-pep8 3 | pytest-cov --------------------------------------------------------------------------------
/images/hdp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp.png --------------------------------------------------------------------------------
/images/lpcm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lpcm.png --------------------------------------------------------------------------------
/dynetlsm/__init__.py: -------------------------------------------------------------------------------- 1 | from .lsm import * 2 | from .lpcm import * 3 | from 
.hdp_lpcm import * 4 | -------------------------------------------------------------------------------- /images/dynamic_lsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm.png -------------------------------------------------------------------------------- /images/lsm_traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lsm_traces.png -------------------------------------------------------------------------------- /images/static_lsm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/static_lsm.png -------------------------------------------------------------------------------- /images/dynamic_label.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_label.png -------------------------------------------------------------------------------- /images/alluvial_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/alluvial_diagram.png -------------------------------------------------------------------------------- /images/dynamic_lpcm_rw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lpcm_rw.png -------------------------------------------------------------------------------- /images/dynamic_lsm_rw.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm_rw.png -------------------------------------------------------------------------------- /images/hdp_lpcm_traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp_lpcm_traces.png -------------------------------------------------------------------------------- /images/lsm_latent_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/lsm_latent_space.png -------------------------------------------------------------------------------- /images/static_lsm_prior.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/static_lsm_prior.png -------------------------------------------------------------------------------- /images/dynamic_lpcm_initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lpcm_initial.png -------------------------------------------------------------------------------- /images/dynamic_lsm_initial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/dynamic_lsm_initial.png -------------------------------------------------------------------------------- /images/hdp_lpcm_latent_space.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/joshloyal/dynetlsm/HEAD/images/hdp_lpcm_latent_space.png --------------------------------------------------------------------------------
/dynetlsm/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_bic import * 2 | from .posterior_vi import * 3 | from .train_test_split import * 4 | --------------------------------------------------------------------------------
/dynetlsm/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .samples_generator import * 2 | from .detection_limit import * 3 | from .load_monks import * 4 | from .load_got import * 5 | from .load_alliances import * 6 | --------------------------------------------------------------------------------
/requirements.txt: -------------------------------------------------------------------------------- 1 | cython 2 | scipy 3 | numpy 4 | scikit-learn 5 | tqdm 6 | networkx 7 | pandas 8 | plac 9 | joblib 10 | matplotlib 11 | seaborn 12 | arviz 13 | pyvis 14 | statsmodels 15 | --------------------------------------------------------------------------------
/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.md 2 | recursive-include examples * 3 | recursive-include dynetlsm/datasets *.csv *.npy *.txt 4 | include README.md 5 | include requirements.txt 6 | include test_requirements.txt 7 | --------------------------------------------------------------------------------
/dynetlsm/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | __all__ = ['__version__', 'VERSION'] 6 | 7 | __version__ = '0.1.0' 8 | VERSION = __version__ 9 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_groups.txt: -------------------------------------------------------------------------------- 1 | Turks 2 | Turks 3 | Outcasts 4 | Loyal 5 | Loyal 6 | Loyal 7 | Turks 8 | Loyal 9 | Loyal 10 | Loyal 11 | Loyal 12 | Turks 13 | Outcasts 14 | Turks 15 | Turks 16 | Turks 17 | Outcasts 18 | Outcasts 19 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_groups_waverers.txt: -------------------------------------------------------------------------------- 1 | Turks 2 | Turks 3 | Outcasts 4 | Loyal 5 | Loyal 6 | Loyal 7 | Turks 8 | Waverers 9 | Loyal 10 | Waverers 11 | Loyal 12 | Turks 13 | Waverers 14 | Turks 15 | Turks 16 | Turks 17 | Outcasts 18 | Outcasts 19 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/sampson_names.txt: -------------------------------------------------------------------------------- 1 | John Bosco 2 | Gregory 3 | Basil 4 | Peter 5 | Bonaventure 6 | Berthold 7 | Mark 8 | Victor 9 | Ambrose 10 | Romauld 11 | Louis 12 | Winfrid 13 | Amand 14 | Hugh 15 | Boniface 16 | Albert 17 | Elias 18 | Simplicius 19 | --------------------------------------------------------------------------------
/Makefile: -------------------------------------------------------------------------------- 1 | # makefile to simplify repetitive build env management tasks under posix 2 | 3 | PYTHON ?= python 4 | PYTEST ?= pytest 5 | 6 | clean: 7 | $(PYTHON) setup.py clean 8 | rm -rf dist 9 | 10 | install-dev: 11 | $(PYTHON) setup.py develop 12 | 13 | test-code: install-dev 14 | $(PYTEST) --showlocals -v dynetlsm 15 |
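# Editor's usage sketch (not part of the original Makefile): with the two
# targets above, a typical workflow is
#   make install-dev   # build the package (incl. its Cython extensions) in-place
#   make test-code     # run the pytest suite against that dev install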
-------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [pep8] 5 | # default is 79 6 | max-line-length=80 7 | 8 | [aliases] 9 | test=pytest 10 | 11 | [tool:pytest] 12 | addopts = 13 | --verbose 14 | --disable-pytest-warnings 15 | --doctest-modules dynetlsm 16 | pep8maxlinelength = 80 17 | 18 | [bdist_wheel] 19 | universal=1 20 | --------------------------------------------------------------------------------
/dynetlsm/tests/test_lsm.py: -------------------------------------------------------------------------------- 1 | from dynetlsm import DynamicNetworkLSM 2 | from dynetlsm.datasets import simple_splitting_dynamic_network 3 | 4 | 5 | def test_lsm_smoke(): 6 | Y, labels = simple_splitting_dynamic_network( 7 | n_nodes=50, n_time_steps=2, random_state=42) 8 | 9 | lsm = DynamicNetworkLSM(n_iter=250, burn=250, tune=250, 10 | n_features=2, random_state=123) 11 | lsm.fit(Y) 12 | 13 | assert lsm.X_.shape == (2, 50, 2) 14 | --------------------------------------------------------------------------------
/ci_scripts/travis/test.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | # Get into a temp directory to run the tests from the installed package and 4 | # check that we do not leave artifacts 5 | mkdir -p $TEST_DIR 6 | cp setup.cfg $TEST_DIR 7 | cd $TEST_DIR 8 | 9 | python --version 10 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 11 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 12 | 13 | if [[ "$COVERAGE" == "true" ]]; then 14 | pytest --cov=$MODULE --pyargs 15 | else 16 | pytest --pyargs 17 | fi 18 | --------------------------------------------------------------------------------
/ci_scripts/travis/success.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | if [[ "$COVERAGE" == "true" ]]; then 4 | # Need to run coveralls from a git checkout, so we copy .coverage 5 | # from TEST_DIR where pytest has been run 6 | cp $TEST_DIR/.coverage $TRAVIS_BUILD_DIR 7 | cd $TRAVIS_BUILD_DIR 8 | # Ignore coveralls failures as the coveralls server is not 9 | # very reliable but we don't want travis to report a failure 10 | # in the github UI just because the coverage report failed to 11 | # be published. 
12 | coveralls || echo "Coveralls upload failed" 13 | fi -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson.R: -------------------------------------------------------------------------------- 1 | library(ergm) 2 | 3 | data(samplk) 4 | 5 | Y1 <- as.matrix(samplk1) 6 | print(colnames(Y1)) 7 | write.table(Y1, 8 | file=paste0('sampson_', 0, '.npy'), 9 | col.names=FALSE, row.names=FALSE) 10 | 11 | Y2 <- as.matrix(samplk2) 12 | write.table(Y2, 13 | file=paste0('sampson_', 1, '.npy'), 14 | col.names=FALSE, row.names=FALSE) 15 | 16 | Y3 <- as.matrix(samplk3) 17 | write.table(Y3, 18 | file=paste0('sampson_', 2, '.npy'), 19 | col.names=FALSE, row.names=FALSE) 20 | -------------------------------------------------------------------------------- /dynetlsm/tests/test_hdp_lcpm.py: -------------------------------------------------------------------------------- 1 | from dynetlsm import DynamicNetworkHDPLPCM 2 | from dynetlsm.datasets import simple_splitting_dynamic_network 3 | 4 | 5 | def test_hdp_lpcm_smoke(): 6 | Y, labels = simple_splitting_dynamic_network( 7 | n_nodes=50, n_time_steps=2, random_state=42) 8 | 9 | lpcm = DynamicNetworkHDPLPCM(n_iter=250, burn=250, tune=250, 10 | n_features=2, n_components=10, 11 | random_state=123) 12 | lpcm.fit(Y) 13 | 14 | assert lpcm.X_.shape == (2, 50, 2) 15 | assert lpcm.z_.shape == (2, 50) 16 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_0.npy: -------------------------------------------------------------------------------- 1 | 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 3 | 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 4 | 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 5 | 0 0 0 1 0 0 0 0 0 0 1 0 1 0 0 0 0 0 6 | 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 8 | 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 10 | 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 11 | 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 12 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 15 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 16 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 17 | 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 18 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 19 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_1.npy: -------------------------------------------------------------------------------- 1 | 0 1 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 3 | 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 4 | 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 5 | 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 6 | 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 8 | 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 10 | 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 11 | 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 12 | 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 15 | 1 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 16 | 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 17 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 18 | 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 19 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson_2.npy: -------------------------------------------------------------------------------- 1 | 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 2 | 1 0 0 0 0 0 1 0 0 
0 0 1 0 0 0 0 0 0 3 | 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 4 | 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 5 | 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 6 | 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 7 | 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 8 | 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 9 | 0 0 0 0 1 0 0 1 0 0 0 1 0 0 0 0 0 0 10 | 0 0 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 11 | 0 0 0 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 12 | 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 13 | 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 14 | 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 15 | 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 16 | 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 17 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 18 | 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 19 | --------------------------------------------------------------------------------
/dynetlsm/array_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def triu_indices_from_3d(Y, k=0): 5 | return np.nonzero(~np.stack( 6 | [np.tri(Y.shape[1], Y.shape[2], k=k-1, dtype=bool) for 7 | t in range(Y.shape[0])] 8 | )) 9 | 10 | 11 | def diag_indices_from_3d(Y): 12 | return np.nonzero(np.stack( 13 | [np.eye(Y.shape[1], Y.shape[2], dtype=bool) for 14 | t in range(Y.shape[0])] 15 | )) 16 | 17 | 18 | def nondiag_indices_from_3d(Y): 19 | return np.nonzero(~np.stack( 20 | [np.eye(Y.shape[1], Y.shape[2], dtype=bool) for 21 | t in range(Y.shape[0])] 22 | )) 23 | 24 | 25 | def nondiag_indices_from(Y): 26 | return np.nonzero(~np.eye(Y.shape[0], Y.shape[1], dtype=bool)) 27 | --------------------------------------------------------------------------------
/appveyor.yml: -------------------------------------------------------------------------------- 1 | build: false 2 | 3 | environment: 4 | matrix: 5 | - PYTHON: "C:\\Miniconda36-x64" 6 | PYTHON_VERSION: "3.7.x" 7 | PYTHON_ARCH: "64" 8 | 9 | - PYTHON: "C:\\Miniconda36" 10 | PYTHON_VERSION: "3.7.x" 11 | PYTHON_ARCH: "32" 12 | 13 | matrix: 14 | fast_finish: true 15 | 16 | install: 17 | # Prepend miniconda installed Python to the PATH of this build 18 | # Add Library/bin directory to fix issue 19 | # https://github.com/conda/conda/issues/1753 20 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PYTHON%\\Library\\bin;%PATH%" 21 | - conda install pip scipy numpy scikit-learn=0.22.1 cython -y -q 22 | - conda install pytest pytest-cov -y -q 23 | - pip install . 24 | 25 | test_script: 26 | - mkdir for_test 27 | - cd for_test 28 | - pytest --pyargs 29 | --------------------------------------------------------------------------------
/dynetlsm/sample_concentration.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | 6 | def sample_concentration_param(alpha, n_clusters, n_samples, prior_shape=1.0, 7 | prior_rate=1.0, random_state=None): 8 | """Sample concentration parameters as in Escobar and West (1995)""" 9 | rng = check_random_state(random_state) 10 | 11 | # auxiliary variable sampler 12 | eta = rng.beta(alpha + 1, n_samples) 13 | 14 | m_shape = prior_shape + n_clusters - 1 15 | m_scale = prior_rate - np.log(eta) 16 | 17 | odds = (m_shape / m_scale) * (1 / n_samples) 18 | mix_indicator = rng.binomial(1, odds / (1 + odds)) 19 | m_shape = m_shape + 1 if mix_indicator else m_shape 20 | 21 | return rng.gamma(shape=m_shape, scale=1. 
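# (Editor's note: NumPy's Gamma sampler is parameterized by *scale*, so the
# posterior rate m_scale enters through its reciprocal below.)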
/ m_scale) 22 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/military_alliances/cow_alliances.R: -------------------------------------------------------------------------------- 1 | library(igraph) 2 | library(tidygraph) 3 | library(tidyverse) 4 | 5 | data <- read_csv('alliance_v4.1_by_dyad_yearly.csv') %>% 6 | rename(from = state_name1, to = state_name2) %>% 7 | select(from, to, year, defense) 8 | 9 | names <- as_tbl_graph(data) %>% 10 | activate(nodes) %>% 11 | as_tibble() 12 | 13 | write_csv(names, 'names.csv') 14 | 15 | step_size <- 5 16 | for (year_id in seq(1950, 1975, by = step_size)) { 17 | graph <- as_tbl_graph(data) %>% 18 | activate(edges) %>% 19 | filter(year >= year_id) %>% 20 | filter(year < (year_id + step_size)) 21 | 22 | Y <- as_adjacency_matrix(graph, sparse = FALSE) 23 | write.table(Y, file=paste0('network_', year_id, '.npy'), 24 | col.names = FALSE, row.names = FALSE) 25 | } 26 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | sudo: false 3 | 4 | language: python 5 | notifications: 6 | email: false 7 | cache: 8 | apt: true 9 | # We use three different cache directory 10 | # to work around a Travis bug with multi-platform cache 11 | directories: 12 | - $HOME/.cache/pip 13 | - $HOME/download 14 | env: 15 | global: 16 | # Directory where tests are run from 17 | - TEST_DIR=/tmp/test_dir/ 18 | - MODULE=dynetlsm 19 | matrix: 20 | - DISTRIB="conda" PYTHON_VERSION="3.7" 21 | NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" CYTHON_VERSION="0.29.14" 22 | 23 | install: source ci_scripts/travis/install.sh 24 | script: bash ci_scripts/travis/test.sh 25 | after_success: source ci_scripts/travis/success.sh 26 | 27 | deploy: 28 | provider: pypi 29 | distributions: sdist bdist_wheel 30 | user: joshloyal 31 | password: 32 | secure: PLEASE_REPLACE_ME 33 | on: 34 | tags: true 35 | repo: joshloyal/dynetlsm 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020, Joshua D. Loyal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /ci_scripts/travis/install.sh: -------------------------------------------------------------------------------- 1 | # Deactivate the travis-provided virtual environment and setup a 2 | # conda-based environment instead 3 | deactivate 4 | 5 | # Use the miniconda installer for faster download / install of conda 6 | # itself 7 | pushd . 8 | cd 9 | mkdir -p download 10 | cd download 11 | echo "Cached in $HOME/download :" 12 | ls -l 13 | echo 14 | if [[ ! -f miniconda.sh ]] 15 | then 16 | wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ 17 | -O miniconda.sh 18 | fi 19 | chmod +x miniconda.sh && ./miniconda.sh -b 20 | cd .. 21 | export PATH=/home/travis/miniconda/bin:$PATH 22 | conda update --yes conda 23 | popd 24 | 25 | # Configure the conda environment and put it in the path using the 26 | # provided versions 27 | conda create -n testenv --yes python=$PYTHON_VERSION pip 28 | source activate testenv 29 | 30 | # numeric libraries 31 | conda install --yes \ 32 | numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION 33 | 34 | # test libraries 35 | conda install --yes \ 36 | pep8 nose pytest pytest-pep8 pytest-xdist pytest-cov 37 | 38 | if [[ "$COVERAGE" == "true" ]]; then 39 | pip install coverage coveralls 40 | pip install python-coveralls 41 | fi 42 | 43 | python --version 44 | python -c "import numpy; print('numpy %s' % numpy.__version__)" 45 | python -c "import scipy; print('scipy %s' % scipy.__version__)" 46 | python setup.py develop 47 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s8-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALANNA,Alanna 5 | ALTON,Alton 6 | ALYS,Alys 7 | ARYA,Arya 8 | BERIC,Beric 9 | BRAN,Bran 10 | BRIENNE,Brienne 11 | BRONN,Bronn 12 | CATELYN,Catelyn 13 | CERSEI,Cersei 14 | CERSEIS_BABY,Cersei's Baby 15 | CRAYAH,Crayah 16 | DAENERYS,Daenerys 17 | DAVOS,Davos 18 | DICKON,Dickon 19 | DIRAH,Dirah 20 | DORNISH_PRINCE,Dornish Prince 21 | EDDISON_TOLLETT,Eddison 22 | EDMURE,Edmure 23 | EURON,Euron 24 | GENDRY,Gendry 25 | GILLY,Gilly 26 | GREY_WORM,Grey Worm 27 | HARRY,Harry 28 | HOUND,Sandor 29 | IRONBORN_LORD,Ironborn Lord 30 | JAIME,Jaime 31 | JOFFREY,Joffrey 32 | JON,Jon 33 | JORAH,Jorah 34 | LITTLE_SAM,Little Sam 35 | LITTLEFINGER,Petyr 36 | LYANNA,Lyanna 37 | LYANNA_MORMONT,Lyanna Mormont 38 | MAREI,Marei 39 | MARTHA,Martha 40 | MARWYN,Marwyn 41 | MELISANDRE,Melisandre 42 | MISSANDEI,Missandei 43 | MOUNTAIN,Gregor 44 | NED,Ned 45 | NED_UMBER,Ned Umber 46 | NIGHT_KING,Night King 47 | NORA,Nora 48 | OWEN,Owen 49 | PODRICK,Podrick 50 | QHONO,Qhono 51 | QYBURN,Qyburn 52 | RAMSAY,Ramsay 53 | RANDYLL,Randyll 54 | RHAEGAR,Rhaegar 55 | RIVERLANDS_LORD,Riverlands Lord 56 | ROBERT,Robert 57 | ROBIN,Robin 58 | SAM,Sam 59 | SANSA,Sansa 60 | SARRA,Sarra 61 | TEELA,Teela 62 | THEON,Theon 63 | TORMUND,Tormund 64 | TYRION,Tyrion 65 | TYWIN,Tywin 66 | UNSULLIED_CAPTAIN,Unsullied Captain 67 | VALE_LORD,Vale Lord 68 | VARYS,Varys 69 | VICKY,Vicky 70 | VISERYS,Viserys 71 | WILLA,Willa 72 | WILLIAM,William 73 | MAESTER_WOLKAN,Wolkan 74 | YARA,Yara 75 | YOHN_ROYCE,Yohn Royce -------------------------------------------------------------------------------- /dynetlsm/model_selection/train_test_split.py: -------------------------------------------------------------------------------- 1 | import numpy as 
np 2 | 3 | from math import ceil 4 | 5 | from sklearn.utils import check_random_state 6 | 7 | from ..array_utils import triu_indices_from_3d 8 | 9 | 10 | MAX_INT = np.iinfo(np.int32).max 11 | 12 | __all__ = ['train_test_split'] 13 | 14 | 15 | def train_test_split(Y, test_size=0.1, random_state=None): 16 | """Split dyads into training and testing subsets. 17 | 18 | Parameters 19 | ---------- 20 | Y : array-like, shape (n_time_steps, n_nodes, n_nodes) 21 | """ 22 | n_time_steps, n_nodes, _ = Y.shape 23 | 24 | random_state = check_random_state(random_state) 25 | 26 | # number of dyads in an undirected graph with n_nodes nodes 27 | n_dyads = int(0.5 * n_nodes * (n_nodes - 1)) 28 | test_size_type = np.asarray(test_size).dtype.kind 29 | if test_size_type == 'f': 30 | n_test = ceil(test_size * n_dyads) 31 | else: 32 | n_test = int(test_size) 33 | 34 | Y_new = np.zeros_like(Y) 35 | for t in range(n_time_steps): 36 | tril_indices = np.tril_indices_from(Y[t], k=-1) 37 | 38 | perm = random_state.choice( 39 | np.arange(n_dyads), size=n_test, replace=False) 40 | test_indices = perm 41 | 42 | Y_vec = Y[t][tril_indices] 43 | Y_vec[perm] = -1.0 44 | Y_new[t][tril_indices] = Y_vec 45 | Y_new[t] += Y_new[t].T 46 | 47 | 48 | triu_indices = triu_indices_from_3d(Y_new, k=1) 49 | test_indices = Y_new[triu_indices] == -1 50 | return Y_new, test_indices 51 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s7-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALYS,Alys 5 | ARYA,Arya 6 | BALERION,Balerion 7 | BENJEN,Benjen 8 | BERIC,Beric 9 | BRAN,Bran 10 | BRIENNE,Brienne 11 | BRONN,Bronn 12 | CATELYN,Catelyn 13 | CERSEI,Cersei 14 | DAENERYS,Daenerys 15 | DAVOS,Davos 16 | DICKON,Dickon 17 | EDDISON_TOLLETT,Eddison 18 | DROGO,Drogo 19 | ELLARIA,Ellaria 20 | EURON,Euron 21 | GENDRY,Gendry 22 | GILLY,Gilly 23 | GREY_WORM,Grey Worm 24 | HARRAG,Harrag 25 | HIGH_SEPTON,High Septon (Aerys) 26 | HOT_PIE,Hot Pie 27 | HOUND,Sandor 28 | HOWLAND,Howland 29 | JAIME,Jaime 30 | JEOR,Jeor 31 | JOANNA,Joanna 32 | JOFFREY,Joffrey 33 | JON,Jon 34 | JON_ARRYN,Jon Arryn 35 | JORAH,Jorah 36 | LITTLE_SAM,Little Sam 37 | LITTLEFINGER,Petyr 38 | LYANNA,Lyanna 39 | LYANNA_MORMONT,Lyanna Mormont 40 | LYSA,Lysa 41 | MARGAERY,Margaery 42 | MARWYN,Marwin 43 | MEERA,Meera 44 | MELISANDRE,Melisandre 45 | MISSANDEI,Missandei 46 | MOUNTAIN,Gregor 47 | MYRCELLA,Myrcella 48 | NED,Ned 49 | NED_UMBER,Ned Umber 50 | NIGHT_KING,Night King 51 | NYMERIA,Nymeria 52 | OBARA,Obara 53 | OBERYN,Oberyn 54 | OLENNA,Olenna 55 | PODRICK,Podrick 56 | QHONO,Qhono 57 | QYBURN,Qyburn 58 | RAMSAY,Ramsay 59 | RANDYLL,Randyll 60 | RHAEGAR,Rhaegar 61 | ROBB,Robb 62 | ROBERT,Robert 63 | ROBETT,Robett 64 | SAM,Sam 65 | SANSA,Sansa 66 | SHIREEN,Shireen 67 | STANNIS,Stannis 68 | THEON,Theon 69 | THOROS,Thoros 70 | TOMMEN,Tommen 71 | TORMUND,Tormund 72 | TYCHO,Tycho 73 | TYENE,Tyene 74 | TYRION,Tyrion 75 | TYWIN,Tywin 76 | VARYS,Varys 77 | VISERYS,Viserys 78 | WALDER,Walder 79 | WHITE_WALKER,White Walker 80 | MAESTER_WOLKAN,Wolkan 81 | YARA,Yara 82 | YOHN_ROYCE,Yohn Royce -------------------------------------------------------------------------------- /dynetlsm/datasets/load_alliances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import networkx as nx 4 | 5 | from os.path import dirname, join 6 | 7 | 8 | __all__ = ['load_alliances'] 
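# Editor's usage sketch (hedged, not part of the original module): the loader
# defined below returns one binary five-year window per slice for 1950-1975,
# together with the country names aligned to the node axis, e.g.
#   Y, names = load_alliances(min_degree=1)
#   assert Y.shape == (6, len(names), len(names))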
9 | 10 | 11 | def load_alliances(min_degree=1, directed=False, remove_periphery=True): 12 | module_path = dirname(__file__) 13 | file_path = join(module_path, 'raw_data', 'military_alliances') 14 | 15 | n_nodes = 180 16 | n_years = 6 17 | Y = np.zeros((n_years, n_nodes, n_nodes)) 18 | 19 | if directed: 20 | file_fmt = 'directed_network_{}.npy' 21 | else: 22 | file_fmt = 'network_{}.npy' 23 | 24 | for t, year in enumerate(range(1950, 1980, 5)): 25 | Y[t] = np.loadtxt(join(file_path, file_fmt.format(year))) 26 | 27 | # binarize network 28 | Y = (Y > 0).astype(np.float64) 29 | 30 | # symmetrize network 31 | if not directed: 32 | for t in range(Y.shape[0]): 33 | Y[t] = (Y[t] + Y[t].T) / 2. 34 | Y = (Y > 0).astype(np.float64) 35 | 36 | if remove_periphery: 37 | for t in range(Y.shape[0]): 38 | G = nx.from_numpy_array(Y[t]) 39 | core_id = np.asarray(list(nx.core_number(G).values())) 40 | mask = np.where(core_id <= 2)[0] 41 | Y[t, mask] = 0 42 | Y[t, :, mask] = 0 43 | 44 | # keep only countries whose total degree is at least min_degree 45 | active_ids = np.where( 46 | (Y.sum(axis=(0, 1)) + Y.sum(axis=(0, 2))) >= min_degree)[0] 47 | Y = np.ascontiguousarray(Y[:, active_ids][:, :, active_ids]) 48 | 49 | # load country names 50 | names = pd.read_csv(join(file_path, 'names.csv')) 51 | names = names.values.ravel()[active_ids] 52 | 53 | return np.ascontiguousarray(Y), names 54 | --------------------------------------------------------------------------------
/dynetlsm/procrustes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.linalg as linalg 3 | from scipy.linalg import orthogonal_procrustes 4 | 5 | 6 | def flatten_array(X): 7 | return X.reshape(np.prod(X.shape[:-1]), -1) 8 | 9 | 10 | def compute_procrustes_rotation(X, Y): 11 | """X is the reference and Y is matching X""" 12 | X_center = X - np.mean(X, axis=0) 13 | Y_center = Y - np.mean(Y, axis=0) 14 | 15 | u, s, v = linalg.svd(np.dot(X_center.T, Y_center)) 16 | 17 | return np.dot(v.T, u.T)  # np.linalg.svd returns vh, so transpose it back 18 | 19 | 20 | def static_procrustes_rotation(X, Y): 21 | """Rotate Y to match X""" 22 | #A = compute_procrustes_rotation(X, Y) 23 | #return np.dot(Y - np.mean(Y, axis=0), A) 24 | R, _ = orthogonal_procrustes(Y, X) 25 | return np.dot(Y, R), R 26 | 27 | 28 | def longitudinal_procrustes_rotation(X_ref, X): 29 | """A single procrustes transformation applied across time.""" 30 | n_time_steps, n_nodes = X.shape[:-1] 31 | 32 | X_ref = flatten_array(X_ref) 33 | X = flatten_array(X) 34 | X, R = static_procrustes_rotation(X_ref, X) 35 | return X.reshape(n_time_steps, n_nodes, -1), R 36 | 37 | 38 | def longitudinal_procrustes_transform(X, means, copy=True): 39 | if copy: 40 | # copy data over 41 | X = X.copy() 42 | 43 | if means is not None: 44 | means = means.copy() 45 | 46 | # apply procrustes transformation to samples past the tuning phase 47 | n_samples = X.shape[0] 48 | X_ref = X[0] 49 | for i in range(1, n_samples): 50 | X_new = X[i] 51 | 52 | P = compute_procrustes_rotation(X_ref, X_new) 53 | X[i] = np.dot(X_new, P) 54 | 55 | if means is not None: 56 | mu_new = means[i] 57 | means[i] = np.dot(mu_new, P) 58 | 59 | return X, means 60 |
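# Editor's self-check sketch (hedged; not part of the original module):
# rotating a configuration by a known orthogonal matrix and then matching it
# back with the helper above should recover the original positions.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    X_demo = rng.randn(10, 2)
    theta = 0.3
    R_demo = np.array([[np.cos(theta), -np.sin(theta)],
                       [np.sin(theta), np.cos(theta)]])
    X_matched, _ = static_procrustes_rotation(X_demo, X_demo.dot(R_demo))
    assert np.allclose(X_matched, X_demo)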
-------------------------------------------------------------------------------- /dynetlsm/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.metrics import roc_auc_score, mutual_info_score 4 | from sklearn.metrics.cluster import entropy 5 | 6 | from .array_utils import triu_indices_from_3d 7 | from .array_utils import nondiag_indices_from_3d 8 | 9 | 10 | def network_auc(Y_true, Y_pred, is_directed=False, nan_mask=None): 11 | if is_directed: 12 | indices = nondiag_indices_from_3d(Y_true) 13 | else: 14 | indices = triu_indices_from_3d(Y_true, 1) 15 | 16 | y_fit = Y_pred[indices] 17 | y_true = Y_true[indices] 18 | 19 | if nan_mask is not None: 20 | y_fit = y_fit[~nan_mask] 21 | y_true = y_true[~nan_mask] 22 | 23 | return roc_auc_score(y_true, y_fit) 24 | 25 | 26 | def out_of_sample_auc(y_true, y_pred, test_indices): 27 | indices = triu_indices_from_3d(y_true, k=1) 28 | return roc_auc_score(y_true[indices][test_indices], y_pred) 29 | 30 | 31 | def _network_auc_directed(Y_true, Y_pred): 32 | y_true, y_fit = [], [] 33 | 34 | indices = np.triu_indices_from(Y_true[0], 1) 35 | for t in range(Y_true.shape[0]): 36 | y_fit.append(Y_pred[t][indices]) 37 | y_true.append(Y_true[t][indices]) 38 | 39 | return roc_auc_score(np.hstack(y_true), np.hstack(y_fit)) 40 | 41 | 42 | def _network_auc_undirected(Y_true, Y_pred): 43 | indices = triu_indices_from_3d(Y_true, 1) 44 | 45 | y_true = Y_true[indices] 46 | y_fit = Y_pred[indices] 47 | 48 | return roc_auc_score(y_true, y_fit) 49 | 50 | 51 | def variation_of_information(labels_true, labels_pred): 52 | entropy_true = entropy(labels_true) 53 | entropy_pred = entropy(labels_pred) 54 | mutual_info = mutual_info_score(labels_true, labels_pred) 55 | 56 | return entropy_true + entropy_pred - 2 * mutual_info 57 | --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | *.c 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | 58 | # Flask stuff: 59 | instance/ 60 | .webassets-cache 61 | 62 | # Scrapy stuff: 63 | .scrapy 64 | 65 | # Sphinx documentation 66 | doc/_build/ 67 | doc/generated/ 68 | doc/modules/ 69 | doc/auto_examples/ 70 | doc/notebooks/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # dotenv 88 | .env 89 | 90 | # virtualenv 91 | .venv 92 | venv/ 93 | ENV/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /dynetlsm/gaussian_likelihood_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # cython: nonecheck=False 7 | # cython: initializedcheck=False 8 | from libc.math cimport log, exp, sqrt, M_PI 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | 14 | ctypedef np.npy_float64 DOUBLE 15 | 16 | 17 | cpdef DOUBLE spherical_normal_log_pdf(DOUBLE[:] x, 18 | DOUBLE[:] mean, 19 | double var) nogil: 20 | cdef int k = 0 21 | cdef int n_features = x.shape[0] 22 | cdef DOUBLE sum_sq = 0.0 23 | 24 | for k in range(n_features): 25 | sum_sq += (x[k] - mean[k]) ** 2 26 | sum_sq *= 0.5 * (1. 
/ var) 27 | return -0.5 * n_features * log(2 * M_PI * var) - sum_sq 28 | 29 | 30 | def compute_gaussian_likelihood(DOUBLE[:, :] X, 31 | DOUBLE[:, :] mu, 32 | DOUBLE[:] sigma, 33 | double lmbda, 34 | bint normalize=True): 35 | cdef int t, k, j = 0 36 | cdef int n_time_steps = X.shape[0] 37 | cdef int n_features = X.shape[1] 38 | cdef int n_components = sigma.shape[0] 39 | cdef DOUBLE[:] muk = np.zeros(n_features, dtype=np.float64) 40 | cdef DOUBLE[:, :] loglik = np.zeros((n_time_steps, n_components), dtype=np.float64) 41 | 42 | for t in range(n_time_steps): 43 | for k in range(n_components): 44 | if t == 0: 45 | loglik[t, k] = spherical_normal_log_pdf(X[t], mu[k], sigma[k]) 46 | else: 47 | for j in range(n_features): 48 | muk[j] = lmbda * mu[k, j] + (1 - lmbda) * X[t-1, j] 49 | loglik[t, k] = spherical_normal_log_pdf(X[t], muk, sigma[k]) 50 | 51 | if normalize: 52 | loglik -= np.max(loglik, axis=1).reshape(-1, 1) 53 | 54 | return np.exp(loglik) 55 | --------------------------------------------------------------------------------
/examples/military_alliances.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of the military alliances network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_alliances 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | alluvial_plot, 12 | plot_latent_space 13 | ) 14 | 15 | 16 | # Load military alliances networks 17 | Y, names = load_alliances() 18 | 19 | # Fit HDP-LPCM 20 | # NOTE: This will take days to sample! 21 | model = DynamicNetworkHDPLPCM(n_iter=400000, 22 | tune=50000, 23 | burn=50000, 24 | tune_interval=1000, 25 | random_state=42, 26 | n_components=25, 27 | selection_type='vi', 28 | is_directed=False).fit(Y) 29 | 30 | # Trace plots 31 | fig, ax = plot_traces(model, figsize=(10, 12)) 32 | fig.savefig('alliances_traces.png', dpi=300) 33 | 34 | # alluvial diagram 35 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 36 | fig.savefig('alliances_alluvial.png', dpi=300) 37 | 38 | # latent space visualizations 39 | for t in range(Y.shape[0]): 40 | fig, ax = plot_latent_space( 41 | model, figsize=(30, 30), t=t, 42 | textsize=30, 43 | node_size=500, 44 | mutation_scale=20, 45 | linewidth=1.0, 46 | connectionstyle='arc3,rad=0.2', 47 | title_text=None, 48 | plot_group_sigma=True, 49 | node_names=names, 50 | node_textsize=20, 51 | repel_strength=0.3, 52 | mask_groups=[1], # NOTE: this may not be background in other settings! 53 | only_show_connected=True, 54 | number_nodes=True, 55 | border=1.0) 56 | fig.savefig('alliances_latent_space_t{}.png'.format(t), dpi=300) 57 |
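# Editor's follow-up sketch (hedged): an in-sample link-prediction check could
# reuse dynetlsm.metrics.network_auc; the fitted-probability attribute name
# `model.probas_` is an assumption and may differ in the actual API:
#   from dynetlsm.metrics import network_auc
#   print(network_auc(Y, model.probas_))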
-------------------------------------------------------------------------------- /dynetlsm/sample_auxillary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | 6 | def sample_tables(n, beta, alpha_init, alpha, kappa, random_state=None): 7 | rng = check_random_state(random_state) 8 | 9 | n_time_steps, n_components, _ = n.shape 10 | m = np.zeros((n_time_steps, n_components, n_components), 11 | dtype=int) 12 | 13 | # t = 0 does not include a spike 14 | probas = alpha_init * beta 15 | for k in range(n_components): 16 | x = rng.binomial(1, probas[k] / (probas[k] + np.arange(n[0, 0, k]))) 17 | m[0, 0, k] = np.sum(x) 18 | 19 | # include spike for remaining time steps 20 | probas = alpha * beta + kappa * np.eye(n_components) 21 | for t in range(1, n_time_steps): 22 | for j in range(n_components): 23 | for k in range(n_components): 24 | x = rng.binomial( 25 | 1, probas[j, k] / (probas[j, k] + np.arange(n[t, j, k]))) 26 | m[t, j, k] = np.sum(x) 27 | 28 | return m 29 | 30 | 31 | def sample_mbar(m, beta, kappa=1.0, alpha=1.0, random_state=None): 32 | rng = check_random_state(random_state) 33 | n_time_steps, n_components, _ = m.shape 34 | 35 | # sample override variables for t = 1 ... T (do not include t = 0) 36 | w = np.zeros((n_time_steps - 1, n_components), dtype=np.float64) 37 | rho = kappa / (alpha + kappa) 38 | for t in range(n_time_steps - 1): 39 | for j in range(n_components): 40 | w[t, j] = rng.binomial(m[t + 1, j, j], 41 | rho / (rho + beta[j] * (1 - rho))) 42 | 43 | # mbar is determined by m and w 44 | m_bar = np.zeros((n_time_steps - 1, n_components, n_components), 45 | dtype=np.float64) 46 | for t in range(n_time_steps - 1): 47 | m_bar[t] = m[t + 1] - np.diag(w[t]) 48 | 49 | # NOTE: we have to add on the initial distribution transitions 50 | return np.sum(m_bar, axis=(0, 1)) + m[0, 0], w 51 | --------------------------------------------------------------------------------
/examples/GoT.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of the GoT character interaction network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_got 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | alluvial_plot, 12 | plot_latent_space 13 | ) 14 | 15 | 16 | # Load GoT character interaction networks 17 | Y, names = load_got(seasons=[1,2,3,4], weight_min=10) 18 | 19 | # Fit HDP-LPCM 20 | # NOTE: This will take days to sample! 
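# (Editor's aside, hedged: for a quick smoke test the same keyword arguments
# accept a far smaller budget, e.g.
#   DynamicNetworkHDPLPCM(n_iter=500, tune=100, burn=100,
#                         n_components=10, random_state=42).fit(Y)
# which finishes in minutes instead of days, at the cost of a poorly mixed
# posterior.)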
21 | model = DynamicNetworkHDPLPCM(n_iter=400000, 22 | tune=50000, 23 | burn=50000, 24 | tune_interval=1000, 25 | random_state=42, 26 | n_components=25, 27 | selection_type='vi', 28 | is_directed=False).fit(Y) 29 | 30 | # Trace plots 31 | fig, ax = plot_traces(model, figsize=(10, 12)) 32 | fig.savefig('GoT_traces.png', dpi=300) 33 | 34 | # alluvial diagram 35 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 36 | fig.savefig('GoT_alluvial.png', dpi=300) 37 | 38 | # latent space visualizations 39 | for t in range(Y.shape[0]): 40 | fig, ax = plot_latent_space( 41 | model, figsize=(30, 30), t=t, 42 | textsize=50, 43 | node_size=500, 44 | mutation_scale=20, 45 | linewidth=1.0, 46 | connectionstyle='arc3,rad=0.2', 47 | title_text=None, 48 | plot_group_sigma=True, 49 | node_names=names, 50 | node_textsize=15, 51 | repel_strength=0.3, 52 | mask_groups=[5], # NOTE: this may not be background in other settings! 53 | only_show_connected=True, 54 | size_cutoff=2, 55 | number_nodes=True, 56 | border=3.0) 57 | fig.savefig('GoT_latent_space_t{}.png'.format(t), dpi=300) 58 | --------------------------------------------------------------------------------
/dynetlsm/text_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | 4 | 5 | def repel_labels(X, node_names, datasize, k=1.0, textsize=10, mask=None, 6 | include_number=True, ax=None): 7 | G = nx.DiGraph() 8 | 9 | data_nodes = [] 10 | init_pos = {} 11 | data_fmt = 'data_{}' 12 | mask = [True for i in range(X.shape[0])] if mask is None else mask 13 | 14 | if include_number: 15 | label_fmt = '{} ({})' 16 | else: 17 | label_fmt = '{}' 18 | for i, (x, y) in enumerate(X): 19 | if mask[i]: 20 | data_str = data_fmt.format(i) 21 | if node_names is None: 22 | label_str = "{}".format(i) 23 | else: 24 | label_str = label_fmt.format(node_names[i], i) 25 | data_nodes.append(data_str) 26 | G.add_node(data_str) 27 | G.add_node(label_str) 28 | G.add_edge(label_str, data_str) 29 | init_pos[data_str] = (x, y) 30 | init_pos[label_str] = (x, y) 31 | 32 | pos = nx.spring_layout(G, pos=init_pos, fixed=data_nodes, k=k) 33 | 34 | # undo re-scaling 35 | pos_after = np.vstack([pos[d] for d in data_nodes]) 36 | pos_before = np.vstack([init_pos[d] for d in data_nodes]) 37 | scale_x, shift_x = np.polyfit(pos_after[:, 0], pos_before[:, 0], 1) 38 | scale_y, shift_y = np.polyfit(pos_after[:, 1], pos_before[:, 1], 1) 39 | scale, shift = np.array([scale_x, scale_y]), np.array([shift_x, shift_y]) 40 | for key, val in pos.items(): 41 | pos[key] = (val * scale) + shift 42 | 43 | for label, data_str in G.edges(): 44 | ax.annotate(label, 45 | xy=pos[data_str], 46 | xytext=pos[label], 47 | size=textsize, 48 | alpha=0.9, 49 | xycoords='data', 50 | textcoords='data', 51 | arrowprops=dict(arrowstyle='-|>', 52 | shrinkA=0, shrinkB=np.sqrt(datasize) / 2., 53 | connectionstyle='arc3', 54 | mutation_scale=10, 55 | color='black')) 56 |
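# Editor's usage sketch (hedged, not part of the original module): annotate an
# existing matplotlib Axes with non-overlapping labels for 2-D positions X of
# shape (n_nodes, 2); larger `k` pushes labels further from their points:
#   fig, ax = plt.subplots()
#   ax.scatter(X[:, 0], X[:, 1], s=100)
#   repel_labels(X, node_names=names, datasize=100, k=0.3, ax=ax)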
-------------------------------------------------------------------------------- /examples/sampson_monks.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the analysis of Sampson's monastery network found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import load_monks 9 | from dynetlsm.plots import ( 10 | plot_traces, 11 | plot_posterior_counts, 12 | alluvial_plot, 13 | plot_latent_space 14 | ) 15 | 16 | 17 | # Load Sampson's monastery network 18 | Y, labels, names = load_monks(dynamic=True, is_directed=False) 19 | 20 | # Fit HDP-LPCM 21 | model = DynamicNetworkHDPLPCM(n_iter=165000, 22 | tune=15000, 23 | burn=20000, 24 | tune_interval=1000, 25 | random_state=42, 26 | selection_type='vi', 27 | is_directed=False).fit(Y) 28 | 29 | # Trace plots 30 | fig, ax = plot_traces(model, figsize=(10, 12)) 31 | fig.savefig('sampson_monks_traces.png', dpi=300) 32 | 33 | # posterior group counts 34 | for t in range(Y.shape[0]): 35 | fig, ax = plot_posterior_counts(model, figsize=(8, 6), fontsize=18, 36 | ticksize=18, t=t, normalize=True, 37 | bar_width=0.25, include_title=False) 38 | ax.set_xticks(range(0, 10)) 39 | ax.set_xlim(0, 9) 40 | fig.savefig('sampson_monks_posterior_counts_t{}.png'.format(t), dpi=300) 41 | 42 | # alluvial diagram 43 | fig, ax = alluvial_plot(model.z_, figsize=(10, 5)) 44 | fig.savefig('sampson_monks_alluvial.png', dpi=300) 45 | 46 | # latent space visualizations 47 | for t in range(Y.shape[0]): 48 | fig, ax = plot_latent_space( 49 | model, figsize=(10, 12), t=t, 50 | node_size=100, 51 | linewidth=1.0, 52 | mutation_scale=30, 53 | connectionstyle='arc3,rad=0.2', 54 | title_text=None, 55 | plot_group_sigma=True, 56 | node_names=names, 57 | node_textsize=10, 58 | repel_strength=0.3, 59 | number_nodes=True, border=1.0) 60 | fig.savefig('sampson_monks_latent_space_t{}.png'.format(t), dpi=300) 61 |
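# Editor's follow-up sketch (hedged; attribute names as used above): the
# recovered partition can be scored against Sampson's reported factions with
# the variation of information (0 means the two partitions agree exactly):
#   from dynetlsm.metrics import variation_of_information
#   for t in range(Y.shape[0]):
#       print(variation_of_information(labels[t], model.z_[t]))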
-------------------------------------------------------------------------------- /dynetlsm/datasets/load_got.py: -------------------------------------------------------------------------------- 1 | import glob 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import pandas as pd 6 | import scipy.sparse as sp 7 | 8 | from os.path import dirname, join 9 | 10 | from sklearn.preprocessing import LabelEncoder 11 | 12 | 13 | __all__ = ['load_got', 'load_got_edgelists'] 14 | 15 | 16 | def network_from_edgelist(edgelist, n_nodes): 17 | data = np.ones(edgelist.shape[0]) 18 | Y = sp.coo_matrix((data, (edgelist[:, 0], edgelist[:, 1])), 19 | shape=(n_nodes, n_nodes)).toarray() 20 | 21 | # symmetrize and binarize network 22 | Y += Y.T 23 | Y[Y > 0] = 1 24 | 25 | return Y 26 | 27 | 28 | def load_got_edgelists(): 29 | module_path = dirname(__file__) 30 | file_path = join(module_path, 'raw_data', 'got') 31 | 32 | # load edge-lists into one dataframe 33 | data = pd.concat([ 34 | pd.read_csv(file_name, 35 | names=['source', 'target', 'weight', 'season'], skiprows=1) 36 | for file_name in glob.glob(join(file_path, 'got-s*-edges.csv'))]) 37 | 38 | # aggregate multiple edges into a single edge with a weight 39 | data = data.groupby(['source', 'target', 'season'], 40 | as_index=False).agg({'weight': 'sum'}) 41 | 42 | return data 43 | 44 | 45 | def load_got(seasons=None, weight_min=None): 46 | data = load_got_edgelists() 47 | 48 | if seasons is not None: 49 | data.query('season == {}'.format(seasons), inplace=True) 50 | 51 | if weight_min is not None: 52 | data.query('weight >= {}'.format(weight_min), inplace=True) 53 | 54 | # assign integer label ids 55 | encoder = LabelEncoder().fit(data[['source', 'target']].values.ravel()) 56 | data.loc[:, 'source'] = encoder.transform(data['source']) 57 | data.loc[:, 'target'] = encoder.transform(data['target']) 58 | 59 | n_seasons = data['season'].unique().shape[0] 60 | n_nodes = encoder.classes_.shape[0] 61 | Y = np.zeros((n_seasons, n_nodes, n_nodes)) 62 | for t, season_id in enumerate(np.sort(data['season'].unique())): 63 | season_data = data[data['season'] == season_id] 64 | edgelist = season_data[['source', 'target']].values 65 | Y[t] = network_from_edgelist(edgelist, n_nodes=n_nodes) 66 | 67 | return Y, encoder.classes_ 68 | --------------------------------------------------------------------------------
/dynetlsm/network_statistics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.preprocessing import LabelEncoder 5 | 6 | from .array_utils import nondiag_indices_from 7 | 8 | 9 | def is_dynamic(Y): 10 | return Y.ndim == 3 11 | 12 | 13 | def num_edges(Y, is_directed=False): 14 | return np.sum(Y) if is_directed else 0.5 * np.sum(Y) 15 | 16 | 17 | def density(Y, is_directed=False): 18 | n_edges = num_edges(Y, is_directed=is_directed) 19 | n_nodes = Y.shape[1] if is_dynamic(Y) else Y.shape[0] 20 | 21 | n_possible = n_nodes * (n_nodes - 1) 22 | if is_dynamic(Y): 23 | n_possible *= Y.shape[0] 24 | 25 | if not is_directed: 26 | n_possible *= 0.5 27 | 28 | return n_edges / n_possible 29 | 30 | 31 | def modularity(Y, z, is_directed=False): 32 | if is_dynamic(Y): 33 | n_time_steps = Y.shape[0] 34 | mod_ave = 0 35 | for t in range(n_time_steps): 36 | mod_ave += static_modularity(Y[t], z[t], 37 | is_directed=is_directed) 38 | return mod_ave / n_time_steps 39 | 40 | return static_modularity(Y, z, is_directed=is_directed) 41 | 42 | 43 | def static_modularity(Y, z, is_directed=False): 44 | """modularity for a static network.""" 45 | if is_directed: 46 | n_edges = Y.sum() 47 | degree = 0.5 * (Y.sum(axis=0) + Y.sum(axis=1)) 48 | else: 49 | n_edges = Y.sum() / 2 50 | degree = Y.sum(axis=0) 51 | degree = degree.reshape(-1, 1) 52 | 53 | encoder = LabelEncoder().fit(z) 54 | groups = encoder.transform(z) 55 | n_groups = encoder.classes_.shape[0] 56 | 57 | A = 0.5 * (Y + Y.T) if is_directed else Y 58 | B = A - np.dot(degree, degree.T) / (2 * n_edges) 59 | S = np.eye(n_groups)[groups.astype(int)] 60 | 61 | return np.trace(S.T @ B @ S) / (2 * n_edges) 62 | 63 | 64 | def connected_nodes(Y, is_directed=False, size_cutoff=1): 65 | # NOTE: weak connections essentially treats the graph as undirected 66 | n_components, labels = csgraph.connected_components(Y, 67 | directed=is_directed, 68 | connection='weak') 69 | 70 | if n_components == 1: 71 | return np.arange(Y.shape[1]) 72 | 73 | component_sizes = np.bincount(labels) 74 | non_singletons = np.where(component_sizes > size_cutoff)[0] 75 | 76 | return np.in1d(labels, non_singletons) 77 |
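# Editor's self-check sketch (hedged; not part of the original module): two
# disconnected 3-node cliques labelled as two groups have modularity 1/2.
if __name__ == '__main__':
    Y_demo = np.kron(np.eye(2), 1 - np.eye(3))
    z_demo = np.repeat([0, 1], 3)
    assert np.isclose(static_modularity(Y_demo, z_demo), 0.5)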
-------------------------------------------------------------------------------- /dynetlsm/datasets/load_monks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from os.path import dirname, join 5 | from sklearn.preprocessing import LabelEncoder 6 | 7 | 8 | __all__ = ['load_monks'] 9 | 10 | 11 | def load_monks(dynamic=True, is_directed=True, include_waverers=False, 12 | encode_labels=True): 13 | """Loads Sampson's Monastery Network (1968).""" 14 | if dynamic: 15 | return load_dynamic_monks(encode_labels, include_waverers, 16 | is_directed=is_directed) 17 | else: 18 | return load_static_monks(encode_labels, include_waverers, 19 | is_directed=is_directed) 20 | 21 | 22 | def load_dynamic_monks(encode_labels=True, include_waverers=False, 23 | is_directed=True): 24 | module_path = dirname(__file__) 25 | 26 | n_time_steps = 3 27 | Y = np.empty((n_time_steps, 18, 18), dtype=np.float64) 28 | 29 | for t in range(n_time_steps): 30 | Y[t] = np.loadtxt(join(module_path, 'raw_data', 31 | 'sampson_{}.npy'.format(t))) 32 | # load groups 33 | file_name = ('sampson_groups_waverers.txt' if include_waverers else 34 | 'sampson_groups.txt') 35 | 36 | with open(join(module_path, 'raw_data', file_name)) as f: 37 | groups = np.array([l.rstrip('\n') for l in f.readlines()]) 38 | 39 | if encode_labels: 40 | groups = LabelEncoder().fit_transform(groups) 41 | 42 | with open(join(module_path, 'raw_data', 'sampson_names.txt')) as f: 43 | names = np.array([l.rstrip('\n') for l in f.readlines()]) 44 | 45 | if not is_directed: 46 | Y += Y.transpose((0, 2, 1)) 47 | Y = (Y > 0).astype(np.float64) 48 | 49 | return Y, np.repeat(groups.reshape(1, -1), n_time_steps, axis=0), names 50 | 51 | 52 | def load_static_monks(encode_labels=True, include_waverers=False, 53 | is_directed=True): 54 | module_path = dirname(__file__) 55 | 56 | Y = np.loadtxt(join(module_path, 'raw_data', 'sampson.npy')) 57 | 58 | # load groups 59 | file_name = ('sampson_groups_waverers.txt' if include_waverers else 60 | 'sampson_groups.txt') 61 | with open(join(module_path, 'raw_data', file_name)) as f: 62 | groups = np.array([l.rstrip('\n') for l in f.readlines()]) 63 | 64 | if encode_labels: 65 | groups = LabelEncoder().fit_transform(groups) 66 | 67 | if not is_directed: 68 | Y += Y.T 69 | Y = (Y > 0).astype(np.float64) 70 | 71 | return Y, groups 72 | --------------------------------------------------------------------------------
/dynetlsm/datasets/raw_data/got/got-s5-node.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser 5 | AREO,Areo 6 | ARYA,Arya 7 | BARRISTAN,Barristan 8 | BLACK_HAIRED_PROSTITUTE,Strumpet 9 | BRAN,Bran 10 | BRAND,Brand 11 | BRIAN,Brian 12 | BRIENNE,Brienne 13 | BRONN,Bronn 14 | CATELYN,Catelyn 15 | CERSEI,Cersei 16 | CERWYN,Cerwyn 17 | DAARIO,Daario 18 | DAENERYS,Daenerys 19 | DAVOS,Davos 20 | DENYS,Denys 21 | DERRYK,Derryk 22 | DORAN,Doran 23 | DORNISH_RIDER,Dornish Rider 24 | EDDISON_TOLLETT,Eddison 25 | ELLARIA,Ellaria 26 | FALYSE,Falyse 27 | GILLY,Gilly 28 | GRENN,Grenn 29 | GREY_WORM,Grey Worm 30 | HIGH_SEPTON,High Septon 31 | HIGH_SPARROW,High Sparrow 32 | HIZDAHR,Hizdahr 33 | JAIME,Jaime 34 | JANOS,Janos 35 | JAQEN,Jaqen 36 | JEOR,Jeor 37 | JOFFREY,Joffrey 38 | JON,Jon 39 | JORAH,Jorah 40 | KARSI,Karsi 41 | KEVAN,Kevan 42 | LANCEL,Lancel 43 | LITTLE_SAM,Little Sam 44 | LITTLEFINGER,Petyr 45 | LOBODA,Loboda 46 | LOLLYS,Lollys 47 | LORAS,Loras 48 | LORD_OF_BONES,Lord of Bones 49 | LORD_WEEBLY,Lord Weebly 50 | LYANNA,Lynanna 51 | LYANNA_MORMONT,Lyanna Mormont 52 | LYSA,Lysa 53 | MACE,Mace 54 | MADAME,Madame 55 | MAESTER_AEMON,Aemon 56 | MAGGY,Maggy 57 | MAGNAR,Magnar 58 | MALKO,Malko 59 | MANCE,Mance 60 | MARGAERY,Margaery 61 | MELARA,Melara 62 | MELISANDRE,Melisandre 63 | MERYN_TRANT,Meryn 64 | MISSANDEI,Missandei 65 | MOSSADOR,Mossador 66 | MOUNTAIN,Mountain 67 | MYRANDA,Myranda 68 | MYRCELLA,Myrcella 69 | NED,Ned 70 | NIGHT_KING,Night King 71 | NYMERIA,Nymeria 72 | OBARA,Obara 73 | OBERYN,Oberyn 74 | OLENNA,Olenna 75 | OLLY,Olly 76 | OLYVAR,Olyvar 77 | OWNER,Owner 78 | PODRICK,Podrick 79 | PYCELLE,Pycelle 80 | PYP,Pyp 81 | QUICK,Quick 82 | QYBURN,Qyburn 83 | RAMSAY,Ramsay 84 | RANDYLL,Randyll 85 | RENLY,Renly 86 | RHAEGAR,Rhaegar 87 | RHAENYRA,Rhaenyra 88 | RICKON,Rickon 89 | ROBB,Robb 90 | ROBERT,Robert 91 | ROBIN,Robin 92 | ROOSE_BOLTON,Roose 93 | ROYCE,Royce 94 | SAM,Sam 95 | SANSA,Sansa 96 | 
SELWYN,Selwyn 97 | SELYSE,Selyse 98 | SEPTA_UNELLA,Septa Unella 99 | SHAE,Shae 100 | SHIREEN,Shireen 101 | STANNIS,Stannis 102 | STEFFON,Steffon 103 | STRONG,Strong 104 | TANDA,Tanda 105 | THEON,Theon 106 | THIN_MAN,Thin Man 107 | TOMMEN,Tommen 108 | TORMUND,Tormund 109 | TRYSTANE,Trystane 110 | TYCHO,Tycho 111 | TYENE,Tyene 112 | TYRION,Tyrion 113 | TYWIN,Tywin 114 | VARYS,Varys 115 | WAIF,Waif 116 | WALDA,Walda 117 | WALDER,Walder 118 | WOLKAN,Wolkan 119 | OTHELL_YARWYCK,Othell 120 | YOHN_ROYCE,Yohn -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s3-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ANGUY,Anguy 5 | ARYA,Arya 6 | LITTLE_SAM,Little Sam 7 | BALERION,Balerion 8 | BALON,Balon 9 | BARRISTAN,Barristan 10 | BERIC,Beric 11 | BERT,Bert 12 | BLACK_WALDER,Black Walder 13 | BOROS,Boros 14 | BRAN,Bran 15 | BRANDON_STARK,Brandon 16 | BRIENNE,Brienne 17 | BRONN,Bronn 18 | BRYNDEN,Brynden 19 | CATELYN,Catelyn 20 | CERSEI,Cersei 21 | CRASTER,Craster 22 | DAARIO,Daario 23 | DAENERYS,Daenerys 24 | DAREON,Dareon 25 | DAVOS,Davos 26 | DESMOND,Desmond 27 | DROGO,Drogo 28 | EDDISON_TOLLETT,Eddison 29 | EDMURE,Edmure 30 | ELDRICK,Eldrick 31 | FARMER,Farmer 32 | FREY_SEPTON,Frey Septon 33 | GENDRY,Gendry 34 | GILLY,Gilly 35 | GREIZHEN,Greizhen 36 | GRENN,Grenn 37 | GREY_WORM,Grey Worm 38 | HIGH_SEPTON,High Septon 39 | HOBB,Hobb 40 | HODOR,Hodor 41 | HOSTER,Hoster 42 | HOT_PIE,Hot Pie 43 | HOUND,Sandor 44 | ILYN_PAYNE,Ilyn 45 | JAIME,Jaime 46 | JEOR,Jeor 47 | JOANNA,Joanna 48 | JOFFREY,Joffrey 49 | JOJEN,Jojen 50 | JON,Jon 51 | JORAH,Jorah 52 | JOYEUSE,Joyeuse 53 | KARL_TANNER,Karl 54 | KRAZNYS_MO_NAKLOZ,Kraznys 55 | LITTLEFINGER,Petyr 56 | LOCKE,Locke 57 | LORAS,Loras 58 | LORD_OF_BONES,Lord of Bones 59 | LOTHAR,Lothar 60 | LYSA,Lysa 61 | MACE,Mace 62 | MAESTER_AEMON,Aemon 63 | MAESTER_LUWIN,Luwin 64 | MANCE,Mance 65 | MAREI,Marei 66 | MARGAERY,Margaery 67 | MARTYN,Martyn 68 | MATTHOS,Matthos 69 | MEERA,Meera 70 | MELISANDRE,Melisandre 71 | MERO,Mero 72 | MERRY,Merry 73 | MERYN_TRANT,Meryn 74 | MISSANDEI,Missandei 75 | MOUNTAIN,Gregor 76 | MYCAH,Mycah 77 | MYRANDA,Myranda 78 | NED,Ned 79 | OLENNA,Olenna 80 | OLYVAR,Olyvar 81 | ORELL,Orell 82 | OSHA,Osha 83 | PODRICK,Podrick 84 | PRENDAHL,Prendahl 85 | PYCELLE,Pycelle 86 | PYP,Pyp 87 | QHORIN_HALFHAND,Qhorin 88 | QYBURN,Qyburn 89 | RADZAI_MO_ERAZ,Radzai 90 | RAMSAY,Ramsay 91 | RANDYLL,Randyll 92 | RAST,Rast 93 | LORD_OF_BONES,Lord of Bones 94 | RENLY,Renly 95 | RICKARD_KARSTARK,Rickard Karstark 96 | RICKON,Rickon 97 | ROBB,Robb 98 | ROBERT,Robert 99 | ROOSE_BOLTON,Roose 100 | ROS,Ros 101 | ROSLIN,Roslin 102 | RYLENE,Rylene 103 | SALLADHOR,Salladhor 104 | SAM,Sam 105 | SANSA,Sansa 106 | SELWYN,Selwyn 107 | SELYSE,Selyse 108 | SHAE,Shae 109 | SHIREEN,Shireen 110 | SORCERER,Sorcerer 111 | STANNIS,Stannis 112 | STEELSHANKS_WALTON,Steelshanks Walton 113 | TALISA,Talisa 114 | TARYN_MANT,Taryn 115 | THEON,Theon 116 | THOROS,Thoros 117 | TORMUND,Tormund 118 | TORTURER,Torturer 119 | TYRION,Tyrion 120 | TYWIN,Tywin 121 | VARYS,Varys 122 | VIOLET,Violet 123 | WALDER,Walder 124 | WILLEM_LANNISTER,Willem 125 | YARA,Yara 126 | YGRITTE,Ygritte -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s1-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | ADDAM_MARBRAND,Addam 3 | AEGON,Aegon 
4 | AERYS,Aerys 5 | ALLISER_THORNE,Allister 6 | ARYA,Arya 7 | ASSASSIN,Assassin 8 | BAELOR,Baelor 9 | BALON,Balon 10 | BARRISTAN,Barristan 11 | BENJEN,Benjen 12 | BERIC,Beric 13 | BORCAS,Borcas 14 | BOWEN_MARSH,Bowen 15 | BRAN,Bran 16 | BRANDON_STARK,Brandon 17 | BRONN,Bronn 18 | CATELYN,Catelyn 19 | CERSEI,Cersei 20 | COHOLLO,Cohollo 21 | DAENERYS,Daenerys 22 | DAREON,Dareon 23 | DOREAH,Doreah 24 | DROGO,Drogo 25 | GALBART_GLOVER,Galbart 26 | GARED,Gared 27 | GENDRY,Gendry 28 | GREATJON_UMBER,Greatjon 29 | GRENN,Grenn 30 | HIGH_SEPTON,High Septon 31 | HODOR,Hodor 32 | HOSTER,Hoster 33 | HOT_PIE,Hot Pie 34 | HOUND,Sandor 35 | HUGH_OF_THE_VALE,Hugh of the Vale 36 | ILLYRIO,Illyrio 37 | ILYN_PAYNE,Ilyn 38 | IROGENIA,Irogenia 39 | IRRI,Irri 40 | JAIME,Jaime 41 | JANOS,Janos 42 | JAREMY_RYKKER,Jaremy 43 | JEOR,Jeor 44 | JHIQUI,Jhiqui 45 | JOANNA,Joanna 46 | JOFFREY,Joffrey 47 | JON,Jon 48 | JON_ARRYN,Jon Arryn 49 | JONOS_BRACKEN,Jonos 50 | JORAH,Jorah 51 | JORY_CASSEL,Jory 52 | JOYEUSE,Joyeuse 53 | KEVAN,Kevan 54 | LANCEL,Lancel 55 | LEO_LEFFORD,Leo 56 | LITTLE_BIRD,Little Bird 57 | LITTLEFINGER,Petyr 58 | LOMMY_GREENHANDS,Lommy 59 | LORAS,Loras 60 | LUKE,Luke 61 | LYANNA,Lyanna 62 | LYSA,Lysa 63 | MACE,Mac 64 | MAESTER_AEMON,Aemon 65 | MAESTER_LUWIN,Luwin 66 | MAGO,Mago 67 | MARILLION,Marillion 68 | MASHA_HEDDLE,Masha 69 | MELESSA,Melessa 70 | MERYN_TRANT,Meryn 71 | MHAEGEN,Mhaegen 72 | MIRRI_MAZ_DUUR,Mirri Maz Dur 73 | MORD,Mord 74 | MOUNTAIN,Gregor 75 | MYCAH,Mycah 76 | MYRCELLA,Myrcella 77 | NED,Ned 78 | OLD_NAN,Old Nan 79 | OSHA,Osha 80 | OTHELL_YARWYCK,Othell 81 | OTHOR,Othor 82 | PYCELLE,Pycelle 83 | PYP,Pyp 84 | QOTHO,Qotho 85 | RAKHARO,Rakharo 86 | RANDYLL,Randyll 87 | RAST,Rast 88 | RENLY,Renly 89 | RHAEGAR,Rhaegar 90 | RHAEGO,Rhaego 91 | RICKARD_KARSTARK,Rickard Karstark 92 | RICKARD_STARK,Rickard Stark 93 | RICKON,Rickon 94 | ROBB,Robb 95 | ROBERT,Robert 96 | ROBIN,Robin 97 | RODRIK,Rodrik 98 | ROOSE_BOLTON,Roose 99 | ROS,Ros 100 | ROYCE,Royce 101 | RYGER_RIVERS,Ryger 102 | SAM,Sam 103 | SANSA,Sansa 104 | SEPTA_MORDANE,Septa Mordane 105 | SHAE,Shae 106 | SHAGGA,Shagga 107 | STABLE_BOY,Stable Boy 108 | STANNIS,Stannis 109 | STEFFON,Steffon 110 | STEVRON_FREY,Stevron 111 | SYRIO_FOREL,Syrio 112 | THEON,Theon 113 | TOBHO_MOTT,Tobho Mott 114 | TOMARD,Tomard 115 | TOMMEN,Tommen 116 | TYRION,Tyrion 117 | TYSHA,Tysha 118 | TYWIN,Tywin 119 | VARDIS_EGEN,Vardis 120 | VARLY,Varly 121 | VARYS,Varys 122 | VISERYS,Viserys 123 | WALDER,Walder 124 | WAYMAR_ROYCE,Waymar 125 | WILL,Will 126 | WINE_MERCHANT,Wine Merchant 127 | YOREN,Yoren -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s2-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | AEGON,Aegon 3 | AERYS,Aerys 4 | ALTON,Alton 5 | AMORY,Amory 6 | ARYA,Arya 7 | BALON,Balon 8 | BARRA,Barra 9 | BENJEN,Benjen 10 | BILLY,Billy 11 | BITER,Biter 12 | BLACK_LORREN,Black Lorren 13 | BOROS,Boros 14 | BRAN,Bran 15 | BRIENNE,Brienne 16 | BRONN,Bronn 17 | CAPTAINS_DAUGHTER,Captain's Daughter 18 | CATELYN,Catelyn 19 | CERSEI,Cersei 20 | COLEN,Colen 21 | CRASTER,Craster 22 | CRESSEN,Cressen 23 | DAENERYS,Daenerys 24 | DAGMER,Dagmer 25 | DAISY,Daisy 26 | DAVOS,Davos 27 | DONTOS,Dontos 28 | DOREAH,Doreah 29 | DROGO,Drogo 30 | DROWNED_PRIEST,Drowned Priest 31 | EDDISON_TOLLETT,Eddison 32 | FARLEN,Farlen 33 | FATHER_SEAWORTH,Father Seaworth 34 | FREY_DAUGHTER,Frey Daughter 35 | GENDRY,Gendry 36 | GERARD,Gerard 37 | GILLY,Gilly 38 | 
GRENN,Grenn 39 | HARREN,Harren 40 | HAYLENE,Haylene 41 | HIGH_SEPTON,High Septon 42 | HODOR,Hodor 43 | HOT_PIE,Hot Pie 44 | HOUND,Sandor 45 | ILYN_PAYNE,Ilyn 46 | IRRI,Irri 47 | JACKS,Jacks 48 | JAIME,Jaime 49 | JANOS,Janos 50 | JAQEN,Jaqen 51 | JEOR,Jeor 52 | JOANNA,Joanna 53 | JOFFREY,Joffrey 54 | JON,Jon 55 | JON_ARRYN,Jon Arryn 56 | JORAH,Jorah 57 | KEVAN,Kevan 58 | KOVARRO,Kovarro 59 | LANCEL,Lancel 60 | LITTLEFINGER,Petyr 61 | LOMMY,Lommy 62 | LORAS,Loras 63 | LYSA,Lysa 64 | MAESTER_LUWIN,Luwin 65 | MALAKKO,Malakko 66 | MANCE,Mance 67 | MANDON,Mandon 68 | MARGAERY,Margaery 69 | MARYA,Marya 70 | MATTHOS,Matthos 71 | MELESSA,Melessa 72 | MELISANDRE,Melisandre 73 | MERYN_TRANT,Meryn 74 | MHAEGEN,Mhaegen 75 | MOUNTAIN,Gregor 76 | MYRCELLA,Myrcella 77 | NED,Ned 78 | OSHA,Osha 79 | PODRICK,Podrick 80 | POLLIVER,Polliver 81 | PORTAN,Portan 82 | PROTESTER,Protester 83 | PYATT_PREE,Pyatt Pree 84 | PYCELLE,Pycelle 85 | QHORIN,Quorin 86 | QUAITHE,Quaithe 87 | QUENT,Quent 88 | RAKHARO,Rakharo 89 | RAMSAY,Ramsay 90 | RANDYLL,Randyll 91 | LORD_OF_BONES,Lord of Bones 92 | REGINALD,Reginald 93 | RENLY,Renly 94 | RENNICK,Rennick 95 | RHAEGO,Rhaego 96 | RHAENYS,Rhaenys 97 | RICKARD_KARSTARK,Rickard Karstark 98 | RICKON,Rickon 99 | ROBB,Robb 100 | ROBERT,Robert 101 | ROBIN,Robin 102 | RODRIK,Rodrik 103 | ROOSE_BOLTON,Roose 104 | RORGE,Rorge 105 | ROS,Ros 106 | SALLADHOR,Salladhor 107 | SAM,Sam 108 | SANSA,Sansa 109 | SEPTON,Septon 110 | SHAE,Shae 111 | SILK_KING,Silk King 112 | SPICE_KING,Spice King 113 | STANNIS,Stannis 114 | SYRIO_FOREL,Syrio 115 | TALISA,Talisa 116 | THEON,Theon 117 | TICKLER,Tickler 118 | TIMETT,Timett 119 | TOMMEN,Tommen 120 | TORRHEN,Torrhen 121 | TRYSTANE,Trystane 122 | TYRION,Tyrion 123 | TYWIN,Tywin 124 | VARYS,Varys 125 | VISENYA,Visenya 126 | WINTERFELL_SHEPHERD,Shepherd 127 | XARO,Xaro 128 | YARA,Yara 129 | YGRITTE,Ygritte 130 | YOREN,Yoren -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/military_alliances/names.csv: -------------------------------------------------------------------------------- 1 | name 2 | United Kingdom 3 | Hanover 4 | Bavaria 5 | Germany 6 | Baden 7 | Saxony 8 | Wuerttemburg 9 | Hesse Electoral 10 | Hesse Grand Ducal 11 | Mecklenburg Schwerin 12 | Austria-Hungary 13 | Netherlands 14 | France 15 | Belgium 16 | Italy 17 | Russia 18 | Spain 19 | Paraguay 20 | Brazil 21 | Ecuador 22 | Peru 23 | Colombia 24 | Argentina 25 | Bolivia 26 | Korea 27 | Yugoslavia 28 | Guatemala 29 | Honduras 30 | El Salvador 31 | Nicaragua 32 | United States of America 33 | Greece 34 | Bulgaria 35 | Romania 36 | Czechoslovakia 37 | Turkey 38 | Poland 39 | Iran 40 | Austria 41 | Estonia 42 | Hungary 43 | Saudi Arabia 44 | Latvia 45 | Iraq 46 | Cuba 47 | Haiti 48 | Dominican Republic 49 | Mexico 50 | Costa Rica 51 | Panama 52 | Venezuela 53 | Chile 54 | Japan 55 | Australia 56 | Mauritania 57 | Somalia 58 | Djibouti 59 | Morocco 60 | Algeria 61 | Tunisia 62 | Libya 63 | Sudan 64 | Egypt 65 | Syria 66 | Lebanon 67 | Jordan 68 | Yemen Arab Republic 69 | Yemen 70 | Yemen People's Republic 71 | Kuwait 72 | Bahrain 73 | Qatar 74 | United Arab Emirates 75 | Albania 76 | Canada 77 | Bahamas 78 | Jamaica 79 | Trinidad and Tobago 80 | Barbados 81 | Dominica 82 | Grenada 83 | St. Lucia 84 | St. Vincent and the Grenadines 85 | Antigua & Barbuda 86 | St. 
Kitts and Nevis 87 | Belize 88 | Guyana 89 | Suriname 90 | Luxembourg 91 | Portugal 92 | German Federal Republic 93 | Czech Republic 94 | Norway 95 | Denmark 96 | Iceland 97 | Pakistan 98 | Thailand 99 | Philippines 100 | German Democratic Republic 101 | China 102 | Central African Republic 103 | Chad 104 | Cyprus 105 | Afghanistan 106 | Mali 107 | Guinea 108 | Senegal 109 | Benin 110 | Niger 111 | Ivory Coast 112 | Burkina Faso 113 | Togo 114 | Cameroon 115 | Gabon 116 | Congo 117 | Rwanda 118 | India 119 | Myanmar 120 | Cambodia 121 | Vietnam 122 | Kenya 123 | Democratic Republic of the Congo 124 | Burundi 125 | Uganda 126 | Cape Verde 127 | Guinea-Bissau 128 | Gambia 129 | Liberia 130 | Sierra Leone 131 | Ghana 132 | Angola 133 | Ethiopia 134 | South Africa 135 | Mozambique 136 | Malta 137 | North Korea 138 | Moldova 139 | Ukraine 140 | Belarus 141 | Armenia 142 | Georgia 143 | Azerbaijan 144 | Turkmenistan 145 | Tajikistan 146 | Kyrgyzstan 147 | Uzbekistan 148 | Croatia 149 | Slovakia 150 | Lithuania 151 | Indonesia 152 | Kazakhstan 153 | Zimbabwe 154 | Sao Tome and Principe 155 | Equatorial Guinea 156 | Tanzania 157 | Zambia 158 | Sweden 159 | Two Sicilies 160 | Tuscany 161 | Modena 162 | Parma 163 | Uruguay 164 | Finland 165 | Mongolia 166 | New Zealand 167 | Oman 168 | South Korea 169 | Taiwan 170 | Israel 171 | Malaysia 172 | Madagascar 173 | Republic of Vietnam 174 | Mauritius 175 | Bangladesh 176 | Nigeria 177 | Swaziland 178 | Bosnia and Herzegovina 179 | Namibia 180 | Eritrea 181 | South Sudan 182 | -------------------------------------------------------------------------------- /dynetlsm/label_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.cluster.hierarchy as hc 3 | 4 | from scipy.spatial.distance import squareform 5 | 6 | from .model_selection.approx_bic import calculate_cluster_counts 7 | from .model_selection.approx_bic import calculate_cluster_counts_t 8 | 9 | 10 | def renormalize_weights(model, sample_id): 11 | # re-normalize weights 12 | active_groups = np.unique(model.zs_[sample_id].ravel()) 13 | active_mask = np.in1d(np.arange(model.n_components), active_groups) 14 | n_groups = active_groups.shape[0] 15 | 16 | beta = model.betas_[sample_id, active_groups] 17 | beta /= beta.sum() 18 | 19 | weights = model.weights_[sample_id] 20 | init_w = weights[0, 0, active_groups] 21 | init_w /= init_w.sum() 22 | 23 | n_time_steps, n_nodes, _ = model.Y_fit_.shape 24 | trans_w = np.zeros((n_time_steps, n_groups, n_groups), dtype=np.float64) 25 | for t in range(1, n_time_steps): 26 | trans_w[t] = weights[t, active_groups][:, active_groups] 27 | trans_w[t] /= np.sum(trans_w[t], axis=1).reshape(-1, 1) 28 | 29 | # return_inverse relabels z to start at zero 30 | _, temp_z = np.unique(model.zs_[sample_id].ravel(), return_inverse=True) 31 | z = temp_z.reshape(n_time_steps, n_nodes) 32 | 33 | # relabel mu and sigma as well 34 | mu = model.mus_[sample_id, active_groups] 35 | sigma = model.sigmas_[sample_id, active_groups] 36 | 37 | return z, beta, init_w, trans_w, mu, sigma 38 | 39 | 40 | def calculate_cooccurrence_matrix(z, n_groups=None): 41 | if n_groups is None: 42 | n_groups = np.unique(z).shape[0] 43 | 44 | # dummy encode group membership 45 | indicator = np.eye(n_groups)[z] 46 | 47 | return np.dot(indicator, indicator.T) 48 | 49 | 50 | def calculate_posterior_cooccurrence(model, t=0): 51 | # determine burn in samples 52 | n_burn = model.n_burn_ 53 | 54 | n_nodes = model.Y_fit_.shape[1] 55 | 
cooccurrence_proba = np.zeros((n_nodes, n_nodes)) 56 | n_iter = 0 57 | for z in model.zs_[n_burn:, t]: 58 | n_iter += 1 59 | cooccurrence_proba += calculate_cooccurrence_matrix( 60 | z, n_groups=model.n_components) 61 | 62 | return cooccurrence_proba / n_iter 63 | 64 | 65 | def cluster_posterior_coocurrence(model, t=0, threshold=0.5): 66 | cooccurence_proba = model.cooccurrence_probas_[t] 67 | 68 | # hierarchical clustering with average linkage 69 | linkage = hc.linkage(squareform(1. - cooccurence_proba), method='average', 70 | optimal_ordering=True) 71 | 72 | return hc.fcluster(linkage, t=threshold, criterion='distance') - 1 73 | 74 | 75 | def calculate_posterior_group_counts(model, t=0): 76 | counts = calculate_cluster_counts_t(model)[t] 77 | 78 | freq = np.bincount(counts) 79 | index = np.where(freq != 0)[0] 80 | freq = freq[index] 81 | 82 | return index, freq 83 | -------------------------------------------------------------------------------- /dynetlsm/network_likelihoods.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from .latent_space import calculate_distances 4 | from .gaussian_likelihood_fast import compute_gaussian_likelihood 5 | from .array_utils import triu_indices_from_3d, nondiag_indices_from_3d 6 | from .directed_likelihoods_fast import ( 7 | directed_network_loglikelihood_fast, 8 | directed_partial_loglikelihood, directed_intercept_grad, 9 | directed_network_probas, 10 | approx_directed_partial_loglikelihood, 11 | approx_directed_network_loglikelihood) 12 | from .static_network_fast import partial_loglikelihood 13 | 14 | 15 | # TODO: mask nan entries 16 | def dynamic_network_loglikelihood_directed(Y, X, 17 | intercept_in, intercept_out, radii, 18 | squared=False, dist=None): 19 | dist = calculate_distances(X, squared=squared) if dist is None else dist 20 | 21 | return directed_network_loglikelihood_fast(Y, dist, radii, 22 | intercept_in, intercept_out) 23 | 24 | 25 | # TODO: mask nan entries 26 | def dynamic_network_loglikelihood_undirected(Y, X, intercept, squared=False, 27 | dist=None): 28 | dist = calculate_distances(X, squared=squared) if dist is None else dist 29 | 30 | triu_indices = triu_indices_from_3d(dist, k=1) 31 | eta = intercept - dist[triu_indices] 32 | 33 | return np.sum(Y[triu_indices] * eta - np.log(1 + np.exp(eta))) 34 | 35 | 36 | def dynamic_network_loglikelihood(model, sample_id, dist=None): 37 | X = model.Xs_[sample_id] 38 | intercept = model.intercepts_[sample_id] 39 | radii = model.radiis_[sample_id] if model.is_directed else None 40 | if dist is None: 41 | dist = (None if model.case_control_sampler_ else 42 | calculate_distances(X, squared=False)) 43 | 44 | if model.is_directed: 45 | if model.case_control_sampler_ is not None: 46 | loglik = approx_directed_network_loglikelihood( 47 | X, 48 | radii=radii, 49 | in_edges=model.case_control_sampler_.in_edges_, 50 | out_edges=model.case_control_sampler_.out_edges_, 51 | degree=model.case_control_sampler_.degrees_, 52 | control_nodes=model.case_control_sampler_.control_nodes_out_, 53 | intercept_in=intercept[0], 54 | intercept_out=intercept[1], 55 | squared=False) 56 | else: 57 | loglik = dynamic_network_loglikelihood_directed( 58 | model.Y_fit_, X, 59 | intercept_in=intercept[0], 60 | intercept_out=intercept[1], 61 | radii=radii, dist=dist) 62 | else: 63 | loglik = dynamic_network_loglikelihood_undirected( 64 | model.Y_fit_, X, intercept, dist=dist) 65 | 66 | return loglik 67 | 
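# ---------------------------------------------------------------------------
# Editor's sketch (not part of the original module): a minimal smoke test of
# the undirected likelihood above, using the array conventions assumed
# throughout this module -- Y is (n_time_steps, n_nodes, n_nodes) and X is
# (n_time_steps, n_nodes, n_features). The synthetic inputs and the printed
# value are illustrative only.
if __name__ == '__main__':
    _rng = np.random.RandomState(42)
    _X = _rng.randn(3, 10, 2)                              # 3 steps, 10 nodes
    _A = (_rng.rand(3, 10, 10) < 0.2).astype(np.float64)   # random digraph
    _Y = ((_A + _A.transpose((0, 2, 1))) > 0).astype(np.float64)  # symmetrize
    print(dynamic_network_loglikelihood_undirected(_Y, _X, intercept=1.0))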
-------------------------------------------------------------------------------- /dynetlsm/static_network_fast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | 7 | from libc.math cimport log, exp, sqrt 8 | 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | 13 | ctypedef np.npy_float64 DOUBLE 14 | ctypedef np.npy_int64 INT 15 | 16 | 17 | def partial_loglikelihood(DOUBLE[:, :] Y, 18 | DOUBLE[:, :] X, 19 | double intercept, 20 | int node_id, 21 | bint squared=False): 22 | cdef int i, d = 0 23 | cdef int n_nodes = Y.shape[0] 24 | cdef int n_features = X.shape[1] 25 | cdef double dist = 0 26 | cdef double eta = 0 27 | cdef double loglik = 0 28 | 29 | for i in range(n_nodes): 30 | dist = 0 31 | eta = 0 32 | if i != node_id: 33 | for d in range(n_features): 34 | dist += (X[i, d] - X[node_id, d]) ** 2 35 | if squared: 36 | eta = intercept - dist 37 | else: 38 | eta = intercept - sqrt(dist) 39 | 40 | # in-case the network is undirected 41 | loglik += Y[node_id, i] * eta 42 | loglik -= log(1 + exp(eta)) 43 | 44 | return loglik 45 | 46 | 47 | def approx_partial_loglikelihood(DOUBLE[:, :] X, 48 | double intercept, 49 | INT[:, :] edges, 50 | INT[:] degrees, 51 | INT[:, :] control_nodes, 52 | int node_id, 53 | bint squared=False): 54 | cdef int j, d = 0 55 | cdef int n_nodes = X.shape[0] 56 | cdef int n_features = X.shape[1] 57 | cdef int n_control = control_nodes.shape[1] 58 | cdef int node_degree = degrees[node_id] 59 | cdef double dist = 0 60 | cdef double eta = 0 61 | cdef double loglik = 0 62 | cdef double control = 0 63 | cdef double control_adj = ( (n_nodes - 1) / n_control) 64 | 65 | # edges 66 | for j in range(node_degree): 67 | dist = 0 68 | eta = 0 69 | for d in range(n_features): 70 | dist += (X[edges[node_id, j], d] - X[node_id, d]) ** 2 71 | if squared: 72 | eta = intercept - dist 73 | else: 74 | eta = intercept - sqrt(dist) 75 | 76 | loglik += eta 77 | 78 | # control estimate 79 | for j in range(n_control): 80 | dist = 0 81 | eta = 0 82 | for d in range(n_features): 83 | dist += (X[control_nodes[node_id, j], d] - X[node_id, d]) ** 2 84 | if squared: 85 | eta = intercept - dist 86 | else: 87 | eta = intercept - sqrt(dist) 88 | 89 | control += log(1 + exp(eta)) 90 | 91 | # add control estimate 92 | loglik -= control_adj * control 93 | 94 | return loglik 95 | -------------------------------------------------------------------------------- /examples/homogeneous_dynsbm.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | library(reticulate) 3 | library(dynsbm) 4 | library(matrixStats) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | dynetlsm <- import("dynetlsm") 9 | sklearn <- import("sklearn") 10 | 11 | 12 | # choose between easy and hard 13 | sim_type <- 'hard' 14 | # sim_type <- 'easy' 15 | out_dir <- paste0('results_dynsbm_', sim_type) 16 | if (!dir.exists(out_dir)) { 17 | dir.create(out_dir) 18 | } 19 | 20 | compute.icl <- function(dynsbm){ 21 | T <- ncol(dynsbm$membership) 22 | Q <- nrow(dynsbm$trans) 23 | N <- nrow(dynsbm$membership) 24 | pen <- 0.5*Q*log(N*(N-1)*T/2) + 0.25*Q*(Q-1)*T*log(N*(N-1)/2) # binary case 25 | if ("sigma" %in% names(dynsbm)) pen <- 2*pen # continuous case 26 | return(dynsbm$loglikelihood - ifelse(T>1,0.5*Q*(Q-1)*log(N*(T-1)),0) - pen) 27 | } 
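# Editor's note: compute.icl() above appears to follow the ICL heuristic used
# with dynsbm -- the fitted log-likelihood minus BIC-style penalties: `pen`
# charges for the per-time-step block-connectivity parameters (and is doubled
# for continuous edge models, which carry an extra variance parameter per
# block pair), while the ifelse() term adds a 0.5*Q*(Q-1)*log(N*(T-1)) penalty
# for the Q x Q transition matrix whenever T > 1. The loop below keeps the Q
# that maximizes this criterion.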
28 | 29 | 30 | for (seed in 0:49) { 31 | print(seed) 32 | res <- dynetlsm$datasets$homogeneous_simulation( 33 | n_time_steps=6L, n_nodes=120L, random_state=as.integer(seed), 34 | simulation_type = sim_type) 35 | Y <- res[[1]] 36 | z <- res[[3]] 37 | sim_res <- list() 38 | 39 | models <- select.dynsbm(Y, Qmin=1, Qmax=8, edge.type="binary", 40 | nstart=10, fixed.param = FALSE, nb.cores = 8) 41 | 42 | # num of clusters maximizing ICL 43 | icls <- sapply(models, compute.icl) 44 | sim_res['num_clusters'] <- which.max(icls) 45 | 46 | # estimates based on the true number of clusters (G = 6) 47 | sbm <- models[[6]] 48 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score( 49 | as.vector(t(z)), as.vector(sbm$membership)) 50 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information( 51 | as.vector(t(z)), as.vector(sbm$membership)) 52 | 53 | # average statistics 54 | avg_rand <- 0 55 | avg_vi <- 0 56 | for (t in 1:dim(Y)[1]) { 57 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(z[t,], sbm$membership[,t]) 58 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(z[t,], sbm$membership[,t]) 59 | } 60 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 61 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 62 | 63 | # in-sample AUC 64 | probas <- array(0, dim=dim(Y)) 65 | for (t in 1:dim(Y)[1]) { 66 | b <- sbm$beta[t,,] 67 | m <- sbm$membership[,t] 68 | Z <- array(0, dim=c(length(m), 6)) 69 | for (i in 1:length(m)) { 70 | if (m[i] == 0) { 71 | Z[i,] <- diag(6)[1,] 72 | } 73 | else{ 74 | Z[i,] <- diag(6)[m[i],] 75 | } 76 | } 77 | probas[t,,] <- Z %*% b %*% t(Z) 78 | } 79 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 80 | 81 | df <- as.data.frame(sim_res) 82 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 83 | write.csv(df, file_name, row.names = FALSE) 84 | } 85 | -------------------------------------------------------------------------------- /examples/homogeneous_sbm.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(blockmodels) 3 | library(label.switching) 4 | library(zeallot) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | 9 | #source_python('~/myworkspace/dynetlsm/examples/test.py') 10 | 11 | dynetlsm <- import("dynetlsm") 12 | sklearn <- import("sklearn") 13 | 14 | 15 | # choose between easy and hard 16 | sim_type <- 'hard' 17 | #sim_type <- 'easy' 18 | out_dir <- paste0('results_sbm_', sim_type) 19 | if (!dir.exists(out_dir)) { 20 | dir.create(out_dir) 21 | } 22 | 23 | 24 | for (seed in 0:49) { 25 | res <- dynetlsm$datasets$homogeneous_simulation( 26 | n_time_steps=6L, n_nodes=120L, random_state=as.integer(seed), 27 | simulation_type = sim_type) 28 | Y <- res[[1]] 29 | z <- res[[3]] 30 | sim_res <- list() 31 | 32 | # blockmodels 33 | n_time_steps <- dim(Y)[[1]] 34 | n_nodes <- dim(Y)[[2]] 35 | z_sbm <- matrix(0, nrow=n_time_steps, ncol=n_nodes) 36 | p_sbm <- array(0, dim = c(n_time_steps, n_nodes, 6)) 37 | probas <- array(0, dim=dim(Y)) 38 | avg_rand <- 0 39 | avg_vi <- 0 40 | for (t in 1:n_time_steps) { 41 | sbm_models <- BM_bernoulli('SBM_sym', Y[t,,], explore_min=8, 42 | exploration_factor=1., ncores=8, verbosity=0) 43 | sbm_models$estimate() 44 | 45 | p_sbm[t,,] <- sbm_models$memberships[[6]]$Z 46 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 47 | sim_res[paste0('num_clusters_', t)] <- which.max(sbm_models$ICL) 48 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(as.vector(z[t,]), 
z_sbm[t,]) 49 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(as.vector(z[t,]), z_sbm[t,]) 50 | 51 | b <- sbm_models$model_parameters[[6]]$pi 52 | m <- z_sbm[t,] 53 | Z <- array(0, dim=c(length(m), 6)) 54 | for (i in 1:length(m)) { 55 | if (m[i] == 0) { 56 | Z[i,] <- diag(6)[1,] 57 | } 58 | else{ 59 | Z[i,] <- diag(6)[m[i],] 60 | } 61 | } 62 | probas[t,,] <- Z %*% b %*% t(Z) 63 | } 64 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 65 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 66 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 67 | 68 | res <- label.switching("ECR", zpivot=z_sbm[1,], z=z_sbm, K=6) 69 | perm <- res$permutations$ECR 70 | for (t in 1:n_time_steps) { 71 | p_sbm[t,,] <- p_sbm[t,,perm[t,]] 72 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 73 | } 74 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score(as.vector(t(z)), as.vector(t(z_sbm))) 75 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information(as.vector(t(z)), as.vector(t(z_sbm))) 76 | 77 | df <- as.data.frame(sim_res) 78 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 79 | write.csv(df, file_name, row.names = FALSE) 80 | } 81 | -------------------------------------------------------------------------------- /dynetlsm/model_selection/posterior_vi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..network_likelihoods import dynamic_network_loglikelihood 4 | 5 | 6 | __all__ = ['posterior_expected_vi', 'time_averaged_posterior_expected_vi', 7 | 'minimize_posterior_expected_vi'] 8 | 9 | 10 | def nonvectorized_posterior_expected_vi(labels, cooccurrence_proba): 11 | """non-vectorized expected VI used for testing""" 12 | vi = 0. 13 | n_samples = labels.shape[0] 14 | for i in range(n_samples): 15 | ind = labels == labels[i] 16 | vi += np.log2(np.sum(ind)) 17 | vi -= 2 * np.log2(np.sum(ind * cooccurrence_proba[i, :])) 18 | vi += np.log2(np.sum(cooccurrence_proba[i, :])) 19 | 20 | return vi / n_samples 21 | 22 | 23 | def posterior_expected_vi(labels, cooccurrence_proba): 24 | """Lower-bound to the posterior expectation of the VI.""" 25 | vi = 0. 26 | 27 | # number of samples and groups 28 | n_samples = labels.shape[0] 29 | n_groups = labels.max() + 1 30 | 31 | # cluster counts and membership indicators 32 | resp = np.zeros((n_samples, n_groups)) 33 | resp[np.arange(n_samples), labels] = 1 34 | nk = np.sum(resp, axis=0) 35 | 36 | # VI calculation 37 | nonzero_mask = nk != 0 # (labels may be non-contiguous) 38 | vi += np.sum(nk[nonzero_mask] * np.log2(nk[nonzero_mask])) 39 | vi -= 2 * np.log2( 40 | np.sum(cooccurrence_proba * resp[:, labels].T, axis=1)).sum() 41 | vi += np.log2(np.sum(cooccurrence_proba, axis=1)).sum() 42 | 43 | return vi / n_samples 44 | 45 | 46 | def time_averaged_posterior_expected_vi(labels, cooccurrence_proba): 47 | """Lower-bound to the time averaged posterior expected VI.""" 48 | vi = 0. 
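    # (editor's note) the time-averaged bound evaluates the static lower
    # bound at each time step, with that step's labels and co-occurrence
    # matrix, and averages the results over the T slices: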
49 | n_time_steps = labels.shape[0] 50 | for t in range(n_time_steps): 51 | vi += posterior_expected_vi(labels[t], cooccurrence_proba[t]) 52 | 53 | return vi / n_time_steps 54 | 55 | 56 | def minimize_posterior_expected_vi(model): 57 | # determine how many samples to burn 58 | n_burn = model.n_burn_ 59 | 60 | # calculate the expected VI for the partitions explored by the Markov chain 61 | n_samples = model.zs_.shape[0] 62 | sample_ids = np.arange(n_burn, n_samples) 63 | vis = np.zeros(sample_ids.shape[0]) 64 | for i, idx in enumerate(sample_ids): 65 | vis[i] = time_averaged_posterior_expected_vi( 66 | model.zs_[idx], model.cooccurrence_probas_) 67 | 68 | # check for ties 69 | min_ids = np.where(vis == vis.min())[0] 70 | if min_ids.shape[0] > 1: 71 | # choose the configuration with the highest log-likelihood log(p(Y | X)) 72 | best_id, best_loglik = None, -np.inf 73 | for min_id in min_ids: 74 | loglik = dynamic_network_loglikelihood( 75 | model, sample_id=sample_ids[min_id]) 76 | if loglik > best_loglik: 77 | best_id = sample_ids[min_id] 78 | best_loglik = loglik 79 | else: 80 | best_id = sample_ids[min_ids[0]] 81 | 82 | return best_id 83 | -------------------------------------------------------------------------------- /dynetlsm/datasets/detection_limit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from functools import lru_cache 4 | from scipy.special import expit 5 | from sklearn.utils import check_random_state 6 | 7 | from .samples_generator import network_from_dynamic_latent_space 8 | 9 | 10 | __all__ = ['make_lookup_table', 'detection_limit_simulation'] 11 | 12 | 13 | @lru_cache() 14 | def make_lookup_table( 15 | n_samples=10000, low=0.1, high=2.5, n_bins=100, random_state=42): 16 | rng = check_random_state(random_state) 17 | 18 | sigma = 0.5 19 | intercept = 1.0 20 | ratio = np.zeros((n_bins, 4)) 21 | mu = np.linspace(low, high, n_bins) 22 | for b, m in enumerate(mu): 23 | mus = m * np.array([[1, 0], 24 | [-1, 0]]) 25 | X = np.sqrt(sigma) * rng.randn(n_samples, 8) 26 | p_in, p_out = 0, 0 27 | for i in range(n_samples): 28 | x = X[i, :2] + mus[0] 29 | y = X[i, 2:4] + mus[0] 30 | x0 = X[i, 4:6] + mus[0] 31 | x1 = X[i, 6:] + mus[1] 32 | p_in += expit(intercept - np.sqrt(((x - x0) ** 2).sum())) 33 | p_out += expit(intercept - np.sqrt(((y - x1) ** 2).sum())) 34 | 35 | ratio[b] = np.array([ 36 | p_out / p_in, m, p_in / n_samples, p_out / n_samples]) 37 | 38 | return ratio 39 | 40 | 41 | def detection_limit_simulation( 42 | n_nodes=120, n_time_steps=4, trans_proba=0.2, lmbda=0.8, r=0.5, 43 | random_state=42): 44 | rng = check_random_state(random_state) 45 | 46 | ratio = make_lookup_table() 47 | idx = np.argmin(np.abs(r - ratio[:, 0])) 48 | mu = ratio[idx, 1] 49 | sigma = 0.5 50 | intercept = 1.0 51 | mus = mu * np.array([[1, 0], 52 | [-1, 0]]) 53 | X, z = [], [] 54 | z0 = rng.choice([0, 1], p=[0.5, 0.5], size=n_nodes) 55 | X0 = sigma * rng.randn(n_nodes, 2) + mus[z0] 56 | X.append(X0) 57 | z.append(z0) 58 | 59 | wt = np.array([[1 - trans_proba, trans_proba], 60 | [trans_proba, 1 - trans_proba]]) 61 | for t in range(1, n_time_steps): 62 | zt = np.zeros(n_nodes, dtype=int) 63 | for group_id in range(2): 64 | group_mask = z[t - 1] == group_id 65 | zt[group_mask] = rng.choice(np.arange(2), p=wt[group_id, :], 66 | size=np.sum(group_mask)) 67 | 68 | Xt = np.zeros((n_nodes, 2), dtype=np.float64) 69 | for group_id in range(2): 70 | group_mask = zt == group_id 71 | group_count = np.sum(group_mask) 72 | Xt[group_mask, :] = ( 73 | 
sigma * rng.randn(group_count, 2) + ( 74 | lmbda * mus[group_id] + (1 - lmbda) * X[t-1][group_mask, :]) 75 | ) 76 | 77 | X.append(Xt) 78 | z.append(zt) 79 | 80 | X = np.stack(X, axis=0) 81 | z = np.vstack(z) 82 | 83 | Y, probas = network_from_dynamic_latent_space( 84 | X, intercept=intercept, random_state=rng) 85 | 86 | return Y, X, z, probas, ratio[idx, 0], mus 87 | -------------------------------------------------------------------------------- /examples/inhomogeneous_sbm.R: -------------------------------------------------------------------------------- 1 | library(reticulate) 2 | library(blockmodels) 3 | library(label.switching) 4 | library(zeallot) 5 | 6 | # Set the path to the Python executable file 7 | use_python("~/.virtualenv/stat/bin/python", required = T) 8 | 9 | 10 | dynetlsm <- import("dynetlsm") 11 | sklearn <- import("sklearn") 12 | 13 | 14 | # choose between easy and hard 15 | sim_type <- 'hard' 16 | # sim_type <- 'easy' 17 | out_dir <- paste0('inhomo_results_sbm_', sim_type) 18 | if (!dir.exists(out_dir)) { 19 | dir.create(out_dir) 20 | } 21 | 22 | 23 | for (seed in 0:49) { 24 | res <- dynetlsm$datasets$inhomogeneous_simulation( 25 | n_time_steps=9L, n_nodes=120L, random_state=as.integer(seed), 26 | simulation_type = sim_type) 27 | Y <- res[[1]] 28 | Y <- Y[1:9,,] 29 | z <- res[[3]] 30 | z <- z[1:9,] 31 | sim_res <- list() 32 | 33 | # blockmodels 34 | n_time_steps <- dim(Y)[[1]] 35 | n_nodes <- dim(Y)[[2]] 36 | z_sbm <- matrix(0, nrow=n_time_steps, ncol=n_nodes) 37 | p_sbm <- array(0, dim = c(n_time_steps, n_nodes, 6)) 38 | probas <- array(0, dim=dim(Y)) 39 | avg_rand <- 0 40 | avg_vi <- 0 41 | n_clusters <- c(2, 2, 2, 6, 6, 6, 4, 4, 4) 42 | for (t in 1:n_time_steps) { 43 | sbm_models <- BM_bernoulli('SBM_sym', Y[t,,], explore_min=8, 44 | exploration_factor=1., ncores=8, verbosity=0) 45 | sbm_models$estimate() 46 | 47 | p_sbm[t,,1:n_clusters[t]] <- sbm_models$memberships[[n_clusters[t]]]$Z 48 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 49 | sim_res[paste0('num_clusters_', t)] <- which.max(sbm_models$ICL) 50 | avg_rand <- avg_rand + sklearn$metrics$adjusted_rand_score(as.vector(z[t,]), z_sbm[t,]) 51 | avg_vi <- avg_vi + dynetlsm$metrics$variation_of_information(as.vector(z[t,]), z_sbm[t,]) 52 | 53 | b <- sbm_models$model_parameters[[n_clusters[t]]]$pi 54 | m <- z_sbm[t,] 55 | Z <- array(0, dim=c(length(m), n_clusters[t])) 56 | for (i in 1:length(m)) { 57 | if (m[i] == 0) { 58 | Z[i,] <- diag(n_clusters[t])[1,] 59 | } 60 | else{ 61 | Z[i,] <- diag(n_clusters[t])[m[i],] 62 | } 63 | } 64 | probas[t,,] <- Z %*% b %*% t(Z) 65 | } 66 | sim_res['avg_rand'] <- avg_rand / dim(Y)[1] 67 | sim_res['avg_vi'] <- avg_vi / dim(Y)[1] 68 | sim_res['insample_auc'] <- dynetlsm$metrics$network_auc(Y, probas) 69 | 70 | res <- label.switching("ECR", zpivot=z_sbm[1,], z=z_sbm, K=6) 71 | perm <- res$permutations$ECR 72 | for (t in 1:n_time_steps) { 73 | p_sbm[t,,] <- p_sbm[t,,perm[t,]] 74 | z_sbm[t,] <- apply(p_sbm[t,,], 1, which.max) 75 | } 76 | sim_res['rand_index'] <- sklearn$metrics$adjusted_rand_score(as.vector(t(z)), as.vector(t(z_sbm))) 77 | sim_res['vi'] <- dynetlsm$metrics$variation_of_information(as.vector(t(z)), as.vector(t(z_sbm))) 78 | 79 | df <- as.data.frame(sim_res) 80 | file_name <- file.path(out_dir, paste0('benchmark_', seed, '.csv')) 81 | write.csv(df, file_name, row.names = FALSE) 82 | } 83 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s6-nodes.csv: 
-------------------------------------------------------------------------------- 1 | Id,Label 2 | AERON,Aeron 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser 5 | AREO,Areo 6 | ARTHUR,Arthur 7 | ARTHUR_CHILD,Arthur (child) 8 | ARYA,Arya 9 | BALON,Balon 10 | BELICHO,Belicho 11 | BENJEN,Benjen 12 | BERIC,Beric 13 | BIANCA,Bianca 14 | BLACK_WALDER,Black Walder 15 | BOBONO,Bobono 16 | BOWEN_MARSH,Bowen 17 | BRAN,Bran 18 | BRIENNE,Brienne 19 | BRONN,Bronn 20 | BRYNDEN,Brynden 21 | CAMELLO,Camello 22 | CATELYN,Catelyn 23 | CERSEI,Cersei 24 | CLARENZO,Clarenzo 25 | DAARIO,Daario 26 | DAENERYS,Daenerys 27 | DAVOS,Davos 28 | DICKON,Dickon 29 | DIM_DALBA,Dim Dalba 30 | EDDISON_TOLLETT,Eddison 31 | DOM,Dom 32 | DORAN,Doran 33 | DOSH_KHALEEN_LEADER,Dosh Khaleen Leader 34 | DROGO,Drogo 35 | EDMURE,Edmure 36 | ELLARIA,Ellaria 37 | EURON,Euron 38 | FLYNN,Flynn 39 | GATINS,Gatins 40 | GILLY,Gilly 41 | GLOVER,Glover 42 | GREY_WORM,Grey Worm 43 | GUARD_CAPTAIN,Guard Captain 44 | HARALD,Harald 45 | HIGH_SPARROW,High Sparrow 46 | HODOR,Hodor 47 | HOSTER,Hoster 48 | HOUND,Sandor 49 | HOWLAND,Howland 50 | IZEMBARO,Izembaro 51 | JAIME,Jaime 52 | JAQEN,Jaqen 53 | JEOR,Jeor 54 | JOFFREY,Joffrey 55 | JON,Jon 56 | JORAH,Jorah 57 | KEVAN,Kevan 58 | KHAL_MORO,Khal Moro 59 | KINVARA,Kinvara 60 | KRAZNYS_MO_NAKLOZ,Kraznys 61 | LADY_CRANE,Lady Crane 62 | LANCEL,Lancel 63 | LEAF,Leaf 64 | LEM,Lem 65 | LHAZAREEN_WOMAN,Lhazareen Woman 66 | LITTLE_SAM,Little Sam 67 | LITTLEFINGER,Petyr 68 | LORAS,Loras 69 | LOTHAR,Lothar 70 | LYANNA,Lyanna 71 | LYANNA_MORMONT,Lyanna Mormont 72 | MACE,Mace 73 | MAESTER_AEMON,Aemon 74 | MAESTER_CITADEL,Citadel Maester 75 | MAESTER_DORNE,Dorne Maester 76 | MAESTER_MORMONT,Mormont Maester 77 | MAESTER_WOLKAN,Wolkan 78 | MANCE,Mance 79 | MANDERLY,Manderly 80 | MARGAERY,Margaery 81 | MEERA,Meera 82 | MELESSA,Melessa 83 | MELISANDRE,Melisandre 84 | MINISA,Minisa 85 | MISSANDEI,Missandei 86 | MORGAN,Morgan 87 | MOUNTAIN,Gregor 88 | MYRANDA,Myranda 89 | MYRCELLA,Myrcella 90 | NED,Ned 91 | NIGHT_KING,Night King 92 | NYMERIA,Nymeria 93 | OBARA,Obara 94 | OBERYN,Oberyn 95 | OLD_NAN,Old Nan 96 | OLENNA,Olenna 97 | OLLY,Olly 98 | OSHA,Osha 99 | OTHELL_YARWYCK,Othell 100 | PODRICK,Podrick 101 | PYCELLE,Pycelle 102 | QYBURN,Qyburn 103 | RAMSAY,Ramsay 104 | RANDYLL,Randyll 105 | RAY,Ray 106 | RAZDAL,Razdal 107 | RED_PRIEST,Red Priest 108 | RICKARD_KARSTARK,Rickard Karstark 109 | RICKARD_STARK,Rickard Stark 110 | RICKON,Rickon 111 | ROBB,Robb 112 | ROBERT,Robert 113 | ROBETT,Robett 114 | ROBIN,Robin 115 | RODRIK,Rodrik 116 | ROOSE_BOLTON,Roose 117 | SAM,Sam 118 | SANSA,Sansa 119 | SEPTA_UNELLA,Septa Unella 120 | SHIREEN,Shireen 121 | SMALLJON,Smalljon 122 | SON_OF_EDMURE,Edmure's Son 123 | SON_OFWALDA,Roose's Son 124 | STANNIS,Stannis 125 | TALLA,Talla 126 | THEON,Theon 127 | THOROS,Thoros 128 | THREE_EYED_RAVEN,Three Eyed Raven 129 | TOMMEN,Tommen 130 | TORMUND,Tormund 131 | TRYSTANE,Trystane 132 | TYENE,Tyene 133 | TYRION,Tyrion 134 | TYWIN,Tywin 135 | VALA,Vala 136 | VARYS,Varys 137 | WAIF,Waif 138 | WALDA,Walda 139 | WALDER,Walder 140 | WUN_WUN,Wun Wun 141 | YARA,Yara 142 | YEZZAN,Yezzan 143 | YOHN_ROYCE,Yohn -------------------------------------------------------------------------------- /dynetlsm/imputer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | from sklearn.base import BaseEstimator, TransformerMixin 5 | from sklearn.utils.validation import check_is_fitted, check_array, check_random_state 6 | 7 | 8 | 
from dynetlsm.network_statistics import density 9 | 10 | 11 | class SimpleNetworkImputer(BaseEstimator, TransformerMixin): 12 | """Impute missing values in the network by the most frequent value over 13 | all time points and edges. 14 | """ 15 | def __init__(self, missing_value=-1, strategy='most_frequent', 16 | random_state=123, copy=True): 17 | self.missing_value = missing_value 18 | self.strategy = strategy 19 | self.copy = copy 20 | self.random_state = random_state 21 | 22 | def _validate_input(self, Y): 23 | allowed_strategies = {'most_frequent', 'random'} 24 | if self.strategy not in allowed_strategies: 25 | raise ValueError("Can only use these strategies: {0}, " 26 | "got strategy='{1}'".format(allowed_strategies, 27 | self.strategy)) 28 | Y = check_array(Y, dtype=np.float64, 29 | force_all_finite='allow-nan', 30 | ensure_2d=False, allow_nd=True, copy=self.copy) 31 | 32 | return Y 33 | 34 | def fit(self, Y): 35 | Y = self._validate_input(Y) 36 | 37 | # statistics are calculated per time point 38 | n_time_steps = Y.shape[0] 39 | self.statistics_ = np.empty(n_time_steps) 40 | for t in range(n_time_steps): 41 | nan_mask = Y[t] == self.missing_value 42 | if not np.any(nan_mask): 43 | self.statistics_[t] = 0.0 44 | else: 45 | if self.strategy == 'most_frequent': 46 | mode = stats.mode(Y[t][~nan_mask].ravel()) 47 | self.statistics_[t] = mode[0][0] 48 | elif self.strategy == 'random': 49 | n_nodes = Y.shape[1] 50 | self.statistics_[t] = ( 51 | Y[t][~nan_mask].sum() / (n_nodes * (n_nodes - 1))) 52 | 53 | return self 54 | 55 | def transform(self, Y): 56 | check_is_fitted(self, 'statistics_') 57 | 58 | Y = self._validate_input(Y) 59 | 60 | if Y.shape[0] != self.statistics_.shape[0]: 61 | raise ValueError("Y has %d time steps, expected %d" 62 | % (Y.shape[0], self.statistics_.shape[0])) 63 | 64 | rng = check_random_state(self.random_state) 65 | for t in range(Y.shape[0]): 66 | 67 | if self.strategy == 'random': 68 | indices = np.triu_indices(Y.shape[1], k=1) 69 | y_vec = Y[t][indices] 70 | nan_mask = y_vec == self.missing_value 71 | y_vec[nan_mask] = rng.choice([0, 1], 72 | p=[1 - self.statistics_[t], self.statistics_[t]], 73 | size=np.sum(nan_mask)) 74 | Y[t][indices] = y_vec 75 | indices = np.tril_indices(Y.shape[1], k=-1) 76 | Y[t][indices] = 0 77 | Y[t] += Y[t].T 78 | else: 79 | nan_mask = Y[t] == self.missing_value 80 | Y[t][nan_mask] = self.statistics_[t] 81 | 82 | return Y 83 | -------------------------------------------------------------------------------- /dynetlsm/trace_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | from math import floor, ceil 5 | 6 | from statsmodels.regression.linear_model import yule_walker 7 | 8 | 9 | def mean_detrend(x): 10 | return x - np.mean(x) 11 | 12 | 13 | def xcorr(x, y, normed=True, detrend=mean_detrend, maxlags=10): 14 | Nx = len(x) 15 | if Nx != len(y): 16 | raise ValueError('x and y must be equal length') 17 | 18 | x = detrend(np.asarray(x)) 19 | y = detrend(np.asarray(y)) 20 | 21 | correls = np.correlate(x, y, mode='full') 22 | 23 | if normed: 24 | correls /= np.sqrt(np.dot(x, x) * np.dot(y, y)) 25 | 26 | if maxlags is None: 27 | maxlags = Nx - 1 28 | 29 | if maxlags >= Nx or maxlags < 1: 30 | raise ValueError('maxlags must be None or strictly ' 31 | 'positive < %d' % Nx) 32 | 33 | lags = np.arange(-maxlags, maxlags + 1) 34 | correls = correls[Nx - 1 - maxlags:Nx + maxlags] 35 | 36 | return lags, correls 37 | 38 | 39 | def effective_n(x, 
lags=None, corr=None, maxlags=100): 40 | """Effective sample size.""" 41 | if lags is None or corr is None: 42 | lags, corr = xcorr(x, x, maxlags=maxlags, normed=True) 43 | 44 | n_samples = x.shape[0] 45 | return n_samples / (1 + 2 * np.sum(corr[lags >= 1])) 46 | 47 | 48 | def aic_ar(sigma, n, p): 49 | """AIC for an AR(p) model with n samples. 50 | Note: Assumes the series is de-meaned. 51 | """ 52 | return 2 * n * np.log(sigma) + 2 * (p + 1) 53 | 54 | 55 | def spec0_ar(sigma, coefs): 56 | return (sigma ** 2) / ((1 - np.sum(coefs)) ** 2) 57 | 58 | 59 | def spectrum0_ar(x, max_order='auto'): 60 | """Calculates f(0) of the spectrum of x using an AR fit.""" 61 | n_samples = x.shape[0] 62 | 63 | if np.allclose(np.var(x), 0.0): 64 | return 0., 0. 65 | 66 | if max_order == 'auto': 67 | max_order = floor(10 * np.log10(n_samples)) 68 | 69 | # calculate f(0) and AIC for each AR(p) model 70 | results = np.zeros((max_order, 3)) 71 | for p in range(1, max_order + 1): 72 | coefs, sigma = yule_walker(x, order=p, demean=True, method='adjusted') 73 | results[p-1] = [p, spec0_ar(sigma, coefs), aic_ar(sigma, n_samples, p)] 74 | 75 | # return result for model minimizing the AIC 76 | min_id = np.argmin(results[:, -1]) 77 | order, var0 = results[min_id, :2] 78 | 79 | return var0 / n_samples, order 80 | 81 | 82 | def geweke_corrected(x, first=0.1, last=0.5): 83 | """Calculate the z-score using Geweke's correction for autocorrelations.""" 84 | n_samples = x.shape[0] 85 | 86 | # extract start and end chains 87 | x1 = x[:ceil(first * n_samples)] 88 | x2 = x[n_samples - floor(last * n_samples):] 89 | 90 | # calculate means 91 | x1_mean = np.mean(x1) 92 | x2_mean = np.mean(x2) 93 | 94 | # calculate variances 95 | x1_var, _ = spectrum0_ar(x1) 96 | x2_var, _ = spectrum0_ar(x2) 97 | 98 | # z score 99 | return (x1_mean - x2_mean) / np.sqrt(x1_var + x2_var) 100 | 101 | 102 | def geweke_diag(x, first=0.1, last=0.5, n_burn=None, corrected=True): 103 | """Performs Geweke's diagnostic on a chain x. 104 | Note: ArviZ and PyMC3 do not correct for autocorrelation and use a naive 105 | z-score! 106 | """ 107 | if n_burn is not None: 108 | x = x[n_burn:] 109 | 110 | z_score = geweke_corrected(x, first=first, last=last) 111 | 112 | # calculate two-sided p-value 113 | p_val = 2 * (1 - stats.norm.cdf(np.abs(z_score))) 114 | 115 | return z_score, p_val 116 | -------------------------------------------------------------------------------- /dynetlsm/distributions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.linalg as linalg 3 | import scipy.stats as stats 4 | 5 | from scipy.stats import truncnorm 6 | from scipy.special import gammaln 7 | from sklearn.utils import check_random_state 8 | 9 | 10 | SMALL_EPS = np.finfo('float64').tiny 11 | 12 | 13 | def sample_categorical(probas, rng): 14 | """ 15 | Sample from a Categorical(probas) distribution. 16 | """ 17 | cdf = probas.cumsum() 18 | u = rng.rand()  # a single uniform draw; one draw per category would bias the sampler 19 | return (u < cdf).argmax() 20 | 21 | 22 | def spherical_normal_log_pdf(x, mean, var): 23 | """Logarithm of the pdf of a spherical multivariate gaussian 24 | distribution.""" 25 | n_features = mean.shape[0] 26 | sum_sq = np.sum((x - mean) ** 2) 27 | sum_sq *= 0.5 * (1. / var) 28 | return -0.5 * n_features * np.log(2 * np.pi * var) - sum_sq 29 | 30 | 31 | def spherical_normal_pdf(x, mean, var): 32 | """Probability Density Function for a spherical multivariate 33 | gaussian distribution. 
Note that this is 10x faster than 34 | the more general scipy.stats.multivariate_normal.pdf 35 | """ 36 | n_features = mean.shape[0] 37 | sum_sq = np.sum((x - mean) ** 2) 38 | sum_sq *= 0.5 * (1. / var) 39 | return (1 / (2 * np.pi * var) ** (n_features / 2)) * np.exp(-sum_sq) 40 | 41 | 42 | def multivariate_t_log_pdf(x, df, mu0, S): 43 | """Logarithm of the pdf of a multivariate t distribution.""" 44 | x = np.atleast_1d(x) 45 | mu0 = np.atleast_1d(mu0) 46 | 47 | p = x.shape[0] 48 | if not isinstance(S, np.ndarray): 49 | rss = np.sum((x - mu0) ** 2) / S 50 | log_var = p * np.log(np.sqrt(S)) 51 | else: 52 | L = linalg.cholesky(S) 53 | LinvX = linalg.solve_triangular(L, x - mu0, trans=1) 54 | rss = np.sum(LinvX ** 2, axis=0) 55 | log_var = np.sum(np.log(np.diag(L))) 56 | logdt = (gammaln((p + df) / 2.) - ( 57 | gammaln(df / 2.) + log_var + 58 | (p / 2.) * np.log(df * np.pi)) - 59 | 0.5 * (df + p) * np.log1p(rss / df)) 60 | 61 | return logdt 62 | 63 | 64 | def multivariate_t_pdf(x, df, mu0, S): 65 | """ 66 | Probability Density Function of a multivariate t-distribution with 67 | df degrees of freedom, location parameter mu0, and scale matrix S. 68 | """ 69 | return np.exp(multivariate_t_log_pdf(x, df, mu0, S)) 70 | 71 | 72 | def truncated_normal(mean, var, lower=0, upper=1, size=1, random_state=None): 73 | std = np.sqrt(var) 74 | a = (lower - mean) / std 75 | b = (upper - mean) / std 76 | return truncnorm.rvs(a, b, size=size, loc=mean, scale=std, 77 | random_state=random_state) 78 | 79 | 80 | def truncated_normal_logpdf(x, mean, var, lower=0, upper=1): 81 | std = np.sqrt(var) 82 | a = (lower - mean) / std 83 | b = (upper - mean) / std 84 | return truncnorm.logpdf(x, a, b, loc=mean, scale=std) 85 | 86 | 87 | def sample_dirichlet(alphas, random_state=None): 88 | """The numpy dirichlet sampler is numerically unstable and produces samples 89 | with zero entries. Clip these values before using the sample. 
90 | """ 91 | rng = check_random_state(random_state) 92 | if np.any(alphas <= 0.): 93 | alphas = np.clip(alphas, a_min=SMALL_EPS, a_max=None) 94 | return rng.dirichlet(alphas) 95 | 96 | 97 | def dirichlet_logpdf(x, alphas): 98 | if np.any(alphas <= 0.): 99 | alphas = np.clip(alphas, a_min=SMALL_EPS, a_max=None) 100 | if np.any(x <= 0): 101 | x = np.clip(x, a_min=SMALL_EPS, a_max=None) 102 | return stats.dirichlet.logpdf(x, alphas) 103 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s4-nodes.csv: -------------------------------------------------------------------------------- 1 | Id,Label 2 | ADRACK_HUMBLE,Adrack Humble 3 | AERYS,Aerys 4 | ALLISER_THORNE,Alliser Thorne 5 | ANYA_WAYNWOOD,Anya Waynwood 6 | ARYA,Arya 7 | AXELL_FLORENT,Axell Florent 8 | BAELOR,Baelor 9 | BALON,Balon 10 | BALON_DWARF,Balon Dwarf 11 | BARRISTAN,Barristan 12 | BITER,Biter 13 | BLACK_JACK,Black Jack 14 | BOLTON_GUARD,Bolton Guard 15 | BOROS,Boros 16 | BRAN,Bran 17 | BRIENNE,Brienne 18 | BRONN,Bronn 19 | CATELYN,Catelyn 20 | CERSEI,Cersei 21 | COOPER,Cooper 22 | CRASTER,Craster 23 | CRASTERS_BABY,Baby 24 | DAARIO,Daario 25 | DAENERYS,Daenerys 26 | DAVOS,Davos 27 | EDDISON_TOLLETT,Eddison 28 | DONGO_THE_GIANT,Dongo the Giant 29 | DONNEL_HILL,Donnel Hill 30 | DONNEL_WAYNWOOD,Donnel Waynwood 31 | DONTOS,Dontos 32 | DORAN,Doran 33 | DORNISH_LORD,Dornish Lord 34 | DROGO,Drogo 35 | DYING_MAN,Dying Man 36 | ELDER_MEEREEN_SLAVE,Meereen Statesman 37 | ELIA,Elia 38 | ELLARIA,Ellaria 39 | ENDREW,Endrew 40 | FALYSE,Falyse 41 | FARMER_HAMLET,Hamlet Farmer 42 | FENNESZ,Fennesz 43 | FIRST_MATE,First Mate 44 | FOOL,Fool 45 | GILLY,Gilly 46 | GOATHERD,Goatherd 47 | GOATHERDS_SON,Goatherd's Son 48 | GRENN,Grenn 49 | GREY_WORM,Grey Worm 50 | GUYMON,Guymon 51 | HIGH_SEPTON,High Septon 52 | HIZDAHR,Hizdahr 53 | HIZDAHRS_FATHER,Hizdahr's Father 54 | HODOR,Hodor 55 | HOSTER,Hoster 56 | HOT_PIE,Hot Pie 57 | INNKEEPER,Innkeeper 58 | INNKEEPERS_DAUGHTER,Innkeeper's Daughter 59 | JAIME,Jaime 60 | JANOS,Janos 61 | JAQEN,Jaqen 62 | JEOR,Jeor 63 | JOANNA,Joanna 64 | JOFFREY,Joffrey 65 | JOFFREY_DWARF,Joffrey Dwarf 66 | JOJEN,Jojen 67 | JON,Jon 68 | JON_ARRYN,Jon Arryn 69 | JORAH,Jorah 70 | KARL_TANNER,Karl 71 | KEGS,Kegs 72 | LEAF,Leaf 73 | LHARA,Lhara 74 | LITTLE_SAM,Little Sam 75 | LITTLEFINGER,Petyr 76 | LOCKE,Locke 77 | LOLLYS,Lollys 78 | LOMMY,Lommy 79 | LORAS,Loras 80 | LUTHOR,Luthor 81 | LYSA,Lysa 82 | MACE,Mace 83 | MAESTER_AEMON,Aemon 84 | MAG_THE_MIGHTY,Mag the Mighty 85 | MANCE,Mance 86 | MANSERVANT,Manservant 87 | MAREI,Marei 88 | MARGAERY,Margaery 89 | MASTER_MIGHDAL,Master Mighdal 90 | MEERA,Meera 91 | MEEREEN_CHAMPION,Meereen Champion 92 | MEEREEN_SLAVE,Meereen Slave 93 | MELISANDRE,Melisandre 94 | MERYN_TRANT,Meryn 95 | MISSANDEI,Missandei 96 | MOLES_TOWN_MADAM,Mole's Town Madam 97 | MOLES_TOWN_WHORE,Mole's Town Whore 98 | MORAG,Morag 99 | MORGAN,Morgan 100 | MORGANS_FRIEND,Morgan's Friend 101 | MOSSADOR,Mossador 102 | MOUNTAIN,Gregor 103 | MULLY,Mully 104 | MUSICIAN,Musician 105 | MYRANDA,Myranda 106 | MYRCELLA,Myrcella 107 | NED,Ned 108 | NIGHT_KING,Night King 109 | OBERYN,Oberyn 110 | OLENNA,Olenna 111 | OLLY,Olly 112 | OLLYS_MOTHER,Olly's Mother 113 | OLYVAR,Olyvar 114 | ORSON,Orson 115 | ORYS,Orys 116 | ORYS_BROTHER,Orys's Brother 117 | OTHELL_YARWYCK,Othell 118 | PODRICK,Podrick 119 | POLLIVER,Polliver 120 | PYCELLE,Pycelle 121 | PYP,Pyp 122 | QHORIN,Qhorin 123 | QYBURN,Qyburn 124 | RALF,Ralf 125 | RAMSAY,Ramsay 126 | RANDYLL,Randyll 127 | RAST,Rast 128 
| RENLY,Renly 129 | RENLY_DWARF,Renly Dwarf 130 | RHAEGAR,Rhaegar 131 | RICKON,Rickon 132 | ROBB,Robb 133 | ROBB_DWARF,Robb Dwarf 134 | ROBERT,Robert 135 | ROBIN,Robin 136 | ROOSE_BOLTON,Roose 137 | RORGE,Rorge 138 | SALLADHOR,Salladhor 139 | SALLY,Sally 140 | SAM,Sam 141 | HOUND,Sandor 142 | SANSA,Sansa 143 | SELWYN,Selwyn 144 | SELYSE,Selyse 145 | SHAE,Shae 146 | SHIREEN,Shireen 147 | SISSY,Sissy 148 | STANNIS,Stannis 149 | STANNIS_DWARF,Stannis Dwarf 150 | STYR,Styr 151 | SYRIO_FOREL,Syrio Forel 152 | TANSY,Tansy 153 | TERNESIO_TERYS,Ternesio Terys 154 | THENN_WARG,Thenn Warg 155 | THEON,Theon 156 | THREE_EYED_RAVEN,Three Eyed Raven 157 | TOMMEN,Tommen 158 | TOMMY,Tommy 159 | TORMUND,Tormund 160 | TYCHO,Tycho 161 | TYRION,Tyrion 162 | TYWIN,Tywin 163 | VANCE_CORBRAY,Vance Corbray 164 | VARYS,Varys 165 | VIOLA,Viola 166 | VISERYS,Viserys 167 | WALDA,Walda 168 | WAYMAR_ROYCE,Waymar Royce 169 | WHITE_WALKER,White Walker 170 | YARA,Yara 171 | YGRITTE,Ygritte 172 | YOHN_ROYCE,Yohn Royce 173 | ZALA,Zala -------------------------------------------------------------------------------- /examples/merging_communities.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import networkx as nx 4 | 5 | from sklearn.metrics import adjusted_rand_score 6 | 7 | from dynetlsm import DynamicNetworkHDPLPCM 8 | from dynetlsm.datasets import merging_dynamic_network 9 | from dynetlsm.procrustes import longitudinal_procrustes_rotation 10 | from dynetlsm.plots import get_colors, normal_contour 11 | 12 | 13 | Y, X, z, intercept, probas, mus, sigmas = merging_dynamic_network( 14 | n_nodes=120, lmbda=0.6, n_time_steps=5, random_state=42) 15 | 16 | 17 | # plot adjacency matrices 18 | fig, ax = plt.subplots(figsize=(15, 4), ncols=Y.shape[0], 19 | nrows=1, sharex=True, sharey=True) 20 | ax = ax.flat 21 | order = np.argsort(z[0]) 22 | for t in range(Y.shape[0]): 23 | ax[t].imshow(Y[t][order, :][:, order], cmap='gray_r') 24 | ax[t].xaxis.set_visible(False) 25 | ax[t].yaxis.set_visible(False) 26 | ax[t].set_title('t = {}'.format(t+1)) 27 | 28 | fig.savefig('merging_adj.png', dpi=300, bbox_inches='tight') 29 | 30 | 31 | # run model 32 | model = DynamicNetworkHDPLPCM( 33 | n_components=10, n_iter=10000, burn=10000, tune=5000, tune_interval=1000, 34 | random_state=42).fit(Y) 35 | 36 | # procrustes rotate to match truth 37 | X_rot, R = longitudinal_procrustes_rotation(X, model.X_) 38 | mu_rot = np.dot(model.mu_, R) 39 | 40 | 41 | # plot results 42 | fig, ax = plt.subplots(figsize=(18, 8), ncols=5, nrows=3, 43 | sharey='row', sharex='row', subplot_kw=dict(aspect=1)) 44 | 45 | colors = get_colors(z[0]) 46 | order = np.argsort(z[0]) 47 | for t in range(Y.shape[0]): 48 | ax[0, t].imshow(Y[t][order, :][:, order], cmap='gray_r') 49 | ax[0, t].xaxis.set_visible(False) 50 | ax[0, t].yaxis.set_visible(False) 51 | ax[0, t].set_title('t = {}'.format(t+1), fontsize=18) 52 | 53 | for t in range(Y.shape[0]): 54 | pos = dict() 55 | for i in range(Y.shape[1]): 56 | pos[i] = X[t, i] 57 | 58 | G = nx.from_numpy_array(Y[t]) 59 | 60 | nx.draw(G, pos=pos, 61 | node_color=colors[z[t]], 62 | edge_color='k', edgecolors='k', 63 | node_size=50, 64 | width=0.75, ax=ax[1, t]) 65 | 66 | for k in np.unique(z[t]): 67 | normal_contour(mus[k], sigmas[k] * np.eye(2), ax=ax[1, t], n_std=[2], 68 | zorder=1, alpha=0.4, facecolor=colors[k], linestyle='--', 69 | linewidth=1, edgecolor='k') 70 | 71 | if t == 0: 72 | ax[1, t].axis('on') 73 | ax[1, 
t].spines['top'].set_visible(False) 74 | ax[1, t].spines['right'].set_visible(False) 75 | ax[1, t].spines['bottom'].set_visible(False) 76 | ax[1, t].spines['left'].set_visible(False) 77 | ax[1, t].set_ylabel('Truth', fontsize=20) 78 | 79 | ax[1, t].margins(y=0.1) 80 | 81 | for t in range(Y.shape[0]): 82 | pos = dict() 83 | for i in range(Y.shape[1]): 84 | pos[i] = X_rot[t, i] 85 | 86 | G = nx.from_numpy_array(Y[t]) 87 | 88 | nx.draw(G, pos=pos, 89 | node_color=colors[model.z_[t]], 90 | edge_color='k', edgecolors='k', 91 | node_size=50, 92 | width=0.75, ax=ax[2, t]) 93 | 94 | for k in np.unique(model.z_[t]): 95 | normal_contour(mu_rot[k], model.sigma_[k] * np.eye(2), ax=ax[2, t], n_std=[2], 96 | zorder=1, alpha=0.4, facecolor=colors[k], linestyle='--', 97 | linewidth=1, edgecolor='k') 98 | 99 | if t == 0: 100 | ax[2, t].axis('on') 101 | ax[2, t].spines['top'].set_visible(False) 102 | ax[2, t].spines['right'].set_visible(False) 103 | ax[2, t].spines['bottom'].set_visible(False) 104 | ax[2, t].spines['left'].set_visible(False) 105 | ax[2, t].set_ylabel('Estimated', fontsize=20) 106 | 107 | ax[2, t].margins(y=0.1) 108 | 109 | fig.savefig('merging_results.png', dpi=300, bbox_inches='tight') 110 | 111 | 112 | # inferred blending coefficient and ARI 113 | print('lambda 95%: ', np.quantile(model.lambdas_[model.n_burn_:], q=[0.025, 0.975])) 114 | 115 | rand_index = adjusted_rand_score(z.ravel(), model.z_.ravel()) 116 | print('ARI: ', rand_index) 117 | -------------------------------------------------------------------------------- /examples/detection_limit.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import plac 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | 10 | from sklearn.utils import check_random_state 11 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 12 | 13 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 14 | from dynetlsm.datasets import detection_limit_simulation 15 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 16 | from dynetlsm.model_selection import minimize_posterior_expected_vi 17 | from dynetlsm.model_selection import train_test_split 18 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 19 | from dynetlsm.network_statistics import density, modularity 20 | 21 | 22 | # group separation ratio 23 | ratio = 0.5 24 | # ratio = 0.1, 0.25, 0.5, 0.7, 0.8, 0.9 25 | 26 | # transition probability 27 | trans_proba = 0.1 28 | # trans_proba = 0.1, 0.2, 0.4 29 | 30 | out_dir = 'results_tp_{}_ratio_{}'.format(trans_proba, ratio)  # include both settings so runs do not collide 31 | 32 | 33 | # create a directory to store the results 34 | if not os.path.exists(out_dir): 35 | os.mkdir(out_dir) 36 | 37 | 38 | def counts_per_time_step(z): 39 | n_time_steps = z.shape[0] 40 | group_counts = np.zeros(n_time_steps, dtype=int) 41 | for t in range(n_time_steps): 42 | group_counts[t] = np.unique(z[t]).shape[0] 43 | 44 | return group_counts 45 | 46 | def posterior_per_time_step(model): 47 | n_time_steps = model.Y_fit_.shape[0] 48 | probas = np.zeros((n_time_steps, model.n_components + 1)) 49 | for t in range(n_time_steps): 50 | freq = model.posterior_group_counts_[t] 51 | index = model.posterior_group_ids_[t] 52 | probas[t, index] = freq / freq.sum() 53 | 54 | return probas 55 | 56 | 57 | def benchmark_single(n_iter=10000, burn=5000, tune=1000, 58 | outfile_name='benchmark', 59 | ratio=0.5, trans_proba=0.2, 60 | random_state=None): 
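    # (editor's summary) one simulation replicate: draw a two-group dynamic
    # network with separation ratio `ratio` and label-switching probability
    # `trans_proba`, fit the HDP-LPCM, then write the posterior group-count
    # probabilities along with VI and adjusted Rand summaries to
    # `outfile_name`.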
61 | random_state = check_random_state(random_state) 62 | 63 | # generate simulated networks 64 | Y, X, z, probas, r, _ = detection_limit_simulation( 65 | r=ratio, trans_proba=trans_proba, random_state=random_state) 66 | 67 | 68 | # fit HDP-LPCM 69 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 70 | burn=burn, 71 | tune=tune, 72 | tune_interval=1000, 73 | is_directed=False, 74 | selection_type='vi', 75 | n_components=5, 76 | random_state=random_state).fit(Y) 77 | 78 | # MAP: number of clusters per time point 79 | map_counts = counts_per_time_step(model.z_) 80 | 81 | # Posterior group count probabilities 82 | probas = posterior_per_time_step(model) 83 | results = pd.DataFrame(probas) 84 | 85 | # create dataframe of results 86 | results['map_counts'] = map_counts 87 | 88 | # Variation of Information 89 | results['vi'] = variation_of_information( 90 | z.ravel(), model.z_.ravel()) 91 | vi = 0. 92 | for t in range(Y.shape[0]): 93 | vi_t = variation_of_information(z[t], model.z_[t]) 94 | results['vi_{}'.format(t)] = vi_t 95 | vi += vi_t 96 | results['vi_avg'] = vi / Y.shape[0] 97 | 98 | 99 | # adjusted rand index 100 | results['rand_index'] = adjusted_rand_score( 101 | z.ravel(), model.z_.ravel()) 102 | adj_rand = 0. 103 | for t in range(Y.shape[0]): 104 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 105 | results['rand_{}'.format(t)] = adj_t 106 | adj_rand += adj_t 107 | results['rand_avg'] = adj_rand / Y.shape[0] 108 | 109 | # info about simulated networks 110 | results['ratio'] = r 111 | 112 | results.to_csv(outfile_name, index=False) 113 | 114 | 115 | # run for 20 different networks 116 | for i in range(20): 117 | benchmark_single( 118 | n_iter=35000, burn=10000, tune=5000, random_state=i, 119 | ratio=ratio, trans_proba=trans_proba, 120 | outfile_name=os.path.join( 121 | out_dir, 'benchmark_{}.csv'.format(i))) 122 | -------------------------------------------------------------------------------- /dynetlsm/forecast.pyx: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # cython: language_level=3 3 | # cython: cdivision=True 4 | # cython: boundscheck=False 5 | # cython: wraparound=False 6 | # cython: nonecheck=False 7 | # cython: initializedcheck=False 8 | from libc.math cimport log, exp, sqrt, M_PI 9 | 10 | import numpy as np 11 | cimport numpy as np 12 | 13 | 14 | 15 | ctypedef np.npy_float64 DOUBLE 16 | ctypedef np.npy_int64 INT 17 | 18 | cdef inline double expit(double z): 19 | return 1. / (1. + exp(-z)) 20 | 21 | 22 | 23 | cdef double normal_pdf(DOUBLE[:] x, 24 | DOUBLE[:] mean, 25 | double var) nogil: 26 | cdef int k 27 | cdef int n_features = x.shape[0] 28 | cdef double sum_sq = 0.0 29 | 30 | for k in range(n_features): 31 | sum_sq += (x[k] - mean[k]) ** 2 32 | sum_sq *= 0.5 * (1. 
/ var) 33 | 34 | return exp(-0.5 * n_features * log(2 * M_PI * var) - sum_sq) 35 | 36 | 37 | cdef double mixture_normal_pdf(DOUBLE[:] x, 38 | DOUBLE[:] x_prev, 39 | DOUBLE[:] weights, 40 | double lmbda, 41 | DOUBLE[:, :] mean, 42 | DOUBLE[:] sigma): 43 | cdef int k 44 | cdef int n_groups = mean.shape[0] 45 | cdef int n_features = mean.shape[1] 46 | cdef double res = 0 47 | cdef DOUBLE[:] mu = np.zeros(n_features, dtype=np.float64) 48 | 49 | for k in range(n_groups): 50 | for p in range(n_features): 51 | mu[p] = lmbda * mean[k, p] + (1 - lmbda) * x_prev[p] 52 | res += weights[k] * normal_pdf(x, mu, sigma[k]) 53 | 54 | return res 55 | 56 | 57 | def renormalize_weights(z, weights, means, sigmas): 58 | n_components = sigmas.shape[0] 59 | 60 | active_groups, z = np.unique(z, return_inverse=True) 61 | trans_w = weights[active_groups][:, active_groups] 62 | trans_w /= np.sum(trans_w, axis=1).reshape(-1, 1) 63 | 64 | mu = means[active_groups] 65 | sigma = sigmas[active_groups] 66 | 67 | return z, trans_w, mu, sigma 68 | 69 | 70 | cdef inline double euclidean_distance(DOUBLE[:] x, DOUBLE[:] y) nogil: 71 | cdef int n_features = x.shape[0] 72 | cdef double d = 0. 73 | for k in range(n_features): 74 | d += (x[k] - y[k]) ** 2 75 | 76 | return sqrt(d) 77 | 78 | 79 | def marginal_forecast(DOUBLE[:, :] x, 80 | DOUBLE[:, :, :] x_prev, 81 | np.ndarray[np.int64_t, ndim=2, mode='c'] z, 82 | np.ndarray[double, ndim=3, mode='c'] trans_weights, 83 | np.ndarray[double, ndim=3, mode='c'] mus, 84 | np.ndarray[double, ndim=2, mode='c'] sigmas, 85 | DOUBLE[:] intercepts, 86 | DOUBLE[:] lmbdas, 87 | bint renormalize=True): 88 | cdef int i, j, s = 0 89 | cdef int n_iter = x_prev.shape[0] 90 | cdef int n_nodes = x_prev.shape[1] 91 | 92 | cdef double dij, wij 93 | 94 | cdef np.ndarray[double, ndim=2, mode='c'] sum_w = np.zeros( 95 | (n_nodes, n_nodes)) 96 | cdef np.ndarray[double, ndim=2, mode='c'] probas = np.zeros( 97 | (n_nodes, n_nodes)) 98 | cdef np.ndarray[np.int64_t, ndim=1, mode='c'] zs 99 | cdef DOUBLE[:, :] weights, mean 100 | cdef DOUBLE[:] sigma 101 | 102 | for s in range(n_iter): 103 | if renormalize: 104 | zs, weights, mean, sigma = renormalize_weights( 105 | z[s], trans_weights[s], mus[s], sigmas[s]) 106 | else: 107 | weights = trans_weights[s] 108 | mean = mus[s] 109 | sigma = sigmas[s] 110 | zs = z[s] 111 | 112 | for i in range(n_nodes): 113 | for j in range(i): 114 | dij = euclidean_distance(x[i], x[j]) 115 | 116 | wij = mixture_normal_pdf( 117 | x[i], x_prev[s, i], weights[zs[i]], lmbdas[s], mean, sigma) 118 | wij *= mixture_normal_pdf( 119 | x[j], x_prev[s, j], weights[zs[j]], lmbdas[s], mean, sigma) 120 | probas[i, j] += wij * expit(intercepts[s] - dij) / n_iter 121 | sum_w[i, j] += wij / n_iter 122 | 123 | sum_w += sum_w.T 124 | sum_w[np.diag_indices(n_nodes)] = 1 125 | probas += probas.T 126 | probas /= sum_w 127 | 128 | return np.asarray(probas) 129 | -------------------------------------------------------------------------------- /dynetlsm/metropolis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.stats as stats 3 | 4 | 5 | def tune_step_size_random_walk(step_size, acc_rate): 6 | """Keep acceptance rate within 25% - 40% acceptance.""" 7 | if acc_rate < 0.001: 8 | step_size *= 0.1 9 | elif acc_rate < 0.05: 10 | step_size *= 0.5 11 | elif acc_rate < 0.25: 12 | step_size *= 0.9 13 | elif acc_rate > 0.95: 14 | step_size *= 10.0 15 | elif acc_rate > 0.75: 16 | step_size *= 2.0 17 | elif acc_rate > 0.4: 18 | step_size *= 1.1 
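    # worked example: an observed acceptance rate of 0.15 falls in the
    # `< 0.25` branch above, so the step shrinks to 0.9 * step_size,
    # steering later proposals back toward the 25%-40% target band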
19 | 20 | return step_size 21 | 22 | 23 | def tune_step_size_dirichlet(step_size, acc_rate): 24 | if acc_rate < 0.001: 25 | step_size *= 10.0 26 | elif acc_rate < 0.05: 27 | step_size *= 2 28 | elif acc_rate < 0.25: 29 | step_size *= 1.1 30 | elif acc_rate > 0.95: 31 | step_size *= 0.1 32 | elif acc_rate > 0.75: 33 | step_size *= 0.5 34 | elif acc_rate > 0.4: 35 | step_size *= 0.9 36 | 37 | return step_size 38 | 39 | 40 | def random_walk_metropolis(x0, logp, step_size, random_state): 41 | n_features = x0.shape[0] 42 | 43 | # random walk proposal 44 | x = x0 + step_size * random_state.randn(n_features) 45 | 46 | # accept-reject 47 | accept_ratio = logp(x) - logp(x0) 48 | accepted = 1 49 | u = random_state.rand() 50 | if np.log(u) >= accept_ratio: 51 | x = x0 52 | accepted = 0 53 | 54 | return x, accepted, accept_ratio 55 | 56 | 57 | def dirichlet_metropolis(x0, logp, step_size, random_state, reg=1e-5): 58 | n_nodes = x0.shape[0] 59 | 60 | # scaled dirichlet proposal 61 | x = random_state.dirichlet(step_size * x0) 62 | 63 | # occasionally draws are zero due to precision issues 64 | # add some regularization and re-normalize 65 | if np.any(x == 0.): 66 | x += reg 67 | x /= np.sum(x) 68 | 69 | # accept-reject 70 | accept_ratio = logp(x) - logp(x0) 71 | 72 | # dirichlet proposal 73 | accept_ratio += (stats.dirichlet.logpdf(x0, step_size * x) - 74 | stats.dirichlet.logpdf(x, step_size * x0)) 75 | 76 | accepted = 1 77 | u = random_state.rand() 78 | if np.log(u) >= accept_ratio: 79 | x = x0 80 | accepted = 0 81 | 82 | return x, accepted, accept_ratio 83 | 84 | 85 | class Metropolis(object): 86 | def __init__(self, step_size=0.1, tune=500, tune_interval=100, 87 | proposal_type='random_walk'): 88 | self.step_size = step_size 89 | self.tune = tune 90 | self.tune_interval = tune_interval 91 | self.proposal_type = proposal_type 92 | self.steps_until_tune = tune_interval 93 | self.n_accepted = 0 94 | self.n_steps = 0 95 | 96 | def step(self, x, logp, random_state): 97 | if self.proposal_type == 'dirichlet': 98 | x_new, accepted, _ = dirichlet_metropolis(x, 99 | logp, 100 | self.step_size, 101 | random_state) 102 | elif self.proposal_type == 'random_walk': 103 | x_new, accepted, _ = random_walk_metropolis(x, 104 | logp, 105 | self.step_size, 106 | random_state) 107 | else: 108 | raise ValueError("`proposal_type` must be in " 109 | "{'random_walk', 'dirichlet'}, but got " 110 | "{}".format(self.proposal_type)) 111 | 112 | # track acceptance statistics for adaptation 113 | self.n_accepted += accepted 114 | self.n_steps += 1 115 | 116 | # tune step-sizes if necessary 117 | if self.tune is not None: 118 | self.tune_step_size() 119 | 120 | return x_new 121 | 122 | def tune_step_size(self): 123 | if (self.n_steps < self.tune and self.steps_until_tune == 0): 124 | # tune step size 125 | accept_rate = self.n_accepted / self.tune_interval 126 | 127 | if self.proposal_type == 'dirichlet': 128 | self.step_size = tune_step_size_dirichlet(self.step_size, 129 | accept_rate) 130 | else: 131 | self.step_size = tune_step_size_random_walk(self.step_size, 132 | accept_rate) 133 | self.n_accepted = 0 134 | self.steps_until_tune = self.tune_interval 135 | else: 136 | self.steps_until_tune -= 1 137 | -------------------------------------------------------------------------------- /dynetlsm/sample_coefficients.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | 5 | from .network_likelihoods import ( 6 | 
dynamic_network_loglikelihood_directed, 7 | dynamic_network_loglikelihood_undirected, 8 | approx_directed_network_loglikelihood, 9 | ) 10 | 11 | 12 | def sample_intercepts(Y, X, intercepts, intercept_prior, 13 | intercept_variance_prior, samplers, radii=None, 14 | dist=None, is_directed=False, case_control_sampler=None, 15 | squared=False, random_state=None): 16 | rng = check_random_state(random_state) 17 | 18 | if is_directed: 19 | # sample intercept_in 20 | def logp(x): 21 | if case_control_sampler is not None: 22 | # TODO: we do not cache distances here, decrease by 23 | # factor of 2 if we do this 24 | loglik = approx_directed_network_loglikelihood( 25 | X=X, 26 | radii=radii, 27 | in_edges=case_control_sampler.in_edges_, 28 | out_edges=case_control_sampler.out_edges_, 29 | degree=case_control_sampler.degrees_, 30 | control_nodes=case_control_sampler.control_nodes_out_, 31 | intercept_in=x[0], 32 | intercept_out=intercepts[1], 33 | squared=squared) 34 | else: 35 | loglik = dynamic_network_loglikelihood_directed( 36 | Y, X, 37 | intercept_in=x[0], intercept_out=intercepts[1], 38 | radii=radii, 39 | squared=squared, 40 | dist=dist) 41 | loglik -= ((x[0] - intercept_prior[0]) ** 2 / 42 | (2 * intercept_variance_prior)) 43 | return loglik 44 | 45 | intercepts[0] = samplers[0].step( 46 | np.array([intercepts[0]]), logp, rng)[0] 47 | 48 | # sample intercept_out 49 | def logp(x): 50 | if case_control_sampler is not None: 51 | # TODO: we do not cache distances here, decrease by 52 | # factor of 2 if we do this 53 | loglik = approx_directed_network_loglikelihood( 54 | X=X, 55 | radii=radii, 56 | in_edges=case_control_sampler.in_edges_, 57 | out_edges=case_control_sampler.out_edges_, 58 | degree=case_control_sampler.degrees_, 59 | control_nodes=case_control_sampler.control_nodes_out_, 60 | intercept_in=intercepts[0], 61 | intercept_out=x[0], 62 | squared=squared) 63 | else: 64 | loglik = dynamic_network_loglikelihood_directed( 65 | Y, X, 66 | intercept_in=intercepts[0], intercept_out=x[0], 67 | radii=radii, 68 | squared=squared, 69 | dist=dist) 70 | loglik -= ((x[0] - intercept_prior[1]) ** 2 / 71 | (2 * intercept_variance_prior)) 72 | return loglik 73 | 74 | intercepts[1] = samplers[1].step( 75 | np.array([intercepts[1]]), logp, rng)[0] 76 | else: 77 | def logp(x): 78 | loglik = dynamic_network_loglikelihood_undirected(Y, X, 79 | intercept=x, 80 | squared=squared, 81 | dist=dist) 82 | loglik -= ((x - intercept_prior) ** 2 / 83 | (2 * intercept_variance_prior)) 84 | return loglik 85 | 86 | intercepts = samplers[0].step(intercepts, logp, rng) 87 | 88 | return intercepts 89 | 90 | 91 | def sample_radii(Y, X, intercepts, radii, sampler, dist=None, 92 | case_control_sampler=None, squared=False, random_state=None): 93 | rng = check_random_state(random_state) 94 | 95 | def logp(x): 96 | # NOTE: dirichlet prior (this is constant for alpha = 1.0 97 | if case_control_sampler: 98 | # TODO: we do not cache distances here, decrease by 99 | # factor of 2 if we do this 100 | loglik = approx_directed_network_loglikelihood( 101 | X=X, 102 | radii=x, 103 | in_edges=case_control_sampler.in_edges_, 104 | out_edges=case_control_sampler.out_edges_, 105 | degree=case_control_sampler.degrees_, 106 | control_nodes=case_control_sampler.control_nodes_out_, 107 | intercept_in=intercepts[0], 108 | intercept_out=intercepts[1], 109 | squared=squared) 110 | else: 111 | loglik = dynamic_network_loglikelihood_directed( 112 | Y, X, 113 | intercept_in=intercepts[0], 114 | intercept_out=intercepts[1], 115 | radii=x, 116 | 
squared=squared,
117 |                 dist=dist)
118 |
119 |         return loglik
120 |
121 |     return sampler.step(radii, logp, rng)
122 | -------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import os
4 | import sys
5 | import contextlib
6 | import subprocess
7 | import glob
8 |
9 | from setuptools import setup, find_packages
10 | from setuptools import Extension
11 |
12 |
13 | HERE = os.path.dirname(os.path.abspath(__file__))
14 |
15 | # import `__version__` from the code base
16 | exec(open(os.path.join(HERE, 'dynetlsm', 'version.py')).read())
17 |
18 |
19 | with open('requirements.txt') as f:
20 |     INSTALL_REQUIRES = [l.strip() for l in f.readlines() if l]
21 |
22 |
23 | with open('test_requirements.txt') as f:
24 |     TEST_REQUIRES = [l.strip() for l in f.readlines() if l]
25 |
26 |
27 | try:
28 |     import numpy
29 | except ImportError:
30 |     print('numpy is required during installation')
31 |     sys.exit(1)
32 |
33 |
34 | try:
35 |     import scipy
36 | except ImportError:
37 |     print('scipy is required during installation')
38 |     sys.exit(1)
39 |
40 |
41 | @contextlib.contextmanager
42 | def chdir(new_dir):
43 |     old_dir = os.getcwd()
44 |     try:
45 |         sys.path.insert(0, new_dir)
46 |         yield
47 |     finally:
48 |         del sys.path[0]
49 |         os.chdir(old_dir)
50 |
51 |
52 | def find_cython(dir, files=None):
53 |     if files is None:
54 |         files = []
55 |
56 |     for file in os.listdir(dir):
57 |         path = os.path.join(dir, file)
58 |         if os.path.isfile(path) and path.endswith(".pyx"):
59 |             files.append(path.replace(os.path.sep, ".")[:-4])
60 |         elif os.path.isdir(path):
61 |             find_cython(path, files)
62 |
63 |     return files
64 |
65 |
66 | def clean(path):
67 |     for name in find_cython(path):
68 |         name = name.replace('.', os.path.sep)
69 |         for ext in ['*.c', '*.so', '*.o', '*.html']:
70 |             file_path = glob.glob(os.path.join(path, name + ext))
71 |             if file_path and os.path.exists(file_path[0]):
72 |                 os.unlink(file_path[0])
73 |
74 |
75 | def get_include():
76 |     source_path = os.path.join(HERE, 'src')
77 |     return source_path if os.path.exists(source_path) else ''
78 |
79 |
80 | def get_sources():
81 |     files = []
82 |     source_path = get_include()
83 |     if source_path:
84 |         for name in os.listdir(source_path):
85 |             path = os.path.join(source_path, name)
86 |             if os.path.isfile(path) and path.endswith(".c"):
87 |                 files.append(os.path.relpath(path))
88 |
89 |     return files
90 |
91 |
92 | def generate_cython(cython_cov=False):
93 |     print("Cythonizing sources")
94 |     for source in find_cython(HERE):
95 |         source = source.replace('.', os.path.sep) + '.pyx'
96 |         cythonize_source(source, cython_cov)
97 |
98 |
99 | def cythonize_source(source, cython_cov=False):
100 |     print("Processing %s" % source)
101 |
102 |     flags = ['--fast-fail']
103 |     if cython_cov:
104 |         flags.extend(['--directive', 'linetrace=True'])
105 |
106 |     try:
107 |         p = subprocess.call(['cython'] + flags + [source])
108 |         if p != 0:
109 |             raise Exception('Cython failed')
110 |     except OSError:
111 |         raise OSError('Cython needs to be installed')
112 |
113 |
114 | def make_extension(ext_name, macros=[]):
115 |     ext_path = ext_name.replace('.', os.path.sep) + '.c'
116 |     mod_name = '.'.join(ext_name.split('.')[-2:])
117 |     include_dirs = [numpy.get_include(), "."]
118 |     if get_include():
119 |         include_dirs = [get_include()] + include_dirs
120 |     return Extension(
121 |         mod_name,
122 |         sources=[os.path.relpath(ext_path)] + get_sources(),
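        # include NumPy's C headers (required by the `cimport numpy` blocks
        # in the .pyx sources) plus the optional bundled src/ directory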
123 |         include_dirs=include_dirs,
124 |         extra_compile_args=["-O3", "-Wall", "-fPIC"],
125 |         define_macros=macros)
126 |
127 |
128 | def generate_extensions(macros=[]):
129 |     ext_modules = []
130 |     for mod_name in find_cython(HERE):
131 |         ext_modules.append(make_extension(mod_name, macros=macros))
132 |
133 |     return ext_modules
134 |
135 | DISTNAME = 'dynetlsm'
136 | DESCRIPTION = 'dynetlsm'
137 | with open('README.md') as f:
138 |     LONG_DESCRIPTION = f.read()
139 | MAINTAINER = 'Joshua D. Loyal'
140 | MAINTAINER_EMAIL = 'jloyal25@gmail.com'
141 | URL = 'https://joshloyal.github.io/dynetlsm'
142 | DOWNLOAD_URL = 'https://pypi.org/project/dynetlsm/#files'
143 | LICENSE = 'MIT'
144 | VERSION = __version__
145 | CLASSIFIERS = []
146 |
147 |
148 |
149 | def setup_package():
150 |     if len(sys.argv) > 1 and sys.argv[1] == 'clean':
151 |         return clean(HERE)
152 |
153 |     cython_cov = 'CYTHON_COV' in os.environ
154 |
155 |     macros = []
156 |     if cython_cov:
157 |         print("Adding coverage information to cythonized files.")
158 |         macros = [('CYTHON_TRACE_NOGIL', 1)]
159 |
160 |     with chdir(HERE):
161 |         generate_cython(cython_cov)
162 |         ext_modules = generate_extensions(macros=macros)
163 |         setup(
164 |             name=DISTNAME,
165 |             maintainer=MAINTAINER,
166 |             maintainer_email=MAINTAINER_EMAIL,
167 |             description=DESCRIPTION,
168 |             license=LICENSE,
169 |             url=URL,
170 |             version=VERSION,
171 |             download_url=DOWNLOAD_URL,
172 |             long_description=LONG_DESCRIPTION,
173 |             long_description_content_type='text/markdown',
174 |             zip_safe=False,
175 |             classifiers=CLASSIFIERS,
176 |             package_data={
177 |                 '': [
178 |                     'dynetlsm' + os.path.sep + '*.pyx',
179 |                     'dynetlsm' + os.path.sep + '*.pxd'
180 |                 ]
181 |             },
182 |             include_package_data=True,
183 |             packages=find_packages(),
184 |             install_requires=INSTALL_REQUIRES,
185 |             extras_require={'test': TEST_REQUIRES},
186 |             setup_requires=['pytest-runner'],
187 |             tests_require=TEST_REQUIRES,
188 |             ext_modules=ext_modules
189 |         )
190 | if __name__ == '__main__':
191 |     setup_package()
192 | -------------------------------------------------------------------------------- /dynetlsm/latent_space.py: --------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy.linalg as linalg
4 |
5 | from scipy.sparse import csgraph
6 | from scipy.optimize import minimize
7 |
8 | from sklearn.cluster import KMeans
9 | from sklearn.manifold import MDS
10 | from sklearn.metrics import pairwise_distances, euclidean_distances
11 |
12 | from .procrustes import static_procrustes_rotation
13 |
14 |
15 | __all__ = ['calculate_distances', 'generalized_mds', 'longitudinal_kmeans',
16 |            'initialize_radii']
17 |
18 |
19 | def calculate_distances(X, metric='euclidean', squared=False):
20 |     """Calculates the pairwise distances between latent positions X."""
21 |     if X.ndim == 2:
22 |         return pairwise_distances(X, metric=metric)
23 |
24 |     n_time_steps, n_nodes, _ = X.shape
25 |
26 |     dist = np.empty((n_time_steps, n_nodes, n_nodes))
27 |     for t in range(n_time_steps):
28 |         if metric == 'euclidean':
29 |             dist[t] = euclidean_distances(X[t], squared=squared)
30 |         else:
31 |             dist[t] = pairwise_distances(X[t], metric=metric)
32 |
33 |     return dist
34 |
35 |
36 | def shortest_path_dissimilarity(Y, unweighted=True):
37 |     """Calculate the shortest-path dissimilarity of a static graph."""
38 |     dist = csgraph.shortest_path(Y, directed=False, unweighted=unweighted)
39 |
40 |     # impute unconnected components with the largest distance plus 1
41 |     inf_mask = np.isinf(dist)
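    # (max + 1 keeps disconnected pairs strictly farther apart than any
    # connected pair while leaving the finite distances untouched)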
dist[inf_mask] = np.max(dist[~inf_mask]) + 1 43 | 44 | return dist 45 | 46 | 47 | def generalized_mds(Y, n_features=2, is_directed=False, unweighted=True, 48 | lmbda=10, random_state=None): 49 | """Generalized Multi-Dimension Scaling (Sarkar and Moore, 2005).""" 50 | is_dynamic_graph = Y.ndim == 3 51 | if not is_dynamic_graph: 52 | Y = np.expand_dims(Y, axis=0) 53 | 54 | n_time_steps, n_nodes, _ = Y.shape 55 | 56 | # calculate shortest-path dissimilarity for each time step 57 | D = np.empty((n_time_steps, n_nodes, n_nodes)) 58 | for t in range(Y.shape[0]): 59 | D[t] = shortest_path_dissimilarity(Y[t], unweighted=unweighted) 60 | 61 | # compute latent positions based on MDS 62 | X = np.empty((n_time_steps, n_nodes, n_features)) 63 | 64 | # classical multi-dimensional scaling for t = 1 65 | X[0] = MDS(dissimilarity='precomputed', 66 | n_components=n_features, 67 | random_state=random_state).fit_transform(D[0]) 68 | 69 | # minimize the objective function found in Sarkar and Moore 70 | H = np.eye(n_nodes) - (1. / n_nodes) * np.ones((n_nodes, n_nodes)) 71 | for t in range(1, n_time_steps): 72 | alpha = 1 / (1 + lmbda) 73 | beta = lmbda / (1 + lmbda) 74 | XXt = alpha * np.dot(H, np.dot(-0.5 * D[t] ** 2, H)) 75 | XXt = XXt + beta * (np.dot(X[t-1], X[t-1].T)) 76 | 77 | # the optimum is the eigen-decomposition of XXt 78 | evals, evecs = linalg.eigh(XXt) 79 | 80 | # flip so in descending order 81 | evecs = evecs[:, ::-1] 82 | evals = evals[::-1] 83 | 84 | # extract features (top n_features eigenvectors scaled by eigenvalue) 85 | X[t] = evecs[:, :n_features] * np.sqrt(evals[:n_features]) 86 | 87 | # procrustes transformation to fix rotation invariance 88 | X[t], _ = static_procrustes_rotation(X[t-1], X[t]) 89 | 90 | # the directed model scales the space so that it is roughly [-1, 1], 91 | # i.e. same scale as the radii 92 | if is_directed: 93 | X /= n_nodes 94 | 95 | return X if is_dynamic_graph else np.squeeze(X) 96 | 97 | 98 | def longitudinal_kmeans(X, n_clusters=5, var_reg=1e-3, 99 | fixed_clusters=True, random_state=None): 100 | """Longitudinal K-Means Algorithm (Genolini and Falissard, 2010)""" 101 | n_time_steps, n_nodes, n_features = X.shape 102 | 103 | # vectorize latent positions across time 104 | X_vec = np.moveaxis(X, 0, -1).reshape(n_nodes, n_time_steps * n_features) 105 | 106 | # perform normal k-means on the vectorized features 107 | kmeans = KMeans(n_clusters=n_clusters, 108 | random_state=random_state).fit(X_vec) 109 | 110 | # this method assigns a single cluster to each point across time. 111 | labels = kmeans.labels_.reshape(-1, 1) 112 | labels = np.hstack([labels] * n_time_steps).T 113 | 114 | # un-vectorize centers, shape (n_time_steps, n_centers, n_features) 115 | centers_vec = kmeans.cluster_centers_ 116 | if fixed_clusters: 117 | centers = np.empty((n_clusters, n_features)) 118 | for k in range(n_clusters): 119 | muk = centers_vec[k].reshape(-1, n_time_steps).T 120 | centers[k] = muk.mean(axis=0) # average position overtime 121 | else: 122 | centers = np.empty((n_time_steps, n_clusters, n_features)) 123 | for k in range(n_clusters): 124 | centers[:, k] = centers_vec[k].reshape(-1, n_time_steps).T 125 | 126 | # calculate cluster variances (assumed spherical and constant over-time) 127 | variances = np.zeros(n_clusters, dtype=np.float64) 128 | for k in range(n_clusters): 129 | for t in range(n_time_steps): 130 | variances[k] += np.var(X[t][labels[t] == k], axis=0).mean() 131 | variances[k] /= n_time_steps 132 | 133 | # clusters with a single data point will have zero-variance. 
134 | # assign a fudge factor in this case 135 | variances[variances == 0.] = var_reg 136 | 137 | return centers, variances, labels 138 | 139 | 140 | def initialize_radii(Y, reg=1e-5): 141 | """Initialize radii to normalized average of out-degree and in-degree 142 | over time. 143 | """ 144 | radii = 0.5 * (Y.sum(axis=(0, 1)) + Y.sum(axis=(0, 2))) 145 | radii /= Y.sum() 146 | 147 | # radii can be zero if no edges are present. Add a small amount 148 | # of social reach to each radii in this case. 149 | if np.any(radii == 0.): 150 | radii += reg 151 | radii /= np.sum(radii) 152 | 153 | return radii 154 | -------------------------------------------------------------------------------- /dynetlsm/model_selection/approx_bic.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..network_likelihoods import compute_gaussian_likelihood 4 | from ..network_likelihoods import dynamic_network_loglikelihood_undirected 5 | from ..network_likelihoods import dynamic_network_loglikelihood_directed 6 | from ..array_utils import nondiag_indices_from_3d 7 | 8 | 9 | __all__ = ['select_bic'] 10 | 11 | 12 | class DynamicNetworkMixtureModel: 13 | def __init__(self, beta, init_weights, trans_weights, X, mu, sigma, lmbda, 14 | z, intercept, radii=None): 15 | self.beta = beta 16 | self.init_weights = init_weights 17 | self.trans_weights = trans_weights 18 | self.X = X 19 | self.mu = mu 20 | self.sigma = sigma 21 | self.lmbda = lmbda 22 | self.z = z 23 | self.intercept = intercept 24 | self.radii = radii 25 | 26 | 27 | def calculate_cluster_counts_t(model): 28 | n_burn = model.n_burn_ 29 | 30 | z = model.zs_ 31 | n_iter, n_time_steps, _ = z.shape 32 | n_burn = n_burn if n_burn is not None else 0 33 | 34 | counts = np.zeros((n_time_steps, int(n_iter - n_burn)), dtype=np.int) 35 | for t in range(n_time_steps): 36 | for i in range(n_iter - n_burn): 37 | n_clusters = np.unique(z[i + n_burn, t]).shape[0] 38 | counts[t, i] = n_clusters 39 | return counts 40 | 41 | 42 | def calculate_cluster_counts(model): 43 | n_burn = model.n_burn_ 44 | 45 | z = model.zs_ 46 | n_iter = z.shape[0] 47 | n_burn = n_burn if n_burn is not None else 0 48 | 49 | counts = np.zeros(int(n_iter - n_burn), dtype=np.int) 50 | for i in range(n_iter - n_burn): 51 | n_clusters = np.unique(z[i + n_burn].ravel()).shape[0] 52 | counts[i] = n_clusters 53 | return counts 54 | 55 | 56 | def latent_marginal_loglikelihood(X, init_w, trans_w, mu, sigma, lmbda): 57 | n_time_steps, n_nodes, _ = X.shape 58 | n_components = sigma.shape[0] 59 | 60 | loglik = 0.0 61 | for i in range(n_nodes): 62 | gauss_loglik = compute_gaussian_likelihood(X[:, i], mu, sigma, lmbda, 63 | normalize=False) 64 | fwds_msg = init_w * gauss_loglik[0] 65 | c = np.sum(fwds_msg) 66 | loglik += np.log(c) 67 | fwds_msg /= c 68 | 69 | for t in range(1, n_time_steps): 70 | fwds_msg = (gauss_loglik[t] * 71 | np.dot(trans_w[t].T, fwds_msg.reshape(-1, 1)).ravel()) 72 | c = np.sum(fwds_msg) 73 | loglik += np.log(c) 74 | fwds_msg /= c 75 | 76 | return loglik 77 | 78 | 79 | def select_bic(model): 80 | n_time_steps, n_nodes, _ = model.Y_fit_.shape 81 | n_burn = model.n_burn_ 82 | 83 | # determine model sizes available in the posterior samples 84 | counts = calculate_cluster_counts(model) 85 | 86 | bic = [] 87 | models = [] 88 | for k in np.unique(counts): 89 | # determine MAP for model size k 90 | mask = counts != k 91 | map_id = np.ma.array(model.logps_[n_burn:], mask=mask).argmax() + n_burn 92 | 93 | # extract MAP estimators 94 | 
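        # (map_id picks the post-burn-in draw with the highest stored
        # log-posterior among samples whose active cluster count equals k)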
intercept = model.intercepts_[map_id] 95 | X = model.Xs_[map_id] 96 | mu = model.mus_[map_id] 97 | sigma = model.sigmas_[map_id] 98 | beta = model.betas_[map_id] 99 | weights = model.weights_[map_id] 100 | lmbda = model.lambdas_[map_id] 101 | radii = model.radiis_[map_id] if model.is_directed else None 102 | 103 | # re-normalize weights 104 | active_clusters = np.unique(model.zs_[map_id].ravel()) 105 | active_mask = np.in1d(np.arange(model.n_components), active_clusters) 106 | 107 | beta = beta[active_clusters] 108 | beta /= beta.sum() 109 | 110 | init_w = weights[0, 0, active_clusters] 111 | init_w /= init_w.sum() 112 | 113 | trans_w = np.zeros((n_time_steps, k, k), dtype=np.float64) 114 | for t in range(1, n_time_steps): 115 | trans_w[t] = weights[t, active_clusters][:, active_clusters] 116 | trans_w[t] /= np.sum(trans_w[t], axis=1).reshape(-1, 1) 117 | 118 | # filter cluster components 119 | mu = mu[active_clusters] 120 | sigma = sigma[active_clusters] 121 | 122 | # BIC component for P(Y | X) 123 | if model.is_directed: 124 | loglik_k = dynamic_network_loglikelihood_directed( 125 | model.Y_fit_, X, 126 | intercept_in=intercept[0], 127 | intercept_out=intercept[1], 128 | radii=radii) 129 | bic_k = -2 * loglik_k 130 | 131 | n_params = 2 + n_nodes 132 | nondiag_indices = nondiag_indices_from_3d(model.Y_fit_) 133 | bic_k += n_params * np.log(np.sum(model.Y_fit_[nondiag_indices])) 134 | else: 135 | loglik_k = dynamic_network_loglikelihood_undirected( 136 | model.Y_fit_, X, intercept) 137 | bic_k = -2 * loglik_k 138 | bic_k += np.log(0.5 * ( 139 | np.sum(model.Y_fit_) - np.einsum('ikk', model.Y_fit_).sum())) 140 | 141 | # BIC component for P(X | G) = P(X | mu, sigma, w) 142 | bic_k -= 2 * latent_marginal_loglikelihood( 143 | X, init_w, trans_w, mu, sigma, lmbda) 144 | 145 | n_params = ((model.n_features + 1) * k + # cluster params 146 | (k - 1) + # beta 147 | (k - 1) + # init_weights 148 | (n_time_steps - 1) * k * (k - 1)) # trans_weights 149 | bic_k += n_params * np.log(n_nodes * n_time_steps) 150 | 151 | model_k = DynamicNetworkMixtureModel(init_weights=init_w, 152 | trans_weights=trans_w, 153 | beta=beta, 154 | X=X, mu=mu, sigma=sigma, 155 | lmbda=lmbda, 156 | z=model.zs_[map_id], 157 | intercept=intercept, 158 | radii=radii) 159 | bic.append([k, bic_k, loglik_k, map_id]) 160 | models.append(model_k) 161 | 162 | return np.array(bic), models, counts 163 | -------------------------------------------------------------------------------- /dynetlsm/sample_labels.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.utils import check_random_state 4 | from .gaussian_likelihood_fast import compute_gaussian_likelihood 5 | from .gaussian_likelihood_fast import spherical_normal_log_pdf 6 | 7 | 8 | def log_normalize(probas): 9 | max_proba = np.max(probas) 10 | probas -= max_proba 11 | np.exp(probas, probas) 12 | probas /= np.sum(probas) 13 | return probas 14 | 15 | 16 | def sample_categorical(probas, rng): 17 | cdf = np.cumsum(probas) 18 | u = rng.uniform(0, cdf[-1]) 19 | return np.sum(u > cdf) 20 | 21 | 22 | def sample_labels_gibbs(X, mu, sigma, lmbda, w0, w, random_state=None): 23 | random_state = check_random_state(random_state) 24 | 25 | n_time_steps, n_nodes, _ = X.shape 26 | n_components = sigma.shape[0] 27 | 28 | # initialize cluster statistics 29 | # NOTE: n[0, 0, k] stores transitions for initial distribution 30 | n = np.zeros((n_time_steps, n_components, n_components)) 31 | resp = np.zeros((n_time_steps, n_nodes, 
n_components), dtype=int)
32 |     nk = np.zeros((n_time_steps, n_components), dtype=int)
33 |
34 |     # initialize labels
35 |     z = np.zeros((n_time_steps, n_nodes), dtype=int)
36 |
37 |     # store sample probabilities
38 |     probas = np.zeros(n_components, dtype=np.float64)
39 |
40 |     # sample labels for each node
41 |     for t in range(n_time_steps):
42 |         for i in range(n_nodes):
43 |             # FIXME: using 1e-5 hack to avoid log of zero
44 |             if t == 0:
45 |                 for k in range(n_components):
46 |                     probas[k] = (np.log(w0[k] + 1e-5) +
47 |                                  spherical_normal_log_pdf(X[t, i],
48 |                                                           mu[k],
49 |                                                           sigma[k]))
50 |             else:
51 |                 for k in range(n_components):
52 |                     probas[k] = (np.log(w[z[t-1, i], k] + 1e-5) +
53 |                                  spherical_normal_log_pdf(
54 |                                      X[t, i],
55 |                                      lmbda * mu[k] + (1 - lmbda) * X[t-1, i],
56 |                                      sigma[k]))
57 |
58 |             # sample zt
59 |             probas = log_normalize(probas)
60 |             z[t, i] = sample_categorical(probas, random_state)
61 |
62 |             # update statistics
63 |             if t == 0:
64 |                 n[0, 0, z[t, i]] += 1
65 |             else:
66 |                 n[t, z[t-1, i], z[t, i]] += 1
67 |             resp[t, i, z[t, i]] = 1
68 |             nk[t, z[t, i]] += 1
69 |
70 |     return z, n, nk, resp
71 |
72 |
73 | def sample_labels_block_lpcm(
74 |         X, mu, sigma, lmbda, init_weights, trans_weights, random_state=None):
75 |     random_state = check_random_state(random_state)
76 |
77 |     n_time_steps, n_nodes, _ = X.shape
78 |     n_components = sigma.shape[0]
79 |
80 |     # initialize message passing variables
81 |     bwds_msg = np.ones((n_time_steps, n_components),
82 |                        dtype=np.float64)
83 |     partial_marg = np.zeros((n_time_steps, n_components),
84 |                             dtype=np.float64)
85 |
86 |     # initialize cluster statistics
87 |     # NOTE: n[0, 0, k] stores transitions for initial distribution
88 |     n = np.zeros((n_time_steps, n_components, n_components))
89 |     resp = np.zeros((n_time_steps, n_nodes, n_components), dtype=int)
90 |     nk = np.zeros((n_time_steps, n_components), dtype=int)
91 |
92 |     # initialize labels
93 |     z = np.zeros((n_time_steps, n_nodes), dtype=int)
94 |
95 |     # sample labels for each node
96 |     for i in range(n_nodes):
97 |         # calculate likelihood of X_t^i under all groups
98 |         # n_time_steps x n_components
99 |         likelihood = compute_gaussian_likelihood(X[:, i], mu, sigma,
100 |                                                  lmbda, normalize=False)
101 |
102 |         # calculate backwards messages and partial likelihoods
103 |         # (phi_k * m_k)
104 |         for t in range(n_time_steps - 1, 0, -1):
105 |             partial_marg[t] = likelihood[t] * bwds_msg[t]
106 |             bwds_msg[t-1] = np.dot(
107 |                 trans_weights, partial_marg[t].reshape(-1, 1)).ravel()
108 |
109 |             # helps with underflow (could also divide by maximum)
110 |             bwds_msg[t-1] /= np.sum(bwds_msg[t-1])
111 |         partial_marg[0] = likelihood[0] * bwds_msg[0]
112 |
113 |         # sample labels forward in time
114 |         for t in range(n_time_steps):
115 |             if t == 0:
116 |                 probas = init_weights * partial_marg[0]
117 |             else:
118 |                 probas = trans_weights[z[t-1, i]] * partial_marg[t]
119 |
120 |             # sample zt
121 |             z[t, i] = sample_categorical(probas, random_state)
122 |
123 |             # update statistics
124 |             if t == 0:
125 |                 n[0, 0, z[t, i]] += 1
126 |             else:
127 |                 n[t, z[t-1, i], z[t, i]] += 1
128 |             resp[t, i, z[t, i]] = 1
129 |             nk[t, z[t, i]] += 1
130 |
131 |     return z, n, nk, resp
132 |
133 |
134 | def sample_labels_block(X, mu, sigma, lmbda, w, random_state=None):
135 |     random_state = check_random_state(random_state)
136 |
137 |     n_time_steps, n_nodes, _ = X.shape
138 |     n_components = sigma.shape[0]
139 |
140 |     # initialize message passing variables
141 |     bwds_msg = np.ones((n_time_steps, n_components),
142 |                        dtype=np.float64)
143 |     partial_marg = 
np.zeros((n_time_steps, n_components), 144 | dtype=np.float64) 145 | 146 | # initialize cluster statistics 147 | # NOTE: n[0, 0, k] stores transitions for initial distribution 148 | n = np.zeros((n_time_steps, n_components, n_components)) 149 | resp = np.zeros((n_time_steps, n_nodes, n_components), dtype=np.int) 150 | nk = np.zeros((n_time_steps, n_components), dtype=np.int) 151 | 152 | # initialize labels 153 | z = np.zeros((n_time_steps, n_nodes), dtype=np.int) 154 | 155 | # sample labels for each node 156 | for i in range(n_nodes): 157 | # calculate likelihood of X_t^i under all groups 158 | # n_time_steps x n_components 159 | likelihood = compute_gaussian_likelihood(X[:, i], mu, sigma, 160 | lmbda, normalize=False) 161 | 162 | # calculate backwards messages and partial likelihoods 163 | # (phi_k * m_k) 164 | for t in range(n_time_steps - 1, 0, -1): 165 | partial_marg[t] = likelihood[t] * bwds_msg[t] 166 | bwds_msg[t-1] = np.dot(w[t], partial_marg[t].reshape(-1, 1)).ravel() 167 | 168 | # helps with underflow (could also divide by maximum) 169 | bwds_msg[t-1] /= np.sum(bwds_msg[t-1]) 170 | partial_marg[0] = likelihood[0] * bwds_msg[0] 171 | 172 | # sample labels forward in time 173 | for t in range(n_time_steps): 174 | if t == 0: 175 | probas = w[0, 0] * partial_marg[0] 176 | else: 177 | probas = w[t, z[t-1, i]] * partial_marg[t] 178 | 179 | # sample zt 180 | z[t, i] = sample_categorical(probas, random_state) 181 | 182 | # update statistics 183 | if t == 0: 184 | n[0, 0, z[t, i]] += 1 185 | else: 186 | n[t, z[t-1, i], z[t, i]] += 1 187 | resp[t, i, z[t, i]] = 1 188 | nk[t, z[t, i]] += 1 189 | 190 | return z, n, nk, resp 191 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/sampson.npy: -------------------------------------------------------------------------------- 1 | 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 4 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 5 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 6 | 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 7 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 8 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 9 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 10 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 11 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 12 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 13 | 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 14 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 15 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 16 | 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 17 | 0.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 18 | 1.000000000000000000e+00 1.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 19 | -------------------------------------------------------------------------------- /dynetlsm/case_control_likelihood.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import numbers 3 | import numpy as np 4 | 5 | from sklearn.utils import check_random_state 6 | 7 | 8 | class CaseControlSampler(abc.ABC): 9 | def __init__(self, 10 | n_control=100, 11 | n_resample=100, 12 | random_state=None): 13 | self.n_control = n_control 14 | self.n_resample = n_resample 15 | self.random_state = random_state 16 | 17 | self.n_iter = 0 18 | 19 | 
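    # Subclasses implement init(Y), which precomputes degrees and edge lists
    # from an adjacency array Y of shape (n_time_steps, n_nodes, n_nodes),
    # and sample(), which redraws the control nodes used in the case-control
    # likelihood approximation. A sketch of the intended flow (argument
    # values are illustrative):
    #
    #   sampler = DirectedCaseControlSampler(n_control=100).init(Y)
    #   control_in, control_out = sampler.resample()
    #
    # resample() returns the current draw, refreshing it every `n_resample`
    # calls.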
@abc.abstractmethod 20 | def init(self, Y): 21 | pass 22 | 23 | @abc.abstractmethod 24 | def sample(self): 25 | pass 26 | 27 | def resample(self): 28 | if self.n_resample is not None and self.n_iter % self.n_resample == 0.: 29 | self.control_nodes_in_, self.control_nodes_out_ = self.sample() 30 | 31 | self.n_iter += 1 32 | 33 | return self.control_nodes_in_, self.control_nodes_out_ 34 | 35 | 36 | class DirectedCaseControlSampler(CaseControlSampler): 37 | def init(self, Y): 38 | n_time_steps, n_nodes, _ = Y.shape 39 | 40 | if isinstance(self.n_control, (numbers.Integral, np.integer)): 41 | self.n_control_ = self.n_control 42 | else: 43 | self.n_control_ = int(self.n_control * n_nodes) 44 | 45 | # compute in-degree / out-degree of each node 46 | self.degrees_ = np.zeros((n_time_steps, n_nodes, 2), dtype=np.int) 47 | for t in range(n_time_steps): 48 | self.degrees_[t, :, 0] = Y[t].sum(axis=0) # in-degree 49 | self.degrees_[t, :, 1] = Y[t].sum(axis=1) # out-degree 50 | 51 | # store indices of edges, i.e. Y_ijt = 1 52 | max_in_degree = int(np.max(self.degrees_[:, :, 0])) 53 | max_out_degree = int(np.max(self.degrees_[:, :, 1])) 54 | self.in_edges_ = np.zeros((n_time_steps, n_nodes, max_in_degree), 55 | dtype=np.int) 56 | self.out_edges_ = np.zeros((n_time_steps, n_nodes, max_out_degree), 57 | dtype=np.int) 58 | for t in range(n_time_steps): 59 | for i in range(n_nodes): 60 | indices = np.where(Y[t, i, :] == 1)[0] 61 | n_edges = indices.shape[0] 62 | if n_edges: 63 | self.out_edges_[t, i, :n_edges] = indices 64 | 65 | indices = np.where(Y[t, :, i] == 1)[0] 66 | n_edges = indices.shape[0] 67 | if n_edges: 68 | self.in_edges_[t, i, :n_edges] = indices 69 | 70 | self.control_nodes_in_, self.control_nodes_out_ = self.sample() 71 | self.n_iter += 1 72 | 73 | return self 74 | 75 | def sample(self): 76 | rng = check_random_state(self.random_state) 77 | n_time_steps, n_nodes, _ = self.out_edges_.shape 78 | 79 | control_nodes_out = np.full((n_time_steps, n_nodes, self.n_control_), 80 | -1.0, dtype=np.int) 81 | control_nodes_in = np.full((n_time_steps, n_nodes, self.n_control_), 82 | -1.0, dtype=np.int) 83 | for t in range(n_time_steps): 84 | for i in range(n_nodes): 85 | out_degree = self.degrees_[t, i, 1] 86 | n_zeros = n_nodes - out_degree - 1 87 | if n_zeros < self.n_control_: 88 | n_sample = n_zeros 89 | else: 90 | n_sample = self.n_control_ 91 | 92 | edges = set.difference( 93 | set(range(n_nodes)), 94 | self.out_edges_[t, i, :out_degree].tolist() + [i]) 95 | control_nodes_out[t, i, :n_sample] = rng.choice(list(edges), 96 | size=n_sample, 97 | replace=False) 98 | 99 | in_degree = self.degrees_[t, i, 0] 100 | n_zeros = n_nodes - in_degree - 1 101 | if n_zeros < self.n_control_: 102 | n_sample = n_zeros 103 | else: 104 | n_sample = self.n_control_ 105 | 106 | edges = set.difference( 107 | set(range(n_nodes)), 108 | self.in_edges_[t, i, :in_degree].tolist() + [i]) 109 | control_nodes_in[t, i, :n_sample] = rng.choice(list(edges), 110 | size=n_sample, 111 | replace=False) 112 | return control_nodes_in, control_nodes_out 113 | 114 | 115 | class MissingDirectedCaseControlSampler(CaseControlSampler): 116 | def init(self, Y): 117 | n_time_steps, n_nodes, _ = Y.shape 118 | 119 | if isinstance(self.n_control, (numbers.Integral, np.integer)): 120 | self.n_control_ = self.n_control 121 | else: 122 | self.n_control_ = int(self.n_control * n_nodes) 123 | 124 | # compute in-degree / out-degree of each node 125 | self.degrees_ = np.zeros((n_time_steps, n_nodes, 2), dtype=np.int) 126 | for t in range(n_time_steps): 
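            # Y[t] is read as Y[t, i, j] = 1 meaning an edge i -> j, so
            # summing over axis 0 (columns) counts edges into a node while
            # summing over axis 1 (rows) counts edges out of it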
127 | self.degrees_[t, :, 0] = Y[t].sum(axis=0) # in-degree 128 | self.degrees_[t, :, 1] = Y[t].sum(axis=1) # out-degree 129 | 130 | # store indices of edges, i.e. Y_ijt = 1 131 | max_in_degree = int(np.max(self.degrees_[:, :, 0])) 132 | max_out_degree = int(np.max(self.degrees_[:, :, 1])) 133 | self.in_edges_ = np.zeros((n_time_steps, n_nodes, max_in_degree), 134 | dtype=np.int) 135 | self.out_edges_ = np.zeros((n_time_steps, n_nodes, max_out_degree), 136 | dtype=np.int) 137 | for t in range(n_time_steps): 138 | for i in range(n_nodes): 139 | indices = np.where(Y[t, i, :] == 1)[0] 140 | n_edges = indices.shape[0] 141 | if n_edges: 142 | self.out_edges_[t, i, :n_edges] = indices 143 | 144 | indices = np.where(Y[t, :, i] == 1)[0] 145 | n_edges = indices.shape[0] 146 | if n_edges: 147 | self.in_edges_[t, i, :n_edges] = indices 148 | 149 | # determine edges (Y_ijt = 1 or Y_jit = 1 for at least one time step) 150 | self.edge_list_ = [] 151 | for i in range(n_nodes): 152 | mask = np.logical_or(Y[:, i, :] == 1, Y[:, :, i] == 1) 153 | mask = mask.sum(axis=0) 154 | self.edge_list_.append(np.unique(np.where(mask > 0)[0])) 155 | 156 | self.control_nodes_ = self.sample() 157 | self.n_iter += 1 158 | 159 | return self 160 | 161 | def sample(self): 162 | rng = check_random_state(self.random_state) 163 | n_nodes = len(self.edge_list_) 164 | 165 | # TODO: n_control_samples can be a fraction of total number of nodes 166 | 167 | control_nodes = np.zeros((n_nodes, self.n_control_), dtype=np.int) 168 | for i in range(n_nodes): 169 | # stratify sample based one connections vs. non-connections 170 | n_connected = int(self.edge_list_[i].shape[0] / n_nodes * 171 | self.n_control_) 172 | if self.edge_list_[i].shape[0] > 0: 173 | n_connected = max(n_connected, 1) 174 | 175 | control_nodes[i, :n_connected] = rng.choice(self.edge_list_[i], 176 | size=n_connected, 177 | replace=False) 178 | 179 | edges = set.difference( 180 | set(range(n_nodes)), self.edge_list_[i].tolist() + [i]) 181 | n_remaining = self.n_control_ - n_connected 182 | control_nodes[i, n_connected:] = rng.choice(list(edges), 183 | size=n_remaining, 184 | replace=False) 185 | 186 | return control_nodes 187 | -------------------------------------------------------------------------------- /examples/inhomogeneous_simulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the time-inhomogeneous simulations found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | import glob 7 | import os 8 | import plac 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | 15 | from sklearn.utils import check_random_state 16 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 17 | 18 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 19 | from dynetlsm.datasets import inhomogeneous_simulation 20 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 21 | from dynetlsm.model_selection import minimize_posterior_expected_vi 22 | from dynetlsm.model_selection import train_test_split 23 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 24 | from dynetlsm.network_statistics import density, modularity 25 | 26 | 27 | # NOTE: This is meant to be run in parallel on a computer cluster 28 | n_reps = 50 29 | out_dir = 'results' 30 | 31 | # choose between easy and hard 32 | 
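# ('hard' and 'easy' are the two settings passed to
# inhomogeneous_simulation's `simulation_type` argument in this script)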
sim_type = 'hard'
33 | # sim_type = 'easy'
34 |
35 | # Set to True for sensitivity analysis
36 | sample_hyperparameters = False
37 |
38 |
39 | def counts_per_time_step(z):
40 |     n_time_steps = z.shape[0]
41 |     group_counts = np.zeros(n_time_steps, dtype=int)
42 |     for t in range(n_time_steps):
43 |         group_counts[t] = np.unique(z[t]).shape[0]
44 |
45 |     return group_counts
46 |
47 |
48 | def posterior_per_time_step(model):
49 |     n_time_steps = model.Y_fit_.shape[0]
50 |     probas = np.zeros((n_time_steps, model.n_components + 1))
51 |     for t in range(n_time_steps):
52 |         freq = model.posterior_group_counts_[t]
53 |         index = model.posterior_group_ids_[t]
54 |         probas[t, index] = freq / freq.sum()
55 |
56 |     return probas
57 |
58 |
59 | def benchmark_single(n_iter=10000, burn=5000, tune=1000,
60 |                      outfile_name='benchmark',
61 |                      sim_type='hard', sample_hyperparameters=False,
62 |                      random_state=None):
63 |     random_state = check_random_state(random_state)
64 |
65 |     Y, X, z, intercept, _, _, _, _ = inhomogeneous_simulation(
66 |         n_nodes=120, random_state=random_state, simulation_type=sim_type)
67 |
68 |     Y, Y_ahead = Y[:9], Y[9]
69 |     Y_obs, test_indices = train_test_split(
70 |         Y, test_size=0.1, random_state=random_state)
71 |
72 |     # fit HDP-LPCM
73 |     if sample_hyperparameters:
74 |         lambda_prior = random_state.uniform(low=0.5)
75 |         alpha_kappa_rate = random_state.uniform(low=0.001, high=1)
76 |         gamma_prior_rate = random_state.uniform(low=0.001, high=1)
77 |         alpha_init_rate = random_state.uniform(low=0.001, high=1)
78 |         model = DynamicNetworkHDPLPCM(n_iter=n_iter,
79 |                                       burn=burn,
80 |                                       tune=tune,
81 |                                       tune_interval=1000,
82 |                                       is_directed=False,
83 |                                       selection_type='vi',
84 |                                       lambda_prior=lambda_prior,
85 |                                       lambda_variance_prior=1,
86 |                                       alpha_kappa_shape=1,
87 |                                       alpha_kappa_rate=alpha_kappa_rate,
88 |                                       gamma_prior_rate=gamma_prior_rate,
89 |                                       alpha_init_rate=alpha_init_rate,
90 |                                       n_components=10,
91 |                                       random_state=random_state).fit(Y_obs)
92 |     else:
93 |         model = DynamicNetworkHDPLPCM(n_iter=n_iter,
94 |                                       burn=burn,
95 |                                       tune=tune,
96 |                                       tune_interval=1000,
97 |                                       is_directed=False,
98 |                                       selection_type='vi',
99 |                                       n_components=10,
100 |                                       random_state=random_state).fit(Y_obs)
101 |
102 |     # MAP: number of clusters per time point
103 |     map_counts = counts_per_time_step(model.z_)
104 |
105 |     # Posterior group count probabilities
106 |     probas = posterior_per_time_step(model)
107 |     results = pd.DataFrame(probas)
108 |
109 |     # create dataframe of results
110 |     results['map_counts'] = map_counts
111 |
112 |     # goodness-of-fit metrics for MAP
113 |     results['insample_auc'] = model.auc_
114 |     results['outsample_auc'] = out_of_sample_auc(
115 |         Y, model.missings_, test_indices)
116 |
117 |     indices = np.tril_indices(Y.shape[1], k=-1)
118 |
119 |     pred_probas = model.forecast_probas_marginalized_[indices]
120 |     results['onestep_auc'] = roc_auc_score(
121 |         Y_ahead[indices], pred_probas)
122 |
123 |     # Variation of Information
124 |     results['vi'] = variation_of_information(
125 |         z[:9].ravel(), model.z_.ravel())
126 |     vi = 0.
127 |     for t in range(Y.shape[0]):
128 |         vi_t = variation_of_information(z[t], model.z_[t])
129 |         results['vi_{}'.format(t)] = vi_t
130 |         vi += vi_t
131 |     results['vi_avg'] = vi / Y.shape[0]
132 |
133 |     # adjusted rand index
134 |     results['rand_index'] = adjusted_rand_score(
135 |         z[:9].ravel(), model.z_.ravel())
136 |     adj_rand = 0.
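    # ARI is invariant to label permutations, e.g.
    # adjusted_rand_score([0, 0, 1, 1], [1, 1, 0, 0]) == 1.0, so the
    # per-time-step scores below need no label alignment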
137 | for t in range(Y.shape[0]): 138 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 139 | results['rand_{}'.format(t)] = adj_t 140 | adj_rand += adj_t 141 | results['rand_avg'] = adj_rand / Y.shape[0] 142 | 143 | results.to_csv(outfile_name, index=False) 144 | 145 | 146 | # create a directory to store the results 147 | if not os.path.exists(out_dir): 148 | os.mkdir(out_dir) 149 | 150 | 151 | for i in range(n_reps): 152 | benchmark_single( 153 | n_iter=35000, burn=10000, tune=5000, random_state=i, 154 | sim_type=sim_type, sample_hyperparameters=sample_hyperparameters, 155 | outfile_name=os.path.join( 156 | out_dir, 'benchmark_{}.csv'.format(i))) 157 | 158 | 159 | # calculate median metric values 160 | n_time_steps = 9 161 | n_groups = 10 162 | 163 | n_files = len(glob.glob('results/*')) 164 | stat_names = ['insample_auc', 'outsample_auc', 'onestep_auc', 'vi', 165 | 'rand_index', 'vi_avg', 'rand_avg'] 166 | data = np.zeros((n_files, len(stat_names))) 167 | for i, file_name in enumerate(glob.glob('results/*')): 168 | df = pd.read_csv(file_name) 169 | data[i] = df.loc[0, stat_names].values 170 | 171 | data = pd.DataFrame(data, columns=stat_names) 172 | print('Median Metrics:') 173 | print(data.median(axis=0)) 174 | print('Metrics SD:') 175 | print(data.std(axis=0)) 176 | 177 | # plot posterior boxplots 178 | data = {'probas': [], 'cluster_number': [], 't': []} 179 | for file_name in glob.glob('results/*'): 180 | df = pd.read_csv(file_name) 181 | for t in range(n_time_steps): 182 | for i in range(1, n_groups): 183 | data['probas'].append(df.iloc[t, i]) 184 | data['cluster_number'].append(i) 185 | data['t'].append(t + 1) 186 | 187 | data = pd.DataFrame(data) 188 | 189 | plt.rc('font', family='sans-serif', size=16) 190 | g = sns.catplot(x='cluster_number', y='probas', col='t', 191 | col_wrap=3, kind='box', data=data) 192 | 193 | for ax in g.axes: 194 | ax.set_ylabel('posterior probability') 195 | ax.set_xlabel('# of groups') 196 | 197 | g.fig.tight_layout() 198 | 199 | plt.savefig('cluster_posterior.png', dpi=300) 200 | 201 | # clear figure 202 | plt.clf() 203 | 204 | # plot selected number of groups for each simulation 205 | data = np.zeros((n_time_steps, n_groups), dtype=np.int) 206 | for sim_id, file_name in enumerate(glob.glob('results/*')): 207 | df = pd.read_csv(file_name) 208 | for t in range(n_time_steps): 209 | data[t, df.iloc[t, n_groups + 1] - 1] +=1 210 | 211 | data = pd.DataFrame(data, columns=range(1, n_groups + 1), index=range(1, n_time_steps + 1)) 212 | mask = data.values == 0 213 | 214 | g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask) 215 | g.set_xlabel('# of groups') 216 | g.set_ylabel('t') 217 | plt.savefig('num_clusters.png', dpi=300) 218 | -------------------------------------------------------------------------------- /dynetlsm/sample_latent_positions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.utils import check_random_state 5 | 6 | from .network_likelihoods import ( 7 | partial_loglikelihood, 8 | directed_partial_loglikelihood, 9 | approx_directed_partial_loglikelihood 10 | ) 11 | 12 | 13 | def sample_control_nodes(edge_list, n_samples=100, random_state=None): 14 | rng = check_random_state(random_state) 15 | n_nodes = len(edge_list) 16 | 17 | # TODO: n_samples can be a fraction of total number of nodes 18 | 19 | control_nodes = np.zeros((n_nodes, n_samples), dtype=np.int) 20 | for i in range(n_nodes): 21 | # stratify sample 
/dynetlsm/sample_latent_positions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from scipy.sparse import csgraph 4 | from sklearn.utils import check_random_state 5 | 6 | from .network_likelihoods import ( 7 | partial_loglikelihood, 8 | directed_partial_loglikelihood, 9 | approx_directed_partial_loglikelihood 10 | ) 11 | 12 | 13 | def sample_control_nodes(edge_list, n_samples=100, random_state=None): 14 | rng = check_random_state(random_state) 15 | n_nodes = len(edge_list) 16 | 17 | # TODO: n_samples can be a fraction of total number of nodes 18 | 19 | control_nodes = np.zeros((n_nodes, n_samples), dtype=int) 20 | for i in range(n_nodes): 21 | # stratify sample based on connections vs. non-connections 22 | n_connected = int(edge_list[i].shape[0] / n_nodes * n_samples) 23 | if edge_list[i].shape[0] > 0: 24 | n_connected = max(n_connected, 1) 25 | 26 | control_nodes[i, :n_connected] = rng.choice(edge_list[i], 27 | size=n_connected, 28 | replace=False) 29 | 30 | non_neighbors = set.difference( 31 | set(range(n_nodes)), edge_list[i].tolist() + [i]) 32 | control_nodes[i, n_connected:] = rng.choice( 33 | list(non_neighbors), size=n_samples - n_connected, replace=False) 34 | 35 | return control_nodes 36 | 37 | 38 | def sample_control_edges(Y, n_samples=100, random_state=None): 39 | n_time_steps, n_nodes, _ = Y.shape 40 | 41 | n_edges = int(0.5 * n_nodes * (n_nodes + 1))  # triu_indices includes the diagonal 42 | edge_list = np.zeros((n_time_steps, n_edges, 2)) 43 | edges, non_edges = [], [] 44 | for t in range(n_time_steps): 45 | triu_indices = np.triu_indices_from(Y[t]) 46 | edge_list[t, :, 0] = triu_indices[0] 47 | edge_list[t, :, 1] = triu_indices[1] 48 | edges.append(np.where(Y[t][triu_indices] == 1)[0]) 49 | non_edges.append(np.where(Y[t][triu_indices] == 0)[0]) 50 | 51 | return edge_list, edges, non_edges 52 | 53 | 54 | def case_control_init(Y, is_directed=False, n_samples=100): 55 | n_time_steps, n_nodes, _ = Y.shape 56 | 57 | # compute in-degree / out-degree of each node 58 | degree = np.zeros((n_time_steps, n_nodes, 2), dtype=int) 59 | for t in range(n_time_steps): 60 | degree[t, :, 0] = Y[t].sum(axis=0) # in-degree 61 | degree[t, :, 1] = Y[t].sum(axis=1) # out-degree 62 | 63 | # store indices of edges, i.e. Y_ijt = 1 64 | max_in_degree = int(np.max(degree[:, :, 0])) 65 | max_out_degree = int(np.max(degree[:, :, 1])) 66 | in_edges = np.zeros((n_time_steps, n_nodes, max_in_degree), dtype=int) 67 | out_edges = np.zeros((n_time_steps, n_nodes, max_out_degree), dtype=int) 68 | for t in range(n_time_steps): 69 | for i in range(n_nodes): 70 | indices = np.where(Y[t, i, :] == 1)[0] 71 | n_edges = indices.shape[0] 72 | if n_edges: 73 | out_edges[t, i, :n_edges] = indices 74 | 75 | indices = np.where(Y[t, :, i] == 1)[0] 76 | n_edges = indices.shape[0] 77 | if n_edges: 78 | in_edges[t, i, :n_edges] = indices 79 | 80 | # determine edges (Y_ijt = 1 or Y_jit = 1 for at least one time step) 81 | edge_list = [] 82 | for i in range(n_nodes): 83 | mask = (np.logical_or(Y[:, i, :] == 1, Y[:, :, i] == 1)).astype(int) 84 | mask = mask.sum(axis=0) 85 | edge_list.append(np.unique(np.where(mask > 0)[0])) 86 | 87 | if is_directed: 88 | return degree, in_edges, out_edges, edge_list 89 | return degree[:, :, 0], in_edges, edge_list 90 | 91 |
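# Usage sketch for the case-control helpers above (illustrative only; the
# toy ring network is made up). Every node needs at least one neighbour so
# the stratified draw is well defined:
#
#     n_nodes = 10
#     Y_toy = np.zeros((n_nodes, n_nodes), dtype=int)
#     for i in range(n_nodes):
#         Y_toy[i, (i + 1) % n_nodes] = Y_toy[(i + 1) % n_nodes, i] = 1
#     edge_list = [np.where(Y_toy[i])[0] for i in range(n_nodes)]
#     controls = sample_control_nodes(edge_list, n_samples=5, random_state=0)
#     # each row holds one sampled neighbour followed by four non-neighbours
#     controls.shape  # (10, 5)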
92 | def sample_latent_positions(Y, X, intercept, tau_sq, sigma_sq, samplers, 93 | radii=None, is_directed=False, squared=False, 94 | case_control_sampler=None, random_state=None): 95 | rng = check_random_state(random_state) 96 | n_time_steps, n_nodes, _ = Y.shape 97 | 98 | for t in range(n_time_steps): 99 | for j in range(n_nodes): 100 | def logp(x): 101 | X[t, j] = x 102 | if is_directed: 103 | if case_control_sampler is not None: 104 | loglik = approx_directed_partial_loglikelihood( 105 | X[t], 106 | radii=radii, 107 | in_edges=case_control_sampler.in_edges_[t], 108 | out_edges=case_control_sampler.out_edges_[t], 109 | degree=case_control_sampler.degrees_[t], 110 | control_nodes_in=( 111 | case_control_sampler.control_nodes_in_[t]), 112 | control_nodes_out=( 113 | case_control_sampler.control_nodes_out_[t]), 114 | intercept_in=intercept[0], 115 | intercept_out=intercept[1], 116 | node_id=j, 117 | squared=squared) 118 | else: 119 | loglik = directed_partial_loglikelihood( 120 | Y[t], X[t], 121 | radii=radii, 122 | intercept_in=intercept[0], 123 | intercept_out=intercept[1], 124 | node_id=j, 125 | squared=squared) 126 | else: 127 | loglik = partial_loglikelihood(Y[t], X[t], 128 | intercept, j, 129 | squared=squared) 130 | 131 | # prior 132 | if t == 0: 133 | loglik -= 0.5 * np.sum(x * x) / tau_sq 134 | else: 135 | diff = x - X[t-1, j] 136 | loglik -= 0.5 * np.sum(diff * diff) / sigma_sq 137 | 138 | if t < (n_time_steps - 1): 139 | diff = X[t+1, j] - x 140 | loglik -= 0.5 * np.sum(diff * diff) / sigma_sq 141 | 142 | return loglik 143 | 144 | X[t, j] = samplers[t][j].step(X[t, j].copy(), logp, rng) 145 | 146 | return X 147 | 148 | 149 | def sample_latent_positions_mixture(Y, X, intercept, mu, sigma, lmbda, z, 150 | samplers, radii=None, is_directed=False, 151 | squared=False, case_control_sampler=None, 152 | random_state=None): 153 | rng = check_random_state(random_state) 154 | n_time_steps, n_nodes, _ = Y.shape 155 | 156 | for t in range(n_time_steps): 157 | for j in range(n_nodes): 158 | def logp(x): 159 | X[t, j] = x 160 | if is_directed: 161 | if case_control_sampler: 162 | loglik = approx_directed_partial_loglikelihood( 163 | X[t], 164 | radii=radii, 165 | in_edges=case_control_sampler.in_edges_[t], 166 | out_edges=case_control_sampler.out_edges_[t], 167 | degree=case_control_sampler.degrees_[t], 168 | control_nodes_in=( 169 | case_control_sampler.control_nodes_in_[t]), 170 | control_nodes_out=( 171 | case_control_sampler.control_nodes_out_[t]), 172 | intercept_in=intercept[0], 173 | intercept_out=intercept[1], 174 | node_id=j, 175 | squared=squared) 176 | else: 177 | loglik = directed_partial_loglikelihood( 178 | Y[t], X[t], 179 | radii=radii, 180 | intercept_in=intercept[0], 181 | intercept_out=intercept[1], 182 | node_id=j, squared=squared) 183 | else: 184 | loglik = partial_loglikelihood(Y[t], X[t], 185 | intercept, j, squared=squared) 186 | 187 | # prior P(X_t | X_{t-1}) 188 | if t == 0: 189 | diff = x - mu[z[t, j]] 190 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t, j]] 191 | else: 192 | diff = x - (1 - lmbda) * X[t-1, j] - lmbda * mu[z[t, j]] 193 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t, j]] 194 | 195 | # prior P(X_{t+1} | X_t) 196 | if t < (n_time_steps - 1): 197 | diff = (X[t+1, j] - (1 - lmbda) * x - 198 | lmbda * mu[z[t+1, j]]) 199 | loglik -= 0.5 * np.sum(diff * diff) / sigma[z[t+1, j]] 200 | 201 | return loglik 202 | 203 | X[t, j] = samplers[t][j].step(X[t, j].copy(), 204 | logp, rng) 205 | 206 | return X 207 | --------------------------------------------------------------------------------
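The prior terms subtracted inside the logp closures above are Gaussian transition densities up to additive constants, which cancel in the Metropolis acceptance ratio. A minimal check of that identity for the random-walk prior X_t | X_{t-1} ~ N(X_{t-1}, sigma_sq * I); scipy is assumed available here even though the module itself does not import it:

import numpy as np
from scipy.stats import multivariate_normal

sigma_sq = 0.5
x_prev = np.array([0.3, -0.1])
x = np.array([0.5, 0.2])

diff = x - x_prev
penalty = -0.5 * np.sum(diff * diff) / sigma_sq   # term used inside logp()

logpdf = multivariate_normal.logpdf(x, mean=x_prev, cov=sigma_sq * np.eye(2))
const = -np.log(2 * np.pi * sigma_sq)             # additive normalizer for d = 2
assert np.isclose(penalty + const, logpdf)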
/examples/homogeneous_simulation.py: -------------------------------------------------------------------------------- 1 | """ 2 | Runs the time-homogeneous simulations found in the 3 | paper 'A Bayesian nonparametric latent space approach to modeling evolving 4 | communities in dynamic networks' by Joshua Loyal and Yuguo Chen 5 | """ 6 | import glob 7 | import os 8 | import plac 9 | 10 | import pandas as pd 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | 15 | from sklearn.utils import check_random_state 16 | from sklearn.metrics import adjusted_rand_score, roc_auc_score 17 | 18 | from dynetlsm import DynamicNetworkHDPLPCM, DynamicNetworkLPCM 19 | from dynetlsm.datasets import synthetic_static_community_dynamic_network 20 | from dynetlsm.datasets import homogeneous_simulation 21 | from dynetlsm.model_selection.approx_bic import calculate_cluster_counts 22 | from dynetlsm.model_selection import minimize_posterior_expected_vi 23 | from dynetlsm.model_selection import train_test_split 24 | from dynetlsm.metrics import variation_of_information, out_of_sample_auc 25 | from dynetlsm.network_statistics import density, modularity 26 | 27 | 28 | # NOTE: This is meant to be run in parallel on a computer cluster 29 | n_reps = 50 30 | out_dir = 'results' 31 | 32 | # Choose between hdp_lpcm and lpcm 33 | model_type = 'hdp_lpcm' 34 | # model_type = 'lpcm' 35 | 36 | # Set to true for sensitivity analysis 37 | sample_hyperparameters = False 38 | 39 | # Choose between vi and map 40 | selection_type = 'vi' 41 | # selection_type = 'map' 42 | 43 | # Choose between easy and hard 44 | sim_type = 'hard' 45 | # sim_type = 'easy' 46 | 47 | 48 | def counts_per_time_step(z): 49 | n_time_steps = z.shape[0] 50 | group_counts = np.zeros(n_time_steps, dtype=int) 51 | for t in range(n_time_steps): 52 | group_counts[t] = np.unique(z[t]).shape[0] 53 | 54 | return group_counts 55 | 56 | def posterior_per_time_step(model): 57 | n_time_steps = model.Y_fit_.shape[0] 58 | probas = np.zeros((n_time_steps, model.n_components + 1)) 59 | for t in range(n_time_steps): 60 | freq = model.posterior_group_counts_[t] 61 | index = model.posterior_group_ids_[t] 62 | probas[t, index] = freq / freq.sum() 63 | 64 | return probas 65 | 66 | 67 | def benchmark_single(n_iter=10000, burn=5000, tune=1000, 68 | outfile_name='benchmark', 69 | model_type='hdp_lpcm', 70 | selection_type='map', 71 | sim_type='hard', 72 | sample_hyperparameters=False, 73 | random_state=None): 74 | random_state = check_random_state(random_state) 75 | 76 | Y, X, z, intercept, _, _, _, _ = homogeneous_simulation( 77 | n_time_steps=7, n_nodes=120, 78 | simulation_type=sim_type, 79 | random_state=random_state) 80 | 81 | Y, Y_ahead = Y[:6], Y[6] 82 | Y_obs, test_indices = train_test_split( 83 | Y, test_size=0.1, random_state=random_state) 84 | 85 | # fit the model (HDP-LPCM or LPCM) 86 | if model_type == 'hdp_lpcm': 87 | if sample_hyperparameters: 88 | lambda_prior = random_state.uniform(low=0.5) 89 | alpha_kappa_rate = random_state.uniform(low=0.001, high=1) 90 | gamma_prior_rate = random_state.uniform(low=0.001, high=1) 91 | alpha_init_rate = random_state.uniform(low=0.001, high=1) 92 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 93 | burn=burn, 94 | tune=tune, 95 | tune_interval=1000, 96 | is_directed=False, 97 | selection_type='vi', 98 | lambda_prior=lambda_prior, 99 | lambda_variance_prior=1, 100 | alpha_kappa_shape=1, 101 | alpha_kappa_rate=alpha_kappa_rate, 102 | gamma_prior_rate=gamma_prior_rate, 103 | alpha_init_rate=alpha_init_rate, 104 | n_components=10, 105 | random_state=random_state).fit(Y_obs) 106 | else: 107 | model = DynamicNetworkHDPLPCM(n_iter=n_iter, 108 | burn=burn, 109 | tune=tune, 110 | tune_interval=1000, 111 | is_directed=False, 112 | selection_type='vi', 113 | n_components=10, 114 | random_state=random_state).fit(Y_obs) 115 | else: 116 | model = DynamicNetworkLPCM(n_iter=n_iter, 117 | burn=burn, 118 | tune=tune, 119 | tune_interval=1000, 120 | is_directed=False, 121 | selection_type=selection_type, 122 | n_components=6, 123 | random_state=random_state).fit(Y_obs) 124 | 125 | # MAP: number of clusters per time point 126 | map_counts = counts_per_time_step(model.z_) 127 | 128 | # Posterior group count probabilities 129 | if model_type == 'hdp_lpcm': 130 | probas = posterior_per_time_step(model) 131 | results = pd.DataFrame(probas) 132 | else: 133 | results = pd.DataFrame() 134 | 135 | # create dataframe of results 136 | results['map_counts'] = map_counts 137 | 138 | # goodness-of-fit metrics for MAP 139 | results['insample_auc'] = model.auc_
140 | results['outsample_auc'] = out_of_sample_auc( 141 | Y, model.missings_, test_indices) 142 | 143 | # one-step ahead predictions 144 | indices = np.tril_indices(Y.shape[1], k=-1) 145 | 146 | pred_probas = model.forecast_probas_marginalized_[indices] 147 | results['onestep_auc'] = roc_auc_score( 148 | Y_ahead[indices], pred_probas) 149 | 150 | # Variation of Information 151 | results['vi'] = variation_of_information( 152 | z[:6].ravel(), model.z_[:6].ravel()) 153 | vi = 0. 154 | for t in range(Y.shape[0]): 155 | vi_t = variation_of_information(z[t], model.z_[t]) 156 | results['vi_{}'.format(t)] = vi_t 157 | vi += vi_t 158 | results['vi_avg'] = vi / Y.shape[0] 159 | 160 | 161 | # adjusted rand index 162 | results['rand_index'] = adjusted_rand_score( 163 | z[:6].ravel(), model.z_[:6].ravel()) 164 | adj_rand = 0. 165 | for t in range(Y.shape[0]): 166 | adj_t = adjusted_rand_score(z[t], model.z_[t]) 167 | results['rand_{}'.format(t)] = adj_t 168 | adj_rand += adj_t 169 | results['rand_avg'] = adj_rand / Y.shape[0] 170 | 171 | results.to_csv(outfile_name, index=False) 172 | 173 | 174 | # create a directory to store the results 175 | if not os.path.exists(out_dir): 176 | os.mkdir(out_dir) 177 | 178 | for i in range(n_reps): 179 | benchmark_single( 180 | n_iter=35000, burn=10000, tune=5000, random_state=i, 181 | model_type=model_type, selection_type=selection_type, 182 | sim_type=sim_type, sample_hyperparameters=sample_hyperparameters, 183 | outfile_name=os.path.join( 184 | out_dir, 'benchmark_{}.csv'.format(i))) 185 | 186 | 187 | # calculate median metric values 188 | n_time_steps = 6 189 | 190 | if model_type == 'lpcm': 191 | n_groups = 6 192 | else: 193 | n_groups = 10 194 | 195 | n_files = len(glob.glob('results/*')) 196 | stat_names = ['insample_auc', 'outsample_auc', 'onestep_auc', 'vi', 197 | 'rand_index', 'vi_avg', 'rand_avg'] 198 | data = np.zeros((n_files, len(stat_names))) 199 | for i, file_name in enumerate(glob.glob('results/*')): 200 | df = pd.read_csv(file_name) 201 | data[i] = df.loc[0, stat_names].values 202 | 203 | data = pd.DataFrame(data, columns=stat_names) 204 | print('Median Metrics:') 205 | print(data.median(axis=0)) 206 | print('Metrics SD:') 207 | print(data.std(axis=0)) 208 | 209 | # plot posterior boxplots 210 | data = {'probas': [], 'cluster_number': [], 't': []} 211 | for file_name in glob.glob('results/*'): 212 | df = pd.read_csv(file_name) 213 | for t in range(n_time_steps): 214 | for i in range(1, n_groups): 215 | data['probas'].append(df.iloc[t, i]) 216 | data['cluster_number'].append(i) 217 | data['t'].append(t + 1) 218 | 219 | data = pd.DataFrame(data) 220 | 221 | plt.rc('font', family='sans-serif', size=16) 222 | g = sns.catplot(x='cluster_number', y='probas', col='t', 223 | col_wrap=3, kind='box', data=data) 224 | 225 | for ax in g.axes: 226 | ax.set_ylabel('posterior probability') 227 | ax.set_xlabel('# of groups') 228 | 229 | g.fig.tight_layout() 230 | 231 | plt.savefig('cluster_posterior.png', dpi=300) 232 | 233 | # clear figure 234 | plt.clf() 235 | 236 | # plot selected number of groups for each simulation 237 | data = np.zeros((n_time_steps, n_groups), dtype=int) 238 | for sim_id, file_name in enumerate(glob.glob('results/*')): 239 | df = pd.read_csv(file_name) 240 | for t in range(n_time_steps): 241 | if model_type == 'lpcm': 242 | data[t, df.iloc[t, 0] - 1] += 1 243 | else: 244 | data[t, df.iloc[t, n_groups + 1] - 1] += 1 245 | 246 | data = pd.DataFrame(data, columns=range(1, n_groups + 1), index=range(1, n_time_steps + 1)) 247 | mask
= data.values == 0 248 | 249 | g = sns.heatmap(data, annot=True, cmap="Blues", cbar=False, mask=mask) 250 | g.set_xlabel('# of groups') 251 | g.set_ylabel('t') 252 | plt.savefig('num_clusters.png', dpi=300) 253 | -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s7-edges.csv: -------------------------------------------------------------------------------- 1 | Source,Target,Weight,Season 2 | DAENERYS,TYRION,179,7 3 | DAENERYS,JON,178,7 4 | CERSEI,JAIME,172,7 5 | ARYA,SANSA,156,7 6 | JON,TYRION,107,7 7 | LITTLEFINGER,SANSA,107,7 8 | DAVOS,JON,92,7 9 | BRONN,JAIME,85,7 10 | JON,SANSA,79,7 11 | MARWYN,SAM,75,7 12 | CERSEI,TYRION,74,7 13 | DAENERYS,VARYS,65,7 14 | BERIC,HOUND,63,7 15 | JON,JORAH,59,7 16 | HOUND,THOROS,57,7 17 | HOUND,TORMUND,57,7 18 | DAVOS,GENDRY,55,7 19 | JON,TORMUND,50,7 20 | DAVOS,TYRION,46,7 21 | TYRION,VARYS,45,7 22 | BRAN,SANSA,44,7 23 | CERSEI,QYBURN,44,7 24 | BERIC,JON,41,7 25 | CERSEI,EURON,38,7 26 | CERSEI,TYCHO,37,7 27 | JAIME,OLENNA,37,7 28 | JON,MISSANDEI,36,7 29 | DAENERYS,MISSANDEI,35,7 30 | JON,THEON,35,7 31 | DAENERYS,JORAH,34,7 32 | JAIME,TYRION,34,7 33 | ARYA,HOT_PIE,33,7 34 | CERSEI,DAENERYS,33,7 35 | CERSEI,MOUNTAIN,33,7 36 | DAENERYS,DAVOS,33,7 37 | JAIME,RANDYLL,33,7 38 | DAVOS,MISSANDEI,31,7 39 | GREY_WORM,MISSANDEI,31,7 40 | MISSANDEI,TYRION,31,7 41 | BERIC,THOROS,30,7 42 | GENDRY,JON,30,7 43 | BRIENNE,SANSA,29,7 44 | BERIC,TORMUND,27,7 45 | HOUND,JON,27,7 46 | JORAH,SAM,27,7 47 | EURON,JAIME,26,7 48 | HOUND,JORAH,26,7 49 | BRIENNE,PODRICK,25,7 50 | DICKON,RANDYLL,25,7 51 | ELLARIA,YARA,25,7 52 | JAIME,QYBURN,25,7 53 | ARYA,LITTLEFINGER,24,7 54 | CERSEI,JON,24,7 55 | DICKON,JAIME,24,7 56 | HARRAG,THEON,24,7 57 | JORAH,TORMUND,24,7 58 | THEON,YARA,24,7 59 | JAIME,MOUNTAIN,23,7 60 | SANSA,YOHN_ROYCE,23,7 61 | ARYA,BRAN,21,7 62 | BRAN,MEERA,21,7 63 | BRAN,SAM,21,7 64 | CERSEI,ELLARIA,21,7 65 | ELLARIA,TYENE,21,7 66 | JON,NED,21,7 67 | MELISANDRE,VARYS,21,7 68 | MISSANDEI,VARYS,21,7 69 | DAENERYS,JAIME,20,7 70 | GILLY,SAM,20,7 71 | JON,LITTLEFINGER,20,7 72 | ARYA,BRIENNE,19,7 73 | BRAN,LITTLEFINGER,19,7 74 | BRONN,TYRION,19,7 75 | EURON,YARA,19,7 76 | JORAH,THOROS,19,7 77 | BERIC,JORAH,18,7 78 | BRIENNE,HOUND,18,7 79 | EURON,THEON,18,7 80 | GENDRY,HOUND,18,7 81 | GREY_WORM,TYRION,18,7 82 | JON,VARYS,18,7 83 | JON,NIGHT_KING,17,7 84 | JORAH,TYRION,17,7 85 | SANSA,MAESTER_WOLKAN,17,7 86 | DAENERYS,OLENNA,16,7 87 | JORAH,MARWYN,16,7 88 | CERSEI,TYENE,15,7 89 | CERSEI,TYWIN,15,7 90 | GENDRY,TORMUND,15,7 91 | BRIENNE,LITTLEFINGER,14,7 92 | DAENERYS,RANDYLL,14,7 93 | DAVOS,JORAH,14,7 94 | GENDRY,THOROS,14,7 95 | GREY_WORM,VARYS,14,7 96 | MOUNTAIN,QYBURN,14,7 97 | NED,SANSA,14,7 98 | DAENERYS,GREY_WORM,13,7 99 | DAENERYS,MELISANDRE,13,7 100 | GENDRY,JORAH,13,7 101 | DAVOS,THEON,12,7 102 | JON,THOROS,12,7 103 | PODRICK,TYRION,12,7 104 | AERYS,DAENERYS,11,7 105 | BERIC,GENDRY,11,7 106 | BRONN,DICKON,11,7 107 | LITTLEFINGER,MAESTER_WOLKAN,11,7 108 | LITTLEFINGER,YOHN_ROYCE,11,7 109 | LYANNA_MORMONT,ROBETT,11,7 110 | THOROS,TORMUND,11,7 111 | BRIENNE,JAIME,10,7 112 | DAENERYS,DICKON,10,7 113 | DAENERYS,NIGHT_KING,10,7 114 | DAVOS,VARYS,10,7 115 | MISSANDEI,THEON,10,7 116 | MOUNTAIN,TYRION,10,7 117 | THEON,TYRION,10,7 118 | TYRION,TYWIN,10,7 119 | ARYA,JON,9,7 120 | BRIENNE,JON,9,7 121 | CERSEI,SANSA,9,7 122 | DAVOS,SANSA,9,7 123 | EURON,MOUNTAIN,9,7 124 | HOUND,TYRION,9,7 125 | NYMERIA,OBARA,9,7 126 | ARYA,NED,8,7 127 | BERIC,NIGHT_KING,8,7 128 | BRAN,JON,8,7 129 | BRONN,DAENERYS,8,7 130 | 
BRONN,PODRICK,8,7 131 | BRONN,RANDYLL,8,7 132 | ELLARIA,THEON,8,7 133 | EURON,TYRION,8,7 134 | JON,ROBETT,8,7 135 | ROBETT,YOHN_ROYCE,8,7 136 | BENJEN,JON,7,7 137 | BRAN,LYANNA,7,7 138 | BRAN,MAESTER_WOLKAN,7,7 139 | CERSEI,JOFFREY,7,7 140 | DAENERYS,EURON,7,7 141 | DAENERYS,THEON,7,7 142 | DAVOS,TORMUND,7,7 143 | ELLARIA,TYRION,7,7 144 | JORAH,MISSANDEI,7,7 145 | JORAH,THEON,7,7 146 | LYANNA,RHAEGAR,7,7 147 | LYANNA_MORMONT,SANSA,7,7 148 | OBARA,TYENE,7,7 149 | PODRICK,SANSA,7,7 150 | RANDYLL,TYRION,7,7 151 | ROBETT,SANSA,7,7 152 | ARYA,JOFFREY,6,7 153 | ARYA,YOHN_ROYCE,6,7 154 | BRIENNE,TORMUND,6,7 155 | CERSEI,RANDYLL,6,7 156 | DAENERYS,HOUND,6,7 157 | ELLARIA,MOUNTAIN,6,7 158 | ELLARIA,QYBURN,6,7 159 | EURON,TYENE,6,7 160 | GENDRY,ROBERT,6,7 161 | GILLY,LITTLE_SAM,6,7 162 | JAIME,JON,6,7 163 | JOFFREY,SANSA,6,7 164 | JON,LYANNA_MORMONT,6,7 165 | JORAH,VARYS,6,7 166 | LITTLE_SAM,SAM,6,7 167 | LITTLEFINGER,ROBETT,6,7 168 | LYANNA_MORMONT,YOHN_ROYCE,6,7 169 | NIGHT_KING,TORMUND,6,7 170 | THEON,VARYS,6,7 171 | TYENE,YARA,6,7 172 | AERYS,CERSEI,5,7 173 | AERYS,JON,5,7 174 | ALYS,NED_UMBER,5,7 175 | ARYA,CATELYN,5,7 176 | ARYA,CERSEI,5,7 177 | ARYA,MAESTER_WOLKAN,5,7 178 | ARYA,PODRICK,5,7 179 | BRIENNE,BRONN,5,7 180 | BRONN,CERSEI,5,7 181 | CERSEI,NED,5,7 182 | CERSEI,OLENNA,5,7 183 | DAENERYS,TORMUND,5,7 184 | DAVOS,HOUND,5,7 185 | ELLARIA,EURON,5,7 186 | EURON,JON,5,7 187 | HOUND,MOUNTAIN,5,7 188 | HOUND,NIGHT_KING,5,7 189 | JOFFREY,NED,5,7 190 | JOFFREY,TYWIN,5,7 191 | JON,MAESTER_WOLKAN,5,7 192 | JON,MOUNTAIN,5,7 193 | JON,PODRICK,5,7 194 | JON,YOHN_ROYCE,5,7 195 | LITTLEFINGER,PODRICK,5,7 196 | NED,ROBB,5,7 197 | QYBURN,TYENE,5,7 198 | AERYS,TYRION,4,7 199 | ARYA,ROBETT,4,7 200 | BERIC,DAENERYS,4,7 201 | BRAN,EDDISON_TOLLETT,4,7 202 | BRAN,NED,4,7 203 | BRAN,RHAEGAR,4,7 204 | BRIENNE,CERSEI,4,7 205 | BRIENNE,DAVOS,4,7 206 | BRIENNE,LYANNA_MORMONT,4,7 207 | BRIENNE,ROBETT,4,7 208 | BRIENNE,TYRION,4,7 209 | BRIENNE,YOHN_ROYCE,4,7 210 | CATELYN,LITTLEFINGER,4,7 211 | CERSEI,DAVOS,4,7 212 | CERSEI,HOUND,4,7 213 | CERSEI,MYRCELLA,4,7 214 | CERSEI,THEON,4,7 215 | DAVOS,LITTLEFINGER,4,7 216 | DAVOS,PODRICK,4,7 217 | DICKON,TYRION,4,7 218 | EDDISON_TOLLETT,MEERA,4,7 219 | ELLARIA,OBARA,4,7 220 | EURON,NYMERIA,4,7 221 | EURON,OBARA,4,7 222 | EURON,QYBURN,4,7 223 | GENDRY,TYRION,4,7 224 | GREY_WORM,THEON,4,7 225 | HOUND,THEON,4,7 226 | JAIME,TYWIN,4,7 227 | JEOR,JON,4,7 228 | JEOR,JORAH,4,7 229 | JEOR,SAM,4,7 230 | JOFFREY,TYRION,4,7 231 | JON,LYANNA,4,7 232 | JON,MELISANDRE,4,7 233 | JON,NED_UMBER,4,7 234 | JON,QYBURN,4,7 235 | JORAH,NIGHT_KING,4,7 236 | LITTLEFINGER,LYANNA_MORMONT,4,7 237 | LITTLEFINGER,NED,4,7 238 | LYANNA,NED,4,7 239 | LYSA,SANSA,4,7 240 | MAESTER_WOLKAN,YOHN_ROYCE,4,7 241 | MELISANDRE,TYRION,4,7 242 | MOUNTAIN,OBERYN,4,7 243 | MOUNTAIN,TYENE,4,7 244 | NYMERIA,TYENE,4,7 245 | NYMERIA,YARA,4,7 246 | OBARA,YARA,4,7 247 | OLENNA,RANDYLL,4,7 248 | OLENNA,TYRION,4,7 249 | PODRICK,TORMUND,4,7 250 | QYBURN,TYRION,4,7 251 | ROBB,SANSA,4,7 252 | AEGON,DAENERYS,3,7 253 | AERYS,VARYS,3,7 254 | ALYS,JON,3,7 255 | ARYA,LYANNA_MORMONT,3,7 256 | ARYA,ROBB,3,7 257 | ARYA,ROBERT,3,7 258 | BALERION,QYBURN,3,7 259 | BERIC,DAVOS,3,7 260 | BRAN,DAENERYS,3,7 261 | BRAN,YOHN_ROYCE,3,7 262 | BRIENNE,MOUNTAIN,3,7 263 | BRIENNE,QYBURN,3,7 264 | BRIENNE,THEON,3,7 265 | BRONN,DAVOS,3,7 266 | BRONN,HOUND,3,7 267 | BRONN,JON,3,7 268 | BRONN,VARYS,3,7 269 | CATELYN,SANSA,3,7 270 | CERSEI,OBERYN,3,7 271 | DAENERYS,ELLARIA,3,7 272 | DAENERYS,NED,3,7 273 | DAENERYS,VISERYS,3,7 274 | DAENERYS,YARA,3,7 275 | 
DAVOS,EURON,3,7 276 | DAVOS,JAIME,3,7 277 | DAVOS,LYANNA_MORMONT,3,7 278 | DAVOS,NIGHT_KING,3,7 279 | DAVOS,ROBERT,3,7 280 | ELLARIA,OLENNA,3,7 281 | ELLARIA,VARYS,3,7 282 | HARRAG,YARA,3,7 283 | HOUND,JAIME,3,7 284 | HOUND,PODRICK,3,7 285 | HOUND,QYBURN,3,7 286 | HOUND,VARYS,3,7 287 | JAIME,JOFFREY,3,7 288 | JAIME,TOMMEN,3,7 289 | JOFFREY,OLENNA,3,7 290 | JON,SAM,3,7 291 | LITTLEFINGER,MEERA,3,7 292 | LITTLEFINGER,TORMUND,3,7 293 | MOUNTAIN,THEON,3,7 294 | NED,ROBERT,3,7 295 | RAMSAY,SANSA,3,7 296 | ROBERT,VARYS,3,7 297 | SANSA,TORMUND,3,7 298 | SANSA,TYRION,3,7 299 | THEON,TYENE,3,7 300 | TYCHO,TYWIN,3,7 301 | TYRION,YARA,3,7 302 | AERYS,NED,2,7 303 | AERYS,OLENNA,2,7 304 | AERYS,SANSA,2,7 305 | ARYA,WALDER,2,7 306 | BALERION,CERSEI,2,7 307 | BERIC,NED,2,7 308 | BRAN,BRIENNE,2,7 309 | BRAN,NIGHT_KING,2,7 310 | BRAN,PODRICK,2,7 311 | BRAN,TYRION,2,7 312 | BRIENNE,EURON,2,7 313 | BRIENNE,HOT_PIE,2,7 314 | BRIENNE,JORAH,2,7 315 | BRIENNE,QHONO,2,7 316 | BRIENNE,VARYS,2,7 317 | BRONN,GREY_WORM,2,7 318 | BRONN,JORAH,2,7 319 | BRONN,OLENNA,2,7 320 | BRONN,THEON,2,7 321 | CATELYN,TYRION,2,7 322 | CERSEI,LITTLEFINGER,2,7 323 | CERSEI,NIGHT_KING,2,7 324 | CERSEI,ROBB,2,7 325 | CERSEI,VARYS,2,7 326 | CERSEI,YARA,2,7 327 | DAENERYS,DROGO,2,7 328 | DAENERYS,GENDRY,2,7 329 | DAENERYS,LITTLEFINGER,2,7 330 | DAENERYS,MOUNTAIN,2,7 331 | DAENERYS,QYBURN,2,7 332 | DAENERYS,ROBERT,2,7 333 | DAENERYS,TYWIN,2,7 334 | DAVOS,GREY_WORM,2,7 335 | DAVOS,MOUNTAIN,2,7 336 | DAVOS,QYBURN,2,7 337 | DAVOS,ROBETT,2,7 338 | DAVOS,YOHN_ROYCE,2,7 339 | DICKON,OLENNA,2,7 340 | DICKON,SAM,2,7 341 | DROGO,JON,2,7 342 | DROGO,JORAH,2,7 343 | ELLARIA,GREY_WORM,2,7 344 | ELLARIA,JAIME,2,7 345 | ELLARIA,MISSANDEI,2,7 346 | ELLARIA,NYMERIA,2,7 347 | ELLARIA,OBERYN,2,7 348 | EURON,HOUND,2,7 349 | GENDRY,MELISANDRE,2,7 350 | GREY_WORM,JAIME,2,7 351 | GREY_WORM,JON,2,7 352 | GREY_WORM,JORAH,2,7 353 | GREY_WORM,MELISANDRE,2,7 354 | GREY_WORM,OLENNA,2,7 355 | GREY_WORM,YARA,2,7 356 | HIGH_SEPTON,SAM,2,7 357 | HOT_PIE,JON,2,7 358 | HOUND,MISSANDEI,2,7 359 | HOUND,QHONO,2,7 360 | HOUND,WHITE_WALKER,2,7 361 | HOWLAND,MEERA,2,7 362 | JAIME,SANSA,2,7 363 | JAIME,THEON,2,7 364 | JAIME,TYENE,2,7 365 | JEOR,TYRION,2,7 366 | JOANNA,TYWIN,2,7 367 | JOFFREY,ROBB,2,7 368 | JOFFREY,ROBERT,2,7 369 | JOFFREY,TOMMEN,2,7 370 | JON,ROBB,2,7 371 | JON,WHITE_WALKER,2,7 372 | JON_ARRYN,LYSA,2,7 373 | JON_ARRYN,SANSA,2,7 374 | JORAH,PODRICK,2,7 375 | JORAH,WHITE_WALKER,2,7 376 | LYANNA_MORMONT,PODRICK,2,7 377 | LYANNA_MORMONT,TORMUND,2,7 378 | MARGAERY,OLENNA,2,7 379 | MARWYN,MAESTER_WOLKAN,2,7 380 | MARWYN,ROBERT,2,7 381 | MARWYN,SHIREEN,2,7 382 | MEERA,NED,2,7 383 | MELISANDRE,MISSANDEI,2,7 384 | MELISANDRE,THOROS,2,7 385 | MISSANDEI,OLENNA,2,7 386 | MISSANDEI,YARA,2,7 387 | MOUNTAIN,MYRCELLA,2,7 388 | MOUNTAIN,RANDYLL,2,7 389 | MYRCELLA,OBERYN,2,7 390 | NED,TYRION,2,7 391 | NIGHT_KING,TYRION,2,7 392 | NYMERIA,THEON,2,7 393 | OBARA,THEON,2,7 394 | OLENNA,THEON,2,7 395 | OLENNA,VARYS,2,7 396 | OLENNA,YARA,2,7 397 | PODRICK,QHONO,2,7 398 | PODRICK,ROBETT,2,7 399 | PODRICK,THEON,2,7 400 | PODRICK,VARYS,2,7 401 | PODRICK,YOHN_ROYCE,2,7 402 | QHONO,TYRION,2,7 403 | QYBURN,RANDYLL,2,7 404 | QYBURN,THEON,2,7 405 | RHAEGAR,ROBERT,2,7 406 | ROBB,ROBERT,2,7 407 | ROBERT,SAM,2,7 408 | ROBERT,SANSA,2,7 409 | ROBETT,TORMUND,2,7 410 | SAM,STANNIS,2,7 411 | TORMUND,WHITE_WALKER,2,7 412 | TORMUND,YOHN_ROYCE,2,7 413 | VARYS,YARA,2,7 -------------------------------------------------------------------------------- /dynetlsm/datasets/raw_data/got/got-s5-edges.csv: 
-------------------------------------------------------------------------------- 1 | Source,Target,Weight,Season 2 | ARYA,JAQEN,148,5 3 | JORAH,TYRION,141,5 4 | BRONN,JAIME,121,5 5 | TYRION,VARYS,110,5 6 | DAENERYS,TYRION,98,5 7 | CERSEI,HIGH_SPARROW,96,5 8 | BRIENNE,PODRICK,90,5 9 | JON,STANNIS,90,5 10 | LITTLEFINGER,SANSA,89,5 11 | JON,SAM,85,5 12 | GILLY,SAM,83,5 13 | DAARIO,DAENERYS,81,5 14 | MARGAERY,TOMMEN,79,5 15 | SANSA,THEON,77,5 16 | DAENERYS,HIZDAHR,69,5 17 | DAVOS,STANNIS,68,5 18 | JON,TORMUND,63,5 19 | CERSEI,TOMMEN,62,5 20 | JAIME,MYRCELLA,58,5 21 | MYRANDA,SANSA,58,5 22 | RAMSAY,THEON,58,5 23 | MELISANDRE,STANNIS,52,5 24 | RAMSAY,ROOSE_BOLTON,51,5 25 | RAMSAY,SANSA,51,5 26 | CERSEI,LITTLEFINGER,50,5 27 | ARYA,WAIF,48,5 28 | SHIREEN,STANNIS,47,5 29 | CERSEI,MARGAERY,46,5 30 | CERSEI,QYBURN,41,5 31 | JON,OLLY,41,5 32 | CERSEI,JAIME,40,5 33 | DAENERYS,JORAH,40,5 34 | JON,MELISANDRE,39,5 35 | BARRISTAN,DAENERYS,38,5 36 | DAVOS,JON,35,5 37 | DAVOS,SHIREEN,35,5 38 | BRONN,TYENE,34,5 39 | DORAN,ELLARIA,33,5 40 | JON,MANCE,33,5 41 | ALLISER_THORNE,JON,32,5 42 | JAQEN,WAIF,32,5 43 | HIGH_SPARROW,OLENNA,30,5 44 | LORAS,MARGAERY,30,5 45 | OLLY,SAM,30,5 46 | GREY_WORM,MISSANDEI,29,5 47 | CERSEI,MACE,28,5 48 | DAENERYS,MOSSADOR,28,5 49 | JON,KARSI,28,5 50 | MYRANDA,RAMSAY,28,5 51 | AREO,DORAN,27,5 52 | BRIENNE,LITTLEFINGER,27,5 53 | DAARIO,TYRION,27,5 54 | CERSEI,OLENNA,26,5 55 | DAARIO,HIZDAHR,24,5 56 | DORAN,JAIME,24,5 57 | MYRCELLA,TRYSTANE,24,5 58 | SAM,STANNIS,24,5 59 | DAENERYS,MISSANDEI,22,5 60 | LITTLEFINGER,ROOSE_BOLTON,22,5 61 | CERSEI,KEVAN,21,5 62 | HIGH_SPARROW,LORAS,21,5 63 | CERSEI,MAGGY,20,5 64 | DAARIO,GREY_WORM,20,5 65 | JANOS,JON,20,5 66 | SELYSE,SHIREEN,20,5 67 | AREO,BRONN,19,5 68 | ELLARIA,JAIME,19,5 69 | KARSI,TORMUND,19,5 70 | TYCHO,MACE,19,5 71 | AREO,JAIME,18,5 72 | CERSEI,LANCEL,18,5 73 | CERSEI,PYCELLE,18,5 74 | GILLY,SHIREEN,18,5 75 | JON,MAESTER_AEMON,18,5 76 | SELYSE,STANNIS,18,5 77 | BLACK_HAIRED_PROSTITUTE,TYRION,17,5 78 | DAARIO,JORAH,17,5 79 | HIGH_SPARROW,MARGAERY,17,5 80 | MAESTER_AEMON,SAM,17,5 81 | MALKO,TYRION,17,5 82 | ARYA,MERYN_TRANT,16,5 83 | DAARIO,MISSANDEI,16,5 84 | JAIME,TRYSTANE,16,5 85 | JON,LOBODA,16,5 86 | LITTLEFINGER,OLENNA,16,5 87 | MISSANDEI,TYRION,16,5 88 | ROOSE_BOLTON,SANSA,16,5 89 | AREO,ELLARIA,15,5 90 | AREO,MYRCELLA,15,5 91 | BRIENNE,SANSA,15,5 92 | CERSEI,LORAS,15,5 93 | LORAS,OLYVAR,15,5 94 | ALLISER_THORNE,SAM,14,5 95 | BRONN,LOLLYS,14,5 96 | CERSEI,SEPTA_UNELLA,14,5 97 | DAENERYS,GREY_WORM,14,5 98 | LANCEL,LITTLEFINGER,14,5 99 | LITTLEFINGER,RAMSAY,14,5 100 | MANCE,STANNIS,14,5 101 | MARGAERY,OLENNA,14,5 102 | NYMERIA,TYENE,14,5 103 | ARYA,THIN_MAN,13,5 104 | CERSEI,HIGH_SEPTON,13,5 105 | CERSEI,MERYN_TRANT,13,5 106 | DAVOS,MELISANDRE,13,5 107 | HIZDAHR,TYRION,13,5 108 | MELISANDRE,SELYSE,13,5 109 | MYRANDA,THEON,13,5 110 | BRONN,DORNISH_RIDER,12,5 111 | CERSEI,MELARA,12,5 112 | CERSEI,TYWIN,12,5 113 | EDDISON_TOLLETT,JON,12,5 114 | JORAH,MALKO,12,5 115 | LORD_OF_BONES,TORMUND,12,5 116 | BRAND,DERRYK,11,5 117 | ELLARIA,OBARA,11,5 118 | ELLARIA,TYENE,11,5 119 | HIGH_SEPTON,OLYVAR,11,5 120 | JAQEN,THIN_MAN,11,5 121 | DAVOS,OLLY,10,5 122 | ELLARIA,MYRCELLA,10,5 123 | OLLY,STANNIS,10,5 124 | PYCELLE,QYBURN,10,5 125 | BRIENNE,STANNIS,9,5 126 | BRONN,MYRCELLA,9,5 127 | DAENERYS,VARYS,9,5 128 | DAVOS,SELYSE,9,5 129 | DORAN,MYRCELLA,9,5 130 | DORAN,TRYSTANE,9,5 131 | HIGH_SPARROW,OLYVAR,9,5 132 | JORAH,MISSANDEI,9,5 133 | LITTLEFINGER,ROYCE,9,5 134 | LORAS,OLENNA,9,5 135 | LORAS,TOMMEN,9,5 136 | MACE,QYBURN,9,5 137 | 
MELISANDRE,SHIREEN,9,5 138 | RAMSAY,WALDA,9,5 139 | ALLISER_THORNE,OLLY,8,5 140 | AREO,TRYSTANE,8,5 141 | BRIENNE,RENLY,8,5 142 | GILLY,LITTLE_SAM,8,5 143 | HIGH_SPARROW,TOMMEN,8,5 144 | HIZDAHR,MISSANDEI,8,5 145 | JANOS,SAM,8,5 146 | KARSI,LOBODA,8,5 147 | LANCEL,LORAS,8,5 148 | ROOSE_BOLTON,THEON,8,5 149 | SANSA,WALDA,8,5 150 | TYRION,TYWIN,8,5 151 | AREO,OBARA,7,5 152 | BARRISTAN,DAARIO,7,5 153 | BRONN,OBARA,7,5 154 | CATELYN,SANSA,7,5 155 | CERSEI,MYRCELLA,7,5 156 | DERRYK,SAM,7,5 157 | GILLY,OLLY,7,5 158 | GREY_WORM,JORAH,7,5 159 | HIZDAHR,MOSSADOR,7,5 160 | MACE,MERYN_TRANT,7,5 161 | MACE,PYCELLE,7,5 162 | MARGAERY,OLYVAR,7,5 163 | OWNER,TYRION,7,5 164 | PODRICK,SANSA,7,5 165 | RAMSAY,STANNIS,7,5 166 | ALLISER_THORNE,JANOS,6,5 167 | BRAND,SAM,6,5 168 | BRONN,DORAN,6,5 169 | BRONN,TRYSTANE,6,5 170 | CERSEI,TYRION,6,5 171 | DAARIO,MOSSADOR,6,5 172 | EDDISON_TOLLETT,SAM,6,5 173 | ELLARIA,OBERYN,6,5 174 | GILLY,JON,6,5 175 | GILLY,SELYSE,6,5 176 | HIGH_SEPTON,LANCEL,6,5 177 | HIGH_SEPTON,QYBURN,6,5 178 | JAIME,TYWIN,6,5 179 | JON,SELYSE,6,5 180 | JORAH,VARYS,6,5 181 | LITTLE_SAM,SAM,6,5 182 | MADAME,MERYN_TRANT,6,5 183 | MANCE,TORMUND,6,5 184 | MYRANDA,WALDA,6,5 185 | MYRCELLA,OBARA,6,5 186 | NYMERIA,OBARA,6,5 187 | OBARA,TYENE,6,5 188 | ROOSE_BOLTON,WALDA,6,5 189 | ALLISER_THORNE,STANNIS,5,5 190 | BRAN,RICKON,5,5 191 | BRAN,THEON,5,5 192 | BRIENNE,CATELYN,5,5 193 | BRONN,NYMERIA,5,5 194 | CATELYN,LITTLEFINGER,5,5 195 | CERSEI,ROBERT,5,5 196 | DAENERYS,OWNER,5,5 197 | DERRYK,GILLY,5,5 198 | DORAN,OBERYN,5,5 199 | EDDISON_TOLLETT,OLLY,5,5 200 | ELLARIA,NYMERIA,5,5 201 | HIGH_SEPTON,MACE,5,5 202 | JON,LORD_OF_BONES,5,5 203 | KEVAN,TOMMEN,5,5 204 | LITTLEFINGER,STANNIS,5,5 205 | MELISANDRE,SAM,5,5 206 | NED,STANNIS,5,5 207 | PODRICK,STANNIS,5,5 208 | RICKON,THEON,5,5 209 | SAM,SELYSE,5,5 210 | ALLISER_THORNE,DAVOS,4,5 211 | ALLISER_THORNE,EDDISON_TOLLETT,4,5 212 | ALLISER_THORNE,MAESTER_AEMON,4,5 213 | ALLISER_THORNE,TORMUND,4,5 214 | AREO,NYMERIA,4,5 215 | AREO,TYENE,4,5 216 | ARYA,MACE,4,5 217 | BARRISTAN,MOSSADOR,4,5 218 | CERSEI,OLYVAR,4,5 219 | CERSEI,ROOSE_BOLTON,4,5 220 | CERSEI,VARYS,4,5 221 | DORNISH_RIDER,JAIME,4,5 222 | EDDISON_TOLLETT,STANNIS,4,5 223 | ELLARIA,TRYSTANE,4,5 224 | GILLY,MAESTER_AEMON,4,5 225 | GILLY,STANNIS,4,5 226 | HIGH_SEPTON,HIGH_SPARROW,4,5 227 | HIGH_SPARROW,RENLY,4,5 228 | HIGH_SPARROW,ROBERT,4,5 229 | HIZDAHR,JORAH,4,5 230 | JAIME,TYENE,4,5 231 | JON,NIGHT_KING,4,5 232 | JORAH,OWNER,4,5 233 | LITTLEFINGER,ROBIN,4,5 234 | LITTLEFINGER,THEON,4,5 235 | LOBODA,TORMUND,4,5 236 | MERYN_TRANT,QYBURN,4,5 237 | MYRANDA,ROOSE_BOLTON,4,5 238 | QUICK,STRONG,4,5 239 | ROBERT,STANNIS,4,5 240 | ROOSE_BOLTON,STANNIS,4,5 241 | ROOSE_BOLTON,TYWIN,4,5 242 | SAM,SHIREEN,4,5 243 | SANSA,YOHN_ROYCE,4,5 244 | THEON,WALDA,4,5 245 | AEGON,MAESTER_AEMON,3,5 246 | AERYS,DAENERYS,3,5 247 | ARYA,NED,3,5 248 | ARYA,TYCHO,3,5 249 | BARRISTAN,GREY_WORM,3,5 250 | BARRISTAN,RHAEGAR,3,5 251 | BRAN,SANSA,3,5 252 | CERSEI,JOFFREY,3,5 253 | CERSEI,OBERYN,3,5 254 | CERSEI,SANSA,3,5 255 | DORAN,TOMMEN,3,5 256 | EDDISON_TOLLETT,JANOS,3,5 257 | GILLY,JANOS,3,5 258 | GREY_WORM,MOSSADOR,3,5 259 | GREY_WORM,TYRION,3,5 260 | JAIME,LOLLYS,3,5 261 | JAIME,OBARA,3,5 262 | JAIME,TYRION,3,5 263 | JAIME,VARYS,3,5 264 | JANOS,OLLY,3,5 265 | JANOS,STANNIS,3,5 266 | JEOR,JON,3,5 267 | JOFFREY,MARGAERY,3,5 268 | JOFFREY,TOMMEN,3,5 269 | JON,OTHELL_YARWYCK,3,5 270 | JON,ROBB,3,5 271 | KEVAN,LANCEL,3,5 272 | KEVAN,PYCELLE,3,5 273 | KEVAN,QYBURN,3,5 274 | LITTLE_SAM,OLLY,3,5 275 | LITTLEFINGER,LYANNA,3,5 276 | 
LITTLEFINGER,LYSA,3,5 277 | LITTLEFINGER,NED,3,5 278 | LITTLEFINGER,PODRICK,3,5 279 | LITTLEFINGER,YOHN_ROYCE,3,5 280 | LORD_WEEBLY,SAM,3,5 281 | LYANNA,SANSA,3,5 282 | MANCE,MELISANDRE,3,5 283 | NED,ROBERT,3,5 284 | NED,SANSA,3,5 285 | OBARA,OBERYN,3,5 286 | PODRICK,RENLY,3,5 287 | PODRICK,TYRION,3,5 288 | RICKON,SANSA,3,5 289 | ROYCE,SANSA,3,5 290 | SANSA,STANNIS,3,5 291 | SHAE,TYRION,3,5 292 | SHAE,TYWIN,3,5 293 | STANNIS,TORMUND,3,5 294 | AEGON,RHAENYRA,2,5 295 | AERYS,BARRISTAN,2,5 296 | ALLISER_THORNE,DENYS,2,5 297 | ALLISER_THORNE,MANCE,2,5 298 | ARYA,MADAME,2,5 299 | ARYA,MOUNTAIN,2,5 300 | ARYA,SANSA,2,5 301 | ARYA,WALDER,2,5 302 | BARRISTAN,HIZDAHR,2,5 303 | BRIAN,JON,2,5 304 | BRIENNE,ROBERT,2,5 305 | BRIENNE,SELWYN,2,5 306 | BRONN,CERSEI,2,5 307 | BRONN,FALYSE,2,5 308 | CATELYN,NED,2,5 309 | CATELYN,ROBB,2,5 310 | CERSEI,ELLARIA,2,5 311 | CERSEI,JORAH,2,5 312 | CERWYN,RAMSAY,2,5 313 | DAARIO,QUICK,2,5 314 | DAARIO,STRONG,2,5 315 | DAENERYS,QUICK,2,5 316 | DAENERYS,ROBERT,2,5 317 | DAENERYS,STRONG,2,5 318 | DAVOS,EDDISON_TOLLETT,2,5 319 | DAVOS,SAM,2,5 320 | DORAN,OBARA,2,5 321 | DORAN,TYENE,2,5 322 | EDDISON_TOLLETT,GILLY,2,5 323 | EDDISON_TOLLETT,LITTLE_SAM,2,5 324 | EDDISON_TOLLETT,PYP,2,5 325 | EDDISON_TOLLETT,TORMUND,2,5 326 | GILLY,MELISANDRE,2,5 327 | GRENN,PYP,2,5 328 | HIGH_SEPTON,PYCELLE,2,5 329 | HIGH_SPARROW,MERYN_TRANT,2,5 330 | HIGH_SPARROW,SEPTA_UNELLA,2,5 331 | HIZDAHR,OWNER,2,5 332 | HIZDAHR,QUICK,2,5 333 | HIZDAHR,STRONG,2,5 334 | JAIME,LITTLEFINGER,2,5 335 | JAIME,NYMERIA,2,5 336 | JAIME,TOMMEN,2,5 337 | JAQEN,MACE,2,5 338 | JAQEN,MERYN_TRANT,2,5 339 | JEOR,SAM,2,5 340 | JOFFREY,RENLY,2,5 341 | JOFFREY,ROBERT,2,5 342 | JOFFREY,SANSA,2,5 343 | JOFFREY,TYRION,2,5 344 | JOFFREY,TYWIN,2,5 345 | JON,LITTLE_SAM,2,5 346 | JON,LORD_WEEBLY,2,5 347 | JON,ROOSE_BOLTON,2,5 348 | JON,SHIREEN,2,5 349 | JORAH,MOSSADOR,2,5 350 | KARSI,NIGHT_KING,2,5 351 | KEVAN,TYWIN,2,5 352 | LANCEL,OLYVAR,2,5 353 | LANCEL,PYCELLE,2,5 354 | LITTLE_SAM,MELISANDRE,2,5 355 | LITTLE_SAM,STANNIS,2,5 356 | LITTLEFINGER,LORAS,2,5 357 | LITTLEFINGER,MYRANDA,2,5 358 | LITTLEFINGER,OLYVAR,2,5 359 | LITTLEFINGER,RENLY,2,5 360 | LITTLEFINGER,ROBB,2,5 361 | LITTLEFINGER,TOMMEN,2,5 362 | LITTLEFINGER,WALDA,2,5 363 | LOLLYS,TANDA,2,5 364 | LORAS,TYWIN,2,5 365 | LYANNA_MORMONT,STANNIS,2,5 366 | MACE,MERYN_TRANT,2,5 367 | MACE,THIN_MAN,2,5 368 | MACE,TOMMEN,2,5 369 | MAESTER_WOLKAN,ROOSE_BOLTON,2,5 370 | MAGGY,MELARA,2,5 371 | MAGGY,ROBERT,2,5 372 | MAGNAR,SAM,2,5 373 | MANCE,SAM,2,5 374 | MARGAERY,ROBERT,2,5 375 | MELISANDRE,OLLY,2,5 376 | MERYN_TRANT,MOUNTAIN,2,5 377 | MERYN_TRANT,THIN_MAN,2,5 378 | MOSSADOR,TYRION,2,5 379 | NED,THEON,2,5 380 | OLENNA,OLYVAR,2,5 381 | OLYVAR,TOMMEN,2,5 382 | PYCELLE,VARYS,2,5 383 | QUICK,TYRION,2,5 384 | RANDYLL,SAM,2,5 385 | RHAEGAR,ROBERT,2,5 386 | ROBB,SANSA,2,5 387 | ROBB,THEON,2,5 388 | ROBERT,TYWIN,2,5 389 | ROBERT,VARYS,2,5 390 | ROBIN,ROYCE,2,5 391 | ROBIN,SANSA,2,5 392 | STANNIS,STEFFON,2,5 393 | STRONG,TYRION,2,5 394 | THIN_MAN,WAIF,2,5 395 | TOMMEN,TYWIN,2,5 396 | TYCHO,JAQEN,2,5 397 | TYCHO,MERYN_TRANT,2,5 398 | TYCHO,THIN_MAN,2,5 --------------------------------------------------------------------------------
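The raw Game of Thrones edge lists above all share the Source,Target,Weight,Season schema. A minimal sketch of turning one season's file into a weighted adjacency matrix (illustrative only; this is not the packaged dynetlsm.datasets loader):

import numpy as np
import pandas as pd

df = pd.read_csv('dynetlsm/datasets/raw_data/got/got-s7-edges.csv')
names = sorted(set(df['Source']) | set(df['Target']))
idx = {name: i for i, name in enumerate(names)}

# symmetric, weighted co-occurrence network for the season
Y = np.zeros((len(names), len(names)))
for row in df.itertuples(index=False):
    Y[idx[row.Source], idx[row.Target]] = row.Weight
    Y[idx[row.Target], idx[row.Source]] = row.Weight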