├── .gitattributes
├── .gitignore
├── .pylintrc
├── CONTRIBUTING.rst
├── DESCRIPTION.rst
├── HISTORY.rst
├── LICENSE.txt
├── Makefile
├── README.md
├── README.rst
├── docs
├── .gitignore
├── Makefile
├── clustering.html
├── conf.py
├── contact.html
├── contribute.html
├── contributing.rst
├── css
│ └── pathpy.css
├── docs.html
├── edu.html
├── example_network.png
├── example_network_scaled.png
├── history.rst
├── index.html
├── index.rst
├── installation.rst
├── js
│ ├── menu.js
│ └── sidebar.js
├── lotr.html
├── make.bat
├── manual
│ ├── algorithms
│ │ ├── centralities.html
│ │ ├── components.html
│ │ ├── index.html
│ │ ├── infomap.html
│ │ ├── modularity.html
│ │ ├── path_measures.html
│ │ ├── random_graphs.html
│ │ ├── random_walk.html
│ │ ├── shortest_paths.html
│ │ ├── spectral.html
│ │ ├── statistics.html
│ │ └── temporal_walk.html
│ ├── classes
│ │ ├── dag.html
│ │ ├── higher_order_network.html
│ │ ├── index.html
│ │ ├── markov_sequence.html
│ │ ├── multi_order_model.html
│ │ ├── network.html
│ │ ├── paths.html
│ │ ├── rolling_time_window.html
│ │ └── temporal_network.html
│ ├── index.html
│ ├── path_extraction
│ │ ├── dag_paths.html
│ │ ├── index.html
│ │ ├── origin_destination_stats.html
│ │ ├── random_walk.html
│ │ └── temporal_paths.html
│ ├── utils
│ │ ├── default_containers.html
│ │ ├── exceptions.html
│ │ ├── index.html
│ │ └── log.html
│ └── visualisation
│ │ ├── alluvial.html
│ │ ├── html.html
│ │ ├── index.html
│ │ ├── pdf.html
│ │ └── tikz.html
├── menu.html
├── multiorder.png
├── news.html
├── pathpy_logo.png
├── readme.rst
├── resources
│ └── pathpy_user_meeting.zip
├── sidebar.html
├── tutorial.html
├── tutorial
│ ├── first_steps.html
│ ├── higher_order.html
│ ├── ide.html
│ ├── index.html
│ ├── issues.html
│ ├── model_selection.html
│ ├── paths.html
│ └── temporal_nets.html
└── usage.rst
├── multiorder.png
├── pathpy.pyproj
├── pathpy
├── __init__.py
├── algorithms
│ ├── __init__.py
│ ├── centralities.py
│ ├── components.py
│ ├── infomap.py
│ ├── modularity.py
│ ├── path_measures.py
│ ├── random_graphs.py
│ ├── random_walk.py
│ ├── shortest_paths.py
│ ├── spectral.py
│ ├── statistics.py
│ └── temporal_walk.py
├── classes
│ ├── __init__.py
│ ├── dag.py
│ ├── higher_order_network.py
│ ├── markov_sequence.py
│ ├── multi_order_model.py
│ ├── network.py
│ ├── paths.py
│ ├── rolling_time_window.py
│ └── temporal_network.py
├── path_extraction
│ ├── __init__.py
│ ├── dag_paths.py
│ ├── origin_destination_stats.py
│ ├── random_walk.py
│ └── temporal_paths.py
├── utils
│ ├── __init__.py
│ ├── default_containers.py
│ ├── exceptions.py
│ └── log.py
├── visualisation
│ ├── __init__.py
│ ├── alluvial.py
│ ├── html.py
│ ├── pdf.py
│ └── tikz.py
└── visualisation_assets
│ ├── d3.v4.min.js
│ ├── diffusion_template.html
│ ├── network_template.html
│ ├── paths_template.html
│ ├── tempnet_template.html
│ └── walk_template.html
├── pathpy_logo.png
├── setup.cfg
├── setup.py
└── tests
├── README.md
├── __init__.py
├── conftest.py
├── test_DAG.py
├── test_HigherOrderNetwork.py
├── test_MultiOrderModel.py
├── test_Network.py
├── test_OriginDestinationPaths.py
├── test_Path.py
├── test_TemporalNetwork.py
├── test_centralities.py
├── test_data
├── edge_frequency.edge
├── example_int.tedges
├── example_network.edges
├── example_origin_destination.csv
├── example_timestamp.tedges
├── ngram_simple.ngram
└── test_tempnets.db
├── test_estimation.py
├── test_random_graphs.py
└── test_spectral.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 |
5 | # C extensions
6 | *.so
7 |
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | lib/
17 | lib64/
18 | parts/
19 | sdist/
20 | var/
21 | *.egg-info/
22 | .installed.cfg
23 | *.egg
24 |
25 | # PyInstaller
26 | # Usually these files are written by a python script from a template
27 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
28 | *.manifest
29 | *.spec
30 |
31 | # Sphinx Documentation
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 |
45 | # Translations
46 | *.mo
47 | *.pot
48 |
49 | # Django stuff:
50 | *.log
51 |
52 | # PyBuilder
53 | target/
54 |
55 | # =========================
56 | # Operating System Files
57 | # =========================
58 |
59 | # OSX
60 | # =========================
61 |
62 | .DS_Store
63 | .AppleDouble
64 | .LSOverride
65 |
66 | # Thumbnails
67 | ._*
68 |
69 | # Files that might appear on external disk
70 | .Spotlight-V100
71 | .Trashes
72 |
73 | # Directories potentially created on remote AFP share
74 | .AppleDB
75 | .AppleDesktop
76 | Network Trash Folder
77 | Temporary Items
78 | .apdisk
79 |
80 | # Windows
81 | # =========================
82 |
83 | # Windows image file caches
84 | Thumbs.db
85 | ehthumbs.db
86 |
87 | # Folder config file
88 | Desktop.ini
89 | # Recycle Bin used on file shares
90 | $RECYCLE.BIN/
91 |
92 | # Windows Installer files
93 | *.cab
94 | *.msi
95 | *.msm
96 | *.msp
97 |
98 | # Windows shortcuts
99 | *.lnk
100 | pyTempNets.v12.suo
101 | *.TMP
102 | *.suo
103 | .idea/
104 | .eggs/
105 | prof/*
106 | *.swp
107 |
108 | # Visual Studio Code
109 | # ==================
110 |
111 | .vscode/
112 | docs/_build/
113 | .pytest_cache
114 | .pytest_cache/v/cache
115 | .pytest_cache/v/cache/lastfailed
116 | .pytest_cache/v/cache/nodeids
117 |
--------------------------------------------------------------------------------
/CONTRIBUTING.rst:
--------------------------------------------------------------------------------
1 | .. highlight:: shell
2 |
3 | ============
4 | Contributing
5 | ============
6 |
7 | Contributions are welcome, and they are greatly appreciated! Every little bit
8 | helps, and credit will always be given.
9 |
10 | You can contribute in many ways:
11 |
12 | Types of Contributions
13 | ----------------------
14 |
15 | Report Bugs
16 | ~~~~~~~~~~~
17 |
18 | Report bugs at https://github.com/uzhdag/pathpy/issues.
19 |
20 | If you are reporting a bug, please include:
21 |
22 | * Your operating system name and version.
23 | * Any details about your local setup that might be helpful in troubleshooting.
24 | * Detailed steps to reproduce the bug.
25 |
26 | Fix Bugs
27 | ~~~~~~~~
28 |
29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help
30 | wanted" is open to whoever wants to implement it.
31 |
32 | Implement Features
33 | ~~~~~~~~~~~~~~~~~~
34 |
35 | Look through the GitHub issues for features. Anything tagged with "enhancement"
36 | and "help wanted" is open to whoever wants to implement it.
37 |
38 | Write Documentation
39 | ~~~~~~~~~~~~~~~~~~~
40 |
41 | pathpy could always use more documentation, whether as part of the
42 | official pathpy docs, in docstrings, or even on the web in blog posts,
43 | articles, and such.
44 |
45 | Submit Feedback
46 | ~~~~~~~~~~~~~~~
47 |
48 | The best way to send feedback is to file an issue at https://github.com/uzhdag/pathpy/issues.
49 |
50 | If you are proposing a feature:
51 |
52 | * Explain in detail how it would work.
53 | * Keep the scope as narrow as possible, to make it easier to implement.
54 | * Remember that this is a volunteer-driven project, and that contributions
55 | are welcome :)
56 |
57 | Get Started!
58 | ------------
59 |
60 | Ready to contribute? Here's how to set up `pathpy` for local development.
61 |
62 | 1. Fork the `pathpy` repo on GitHub.
63 | 2. Clone your fork locally::
64 |
65 | $ git clone git@github.com:your_name_here/pathpy.git
66 |
67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
68 |
69 | $ mkvirtualenv pathpy
70 | $ cd pathpy/
71 | $ python setup.py develop
72 |
73 | 4. Create a branch for local development::
74 |
75 | $ git checkout -b name-of-your-bugfix-or-feature
76 |
77 | Now you can make your changes locally.
78 |
79 | 5. When you're done making changes, check that your changes pass flake8 and the
80 | tests, including testing other Python versions with tox::
81 |
82 | $ flake8 pathpy tests
83 | $ python setup.py test or py.test
84 | $ tox
85 |
86 | To get flake8 and tox, just pip install them into your virtualenv.
87 |
88 | 6. Commit your changes and push your branch to GitHub::
89 |
90 | $ git add .
91 | $ git commit -m "Your detailed description of your changes."
92 | $ git push origin name-of-your-bugfix-or-feature
93 |
94 | 7. Submit a pull request through the GitHub website.
95 |
96 | Pull Request Guidelines
97 | -----------------------
98 |
99 | Before you submit a pull request, check that it meets these guidelines:
100 |
101 | 1. The pull request should include tests.
102 | 2. If the pull request adds functionality, the docs should be updated. Put
103 | your new functionality into a function with a docstring, and add the
104 | feature to the list in README.rst.
105 | 3. The pull request should work for Python 2.7, 3.4, 3.5 and 3.6, and for PyPy. Check
106 | https://travis-ci.org/uzhdag/pathpy/pull_requests
107 | and make sure that the tests pass for all supported Python versions.
108 |
109 | Tips
110 | ----
111 |
112 | To run a subset of tests::
113 |
114 | $ py.test tests.test_pathpy
115 |
116 |
117 | Deploying
118 | ---------
119 |
120 | A reminder for the maintainers on how to deploy.
121 | Make sure all your changes are committed (including an entry in HISTORY.rst).
122 | Then run::
123 |
124 | $ bumpversion patch # possible: major / minor / patch
125 | $ git push
126 | $ git push --tags
127 |
128 | Travis will then deploy to PyPI if tests pass.
129 |
--------------------------------------------------------------------------------
/DESCRIPTION.rst:
--------------------------------------------------------------------------------
1 | pathpy is an OpenSource python package for the analysis of time series data on networks using higher- and multi order network models. The package is tailored to analyse temporal networks as well as sequential data that capture multiple short, independent paths observed in an underlying graph topology. Examples for data that can be analysed include time-stamped social networks, user click streams in information networks, biological pathways, or information cascades in social networks.
--------------------------------------------------------------------------------
/HISTORY.rst:
--------------------------------------------------------------------------------
1 | History
2 | =======
3 |
4 | 2.2.0 (2019-09-21)
5 | ------------------
6 |
7 | * Several Bug Fixes for API and visualisations
8 |
9 |
10 | 2.0.0 (2018-08-17)
11 | ------------------
12 |
13 | * PyPi Release of 2.0 release version.
14 |
15 | 2.0.0a (2018-08-07)
16 | -------------------
17 |
18 | * First public release of 2.0 alpha on PyPI.
19 |
20 | 1.2.1 (2018-02-23)
21 | ------------------
22 |
23 | * First test release on PyPI.
24 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean clean-test clean-pyc clean-build docs help
2 | .DEFAULT_GOAL := help
3 |
4 | define BROWSER_PYSCRIPT
5 | import os, webbrowser, sys
6 |
7 | try:
8 | from urllib import pathname2url
9 | except:
10 | from urllib.request import pathname2url
11 |
12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
13 | endef
14 | export BROWSER_PYSCRIPT
15 |
16 | define PRINT_HELP_PYSCRIPT
17 | import re, sys
18 |
19 | for line in sys.stdin:
20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
21 | if match:
22 | target, help = match.groups()
23 | print("%-20s %s" % (target, help))
24 | endef
25 | export PRINT_HELP_PYSCRIPT
26 |
27 | BROWSER := python -c "$$BROWSER_PYSCRIPT"
28 |
29 | help:
30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)
31 |
32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts
33 |
34 | clean-build: ## remove build artifacts
35 | rm -fr build/
36 | rm -fr dist/
37 | rm -fr .eggs/
38 | find . -name '*.egg-info' -exec rm -fr {} +
39 | find . -name '*.egg' -exec rm -f {} +
40 |
41 | clean-pyc: ## remove Python file artifacts
42 | find . -name '*.pyc' -exec rm -f {} +
43 | find . -name '*.pyo' -exec rm -f {} +
44 | find . -name '*~' -exec rm -f {} +
45 | find . -name '__pycache__' -exec rm -fr {} +
46 |
47 | clean-test: ## remove test and coverage artifacts
48 | rm -fr .tox/
49 | rm -f .coverage
50 | rm -fr htmlcov/
51 |
52 | lint: ## check style with flake8
53 | flake8 pathpy tests
54 |
55 | test: ## run tests quickly with the default Python
56 | pytest
57 |
58 | test-all: ## run tests on every Python version with tox
59 | pytest --runslow
60 |
61 | coverage: ## check code coverage quickly with the default Python
62 | pytest --cov pathpy \
63 | --runslow \
64 | --latex \
65 | --networkx \
66 | --cov-report html \
67 | --cov-report term \
68 | --doctest-modules
69 |
70 | docs: ## generate Sphinx HTML documentation, including API docs
71 | rm -f docs/pathpy.rst
72 | rm -f docs/modules.rst
73 | sphinx-apidoc -o docs/ pathpy
74 | $(MAKE) -C docs clean
75 | $(MAKE) -C docs html
76 | $(BROWSER) docs/_build/html/index.html
77 |
78 | servedocs: docs ## compile the docs watching for changes
79 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
80 |
81 | release: dist ## package and upload a release
82 | twine check dist/*
83 | twine upload -p $(PYPI_PASSWORD) -u $(PYPI_USER) dist/*
84 |
85 | dist: clean ## builds source and wheel package
86 | python setup.py sdist
87 | python setup.py bdist_wheel
88 | ls -l dist
89 |
90 | install: clean ## install the package to the active Python's site-packages
91 | python setup.py install
92 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 |
4 | pathpy is an OpenSource python package for the analysis of time
5 | series data on networks using higher- and multi-order network models.
6 |
7 | pathpy is specifically tailored to analyse temporal networks as
8 | well as time series and sequence data that capture multiple short,
9 | independent paths observed in an underlying graph or network.
10 | Examples for data that can be analysed with pathpy include time-stamped
11 | social networks, user click streams in information networks, biological
12 | pathways, citation networks, or information cascades in social networks.
13 |
14 | Unifying the modelling and analysis of path statistics and temporal networks,
15 | pathpy provides efficient methods to extract causal or time-respecting paths from
16 | time-stamped network data. The current package distributed via the PyPI name
17 | pathpy2 supersedes the packages pyTempnets as well as version 1.0 of pathpy.
18 |
19 | pathpy facilitates the analysis of temporal correlations in time
20 | series data on networks. It uses model selection and statistical
21 | learning to generate optimal higher- and multi-order models that capture both
22 | topological and temporal characteristics. It can help to answer the important
23 | question when a network abstraction of complex systems is
24 | justified and when higher-order representations are needed instead.
25 |
26 | The theoretical foundation of this package, higher- and multi-order network
27 | models, was developed in the following published works:
28 |
29 | 1. I Scholtes: When is a network a network? Multi-Order Graphical Model
30 | Selection in Pathways and Temporal Networks, In KDD'17 - Proceedings
31 | of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and
32 | Data Mining, Halifax, Nova Scotia, Canada, August 13-17, 2017
33 | http://dl.acm.org/citation.cfm?id=3098145
34 | 2. I Scholtes, N Wider, A Garas: Higher-Order Aggregate Networks in the
35 | Analysis of Temporal Networks: Path structures and centralities
36 | In The European Physical Journal B, 89:61, March 2016
37 | http://dx.doi.org/10.1140/epjb/e2016-60663-0
38 | 3. I Scholtes, N Wider, R Pfitzner, A Garas, CJ Tessone, F Schweitzer:
39 | Causality-driven slow-down and speed-up of diffusion in
40 | non-Markovian temporal networks, In Nature Communications, 5, September 2014
41 | http://www.nature.com/ncomms/2014/140924/ncomms6024/full/ncomms6024.html
42 | 4. R Pfitzner, I Scholtes, A Garas, CJ Tessone, F Schweitzer:
43 | Betweenness preference: Quantifying correlations in the topological
44 | dynamics of temporal networks, Phys Rev Lett, 110(19), 198701, May 2013
45 | http://journals.aps.org/prl/abstract/10.1103/PhysRevLett.110.198701
46 |
47 | pathpy extends higher-order modelling approaches towards multi-order models
48 | for paths that capture temporal correlations at multiple length scales
49 | simultaneously. All mathematical details of the framework can be found in the
50 | openly available preprint at https://arxiv.org/abs/1702.05499.
51 |
52 | A broader view on higher-order models in the analysis of complex systems can be
53 | found at https://arxiv.org/abs/1806.05977.
54 |
55 | pathpy is fully integrated with jupyter, providing rich and interactive in-line
56 | visualisations of networks, temporal networks, higher-, and multi-order models.
57 | Visualisations can be exported to HTML5 files that can be shared and published
58 | on the Web.
59 |
60 |
61 | Download and installation
62 | =========================
63 |
64 | pathpy is pure python code. It has no platform-specific dependencies
65 | and should thus work on all platforms. pathpy requires python 3.x.
66 | It builds on numpy and scipy. The latest release version 2.0 of pathpy
67 | can be installed by typing:
68 |
69 | pip install pathpy2
70 |
71 | Please make sure that you use the PyPI name pathpy2, as the package name pathpy is currently blocked.
72 |
73 | Tutorial
74 | ========
75 |
76 | A comprehensive 3 hour hands-on tutorial that shows how you can use pathpy
77 | to analyse data on pathways and temporal networks is available online at:
78 |
79 | https://ingoscholtes.github.io/kdd2018-tutorial/
80 |
81 | An explanatory video that introduces the science behind pathpy is available here:
82 |
83 | https://youtu.be/CxJkVrD2ZlM
84 |
85 | A promotional video showcasing some of pathpy's features is available here:
86 |
87 | https://youtu.be/QIPqFaR2Z5c
88 |
89 |
90 | Documentation
91 | =============
92 |
93 | The code is fully documented via docstrings which are accessible through
94 | python's built-in help system. Just type help(SYMBOL_NAME) to see
95 | the documentation of a class or method. A reference manual is available
96 | here https://ingoscholtes.github.io/pathpy/hierarchy.html
97 |
98 |
99 | Releases and Versioning
100 | =======================
101 |
102 | The first public beta release of pathpy (released February 17 2017) is
103 | v1.0-beta. Following versions are named MAJOR.MINOR.PATCH according to semantic
104 | versioning. The current version is 2.0.0.
105 |
106 | Known Issues
107 | ============
108 |
109 | - Depending on whether or not scipy has been compiled
110 | with or without the numerics package MKL, considerable
111 | numerical differences can occur, e.g. for eigenvalue
112 | centralities, PageRank, and other measures that depend
113 | on the eigenvectors and eigenvalues of matrices.
114 | Please refer to scipy.show_config() to show compilation flags.
115 | - Interactive visualisations in jupyter are currently only
116 | supported for juypter notebooks, stand-alone HTML files,
117 | and the jupyter display integrated in IDEs like Visual
118 | Studio Code (which we highly recommend to work with pathpy).
119 | Due to its new widget mechanism, interactive d3js
120 | visualisations are currently not available for jupyterLab.
121 | Due to the complex document object model generated by
122 | jupyter notebooks, visualisation performance is best in
123 | stand-alone HTML files and in Visual Studio Code.
124 | - The visualisation of temporal networks currently does
125 | not support the drawing of edge arrows for directed
126 | edges. However, a powerful templating mechanism is
127 | available to support custom interactive and dynamic
128 | visualizations of temporal networks.
129 | - The visualisation of paths in terms of alluvial diagrams
130 | within jupyter is currently unstable for networks with
131 | large delay. This is due to the asynchronous loading of
132 | external scripts.
133 |
134 |
135 | Acknowledgements
136 | ================
137 |
138 | The research behind this data analysis framework was generously funded by the Swiss
139 | State Secretariate for Education, Research and Innovation via Grant C14.0036.
140 | The development of the predecessor package pyTempNets was further supported by the MTEC
141 | Foundation in the context of the project "The Influence of Interaction Patterns on
142 | Success in Socio-Technical Systems: From Theory to Practice."
143 |
144 | The further development of pathpy is currently supported by the
145 | Swiss National Science Foundation via Grant 176938. See details at:
146 |
147 | http://p3.snf.ch/Project-176938
148 |
149 |
150 | Contributors
151 | ============
152 |
153 | Ingo Scholtes (project lead, development)
154 | Luca Verginer (development, test suite integration)
155 |
156 |
157 | Past Contributors
158 | =================
159 | Roman Cattaneo (development)
160 | Nicolas Wider (testing)
161 |
162 |
163 | Copyright
164 | =========
165 |
166 | pathpy is licensed under the GNU Affero General Public
167 | License. See https://choosealicense.com/licenses/agpl-3.0/
168 |
169 | (c) ETH Zürich & University of Zurich, 2015 - 2018
170 |
--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | /pathpy.rst
2 | /pathpy.*.rst
3 | /modules.rst
4 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = python -msphinx
7 | SPHINXPROJ = pathpy
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # pathpy documentation build configuration file, created by
5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017.
6 | #
7 | # This file is execfile()d with the current directory set to its
8 | # containing dir.
9 | #
10 | # Note that not all possible configuration values are present in this
11 | # autogenerated file.
12 | #
13 | # All configuration values have a default; values that are commented out
14 | # serve to show the default.
15 |
16 | # If extensions (or modules to document with autodoc) are in another
17 | # directory, add these directories to sys.path here. If the directory is
18 | # relative to the documentation root, use os.path.abspath to make it
19 | # absolute, like shown here.
20 | #
21 | import os
22 | import sys
23 | sys.path.insert(0, os.path.abspath('..'))  # make the package root importable for autodoc
24 |
25 | import pathpy  # imported so version/release below can read pathpy.__version__
26 |
27 | # -- General configuration ---------------------------------------------
28 |
29 | # If your documentation needs a minimal Sphinx version, state it here.
30 | #
31 | # needs_sphinx = '1.0'
32 |
33 | # Add any Sphinx extension module names here, as strings. They can be
34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon']
36 | # Add any paths that contain templates here, relative to this directory.
37 | templates_path = ['_templates']
38 |
39 | # The suffix(es) of source filenames.
40 | # You can specify multiple suffix as a list of string:
41 | #
42 | # source_suffix = ['.rst', '.md']
43 | source_suffix = '.rst'  # only reStructuredText sources are used
44 |
45 | # The master toctree document.
46 | master_doc = 'index'
47 |
48 | # General information about the project.
49 | project = u'pathpy'
50 | copyright = u"2018, Ingo Scholtes"
51 | author = u"Ingo Scholtes"
52 |
53 | # The version info for the project you're documenting, acts as replacement
54 | # for |version| and |release|, also used in various other places throughout
55 | # the built documents.
56 | #
57 | # The short X.Y version.
58 | version = pathpy.__version__
59 | # The full version, including alpha/beta/rc tags.
60 | release = pathpy.__version__
61 |
62 | # The language for content autogenerated by Sphinx. Refer to documentation
63 | # for a list of supported languages.
64 | #
65 | # This is also used if you do content translation via gettext catalogs.
66 | # Usually you set "language" from the command line for these cases.
67 | language = None
68 |
69 | # List of patterns, relative to source directory, that match files and
70 | # directories to ignore when looking for source files.
71 | # This patterns also effect to html_static_path and html_extra_path
72 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
73 |
74 | # The name of the Pygments (syntax highlighting) style to use.
75 | pygments_style = 'sphinx'
76 |
77 | # If true, `todo` and `todoList` produce output, else they produce nothing.
78 | todo_include_todos = False
79 |
80 |
81 | # -- Options for HTML output -------------------------------------------
82 |
83 | # The theme to use for HTML and HTML Help pages. See the documentation for
84 | # a list of builtin themes.
85 | #
86 | html_theme = "sphinx_rtd_theme"  # Read the Docs theme
87 |
88 | # Theme options are theme-specific and customize the look and feel of a
89 | # theme further. For a list of options available for each theme, see the
90 | # documentation.
91 | #
92 | # html_theme_options = {}
93 |
94 | # Add any paths that contain custom static files (such as style sheets) here,
95 | # relative to this directory. They are copied after the builtin static files,
96 | # so a file named "default.css" will overwrite the builtin "default.css".
97 | html_static_path = ['_static']
98 |
99 |
100 | # -- Options for HTMLHelp output ---------------------------------------
101 |
102 | # Output file base name for HTML help builder.
103 | htmlhelp_basename = 'pathpydoc'
104 |
105 |
106 | # -- Options for LaTeX output ------------------------------------------
107 |
108 | latex_elements = {
109 | # The paper size ('letterpaper' or 'a4paper').
110 | #
111 | # 'papersize': 'letterpaper',
112 |
113 | # The font size ('10pt', '11pt' or '12pt').
114 | #
115 | # 'pointsize': '10pt',
116 |
117 | # Additional stuff for the LaTeX preamble.
118 | #
119 | # 'preamble': '',
120 |
121 | # Latex figure (float) alignment
122 | #
123 | # 'figure_align': 'htbp',
124 | }
125 |
126 | # Grouping the document tree into LaTeX files. List of tuples
127 | # (source start file, target name, title, author, documentclass
128 | # [howto, manual, or own class]).
129 | latex_documents = [
130 | (master_doc, 'pathpy.tex',
131 | u'pathpy Documentation',
132 | u'Ingo Scholtes', 'manual'),
133 | ]
134 |
135 |
136 | # -- Options for manual page output ------------------------------------
137 |
138 | # One entry per manual page. List of tuples
139 | # (source start file, name, description, authors, manual section).
140 | man_pages = [
141 | (master_doc, 'pathpy',
142 | u'pathpy Documentation',
143 | [author], 1)
144 | ]
145 |
146 |
147 | # -- Options for Texinfo output ----------------------------------------
148 |
149 | # Grouping the document tree into Texinfo files. List of tuples
150 | # (source start file, target name, title, author,
151 | # dir menu entry, description, category)
152 | texinfo_documents = [
153 | (master_doc, 'pathpy',
154 | u'pathpy Documentation',
155 | author,
156 | 'pathpy',
157 | 'One line description of project.',
158 | 'Miscellaneous'),
159 | ]
160 |
161 |
162 |
163 |
--------------------------------------------------------------------------------
/docs/contact.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
pathpy is brought to you by the Data Analytics Group at University of Wuppertal and University of Zurich. The following people and organizations have contributed to the development:
26 |
27 |
Current Contributors
28 | Prof. Dr. Ingo Scholtes (project lead, development)
29 | Prof. Dr. Jürgen Hackl (chief architect, development)
30 | Dr. Luca Verginer (development, test suite integration)
31 | Christoph Gote (testing)
32 | Vincenzo Perri (testing)
33 | Luka Petrovic (testing)
34 | Max Kortenbruck (documentation, testing)
35 |
36 |
Past Contributors
37 | Roman Cattaneo (development)
38 | Dr. Nicolas Wider (testing)
39 |
40 |
pathpy is fully documented via docstrings. This means that python-friendly editors like pycharm, Visual Studio Code, or web-based jupyter notebooks will automatically display the documentation of classes, methods, and symbols as you type or hover over a symbol or method.
26 |
In a python interpreter, you can print the in-line documentation of a symbol by calling the help function:
27 |
28 |
help(pathpy.TemporalNetwork)
29 |
30 |
31 |
Help on class TemporalNetwork in module pathpy.classes.temporal_network:
32 |
33 | class TemporalNetwork(builtins.object)
34 | | This class represents a sequence of time-stamped edges.
35 | | Instances of this class can be used to generate path statistics
36 | | based on the time-respecting paths resulting from a given maximum
37 | | time difference between consecutive time-stamped edges.
38 | |
39 | | Methods defined here:
40 | |
41 | | __init__(self, tedges=None)
42 | | Constructor that generates a temporal network instance.
43 | |
44 | [...]
45 |
46 |
To simplify the browsing of the documentation, we provide a continuously updated, auto-generated HTML-based reference manual.
47 |
If you experience any issues, please consider submitting an issue to our issue tracker.
Our team is committed to assisting users in leveraging the power of higher-order network analytics. A short tutorial that walks you through the setup of pathpy, and which introduces some fundamental concepts is available here. In addition, we regularly offer talks, lectures, demonstrations, and tutorials that introduce higher-order network analytics with pathpy. Below we collect information on those events, which often include publicly available educational material and video recordings.
26 |
Are you interested in hosting a lecture or tutorial on pathpy in your institution? Or are you using pathpy to teach (higher-order) network analytics? We would be excited to hear from you!
27 |
28 |
pathpy Summer School Lecture
29 |
On September 8, we gave a two-hour lecture on higher-order models of temporal networks at the 5th International Summer School on Data Science (SSDS 2020), held virtually in Split, Croatia. The lecture was followed by a hands-on tutorial on pathpy taught by Vincenzo Perri. The material is available here
30 |
31 |
Tutorial at EuroCSS
32 |
Between September 2 - 4 2019 we will give a half-day hands-on tutorial on dynamic social network analysis with pathpy at the European Symposium on Computational Social Science in Zurich, Switzerland. Please stay tuned as we prepare the tutorial material.
33 |
34 |
pathpy User Meeting
35 |
At the first pathpy user meeting, held at ETH Zürich on June 17th 2019, we gave an overview of some of pathpy's latest feature additions. The code that we presented can be found here.
36 |
37 |
Lecture Series at ETH Zurich
38 |
In the spring semester 2019, Frank Schweitzer teaches a lecture on Complex Networks. Through assignment sheets and exercises, students are introduced to the modelling of complex networks with pathpy.
39 |
40 |
Lecture Series at University of Zurich
41 |
In the fall semester 2018, Ingo Scholtes offered a series of 12 lectures on Statistical Network Analysis. In the accompanying exercises, students will get an introduction to graph analytics with pathpy.
42 |
43 |
Hands-on Tutorial at Complexity Science Hub
44 |
In September 2018, Ingo Scholtes gave a half-day tutorial on higher-order network analytics with pathpy at the Complexity Science Hub in Vienna, Austria. All material of this tutorial - including slides, data, assignments, and solutions to live coding sessions - is available online.
45 |
46 |
Hands-on Tutorial at KDD'18
47 |
In August 2018, we will give a full-day hands-on tutorial on Mining Time Series Data with Higher-order Network Models at the flagship data science conference KDD'18 in London, UK. The first half of the tutorial introduces higher-order network analysis with pathpy. The second half of the tutorial covers higher-order clustering with the flow compression algorithm InfoMap. All material - including slides, data, assignments, and solutions to live coding sessions - is available online.
48 |
49 |
Lecture at ETH Zürich Symposium
50 |
In April 2018, Ingo Scholtes gave a lecture on optimal higher-order network models at the symposium on Networks, Time and Causality at ETH Zürich. A video recording of the symposium is available below.
51 |
52 |
53 |
Lecture at Northeastern University, Boston
54 |
In November 2017, Ingo Scholtes gave a lecture on multi-order network analysis at the Network Science Institute of Northeastern University in Boston, MA, USA. A video recording of the lecture is available below.
pathpy is an Open Source python package providing higher-order network analytics for time series data.
26 |
pathpy is tailored to analyse time-stamped network data as well as sequential data that capture multiple short paths observed in a graph or network. Examples for data that can be analysed with pathpy include high-resolution time-stamped network data, dynamic social networks, user click streams on the Web, biological pathway data, citation graphs, passenger trajectories in transportation networks, or information propagation in social networks.
27 |
Unifying the analysis of time series data on networks, pathpy provides efficient methods to extract causal or time-respecting paths in time-stamped social networks. It facilitates the analysis of higher-order dependencies and uses principled model selection techniques to infer models that capture both topological and temporal characteristics. It allows to answer the question when network models of time series data are justified and when higher-order models are needed.
28 |
pathpy is fully integrated with jupyter, providing rich interactive visualisations of networks, temporal networks, higher-, and multi-order models. Visualisations can be exported to HTML5 files that can be shared and published on the Web. You can find examples in our gallery.
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
What is the science behind pathpy?
38 |
The theoretical foundation of this package, higher- and multi-order network models, was developed in the following peer-reviewed research articles:
An explanatory video with a high-level introduction to the science behind pathpy is available here. A broader view on the importance of higher-order network models in network analysis can be found in this recent article.
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
How can I learn how to use pathpy?
56 |
57 |
A step-by-step introduction that shows how to install pathpy and how to perform basic network analysis and visualisation tasks can be found here.
58 |
59 |
We further provide an extensive collection of educational resources, including lectures, tutorials, exercises, and data. If you are interested in hosting such an educational event within your institution, please contact us.
The afternoon session of this year's edition of the NetSci Satellite on Higher-Order Models in Network Science will feature a demo session, showcasing the latest features of pathpy3. The session will be given by Jürgen Hackl. 2020/09/17
26 |
27 |
pathpy Summer School Lecture
28 |
Today, Ingo Scholtes will teach a lecture on higher-order models of temporal networks at the 5th International Summer School on Data Science (SSDS 2020), held virtually in Split, Croatia. The lecture will be followed up by a hands-on tutorial on pathpy taught by Vincenzo Perri. 2020/09/08
29 |
30 |
Statistical Network Analysis with pathpy
31 |
A new MSc lecture series on Statistical Network Analysis held at the University of Wuppertal in the summer semester 2020 features a series of practice lectures and exercises building on the latest version of pathpy3. The course gives a deep dive into the statistical modelling and analysis of complex networks across disciplines. 2020/04/01
32 |
33 |
pathpy Tutorial at NetSci-X
34 |
Jürgen Hackl teaches a half-day tutorial on pathpy at NetSci-X, held in Tokyo, Japan. He will also give a sneak peek of the latest version, pathpy3, which is currently in development. 2020/01/20
35 |
36 |
First pathpy user meeting
37 |
The first pathpy user meeting will take place today at 14:00 at ETH Zürich, WEV building in Weinbergstrasse 56/58. 2019/06/17
On Wednesday, we will give a talk and tutorial on higher-order data analytics at the Complexity Science Hub in Vienna. A general introduction to higher-order network analytics will be followed by a 4 hour live tutorial, in which we demonstrate these techniques with pathpy. 2018/09/03
47 |
48 |
Release of pathpy2
49 |
Today we released a greatly improved version 2 of pathpy, an OpenSource software package bringing higher-order network analytics to python. pathpy is now available via the python package index. A comprehensive educational tutorial on pathpy is available online. 2018/08/23
50 |
51 |
Hands-on tutorial at KDD'18
52 |
On August 22 we will give a hands-on tutorial on Higher-order Data Analytics for Temporal Network Data at KDD'18 in London, UK. In this tutorial we introduce higher-order graph analytics with the software package pathpy. 2018/08/22
53 |
54 |
Demo at NetSci'18
55 |
Today we will give a brief demo of the latest version of pathpy in the NetSci satellite on Higher-Order Network Models (HONS). 2018/06/11
While you are free to use the python editor of your choice, we recommend new users to have a look at Visual Studio Code, a platform-independent Open Source development environment available for Windows, MacOS, and Linux. It supports a wide array of programming languages, including python.
27 |
Just download the installation archive and execute it. Once the installation has completed, you can run Visual Studio Code either by clicking the icon or typing
28 |
> code
29 | in the terminal.
30 |
A key feature of Visual Studio Code's approach to python is built-in support for jupyter notebooks, which will allow you to edit python code in a convenient editor, while seeing interactive output of your code in a split window. Since pathpy comes with a jupyter integration, you can display interactive visualisations of networks, temporal networks, and higher-order networks directly in Visual Studio Code. The following video gives an idea how this integration looks like:
31 |
32 |
33 |
To conveniently work with python and jupyter in Visual Studio Code, you need to install the python extension, which adds python code editing, debugging, and linting functionality as well as jupyter support. You can install it free of charge using Visual Studio Code's extension manager. Open Visual Studio Code and click the Extensions icon in the bottom of the left menu bar or press Ctrl+Shift+X. This will bring up the Extensions window. Type python in the search box and click the first result. In the window on the right, click Install. Once this is complete, click Reload to complete the installation.
34 |
35 | Now that Visual Studio Code is set up, you can verify your development environment. Create a new file in Visual Studio Code and add the following code:
36 |
47 | When you type this code, so-called code lensesRun Cell|Run Below will appear above the two #%% tags. These tags, which will simply be ignored if you run the file with the standard python interpreter, mark the start of a cell in a jupyter notebook that we can execute directly from Visual Studio Code. If you click Run Cell, a new jupyter kernel will be started automatically, and the code in the current cell, i.e. the lines of code between the current #%% tag and the next tag, will be executed. An output window automatically opens, which displays the output of your code.
48 |
Hint: Some users occasionally experience an issue where the jupyter kernel fails to launch. One reason could be a missing installation of the python package jupytext. If you experience this problem, try to install the package by running:
49 |
50 |
> pip install jupytext
51 |
52 |
53 |
The combination of Visual Studio Code, jupyter and pathpy provides you with a convenient environment to complete data science tasks for (time series) data on complex networks. To learn more about how you can use pathpy to read, analyze, and visualize such data, just move to the next units.
26 | pathpy is pure python code. It has no platform-specific dependencies and should thus run without problems on any operating system. pathpy requires python 3.x. Since the end of life of python 2.x is scheduled for January 1st 2020, we will not offer a backport of pathpy for python 2. If you have dependencies on python 2.x, please consider using 2to3.
27 |
pathpy builds on the data science packages numpy and scipy, which will be set up automatically if you install pathpy using the package manager pip. Even though any python 3.x installation is fine, we recommend using the latest Anaconda distribution, an OpenSource python distribution that is pre-configured for data science and machine learning tasks.
28 |
How can I install pathpy?
29 |
The latest release version of pathpy can be installed via the python package index pypi. Just open a terminal and type:
30 |
31 |
> pip install pathpy2
32 |
33 |
If you have both a python 2 and a python 3 setup, you may have to use the command pip3 to specifically refer to your python 3 installation. Also, make sure to use the pyPI name pathpy2, since the pyPI package name pathpy has been namesquatted. We are currently working on a solution to this issue.
34 |
If you want to install the latest development version, you can directly download and install it from our github repository. In this version, some issues of the last release version may already be fixed, but please be aware that this code may be in flux. You can use pip to install the development version as follows:
You can check your installation to make sure that everything is working fine. In the terminal, run the python interpreter and import the package as follows:
39 |
40 |
41 | > python
42 | Python 3.6.6 |Anaconda, Inc.| (default) [...]
43 | Type "help", "copyright", "credits" or "license" for more information.
44 | >>> import pathpy as pp
45 | >>> help(pp)
46 | Help on package pathpy:
47 |
48 | NAME
49 | pathpy
50 |
51 | DESCRIPTION
52 | An OpenSource python package to analyze and
53 | visualize time series data on complex networks.
54 | [...]
55 |
56 |
57 |
If you see the help message above, pathpy has been set up successfully and you are ready to take your first steps.
58 |
We would like users to be aware of the following known issues in the latest version of pathpy:
28 |
29 |
Depending on whether or not the dependency package scipy has been compiled with the numerics package MKL or openblas, considerable numerical differences can occur, e.g. for eigenvalue centralities, PageRank, spectral clustering, and other measures that depend on the eigenvectors and eigenvalues of matrices. Please refer to scipy.show_config() to show compilation flags. We are currently investigating this issue.
30 |
Interactive visualisations in jupyter are currently only supported for jupyter notebooks, stand-alone HTML files, and the jupyter display integrated in IDEs like Visual Studio Code. Due to its new widget mechanism, interactive d3js visualizations are currently not available for jupyterLab.
31 |
The visualisation module currently does not support the drawing of edge arrows for temporal networks with directed edges. However, a powerful templating mechanism is available to support custom interactive and dynamic visualisations both for static and temporal networks.
32 |
The visualisation of paths in terms of alluvial diagrams within jupyter is currently unstable. This is due to the asynchronous loading of external scripts and possible network latencies e.g. in wireless networks. We will replace this functionality in a future version.
33 |
34 |
35 |
We are working on a solution to these issues in the next release. An up-to-date list of open issues can be found at our issue tracker. For the installation of development versions in which your issue might already have been fixed please refer to the installation instructions above.
36 |
37 |
38 |
39 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/docs/usage.rst:
--------------------------------------------------------------------------------
1 | =====
2 | Usage
3 | =====
4 |
5 | To use pathpy in a project::
6 |
7 | import pathpy
8 |
--------------------------------------------------------------------------------
/multiorder.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/multiorder.png
--------------------------------------------------------------------------------
/pathpy.pyproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | 2.0
6 | {896991fc-0289-4bae-b680-88e19508c91f}
7 |
8 |
9 |
10 |
11 | .
12 | .
13 |
14 |
15 |
16 |
17 | 10.0
18 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)
19 | pathpy
20 |
21 |
22 |
23 |
24 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets
25 |
26 |
27 |
28 | Code
29 |
30 |
31 | Code
32 |
33 |
34 | Code
35 |
36 |
37 | Code
38 |
39 |
40 | Code
41 |
42 |
43 | Code
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/pathpy/__init__.py:
--------------------------------------------------------------------------------
"""
An OpenSource python package to analyze and
visualize time series data on complex networks.
"""

__author__ = """Ingo Scholtes"""
__email__ = 'scholtes@ifi.uzh.ch'
__version__ = '2.2.0'

from .classes import *
import pathpy.path_extraction
import pathpy.visualisation
# BUGFIX: pathpy.algorithms.centralities was previously imported twice
import pathpy.algorithms.centralities
import pathpy.algorithms.components
import pathpy.algorithms.shortest_paths
import pathpy.algorithms.random_walk
import pathpy.algorithms.temporal_walk
import pathpy.algorithms.spectral
import pathpy.algorithms.path_measures
import pathpy.algorithms.infomap
import pathpy.algorithms.modularity
import pathpy.algorithms.random_graphs
from .algorithms import statistics

import pathpy.utils

# Flag controlling (experimental) multicore support.
# NOTE: a module-level assignment is already global; the previous
# `global` statement here was a no-op and has been removed.
ENABLE_MULTICORE_SUPPORT = False
--------------------------------------------------------------------------------
/pathpy/algorithms/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Contains a collection of algorithms and measures for networks, higher-order models, and paths
3 | """
--------------------------------------------------------------------------------
/pathpy/algorithms/components.py:
--------------------------------------------------------------------------------
1 | """
2 | Algorithms to calculate connected components in networks.
3 | """
4 |
5 | # -*- coding: utf-8 -*-
6 | # pathpy is an OpenSource python package for the analysis of time series data
7 | # on networks using higher- and multi order graphical models.
8 | #
9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published
13 | # by the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with this program. If not, see .
23 | #
24 | # Contact the developer:
25 | #
26 | # E-mail: scholtes@ifi.uzh.ch
27 | # Web: http://www.ingoscholtes.net
28 | from collections import defaultdict
29 |
30 | import numpy as _np
31 | import scipy.sparse.linalg as _sla
32 |
33 | from pathpy import Network
34 | from pathpy.utils import Log, Severity
35 | from pathpy.utils import PathpyError
36 |
37 | __all__ = ['connected_components']
38 |
39 |
def connected_components(network, lanczos_vecs=None, maxiter=None):
    """
    Calculates connected components based on the spectrum of the Laplacian matrix.

    Parameters
    ----------
    network: Network
        The network for which to compute connected components.
    lanczos_vecs: int
        Number of Lanczos vectors (ncv) used by the underlying ARPACK
        eigensolver. If None, a default based on the network size is used.
        Must satisfy k < ncv <= N where k = ncount() - 2.
    maxiter: int
        Maximum number of Arnoldi update iterations allowed in the
        eigensolver. If None, defaults to ten times the number of
        computed eigenvalues.

    Returns
    -------
    dict
        Maps a component index to the set of (integer) node indices
        belonging to that component.
    """
    L = network.laplacian_matrix(weighted=True)
    # eigs requires k < N, so compute two eigenvalues fewer than there are nodes
    n = network.ncount() - 2
    if lanczos_vecs is None:
        # BUGFIX: clamp ncv to the full node count (not to k = n), since
        # ARPACK requires k < ncv <= N
        lanczos_vecs = min(network.ncount(), max(2 * n + 1, 20))
    if maxiter is None:
        maxiter = n * 10
    # BUGFIX: lanczos_vecs and maxiter were previously computed/accepted
    # but never forwarded to the eigensolver
    vals, vecs = _sla.eigs(L, k=n, which="SM", ncv=lanczos_vecs,
                           maxiter=maxiter, return_eigenvectors=True)

    components = defaultdict(set)
    c = 0

    # use eigenvectors of zero eigenvalues to map nodes to components
    for i in range(n):
        if _np.isclose(vals[i], 0, atol=1.e-12):
            min_v = _np.min(vecs[:, i])
            # BUGFIX: use a distinct loop variable; the original reused `i`,
            # shadowing the outer eigenvalue index
            for node_idx in _np.where(_np.isclose(vecs[:, i], min_v))[0]:
                components[c].add(node_idx)
            c += 1
    return components
63 |
64 |
65 |
def reduce_to_gcc(network):
    """
    Reduces the network to the largest connected component.
    Connected components are calculated using Tarjan's algorithm.

    The network is modified in place: every node outside the largest
    strongly connected component is removed via network.remove_node.
    Nothing is returned.

    Parameters
    ----------
    network: Network
        The (directed) network to reduce. Successor relations are read
        from network.successors.
    """

    # these are used as nonlocal variables (!)
    index = 0
    S = []
    indices = defaultdict(lambda: None)
    low_link = defaultdict(lambda: None)
    on_stack = defaultdict(lambda: False)
    components = {}

    # Tarjan's algorithm
    # NOTE(review): strong_connect is recursive, so very large or deeply
    # chained networks may exceed python's recursion limit — confirm for
    # the intended input sizes.
    def strong_connect(v):
        nonlocal index
        nonlocal S
        nonlocal indices
        nonlocal low_link
        nonlocal on_stack
        nonlocal components

        # assign v the next DFS index; its low-link starts equal to it
        indices[v] = index
        low_link[v] = index
        index += 1
        S.append(v)
        on_stack[v] = True

        for w in network.successors[v]:
            if indices[w] is None:
                # successor not yet visited: recurse into it
                strong_connect(w)
                low_link[v] = min(low_link[v], low_link[w])
            elif on_stack[w]:
                # successor is on the stack, hence part of the current SCC
                low_link[v] = min(low_link[v], indices[w])

        # create component of node v
        if low_link[v] == indices[v]:
            # v is the root of an SCC: pop all of its members off the stack
            components[v] = set()
            while True:
                w = S.pop()
                on_stack[w] = False
                components[v].add(w)
                if v == w:
                    break

    # compute strongly connected components
    for v in network.nodes:
        if indices[v] is None:
            strong_connect(v)
            # print('node {v}, size = {n}, component = {component}'.format(v=v, component=components[v], n = len(components[v]) ))

    # find the largest strongly connected component
    max_size = 0
    for v in components:
        if len(components[v]) > max_size:
            scc = components[v]
            max_size = len(components[v])

    # Reduce higher-order network to SCC
    for v in list(network.nodes):
        if v not in scc:
            network.remove_node(v)
--------------------------------------------------------------------------------
/pathpy/algorithms/modularity.py:
--------------------------------------------------------------------------------
1 | """
2 | Spectral and information-theoretic measures that can be calculated
3 | based on higher-order models of paths.
4 | """
5 |
6 | # -*- coding: utf-8 -*-
7 | # pathpy is an OpenSource python package for the analysis of time series data
8 | # on networks using higher- and multi order graphical models.
9 | #
10 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
11 | #
12 | # This program is free software: you can redistribute it and/or modify
13 | # it under the terms of the GNU Affero General Public License as published
14 | # by the Free Software Foundation, either version 3 of the License, or
15 | # (at your option) any later version.
16 | #
17 | # This program is distributed in the hope that it will be useful,
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 | # GNU Affero General Public License for more details.
21 | #
22 | # You should have received a copy of the GNU Affero General Public License
23 | # along with this program. If not, see .
24 | #
25 | # Contact the developer:
26 | #
27 | # E-mail: scholtes@ifi.uzh.ch
28 | # Web: http://www.ingoscholtes.net
29 | from collections import defaultdict
30 |
31 | import numpy as np
32 | import scipy.sparse.linalg as sla
33 |
34 | from pathpy import HigherOrderNetwork
35 | from pathpy.utils import Log, Severity
36 | from pathpy import Paths
37 | from pathpy.utils import PathpyError
38 |
39 |
40 | __all__ = ['q', 'q_max', 'assortativity_coeff', 'find_communities']
41 |
def q(network, C=None, delta=None):
    """
    Computes the modularity of a network, either with respect to a
    node-to-community mapping C or with respect to a delta-function.

    Parameters
    ----------
    network: Network
        The network for which to compute modularity.
    C: dict
        Maps node names to community identifiers. Mutually exclusive
        with delta.
    delta: callable
        delta(v, w) -> bool, True if nodes v and w belong to the same
        community. Mutually exclusive with C.

    Returns
    -------
    float
        The modularity value.
    """
    assert C is None or delta is None, 'Error: Cannot use clustering and delta-function simultaneously'

    m = network.ecount()
    A = network.adjacency_matrix(weighted=False)
    idx = network.node_to_name_map()
    # local renamed from `q` to avoid shadowing this function's own name
    mod = 0.0
    for v in network.nodes:
        for w in network.nodes:
            # BUGFIX: compare against None with `is not None` instead of `!= None`
            if (C is not None and C[v] == C[w]) or (delta is not None and delta(v, w)):
                mod += A[idx[v], idx[w]] - network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m)
    mod /= 2*m
    return mod
55 |
56 |
def q_max(network, C=None, delta=None):
    """
    Computes the maximally attainable modularity for a given community
    structure, i.e. the modularity the clustering would have if all edges
    fell within communities (only the null-model term is subtracted).

    Parameters
    ----------
    network: Network
        The network for which to compute the maximal modularity.
    C: dict
        Maps node names to community identifiers. Mutually exclusive
        with delta.
    delta: callable
        delta(v, w) -> bool, True if nodes v and w belong to the same
        community. Mutually exclusive with C.

    Returns
    -------
    float
        The maximal modularity value.
    """
    assert C is None or delta is None, 'Error: Cannot use clustering and delta-function simultaneously'

    m = network.ecount()
    # NOTE: the unused local `idx = network.node_to_name_map()` was removed
    mod = 0.0
    for v in network.nodes:
        for w in network.nodes:
            # BUGFIX: compare against None with `is not None` instead of `!= None`
            if (C is not None and C[v] == C[w]) or (delta is not None and delta(v, w)):
                mod -= network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m)
    mod /= 2*m
    return mod
69 |
def assortativity_coeff(network, C=None):
    """
    Computes the assortativity coefficient q_opt / q_max of a network.

    Parameters
    ----------
    network: Network
        The network for which to compute the coefficient.
    C: dict
        Optional node-to-community mapping. If None (default), a community
        structure is first detected via find_communities.
        (BUGFIX: a user-supplied C was previously silently ignored and
        overwritten by the detected communities.)

    Returns
    -------
    float
        The ratio of the (detected or given) clustering's modularity to
        its maximally attainable modularity.
    """
    if C is None:
        C, q_opt = find_communities(network)
    else:
        q_opt = q(network, C)
    return q_opt / q_max(network, C)
73 |
74 |
def q_merge(network, C, merge=None):
    """
    Computes the modularity of a network under clustering C, treating the
    communities listed in `merge` as if they were a single community.

    Parameters
    ----------
    network: Network
        The network for which to compute modularity.
    C: dict
        Maps node names to community identifiers.
    merge: set
        Community identifiers to be considered merged. If None, the plain
        modularity of C is computed.

    Returns
    -------
    float
        The modularity of the (hypothetically merged) clustering.
    """
    m = network.ecount()
    # NOTE: the unused local `n = network.ncount()` was removed
    A = network.adjacency_matrix(weighted=False)
    idx = network.node_to_name_map()
    mod = 0.0
    for v in network.nodes:
        for w in network.nodes:
            if C[v] == C[w] or (merge is not None and C[v] in merge and C[w] in merge):
                mod += A[idx[v], idx[w]] - network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m)
    mod /= 2*m
    return mod
87 |
88 |
def find_communities(network, iterations=100):
    """
    Detects communities with a simple randomized agglomerative modularity
    optimization: starting from singleton communities, repeatedly pick two
    distinct communities at random and merge them whenever this increases
    modularity.

    Parameters
    ----------
    network: Network
        The network in which to detect communities.
    iterations: int
        The number of random merge attempts (default 100).

    Returns
    -------
    (dict, float)
        The node-to-community mapping and its modularity.
    """
    # start with each node being in a separate cluster
    C = {}
    community_to_nodes = {}
    c = 0
    for n in network.nodes:
        C[n] = c
        community_to_nodes[c] = set([n])
        c += 1
    q_current = q(network, C)
    communities = list(C.values())

    for i in range(iterations):
        # nothing left to merge once a single community remains
        if len(communities) < 2:
            break
        # randomly choose two *distinct* communities
        # (BUGFIX: the original sampled with replacement, so x == y was
        # possible and the iteration was wasted on a no-op merge test)
        x, y = np.random.choice(communities, size=2, replace=False)
        # check Q of merged communities
        q_new = q_merge(network, C, merge=set([x, y]))
        if q_new > q_current:
            # actually merge the communities
            for n in community_to_nodes[x]:
                C[n] = y
            community_to_nodes[y] = community_to_nodes[y] | community_to_nodes[x]
            q_current = q_new
            communities.remove(x)
            del community_to_nodes[x]
    return C, q_current
--------------------------------------------------------------------------------
/pathpy/algorithms/random_walk.py:
--------------------------------------------------------------------------------
1 | """
2 | Algorithms to calculate shortest paths and distances in higher-order networks and paths.
3 | """
4 | # -*- coding: utf-8 -*-
5 |
6 | # pathpy is an OpenSource python package for the analysis of time series data
7 | # on networks using higher- and multi order graphical models.
8 | #
9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published
13 | # by the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with this program. If not, see .
23 | #
24 | # Contact the developer:
25 | #
26 | # E-mail: scholtes@ifi.uzh.ch
27 | # Web: http://www.ingoscholtes.net
28 | from functools import singledispatch
29 | from collections import defaultdict
30 |
31 | from pathpy.utils import Log, Severity
32 | from pathpy.utils import PathpyNotImplemented
33 | from pathpy.classes import TemporalNetwork
34 | from pathpy.classes import Network
35 | from pathpy.classes import HigherOrderNetwork
36 | import numpy as _np
37 |
38 | __all__ = ['generate_walk']
39 |
@singledispatch
def generate_walk(network, l=100, start_node=None):
    """
    Generate a random walk trajectory of a given length, based on
    a weighted/directed/undirected network, temporal network, or
    higher-order network.

    Parameters:
    -----------
    network: Network, TemporalNetwork, HigherOrderNetwork
        The temporal, first-order, or higher-order network, which
        will be used to randomly generate a walk through a network.
    l: int
        The (maximum) length of the walk to be generated. If a node
        with out-degree zero is encountered, the walk is terminated
        even if l has not been reached.
    start_node: str
        The (higher-order) node in which the random walk will be started.
        Default is None, in which case a random start node will be chosen.
    """
    # after the transpose, row i holds the transition probabilities *from* node i
    trans = network.transition_matrix().todense().transpose()
    name_to_idx = network.node_to_name_map()
    node_names = _np.array([v for v in network.nodes])

    if start_node is None:
        # no start node given: pick one uniformly at random
        start_node = _np.random.choice(node_names)

    walk = [start_node]
    steps_left = l
    while steps_left > 0:
        steps_left -= 1
        # transition probabilities from the most recently visited node
        probs = _np.array(trans[name_to_idx[walk[-1]], :])[0, :]
        reachable = probs.nonzero()[0]
        if reachable.shape[0] == 0:
            # dead end: no outgoing transition, terminate the walk early
            break
        # draw the next node according to the transition probabilities
        walk.append(_np.random.choice(a=node_names[reachable], p=probs[reachable]))
    return walk
83 |
84 |
@generate_walk.register(HigherOrderNetwork)
def _higher_order_walk(higher_order_net, l=100, start_node=None):
    """
    Generate a random walk on a higher-order network, projecting every
    visited higher-order node back to first-order nodes in the itinerary.

    (BUGFIX: this function was previously named _temporal_walk, colliding
    with the TemporalNetwork implementation's name in the module namespace;
    dispatch was unaffected, but the redefinition shadowed the first
    function object and tripped linters.)

    Parameters
    ----------
    higher_order_net: HigherOrderNetwork
        The higher-order network on which to walk.
    l: int
        The (maximum) number of transitions. The walk terminates early at
        nodes without outgoing transitions.
    start_node: str
        The higher-order node to start from. Chosen at random if None.
    """
    T = higher_order_net.transition_matrix().todense().transpose()
    idx_map = higher_order_net.node_to_name_map()
    nodes = _np.array([v for v in higher_order_net.nodes])

    itinerary = []

    if start_node is None:
        start_node = _np.random.choice(nodes)
    last = start_node

    # expand the higher-order start node into its first-order node sequence
    for x in higher_order_net.higher_order_node_to_path(start_node):
        itinerary.append(x)
    for j in range(l):
        # get transition probability vector T[idx -> . ]
        prob = _np.array(T[idx_map[last], :])[0, :]
        nz = prob.nonzero()[0]
        # make one random transition
        if nz.shape[0] > 0:
            next_node = _np.random.choice(a=nodes[nz], p=prob[nz])
            # append only the newly visited first-order node
            itinerary.append(higher_order_net.higher_order_node_to_path(next_node)[-1])
            last = next_node
        else:  # no neighbor
            break
    return itinerary
114 |
115 |
@generate_walk.register(TemporalNetwork)
def _temporal_walk(tempnet, l=100, start_node=None):
    """
    Generate a time-respecting random walk on a temporal network: at each
    time stamp the walker moves to a randomly chosen target of an edge
    emanating from its current node, if any such edge exists at that time.
    """
    if start_node is None:
        current = _np.random.choice(tempnet.nodes)
    else:
        current = start_node

    visited = [current]
    hops = 0
    for t in tempnet.ordered_times:
        previous = current
        # collect all nodes reachable from the current node at time t
        candidates = {w for (v, w, time) in tempnet.time[t] if v == current}
        if candidates:
            # jump to a uniformly chosen candidate and count the hop
            current = _np.random.choice(list(candidates))
            hops += 1
        # record the position only when it actually changed
        if current != previous:
            visited.append(current)
        if hops == l:
            break
    return visited
143 |
--------------------------------------------------------------------------------
/pathpy/algorithms/statistics.py:
--------------------------------------------------------------------------------
1 | """
2 | Collection of statistical measures for paths, (higher-order) networks, and temporal networks
3 | """
4 | # -*- coding: utf-8 -*-
5 |
6 | # pathpy is an OpenSource python package for the analysis of time series data
7 | # on networks using higher- and multi order graphical models.
8 | #
9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published
13 | # by the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with this program. If not, see .
23 | #
24 | # Contact the developer:
25 | #
26 | # E-mail: scholtes@ifi.uzh.ch
27 | # Web: http://www.ingoscholtes.net
28 | from collections import defaultdict
29 | from collections import Counter
30 |
31 | import numpy as _np
32 | import scipy as sp
33 |
34 | from pathpy .utils import Log, Severity
35 | from pathpy import Network
36 |
37 | from pathpy.utils import PathpyNotImplemented
38 |
39 |
def local_clustering_coefficient(network, v):
    r"""Calculates the local clustering coefficient of a node in a directed or
    undirected network.

    A node whose relevant degree (out-degree in directed networks, degree in
    undirected ones) is smaller than two has a local clustering coefficient of
    zero by definition. Otherwise it is the fraction of (ordered) successor
    pairs of the node that are themselves connected by an edge:

        cc(v) := 2*k(v)/(d_v(d_v-1))          (undirected)
        cc(v) := k(v)/(d_out_v(d_out_v-1))    (directed)

    Parameters
    ----------
    network: Network
        The network in which to calculate the local clustering coefficient.
    v: str
        The node for which the local clustering coefficient shall be calculated.
    """
    # pick the degree notion that matches the network's directedness
    if network.directed:
        deg = network.nodes[v]['outdegree']
    else:
        deg = network.nodes[v]['degree']
    # coefficient is defined as zero for (out-)degrees smaller than two
    if deg < 2:
        return 0.0
    # count ordered successor pairs (i, j) that are linked by an edge;
    # in the undirected case both (i, j) and (j, i) are present in
    # network.edges, which yields the factor two of the usual formula
    closed_pairs = sum(1.0
                       for i in network.successors[v]
                       for j in network.successors[v]
                       if (i, j) in network.edges)
    return closed_pairs / (deg * (deg - 1.0))
72 |
73 |
def avg_clustering_coefficient(network):
    r"""Calculates the average (global) clustering coefficient of a directed
    or undirected network, i.e. the mean of the local clustering coefficients
    of all nodes.

    Parameters
    ----------
    network: Network
        The network for which to calculate the average clustering coefficient.
    """
    coefficients = [local_clustering_coefficient(network, node)
                    for node in network.nodes]
    return _np.mean(coefficients)
83 |
84 |
def mean_degree(network, degree='degree'):
    r"""Calculates the mean (in/out)-degree of a directed or undirected network.

    Parameters
    ----------
    network: Network
        The network in which to calculate the mean degree
    degree: str
        The degree property to average: 'degree', 'indegree' or 'outdegree'.
        Default is 'degree'.
    """
    # use membership/equality instead of `is`: identity comparison with
    # string literals relies on CPython interning and is not guaranteed
    assert degree in ('degree', 'indegree', 'outdegree'), \
        'Unknown degree property'
    return _np.mean([network.nodes[x][degree] for x in network.nodes])
96 |
97 |
def degree_dist(network, degree='degree'):
    r"""Calculates the (in/out)-degree distribution of a directed or undirected
    network.

    Parameters
    ----------
    network: Network
        The network for which to calculate the degree distribution
    degree: str
        The degree property to use: 'degree', 'indegree' or 'outdegree'.
        Default is 'degree'.

    Returns
    -------
    Counter
        maps each observed degree value k to its relative frequency P(k)
    """
    # use membership/equality instead of `is`: identity comparison with
    # string literals relies on CPython interning and is not guaranteed
    assert degree in ('degree', 'indegree', 'outdegree'), \
        'Unknown degree property'
    p_k = Counter(network.nodes[x][degree] for x in network.nodes)
    # normalise counts to relative frequencies; hoist the node count
    # out of the loop so it is computed only once
    n = network.ncount()
    for k in p_k:
        p_k[k] = p_k[k] / n
    return p_k
112 |
113 |
def degree_moment(network, k, degree='degree'):
    r"""Calculates the k-th raw moment of the (in/out)-degree distribution of
    a directed or undirected network, i.e. sum over x of x**k * P(x).

    Parameters
    ----------
    network: Network
        The network in which to calculate the k-th moment of the degree
        distribution
    k: int
        The order of the moment to compute
    degree: str
        The degree property to use ('degree', 'indegree' or 'outdegree')
    """
    p_k = degree_dist(network, degree)
    return sum(value ** k * prob for value, prob in p_k.items())
128 |
129 |
def generating_func(network, x, degree='degree'):
    r"""Returns f(x) where f is the probability generating function for the
    (in/out)-degree distribution P(k) of a network. The function is defined in
    the interval [0,1] and returns values from [0,1]. The following properties
    hold:

    [1/k! d^k/dx f]_{x=0} = P(k)    with d^k/dx f being the k-th derivative of f by x
    f'(1) = <k>                     with f' being the first derivative and <k> the mean degree
    [(x d/dx)^m f]_{x=1} = <k^m>    with <k^m> being the m-th raw moment of P

    Parameters
    ----------
    network: Network
        The network whose degree distribution defines f.
    x: int, float, list, numpy.ndarray
        The argument(s) for which the value f(x) shall be computed.
    degree: str
        The degree property to use ('degree', 'indegree' or 'outdegree')

    Returns
    -------
    Either a single float value f(x) (if x is a scalar) or a numpy.ndarray
    containing the function values f(x) for all arguments in x

    Example
    -------
    >>> import pathpy as pp
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt

    >>> n = pp.Network()
    >>> n.add_edge('a', 'b')
    >>> n.add_edge('b', 'c')
    >>> n.add_edge('a', 'c')
    >>> n.add_edge('c', 'd')
    >>> n.add_edge('d', 'e')
    >>> n.add_edge('d', 'f')
    >>> n.add_edge('e', 'f')

    >>> # print single value f(x)
    >>> stat = pp.statistics.generating_func(n, 0.3)
    >>> print('{:0.3f}'.format(stat))
    0.069

    >>> # plot generating function
    >>> x = np.linspace(0, 1, 20)
    >>> y = pp.statistics.generating_func(n, x)
    >>> x = plt.plot(x, y)
    """
    # BUGFIX: also accept int arguments (e.g. x=0 or x=1), which the
    # original float-only check rejected
    assert isinstance(x, (int, float, list, _np.ndarray)), \
        'Argument can only be float, list or numpy.ndarray'

    p_k = degree_dist(network, degree)

    # normalise the argument to a sequence of evaluation points
    if isinstance(x, (int, float)):
        x_range = [x]
    else:
        x_range = x

    values = defaultdict(lambda: 0)
    for k in p_k:
        for v in x_range:
            values[v] += p_k[k] * v ** k

    if len(x_range) > 1:
        # build the result in the order of the evaluation points so the
        # output length always matches the input length (the original
        # collapsed duplicate x values)
        return _np.array([values[v] for v in x_range])
    # BUGFIX: index with the scalar evaluation point; the original
    # `values[x]` raised TypeError when x was a one-element list
    return values[x_range[0]]
194 |
195 |
def molloy_reed_fraction(network, degree='degree'):
    r"""Calculates the Molloy-Reed fraction <k**2>/<k> based on the
    (in/out)-degree distribution of a directed or undirected network.

    Parameters
    ----------
    network: Network
        The network in which to calculate the Molloy-Reed fraction
    degree: str
        The degree property to use ('degree', 'indegree' or 'outdegree')
    """
    second_moment = degree_moment(network, k=2, degree=degree)
    first_moment = degree_moment(network, k=1, degree=degree)
    return second_moment / first_moment
206 |
--------------------------------------------------------------------------------
/pathpy/algorithms/temporal_walk.py:
--------------------------------------------------------------------------------
1 | """
2 | Algorithms to calculate shortest paths and distances in higher-order networks and paths.
3 | """
4 | # -*- coding: utf-8 -*-
5 |
6 | # pathpy is an OpenSource python package for the analysis of time series data
7 | # on networks using higher- and multi order graphical models.
8 | #
9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
10 | #
11 | # This program is free software: you can redistribute it and/or modify
12 | # it under the terms of the GNU Affero General Public License as published
13 | # by the Free Software Foundation, either version 3 of the License, or
14 | # (at your option) any later version.
15 | #
16 | # This program is distributed in the hope that it will be useful,
17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 | # GNU Affero General Public License for more details.
20 | #
21 | # You should have received a copy of the GNU Affero General Public License
22 | # along with this program. If not, see .
23 | #
24 | # Contact the developer:
25 | #
26 | # E-mail: scholtes@ifi.uzh.ch
27 | # Web: http://www.ingoscholtes.net
28 |
29 | from pathpy.utils import Log, Severity
30 | from pathpy.classes import TemporalNetwork
31 | from pathpy.algorithms import random_walk
32 |
def generate_walk(tempnet, l=100, start_node=None):
    """
    DEPRECATED: kept only for backward compatibility.

    Logs a deprecation warning and delegates to
    pathpy.algorithms.random_walk.generate_walk, which performs the
    actual temporal random walk.

    Parameters
    ----------
    tempnet: TemporalNetwork
        the temporal network on which the walk is generated
    l: int
        the (maximum) number of steps of the walk (default 100)
    start_node: str
        the node at which the walk starts; handling of None is left to
        the delegate implementation
    """
    Log.add('The temporal_walk.generate_walk function is deprecated. \
Please use random_walk.generate_walk instead.', Severity.WARNING)
    return random_walk.generate_walk(tempnet, l, start_node)
40 |
--------------------------------------------------------------------------------
/pathpy/classes/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | An internal module that groups basic classes of pathpy. Please use top-level namespace instead.
3 | """
4 |
5 | from .paths import Paths
6 | from .dag import DAG
7 | from .network import Network
8 | from .higher_order_network import HigherOrderNetwork
9 | from .multi_order_model import MultiOrderModel
10 | from .temporal_network import TemporalNetwork
11 | from .markov_sequence import MarkovSequence
12 | from .rolling_time_window import RollingTimeWindow
13 |
--------------------------------------------------------------------------------
/pathpy/classes/markov_sequence.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 | from collections import defaultdict
26 | import numpy as np
27 |
28 | from pathpy.utils import Log
29 |
30 |
31 | np.seterr(all='warn')
32 |
33 |
class MarkovSequence:
    """
    Fits higher-order Markov chain models to a single symbol sequence
    (e.g. generated from concatenated paths) and provides likelihood-based
    model selection via AIC, BIC, or raw likelihood.
    """

    def __init__(self, sequence):
        """
        Generates a Markov model container for a sequence, given as a
        single list of strings.

        Parameters
        ----------
        sequence: list
            the sequence of symbols to be modeled
        """
        # The sequence to be modeled
        self.sequence = sequence

        # Transition probabilities of fitted higher-order Markov chains,
        # keyed by model order k
        self.P = {}

        # the set of states of higher-order Markov chains
        self.states = {1: set(sequence)}

    def fit_markov_model(self, k=1):
        """Generates a k-th order Markov model for the underlying sequence
        via maximum-likelihood estimation of transition probabilities.

        Parameters
        ----------
        k: int
            the order of the Markov model to fit (default 1)
        """
        # TODO: Add support for k=0

        assert self.sequence, "Error: Empty sequence"

        # MLE fit of transition probabilities: P[k][memory][next_symbol]
        self.P[k] = defaultdict(lambda: defaultdict(lambda: 0.0))

        Log.add('Fitting Markov model with order k = ' + str(k))

        # initial memory is the first k symbols of the sequence
        mem = tuple(self.sequence[:k])

        # count state transitions
        for s in self.sequence[k:]:
            self.P[k][mem][s] += 1.0

            # shift memory by one element
            mem = mem[1:] + (s,)

        # normalize counts to probabilities (each row sums to one)
        for m in self.P[k]:
            total = float(sum(self.P[k][m].values()))
            for s in self.P[k][m]:
                self.P[k][m][s] /= total
        Log.add('finished.')

    def likelihood(self, k=1, log=True):
        """
        Returns the (log-)likelihood of the sequence assuming a k-th order
        Markov model, fitting the model first if it is not cached.

        Parameters
        ----------
        k: int
            the order of the Markov model
        log: bool
            if True (default), the log-likelihood is returned
        """
        if k not in self.P:
            self.fit_markov_model(k)

        L = 0

        # initial memory is the first k symbols of the sequence
        mem = tuple(self.sequence[:k])

        for s in self.sequence[k:]:
            # NOTE: unobserved transitions have probability zero, so L can
            # become -inf (numpy emits a warning, see np.seterr at module level)
            L += np.log(self.P[k][mem][s])

            # shift memory by one element
            mem = mem[1:] + (s,)

        if log:
            return L

        return np.exp(L)

    def bic(self, k=1, m=1):
        """Returns the Bayesian Information Criterion of a k-th order Markov
        model, computed relative to a reference model of order m."""

        if k not in self.P:
            self.fit_markov_model(k)

        if m not in self.P:
            self.fit_markov_model(m)

        L_k = self.likelihood(k, log=True)
        L_m = self.likelihood(m, log=True)

        s = len(self.states[1])
        n = len(self.sequence) - k

        # the transition matrix of a first-order model with s states has s**2
        # entries, subject to the constraint that entries in each row must sum
        # up to one (thus effectively reducing the degrees of freedom by a
        # factor of s, i.e. we have s^2-s^1. Generalizing this to order k, we
        # arrive at s^k * (s-1) = s^(k+1)-s^k degrees of freedom
        return np.log(n) * (s ** k - s ** m) * (s - 1) - 2.0 * (L_k - L_m)

    def aic(self, k=1, m=1):
        """Returns the Akaike Information Criterion of a k-th order Markov
        model, computed relative to a reference model of order m."""

        if k not in self.P:
            self.fit_markov_model(k)

        if m not in self.P:
            self.fit_markov_model(m)

        L_k = self.likelihood(k, log=True)
        L_m = self.likelihood(m, log=True)

        s = len(self.states[1])

        return 2 * (s ** k - s ** m) * (s - 1) - 2.0 * (L_k - L_m)

    def estimate_order(self, maxOrder, method='BIC'):
        """Estimates the optimal order of a Markov model based on
        Likelihood, BIC or AIC.

        Parameters
        ----------
        maxOrder: int
            the maximum model order to consider
        method: str
            one of 'BIC', 'AIC' or 'Likelihood' (default 'BIC')
        """
        if method not in {'BIC', 'AIC', 'Likelihood'}:  # pragma: no cover
            msg = "Expecting method 'AIC', 'BIC' or 'Likelihood', got '%s'" % method
            raise ValueError(msg)

        values = []
        orders = []

        # We need k < m for the BIC and AIC calculation, which
        # is why we only test up to maxOrder - 1
        for k in range(1, maxOrder):
            if k not in self.P:
                self.fit_markov_model(k)

            orders.append(k)

            if method == 'AIC':
                values.append(self.aic(k, maxOrder))
            elif method == 'BIC':
                values.append(self.bic(k, maxOrder))
            elif method == 'Likelihood':
                values.append(self.likelihood(k, log=True))

        if method == 'Likelihood':
            values.append(self.likelihood(maxOrder, log=True))
            orders.append(maxOrder)
            # likelihood is maximised at the optimal order
            return orders[np.argmax(values)]

        # BUGFIX: BIC/AIC must be *minimised*; the original code returned
        # the argmax unconditionally, leaving the argmin line unreachable
        return orders[np.argmin(values)]
195 |
--------------------------------------------------------------------------------
/pathpy/classes/rolling_time_window.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 | from pathpy.utils import Log, Severity
26 | from pathpy.classes import Network
27 |
28 |
class RollingTimeWindow:
    r"""
    An iterable rolling time window for time-slice analysis of
    time-stamped network data.
    """

    def __init__(self, temporal_net, window_size, step_size=1, directed=True, return_window=False):
        r"""
        Initialises a RollingTimeWindow that iterates through a sequence
        of time-slice networks of a given TemporalNetwork instance.

        Parameters:
        -----------
        temporal_net: TemporalNetwork
            TemporalNetwork instance from which the sequence of
            time-slice networks is generated.
        window_size: int
            Width of the rolling time window used to create the
            time-slice networks.
        step_size: int
            Number of time units by which the window start is advanced
            on each iteration. Default is 1.
        directed: bool
            Whether the generated time-slice networks are directed.
            Default is True.
        return_window: bool
            Whether the iterator also yields the current time window
            as a second return value. Default is False.

        Returns
        -------
        RollingTimeWindow
            An iterable sequence of tuples Network, [window_start, window_end]

        Examples
        --------
        >>> t = pathpy.TemporalNetwork.read_file(DATA)
        >>>
        >>> for n in pathpy.RollingTimeWindow(t, window_size=100):
        >>>     print(n)
        >>>
        >>> for n, w in pathpy.RollingTimeWindow(t, window_size=100, step_size=10, return_window=True):
        >>>     print('Time window starting at {0} and ending at {1}'.format(w[0], w[1]))
        >>>     print(network)
        """
        self.temporal_network = temporal_net
        self.window_size = window_size
        self.step_size = step_size
        # the window starts at the earliest and may roll up to the
        # latest time stamp observed in the temporal network
        self.current_time = min(temporal_net.ordered_times)
        self.max_time = max(temporal_net.ordered_times)
        self.directed = directed
        self.return_window = return_window

    def __iter__(self):
        return self

    def __next__(self):
        window_start = self.current_time
        window_end = window_start + self.window_size
        # stop once the window would extend past the last time stamp
        if window_end > self.max_time:
            raise StopIteration()
        time_slice = Network.from_temporal_network(self.temporal_network,
                                                   min_time=window_start,
                                                   max_time=window_end,
                                                   directed=self.directed)
        self.current_time += self.step_size
        if self.return_window:
            return time_slice, [window_start, window_end]
        return time_slice
100 |
--------------------------------------------------------------------------------
/pathpy/path_extraction/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Provides methods to generate path statistics based on origin destination statistics,
3 | directed acyclic graphs, temporal networks, or random walks in a network.
4 | """
5 | from .dag_paths import paths_from_dag
6 |
7 | from .temporal_paths import paths_from_temporal_network
8 | from .temporal_paths import paths_from_temporal_network_dag
9 | from .temporal_paths import paths_from_temporal_network_single
10 | from .temporal_paths import sample_paths_from_temporal_network_dag
11 | from .temporal_paths import generate_causal_tree
12 |
13 | from .random_walk import random_walk
14 | from .random_walk import paths_from_random_walk
15 | from .random_walk import random_paths
16 |
17 | from .origin_destination_stats import paths_from_origin_destination
18 | from .origin_destination_stats import paths_to_origin_destination
19 | from .origin_destination_stats import read_origin_destination
20 |
--------------------------------------------------------------------------------
/pathpy/path_extraction/origin_destination_stats.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 | from collections import defaultdict
26 |
27 | from pathpy import Paths
28 | from pathpy.utils import Log
29 | from pathpy.algorithms.shortest_paths import shortest_paths
30 |
31 | import numpy as np
32 |
def read_origin_destination(filename, separator=','):
    """Reads origin/destination statistics from a csv file
    with the following structure:

        origin1,destination1,weight
        origin2,destination2,weight
        origin3,destination3,weight

    Parameters
    ----------
    filename: str
        path to the file containing the origin/destination statistics
    separator: str
        arbitrary separation character (default: ',')

    Returns
    -------
    list
        tuples (origin, destination, weight) with weight cast to float
    """
    origin_destination_list = []
    Log.add('Reading origin/destination statistics from file ...')

    with open(filename, 'r') as f:
        # iterate the file directly instead of a manual readline loop
        for line in f:
            fields = line.rstrip().split(separator)
            # ROBUSTNESS: skip blank or malformed lines (e.g. a trailing
            # newline), which previously raised an IndexError
            if len(fields) < 3:
                continue
            origin_destination_list.append(
                (fields[0].strip(), fields[1].strip(), float(fields[2].strip())))
    Log.add('Finished.')

    return origin_destination_list
64 |
def paths_from_origin_destination(origin_destination_list, network,
                                  distribute_weight=True):
    """Extracts shortest path statistics based on origin/destination data.
    Such data capture the statistics of the origin (i.e. the start node) and
    destination (i.e. the target) node of itineraries in a given network.

    Common examples include passenger origin and destination statistics in
    transportation networks. Path statistics are generated under the assumption
    that all itineraries between an origin and a destination follow a shortest
    path in the network.

    Parameters
    ----------
    origin_destination_list: list
        A list of tuples (o, d, w) containing the origin (o), destination (d),
        and (possibly float) weight w of paths.
    network:
        The network topology for which shortest paths will be calculated.
        Node names in the network must match those used in the origin
        destination list.
    distribute_weight: bool
        If True, the weight of an origin-destination pair is equally
        distributed (in terms of whole integer observations) across all
        shortest paths between origin and destination. If False, the full
        weight is assigned to one randomly chosen shortest path.
        Default is True.

    Returns
    -------
    paths
    """
    assert network is not None, \
        'Error: extraction of origin destination paths requires a network topology'

    shortest = shortest_paths(network)

    paths = Paths()
    # every entry (origin, destination, weight) states that the shortest path
    # from origin to destination was observed `weight` times
    Log.add('Starting origin destination path calculation ...')
    for (origin, destination, weight) in origin_destination_list:
        assert origin in network.nodes, 'Error: could not find node ' + str(origin) + ' in network'
        assert destination in network.nodes, 'Error: could not find node ' + str(destination) + ' in network'
        candidates = list(shortest[origin][destination])
        num_paths = len(candidates)
        if distribute_weight and num_paths > 1:
            # distribute the (integer) weight round-robin across all shortest
            # paths, keeping per-path weights integral, so that no false
            # correlations are introduced that the data does not justify
            for i in range(int(weight)):
                paths.add_path(candidates[i % num_paths], frequency=(0, 1))
        else:
            # assign the full weight to a single randomly chosen shortest path
            paths.add_path(candidates[np.random.randint(num_paths)], frequency=(0, weight))
    Log.add('finished.')
    return paths
125 |
126 |
def paths_to_origin_destination(paths):
    """
    Computes path frequencies between all origin-destination pairs in a
    Paths object. The result can e.g. be used to create shortest path
    models that preserve the origin-destination statistics observed in
    real path data.

    Parameters
    ----------
    paths: Paths
        collection of weighted paths based on which origin destination
        statistics shall be computed

    Returns
    -------
    list of tuples (o, d, w) where o is origin, d is destination, and w is the weight
    """
    od_stats = defaultdict(lambda: 0.0)

    Log.add('Calculating origin/destination statistics from paths ...')
    # accumulate the observed frequency of each (first node, last node) pair
    for length in paths.paths:
        for path, counts in paths.paths[length].items():
            frequency = counts[1]
            if frequency > 0:
                od_stats[path[0], path[-1]] += frequency
    od_list = [(origin, destination, freq)
               for (origin, destination), freq in od_stats.items()]
    Log.add('finished.')
    return od_list
157 |
--------------------------------------------------------------------------------
/pathpy/path_extraction/random_walk.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 |
26 | import collections as _co
27 | import random
28 |
29 | import numpy as _np
30 |
31 | from pathpy.utils import Log, Severity
32 | from pathpy.classes.network import Network
33 | from pathpy.classes.paths import Paths
34 | from pathpy import algorithms
35 |
def random_walk(network, l, n=1, start_node=None):
    """
    [DEPRECATED] Logs a deprecation warning and delegates to
    paths_from_random_walk, which generates n paths of a random walker
    in the given network and returns them as a Paths object.
    Each path has a length of l steps.

    Parameters
    ----------
    network: Network, TemporalNetwork, HigherOrderNetwork
        The network structure on which the random walks will be simulated.
    int: l
        The (maximum) length of each random walk path. A path will
        terminate if a node with outdegree zero is reached.
    int: n
        The number of random walk paths to generate.
    start_node: str
        The node at which each walk starts; handling of None is left to
        the delegate implementation.
    """
    Log.add('The path_extraction.random_walk function is deprecated. Please use paths_from_random_walk instead.', Severity.WARNING)
    return paths_from_random_walk(network, l, n, start_node)
54 |
55 |
def paths_from_random_walk(network, l, n=1, start_node=None):
    """
    Generates n paths of a random walker in the given network and
    returns them as a Paths object. Each path has a length of l steps.

    Parameters
    ----------
    network: Network, TemporalNetwork, HigherOrderNetwork
        The network structure on which the random walks will be simulated.
    int: l
        The (maximum) length of each random walk path. A path will
        terminate if a node with outdegree zero is reached.
    int: n
        The number of random walk paths to generate.
    start_node: str
        The node at which each walk starts.
    """
    collected = Paths()
    for _ in range(n):
        walk = algorithms.random_walk.generate_walk(network, l, start_node)
        collected.add_path(tuple(walk))
    return collected
76 |
def random_paths(network, paths_orig, rand_frac=1.0):
    """
    Generates Markovian paths of a random walker in a given network and
    returns them as a Paths object, replacing a random fraction of the
    original paths with random walks of the same length and start node.

    Parameters
    ----------
    network: Network
        The network structure on which the random walks will be simulated.
    paths_orig: Paths
        Paths that we want to randomise
    rand_frac: float
        The fraction of paths that will be randomised
    """
    randomised = Paths()
    for length in paths_orig.paths:
        for path, counts in paths_orig.paths[length].items():
            if counts[1] <= 0:
                continue
            observed = int(counts[1])
            # draw how many of the observed paths get randomised
            n_rand = _np.random.binomial(observed, rand_frac)
            n_keep = observed - n_rand

            # replace the randomised fraction by Markovian random walks
            # of the same length, starting at the same node
            if n_rand > 0:
                randomised += paths_from_random_walk(network, length, n_rand, path[0])

            # keep the remaining observations unchanged
            if n_keep > 0:
                randomised.add_path(path, frequency=n_keep)
    return randomised
--------------------------------------------------------------------------------
/pathpy/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | A collection of logging functions and exceptions.
3 | """
4 | from .log import Log
5 | from .log import Severity
6 | from .exceptions import PathpyException
7 | from .exceptions import EmptySCCError
8 | from .exceptions import PathpyNotImplemented
9 | from .exceptions import PathsTooShort
10 | from .exceptions import PathpyError
11 |
--------------------------------------------------------------------------------
/pathpy/utils/default_containers.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pathpy is an OpenSource python package for the analysis of time series data
3 | # on networks using higher- and multi order graphical models.
4 | #
5 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU Affero General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU Affero General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU Affero General Public License
18 | # along with this program. If not, see .
19 | #
20 | # Contact the developer:
21 | #
22 | # E-mail: scholtes@ifi.uzh.ch
23 | # Web: http://www.ingoscholtes.net
24 |
25 | """
26 | Provides default containers for various classes
27 | which are used to store nodes, edges and similar objects.
28 |
29 | To make the various classes pickle-able the defaultdicts need to be publicly addressable
30 | function names, this means that no lambda functions are allowed.
31 |
32 | All pathpy classes which required a default value as a container, should use these here.
33 | """
34 |
35 | from collections import defaultdict
36 | import numpy as np
37 |
38 |
39 | def nested_zero_default():
40 | """
41 | Returns a nested default dict (2 levels)
42 | with a numpy zero array of length 0 as default
43 | """
44 | return defaultdict(zero_array_default)
45 |
46 |
47 | def _zero_array():
48 | """
49 | Returns a zero numpy array of length 2
50 | """
51 | return np.array([0.0, 0.0])
52 |
53 |
54 | def zero_array_default():
55 | """
56 | Returns a default dict with numpy zero array af length 2 as default
57 | """
58 | return defaultdict(_zero_array)
59 |
--------------------------------------------------------------------------------
/pathpy/utils/exceptions.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # pathpy is an OpenSource python package for the analysis of time series data
3 | # on networks using higher- and multi order graphical models.
4 | #
5 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
6 | #
7 | # This program is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU Affero General Public License as published
9 | # by the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # This program is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU Affero General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU Affero General Public License
18 | # along with this program. If not, see .
19 | #
20 | # Contact the developer:
21 | #
22 | # E-mail: scholtes@ifi.uzh.ch
23 | # Web: http://www.ingoscholtes.net
24 | """
25 | Classes to signal errors and exceptions in pathpy.
26 | """
27 |
class PathpyException(Exception):
    """
    Base class for all exceptions raised by pathpy.
    Catching this type handles any pathpy-specific error.
    """
32 |
33 |
class PathpyError(PathpyException):
    """
    Exception raised for a serious error in pathpy.
    """
38 |
39 |
class PathpyNotImplemented(PathpyException):
    """
    Exception raised when a requested procedure is not
    implemented in pathpy.
    """
44 |
45 |
class EmptySCCError(PathpyException):
    """
    Exception raised whenever a non-empty strongly connected
    component is required but an empty one is encountered.
    """
51 |
52 |
class PathsTooShort(PathpyException):
    """
    Exception raised if the available paths are too
    short for the requested operation.
    """
58 |
--------------------------------------------------------------------------------
/pathpy/utils/log.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 | import enum
26 | from datetime import datetime
27 | import sys
28 |
29 |
30 | __all__ = ["Severity", "Log"]
31 |
32 |
class Severity(enum.IntEnum):
    """ An enumeration that can be used to indicate
    the severity of log messages, and which can be
    used to filter messages based on severities.

    Higher numeric values indicate more severe messages; a message
    is written to the log if its severity is at least Log.min_severity.
    """

    # Error messages (highest severity)
    ERROR = 4

    # Warning messages
    WARNING = 3

    # Informational messages (default minimum level)
    INFO = 2

    # Messages regarding timing and performance
    TIMING = 1

    # Debug messages (really verbose, lowest severity)
    DEBUG = 0
53 |
54 |
class Log:
    """ A simple logging class, that allows to select what messages should
    be recorded in the output, and where these messages should be directed.

    All state (output stream and minimum severity) is kept on the class
    itself, so there is one global log configuration per process.
    """

    # the output stream to which log entries will be written
    output_stream = sys.stdout

    # The minimum severity level of messages to be logged
    min_severity = Severity.INFO

    @staticmethod
    def set_min_severity(severity): # pragma: no cover
        """ Sets the minimum severity level a message
        needs to have in order to be recorded in the output stream.
        By default, any message which has a severity of at least
        Severity.INFO will be written to the output stream. All messages
        with lower priority will be suppressed.
        """
        Log.min_severity = severity

    @staticmethod
    def set_output_stream(stream): # pragma: no cover
        """ Sets the output stream to which all messages will be
        written. By default, this is sys.stdout, but it can be
        changed in order to redirect the log to a logfile.
        """
        Log.output_stream = stream

    @staticmethod
    def add(msg, severity=Severity.INFO): # pragma: no cover
        """ Adds a message with the given severity to the log. This message will be written
        to the log output stream, which by default is sys.stdout. A newline character
        will be added to the message by default.
        """
        if severity >= Log.min_severity:
            # prefix each entry with a timestamp and the severity label
            ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            Log.output_stream.write(ts + ' [' + str(severity) + ']\t' + msg + '\n')
            # flush immediately so log output interleaves correctly with prints
            Log.output_stream.flush()
94 |
--------------------------------------------------------------------------------
/pathpy/visualisation/__init__.py:
--------------------------------------------------------------------------------
1 | """provides html and tikz visualisations for networks, temporal networks, and paths"""
2 |
3 | from .html import plot
4 | from .html import export_html
5 | from .html import plot_diffusion
6 | from .html import export_html_diffusion
7 | from .html import plot_walk
8 | from .html import export_html_walk
9 |
10 | from .tikz import export_tikz
11 |
12 | from .pdf import svg_to_pdf
13 | from .pdf import svg_to_png
14 |
--------------------------------------------------------------------------------
/pathpy/visualisation/alluvial.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 | import json
26 | import os
27 | from string import Template
28 |
29 | import string
30 | import random
31 |
32 | import collections as _co
33 |
34 | from pathpy.classes.higher_order_network import HigherOrderNetwork
35 | from pathpy.classes.paths import Paths
36 | from pathpy.classes.network import Network
37 |
38 | import numpy as _np
39 |
def generate_memory_net(paths, node, self_loops=True):
    """
    Helper function that generates a directed and weighted memory network
    for a focal node, where edge weights capture sub-path statistics of
    length two through that node.

    Parameters
    ----------
    paths: Paths
        path statistics from which (sub-)paths of length two are taken
    node:
        the focal node that paths must pass through (as middle element)
    self_loops: bool
        if False, paths that start or end in the focal node are ignored

    Returns
    -------
    Network
        directed network with source (src_*), memory (mem_*) and
        target (tgt_*) nodes
    """
    n = Network(directed=True)

    # consider all (sub-)paths of length two
    # through the focal node
    for p in paths.paths[2]:
        if p[1] == node:
            if self_loops or (p[0] != node and p[2] != node):
                src = 'src_{0}'.format(p[0])
                tgt = 'tgt_{0}'.format(p[2])
                # mem encodes (predecessor, focal node); p[1] == node here
                mem = 'mem_{0}_{1}'.format(p[0], p[1])
                # calculate frequency of sub-paths src->focal_node->*, i.e. paths that
                # continue through the focal_node
                # w_1 = 0
                # for x in paths.nodes:
                #     ct = p[:2]+(x,)
                #     if ct in paths.paths[2] and x != focal_node:
                #         w_1 += paths.paths[2][ct].sum()

                # calculate frequency of (sub-)path src -> focal_node -> tgt
                w_2 = paths.paths[2][p].sum()
                # NOTE(review): repeated add_edge calls for the same (src, mem)
                # pair assume Network accumulates or overwrites weights
                # consistently -- confirm against Network.add_edge
                n.add_edge(src, mem, weight=1)
                n.add_edge(mem, tgt, weight=w_2)


    # adjust weights of links to memory nodes:
    # each incoming link of a memory node carries that node's total outweight,
    # and in- and out-weight of memory nodes are balanced
    for m in n.nodes:
        if m.startswith('mem'):
            for u in n.predecessors[m]:
                n.edges[(u,m)]['weight'] = n.nodes[m]['outweight']
            n.nodes[m]['inweight'] = n.nodes[m]['outweight']
    return n
76 |
77 |
def generate_memory_net_markov(network, focal_node, self_loops=True):
    """
    Generates a directed and weighted network with flow values based
    on a network and an assumption of Markov flows.

    Parameters
    ----------
    network: Network
        first-order network with 'weight' edge attributes and
        'outweight' node attributes
    focal_node:
        node through which all considered two-step flows pass
    self_loops: bool
        if False, flows that start or end in the focal node are ignored

    Returns
    -------
    Network
        directed network with source (src_*), memory (mem_*) and
        target (tgt_*) nodes, with flows distributed proportionally
        to edge weights (first-order / Markov assumption)
    """
    n = Network(directed=True)

    # total outgoing weight of the focal node, used to normalise flows
    # NOTE(review): assumes out_weight > 0, i.e. the focal node has at
    # least one outgoing edge -- otherwise the division below fails
    out_weight = _np.sum(network.nodes[focal_node]['outweight'])

    for u in network.predecessors[focal_node]:
        for w in network.successors[focal_node]:
            if self_loops or (u!= focal_node and w != focal_node):
                src = 'src_{0}'.format(u)
                tgt = 'tgt_{0}'.format(w)
                mem = 'mem_{0}_{1}'.format(u, focal_node)

                # flow entering the memory node from u
                w_1 = _np.sum(network.edges[(u, focal_node)]['weight'])

                # at random, we expect the flow to be proportional to the relative edge weight
                w_2 = w_1 * (_np.sum(network.edges[(focal_node, w)]['weight'])/out_weight)
                n.add_edge(src, mem, weight=w_1)
                n.add_edge(mem, tgt, weight=w_2)
    return n
101 |
102 |
def generate_diffusion_net(paths, node=None, markov=True, steps=5):
    """
    Generates a directed network whose edges capture the diffusion of
    probability mass from a start node over a number of steps.

    Nodes of the returned network are named '{node}_{t}' for time
    steps t = 0 .. steps.

    Parameters
    ----------
    paths: Paths
        path statistics used to build the first-order model (and, if
        markov is False, to read observed paths of length `steps`)
    node:
        start node of the diffusion; defaults to the first node of the
        first-order network
    markov: bool
        if True flows follow the first-order transition matrix,
        otherwise flows are taken from observed paths starting in `node`
    steps: int
        number of diffusion steps

    Returns
    -------
    Network
        directed time-unfolded flow network with balanced node weights
    """
    g1 = HigherOrderNetwork(paths, k=1)
    map_1 = g1.node_to_name_map()

    # initial probability vector: all mass on the start node
    prob = _np.zeros(g1.ncount())
    prob = prob.transpose()
    if node is None:
        node = g1.nodes[0]

    prob[map_1[node]] = 1.0

    T = g1.transition_matrix()

    flow_net = Network(directed=True)

    if markov:
        # if markov == True flows are given by first-order transition matrix
        for t in range(1, steps+1):
            # calculate flow from i to j in step t
            for i in g1.nodes:
                for j in g1.nodes:
                    i_to_j = prob[map_1[i]] * T[map_1[j], map_1[i]]
                    if i_to_j > 0:
                        flow_net.add_edge('{0}_{1}'.format(i, t-1), '{0}_{1}'.format(j, t), weight = i_to_j)
            # advance the probability vector by one step
            prob = T.dot(prob)
    else:
        # if markov == False calculate flows based on paths starting in initial_node
        for p in paths.paths[steps]:
            if p[0] == node:
                for t in range(len(p)-1):
                    flow_net.add_edge('{0}_{1}'.format(p[t], t), '{0}_{1}'.format(p[t+1], t+1), weight = paths.paths[steps][p].sum())

    # normalize flows and balance in- and out-weight for all nodes
    # normalization = flow_net.nodes['{0}_{1}'.format(initial_node, 0)]['outweight']

    flow_net.nodes[node+'_0']['inweight'] = 1.0
    Q = [node+'_0']
    # adjust weights using BFS
    # NOTE(review): nodes can be enqueued multiple times (once per
    # incoming edge processed); rescaling happens on each visit --
    # confirm this converges to the intended balanced weights
    while Q:
        v = Q.pop()
        # print(v)
        inweight = flow_net.nodes[v]['inweight']
        outweight = flow_net.nodes[v]['outweight']

        # rescale outgoing edge weights so the node's outflow equals its inflow
        for w in flow_net.successors[v]:
            flow_net.nodes[w]['inweight'] = flow_net.nodes[w]['inweight'] - flow_net.edges[(v,w)]['weight']
            flow_net.edges[(v,w)]['weight'] = (inweight/outweight) * flow_net.edges[(v,w)]['weight']
            flow_net.nodes[w]['inweight'] = flow_net.nodes[w]['inweight'] + flow_net.edges[(v,w)]['weight']
            Q.append(w)
    return flow_net
155 |
--------------------------------------------------------------------------------
/pathpy/visualisation/pdf.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # pathpy is an OpenSource python package for the analysis of time series data
4 | # on networks using higher- and multi order graphical models.
5 | #
6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich
7 | #
8 | # This program is free software: you can redistribute it and/or modify
9 | # it under the terms of the GNU Affero General Public License as published
10 | # by the Free Software Foundation, either version 3 of the License, or
11 | # (at your option) any later version.
12 | #
13 | # This program is distributed in the hope that it will be useful,
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 | # GNU Affero General Public License for more details.
17 | #
18 | # You should have received a copy of the GNU Affero General Public License
19 | # along with this program. If not, see .
20 | #
21 | # Contact the developer:
22 | #
23 | # E-mail: scholtes@ifi.uzh.ch
24 | # Web: http://www.ingoscholtes.net
25 |
def svg_to_pdf(svg_file, output_file):
    """
    Method to convert an SVG file to a PDF file, suitable for
    scholarly publications. This method requires the third-party library
    svglib.
    """
    # svglib parses the SVG into a reportlab drawing object,
    # which reportlab then renders to PDF
    from svglib.svglib import svg2rlg
    from reportlab.graphics import renderPDF

    renderPDF.drawToFile(svg2rlg(svg_file), output_file)
38 |
39 |
def svg_to_png(svg_file, output_file):
    """
    Method to convert an SVG file to a PNG file. This method
    requires the third-party library svglib.
    """
    # uses svglib to render a SVG to PNG
    from svglib.svglib import svg2rlg
    from reportlab.graphics import renderPM

    drawing = svg2rlg(svg_file)
    renderPM.drawToFile(drawing, output_file, fmt='PNG')
51 |
--------------------------------------------------------------------------------
/pathpy/visualisation_assets/diffusion_template.html:
--------------------------------------------------------------------------------
1 |
30 |
31 | [save svg]
33 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/pathpy/visualisation_assets/network_template.html:
--------------------------------------------------------------------------------
1 |
22 |
23 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/pathpy/visualisation_assets/paths_template.html:
--------------------------------------------------------------------------------
1 |
29 | [save svg]
31 |
33 |
34 |
35 |
36 |
37 |
38 |
145 |
--------------------------------------------------------------------------------
/pathpy_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/pathpy_logo.png
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal = 1
3 |
4 | [flake8]
5 | exclude = docs
6 | max-line-length = 90
7 | max-complexity = 10
8 |
9 | [aliases]
10 | # Define setup.py command aliases here
11 | test = pytest
12 |
13 | [tool:pytest]
14 | collect_ignore = ['setup.py']
15 | norecursedirs = 'docs'
16 | filterwarnings =
17 | ignore::UserWarning
18 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | """pathpy setup script."""
5 |
6 | from setuptools import setup, find_packages
7 | from pathpy import __version__
8 |
9 |
# the long description shown on PyPI is assembled from readme and changelog
with open('README.rst', encoding='utf-8') as readme_file, open('HISTORY.rst', encoding='utf-8') as history_file:
    readme = readme_file.read()
    history = history_file.read()

# packages required at runtime
install_requirements = ['numpy', 'scipy']

# packages required by setup.py itself (pytest-runner enables `setup.py test`)
setup_requirements = ['pytest-runner']

setup(
    author="Ingo Scholtes",
    author_email='scholtes@ifi.uzh.ch',
    license='AGPL-3.0+',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Science/Research',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
    ],
    description="An OpenSource python package for the analysis and visualisation of time series data on"
                " complex networks with higher- and multi-order graphical models.",
    install_requires=install_requirements,
    setup_requires=setup_requirements,
    long_description=readme + '\n\n' + history,
    python_requires='>=3.5',
    keywords='network analysis temporal networks pathways sequence modeling graph mining',
    name='pathpy2',
    packages=find_packages(),
    test_suite='tests',
    url='https://www.pathpy.net',
    version=__version__,
    include_package_data=True,
    # ship the html/js templates needed by the visualisation module
    package_data={'pathpy': ['visualisation_assets/*.html'] + ['visualisation_assets/*.js']},
    zip_safe=False
)
47 |
--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
1 | # Unit tests for pathpy
2 |
3 | This directory contains the unit tests for methods and functions
4 | in pathpy.
5 | The testing framework [pytest](https://docs.pytest.org/)
6 | is required to run the tests.
7 |
8 | To run the test suite (without slow tests) run
9 | ```bash
10 | $ pytest tests
11 | ```
12 |
13 | ## Slow functions
14 |
15 | Slow functions can be decorated with `slow` to mark them
16 | as skippable if you require only a quick check.
17 | To run all tests add the flag `--runslow`:
18 | ```bash
19 | $ pytest --runslow
20 | ```
21 |
22 | ## Coverage report
23 |
24 | To compute a coverage report of the tests you need to install
25 | [coverage.py](https://coverage.readthedocs.io/en/coverage-4.3.4/)
26 | as well as its `pytest` integration
27 | [pytest-cov][1]
28 | ```bash
29 | $ pytest tests/ --runslow --cov=pathpy --cov-report html
30 | ```
31 | which will create an html coverage report in the same directory.
32 |
33 | [1]: https://pypi.python.org/pypi/pytest-cov
34 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/tests/__init__.py
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 |
4 | import pytest
5 | import numpy as np
6 |
7 | import pathpy as pp
8 |
# absolute path of this tests/ directory; fixture files live in tests/test_data
test_directory = os.path.dirname(os.path.abspath(__file__))
test_data_dir = os.path.join(test_directory, 'test_data')
11 |
12 |
def pytest_addoption(parser):
    """Register the custom command line flags used by this test suite."""
    options = (
        ("--runslow", "run slow tests"),
        ("--latex", "set `pdflatex` as available"),
        ("--networkx", "set `networkx` as available"),
    )
    for flag, help_text in options:
        parser.addoption(flag, action="store_true", help=help_text)
17 |
18 |
def pytest_runtest_setup(item):
    """Skip tests whose marker requires a command line flag that was not given."""
    for marker, option in (('slow', 'runslow'),
                           ('latex', 'latex'),
                           ('networkx', 'networkx')):
        if marker in item.keywords and not item.config.getvalue(option):
            pytest.skip("need --%s option to run" % option)
28 |
29 |
@pytest.fixture()
def test_data_directory():
    """Return the absolute path of the directory holding the test data files."""
    return test_data_dir
33 |
34 |
@pytest.fixture()
def path_from_ngram_file():
    """Load the example ngram file as a pathpy.Paths object."""
    ngram_file_path = os.path.join(test_data_dir, 'ngram_simple.ngram')
    path = pp.Paths.read_file(ngram_file_path, frequency=True)
    return path
41 |
42 |
@pytest.fixture()
def path_from_edge_file():
    """Load the example weighted edge list as a pathpy.Paths object."""
    file_path = os.path.join(test_data_dir, 'edge_frequency.edge')
    path = pp.Paths.read_edges(file_path, weight=True)
    return path
48 |
49 |
@pytest.fixture()
def path_from_edge_file_undirected():
    """Load the first 5 lines of the example edge list as undirected Paths."""
    file_path = os.path.join(test_data_dir, 'edge_frequency.edge')
    path = pp.Paths.read_edges(file_path, weight=True, undirected=True, maxlines=5)
    return path
55 |
56 |
def generate_random_path(size, rnd_seed, num_nodes=None):
    """Generate a Paths object filled with random path sequences.

    size: number of paths to add
    rnd_seed: seed for numpy's random number generator
    num_nodes: if given, node names are '0'..'num_nodes-1';
        otherwise the lowercase ascii letters are used
    """
    if num_nodes is None:
        import string
        alphabet = string.ascii_lowercase
    else:
        alphabet = [str(idx) for idx in range(num_nodes)]

    def draw_ngram(length, symbols):
        # draw `length` random node indices and join the node names
        picks = np.random.choice(len(symbols), length)
        return ','.join(symbols[idx] for idx in picks)

    np.random.seed(rnd_seed)
    paths = pp.Paths()
    for _ in range(size):
        # keep the order of random draws: frequency first, then length
        freq = np.random.randint(1, 4)
        length = np.random.randint(1, 10)
        paths.add_path(draw_ngram(length, alphabet), frequency=(0, freq))

    return paths
80 |
81 |
@pytest.fixture(scope='function')
def random_paths():
    """Return the generate_random_path factory so tests can build random Paths."""
    return generate_random_path
86 |
87 |
def generate_random_network(n=10, m=20, directed=True, weighted=True, seed=0):
    """Generate a random Network with n nodes and m (possibly weighted) edges."""
    random.seed(seed)
    net = pp.Network(directed)
    for node_id in range(n):
        net.add_node(str(node_id))
    for _ in range(m):
        # draw the endpoints first, then (if weighted) the weight,
        # so the RNG stream matches across runs
        v, w = random.sample(list(net.nodes), 2)
        if weighted:
            net.add_edge(v, w, weight=random.randint(0, 10))
        else:
            net.add_edge(v, w)
    return net
101 |
102 |
@pytest.fixture(scope='function')
def random_network():
    """Return the generate_random_network factory so tests can build random networks."""
    return generate_random_network
107 |
108 |
def generate_random_temporal_network(n=10, m=20, min_t=0, max_t=100, seed=0):
    """Generate a random TemporalNetwork.

    Parameters
    ----------
    n: int
        number of nodes
    m: int
        number of time-stamped edges
    min_t: int
        starting time
    max_t: int
        end time
    seed: int
        seed for random number generator

    Returns
    -------
    TemporalNetwork
    """
    random.seed(seed)
    node_set = [str(i) for i in range(n)]
    # keep the original drawing order: all sources, then all targets,
    # then all timestamps (random.sample keeps the RNG stream identical)
    sources = [random.sample(node_set, 1)[0] for _ in range(m)]
    targets = [random.sample(node_set, 1)[0] for _ in range(m)]
    stamps = [random.randint(min_t, max_t) for _ in range(m)]

    return pp.TemporalNetwork(list(zip(sources, targets, stamps)))
138 |
139 |
@pytest.fixture(scope='function')
def random_temp_network():
    """Return the generate_random_temporal_network factory."""
    return generate_random_temporal_network
144 |
145 |
@pytest.fixture()
def temporal_network_object():
    """Build a small TemporalNetwork containing several time-respecting
    paths of length two plus one isolated edge."""
    # (source, target, timestamp) tuples, in insertion order
    tedges = [
        ("c", "e", 1), ("e", "f", 2),      # path of length two
        ("a", "e", 3), ("e", "g", 4),      # path of length two
        ("c", "e", 5), ("e", "f", 6),      # path of length two
        ("a", "e", 7), ("e", "g", 8),      # path of length two
        ("c", "e", 9), ("e", "f", 10),     # path of length two
        # these two edges continue the previous path to (c -> e -> f -> e -> b)
        ("f", "e", 11), ("e", "b", 12),
        ("e", "b", 13),                    # isolated edge (path of length one)
        ("c", "e", 14), ("e", "f", 15),    # path of length two
        ("b", "e", 16), ("e", "g", 17),    # path of length two
        ("c", "e", 18), ("e", "f", 19),    # path of length two
        ("c", "e", 20), ("e", "f", 21),    # path of length two
    ]
    t = pp.TemporalNetwork()
    for source, target, ts in tedges:
        t.add_edge(source, target, ts)
    return t
193 |
194 |
@pytest.fixture()
def dag_object():
    """Build a small DAG fixture.

    For this DAG, the following paths between the root and the leaves
    exist for the mapping:
    mapping = {'a': 'A', 'b': 'B', 'c': 'A', 'e': 'B',
               'f': 'B', 'g': 'A', 'h': 'A', 'i': 'B', 'j': 'A'}

    h -> i                ( A -> B )
    h -> j                ( A -> A )
    a -> b -> e           ( A -> B -> B )
    a -> c -> g           ( A -> A -> A )
    a -> b -> f -> g      ( A -> B -> B -> A )
    a -> c -> b -> e      ( A -> A -> B -> B )
    a -> c -> b -> f -> g ( A -> A -> B -> B -> A )
    """
    dag = pp.DAG()
    edges = [('a', 'b'), ('a', 'c'), ('c', 'b'),
             ('b', 'e'), ('b', 'f'), ('f', 'g'),
             ('c', 'g'), ('h', 'i'), ('h', 'j')]
    for tail, head in edges:
        dag.add_edge(tail, head)
    return dag
220 |
--------------------------------------------------------------------------------
/tests/test_MultiOrderModel.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | import pathpy as pp
5 |
6 |
def test_print(random_paths):
    """Smoke test: printing a MultiOrderModel must not raise."""
    p = random_paths(90, 0, 20)
    multi = pp.MultiOrderModel(p, max_order=3)
    print(multi)
11 |
12 |
@pytest.mark.parametrize('k', (1, 2, 3))
def test_init(random_paths, k):
    """A model of max order k must contain k+1 layers (orders 0..k)."""
    p = random_paths(90, 0, 20)
    multi = pp.MultiOrderModel(p, max_order=k)
    assert len(multi.layers) == k+1
18 |
19 |
20 | # @pytest.mark.slow
21 | # @pytest.mark.parametrize('k', (1, 2))
22 | # def test_parallel(random_paths, k):
23 | # """assert that the parallel calculation is equal to the
24 | # sequential"""
25 | # p = random_paths(90, 0, 20)
26 | # multi_seq = pp.MultiOrderModel(p, max_order=k)
27 | #
28 | # pp.ENABLE_MULTICORE_SUPPORT = True
29 | # assert pp.ENABLE_MULTICORE_SUPPORT
30 | #
31 | # multi_parallel = pp.MultiOrderModel(p, max_order=k)
32 | #
33 | # assert multi_parallel.model_size(k) == multi_seq.model_size(k)
34 | # for k in multi_parallel.transition_matrices:
35 | # assert np.sum(multi_parallel.transition_matrices[k] - multi_seq.transition_matrices[k]) == pytest.approx(0)
36 |
37 |
# TODO: how to properly test this function?
@pytest.mark.parametrize('method', ('AIC', 'BIC', 'AICc'))
@pytest.mark.parametrize('k', (2, 3))
def test_test_network_hypothesis(random_paths, k, method):
    """Smoke test: test_network_hypothesis must run for all methods and orders."""
    p = random_paths(20, 40, 6)
    multi = pp.MultiOrderModel(p, max_order=k)
    (is_net, ic0, ic1) = multi.test_network_hypothesis(p, method=method)
45 |
46 |
@pytest.mark.parametrize(
    'method, k, e_ic0, e_ic1', (
        ('AIC', 1, 853.7904463041854, 829.9533867847043),
        ('BIC', 3, 862.234843574755, 885.6864087704643),
        ('AICc', 3, 856.3359008496399, 1305.9533867847044)
    )
)
def test_test_network_hypothesis_values(random_paths, k, method, e_ic0, e_ic1):
    """Regression test: information criteria must match previously recorded values."""
    p = random_paths(20, 40, 6)
    multi = pp.MultiOrderModel(p, max_order=k)
    (is_net, ic0, ic1) = multi.test_network_hypothesis(p, method=method)
    assert e_ic0 == pytest.approx(ic0)
    assert e_ic1 == pytest.approx(ic1)
60 |
61 |
@pytest.mark.parametrize('k', (1, 2, 3))
def test_write_state_file(random_paths, k, tmpdir):
    """Smoke test: a state file can be written for every layer of the model."""
    file_path = str(tmpdir.mkdir("sub").join("multi_order_state"))
    p = random_paths(20, 40, 6)
    multi = pp.MultiOrderModel(p, max_order=k)

    for i in range(1, k+1):
        multi.save_state_file(file_path + '.' + str(i), layer=i)
70 |
71 |
def test_estimate_order_1():
    """Example without second-order correlations: the optimal order is 1."""
    paths = pp.Paths()

    for single_step in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(single_step)

    # every continuation through c appears equally often, so the first
    # step carries no information about the second
    for _ in range(4):
        for two_step in ('a,c,d', 'b,c,e', 'b,c,d', 'a,c,e'):
            paths.add_path(two_step)

    m = pp.MultiOrderModel(paths, max_order=2)
    assert m.estimate_order() == 1, \
        "Error, wrongly detected higher-order correlations"
90 |
91 |
def test_estimate_order_2():
    """Example with second-order correlations: the optimal order is 2."""
    paths = pp.Paths()

    for single_step in ('a,c', 'b,c', 'c,d', 'c,e'):
        paths.add_path(single_step)

    # a always continues to d and b always continues to e, so the
    # next step depends on the previous one
    for _ in range(4):
        paths.add_path('a,c,d')
        paths.add_path('b,c,e')

    m = pp.MultiOrderModel(paths, max_order=2)
    assert m.estimate_order() == 2
107 |
108 |
def test_save_statefile(random_paths, tmpdir):
    """The written state file must contain plain rows, not a serialised dict."""
    file_path = str(tmpdir.join("statefile.sf"))
    p = random_paths(3, 20, 6)
    multi = pp.MultiOrderModel(p, max_order=2)
    multi.save_state_file(file_path, layer=2)
    with open(file_path) as f:
        for line in f:
            assert '{' not in line  # make sure that we did not write a dictionary
117 |
118 |
def test_single_path_likelihood(random_paths):
    """Check additivity and monotonicity of MultiOrderModel (log-)likelihoods."""
    p1 = random_paths(size=10, rnd_seed=20, num_nodes=10)  # type: pp.Paths
    p2 = random_paths(size=100, rnd_seed=0, num_nodes=50)
    p12 = p1 + p2
    mom = pp.MultiOrderModel(p12, max_order=3)
    lkh1 = mom.likelihood(p1)
    lkh2 = mom.likelihood(p2)
    lkh12 = mom.likelihood(p12)

    assert lkh1 > lkh2  # the larger path set has the smaller (more negative) log-likelihood
    assert (lkh1 + lkh2) == pytest.approx(lkh12)  # log-likelihoods add over disjoint data

    # the log-likelihood of a single path is always negative
    assert mom.path_likelihood(('1', '2'), layer=0, freq=4) < 0

    lkl_last = None
    for i in range(3):  # likelihoods must be increasing with the layer order
        lkl = mom.path_likelihood(('6', '7', '2', '0', '6'), layer=i, freq=9)
        if lkl_last is not None:
            assert lkl >= lkl_last
        lkl_last = lkl

    path_likelihoods = []
    for p, freq in p12.paths[3].items():  # find the path with the highest likelihood
        lkl = mom.path_likelihood(p, layer=2, freq=freq.sum(), log=False)
        path_likelihoods.append((lkl, p))

    assert max(path_likelihoods)[1] == ('23', '32', '19', '8')
146 |
--------------------------------------------------------------------------------
/tests/test_Network.py:
--------------------------------------------------------------------------------
1 | import random
2 | import pytest
3 |
@pytest.mark.parametrize('directed', (True, False))
@pytest.mark.parametrize('weighted', (True, False))
def test_add_node(random_network, directed, weighted):
    """
    Test node creation
    """
    net = random_network(n=10, m=20, directed=directed, weighted=weighted)

    assert net.directed == directed

    # remember counts so we can check the deltas after insertion
    vc_before = net.ncount()
    ec_before = net.ecount()

    assert 'v' not in net.nodes

    # arbitrary keyword arguments must be stored as node attributes
    net.add_node('v', test1='x', test2=42)

    assert 'v' in net.nodes

    assert net.nodes['v']['test1'] == 'x'
    assert net.nodes['v']['test2'] == 42
    # a fresh node starts with zero degree and weight
    if directed:
        assert net.nodes['v']['indegree'] == 0
        assert net.nodes['v']['outdegree'] == 0
    else:
        assert net.nodes['v']['degree'] == 0
    assert net.nodes['v']['inweight'] == 0
    assert net.nodes['v']['outweight'] == 0

    # adding a node changes the node count but not the edge count
    assert net.ncount() == vc_before + 1
    assert net.ecount() == ec_before
35 |
36 |
@pytest.mark.parametrize('directed', (True, False))
@pytest.mark.parametrize('weighted', (True, False))
def test_remove_node(random_network, directed, weighted):
    """Removing a node must drop it, its incident edges and their weight,
    and purge it from the neighbour mappings of former neighbours."""
    net = random_network(n=10, m=20, directed=directed, weighted=weighted)

    victim = random.choice(list(net.nodes))

    # snapshot the state that removal is expected to change
    nodes_before = net.ncount()
    edges_before = net.ecount()
    weight_before = net.total_edge_weight()

    incident = [edge for edge in net.edges if victim in edge]
    incident_weight = sum(net.edges[edge]['weight'] for edge in incident)
    succs = list(net.successors[victim])
    preds = list(net.predecessors[victim])

    net.remove_node(victim)

    assert victim not in net.nodes
    assert net.ncount() == nodes_before - 1
    assert net.ecount() == edges_before - len(incident)
    assert net.total_edge_weight() == weight_before - incident_weight

    for edge in incident:
        assert edge not in net.edges

    # no former neighbour may still reference the removed node
    for neighbour in succs + preds:
        assert victim not in net.predecessors[neighbour]
        assert victim not in net.successors[neighbour]
74 |
75 |
76 |
@pytest.mark.parametrize('directed', (True, False))
@pytest.mark.parametrize('weighted', (True, False))
def test_add_edge(random_network, directed, weighted):
    """Adding an edge between previously unconnected nodes must update the
    edge count, total weight and successor/predecessor mappings."""
    net = random_network(n=10, m=20, directed=directed, weighted=weighted)

    # rejection-sample a node pair that is not yet connected; the seed value
    # is an existing edge, which guarantees the loop body runs at least once
    (v, w) = random.choice(list(net.edges))
    while (v, w) in net.edges:
        v, w = random.sample(list(net.nodes), 2)

    weight_to_add = random.randint(1, 10) if weighted else 1

    nodes_before = net.ncount()
    edges_before = net.ecount()
    weight_before = net.total_edge_weight()

    if weighted:
        net.add_edge(v, w, weight=weight_to_add)
    else:
        net.add_edge(v, w)

    assert v in net.nodes and w in net.nodes
    assert net.ncount() == nodes_before
    assert net.ecount() == edges_before + 1
    assert net.total_edge_weight() == weight_before + weight_to_add
    assert (v, w) in net.edges

    assert w in net.successors[v]
    assert v in net.predecessors[w]

    if not directed:
        # undirected edges must be navigable in both directions
        assert w in net.predecessors[v]
        assert v in net.successors[w]
119 |
120 |
def test_import_from_networkx():
    """Importing a networkx graph must preserve counts and edge attributes."""
    # TODO: add test for weighted networks
    from pathpy.classes.network import network_from_networkx
    import networkx as nx

    g = nx.generators.barabasi_albert_graph(20, 10)
    # pathpy works with string node labels
    nx.relabel_nodes(g, {node: str(node) for node in g}, copy=False)
    for idx, edge in enumerate(g.edges):
        g.edges[edge]['custom'] = idx

    net = network_from_networkx(g)

    assert net.ncount() == len(g)
    assert net.ecount() == len(g.edges)
    for edge in net.edges:
        assert net.edges[edge]['custom'] == g.edges[edge]['custom']
137 |
138 |
def test_export_netwokx():
    """Round-trip pathpy -> networkx must preserve degrees and attributes."""
    # NOTE(review): function name has a typo ("netwokx"); kept unchanged so
    # recorded test ids stay stable
    # TODO: test directed graph
    from pathpy.classes.network import network_from_networkx
    from pathpy.classes.network import network_to_networkx
    import networkx as nx

    g = nx.generators.karate_club_graph()
    # pathpy.Network will implicitely recast all labels to str so to have
    # a comparable network to start with do the same here
    nx.relabel_nodes(g, {node: str(node) for node in g}, copy=False)
    for idx, edge in enumerate(g.edges):
        g.edges[edge]['custom'] = idx
        g.edges[edge]['weight'] = (idx % 4) + 100

    for idx, node in enumerate(g.nodes):
        g.nodes[node]['custom'] = "{} unique string".format(idx)

    net = network_from_networkx(g)
    g_back = network_to_networkx(net)

    nx_degrees = g.degree(weight='weight')

    assert len(g_back) == len(g)
    assert len(g_back.edges) == len(g.edges)
    assert dict(g_back.degree) == dict(g.degree)

    for edge in g_back.edges:
        # both the intermediate pathpy network and the re-exported graph
        # must carry the original edge attributes
        for graph in (net, g_back):
            assert graph.edges[edge]['weight'] == g.edges[edge]['weight']
            assert graph.edges[edge]['custom'] == g.edges[edge]['custom']

    for node in g_back.nodes:
        assert g_back.nodes[node]['custom'] == g.nodes[node]['custom']
        # weighted degree of an undirected node shows up as in- and out-weight
        assert nx_degrees[node] == net.nodes[node]['inweight']
        assert nx_degrees[node] == net.nodes[node]['outweight']
175 |
176 |
def test_read_edges(test_data_directory):
    """A weighted, directed edge file with a header row must be parsed fully."""
    import os
    import pathpy

    edge_file = os.path.join(test_data_directory, "example_int.tedges")
    net = pathpy.Network.read_file(edge_file, weighted=True, header=True, directed=True)

    assert net.ncount() == 5
    assert net.ecount() == 6
186 |
187 |
def test_diagonal_values():
    """Self-loops must contribute exactly once to the adjacency-matrix
    diagonal, for both the undirected and the directed case.

    Fix: the undirected and directed checks were copy-pasted; they are now
    one loop parameterized by the expected matrix sum.
    """
    from pathpy.classes.network import Network

    # undirected: the a-b edge contributes 2 to the sum, the a-a loop 1;
    # directed: every edge (including the loop) contributes exactly 1
    for directed, expected_sum in ((False, 3), (True, 2)):
        net = Network(directed=directed)
        net.add_edge('a', 'b')
        net.add_edge('a', 'a')
        adj = net.adjacency_matrix().todense()
        assert adj.sum() == expected_sum
        assert adj[0, 0] == 1
        assert adj[1, 1] == 0
        assert adj.diagonal().sum() == 1
208 |
209 |
--------------------------------------------------------------------------------
/tests/test_OriginDestinationPaths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathpy as pp
3 |
def test_extract_distribute(test_data_directory):
    """Origin-destination counts are split across multiple shortest paths."""
    network_path = os.path.join(test_data_directory, 'example_network.edges')
    od_path = os.path.join(test_data_directory, 'example_origin_destination.csv')

    # read the network topology
    topology = pp.Paths.read_edges(network_path, undirected=True)
    network = pp.HigherOrderNetwork(topology)

    od_pairs = pp.path_extraction.read_origin_destination(od_path)
    paths = pp.path_extraction.paths_from_origin_destination(od_pairs, network)

    # the 5 A->H journeys are split 2/3 between the two shortest paths,
    # in either order
    count_abfh = paths.paths[3][('A', 'B', 'F', 'H')][1]
    count_acgh = paths.paths[3][('A', 'C', 'G', 'H')][1]
    assert {count_abfh, count_acgh} == {2.0, 3.0}

    assert paths.paths[3][('D', 'B', 'C', 'E')][1] == 7.0
    assert paths.paths[2][('A', 'B', 'F')][1] == 3.0
    assert paths.paths[2][('B', 'C', 'E')][1] == 3.0
23 |
24 |
def test_extract_single(test_data_directory):
    """Without weight distribution all journeys follow a single shortest path."""
    network_path = os.path.join(test_data_directory, 'example_network.edges')
    od_path = os.path.join(test_data_directory, 'example_origin_destination.csv')

    # read the network topology
    topology = pp.Paths.read_edges(network_path, undirected=True)
    network = pp.HigherOrderNetwork(topology)

    od_pairs = pp.path_extraction.read_origin_destination(od_path)
    paths = pp.path_extraction.paths_from_origin_destination(
        od_pairs, network, distribute_weight=False)

    # all 5 A->H journeys land on one of the two shortest paths
    count_abfh = paths.paths[3][('A', 'B', 'F', 'H')][1]
    count_acgh = paths.paths[3][('A', 'C', 'G', 'H')][1]
    assert {count_abfh, count_acgh} == {0.0, 5.0}

    assert paths.paths[3][('D', 'B', 'C', 'E')][1] == 7.0
    assert paths.paths[2][('A', 'B', 'F')][1] == 3.0
    assert paths.paths[2][('B', 'C', 'E')][1] == 3.0
45 |
--------------------------------------------------------------------------------
/tests/test_TemporalNetwork.py:
--------------------------------------------------------------------------------
1 | import pathpy as pp
2 | import os
3 | import numpy as np
4 | import sqlite3
5 | from pytest import mark
6 |
def test_read_temporal_file_int(test_data_directory):
    """Integer-timestamped tedges yield sorted times and per-node activities."""
    t = pp.TemporalNetwork.read_file(
        os.path.join(test_data_directory, 'example_int.tedges'))

    assert t.ordered_times == [0, 2, 4, 5, 6, 8]

    activities = sorted(t.activities.values())
    assert activities == [[], [], [], [], [0, 2, 5], [2], [4], [6], [8]]
17 |
18 |
def test_read_temporal_file_time_stamp(test_data_directory):
    """Formatted timestamps must parse into comparable numeric times."""
    file_path = os.path.join(test_data_directory, 'example_timestamp.tedges')
    t = pp.TemporalNetwork.read_file(file_path, timestamp_format="%Y-%m-%d %H:%M")
    times = t.ordered_times
    # compare consecutive gaps only: the absolute values depend on the
    # locale/timezone set by the user (TODO: pin this down)
    gaps = [later - earlier for earlier, later in zip(times, times[1:])]
    assert gaps == [10800, 15060, 264960]
27 |
28 |
def test_filter_temporal_edges(temporal_network_object):
    """Filtering must keep exactly the edges whose predicate holds."""
    t = temporal_network_object

    # keep only events at even timestamps
    filtered = t.filter_edges(lambda v, w, time: time % 2 == 0)

    assert filtered.ordered_times == list(range(2, 21, 2))
39 |
40 |
def test_get_interpath_times(temporal_network_object):
    """Per-node gaps between consecutive time-respecting paths."""
    # NOTE(review): duplicates test_inter_path_times further down
    expected = {
        'e': [1] * 10,
        'b': [4, 3],
        'f': [9, 5, 1],
    }
    assert dict(temporal_network_object.inter_path_times()) == expected
48 |
49 |
def test_shuffle_edges(temporal_network_object):
    """Shuffling preserves the edge count, or honours an explicit l."""
    t = temporal_network_object

    np.random.seed(90)
    shuffled = t.shuffle_edges(with_replacement=True)
    # same number of time-stamped edges as the source network
    assert len(shuffled.tedges) == len(t.tedges)

    np.random.seed(90)
    truncated = t.shuffle_edges(l=4, with_replacement=False)
    # an explicit l caps the number of sampled edges
    assert len(truncated.tedges) == 4
64 |
65 |
def test_inter_event_times(temporal_network_object):
    # the fixture emits one event per time step, so every gap is 1
    gaps = temporal_network_object.inter_event_times()
    assert (gaps == 1).all()
70 |
71 |
def test_inter_path_times(temporal_network_object):
    """Per-node inter-path times against known fixture values."""
    observed = dict(temporal_network_object.inter_path_times())
    assert observed == {
        'f': [9, 5, 1],
        'e': [1] * 10,
        'b': [4, 3],
    }
79 |
80 |
def test_temporal_summary(temporal_network_object):
    # smoke test: printing the summary (__str__) must not raise
    print(temporal_network_object)
83 |
84 |
def test_export_tikz_unfolded_network(temporal_network_object, tmpdir):
    # smoke test: tikz export of a time-unfolded network must not raise
    out_file = str(tmpdir.mkdir("sub").join("multi_order_state"))
    pp.visualisation.export_tikz(temporal_network_object, out_file)
89 |
90 |
def test_from_sqlite_int(test_data_directory):
    """Building a temporal network from an sqlite cursor with integer times.

    Fix: the sqlite3 connection was never closed (resource leak); it is now
    released in a finally block once the network has been built.
    """
    file_path = os.path.join(test_data_directory, 'test_tempnets.db')
    con = sqlite3.connect(file_path)
    try:
        con.row_factory = sqlite3.Row
        cursor = con.execute('SELECT source, target, time FROM example_int')
        t = pp.TemporalNetwork.from_sqlite(cursor)
    finally:
        con.close()

    assert t.ordered_times == [0, 2, 4, 5, 6, 8]

    activities = sorted(t.activities.values())
    assert activities == [[], [], [], [], [0, 2, 5], [2], [4], [6], [8]]
105 |
106 |
def test_from_sqlite_timestamps(test_data_directory):
    """Building a temporal network from an sqlite cursor with string timestamps.

    Fix: the sqlite3 connection was never closed (resource leak); it is now
    released in a finally block once the network has been built.
    """
    file_path = os.path.join(test_data_directory, 'test_tempnets.db')
    con = sqlite3.connect(file_path)
    try:
        con.row_factory = sqlite3.Row
        cursor = con.execute('SELECT source, target, time FROM example_timestamp')
        t = pp.TemporalNetwork.from_sqlite(cursor, timestamp_format="%Y-%m-%d %H:%M")
    finally:
        con.close()

    times = t.ordered_times
    time_diffs = [j - i for i, j in zip(times[:-1], times[1:])]
    # TODO: the absolute times depend on the locale/timezone set by the user
    assert time_diffs == [10800, 15060, 264960]
118 |
119 |
def test_write_html(temporal_network_object, tmpdir):
    # smoke test: d3 html export must not raise
    target = str(tmpdir.mkdir("sub").join("d3_temp.html"))
    pp.visualisation.export_html(temporal_network_object, target)
124 |
125 |
@mark.latex
@mark.parametrize('is_dag', (False, True))
@mark.parametrize('split_dir', (False, True))
def test_write_tikz(temporal_network_object, tmpdir, is_dag, split_dir):
    """Exported tikz sources must compile cleanly with pdflatex."""
    dir_path = tmpdir
    file_path = str(dir_path.join("temp.tikz"))
    print(file_path)

    temporal_network_object.write_tikz(
        file_path, dag=is_dag, split_directions=split_dir)

    # compile the generated source; a non-zero exit code means broken tikz
    cmd = "cd {}; pdflatex " \
          " -interaction nonstopmode {} > /dev/null".format(str(dir_path), file_path)
    exit_code = os.system(cmd)
    print(dir_path)
    assert exit_code == 0
141 |
--------------------------------------------------------------------------------
/tests/test_centralities.py:
--------------------------------------------------------------------------------
1 | import pathpy as pp
2 | import pytest
3 |
4 | # absolute eigenvalue difference tolerance
5 | EIGEN_ABS_TOL = 1e-2
6 |
7 |
@pytest.mark.parametrize('k, e_sum, e_var', (
    (3, 27.5833333, 0.0085720486),
    (2, 55.0, 0.046875),
    (1, 55, 0.046875),
))
def test_closeness_centrality_hon(random_paths, k, e_sum, e_var):
    """Closeness on higher-order networks: check sum and variance."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(50, 0, 8), k=k)
    scores = pp.algorithms.centralities.closeness(hon)
    values = np.array(list(scores.values()))
    assert values.sum() == pytest.approx(e_sum)
    assert values.var() == pytest.approx(e_var)
21 |
22 |
@pytest.mark.parametrize('k, norm, e_sum, e_var, e_max', (
    (2, False, 3.0, 0.296875, 1.5),
    (1, False, 2.0, 0.00694444, 0.333333333),
    (2, True, 2.0, 0.1319444444, 1),
))
def test_betweenness_centrality_hon(random_paths, norm, k, e_sum, e_var, e_max):
    """Betweenness on higher-order networks: check sum, max and variance."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(50, 0, 8), k=k)
    scores = pp.algorithms.centralities.betweenness(hon, normalized=norm)
    values = np.array(list(scores.values()))
    assert values.sum() == pytest.approx(e_sum)
    assert values.max() == pytest.approx(e_max)
    assert values.var() == pytest.approx(e_var)
37 |
38 |
@pytest.mark.xfail
@pytest.mark.parametrize('k, sub, projection, e_sum, e_var', (
    (1, True, 'scaled', 2.823103290, 0.0004701220779),
    (1, False, 'scaled', 2.82310329017, 0.00047012207),
    (2, False, 'all', 2.030946758666, 0.0168478112),
    (2, True, 'all', 2.030946758, 0.0168478112489),
    (2, False, 'last', 1.7463870380802424, 0.0077742413305),
    (2, False, 'first', 1.7461339874793731, 0.0083696967427),
    (2, True, 'last', 1.746387038080242, 0.007774241),
    (2, True, 'first', 1.7461339874793727, 0.0083696967427313),
))
def test_eigen_centrality_hon(random_paths, sub, projection, k, e_sum, e_var):
    """Eigenvector centrality on higher-order networks: sum and variance."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(50, 0, 8), k=k)
    scores = pp.algorithms.centralities.eigenvector(hon, projection, sub)
    values = np.array(list(scores.values()))
    assert values.sum() == pytest.approx(e_sum, abs=EIGEN_ABS_TOL)
    assert values.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL)
58 |
59 |
@pytest.mark.parametrize('k, sub, proj, e_sum, e_var', (
    (2, False, 'all', 1, 0.000399240558236),
    (1, False, 'scaled', 1, 6.111199022e-05),
    (2, False, 'scaled', 1, 0.00039924055823),
    (2, False, 'last', 1, 0.00045826544),
    (2, False, 'first', 1, 0.000345796913),
    (2, True, 'all', 1, 0.000399240558),
    (1, True, 'scaled', 1, 6.111199022e-05),
    (2, True, 'scaled', 1, 0.000399240558236666),
    (2, True, 'last', 1, 0.000458265),
    (2, True, 'first', 1, 0.0003457969),
))
def test_pagerank_centrality_hon(random_paths, sub, proj, k, e_sum, e_var):
    """PageRank on higher-order networks: scores sum to 1, variance matches."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(50, 0, 8), k=k)
    ranks = pp.algorithms.centralities.pagerank(
        hon, include_sub_paths=sub, projection=proj)
    values = np.array(list(ranks.values()))
    assert values.sum() == pytest.approx(e_sum)
    assert values.var() == pytest.approx(e_var)
80 |
81 |
def test_betweenness_centrality_paths(path_from_ngram_file):
    """Unnormalized betweenness on a Paths object against known values.

    Fix: the result was previously copied through a pointless identity dict
    comprehension before comparison; it is now compared directly.
    """
    betweenness = pp.algorithms.centralities.betweenness(
        path_from_ngram_file, normalized=False)
    expected = {'b': 2.0, 'a': 3.0, 'e': 0, 'c': 3.0, 'd': 5.0}
    assert betweenness == expected
88 |
89 |
def test_betweenness_centrality_paths_norm(path_from_ngram_file):
    """Normalization must scale the maximum betweenness to 1."""
    scores = pp.algorithms.centralities.betweenness(
        path_from_ngram_file, normalized=True)
    assert max(scores.values()) == pytest.approx(1)
96 |
97 |
def test_closeness_centrality_paths(path_from_ngram_file):
    """Unnormalized closeness: check the total and the covered node set."""
    scores = pp.algorithms.centralities.closeness(
        path_from_ngram_file, normalized=False)

    assert sum(scores.values()) == pytest.approx(9.833333333333332)
    assert set(scores) == {'a', 'b', 'c', 'd', 'e'}
108 |
109 |
def test_closeness_centrality_paths_norm(path_from_ngram_file):
    """Normalization must scale the maximum closeness to 1."""
    scores = pp.algorithms.centralities.closeness(
        path_from_ngram_file, normalized=True)
    assert max(scores.values()) == pytest.approx(1)
116 |
117 |
def test_visitation_probabilities(path_from_ngram_file):
    """Visitation probabilities form a distribution with a known maximum."""
    v_prob = pp.algorithms.centralities.visitation_probabilities(path_from_ngram_file)
    probabilities = list(v_prob.values())

    assert sum(probabilities) == pytest.approx(1)
    assert max(probabilities) == pytest.approx(0.3125)
127 |
128 |
--------------------------------------------------------------------------------
/tests/test_data/edge_frequency.edge:
--------------------------------------------------------------------------------
1 | 1,2,45,3,3,3
2 | 1,3,2,23,2,2
3 | 1,5,5,12,5,2
4 | 3,5,2,11,45,2
5 | 5,3,4,12,2,2
6 | 5,2,1,12,4,1
7 |
--------------------------------------------------------------------------------
/tests/test_data/example_int.tedges:
--------------------------------------------------------------------------------
1 | source,target,time
2 | 1,2,0
3 | 1,2,2
4 | 1,3,5
5 | 3,2,6
6 | 2,1,8
7 | 4,5,2
8 | 5,3,4
--------------------------------------------------------------------------------
/tests/test_data/example_network.edges:
--------------------------------------------------------------------------------
1 | A,B
2 | A,C
3 | B,C
4 | B,D
5 | B,F
6 | C,G
7 | C,E
8 | D,F
9 | E,G
10 | F,H
11 | G,H
--------------------------------------------------------------------------------
/tests/test_data/example_origin_destination.csv:
--------------------------------------------------------------------------------
1 | A,H,5
2 | B,E,3
3 | D,E,7
4 | A,F,3
--------------------------------------------------------------------------------
/tests/test_data/example_timestamp.tedges:
--------------------------------------------------------------------------------
1 | source,target,time
2 | 1,4,2000-03-04 12:45
3 | 2,4,2000-03-04 15:45
4 | 5,2,2000-03-04 19:56
5 | 8,2,2000-03-07 21:32
--------------------------------------------------------------------------------
/tests/test_data/ngram_simple.ngram:
--------------------------------------------------------------------------------
1 | a,b,c,d,a,b,2
2 | d,e,d,a,b,4
3 |
--------------------------------------------------------------------------------
/tests/test_data/test_tempnets.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/tests/test_data/test_tempnets.db
--------------------------------------------------------------------------------
/tests/test_estimation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 20 11:59:22 2015
4 | @author: Ingo Scholtes
5 |
6 | (c) Copyright ETH Zurich, Chair of Systems Design, 2015-2017
7 | """
8 |
9 | import pathpy as pp
10 | import numpy as _np
11 | import pytest
12 | # mark to be used as decorator on slow functions such that they are only run
13 | # when explicitly called with `$ pytest --runslow`
14 | slow = pytest.mark.slow
15 |
16 |
17 |
@pytest.mark.parametrize('method', ('BIC', 'AIC'))
def test_markov_sequence(method):
    """An i.i.d. random sequence must be detected as first-order Markov."""
    _np.random.seed(90)
    sequence = [str(s) for s in _np.random.choice(range(10), 1000)]
    ms = pp.MarkovSequence(sequence)
    assert ms.estimate_order(maxOrder=4, method=method) == 1, \
        "Error, wrongly detected higher-order correlations"
26 |
27 |
def test_estimate_order_strongly_connected():
    """
    Example with single strongly connected component in first-
    and two connected components in second-order network
    """
    paths = pp.Paths()
    for ngram in ('a,b,c', 'b,c,b', 'c,b,a', 'b,a,b',
                  'e,b,f', 'b,f,b', 'f,b,e', 'b,e,b'):
        paths.add_path(ngram)

    g1 = pp.HigherOrderNetwork(paths, k=1)
    pp.algorithms.components.reduce_to_gcc(g1)
    assert g1.ncount() == 5, "Error, wrong number of nodes in first-order network"
    assert g1.ecount() == 8, "Error, wrong number of links in first-order network"

    g2 = pp.HigherOrderNetwork(paths, k=2)
    pp.algorithms.components.reduce_to_gcc(g2)
    assert g2.ncount() == 4, "Error, wrong number of nodes in second-order network"
    assert g2.ecount() == 4, "Error, wrong number of links in second-order network"

    # test mapping of higher-order nodes and paths
    assert g2.higher_order_node_to_path('a,b') == ('a', 'b'), \
        "Error: mapping from higher-order node to first-order path failed"
    assert g2.higher_order_path_to_first_order(('a,b', 'b,c')) == ('a', 'b', 'c'), \
        "Error: mapping from higher-order path to first-order path failed"
57 |
58 |
def test_temp_net_extraction(temporal_network_object):
    """Time-respecting path extraction with delta=1 yields 10 observations."""
    paths = pp.path_extraction.paths_from_temporal_network(
        temporal_network_object, delta=1)
    assert paths.observation_count == 10, \
        "Extracted wrong number of time-respecting paths"
65 |
66 |
def test_betweenness_preference_empty():
    """Betweenness preference in an empty network must be zero."""
    empty_paths = pp.path_extraction.paths_from_temporal_network(
        pp.TemporalNetwork(), delta=3)
    assert len(empty_paths.nodes) == 0

    result = pp.algorithms.path_measures.betweenness_preference(
        empty_paths, 'e', method='MLE')
    assert result == pytest.approx(0.0)
75 |
76 |
def test_betweenness_preference_mle(temporal_network_object):
    """MLE betweenness preference of node 'e' against a known value."""
    # Extract (time-respecting) paths
    p = pp.path_extraction.paths_from_temporal_network(
        temporal_network_object, delta=1)
    result = pp.algorithms.path_measures.betweenness_preference(p, 'e', method='MLE')
    assert result == pytest.approx(1.2954618442383219)
85 |
86 |
def test_betweenness_preference_miller(temporal_network_object):
    """Miller-corrected betweenness preference of node 'e'."""
    p = pp.path_extraction.paths_from_temporal_network(
        temporal_network_object, delta=1)
    result = pp.algorithms.path_measures.betweenness_preference(p, 'e', method='Miller')
    assert result == pytest.approx(0.99546184423832196)
94 |
95 |
def test_betweenness_preference_normalized(temporal_network_object):
    """Normalized betweenness preference of node 'e' must be 1."""
    p = pp.path_extraction.paths_from_temporal_network(
        temporal_network_object, delta=1)
    # test normalize
    result = pp.algorithms.path_measures.betweenness_preference(
        p, 'e', normalized=True)
    assert result == pytest.approx(1)
103 |
104 |
def test_slow_down_factor_random(random_paths):
    """Slow-down factor of a random path ensemble against a known value."""
    expected = 4.05
    factor = pp.algorithms.path_measures.slow_down_factor(random_paths(90, 90))
    assert factor == pytest.approx(expected, rel=1e-2), \
        "Got slowdown factor %f but expected %f +- 1e-2" % (factor, expected)
111 |
112 |
def test_get_distance_matrix_temporal(temporal_network_object):
    """Pairwise path distances extracted from a temporal network.

    Fix: the old flattening loop reused the name `p` for both the Paths
    object and the inner loop variable; the nested loop is now a single
    dict comprehension with distinct names.
    """
    paths = pp.path_extraction.paths_from_temporal_network(temporal_network_object)
    shortest_paths_dict = pp.algorithms.shortest_paths.distance_matrix(paths)

    # flatten the nested {source: {target: dist}} mapping to pair keys
    path_distances = {
        (source, target): dist
        for source, targets in shortest_paths_dict.items()
        for target, dist in targets.items()
    }

    expected_distances = {
        ('c', 'e'): 1,
        ('c', 'f'): 2,
        ('c', 'c'): 0,
        ('b', 'g'): 2,
        ('f', 'e'): 1,
        ('c', 'b'): 4,
        ('a', 'a'): 0,
        ('a', 'g'): 2,
        ('g', 'g'): 0,
        ('e', 'g'): 1,
        ('e', 'e'): 0,
        ('b', 'b'): 0,
        ('e', 'b'): 1,
        ('e', 'f'): 1,
        ('f', 'b'): 2,
        ('a', 'e'): 1,
        ('f', 'f'): 0,
        ('b', 'e'): 1
    }
    assert path_distances == expected_distances
143 |
144 |
def test_get_distance_matrix_empty():
    """An empty Paths object must yield an empty distance matrix."""
    shortest_paths_dict = pp.algorithms.shortest_paths.distance_matrix(pp.Paths())
    assert len(shortest_paths_dict) == 0
149 |
@slow
def test_entropy_growth_rate_ratio_mle(random_paths):
    """MLE entropy growth rate ratio against a known value."""
    ratio = pp.algorithms.path_measures.entropy_growth_rate_ratio(
        random_paths(100, 500), method="MLE")
    assert ratio == pytest.approx(0.10515408343772015)
156 |
157 |
@slow
def test_entropy_growth_rate_ratio_miller(random_paths):
    """Miller-corrected entropy growth rate ratio against a known value."""
    ratio = pp.algorithms.path_measures.entropy_growth_rate_ratio(
        random_paths(100, 500), method="Miller")
    assert ratio == pytest.approx(0.88685603746914599)
164 |
--------------------------------------------------------------------------------
/tests/test_random_graphs.py:
--------------------------------------------------------------------------------
1 | import pathpy as pp
2 |
3 | import pytest
4 | import numpy as np
5 |
6 |
def test_is_graphic_sequence():
    """Check is_graphic_sequence on hand-picked accept/reject cases and on
    degree sequences of randomly generated graphs.

    Fixes: the old code compared return values with `is False`, which
    silently breaks if the function returns a non-bool falsy value; the
    repetitive asserts are now table-driven; the unused loop variable `i`
    is replaced by `_`.
    """
    # (sequence, kwargs, expected) cases, in the original order
    cases = [
        ([2, 2, 90], {}, False),
        ([1, 1], {}, True),
        ([1, 2, 3], {}, False),
        ([2, 2], {'self_loops': True}, True),
        ([2], {'self_loops': True}, True),
        ([2], {'self_loops': True, 'multi_edges': True}, True),
        ([2], {'self_loops': False}, False),
        ([3, 3], {'multi_edges': True, 'self_loops': True}, True),
        ([1, 3], {'self_loops': True}, True),
        ([1, 2], {'self_loops': True}, False),
    ]
    for sequence, kwargs, expected in cases:
        result = bool(pp.algorithms.random_graphs.is_graphic_sequence(sequence, **kwargs))
        assert result == expected, \
            'Wrongly %s graphic sequence %r (%r)' % (
                'rejected' if expected else 'detected', sequence, kwargs)

    # the degree sequence of any simple random graph must be graphic
    for _ in range(10):
        g = pp.algorithms.random_graphs.erdoes_renyi_gnp(n=100, p=0.03, self_loops=False)
        assert pp.algorithms.random_graphs.is_graphic_sequence(
            [x for x in g.degrees() if x > 0]), \
            'Wrongly rejected degree sequence of randomly generated graph'

    for _ in range(10):
        g = pp.algorithms.random_graphs.erdoes_renyi_gnp(n=100, p=0.03, self_loops=True)
        # HACK: correct degrees for self_loops. Need to consistently define
        # degrees of self-loops as two in pathpy!
        for e in g.edges:
            if e[0] == e[1]:
                g.nodes[e[0]]['degree'] += 1
        assert pp.algorithms.random_graphs.is_graphic_sequence(
            [x for x in g.degrees() if x > 0], self_loops=True), \
            'Wrongly rejected degree sequence of randomly generated graph'
65 |
--------------------------------------------------------------------------------
/tests/test_spectral.py:
--------------------------------------------------------------------------------
1 | import pathpy as pp
2 | import pytest
3 |
4 | # absolute eigenvalue difference tolerance
5 | EIGEN_ABS_TOL = 1e-2
6 |
7 |
@pytest.mark.parametrize('k, sub, e_gap', (
    (2, False, 1e-9),
    (1, False, 1e-5),
    (2, True, 1),
))
def test_eigen_value_gap(random_paths, k, sub, e_gap):
    """Smoke test: eigenvalue gap computation must yield a truthy value."""
    # NOTE(review): the parametrized e_gap is never asserted against — the
    # test only checks truthiness; consider comparing via pytest.approx
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(200, 0, 40), k=k)
    np.random.seed(0)
    eigen_gap = pp.algorithms.spectral.eigenvalue_gap(
        hon, include_sub_paths=sub, lanczos_vectors=90)
    assert eigen_gap
20 |
21 |
@pytest.mark.xfail
@pytest.mark.parametrize('k, norm, e_sum, e_var', (
    (3, True, 1, 0.0036494914419765924),
    (2, False, 2765.72998141474, 8.661474971012986),
    (1, True, 1, 0.04948386659908706),
))
def test_fiedler_vector_sparse(random_paths, k, norm, e_sum, e_var):
    """Sparse Fiedler vector: check variance and component sum."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(90, 0, 20), k=k)
    fiedler = pp.algorithms.spectral.fiedler_vector_sparse(hon, normalized=norm)
    assert fiedler.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL)
    assert np.sum(fiedler) == pytest.approx(e_sum, abs=EIGEN_ABS_TOL)
35 |
36 |
@pytest.mark.xfail
@pytest.mark.parametrize('k, e_sum, e_var', (
    (3, 1, 0.003649586067168485),
    (2, (1.0000000000000002+0j), 0.0031136096467386416),
    (1, (-0.0009514819500764382+0.1190367717310192j), 0.049999999999999996),
))
def test_fiedler_vector_dense(random_paths, k, e_sum, e_var):
    """Dense Fiedler vector: check variance and component sum."""
    import numpy as np
    hon = pp.HigherOrderNetwork(random_paths(90, 0, 20), k=k)
    fiedler = pp.algorithms.spectral.fiedler_vector_dense(hon)
    assert fiedler.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL)
    assert np.sum(fiedler) == pytest.approx(e_sum, abs=EIGEN_ABS_TOL)
50 |
51 |
@pytest.mark.xfail
@pytest.mark.parametrize('k, e_sum', (
    (3, 0.9967398214809227),
    (2, 0.24345712528855065),
    (1, 0.7143571081268268),
))
def test_algebraic_connectivity(random_paths, k, e_sum):
    """Algebraic connectivity of higher-order networks against known values.

    Fix: dropped an unused local `import pathpy`.
    """
    p = random_paths(120, 0, 40)
    hon = pp.HigherOrderNetwork(p, k=k)
    ac = pp.algorithms.spectral.algebraic_connectivity(hon, lanczos_vectors=60, maxiter=40)
    assert ac == pytest.approx(e_sum, rel=1e-7)
64 |
--------------------------------------------------------------------------------