├── .gitattributes ├── .gitignore ├── .pylintrc ├── CONTRIBUTING.rst ├── DESCRIPTION.rst ├── HISTORY.rst ├── LICENSE.txt ├── Makefile ├── README.md ├── README.rst ├── docs ├── .gitignore ├── Makefile ├── clustering.html ├── conf.py ├── contact.html ├── contribute.html ├── contributing.rst ├── css │ └── pathpy.css ├── docs.html ├── edu.html ├── example_network.png ├── example_network_scaled.png ├── history.rst ├── index.html ├── index.rst ├── installation.rst ├── js │ ├── menu.js │ └── sidebar.js ├── lotr.html ├── make.bat ├── manual │ ├── algorithms │ │ ├── centralities.html │ │ ├── components.html │ │ ├── index.html │ │ ├── infomap.html │ │ ├── modularity.html │ │ ├── path_measures.html │ │ ├── random_graphs.html │ │ ├── random_walk.html │ │ ├── shortest_paths.html │ │ ├── spectral.html │ │ ├── statistics.html │ │ └── temporal_walk.html │ ├── classes │ │ ├── dag.html │ │ ├── higher_order_network.html │ │ ├── index.html │ │ ├── markov_sequence.html │ │ ├── multi_order_model.html │ │ ├── network.html │ │ ├── paths.html │ │ ├── rolling_time_window.html │ │ └── temporal_network.html │ ├── index.html │ ├── path_extraction │ │ ├── dag_paths.html │ │ ├── index.html │ │ ├── origin_destination_stats.html │ │ ├── random_walk.html │ │ └── temporal_paths.html │ ├── utils │ │ ├── default_containers.html │ │ ├── exceptions.html │ │ ├── index.html │ │ └── log.html │ └── visualisation │ │ ├── alluvial.html │ │ ├── html.html │ │ ├── index.html │ │ ├── pdf.html │ │ └── tikz.html ├── menu.html ├── multiorder.png ├── news.html ├── pathpy_logo.png ├── readme.rst ├── resources │ └── pathpy_user_meeting.zip ├── sidebar.html ├── tutorial.html ├── tutorial │ ├── first_steps.html │ ├── higher_order.html │ ├── ide.html │ ├── index.html │ ├── issues.html │ ├── model_selection.html │ ├── paths.html │ └── temporal_nets.html └── usage.rst ├── multiorder.png ├── pathpy.pyproj ├── pathpy ├── __init__.py ├── algorithms │ ├── __init__.py │ ├── centralities.py │ ├── components.py │ ├── infomap.py │ ├── modularity.py │ ├── path_measures.py │ ├── random_graphs.py │ ├── random_walk.py │ ├── shortest_paths.py │ ├── spectral.py │ ├── statistics.py │ └── temporal_walk.py ├── classes │ ├── __init__.py │ ├── dag.py │ ├── higher_order_network.py │ ├── markov_sequence.py │ ├── multi_order_model.py │ ├── network.py │ ├── paths.py │ ├── rolling_time_window.py │ └── temporal_network.py ├── path_extraction │ ├── __init__.py │ ├── dag_paths.py │ ├── origin_destination_stats.py │ ├── random_walk.py │ └── temporal_paths.py ├── utils │ ├── __init__.py │ ├── default_containers.py │ ├── exceptions.py │ └── log.py ├── visualisation │ ├── __init__.py │ ├── alluvial.py │ ├── html.py │ ├── pdf.py │ └── tikz.py └── visualisation_assets │ ├── d3.v4.min.js │ ├── diffusion_template.html │ ├── network_template.html │ ├── paths_template.html │ ├── tempnet_template.html │ └── walk_template.html ├── pathpy_logo.png ├── setup.cfg ├── setup.py └── tests ├── README.md ├── __init__.py ├── conftest.py ├── test_DAG.py ├── test_HigherOrderNetwork.py ├── test_MultiOrderModel.py ├── test_Network.py ├── test_OriginDestinationPaths.py ├── test_Path.py ├── test_TemporalNetwork.py ├── test_centralities.py ├── test_data ├── edge_frequency.edge ├── example_int.tedges ├── example_network.edges ├── example_origin_destination.csv ├── example_timestamp.tedges ├── ngram_simple.ngram └── test_tempnets.db ├── test_estimation.py ├── test_random_graphs.py └── test_spectral.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # 
Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Sphinx Documentation 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # PyBuilder 53 | target/ 54 | 55 | # ========================= 56 | # Operating System Files 57 | # ========================= 58 | 59 | # OSX 60 | # ========================= 61 | 62 | .DS_Store 63 | .AppleDouble 64 | .LSOverride 65 | 66 | # Thumbnails 67 | ._* 68 | 69 | # Files that might appear on external disk 70 | .Spotlight-V100 71 | .Trashes 72 | 73 | # Directories potentially created on remote AFP share 74 | .AppleDB 75 | .AppleDesktop 76 | Network Trash Folder 77 | Temporary Items 78 | .apdisk 79 | 80 | # Windows 81 | # ========================= 82 | 83 | # Windows image file caches 84 | Thumbs.db 85 | ehthumbs.db 86 | 87 | # Folder config file 88 | Desktop.ini 89 | # Recycle Bin used on file shares 90 | $RECYCLE.BIN/ 91 | 92 | # Windows Installer files 93 | *.cab 94 | *.msi 95 | *.msm 96 | *.msp 97 | 98 | # Windows shortcuts 99 | *.lnk 100 | pyTempNets.v12.suo 101 | *.TMP 102 | *.suo 103 | .idea/ 104 | .eggs/ 105 | prof/* 106 | *.swp 107 | 108 | # Visual Studio Code 109 | # ================== 110 | 111 | .vscode/ 112 | docs/_build/ 113 | .pytest_cache 114 | .pytest_cache/v/cache 115 | .pytest_cache/v/cache/lastfailed 116 | .pytest_cache/v/cache/nodeids 117 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Contributing 5 | ============ 6 | 7 | Contributions are welcome, and they are greatly appreciated! Every little bit 8 | helps, and credit will always be given. 9 | 10 | You can contribute in many ways: 11 | 12 | Types of Contributions 13 | ---------------------- 14 | 15 | Report Bugs 16 | ~~~~~~~~~~~ 17 | 18 | Report bugs at https://github.com/uzhdag/pathpy/issues. 19 | 20 | If you are reporting a bug, please include: 21 | 22 | * Your operating system name and version. 23 | * Any details about your local setup that might be helpful in troubleshooting. 24 | * Detailed steps to reproduce the bug. 
25 | 26 | Fix Bugs 27 | ~~~~~~~~ 28 | 29 | Look through the GitHub issues for bugs. Anything tagged with "bug" and "help 30 | wanted" is open to whoever wants to implement it. 31 | 32 | Implement Features 33 | ~~~~~~~~~~~~~~~~~~ 34 | 35 | Look through the GitHub issues for features. Anything tagged with "enhancement" 36 | and "help wanted" is open to whoever wants to implement it. 37 | 38 | Write Documentation 39 | ~~~~~~~~~~~~~~~~~~~ 40 | 41 | pathpy could always use more documentation, whether as part of the 42 | official pathpy docs, in docstrings, or even on the web in blog posts, 43 | articles, and such. 44 | 45 | Submit Feedback 46 | ~~~~~~~~~~~~~~~ 47 | 48 | The best way to send feedback is to file an issue at https://github.com/uzhdag/pathpy/issues. 49 | 50 | If you are proposing a feature: 51 | 52 | * Explain in detail how it would work. 53 | * Keep the scope as narrow as possible, to make it easier to implement. 54 | * Remember that this is a volunteer-driven project, and that contributions 55 | are welcome :) 56 | 57 | Get Started! 58 | ------------ 59 | 60 | Ready to contribute? Here's how to set up `pathpy` for local development. 61 | 62 | 1. Fork the `pathpy` repo on GitHub. 63 | 2. Clone your fork locally:: 64 | 65 | $ git clone git@github.com:your_name_here/pathpy.git 66 | 67 | 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: 68 | 69 | $ mkvirtualenv pathpy 70 | $ cd pathpy/ 71 | $ python setup.py develop 72 | 73 | 4. Create a branch for local development:: 74 | 75 | $ git checkout -b name-of-your-bugfix-or-feature 76 | 77 | Now you can make your changes locally. 78 | 79 | 5. When you're done making changes, check that your changes pass flake8 and the 80 | tests, including testing other Python versions with tox:: 81 | 82 | $ flake8 pathpy tests 83 | $ python setup.py test or py.test 84 | $ tox 85 | 86 | To get flake8 and tox, just pip install them into your virtualenv. 87 | 88 | 6. Commit your changes and push your branch to GitHub:: 89 | 90 | $ git add . 91 | $ git commit -m "Your detailed description of your changes." 92 | $ git push origin name-of-your-bugfix-or-feature 93 | 94 | 7. Submit a pull request through the GitHub website. 95 | 96 | Pull Request Guidelines 97 | ----------------------- 98 | 99 | Before you submit a pull request, check that it meets these guidelines: 100 | 101 | 1. The pull request should include tests. 102 | 2. If the pull request adds functionality, the docs should be updated. Put 103 | your new functionality into a function with a docstring, and add the 104 | feature to the list in README.rst. 105 | 3. The pull request should work for Python 2.7, 3.4, 3.5 and 3.6, and for PyPy. Check 106 | https://travis-ci.org/uzhdag/pathpy/pull_requests 107 | and make sure that the tests pass for all supported Python versions. 108 | 109 | Tips 110 | ---- 111 | 112 | To run a subset of tests:: 113 | 114 | $ py.test tests.test_pathpy 115 | 116 | 117 | Deploying 118 | --------- 119 | 120 | A reminder for the maintainers on how to deploy. 121 | Make sure all your changes are committed (including an entry in HISTORY.rst). 122 | Then run:: 123 | 124 | $ bumpversion patch # possible: major / minor / patch 125 | $ git push 126 | $ git push --tags 127 | 128 | Travis will then deploy to PyPI if tests pass. 
129 | -------------------------------------------------------------------------------- /DESCRIPTION.rst: -------------------------------------------------------------------------------- 1 | pathpy is an OpenSource python package for the analysis of time series data on networks using higher- and multi order network models. The package is tailored to analyse temporal networks as well as sequential data that capture multiple short, independent paths observed in an underlying graph topology. Examples for data that can be analysed include time-stamped social networks, user click streams in information networks, biological pathways, or information cascades in social networks. -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | 2.2.0 (2019-09-21) 5 | ------------------ 6 | 7 | * Several Bug Fixes for API and visualisations 8 | 9 | 10 | 2.0.0 (2018-08-17) 11 | ------------------ 12 | 13 | * PyPi Release of 2.0 release version. 14 | 15 | 2.0.0a (2018-08-07) 16 | ------------------- 17 | 18 | * First public release of 2.0 alpha on PyPI. 19 | 20 | 1.2.1 (2018-02-23) 21 | ------------------ 22 | 23 | * First test release on PyPI. 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | BROWSER := python -c "$$BROWSER_PYSCRIPT" 28 | 29 | help: 30 | @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 31 | 32 | clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts 33 | 34 | clean-build: ## remove build artifacts 35 | rm -fr build/ 36 | rm -fr dist/ 37 | rm -fr .eggs/ 38 | find . -name '*.egg-info' -exec rm -fr {} + 39 | find . -name '*.egg' -exec rm -f {} + 40 | 41 | clean-pyc: ## remove Python file artifacts 42 | find . -name '*.pyc' -exec rm -f {} + 43 | find . -name '*.pyo' -exec rm -f {} + 44 | find . -name '*~' -exec rm -f {} + 45 | find . 
-name '__pycache__' -exec rm -fr {} + 46 | 47 | clean-test: ## remove test and coverage artifacts 48 | rm -fr .tox/ 49 | rm -f .coverage 50 | rm -fr htmlcov/ 51 | 52 | lint: ## check style with flake8 53 | flake8 pathpy tests 54 | 55 | test: ## run tests quickly with the default Python 56 | pytest 57 | 58 | test-all: ## run tests on every Python version with tox 59 | pytest --runslow 60 | 61 | coverage: ## check code coverage quickly with the default Python 62 | pytest --cov pathpy \ 63 | --runslow \ 64 | --latex \ 65 | --networkx \ 66 | --cov-report html \ 67 | --cov-report term \ 68 | --doctest-modules 69 | 70 | docs: ## generate Sphinx HTML documentation, including API docs 71 | rm -f docs/pathpy.rst 72 | rm -f docs/modules.rst 73 | sphinx-apidoc -o docs/ pathpy 74 | $(MAKE) -C docs clean 75 | $(MAKE) -C docs html 76 | $(BROWSER) docs/_build/html/index.html 77 | 78 | servedocs: docs ## compile the docs watching for changes 79 | watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . 80 | 81 | release: dist ## package and upload a release 82 | twine check dist/* 83 | twine upload -p $(PYPI_PASSWORD) -u $(PYPI_USER) dist/* 84 | 85 | dist: clean ## builds source and wheel package 86 | python setup.py sdist 87 | python setup.py bdist_wheel 88 | ls -l dist 89 | 90 | install: clean ## install the package to the active Python's site-packages 91 | python setup.py install 92 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | pathpy is an OpenSource python package for the analysis of time 5 | series data on networks using higher- and multi-order network models. 6 | 7 | pathpy is specifically tailored to analyse temporal networks as 8 | well as time series and sequence data that capture multiple short, 9 | independent paths observed in an underlying graph or network. 10 | Examples for data that can be analysed with pathpy include time-stamped 11 | social networks, user click streams in information networks, biological 12 | pathways, citation networks, or information cascades in social networks. 13 | 14 | Unifying the modelling and analysis of path statistics and temporal networks, 15 | pathpy provides efficient methods to extract causal or time-respecting paths from 16 | time-stamped network data. The current package distributed via the PyPI name 17 | pathpy2 supersedes the packages pyTempnets as well as version 1.0 of pathpy. 18 | 19 | pathpy facilitates the analysis of temporal correlations in time 20 | series data on networks. It uses model selection and statistical 21 | learning to generate optimal higher- and multi-order models that capture both 22 | topological and temporal characteristics. It can help to answer the important 23 | question when a network abstraction of complex systems is 24 | justified and when higher-order representations are needed instead. 25 | 26 | The theoretical foundation of this package, higher- and multi-order network 27 | models, was developed in the following published works: 28 | 29 | 1. I Scholtes: When is a network a network? Multi-Order Graphical Model 30 | Selection in Pathways and Temporal Networks, In KDD'17 - Proceedings 31 | of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and 32 | Data Mining, Halifax, Nova Scotia, Canada, August 13-17, 2017 33 | http://dl.acm.org/citation.cfm?id=3098145 34 | 2. 
I Scholtes, N Wider, A Garas: Higher-Order Aggregate Networks in the 35 | Analysis of Temporal Networks: Path structures and centralities 36 | In The European Physical Journal B, 89:61, March 2016 37 | http://dx.doi.org/10.1140/epjb/e2016-60663-0 38 | 3. I Scholtes, N Wider, R Pfitzner, A Garas, CJ Tessone, F Schweitzer: 39 | Causality-driven slow-down and speed-up of diffusion in 40 | non-Markovian temporal networks, In Nature Communications, 5, September 2014 41 | http://www.nature.com/ncomms/2014/140924/ncomms6024/full/ncomms6024.html 42 | 4. R Pfitzner, I Scholtes, A Garas, CJ Tessone, F Schweitzer: 43 | Betweenness preference: Quantifying correlations in the topological 44 | dynamics of temporal networks, Phys Rev Lett, 110(19), 198701, May 2013 45 | http://journals.aps.org/prl/abstract/10.1103/PhysRevLett.110.198701 46 | 47 | pathpy extends higher-order modelling approaches towards multi-order models 48 | for paths that capture temporal correlations at multiple length scales 49 | simultaneously. All mathematical details of the framework can be found in the 50 | openly available preprint at https://arxiv.org/abs/1702.05499. 51 | 52 | A broader view on higher-order models in the analyis of complex systems can be 53 | found at https://arxiv.org/abs/1806.05977. 54 | 55 | pathpy is fully integrated with jupyter, providing rich and interactive in-line 56 | visualisations of networks, temporal networks, higher-, and multi-order models. 57 | Visualisations can be exported to HTML5 files that can be shared and published 58 | onthe Web. 59 | 60 | 61 | Download and installation 62 | ========================= 63 | 64 | pathpy is pure python code. It has no platform-specific dependencies 65 | and should thus work on all platforms. pathpy requires python 3.x. 66 | It builds on numpy and scipy. The latest release version 2.0 of pathpy 67 | can be installed by typing: 68 | 69 | pip install pathpy2 70 | 71 | Please make sure that you use the pyPI name pathpy2 as the package name pathpy is currently blocked. 72 | 73 | Tutorial 74 | ======== 75 | 76 | A comprehensive 3 hour hands-on tutorial that shows how you can use pathpy 77 | to analyse data on pathways and temporal networks is available online at: 78 | 79 | https://ingoscholtes.github.io/kdd2018-tutorial/ 80 | 81 | An explanatory video that introduces the science behind pathpy is available here: 82 | 83 | https://youtu.be/CxJkVrD2ZlM 84 | 85 | A promotional video showcasing some of pathpy's features is available here: 86 | 87 | https://youtu.be/QIPqFaR2Z5c 88 | 89 | 90 | Documentation 91 | ============= 92 | 93 | The code is fully documented via docstrings which are accessible through 94 | python's built-in help system. Just type help(SYMBOL_NAME) to see 95 | the documentation of a class or method. A reference manual is available 96 | here https://ingoscholtes.github.io/pathpy/hierarchy.html 97 | 98 | 99 | Releases and Versioning 100 | ======================= 101 | 102 | The first public beta release of pathpy (released February 17 2017) is 103 | v1.0-beta. Following versions are named MAJOR.MINOR.PATCH according to semantic 104 | versioning. The current version is 2.0.0. 105 | 106 | Known Issues 107 | ============ 108 | 109 | - Depending on whether or not scipy has been compiled 110 | with or without the numerics package MKL, considerable 111 | numerical differences can occur, e.g. for eigenvalue 112 | centralities, PageRank, and other measures that depend 113 | on the eigenvectors and eigenvalues of matrices. 
114 | Please refer to scipy.show_config() to show compilation flags. 115 | - Interactive visualisations in jupyter are currently only 116 | supported for juypter notebooks, stand-alone HTML files, 117 | and the jupyter display integrated in IDEs like Visual 118 | Studio Code (which we highly recommend to work with pathpy). 119 | Due to its new widget mechanism, interactive d3js 120 | visualisations are currently not available for jupyterLab. 121 | Due to the complex document object model generated by 122 | jupyter notebooks, visualisation performance is best in 123 | stand-alone HTML files and in Visual Studio Code. 124 | - The visualisation of temporal networks currently does 125 | not support the drawing of edge arrows for directed 126 | edges. However, a powerful templating mechanism is 127 | available to support custom interactive and dynamic 128 | visualizations of temporal networks. 129 | - The visualisation of paths in terms of alluvial diagrams 130 | within jupyter is currently unstable for networks with 131 | large delay. This is due to the asynchronous loading of 132 | external scripts. 133 | 134 | 135 | Acknowledgements 136 | ================ 137 | 138 | The research behind this data analysis framework was generously funded by the Swiss 139 | State Secretariate for Education, Research and Innovation via Grant C14.0036. 140 | The development of the predecessor package pyTempNets was further supported by the MTEC 141 | Foundation in the context of the project "The Influence of Interaction Patterns on 142 | Success in Socio-Technical Systems: From Theory to Practice." 143 | 144 | The further development of pathpy is currently supported by the 145 | Swiss National Science Foundation via Grant 176938. See details at: 146 | 147 | http://p3.snf.ch/Project-176938 148 | 149 | 150 | Contributors 151 | ============ 152 | 153 | Ingo Scholtes (project lead, development) 154 | Luca Verginer (development, test suite integration) 155 | 156 | 157 | Past Contributors 158 | ================= 159 | Roman Cattaneo (development) 160 | Nicolas Wider (testing) 161 | 162 | 163 | Copyright 164 | ========= 165 | 166 | pathpy is licensed under the GNU Affero General Public 167 | License. See https://choosealicense.com/licenses/agpl-3.0/ 168 | 169 | (c) ETH Zürich & University of Zurich, 2015 - 2018 170 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | /pathpy.rst 2 | /pathpy.*.rst 3 | /modules.rst 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = python -msphinx 7 | SPHINXPROJ = pathpy 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # pathpy documentation build configuration file, created by 5 | # sphinx-quickstart on Fri Jun 9 13:47:02 2017. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | # If extensions (or modules to document with autodoc) are in another 17 | # directory, add these directories to sys.path here. If the directory is 18 | # relative to the documentation root, use os.path.abspath to make it 19 | # absolute, like shown here. 20 | # 21 | import os 22 | import sys 23 | sys.path.insert(0, os.path.abspath('..')) 24 | 25 | import pathpy 26 | 27 | # -- General configuration --------------------------------------------- 28 | 29 | # If your documentation needs a minimal Sphinx version, state it here. 30 | # 31 | # needs_sphinx = '1.0' 32 | 33 | # Add any Sphinx extension module names here, as strings. They can be 34 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 35 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon'] 36 | # Add any paths that contain templates here, relative to this directory. 37 | templates_path = ['_templates'] 38 | 39 | # The suffix(es) of source filenames. 40 | # You can specify multiple suffix as a list of string: 41 | # 42 | # source_suffix = ['.rst', '.md'] 43 | source_suffix = '.rst' 44 | 45 | # The master toctree document. 46 | master_doc = 'index' 47 | 48 | # General information about the project. 49 | project = u'pathpy' 50 | copyright = u"2018, Ingo Scholtes" 51 | author = u"Ingo Scholtes" 52 | 53 | # The version info for the project you're documenting, acts as replacement 54 | # for |version| and |release|, also used in various other places throughout 55 | # the built documents. 56 | # 57 | # The short X.Y version. 58 | version = pathpy.__version__ 59 | # The full version, including alpha/beta/rc tags. 60 | release = pathpy.__version__ 61 | 62 | # The language for content autogenerated by Sphinx. Refer to documentation 63 | # for a list of supported languages. 64 | # 65 | # This is also used if you do content translation via gettext catalogs. 66 | # Usually you set "language" from the command line for these cases. 67 | language = None 68 | 69 | # List of patterns, relative to source directory, that match files and 70 | # directories to ignore when looking for source files. 71 | # This patterns also effect to html_static_path and html_extra_path 72 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 73 | 74 | # The name of the Pygments (syntax highlighting) style to use. 75 | pygments_style = 'sphinx' 76 | 77 | # If true, `todo` and `todoList` produce output, else they produce nothing. 78 | todo_include_todos = False 79 | 80 | 81 | # -- Options for HTML output ------------------------------------------- 82 | 83 | # The theme to use for HTML and HTML Help pages. See the documentation for 84 | # a list of builtin themes. 
85 | # 86 | html_theme = "sphinx_rtd_theme" 87 | 88 | # Theme options are theme-specific and customize the look and feel of a 89 | # theme further. For a list of options available for each theme, see the 90 | # documentation. 91 | # 92 | # html_theme_options = {} 93 | 94 | # Add any paths that contain custom static files (such as style sheets) here, 95 | # relative to this directory. They are copied after the builtin static files, 96 | # so a file named "default.css" will overwrite the builtin "default.css". 97 | html_static_path = ['_static'] 98 | 99 | 100 | # -- Options for HTMLHelp output --------------------------------------- 101 | 102 | # Output file base name for HTML help builder. 103 | htmlhelp_basename = 'pathpydoc' 104 | 105 | 106 | # -- Options for LaTeX output ------------------------------------------ 107 | 108 | latex_elements = { 109 | # The paper size ('letterpaper' or 'a4paper'). 110 | # 111 | # 'papersize': 'letterpaper', 112 | 113 | # The font size ('10pt', '11pt' or '12pt'). 114 | # 115 | # 'pointsize': '10pt', 116 | 117 | # Additional stuff for the LaTeX preamble. 118 | # 119 | # 'preamble': '', 120 | 121 | # Latex figure (float) alignment 122 | # 123 | # 'figure_align': 'htbp', 124 | } 125 | 126 | # Grouping the document tree into LaTeX files. List of tuples 127 | # (source start file, target name, title, author, documentclass 128 | # [howto, manual, or own class]). 129 | latex_documents = [ 130 | (master_doc, 'pathpy.tex', 131 | u'pathpy Documentation', 132 | u'Ingo Scholtes', 'manual'), 133 | ] 134 | 135 | 136 | # -- Options for manual page output ------------------------------------ 137 | 138 | # One entry per manual page. List of tuples 139 | # (source start file, name, description, authors, manual section). 140 | man_pages = [ 141 | (master_doc, 'pathpy', 142 | u'pathpy Documentation', 143 | [author], 1) 144 | ] 145 | 146 | 147 | # -- Options for Texinfo output ---------------------------------------- 148 | 149 | # Grouping the document tree into Texinfo files. List of tuples 150 | # (source start file, target name, title, author, 151 | # dir menu entry, description, category) 152 | texinfo_documents = [ 153 | (master_doc, 'pathpy', 154 | u'pathpy Documentation', 155 | author, 156 | 'pathpy', 157 | 'One line description of project.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /docs/contact.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | contact 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 | 21 |
22 |
23 |

Contact

24 |

pathpy is brought to you by the Machine Learning for Complex Networks group at the Center for Artificial Intelligence and Data Science of Julius-Maximilians-Universität Würzburg.

25 |

For any inquiries, please contact:

26 |

Univ.-Prof. Dr. Ingo Scholtes
27 | Machine Learning for Complex Networks
28 | Center for Artificial Intelligence and Data Science
29 | Julius-Maximilians-Universität Würzburg
30 | Am Hubland
31 | Würzburg
32 | Germany

33 |
34 |
35 |
36 |
37 |
38 | 39 | 40 | 46 | 47 |
48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /docs/contribute.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | contribute 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 | 22 |
23 |
24 |

Who is developing pathpy?

25 |

pathpy is brought to you by the Data Analytics Group at the University of Wuppertal and the University of Zurich. The following people and organizations have contributed to the development:

26 | 27 |

Current Contributors

28 | Prof. Dr. Ingo Scholtes (project lead, development)
29 | Prof. Dr. Jürgen Hackl (chief architect, development)
30 | Dr. Luca Verginer (development, test suite integration)
31 | Christoph Gote (testing)
32 | Vincenzo Perri (testing)
33 | Luka Petrovic (testing)
34 | Max Kortenbruck (documentation, testing) 35 | 36 |

Past Contributors

37 | Roman Cattaneo (development)
38 | Dr. Nicolas Wider (testing) 39 | 40 |

Acknowledgments

41 |

The development of pathpy is generously funded by the Swiss National Science Foundation via grant 176938. Some of the research behind pathpy was previously funded by the Swiss State Secretariat for Education, Research and Innovation via grant C14.0036. The development of a predecessor package was supported by the MTEC Foundation in the context of the project The Influence of Interaction Patterns on Success in Socio-Technical Systems: From Theory to Practice.

42 |

The development, testing, and documentation of pathpy are supported by the Chair of Systems Design at ETH Zürich, Switzerland.

43 |
44 |
45 |
46 | github.com/pathpy/pathpy 47 |
48 |
49 | 50 |
51 |
52 |

How can I contribute?

53 |

Did you find a bug? Would you like to propose new functionality? We welcome your contributions.

54 |
55 |
56 | 57 | 63 | 64 |
65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/contributing.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CONTRIBUTING.rst 2 | -------------------------------------------------------------------------------- /docs/css/pathpy.css: -------------------------------------------------------------------------------- 1 | html,body,h1,h2,h3,h4,h5,h6 {font-family: "Roboto", sans-serif;} 2 | .w3-sidebar { 3 | z-index: 3; 4 | width: 250px; 5 | top: 43px; 6 | bottom: 0; 7 | height: inherit; 8 | } -------------------------------------------------------------------------------- /docs/docs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | documentation 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 | 22 |
23 |
24 |

Documentation

25 |

pathpy is fully documented via docstrings. This means that python-friendly editors like PyCharm, Visual Studio Code, or web-based jupyter notebooks will automatically display the documentation of classes, methods, and symbols as you type or hover over a symbol or method.

26 |

In a python interpreter, you can print the in-line documentation of a symbol by calling the help function:

27 |
28 |
help(pathpy.TemporalNetwork)
29 |
30 |
31 |
Help on class TemporalNetwork in module pathpy.classes.temporal_network:
32 | 
33 | class TemporalNetwork(builtins.object)
34 |   |  This class represents a sequence of time-stamped edges.
35 |   |   Instances of this class can be used to generate path statistics
36 |   |   based on the time-respecting paths resulting from a given maximum
37 |   |   time difference between consecutive time-stamped edges.
38 |   |  
39 |   |  Methods defined here:
40 |   |  
41 |   |  __init__(self, tedges=None)
42 |   |      Constructor that generates a temporal network instance.
43 |   |      
44 |  [...]
45 |
46 |

To simplify the browsing of the documentation, we provide a continuously updated, auto-generated HTML-based reference manual.

47 |

If you experience any issues, please consider submitting an issue to our issue tracker.

48 |
49 |
50 |
51 | www.pathpy.net/manual 52 |
53 |
54 | 55 | 61 | 62 |
63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /docs/edu.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | education 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 | 22 |
23 |
24 |

Educational resources

25 |

Our team is committed to assisting users in leveraging the power of higher-order network analytics. A short tutorial that walks you through the setup of pathpy and introduces some fundamental concepts is available here. In addition, we regularly offer talks, lectures, demonstrations, and tutorials that introduce higher-order network analytics with pathpy. Below we collect information on those events, which often include publicly available educational material and video recordings.

26 |

Are you interested in hosting a lecture or tutorial on pathpy at your institution? Or are you using pathpy to teach (higher-order) network analytics? We would be excited to hear from you!

27 | 28 |

pathpy Summer School Lecture

29 |

On September 8, we gave a two-hour lecture on higher-order models of temporal networks at the 5th International Summer School on Data Science (SSDS 2020), held virtually in Split, Croatia. The lecture was followed by a hands-on tutorial on pathpy taught by Vincenzo Perri. The material is available here.

30 | 31 |

Tutorial at EuroCSS

32 |

Between September 2 and 4, 2019, we will give a half-day hands-on tutorial on dynamic social network analysis with pathpy at the European Symposium on Computational Social Science in Zurich, Switzerland. Please stay tuned as we prepare the tutorial material.

33 | 34 |

pathpy User Meeting

35 |

At the first pathpy user meeting, held at ETH Zürich on June 17th 2019, we gave an overview of some of pathpy's latest feature additions. The code presented there can be found here.

36 | 37 |

Lecture Series at ETH Zurich

38 |

In the spring semester 2019, Frank Schweitzer teaches a lecture on Complex Networks. Through assignment sheets and exercises, students are introduced to the modelling of complex networks with pathpy.

39 | 40 |

Lecture Series at University of Zurich

41 |

In the fall semester 2018, Ingo Scholtes offered a series of 12 lectures on Statistical Network Analysis. In the accompanying exercises, students get an introduction to graph analytics with pathpy.

42 | 43 |

Hands-on Tutorial at Complexity Science Hub

44 |

In September 2018, Ingo Scholtes gave a half-day tutorial on higher-order network analytics with pathpy at the Complexity Science Hub in Vienna, Austria. All material of this tutorial - including slides, data, assignments, and solutions to live coding sessions - is available online.

45 | 46 |

Hands-on Tutorial at KDD'18

47 |

In August 2018, we gave a full-day hands-on tutorial on Mining Time Series Data with Higher-order Network Models at the flagship data science conference KDD'18 in London, UK. The first half of the tutorial introduces higher-order network analysis with pathpy. The second half covers higher-order clustering with the flow compression algorithm InfoMap. All material - including slides, data, assignments, and solutions to live coding sessions - is available online.

48 | 49 |

Lecture at ETH Zürich Symposium

50 |

In April 2018, Ingo Scholtes gave a lecture on optimal higher-order network models at the symposium on Networks, Time and Causality at ETH Zürich. A video recording of the symposium is available below.

51 | 52 | 53 |

Lecture at Northeastern University, Boston

54 |

In November 2017, Ingo Scholtes gave a lecture on multi-order network analysis at the Network Science Institute of Northeastern University in Boston, MA, USA. A video recording of the lecture is available below.

55 | 56 |
57 |
58 |
59 | www.pathpy.net/tutorial 60 |
61 |
62 | 63 | 69 | 70 | 71 |
72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /docs/example_network.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/docs/example_network.png -------------------------------------------------------------------------------- /docs/example_network_scaled.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/docs/example_network_scaled.png -------------------------------------------------------------------------------- /docs/history.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../HISTORY.rst 2 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 | 22 |
23 |
24 |

What is pathpy?

25 |

pathpy is an Open Source python package providing higher-order network analytics for time series data.

26 |

pathpy is tailored to analyse time-stamped network data as well as sequential data that capture multiple short paths observed in a graph or network. Examples for data that can be analysed with pathpy include high-resolution time-stamped network data, dynamic social networks, user click streams on the Web, biological pathway data, citation graphs, passenger trajectories in transportation networks, or information propagation in social networks.

27 |

Unifying the analysis of time series data on networks, pathpy provides efficient methods to extract causal or time-respecting paths in time-stamped social networks. It facilitates the analysis of higher-order dependencies and uses principled model selection techniques to infer models that capture both topological and temporal characteristics. It helps to answer the question of when network models of time series data are justified and when higher-order models are needed.

28 |
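As a minimal, hedged sketch of how such time-stamped data enters pathpy (the (source, target, timestamp) tuple format passed to the documented tedges argument of TemporalNetwork is an assumption made for illustration):

import pathpy as pp

# time-stamped edges, assumed format: (source, target, timestamp)
tedges = [('a', 'b', 1), ('b', 'c', 2), ('b', 'd', 5)]
t = pp.TemporalNetwork(tedges=tedges)
print(t)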

pathpy is fully integrated with jupyter, providing rich interactive visualisations of networks, temporal networks, higher-, and multi-order models. Visualisations can be exported to HTML5 files that can be shared and published on the Web. You can find examples in our gallery.

29 |
30 |
31 | 32 |
33 |
34 | 35 |
36 |
37 |

What is the science behind pathpy?

38 |

The theoretical foundation of this package, higher- and multi-order network models, was developed in the following peer-reviewed research articles:

39 |
    40 |
  1. R Lambiotte, M Rosvall, I Scholtes: From networks to optimal higher-order models of complex systems, Nature Physics 15, 313-320, March 2019
  41 |
  2. I Scholtes: When is a network a network? Multi-Order Graphical Model Selection in Pathways and Temporal Networks, In KDD'17 - Proceedings of the 23rd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining, Halifax, Nova Scotia, Canada, August 13-17, 2017
  42 |
  3. I Scholtes, N Wider, A Garas: Higher-Order Aggregate Networks in the Analysis of Temporal Networks: Path structures and centralities, The European Physical Journal B, 89:61, March 2016
  43 |
  4. I Scholtes, N Wider, R Pfitzner, A Garas, CJ Tessone, F Schweitzer: Causality-driven slow-down and speed-up of diffusion in non-Markovian temporal networks, Nature Communications, 5, September 2014
  44 |
  5. R Pfitzner, I Scholtes, A Garas, CJ Tessone, F Schweitzer: Betweenness preference: Quantifying correlations in the topological dynamics of temporal networks, Phys Rev Lett, 110(19), 198701, May 2013
  45 |
46 |

An explanatory video with a high-level introduction to the science behind pathpy is available here. A broader view on the importance of higher-order network models in network analysis can be found in this recent article.

47 |
48 |
49 | 50 |
51 |
52 | 53 |
54 |
55 |

How can I learn how to use pathpy?

56 | 57 |

A step-by-step introduction that shows how to install pathpy and how to perform basic network analysis and visualisation tasks can be found here.

58 | 59 |

We further provide an extensive collection of educational resources, including lectures, tutorials, exercises, and data. If you are interested in hosting such an educational event at your institution, please contact us.

60 | 61 |
62 |
63 | 64 |
65 |
66 | 67 |
68 |
69 |

What about licensing?

70 |

pathpy is released under the GNU Affero General Public License. 71 |

72 |
73 | 74 | 80 | 81 | 82 |
83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to pathpy's documentation! 2 | ====================================== 3 | 4 | .. toctree:: 5 | :maxdepth: 2 6 | :caption: Contents: 7 | 8 | readme 9 | installation 10 | usage 11 | modules 12 | contributing 13 | history 14 | 15 | Indices and tables 16 | ================== 17 | * :ref:`genindex` 18 | * :ref:`modindex` 19 | * :ref:`search` 20 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. highlight:: shell 2 | 3 | ============ 4 | Installation 5 | ============ 6 | 7 | 8 | Stable release 9 | -------------- 10 | 11 | To install pathpy, run this command in your terminal: 12 | 13 | .. code-block:: console 14 | 15 | $ pip install pathpy 16 | 17 | This is the preferred method to install pathpy, as it will always install the most recent stable release. 18 | 19 | If you don't have `pip`_ installed, this `Python installation guide`_ can guide 20 | you through the process. 21 | 22 | .. _pip: https://pip.pypa.io 23 | .. _Python installation guide: http://docs.python-guide.org/en/latest/starting/installation/ 24 | 25 | 26 | From sources 27 | ------------ 28 | 29 | The sources for pathpy can be downloaded from the `Github repo`_. 30 | 31 | You can either clone the public repository: 32 | 33 | .. code-block:: console 34 | 35 | $ git clone git://github.com/IngoScholtes/pathpy 36 | 37 | Or download the `tarball`_: 38 | 39 | .. code-block:: console 40 | 41 | $ curl -OL https://github.com/IngoScholtes/pathpy/tarball/master 42 | 43 | Once you have a copy of the source, you can install it with: 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | 49 | 50 | .. _Github repo: https://github.com/IngoScholtes/pathpy 51 | .. _tarball: https://github.com/IngoScholtes/pathpy/tarball/master 52 | -------------------------------------------------------------------------------- /docs/js/menu.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function(){ 2 | $("#nav").load("/menu.html"); 3 | //$("a").find(`[href="` + document.location.pathname.match(/[^\/]+$/)[0] + `"]`).attr('class', 'w3-theme'); 4 | }); -------------------------------------------------------------------------------- /docs/js/sidebar.js: -------------------------------------------------------------------------------- 1 | $(document).ready(function(){ 2 | $("#side").load("/sidebar.html"); 3 | }); -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=python -msphinx 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | set SPHINXPROJ=pathpy 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 19 | echo.The Sphinx module was not found. Make sure you have Sphinx installed, 20 | echo.then set the SPHINXBUILD environment variable to point to the full 21 | echo.path of the 'sphinx-build' executable. Alternatively you may add the 22 | echo.Sphinx directory to PATH. 23 | echo. 
24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/manual/utils/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | pathpy.utils API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 |
19 |
20 |

pathpy.utils module

21 |
22 |
23 |

A collection of logging functions and exceptions.

24 |
25 | Source code 26 |
"""
27 | A collection of logging functions and exceptions.
28 | """
29 | from .log import Log
30 | from .log import Severity
31 | from .exceptions import PathpyException
32 | from .exceptions import EmptySCCError
33 | from .exceptions import PathpyNotImplemented
34 | from .exceptions import PathsTooShort
35 | from .exceptions import PathpyError
36 |
37 |
38 |
39 |
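A minimal usage sketch of this module follows; the names Log, Severity, and PathpyException are taken from the listing above, while the Log.add call signature and the Severity.INFO level are assumptions made for illustration:

import pathpy as pp
from pathpy.utils import Log, Severity, PathpyException

try:
    n = pp.Network()
    n.add_edge('a', 'b')
    # assumed call: emit an INFO-level message through pathpy's logger
    Log.add('added edge (a, b)', Severity.INFO)
except PathpyException as err:
    # catch pathpy-specific errors (assuming they derive from PathpyException)
    print('pathpy error:', err)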

Sub-modules

40 |
41 |
pathpy.utils.default_containers
42 |
43 |

Provides default containers for various classes 44 | which are used to store nodes, edges and similar objects …

45 |
46 |
pathpy.utils.exceptions
47 |
48 |

Classes to signal errors and exceptions in pathpy.

49 |
50 |
pathpy.utils.log
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 | 83 |
84 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /docs/manual/visualisation/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | pathpy.visualisation API documentation 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 |
19 |
20 |

pathpy.visualisation module

21 |
22 |
23 |

provides html and tikz visualisations for networks, temporal networks, and paths

24 |
25 | Source code 26 |
"""provides html and tikz visualisations for networks, temporal networks, and paths"""
27 | 
28 | from .html import plot
29 | from .html import export_html
30 | from .html import plot_diffusion
31 | from .html import export_html_diffusion
32 | from .html import plot_walk
33 | from .html import export_html_walk
34 | 
35 | from .tikz import export_tikz
36 | 
37 | from .pdf import svg_to_pdf
38 | from .pdf import svg_to_png
39 |
40 |
41 |
42 |
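As a brief usage sketch (plot and export_html are re-exported by this module as shown in the listing above; calling them with just a network, respectively a network and an output filename, is an assumption made for illustration):

import pathpy as pp

n = pp.Network()
n.add_edge('a', 'b')
n.add_edge('b', 'c')

# interactive d3js visualisation, e.g. inline in a jupyter notebook
pp.visualisation.plot(n)

# write the same visualisation to a stand-alone HTML5 file
pp.visualisation.export_html(n, 'network.html')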

Sub-modules

43 |
44 |
pathpy.visualisation.alluvial
45 |
46 |
47 |
48 |
pathpy.visualisation.html
49 |
50 |
51 |
52 |
pathpy.visualisation.pdf
53 |
54 |
55 |
56 |
pathpy.visualisation.tikz
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 | 90 |
91 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /docs/menu.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | pathpy 4 | getting started 5 | education 6 |
7 | 8 | 12 |
13 | documentation 14 | news 15 | contribute 16 | contact 17 |
18 |
-------------------------------------------------------------------------------- /docs/multiorder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/docs/multiorder.png -------------------------------------------------------------------------------- /docs/news.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | news 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
21 |
22 |

News

23 | 24 |

pathpy3 demo at HONS Satellite

25 |

The afternoon session of this year's edition of the NetSci Satellite on Higher-Order Models in Network Science will feature a demo session, showcasing the latest features of pathpy3. The session will be given by Jürgen Hackl.
2020/09/17

26 | 27 |

pathpy Summer School Lecture

28 |

Today, Ingo Scholtes will teach a lecture on higher-order models of temporal networks at the 5th International Summer School on Data Science (SSDS 2020), held virtually in Split, Croatia. The lecture will be followed by a hands-on tutorial on pathpy taught by Vincenzo Perri.
2020/09/08

29 | 30 |

Statistical Network Analysis with pathpy

31 |

A new MSc lecture series on Statistical Network Analysis held at the University of Wuppertal in the summer semester 2020 features a series of practice lectures and exercises building on the latest version of pathpy3. The course gives a deep dive into the statistical modelling and analysis of complex networks across disciplines.
2020/04/01

32 | 33 |

pathpy Tutorial at NetSci-X

34 |

Jürgen Hackl teaches a half-day tutorial on pathpy at NetSci-X, held in Tokyo, Japan. He will also give a sneak peek of the latest version, pathpy3, which is currently in development.
2020/01/20

35 | 36 |

First pathpy user meeting

37 |

The first pathpy user meeting will take place today at 14:00 at ETH Zürich, WEV building in Weinbergstrasse 56/58.
2019/06/17

38 | 39 |

pathpy featured in Nature Physics

40 |

Today our perspectives article From networks to optimal higher-order models of complex systems was published in the journal Nature Physics. The article highlights the need for higher-order models of complex systems, which are the basis for higher-order network analytics in pathpy.
2019/03/25

41 | 42 |

pathpy tutorial at EuroCSS

43 |

Great news: The 2019 edition of the European Symposium Series on Computational Social Science (EuroCSS) will feature a half-day tutorial introducing dynamic social network analysis with pathpy. See you in Zurich!
2019/03/24

44 | 45 |

Tutorial at Complexity Science Hub

46 |

On Wednesday, we will give a talk and tutorial on higher-order data analytics at the Complexity Science Hub in Vienna. A general introduction to higher-order network analytics will be followed by a four-hour live tutorial, in which we demonstrate these techniques with pathpy.
2018/09/03

47 | 48 |

Release of pathpy2

49 |

Today we released a greatly improved version 2 of pathpy, an OpenSource software package bringing higher-order network analytics to python. pathpy is now available via the python package index. A comprehensive educational tutorial on pathpy is available online.
2018/08/23

50 | 51 |

Hands-on tutorial at KDD'18

52 |

On August 22 we will give a hands-on tutorial on Higher-order Data Analytics for Temporal Network Data at KDD'18 in London, UK. In this tutorial we introduce higher-order graph analytics with the software package pathpy.
2018/08/22

53 | 54 |

Demo at NetSci'18

55 |

Today we will give a brief demo of the latest version of pathpy in the NetSci satellite on Higher-Order Network Models (HONS).
2018/06/11

56 | 57 |
58 | 59 | 65 | 66 | 67 |
68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /docs/pathpy_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/docs/pathpy_logo.png -------------------------------------------------------------------------------- /docs/readme.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../README.rst 2 | -------------------------------------------------------------------------------- /docs/resources/pathpy_user_meeting.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/docs/resources/pathpy_user_meeting.zip -------------------------------------------------------------------------------- /docs/sidebar.html: -------------------------------------------------------------------------------- 1 |
2 | Installing pathpy 3 | Development tools 4 | Network basics 5 | Temporal networks 6 | Path statistics 7 | Higher-order models 8 | Model selection 9 | 10 | 11 | Known issues 12 |
-------------------------------------------------------------------------------- /docs/tutorial/ide.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | first steps 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 23 |
24 |
25 |

Which development tools do you recommend?

26 |

While you are free to use the python editor of your choice, we recommend that new users have a look at Visual Studio Code, a platform-independent Open Source development environment available for Windows, macOS, and Linux. It supports a wide array of programming languages, including python.

27 |

Just download the installation archive and execute it. Once the installation has completed, you can run Visual Studio Code either by clicking the icon or typing 28 |

> code
29 | in the terminal.

30 |

A key feature of Visual Studio Code's approach to python is built-in support for jupyter notebooks, which will allow you to edit python code in a convenient editor, while seeing interactive output of your code in a split window. Since pathpy comes with a jupyter integration, you can display interactive visualisations of networks, temporal networks, and higher-order networks directly in Visual Studio Code. The following video gives an idea of what this integration looks like:

31 | 32 |

33 |

To conveniently work with python and jupyter in Visual Studio Code, you need to install the python extension, which adds python code editing, debugging, and linting functionality as well as jupyter support. You can install it free of charge using Visual Studio Code's extension manager. Open Visual Studio Code and click the Extensions icon at the bottom of the left menu bar or press Ctrl+Shift+X. This will bring up the Extensions window. Type python in the search box and click the first result. In the window on the right, click Install. Once this is complete, click Reload to complete the installation.

34 |

35 | Now that Visual Studio Code is set up, you can verify your development environment. Create a new file in Visual Studio Code and add the following code: 36 |

37 |
#%%
38 | import pathpy as pp
39 | n = pp.Network()
40 | n.add_edge('a', 'b')
41 | n.add_edge('b', 'c')
42 | print(n)
43 | 
44 | #%%
45 | n
46 |
47 | When you type this code, so-called code lenses Run Cell|Run Below will appear above the two #%% tags. These tags, which will simply be ignored if you run the file with the standard python interpreter, mark the start of a cell in a jupyter notebook that we can execute directly from Visual Studio Code. If you click Run Cell, a new jupyter kernel will be started automatically, and the code in the current cell, i.e. the lines of code between the current #%% tag and the next tag, will be executed. An output window automatically opens, which displays the output of your code.

48 |

Hint: Some users occasionally experience an issue where the jupyter kernel fails to launch. One possible reason is a missing installation of the python package jupytext. If you experience this problem, try installing the package by running: 49 |

50 |
> pip install jupytext
51 |
52 |

53 |

The combination of Visual Studio Code, jupyter, and pathpy provides a convenient environment for data science tasks on (time series) data on complex networks. To learn more about how you can use pathpy to read, analyze, and visualize such data, just move on to the next units.

54 |
55 |
56 | 57 | 63 | 64 |
65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /docs/tutorial/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | first steps 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 23 |
24 |
25 |

What are the system requirements?

26 | pathpy is pure python code. It has no platform-specific dependencies and should thus run without problems on any operating system. pathpy requires python 3.x. Since the end of life of python 2.x is scheduled for January 1st, 2020, we will not offer a backport of pathpy for python 2. If your own code still depends on python 2.x, please consider porting it with 2to3.

27 |

pathpy builds on the data science packages numpy and scipy, which will be set up automatically if you install pathpy using the package manager pip. Even though any python 3.x installation is fine, we recommend using the latest Anaconda distribution, an OpenSource python distribution that is pre-configured for data science and machine learning tasks.

28 |
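If you want to check whether a suitable python version and the scientific dependencies are available in your environment, you can run the following commands in a terminal and in the python interpreter. The version numbers shown here are only examples and will differ on your system:

> python --version
Python 3.6.6
> python
>>> import numpy, scipy
>>> print(numpy.__version__, scipy.__version__)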

How can I install pathpy?

29 |

The latest release version of pathpy can be installed via the python package index PyPI. Just open a terminal and type:

30 |
31 |
> pip install pathpy2
32 |
33 |

If you have both a python 2 and a python 3 setup, you may have to use the command pip3 to specifically refer to your python 3 installation. Also, make sure to use the PyPI name pathpy2, since the PyPI package name pathpy has been name-squatted. We are currently working on a solution to this issue.

34 |

If you want to install the latest development version, you can directly download and install it from our GitHub repository. In this version, some issues of the last release may already be fixed, but please be aware that this code may still be in flux. You can use pip to install the development version as follows:

35 |
36 |
> pip install git+git://github.com/uzhdag/pathpy.git
37 |
38 |

You can check your installation to make sure that everything is working fine. In the terminal, run the python interpreter and import the package as follows: 39 |

40 |
41 | > python
42 | Python 3.6.6 |Anaconda, Inc.| (default) [...]
43 | Type "help", "copyright", "credits" or "license" for more information.
44 | >>> import pathpy as pp
45 | >>> help(pp)
46 | Help on package pathpy:
47 | 
48 | NAME
49 |     pathpy
50 | 
51 | DESCRIPTION
52 |     An OpenSource python package to analyze and
53 |     visualize time series data on complex networks.
54 | [...]
55 |         
56 |
57 |

If you see the help message above, pathpy has been set up successfully and you are ready to take your first steps. 58 |

59 |
60 | 61 | 67 | 68 |
69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /docs/tutorial/issues.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | pathpy | issues 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 |
22 | 23 |
24 |
25 |
26 |

Are there known issues?

27 |

We would like users to be aware of the following known issues in the latest version of pathpy: 28 |

    29 |
  • Depending on whether the dependency package scipy has been compiled against the numerics package MKL or OpenBLAS, considerable numerical differences can occur, e.g. for eigenvector centralities, PageRank, spectral clustering, and other measures that depend on the eigenvectors and eigenvalues of matrices. You can call scipy.show_config() to display the compilation flags (see the snippet after this list). We are currently investigating this issue.
  • 30 |
  • Interactive visualisations in jupyter are currently only supported for jupyter notebooks, stand-alone HTML files, and the jupyter display integrated in IDEs like Visual Studio Code. Due to its new widget mechanism, interactive d3js visualisations are currently not available for JupyterLab (a possible workaround via a stand-alone HTML export is sketched after this list).
  • 31 |
  • The visualisation module currently does not support the drawing of edge arrows for temporal networks with directed edges. However, a powerful templating mechanism is available to support custom interactive and dynamic visualisations both for static and temporal networks.
  • 32 |
  • The visualisation of paths in terms of alluvial diagrams within jupyter is currently unstable. This is due to the asynchronous loading of external scripts and possible network latencies e.g. in wireless networks. We will replace this functionality in a future version.
  • 33 |
34 |
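Two of the points above can be checked or worked around with a few lines of code. To see whether your scipy installation has been compiled against MKL or OpenBLAS, you can inspect its build configuration; the output depends on your installation:

>>> import scipy
>>> scipy.show_config()

If you work in jupyterLab, where the interactive d3js output is not available, you can instead write the visualisation to a stand-alone HTML file and open it in a browser. This is a minimal sketch that assumes export_html accepts a network and an output filename; the filename is arbitrary:

>>> import pathpy as pp
>>> n = pp.Network()
>>> n.add_edge('a', 'b')
>>> pp.visualisation.export_html(n, 'network.html')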

35 |

We are working on solutions to these issues for the next release. An up-to-date list of open issues can be found in our issue tracker. For the installation of development versions, in which your issue might already have been fixed, please refer to the installation instructions above.

36 |
37 |
38 | 39 | 45 | 46 | 47 |
48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | ===== 2 | Usage 3 | ===== 4 | 5 | To use pathpy in a project:: 6 | 7 | import pathpy 8 | -------------------------------------------------------------------------------- /multiorder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/multiorder.png -------------------------------------------------------------------------------- /pathpy.pyproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Debug 5 | 2.0 6 | {896991fc-0289-4bae-b680-88e19508c91f} 7 | 8 | 9 | 10 | 11 | . 12 | . 13 | 14 | 15 | 16 | 17 | 10.0 18 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion) 19 | pathpy 20 | 21 | 22 | 23 | 24 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\Python Tools\Microsoft.PythonTools.targets 25 | 26 | 27 | 28 | Code 29 | 30 | 31 | Code 32 | 33 | 34 | Code 35 | 36 | 37 | Code 38 | 39 | 40 | Code 41 | 42 | 43 | Code 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /pathpy/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | An OpenSource python package to analyze and 3 | visualize time series data on complex networks. 4 | """ 5 | 6 | __author__ = """Ingo Scholtes""" 7 | __email__ = 'scholtes@ifi.uzh.ch' 8 | __version__ = '2.2.0' 9 | 10 | from .classes import * 11 | import pathpy.path_extraction 12 | import pathpy.visualisation 13 | import pathpy.algorithms.centralities 14 | import pathpy.algorithms.components 15 | import pathpy.algorithms.shortest_paths 16 | import pathpy.algorithms.centralities 17 | import pathpy.algorithms.random_walk 18 | import pathpy.algorithms.temporal_walk 19 | import pathpy.algorithms.spectral 20 | import pathpy.algorithms.path_measures 21 | import pathpy.algorithms.infomap 22 | import pathpy.algorithms.modularity 23 | import pathpy.algorithms.random_graphs 24 | from .algorithms import statistics 25 | 26 | import pathpy.utils 27 | 28 | global ENABLE_MULTICORE_SUPPORT 29 | ENABLE_MULTICORE_SUPPORT = False 30 | -------------------------------------------------------------------------------- /pathpy/algorithms/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains a collection of algorithms and measures for networks, higher-order models, and paths 3 | """ -------------------------------------------------------------------------------- /pathpy/algorithms/components.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms to calculate connected components in networks. 3 | """ 4 | 5 | # -*- coding: utf-8 -*- 6 | # pathpy is an OpenSource python package for the analysis of time series data 7 | # on networks using higher- and multi order graphical models. 
8 | # 9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU Affero General Public License as published 13 | # by the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU Affero General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU Affero General Public License 22 | # along with this program. If not, see . 23 | # 24 | # Contact the developer: 25 | # 26 | # E-mail: scholtes@ifi.uzh.ch 27 | # Web: http://www.ingoscholtes.net 28 | from collections import defaultdict 29 | 30 | import numpy as _np 31 | import scipy.sparse.linalg as _sla 32 | 33 | from pathpy import Network 34 | from pathpy.utils import Log, Severity 35 | from pathpy.utils import PathpyError 36 | 37 | __all__ = ['connected_components'] 38 | 39 | 40 | def connected_components(network, lanczos_vecs=None, maxiter=None): 41 | """ 42 | Calculates connected components based on the spectrum of the Laplacian matrix 43 | """ 44 | L = network.laplacian_matrix(weighted=True) 45 | n = network.ncount()-2 46 | if lanczos_vecs is None: 47 | lanczos_vecs = min(n, max(2*n + 1, 20)) 48 | if maxiter is None: 49 | maxiter = n*10 50 | vals, vecs = _sla.eigs(L, k=n, which="SM", return_eigenvectors=True) 51 | 52 | components = defaultdict(set) 53 | c = 0 54 | 55 | # use eigenvectors of zero eigenvalues to map nodes to components 56 | for i in range(n): 57 | if _np.isclose(vals[i], 0, atol=1.e-12): 58 | min_v = _np.min(vecs[:,i]) 59 | for i in _np.where(_np.isclose(vecs[:,i], min_v))[0]: 60 | components[c].add(i) 61 | c += 1 62 | return components 63 | 64 | 65 | 66 | def reduce_to_gcc(network): 67 | """ 68 | Reduces the network to the largest connected component. 69 | Connected components are calculated using Tarjan's algorithm. 70 | """ 71 | 72 | # these are used as nonlocal variables (!) 
73 | index = 0 74 | S = [] 75 | indices = defaultdict(lambda: None) 76 | low_link = defaultdict(lambda: None) 77 | on_stack = defaultdict(lambda: False) 78 | components = {} 79 | 80 | # Tarjan's algorithm 81 | def strong_connect(v): 82 | nonlocal index 83 | nonlocal S 84 | nonlocal indices 85 | nonlocal low_link 86 | nonlocal on_stack 87 | nonlocal components 88 | 89 | indices[v] = index 90 | low_link[v] = index 91 | index += 1 92 | S.append(v) 93 | on_stack[v] = True 94 | 95 | for w in network.successors[v]: 96 | if indices[w] is None: 97 | strong_connect(w) 98 | low_link[v] = min(low_link[v], low_link[w]) 99 | elif on_stack[w]: 100 | low_link[v] = min(low_link[v], indices[w]) 101 | 102 | # create component of node v 103 | if low_link[v] == indices[v]: 104 | components[v] = set() 105 | while True: 106 | w = S.pop() 107 | on_stack[w] = False 108 | components[v].add(w) 109 | if v == w: 110 | break 111 | 112 | # compute strongly connected components 113 | for v in network.nodes: 114 | if indices[v] is None: 115 | strong_connect(v) 116 | # print('node {v}, size = {n}, component = {component}'.format(v=v, component=components[v], n = len(components[v]) )) 117 | 118 | max_size = 0 119 | for v in components: 120 | if len(components[v]) > max_size: 121 | scc = components[v] 122 | max_size = len(components[v]) 123 | 124 | # Reduce higher-order network to SCC 125 | for v in list(network.nodes): 126 | if v not in scc: 127 | network.remove_node(v) 128 | -------------------------------------------------------------------------------- /pathpy/algorithms/modularity.py: -------------------------------------------------------------------------------- 1 | """ 2 | Spectral and information-theoretic measures that can be calculated 3 | based on higher-order models of paths. 4 | """ 5 | 6 | # -*- coding: utf-8 -*- 7 | # pathpy is an OpenSource python package for the analysis of time series data 8 | # on networks using higher- and multi order graphical models. 9 | # 10 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 11 | # 12 | # This program is free software: you can redistribute it and/or modify 13 | # it under the terms of the GNU Affero General Public License as published 14 | # by the Free Software Foundation, either version 3 of the License, or 15 | # (at your option) any later version. 16 | # 17 | # This program is distributed in the hope that it will be useful, 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 | # GNU Affero General Public License for more details. 21 | # 22 | # You should have received a copy of the GNU Affero General Public License 23 | # along with this program. If not, see . 
24 | # 25 | # Contact the developer: 26 | # 27 | # E-mail: scholtes@ifi.uzh.ch 28 | # Web: http://www.ingoscholtes.net 29 | from collections import defaultdict 30 | 31 | import numpy as np 32 | import scipy.sparse.linalg as sla 33 | 34 | from pathpy import HigherOrderNetwork 35 | from pathpy.utils import Log, Severity 36 | from pathpy import Paths 37 | from pathpy.utils import PathpyError 38 | 39 | 40 | __all__ = ['q', 'q_max', 'assortativity_coeff', 'find_communities'] 41 | 42 | def q(network, C=None, delta=None): 43 | assert C is None or delta is None, 'Error: Cannot use clustering and delta-function simultaneously' 44 | 45 | m = network.ecount() 46 | A = network.adjacency_matrix(weighted=False) 47 | idx = network.node_to_name_map() 48 | q = 0.0 49 | for v in network.nodes: 50 | for w in network.nodes: 51 | if (C != None and C[v] == C[w]) or (delta != None and delta(v,w)): 52 | q += A[idx[v], idx[w]] - network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m) 53 | q /= 2*m 54 | return q 55 | 56 | 57 | def q_max(network, C=None, delta=None): 58 | assert C is None or delta is None, 'Error: Cannot use clustering and delta-function simultaneously' 59 | 60 | m = network.ecount() 61 | idx = network.node_to_name_map() 62 | q = 0.0 63 | for v in network.nodes: 64 | for w in network.nodes: 65 | if (C != None and C[v] == C[w]) or (delta != None and delta(v,w)): 66 | q -= network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m) 67 | q /= 2*m 68 | return q 69 | 70 | def assortativity_coeff(network, C=None): 71 | C, q_opt = find_communities(network) 72 | return q_opt/q_max(network, C) 73 | 74 | 75 | def q_merge(network, C, merge=None): 76 | m = network.ecount() 77 | n = network.ncount() 78 | A = network.adjacency_matrix(weighted=False) 79 | idx = network.node_to_name_map() 80 | q = 0.0 81 | for v in network.nodes: 82 | for w in network.nodes: 83 | if C[v] == C[w] or (merge is not None and C[v] in merge and C[w] in merge): 84 | q += A[idx[v], idx[w]] - network.nodes[v]['degree']*network.nodes[w]['degree']/(2*m) 85 | q /= 2*m 86 | return q 87 | 88 | 89 | def find_communities(network, iterations=100): 90 | # start with each node being in a separate cluster 91 | C = {} 92 | community_to_nodes = {} 93 | c = 0 94 | for n in network.nodes: 95 | C[n] = c 96 | community_to_nodes[c] = set([n]) 97 | c += 1 98 | q_current = q(network, C) 99 | communities = list(C.values()) 100 | 101 | for i in range(iterations): 102 | # randomly choose two communities 103 | x, y = np.random.choice(communities, size=2) 104 | # check Q of merged communities 105 | q_new = q_merge(network, C, merge=set([x, y])) 106 | if q_new > q_current: 107 | # actually merge the communities 108 | for n in community_to_nodes[x]: 109 | C[n] = y 110 | community_to_nodes[y] = community_to_nodes[y] | community_to_nodes[x] 111 | q_current = q_new 112 | communities.remove(x) 113 | del community_to_nodes[x] 114 | return C, q_current -------------------------------------------------------------------------------- /pathpy/algorithms/random_walk.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms to calculate shortest paths and distances in higher-order networks and paths. 3 | """ 4 | # -*- coding: utf-8 -*- 5 | 6 | # pathpy is an OpenSource python package for the analysis of time series data 7 | # on networks using higher- and multi order graphical models. 
8 | # 9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU Affero General Public License as published 13 | # by the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU Affero General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU Affero General Public License 22 | # along with this program. If not, see . 23 | # 24 | # Contact the developer: 25 | # 26 | # E-mail: scholtes@ifi.uzh.ch 27 | # Web: http://www.ingoscholtes.net 28 | from functools import singledispatch 29 | from collections import defaultdict 30 | 31 | from pathpy.utils import Log, Severity 32 | from pathpy.utils import PathpyNotImplemented 33 | from pathpy.classes import TemporalNetwork 34 | from pathpy.classes import Network 35 | from pathpy.classes import HigherOrderNetwork 36 | import numpy as _np 37 | 38 | __all__ = ['generate_walk'] 39 | 40 | @singledispatch 41 | def generate_walk(network, l=100, start_node=None): 42 | """ 43 | Generate a random walk trajectory of a given length, based on 44 | a weighted/directed/undirected network, temporal network, or 45 | higher-order network. 46 | 47 | Parameters: 48 | ----------- 49 | network: Network, TemporalNetwork, HigherOrderNetwork 50 | The temporal, first-order, or higher-order network, which 51 | will be used to randomly generate a walk through a network. 52 | l: int 53 | The (maximum) length of the walk to be generated. If a node 54 | with out-degree zero is encountered, the walk is terminated 55 | even if l has not been reached. 56 | start_node: str 57 | The (higher-order) node in which the random walk will be started. 58 | Default is None, in which case a random start node will be chosen. 59 | """ 60 | T = network.transition_matrix().todense().transpose() 61 | idx_map = network.node_to_name_map() 62 | nodes = _np.array([v for v in network.nodes]) 63 | 64 | itinerary = [] 65 | 66 | if start_node is None: 67 | start_node = _np.random.choice(nodes) 68 | 69 | # choose random start node 70 | itinerary.append(start_node) 71 | for j in range(l): 72 | # get transition probability vector T[idx -> . ] 73 | prob = _np.array(T[idx_map[itinerary[-1]], :])[0, :] 74 | nz = prob.nonzero()[0] 75 | # make one random transition 76 | if nz.shape[0] > 0: 77 | next_node = _np.random.choice(a=nodes[nz], p=prob[nz]) 78 | # add node to path 79 | itinerary.append(next_node) 80 | else: # no neighbor 81 | break 82 | return itinerary 83 | 84 | 85 | @generate_walk.register(HigherOrderNetwork) 86 | def _temporal_walk(higher_order_net, l=100, start_node=None): 87 | 88 | T = higher_order_net.transition_matrix().todense().transpose() 89 | idx_map = higher_order_net.node_to_name_map() 90 | nodes = _np.array([v for v in higher_order_net.nodes]) 91 | 92 | itinerary = [] 93 | 94 | if start_node is None: 95 | start_node = _np.random.choice(nodes) 96 | last = start_node 97 | 98 | # choose random start node 99 | for x in higher_order_net.higher_order_node_to_path(start_node): 100 | itinerary.append(x) 101 | for j in range(l): 102 | # get transition probability vector T[idx -> . 
] 103 | prob = _np.array(T[idx_map[last], :])[0, :] 104 | nz = prob.nonzero()[0] 105 | # make one random transition 106 | if nz.shape[0] > 0: 107 | next_node = _np.random.choice(a=nodes[nz], p=prob[nz]) 108 | # add node to path 109 | itinerary.append(higher_order_net.higher_order_node_to_path(next_node)[-1]) 110 | last = next_node 111 | else: # no neighbor 112 | break 113 | return itinerary 114 | 115 | 116 | @generate_walk.register(TemporalNetwork) 117 | def _temporal_walk(tempnet, l=100, start_node=None): 118 | 119 | itinerary = [] 120 | if start_node is None: 121 | current_node = _np.random.choice(tempnet.nodes) 122 | else: 123 | current_node = start_node 124 | itinerary.append(current_node) 125 | steps = 0 126 | for t in tempnet.ordered_times: 127 | prev_node = current_node 128 | # find possible targets in time t 129 | targets = set() 130 | for (v, w, time) in tempnet.time[t]: 131 | if v == current_node: 132 | targets.add(w) 133 | # move to random target 134 | if targets: 135 | current_node = _np.random.choice(list(targets)) 136 | steps += 1 137 | # add currently visited node 138 | if current_node != prev_node: 139 | itinerary.append(current_node) 140 | if steps == l: 141 | break 142 | return itinerary 143 | -------------------------------------------------------------------------------- /pathpy/algorithms/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | Collection of statistical measures for paths, (higher-order) networks, and temporal networks 3 | """ 4 | # -*- coding: utf-8 -*- 5 | 6 | # pathpy is an OpenSource python package for the analysis of time series data 7 | # on networks using higher- and multi order graphical models. 8 | # 9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU Affero General Public License as published 13 | # by the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU Affero General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU Affero General Public License 22 | # along with this program. If not, see . 23 | # 24 | # Contact the developer: 25 | # 26 | # E-mail: scholtes@ifi.uzh.ch 27 | # Web: http://www.ingoscholtes.net 28 | from collections import defaultdict 29 | from collections import Counter 30 | 31 | import numpy as _np 32 | import scipy as sp 33 | 34 | from pathpy .utils import Log, Severity 35 | from pathpy import Network 36 | 37 | from pathpy.utils import PathpyNotImplemented 38 | 39 | 40 | def local_clustering_coefficient(network, v): 41 | r"""Calculates the local clustering coefficient of a node in a directed or undirected network. 42 | The local clustering coefficient of any node with an (out-)degree smaller than two is defined 43 | as zero. For all other nodes, it is defined as: 44 | 45 | cc(c) := 2*k(i)/(d_i(d_i-1)) 46 | 47 | or 48 | 49 | cc(c) := k(i)/(d_out_i(d_out_i-1)) 50 | 51 | in undirected and directed networks respectively. 52 | 53 | Parameters 54 | ---------- 55 | network: Network 56 | The network in which to calculate the local clustering coefficient. 
57 | node: str 58 | The node for which the local clustering coefficient shall be calculated. 59 | """ 60 | if network.directed and network.nodes[v]['outdegree'] < 2: 61 | return 0.0 62 | if not network.directed and network.nodes[v]['degree'] < 2: 63 | return 0.0 64 | k_i = 0.0 65 | for i in network.successors[v]: 66 | for j in network.successors[v]: 67 | if (i, j) in network.edges: 68 | k_i += 1.0 69 | if not network.directed: 70 | return k_i/(network.nodes[v]['degree']*(network.nodes[v]['degree']-1.0)) 71 | return k_i/(network.nodes[v]['outdegree']*(network.nodes[v]['outdegree']-1.0)) 72 | 73 | 74 | def avg_clustering_coefficient(network): 75 | r"""Calculates the average (global) clustering coefficient of a directed or undirected network. 76 | 77 | Parameters 78 | ---------- 79 | network: Network 80 | The network in which to calculate the local clustering coefficient. 81 | """ 82 | return _np.mean([ local_clustering_coefficient(network, v) for v in network.nodes]) 83 | 84 | 85 | def mean_degree(network, degree='degree'): 86 | r"""Calculates the mean (in/out)-degree of a directed or undirected network. 87 | 88 | Parameters 89 | ---------- 90 | network: Network 91 | The network in which to calculate the mean degree 92 | """ 93 | assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree', \ 94 | 'Unknown degree property' 95 | return _np.mean([network.nodes[x][degree] for x in network.nodes]) 96 | 97 | 98 | def degree_dist(network, degree='degree'): 99 | r"""Calculates the (in/out)-degree distribution of a directed or undirected network. 100 | 101 | Parameters 102 | ---------- 103 | network: Network 104 | The network for which to calculate the degree distribution 105 | """ 106 | assert degree is 'degree' or degree is 'indegree' or degree is 'outdegree',\ 107 | 'Unknown degree property' 108 | p_k = Counter([network.nodes[x][degree] for x in network.nodes]) 109 | for x in p_k: 110 | p_k[x] = p_k[x]/network.ncount() 111 | return p_k 112 | 113 | 114 | def degree_moment(network, k, degree='degree'): 115 | r"""Calculates the k-th moment of the (in/out)-degree distribution of a 116 | directed or undirected network. 117 | 118 | Parameters 119 | ---------- 120 | network: Network 121 | The network in which to calculate the k-th moment of the degree distribution 122 | """ 123 | p_k = degree_dist(network, degree) 124 | mom = 0 125 | for x in p_k: 126 | mom += x**k * p_k[x] 127 | return mom 128 | 129 | 130 | def generating_func(network, x, degree='degree'): 131 | r"""Returns f(x) where f is the probability generating function for the 132 | (in/out)-degree distribution P(k) for a network. The function is defined in the interval [0,1]. 133 | The value returned is from the range [0,1]. The following properties hold: 134 | 135 | [1/k! d^k/dx f]_{x=0} = P(k) with d^k/dx f being the k-th derivative of f by x 136 | f'(1) = with f' being the first derivative and the mean degree 137 | [(x d/dx)^m f]_{x=1} = with being the m-th raw moment of P 138 | 139 | Parameters 140 | ---------- 141 | x: float, list, numpy.ndarray 142 | The argument(s) for which the value f(x) shall be computed. 
143 | 144 | Returns 145 | ------- 146 | Either a single float value f(x) (if x is float) or a numpy.ndarray 147 | containing the function values f(x) for all arguments in x 148 | 149 | Example 150 | ------- 151 | >>> import pathpy as pp 152 | >>> import numpy as np 153 | >>> import matplotlib.pyplot as plt 154 | 155 | >>> n = pp.Network() 156 | >>> n.add_edge('a', 'b') 157 | >>> n.add_edge('b', 'c') 158 | >>> n.add_edge('a', 'c') 159 | >>> n.add_edge('c', 'd') 160 | >>> n.add_edge('d', 'e') 161 | >>> n.add_edge('d', 'f') 162 | >>> n.add_edge('e', 'f') 163 | 164 | >>> # print single value f(x) 165 | >>> stat = pp.statistics.generating_func(n, 0.3) 166 | >>> print('{:0.3f}'.format(stat)) 167 | 0.069 168 | 169 | >>> # plot generating function 170 | >>> x = np.linspace(0, 1, 20) 171 | >>> y = pp.statistics.generating_func(n, x) 172 | >>> x = plt.plot(x, y) 173 | """ 174 | 175 | assert isinstance(x, (float, list, _np.ndarray)), \ 176 | 'Argument can only be float, list or numpy.ndarray' 177 | 178 | p_k = degree_dist(network, degree) 179 | 180 | if isinstance(x, float): 181 | x_range = [x] 182 | else: 183 | x_range = x 184 | 185 | values = defaultdict(lambda: 0) 186 | for k in p_k: 187 | for v in x_range: 188 | values[v] += p_k[k] * v**k 189 | 190 | if len(x_range) > 1: 191 | return _np.array(list(values.values())) 192 | else: 193 | return values[x] 194 | 195 | 196 | def molloy_reed_fraction(network, degree='degree'): 197 | r"""Calculates the Molloy-Reed fraction / based on the (in/out)-degree 198 | distribution of a directed or undirected network. 199 | 200 | Parameters 201 | ---------- 202 | network: Network 203 | The network in which to calculate the Molloy-Reed fraction 204 | """ 205 | return degree_moment(network, k=2, degree=degree)/degree_moment(network, k=1, degree=degree) 206 | -------------------------------------------------------------------------------- /pathpy/algorithms/temporal_walk.py: -------------------------------------------------------------------------------- 1 | """ 2 | Algorithms to calculate shortest paths and distances in higher-order networks and paths. 3 | """ 4 | # -*- coding: utf-8 -*- 5 | 6 | # pathpy is an OpenSource python package for the analysis of time series data 7 | # on networks using higher- and multi order graphical models. 8 | # 9 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 10 | # 11 | # This program is free software: you can redistribute it and/or modify 12 | # it under the terms of the GNU Affero General Public License as published 13 | # by the Free Software Foundation, either version 3 of the License, or 14 | # (at your option) any later version. 15 | # 16 | # This program is distributed in the hope that it will be useful, 17 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 18 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 | # GNU Affero General Public License for more details. 20 | # 21 | # You should have received a copy of the GNU Affero General Public License 22 | # along with this program. If not, see . 23 | # 24 | # Contact the developer: 25 | # 26 | # E-mail: scholtes@ifi.uzh.ch 27 | # Web: http://www.ingoscholtes.net 28 | 29 | from pathpy.utils import Log, Severity 30 | from pathpy.classes import TemporalNetwork 31 | from pathpy.algorithms import random_walk 32 | 33 | def generate_walk(tempnet, l=100, start_node=None): 34 | """ 35 | DEPRECATED 36 | """ 37 | Log.add('The temporal_walk.generate_walk function is deprecated. 
\ 38 | Please use random_walk.generate_walk instead.', Severity.WARNING) 39 | return random_walk.generate_walk(tempnet, l, start_node) 40 | -------------------------------------------------------------------------------- /pathpy/classes/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | An internal module that groups basic classes of pathpy. Please use top-level namespace instead. 3 | """ 4 | 5 | from .paths import Paths 6 | from .dag import DAG 7 | from .network import Network 8 | from .higher_order_network import HigherOrderNetwork 9 | from .multi_order_model import MultiOrderModel 10 | from .temporal_network import TemporalNetwork 11 | from .markov_sequence import MarkovSequence 12 | from .rolling_time_window import RollingTimeWindow 13 | -------------------------------------------------------------------------------- /pathpy/classes/markov_sequence.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 
20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | from collections import defaultdict 26 | import numpy as np 27 | 28 | from pathpy.utils import Log 29 | 30 | 31 | np.seterr(all='warn') 32 | 33 | 34 | class MarkovSequence: 35 | """ 36 | A class that can be used to fit 37 | higher-order Markov chain models for 38 | sequences generated from concatenated paths 39 | """ 40 | 41 | def __init__(self, sequence): 42 | """ 43 | Generates a Markov model for a sequence, given 44 | as a single list of strings 45 | """ 46 | 47 | # The sequence to be modeled 48 | self.sequence = sequence 49 | 50 | # The transition probabilities of higher-order Markov chains 51 | self.P = {} 52 | 53 | # the set of states of higher-order Markov chains 54 | self.states = {1: set(sequence)} 55 | 56 | def fit_markov_model(self, k=1): 57 | """ Generates a k-th order Markov model 58 | for the underlying sequence 59 | """ 60 | 61 | # TODO: Add support for k=0 62 | 63 | assert self.sequence, "Error: Empty sequence" 64 | 65 | # MLE fit of transition probabilities 66 | self.P[k] = defaultdict(lambda: defaultdict(lambda: 0.0)) 67 | 68 | Log.add('Fitting Markov model with order k = ' + str(k)) 69 | 70 | # Generate initial memory prefix 71 | mem = (()) 72 | for s in self.sequence[:k]: 73 | mem += (s,) 74 | 75 | # count state transitions 76 | for s in self.sequence[k:]: 77 | self.P[k][mem][s] += 1.0 78 | 79 | # shift memory by one element 80 | mem = mem[1:] + (s,) 81 | 82 | # normalize transitions 83 | for m in self.P[k]: 84 | S = float(sum(self.P[k][m].values())) 85 | for s in self.P[k][m]: 86 | self.P[k][m][s] /= S 87 | Log.add('finished.') 88 | 89 | def likelihood(self, k=1, log=True): 90 | """ 91 | Returns the likelihood of the sequence 92 | assuming a k-th order Markov model 93 | """ 94 | 95 | if k not in self.P: 96 | self.fit_markov_model(k) 97 | 98 | L = 0 99 | 100 | # Generate initial prefix 101 | mem = (()) 102 | for s in self.sequence[:k]: 103 | mem += (s,) 104 | 105 | for s in self.sequence[k:]: 106 | L += np.log(self.P[k][mem][s]) 107 | 108 | # shift memory by one element 109 | mem = mem[1:] + (s,) 110 | 111 | if log: 112 | return L 113 | 114 | return np.exp(L) 115 | 116 | def bic(self, k=1, m=1): 117 | """ Returns the Bayesian Information Criterion 118 | assuming a k-th order Markov model """ 119 | 120 | if k not in self.P: 121 | self.fit_markov_model(k) 122 | 123 | if m not in self.P: 124 | self.fit_markov_model(m) 125 | 126 | L_k = self.likelihood(k, log=True) 127 | L_m = self.likelihood(m, log=True) 128 | 129 | s = len(self.states[1]) 130 | n = len(self.sequence)-k 131 | 132 | # the transition matrix of a first-order model with s states has s**2 entries, 133 | # subject to the constraint that entries in each row must sum up to one (thus 134 | # effectively reducing the degrees of freedom by a factor of s, i.e. we have 135 | # s^2-s^1. 
Generalizing this to order k, we arrive at 136 | # s^k * (s-1) = s^(k+1)-s^k degrees of freedom 137 | bic = np.log(n) * (s ** k - s ** m) * (s - 1) - 2.0 * (L_k - L_m) 138 | 139 | return bic 140 | 141 | def aic(self, k=1, m=1): 142 | """ Returns the Akaike Information Criterion 143 | assuming a k-th order Markov model """ 144 | 145 | if k not in self.P: 146 | self.fit_markov_model(k) 147 | 148 | if m not in self.P: 149 | self.fit_markov_model(m) 150 | 151 | L_k = self.likelihood(k, log=True) 152 | L_m = self.likelihood(m, log=True) 153 | 154 | s = len(self.states[1]) 155 | 156 | aic = 2 * (s**k - s**m) * (s-1) - 2.0 * (L_k - L_m) 157 | 158 | return aic 159 | 160 | def estimate_order(self, maxOrder, method='BIC'): 161 | """ Estimates the optimal order of a Markov model 162 | based on Likelihood, BIC or AIC """ 163 | 164 | if method not in {'BIC', 'AIC', 'Likelihood'}: # pragma: no cover 165 | msg = "Expecting method 'AIC', 'BIC' or 'Likelihood', got '%s'" % method 166 | raise ValueError(msg) 167 | 168 | values = [] 169 | orders = [] 170 | 171 | # We need k < m for the BIC and AIC calculation, which 172 | # is why we only test up to maxOrder - 1 173 | for k in range(1, maxOrder): 174 | if k not in self.P: 175 | self.fit_markov_model(k) 176 | 177 | orders.append(k) 178 | 179 | if method == 'AIC': 180 | values.append(self.aic(k, maxOrder)) 181 | elif method == 'BIC': 182 | values.append(self.bic(k, maxOrder)) 183 | elif method == 'Likelihood': 184 | values.append(self.likelihood(k, log=True)) 185 | 186 | if method == 'Likelihood': 187 | values.append(self.likelihood(maxOrder, log=True)) 188 | orders.append(maxOrder) 189 | 190 | # return order at which likelihood is maximized 191 | return orders[np.argmax(values)] 192 | 193 | # return order at which BIC/AIC are minimized 194 | return orders[np.argmin(values)] 195 | -------------------------------------------------------------------------------- /pathpy/classes/rolling_time_window.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | from pathpy.utils import Log, Severity 26 | from pathpy.classes import Network 27 | 28 | 29 | class RollingTimeWindow: 30 | r""" 31 | An iterable rolling time window that can be used to perform time slice 32 | analysis of time-stamped network data. 
33 | """ 34 | 35 | def __init__(self, temporal_net, window_size, step_size=1, directed=True, return_window=False): 36 | r""" 37 | Initialises a RollingTimeWindow instance that can be used to 38 | iterate through a sequence of time-slice networks for a given 39 | TemporalNetwork instance. 40 | 41 | Parameters: 42 | ----------- 43 | temporal_net: TemporalNetwork 44 | TemporalNetwork instance that will be used to generate the 45 | sequence of time-slice networks. 46 | window_size: int 47 | The width of the rolling time window used to create 48 | time-slice networks. 49 | step_size: int 50 | The step size in time units by which the starting time of the rolling 51 | window will be incremented on each iteration. Default is 1. 52 | directed: bool 53 | Whether or not the generated time-slice networks should be directed. 54 | Default is true. 55 | return_window: bool 56 | Whether or not the iterator shall return the current time window 57 | as a second return value. Default is False. 58 | 59 | Returns 60 | ------- 61 | RollingTimeWindow 62 | An iterable sequence of tuples Network, [window_start, window_end] 63 | 64 | Examples 65 | -------- 66 | >>> t = pathpy.TemporalNetwork.read_file(DATA) 67 | >>> 68 | >>> for n in pathpy.RollingTimeWindow(t, window_size=100): 69 | >>> print(n) 70 | >>> 71 | >>> for n, w in pathpy.RollingTimeWindow(t, window_size=100, step_size=10, return_window=True): 72 | >>> print('Time window starting at {0} and ending at {1}'.format(w[0], w[1])) 73 | >>> print(network) 74 | """ 75 | self.temporal_network = temporal_net 76 | self.window_size = window_size 77 | self.step_size = step_size 78 | self.current_time = min(temporal_net.ordered_times) 79 | self.max_time = max(temporal_net.ordered_times) 80 | self.directed = directed 81 | self.return_window = return_window 82 | 83 | def __iter__(self): 84 | return self 85 | 86 | 87 | def __next__(self): 88 | if self.current_time+self.window_size <= self.max_time: 89 | time_window = [self.current_time, self.current_time+self.window_size] 90 | n = Network.from_temporal_network(self.temporal_network, min_time=self.current_time, 91 | max_time=self.current_time+self.window_size, 92 | directed=self.directed) 93 | self.current_time += self.step_size 94 | if self.return_window: 95 | return n, time_window 96 | else: 97 | return n 98 | else: 99 | raise StopIteration() 100 | -------------------------------------------------------------------------------- /pathpy/path_extraction/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Provides methods to generate path statistics based on origin destination statistics, 3 | directed acyclic graphs, temporal networks, or random walks in a network. 
4 | """ 5 | from .dag_paths import paths_from_dag 6 | 7 | from .temporal_paths import paths_from_temporal_network 8 | from .temporal_paths import paths_from_temporal_network_dag 9 | from .temporal_paths import paths_from_temporal_network_single 10 | from .temporal_paths import sample_paths_from_temporal_network_dag 11 | from .temporal_paths import generate_causal_tree 12 | 13 | from .random_walk import random_walk 14 | from .random_walk import paths_from_random_walk 15 | from .random_walk import random_paths 16 | 17 | from .origin_destination_stats import paths_from_origin_destination 18 | from .origin_destination_stats import paths_to_origin_destination 19 | from .origin_destination_stats import read_origin_destination 20 | -------------------------------------------------------------------------------- /pathpy/path_extraction/origin_destination_stats.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | from collections import defaultdict 26 | 27 | from pathpy import Paths 28 | from pathpy.utils import Log 29 | from pathpy.algorithms.shortest_paths import shortest_paths 30 | 31 | import numpy as np 32 | 33 | def read_origin_destination(filename, separator=','): 34 | """Reads origin/destination statistics from a csv file 35 | with the following structure: 36 | 37 | origin1,destination1,weight 38 | origin2,destination2,weight 39 | origin3,destination3,weight 40 | 41 | Parameters 42 | ---------- 43 | filename: str 44 | path to the file containing the origin/destination statistics 45 | separator: str 46 | arbitrary separation character (default: ',') 47 | 48 | Returns 49 | ------- 50 | list 51 | """ 52 | origin_destination_list = [] 53 | Log.add('Reading origin/destination statistics from file ...') 54 | 55 | with open(filename, 'r') as f: 56 | line = f.readline() 57 | while line: 58 | fields = line.rstrip().split(separator) 59 | origin_destination_list.append((fields[0].strip(), fields[1].strip(), float(fields[2].strip()))) 60 | line = f.readline() 61 | Log.add('Finished.') 62 | 63 | return origin_destination_list 64 | 65 | def paths_from_origin_destination(origin_destination_list, network, 66 | distribute_weight=True): 67 | """Extracts shortest path statistics based on origin/destination data. 68 | Such data capture the statistics of the origin (i.e. the start node) and destination 69 | (i.e. the target) node of itineraries in a given network. 
70 | 71 | Common examples include passenger origin and destination statistics in transportation 72 | networks. The methods in this class can be used to read origin/destination data from 73 | a file and generate path statistics based on the assumption that all paths from an 74 | origin and a destination follow the shortest path in the network. 75 | 76 | Extracts pathway statistics by calculating shortest paths between all origin and 77 | destination pairs in a given network. 78 | 79 | Parameters 80 | ---------- 81 | origin_destination_list: list 82 | A list of tuples (o, d, w) containing the origin (o), destination (d), 83 | and (possibly float) weight w of paths. 84 | network: 85 | The network topology for which shortest paths will be calculated. Names of nodes 86 | in the network must match the node names used in the origin destination list. 87 | distribute_weight: bool 88 | If set to True, the weight of an origin-destination pair will be equally distributed 89 | (in terms of whole integer observations) across multiple shortest paths between the 90 | origin and destination. If False, the weight will be assigned to a randomly chosen 91 | shortest path. Default is True. 92 | 93 | Returns 94 | ------- 95 | paths 96 | """ 97 | assert network is not None, \ 98 | 'Error: extraction of origin destination paths requires a network topology' 99 | 100 | all_paths = shortest_paths(network) 101 | 102 | paths = Paths() 103 | # OD is a list of tuples of the form (origin_node, destination_node, weight) 104 | # that indicates that the shortest path from origin_node to destination_node was 105 | # observed weight times 106 | Log.add('Starting origin destination path calculation ...') 107 | for (o, d, w) in origin_destination_list: 108 | assert o in network.nodes, 'Error: could not find node ' + str(o) + ' in network' 109 | assert d in network.nodes, 'Error: could not find node ' + str(d) + ' in network' 110 | sp = list(all_paths[o][d]) 111 | num_paths = len(sp) 112 | if distribute_weight and num_paths > 1: 113 | # to avoid introducing false correlations that are not justified by the 114 | # available data, the (integer) weight of an origin destination pair can be 115 | # distributed among all possible shortest paths between a pair of nodes, 116 | # while constraining the weight of shortest paths to integers. 117 | for i in range(int(w)): 118 | paths.add_path(sp[i % num_paths], frequency=(0, 1)) 119 | else: 120 | # in this case, the full weight of an origin destination path will be 121 | # assigned to a random single shortest path in the network 122 | paths.add_path(sp[np.random.randint(num_paths)], frequency=(0, w)) 123 | Log.add('finished.') 124 | return paths 125 | 126 | 127 | def paths_to_origin_destination(paths): 128 | """ 129 | Returns a list that contains path frequencies between all 130 | origin destination pairs in a path object. The result can e.g. be used to 131 | create shortest path models that preserve the origin-destination statistics in real 132 | path data. 
133 | 134 | Parameters 135 | ---------- 136 | paths: Paths 137 | collection of weighted paths based on which origin destination 138 | statistics shall be computed 139 | 140 | Returns 141 | ------- 142 | list of tuples (o, d, w) where o is origin, d is destination, and w is the weight 143 | """ 144 | od_stats = defaultdict(lambda: 0.0) 145 | 146 | Log.add('Calculating origin/destination statistics from paths ...') 147 | # iterate through all paths and create path statistics 148 | for x in paths.paths: 149 | for p in paths.paths[x]: 150 | o = p[0] 151 | d = p[-1] 152 | if paths.paths[x][p][1] > 0: 153 | od_stats[o, d] += paths.paths[x][p][1] 154 | od_list = [ (od[0], od[1], f) for od, f in od_stats.items()] 155 | Log.add('finished.') 156 | return od_list 157 | -------------------------------------------------------------------------------- /pathpy/path_extraction/random_walk.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | 26 | import collections as _co 27 | import random 28 | 29 | import numpy as _np 30 | 31 | from pathpy.utils import Log, Severity 32 | from pathpy.classes.network import Network 33 | from pathpy.classes.paths import Paths 34 | from pathpy import algorithms 35 | 36 | def random_walk(network, l, n=1, start_node=None): 37 | """ 38 | [DEPRECATED] 39 | Generates n paths of a random walker in the given network 40 | and returns them as a paths object. 41 | Each path has a length of l steps. 42 | Parameters 43 | ---------- 44 | network: Network, TemporalNetwork, HigherOrderNetwork 45 | The network structure on which the random walks will be simulated. 46 | int: l 47 | The (maximum) length of each random walk path. A path will 48 | terminate if a node with outdegree zero is reached. 49 | int: n 50 | The number of random walk paths to generate. 51 | """ 52 | Log.add('The path_extraction.random_walk function is deprecated. Please use paths_from_random_walk instead.', Severity.WARNING) 53 | return paths_from_random_walk(network, l, n, start_node) 54 | 55 | 56 | def paths_from_random_walk(network, l, n=1, start_node=None): 57 | """ 58 | Generates n paths of a random walker in the given network 59 | and returns them as a paths object. 60 | Each path has a length of l steps. 61 | Parameters 62 | ---------- 63 | network: Network, TemporalNetwork, HigherOrderNetwork 64 | The network structure on which the random walks will be simulated. 65 | int: l 66 | The (maximum) length of each random walk path. 
A path will 67 | terminate if a node with outdegree zero is reached. 68 | int: n 69 | The number of random walk paths to generate. 70 | """ 71 | p = Paths() 72 | for i in range(n): 73 | path = algorithms.random_walk.generate_walk(network, l, start_node) 74 | p.add_path(tuple(path)) 75 | return p 76 | 77 | def random_paths(network, paths_orig, rand_frac=1.0): 78 | """ 79 | Generates Markovian paths of a random walker in a given network 80 | and returns them as a paths object. 81 | Parameters 82 | ---------- 83 | network: Network 84 | The network structure on which the random walks will be simulated. 85 | paths_orig: Paths 86 | Paths that we want to randomise 87 | rand_frac: float 88 | The fraction of paths that will be randomised 89 | """ 90 | p_rnd = Paths() 91 | for l in paths_orig.paths: 92 | for path, pcounts in paths_orig.paths[l].items(): 93 | if pcounts[1] > 0: 94 | n_path = int(pcounts[1]) 95 | n_path_rand = _np.random.binomial(n_path, rand_frac) 96 | n_path_keep = n_path - n_path_rand 97 | 98 | ## Add the random paths 99 | if n_path_rand > 0: 100 | p_rnd += paths_from_random_walk(network, l, n_path_rand, path[0]) 101 | 102 | ## Keep the rest 103 | if n_path_keep > 0: 104 | p_rnd.add_path(path, frequency=n_path_keep) 105 | return p_rnd -------------------------------------------------------------------------------- /pathpy/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | A collection of logging functions and exceptions. 3 | """ 4 | from .log import Log 5 | from .log import Severity 6 | from .exceptions import PathpyException 7 | from .exceptions import EmptySCCError 8 | from .exceptions import PathpyNotImplemented 9 | from .exceptions import PathsTooShort 10 | from .exceptions import PathpyError 11 | -------------------------------------------------------------------------------- /pathpy/utils/default_containers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pathpy is an OpenSource python package for the analysis of time series data 3 | # on networks using higher- and multi order graphical models. 4 | # 5 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 6 | # 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU Affero General Public License as published 9 | # by the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU Affero General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Affero General Public License 18 | # along with this program. If not, see . 19 | # 20 | # Contact the developer: 21 | # 22 | # E-mail: scholtes@ifi.uzh.ch 23 | # Web: http://www.ingoscholtes.net 24 | 25 | """ 26 | Provides default containers for various classes 27 | which are used to store nodes, edges and similar objects. 28 | 29 | To make the various classes pickle-able the defaultdicts need to be publicly addressable 30 | function names, this means that no lambda functions are allowed. 31 | 32 | All pathpy classes which required a default value as a container, should use these here. 
33 | """ 34 | 35 | from collections import defaultdict 36 | import numpy as np 37 | 38 | 39 | def nested_zero_default(): 40 | """ 41 | Returns a nested default dict (2 levels) 42 | with a numpy zero array of length 0 as default 43 | """ 44 | return defaultdict(zero_array_default) 45 | 46 | 47 | def _zero_array(): 48 | """ 49 | Returns a zero numpy array of length 2 50 | """ 51 | return np.array([0.0, 0.0]) 52 | 53 | 54 | def zero_array_default(): 55 | """ 56 | Returns a default dict with numpy zero array af length 2 as default 57 | """ 58 | return defaultdict(_zero_array) 59 | -------------------------------------------------------------------------------- /pathpy/utils/exceptions.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # pathpy is an OpenSource python package for the analysis of time series data 3 | # on networks using higher- and multi order graphical models. 4 | # 5 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 6 | # 7 | # This program is free software: you can redistribute it and/or modify 8 | # it under the terms of the GNU Affero General Public License as published 9 | # by the Free Software Foundation, either version 3 of the License, or 10 | # (at your option) any later version. 11 | # 12 | # This program is distributed in the hope that it will be useful, 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | # GNU Affero General Public License for more details. 16 | # 17 | # You should have received a copy of the GNU Affero General Public License 18 | # along with this program. If not, see . 19 | # 20 | # Contact the developer: 21 | # 22 | # E-mail: scholtes@ifi.uzh.ch 23 | # Web: http://www.ingoscholtes.net 24 | """ 25 | Classes to signal errors and exceptions in pathpy. 26 | """ 27 | 28 | class PathpyException(Exception): 29 | """ 30 | Base class for exceptions in Pathpy. 31 | """ 32 | 33 | 34 | class PathpyError(PathpyException): 35 | """ 36 | Exception for a serious error in Pathpy 37 | """ 38 | 39 | 40 | class PathpyNotImplemented(PathpyException): 41 | """ 42 | Exception for procedure not implemented in pathpy 43 | """ 44 | 45 | 46 | class EmptySCCError(PathpyException): 47 | """ 48 | This exception is thrown whenever a non-empty strongly 49 | connected component is needed, but when we encounter an empty one. 50 | """ 51 | 52 | 53 | class PathsTooShort(PathpyException): 54 | """ 55 | This exception if thrown if available paths are too 56 | short for the requested operation. 57 | """ 58 | -------------------------------------------------------------------------------- /pathpy/utils/log.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 
12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | import enum 26 | from datetime import datetime 27 | import sys 28 | 29 | 30 | __all__ = ["Severity", "Log"] 31 | 32 | 33 | class Severity(enum.IntEnum): 34 | """ An enumeration that can be used to indicate 35 | the severity of log messages, and which can be 36 | used to filter messages based on severities. 37 | """ 38 | 39 | # Error messages 40 | ERROR = 4 41 | 42 | # Warning messages 43 | WARNING = 3 44 | 45 | # Informational messages (default minimum level) 46 | INFO = 2 47 | 48 | # Messages regarding timing and performance 49 | TIMING = 1 50 | 51 | # Debug messages (really verbose) 52 | DEBUG = 0 53 | 54 | 55 | class Log: 56 | """ A simple logging class that allows selecting which messages should 57 | be recorded in the output, and where these messages should be directed. 58 | """ 59 | 60 | # the output stream to which log entries will be written 61 | output_stream = sys.stdout 62 | 63 | # The minimum severity level of messages to be logged 64 | min_severity = Severity.INFO 65 | 66 | @staticmethod 67 | def set_min_severity(severity): # pragma: no cover 68 | """ Sets the minimum severity level a message 69 | needs to have in order to be recorded in the output stream. 70 | By default, any message which has a severity of at least 71 | Severity.INFO will be written to the output stream. All messages 72 | with lower priority will be suppressed. 73 | """ 74 | Log.min_severity = severity 75 | 76 | @staticmethod 77 | def set_output_stream(stream): # pragma: no cover 78 | """ Sets the output stream to which all messages will be 79 | written. By default, this is sys.stdout, but it can be 80 | changed in order to redirect the log to a logfile. 81 | """ 82 | Log.output_stream = stream 83 | 84 | @staticmethod 85 | def add(msg, severity=Severity.INFO): # pragma: no cover 86 | """ Adds a message with the given severity to the log. This message will be written 87 | to the log output stream, which by default is sys.stdout. A newline character 88 | will be added to the message by default.
89 | """ 90 | if severity >= Log.min_severity: 91 | ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 92 | Log.output_stream.write(ts + ' [' + str(severity) + ']\t' + msg + '\n') 93 | Log.output_stream.flush() 94 | -------------------------------------------------------------------------------- /pathpy/visualisation/__init__.py: -------------------------------------------------------------------------------- 1 | """Provides HTML and TikZ visualisations for networks, temporal networks, and paths""" 2 | 3 | from .html import plot 4 | from .html import export_html 5 | from .html import plot_diffusion 6 | from .html import export_html_diffusion 7 | from .html import plot_walk 8 | from .html import export_html_walk 9 | 10 | from .tikz import export_tikz 11 | 12 | from .pdf import svg_to_pdf 13 | from .pdf import svg_to_png 14 | -------------------------------------------------------------------------------- /pathpy/visualisation/alluvial.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | import json 26 | import os 27 | from string import Template 28 | 29 | import string 30 | import random 31 | 32 | import collections as _co 33 | 34 | from pathpy.classes.higher_order_network import HigherOrderNetwork 35 | from pathpy.classes.paths import Paths 36 | from pathpy.classes.network import Network 37 | 38 | import numpy as _np 39 | 40 | def generate_memory_net(paths, node, self_loops=True): 41 | """ 42 | Helper function that generates a directed and weighted 43 | memory network where weights capture path statistics. 44 | """ 45 | n = Network(directed=True) 46 | 47 | # consider all (sub-)paths of length two 48 | # through the focal node 49 | for p in paths.paths[2]: 50 | if p[1] == node: 51 | if self_loops or (p[0] != node and p[2] != node): 52 | src = 'src_{0}'.format(p[0]) 53 | tgt = 'tgt_{0}'.format(p[2]) 54 | mem = 'mem_{0}_{1}'.format(p[0], p[1]) 55 | # calculate frequency of sub-paths src->focal_node->*, i.e.
paths that 56 | # continue through the focal_node 57 | # w_1 = 0 58 | # for x in paths.nodes: 59 | # ct = p[:2]+(x,) 60 | # if ct in paths.paths[2] and x != focal_node: 61 | # w_1 += paths.paths[2][ct].sum() 62 | 63 | # calculate frequency of (sub-)path src -> focal_node -> tgt 64 | w_2 = paths.paths[2][p].sum() 65 | n.add_edge(src, mem, weight=1) 66 | n.add_edge(mem, tgt, weight=w_2) 67 | 68 | 69 | # adjust weights of links to memory nodes: 70 | for m in n.nodes: 71 | if m.startswith('mem'): 72 | for u in n.predecessors[m]: 73 | n.edges[(u,m)]['weight'] = n.nodes[m]['outweight'] 74 | n.nodes[m]['inweight'] = n.nodes[m]['outweight'] 75 | return n 76 | 77 | 78 | def generate_memory_net_markov(network, focal_node, self_loops=True): 79 | """ 80 | Generates a directed and weighted network with flow values based 81 | on a network and an assumption of Markov flows. 82 | """ 83 | n = Network(directed=True) 84 | 85 | out_weight = _np.sum(network.nodes[focal_node]['outweight']) 86 | 87 | for u in network.predecessors[focal_node]: 88 | for w in network.successors[focal_node]: 89 | if self_loops or (u!= focal_node and w != focal_node): 90 | src = 'src_{0}'.format(u) 91 | tgt = 'tgt_{0}'.format(w) 92 | mem = 'mem_{0}_{1}'.format(u, focal_node) 93 | 94 | w_1 = _np.sum(network.edges[(u, focal_node)]['weight']) 95 | 96 | # at random, we expect the flow to be proportional to the relative edge weight 97 | w_2 = w_1 * (_np.sum(network.edges[(focal_node, w)]['weight'])/out_weight) 98 | n.add_edge(src, mem, weight=w_1) 99 | n.add_edge(mem, tgt, weight=w_2) 100 | return n 101 | 102 | 103 | def generate_diffusion_net(paths, node=None, markov=True, steps=5): 104 | """ 105 | """ 106 | g1 = HigherOrderNetwork(paths, k=1) 107 | map_1 = g1.node_to_name_map() 108 | 109 | prob = _np.zeros(g1.ncount()) 110 | prob = prob.transpose() 111 | if node is None: 112 | node = g1.nodes[0] 113 | 114 | prob[map_1[node]] = 1.0 115 | 116 | T = g1.transition_matrix() 117 | 118 | flow_net = Network(directed=True) 119 | 120 | if markov: 121 | # if markov == True flows are given by first-order transition matrix 122 | for t in range(1, steps+1): 123 | # calculate flow from i to j in step t 124 | for i in g1.nodes: 125 | for j in g1.nodes: 126 | i_to_j = prob[map_1[i]] * T[map_1[j], map_1[i]] 127 | if i_to_j > 0: 128 | flow_net.add_edge('{0}_{1}'.format(i, t-1), '{0}_{1}'.format(j, t), weight = i_to_j) 129 | prob = T.dot(prob) 130 | else: 131 | # if markov == False calculate flows based on paths starting in initial_node 132 | for p in paths.paths[steps]: 133 | if p[0] == node: 134 | for t in range(len(p)-1): 135 | flow_net.add_edge('{0}_{1}'.format(p[t], t), '{0}_{1}'.format(p[t+1], t+1), weight = paths.paths[steps][p].sum()) 136 | 137 | # normalize flows and balance in- and out-weight for all nodes 138 | # normalization = flow_net.nodes['{0}_{1}'.format(initial_node, 0)]['outweight'] 139 | 140 | flow_net.nodes[node+'_0']['inweight'] = 1.0 141 | Q = [node+'_0'] 142 | # adjust weights using BFS 143 | while Q: 144 | v = Q.pop() 145 | # print(v) 146 | inweight = flow_net.nodes[v]['inweight'] 147 | outweight = flow_net.nodes[v]['outweight'] 148 | 149 | for w in flow_net.successors[v]: 150 | flow_net.nodes[w]['inweight'] = flow_net.nodes[w]['inweight'] - flow_net.edges[(v,w)]['weight'] 151 | flow_net.edges[(v,w)]['weight'] = (inweight/outweight) * flow_net.edges[(v,w)]['weight'] 152 | flow_net.nodes[w]['inweight'] = flow_net.nodes[w]['inweight'] + flow_net.edges[(v,w)]['weight'] 153 | Q.append(w) 154 | return flow_net 155 | 
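The three helpers above feed the alluvial and diffusion visualisations: they turn path statistics into small directed flow networks. A minimal, hedged usage sketch (the toy paths, the focal node 'b' and the direct import of the helper are illustrative assumptions, not code taken from the module):

```python
import pathpy as pp
from pathpy.visualisation.alluvial import generate_memory_net

# toy path statistics passing through the focal node 'b'
paths = pp.Paths()
paths.add_path('a,b,c')
paths.add_path('d,b,e')

# directed, weighted memory network around 'b': src_* -> mem_* -> tgt_* edges
# whose weights reflect the observed sub-paths of length two
mem_net = generate_memory_net(paths, 'b', self_loops=False)
print(mem_net.ncount(), mem_net.ecount())
```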
-------------------------------------------------------------------------------- /pathpy/visualisation/pdf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # pathpy is an OpenSource python package for the analysis of time series data 4 | # on networks using higher- and multi order graphical models. 5 | # 6 | # Copyright (C) 2016-2018 Ingo Scholtes, ETH Zürich/Universität Zürich 7 | # 8 | # This program is free software: you can redistribute it and/or modify 9 | # it under the terms of the GNU Affero General Public License as published 10 | # by the Free Software Foundation, either version 3 of the License, or 11 | # (at your option) any later version. 12 | # 13 | # This program is distributed in the hope that it will be useful, 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | # GNU Affero General Public License for more details. 17 | # 18 | # You should have received a copy of the GNU Affero General Public License 19 | # along with this program. If not, see . 20 | # 21 | # Contact the developer: 22 | # 23 | # E-mail: scholtes@ifi.uzh.ch 24 | # Web: http://www.ingoscholtes.net 25 | 26 | def svg_to_pdf(svg_file, output_file): 27 | """ 28 | Method to convert an SVG file to a PDF file, suitable for 29 | scholarly publications. This method requires the third-party 30 | libraries svglib and reportlab. 31 | """ 32 | # uses svglib to render an SVG to PDF 33 | from svglib.svglib import svg2rlg 34 | from reportlab.graphics import renderPDF 35 | 36 | drawing = svg2rlg(svg_file) 37 | renderPDF.drawToFile(drawing, output_file) 38 | 39 | 40 | def svg_to_png(svg_file, output_file): 41 | """ 42 | Method to convert an SVG file to a PNG file. This method 43 | requires the third-party libraries svglib and reportlab.
44 | """ 45 | # uses svglib to render an SVG to PNG 46 | from svglib.svglib import svg2rlg 47 | from reportlab.graphics import renderPM 48 | 49 | drawing = svg2rlg(svg_file) 50 | renderPM.drawToFile(drawing, output_file, fmt='PNG') 51 | -------------------------------------------------------------------------------- /pathpy/visualisation_assets/diffusion_template.html: -------------------------------------------------------------------------------- [HTML/JavaScript visualisation template; the markup was stripped during extraction and only the "[save svg]" link text remains] -------------------------------------------------------------------------------- /pathpy/visualisation_assets/network_template.html: -------------------------------------------------------------------------------- [HTML/JavaScript visualisation template; markup stripped during extraction, only the "[save svg]" link text remains] -------------------------------------------------------------------------------- /pathpy/visualisation_assets/paths_template.html: -------------------------------------------------------------------------------- [HTML/JavaScript visualisation template; markup stripped during extraction, only the "[save svg]" link text remains] -------------------------------------------------------------------------------- /pathpy_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/pathpy_logo.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | exclude = docs 6 | max-line-length = 90 7 | max-complexity = 10 8 | 9 | [aliases] 10 | # Define setup.py command aliases here 11 | test = pytest 12 | 13 | [tool:pytest] 14 | collect_ignore = ['setup.py'] 15 | norecursedirs = 'docs' 16 | filterwarnings = 17 | ignore::UserWarning 18 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | """pathpy setup script.""" 5 | 6 | from setuptools import setup, find_packages 7 | from pathpy import __version__ 8 | 9 | 10 | with open('README.rst', encoding='utf-8') as readme_file, open('HISTORY.rst', encoding='utf-8') as history_file: 11 | readme = readme_file.read() 12 | history = history_file.read() 13 | 14 | install_requirements = ['numpy', 'scipy'] 15 | 16 | setup_requirements = ['pytest-runner'] 17 | 18 | setup( 19 | author="Ingo Scholtes", 20 | author_email='scholtes@ifi.uzh.ch', 21 | license='AGPL-3.0+', 22 | classifiers=[ 23 | 'Development Status :: 5 - Production/Stable', 24 | 'Intended Audience :: Science/Research', 25 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 26 | 'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)', 27 | 'Programming Language :: Python :: 3', 28 | 'Programming Language :: Python :: 3.5', 29 | 'Programming Language :: Python :: 3.6', 30 | ], 31 | description="An OpenSource python package for the analysis and visualisation of time series data on" 32 | " complex networks with higher- and multi-order graphical models.", 33 | install_requires=install_requirements, 34 | setup_requires=setup_requirements, 35 | long_description=readme + '\n\n' + history, 36 | python_requires='>=3.5', 37 | keywords='network analysis temporal networks pathways sequence modeling graph mining', 38 | name='pathpy2', 39 | packages=find_packages(), 40 | test_suite='tests', 41 |
url='https://www.pathpy.net', 42 | version=__version__, 43 | include_package_data=True, 44 | package_data={'pathpy': ['visualisation_assets/*.html'] + ['visualisation_assets/*.js']}, 45 | zip_safe=False 46 | ) 47 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Unit tests for pathpy 2 | 3 | This directory contains the unit tests for methods and functions 4 | in pathpy. 5 | The testing framework [pytest](https://doc.pytest.org/) 6 | is required to run the tests. 7 | 8 | To run the test suite (without slow tests) run 9 | ```bash 10 | $ pytest tests 11 | ``` 12 | 13 | ## Slow functions 14 | 15 | Slow functions can be decorated with `slow` to mark them 16 | as skippable if you require only a quick check. 17 | To run all tests add the flag `--runslow`: 18 | ```bash 19 | $ pytest --runslow 20 | ``` 21 | 22 | ## Coverage report 23 | 24 | To compute a coverage report of the tests you need to install 25 | [coverage.py](https://coverage.readthedocs.io/en/coverage-4.3.4/) 26 | as well as its `pytest` integration 27 | [pytest-cov][1] 28 | ```bash 29 | $ pytest tests/ --runslow --cov=pathpy --cov-report html 30 | ``` 31 | which will create an HTML coverage report in the same directory. 32 | 33 | [1]: https://pypi.python.org/pypi/pytest-cov 34 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/tests/__init__.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import pytest 5 | import numpy as np 6 | 7 | import pathpy as pp 8 | 9 | test_directory = os.path.dirname(os.path.abspath(__file__)) 10 | test_data_dir = os.path.join(test_directory, 'test_data') 11 | 12 | 13 | def pytest_addoption(parser): 14 | parser.addoption("--runslow", action="store_true", help="run slow tests") 15 | parser.addoption("--latex", action="store_true", help="set `pdflatex` as available") 16 | parser.addoption("--networkx", action="store_true", help="set `networkx` as available") 17 | 18 | 19 | def pytest_runtest_setup(item): 20 | if 'slow' in item.keywords and not item.config.getvalue("runslow"): 21 | pytest.skip("need --runslow option to run") 22 | 23 | if 'latex' in item.keywords and not item.config.getvalue("latex"): 24 | pytest.skip("need --latex option to run") 25 | 26 | if 'networkx' in item.keywords and not item.config.getvalue("networkx"): 27 | pytest.skip("need --networkx option to run") 28 | 29 | 30 | @pytest.fixture() 31 | def test_data_directory(): 32 | return test_data_dir 33 | 34 | 35 | @pytest.fixture() 36 | def path_from_ngram_file(): 37 | """load the example file as a pathpy.Paths object""" 38 | ngram_file_path = os.path.join(test_data_dir, 'ngram_simple.ngram') 39 | path = pp.Paths.read_file(ngram_file_path, frequency=True) 40 | return path 41 | 42 | 43 | @pytest.fixture() 44 | def path_from_edge_file(): 45 | file_path = os.path.join(test_data_dir, 'edge_frequency.edge') 46 | path = pp.Paths.read_edges(file_path, weight=True) 47 | return path 48 | 49 | 50 | @pytest.fixture() 51 | def path_from_edge_file_undirected(): 52 | file_path = os.path.join(test_data_dir, 'edge_frequency.edge') 53 | path = 
pp.Paths.read_edges(file_path, weight=True, undirected=True, maxlines=5) 54 | return path 55 | 56 | 57 | def generate_random_path(size, rnd_seed, num_nodes=None): 58 | """Generate a Path with random path sequences""" 59 | if num_nodes is None: 60 | import string 61 | node_set = string.ascii_lowercase 62 | else: 63 | node_set = [str(x) for x in range(num_nodes)] 64 | 65 | def random_ngram(p_len, nodes): 66 | num_elements = len(nodes) 67 | sequence = np.random.choice(num_elements, p_len) 68 | path = [nodes[i] for i in sequence] 69 | return ','.join(path) 70 | 71 | np.random.seed(rnd_seed) 72 | paths = pp.Paths() 73 | for _ in range(size): 74 | frequency = np.random.randint(1, 4) 75 | path_length = np.random.randint(1, 10) 76 | path_to_add = random_ngram(path_length, node_set) 77 | paths.add_path(path_to_add, frequency=(0, frequency)) 78 | 79 | return paths 80 | 81 | 82 | @pytest.fixture(scope='function') 83 | def random_paths(): 84 | """Generate a Path with random path sequences""" 85 | return generate_random_path 86 | 87 | 88 | def generate_random_network(n=10, m=20, directed=True, weighted=True, seed=0): 89 | """Generate a random Network""" 90 | random.seed(seed) 91 | net = pp.Network(directed) 92 | for i in range(n): 93 | net.add_node(str(i)) 94 | for i in range(m): 95 | v, w = random.sample(list(net.nodes), 2) 96 | if not weighted: 97 | net.add_edge(v, w) 98 | else: 99 | net.add_edge(v, w, weight=random.randint(0, 10)) 100 | return net 101 | 102 | 103 | @pytest.fixture(scope='function') 104 | def random_network(): 105 | """Generate a random network""" 106 | return generate_random_network 107 | 108 | 109 | def generate_random_temporal_network(n=10, m=20, min_t=0, max_t=100, seed=0): 110 | """ 111 | 112 | Parameters 113 | ---------- 114 | n: int 115 | number of nodes 116 | m: int 117 | number of edges 118 | min_t: int 119 | starting time 120 | max_t: int 121 | end time 122 | seed: int 123 | seed for random number generator 124 | 125 | Returns 126 | ------- 127 | 128 | """ 129 | random.seed(seed) 130 | node_set = [str(i) for i in range(n)] 131 | source_nodes = [random.sample(node_set, 1)[0] for _ in range(m)] 132 | target_nodes = [random.sample(node_set, 1)[0] for _ in range(m)] 133 | times = [random.randint(min_t, max_t) for _ in range(m)] 134 | 135 | tedges = list(zip(source_nodes, target_nodes, times)) 136 | 137 | return pp.TemporalNetwork(tedges) 138 | 139 | 140 | @pytest.fixture(scope='function') 141 | def random_temp_network(): 142 | """Generate a random network""" 143 | return generate_random_temporal_network 144 | 145 | 146 | @pytest.fixture() 147 | def temporal_network_object(): 148 | t = pp.TemporalNetwork() 149 | # Path of length two 150 | t.add_edge("c", "e", 1) 151 | t.add_edge("e", "f", 2) 152 | 153 | # Path of length two 154 | t.add_edge("a", "e", 3) 155 | t.add_edge("e", "g", 4) 156 | 157 | # Path of length two 158 | t.add_edge("c", "e", 5) 159 | t.add_edge("e", "f", 6) 160 | 161 | # Path of length two 162 | t.add_edge("a", "e", 7) 163 | t.add_edge("e", "g", 8) 164 | 165 | # Path of length two 166 | t.add_edge("c", "e", 9) 167 | t.add_edge("e", "f", 10) 168 | 169 | # The next two edges continue the previous path to ( c-> e-> f-> e -> b ) 170 | t.add_edge("f", "e", 11) 171 | t.add_edge("e", "b", 12) 172 | 173 | # This is an isolated edge (i.e. 
path of length one) 174 | t.add_edge("e", "b", 13) 175 | 176 | # Path of length two 177 | t.add_edge("c", "e", 14) 178 | t.add_edge("e", "f", 15) 179 | 180 | # Path of length two 181 | t.add_edge("b", "e", 16) 182 | t.add_edge("e", "g", 17) 183 | 184 | # Path of length two 185 | t.add_edge("c", "e", 18) 186 | t.add_edge("e", "f", 19) 187 | 188 | # Path of length two 189 | t.add_edge("c", "e", 20) 190 | t.add_edge("e", "f", 21) 191 | 192 | return t 193 | 194 | 195 | @pytest.fixture() 196 | def dag_object(): 197 | dag = pp.DAG() 198 | # For this DAG, the following five paths between the root and the leaves exist 199 | # for the following mapping: 200 | # mapping = {'a': 'A', 'b': 'B', 'c': 'A', 'e': 'B', 201 | # 'f': 'B', 'g': 'A', 'h': 'A','i': 'B', 'j': 'A' } 202 | 203 | # h -> i ( A -> B ) 204 | # h -> j ( A -> A ) 205 | # a -> b -> e ( A -> B -> B ) 206 | # a -> c -> g ( A -> A -> A ) 207 | # a -> b -> f -> g ( A -> B -> B -> A ) 208 | # a -> c -> b -> e ( A -> A -> B -> B ) 209 | # a -> c -> b -> f -> g ( A -> A -> B -> B -> A ) 210 | dag.add_edge('a', 'b') 211 | dag.add_edge('a', 'c') 212 | dag.add_edge('c', 'b') 213 | dag.add_edge('b', 'e') 214 | dag.add_edge('b', 'f') 215 | dag.add_edge('f', 'g') 216 | dag.add_edge('c', 'g') 217 | dag.add_edge('h', 'i') 218 | dag.add_edge('h', 'j') 219 | return dag 220 | -------------------------------------------------------------------------------- /tests/test_MultiOrderModel.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | import pathpy as pp 5 | 6 | 7 | def test_print(random_paths): 8 | p = random_paths(90, 0, 20) 9 | multi = pp.MultiOrderModel(p, max_order=3) 10 | print(multi) 11 | 12 | 13 | @pytest.mark.parametrize('k', (1, 2, 3)) 14 | def test_init(random_paths, k): 15 | p = random_paths(90, 0, 20) 16 | multi = pp.MultiOrderModel(p, max_order=k) 17 | assert len(multi.layers) == k+1 18 | 19 | 20 | # @pytest.mark.slow 21 | # @pytest.mark.parametrize('k', (1, 2)) 22 | # def test_parallel(random_paths, k): 23 | # """assert that the parallel calculation is equal to the 24 | # sequential""" 25 | # p = random_paths(90, 0, 20) 26 | # multi_seq = pp.MultiOrderModel(p, max_order=k) 27 | # 28 | # pp.ENABLE_MULTICORE_SUPPORT = True 29 | # assert pp.ENABLE_MULTICORE_SUPPORT 30 | # 31 | # multi_parallel = pp.MultiOrderModel(p, max_order=k) 32 | # 33 | # assert multi_parallel.model_size(k) == multi_seq.model_size(k) 34 | # for k in multi_parallel.transition_matrices: 35 | # assert np.sum(multi_parallel.transition_matrices[k] - multi_seq.transition_matrices[k]) == pytest.approx(0) 36 | 37 | 38 | # TODO: how to properly test this function? 
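# Illustrative sketch (hedged; not a test and not part of the original file): the
# MultiOrderModel API exercised in this module is typically driven as follows,
# assuming `p` is a pathpy.Paths object:
#
#     m = pp.MultiOrderModel(p, max_order=3)
#     k_opt = m.estimate_order()   # selects the optimal maximum order
#     ll = m.likelihood(p)         # log-likelihood of the observed paths under the model
#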
39 | @pytest.mark.parametrize('method', ('AIC', 'BIC', 'AICc')) 40 | @pytest.mark.parametrize('k', (2, 3)) 41 | def test_test_network_hypothesis(random_paths, k, method): 42 | p = random_paths(20, 40, 6) 43 | multi = pp.MultiOrderModel(p, max_order=k) 44 | (is_net, ic0, ic1) = multi.test_network_hypothesis(p, method=method) 45 | 46 | 47 | @pytest.mark.parametrize( 48 | 'method, k, e_ic0, e_ic1', ( 49 | ('AIC', 1, 853.7904463041854, 829.9533867847043), 50 | ('BIC', 3, 862.234843574755, 885.6864087704643), 51 | ('AICc', 3, 856.3359008496399, 1305.9533867847044) 52 | ) 53 | ) 54 | def test_test_network_hypothesis_values(random_paths, k, method, e_ic0, e_ic1): 55 | p = random_paths(20, 40, 6) 56 | multi = pp.MultiOrderModel(p, max_order=k) 57 | (is_net, ic0, ic1) = multi.test_network_hypothesis(p, method=method) 58 | assert e_ic0 == pytest.approx(ic0) 59 | assert e_ic1 == pytest.approx(ic1) 60 | 61 | 62 | @pytest.mark.parametrize('k', (1, 2, 3)) 63 | def test_write_state_file(random_paths, k, tmpdir): 64 | file_path = str(tmpdir.mkdir("sub").join("multi_order_state")) 65 | p = random_paths(20, 40, 6) 66 | multi = pp.MultiOrderModel(p, max_order=k) 67 | 68 | for i in range(1, k+1): 69 | multi.save_state_file(file_path + '.' + str(i), layer=i) 70 | 71 | 72 | def test_estimate_order_1(): 73 | """Example without second-order correlations""" 74 | paths = pp.Paths() 75 | 76 | paths.add_path('a,c') 77 | paths.add_path('b,c') 78 | paths.add_path('c,d') 79 | paths.add_path('c,e') 80 | 81 | for k in range(4): 82 | paths.add_path('a,c,d') 83 | paths.add_path('b,c,e') 84 | paths.add_path('b,c,d') 85 | paths.add_path('a,c,e') 86 | 87 | m = pp.MultiOrderModel(paths, max_order=2) 88 | assert m.estimate_order() == 1, \ 89 | "Error, wrongly detected higher-order correlations" 90 | 91 | 92 | def test_estimate_order_2(): 93 | # Example with second-order correlations 94 | paths = pp.Paths() 95 | 96 | paths.add_path('a,c') 97 | paths.add_path('b,c') 98 | paths.add_path('c,d') 99 | paths.add_path('c,e') 100 | 101 | for k in range(4): 102 | paths.add_path('a,c,d') 103 | paths.add_path('b,c,e') 104 | 105 | m = pp.MultiOrderModel(paths, max_order=2) 106 | assert m.estimate_order() == 2 107 | 108 | 109 | def test_save_statefile(random_paths, tmpdir): 110 | file_path = str(tmpdir.join("statefile.sf")) 111 | p = random_paths(3, 20, 6) 112 | multi = pp.MultiOrderModel(p, max_order=2) 113 | multi.save_state_file(file_path, layer=2) 114 | with open(file_path) as f: 115 | for line in f: 116 | assert '{' not in line # make sure that we did not write a dictionary 117 | 118 | 119 | def test_single_path_likelihood(random_paths): 120 | p1 = random_paths(size=10, rnd_seed=20, num_nodes=10) # type: pp.Paths 121 | p2 = random_paths(size=100, rnd_seed=0, num_nodes=50) 122 | p12 = p1 + p2 123 | mom = pp.MultiOrderModel(p12, max_order=3) 124 | lkh1 = mom.likelihood(p1) 125 | lkh2 = mom.likelihood(p2) 126 | lkh12 = mom.likelihood(p12) 127 | 128 | assert lkh1 > lkh2 # second paths must be 129 | assert (lkh1 + lkh2) == pytest.approx(lkh12) 130 | 131 | assert mom.path_likelihood(('1', '2'), layer=0, freq=4) < 0 132 | 133 | lkl_last = None 134 | for i in range(3): # likelihoods must be increasing 135 | lkl = mom.path_likelihood(('6', '7', '2', '0', '6'), layer=i, freq=9) 136 | if lkl_last is not None: 137 | assert lkl >= lkl_last 138 | lkl_last = lkl 139 | 140 | path_likelihoods = [] 141 | for p, freq in p12.paths[3].items(): # print the path with the highest likelihood 142 | lkl = mom.path_likelihood(p, layer=2, freq=freq.sum(), log=False) 
143 | path_likelihoods.append((lkl, p)) 144 | 145 | assert max(path_likelihoods)[1] == ('23', '32', '19', '8') 146 | -------------------------------------------------------------------------------- /tests/test_Network.py: -------------------------------------------------------------------------------- 1 | import random 2 | import pytest 3 | 4 | @pytest.mark.parametrize('directed', (True, False)) 5 | @pytest.mark.parametrize('weighted', (True, False)) 6 | def test_add_node(random_network, directed, weighted): 7 | """ 8 | Test node creation 9 | """ 10 | net = random_network(n=10, m=20, directed=directed, weighted=weighted) 11 | 12 | assert net.directed == directed 13 | 14 | vc_before = net.ncount() 15 | ec_before = net.ecount() 16 | 17 | assert 'v' not in net.nodes 18 | 19 | net.add_node('v', test1='x', test2=42) 20 | 21 | assert 'v' in net.nodes 22 | 23 | assert net.nodes['v']['test1'] == 'x' 24 | assert net.nodes['v']['test2'] == 42 25 | if directed: 26 | assert net.nodes['v']['indegree'] == 0 27 | assert net.nodes['v']['outdegree'] == 0 28 | else: 29 | assert net.nodes['v']['degree'] == 0 30 | assert net.nodes['v']['inweight'] == 0 31 | assert net.nodes['v']['outweight'] == 0 32 | 33 | assert net.ncount() == vc_before + 1 34 | assert net.ecount() == ec_before 35 | 36 | 37 | @pytest.mark.parametrize('directed', (True, False)) 38 | @pytest.mark.parametrize('weighted', (True, False)) 39 | def test_remove_node(random_network, directed, weighted): 40 | """ 41 | Test node removal 42 | """ 43 | net = random_network(n=10, m=20, directed=directed, weighted=weighted) 44 | 45 | to_remove = random.choice(list(net.nodes)) 46 | 47 | # collect values before removal 48 | v_c = net.ncount() 49 | e_c = net.ecount() 50 | t_w = net.total_edge_weight() 51 | 52 | incident_edges = [(v, w) for (v, w) in net.edges if v == to_remove or w == to_remove] 53 | weight_incident = sum([net.edges[e]['weight'] for e in incident_edges]) 54 | successors = [w for w in net.successors[to_remove]] 55 | predecessors = [v for v in net.predecessors[to_remove]] 56 | 57 | net.remove_node(to_remove) 58 | 59 | # test values after removal 60 | assert to_remove not in net.nodes 61 | assert net.ncount() == v_c-1 62 | assert net.ecount() == e_c - len(incident_edges) 63 | assert net.total_edge_weight() == t_w - weight_incident 64 | 65 | for e in incident_edges: 66 | assert e not in net.edges 67 | 68 | for w in successors: 69 | assert to_remove not in net.predecessors[w] 70 | assert to_remove not in net.successors[w] 71 | for v in predecessors: 72 | assert to_remove not in net.predecessors[v] 73 | assert to_remove not in net.successors[v] 74 | 75 | 76 | 77 | @pytest.mark.parametrize('directed', (True, False)) 78 | @pytest.mark.parametrize('weighted', (True, False)) 79 | def test_add_edge(random_network, directed, weighted): 80 | """ 81 | Test edge creation 82 | """ 83 | net = random_network(n=10, m=20, directed=directed, weighted=weighted) 84 | 85 | # draw pair of nodes that are not already connected 86 | (v, w) = random.choice(list(net.edges)) 87 | while (v, w) in net.edges: 88 | v, w = random.sample(list(net.nodes), 2) 89 | 90 | if weighted: 91 | weight_to_add = random.randint(1, 10) 92 | else: 93 | weight_to_add = 1 94 | 95 | # collect values before removal 96 | v_c = net.ncount() 97 | e_c = net.ecount() 98 | t_w = net.total_edge_weight() 99 | 100 | if weighted: 101 | net.add_edge(v, w, weight=weight_to_add) 102 | else: 103 | net.add_edge(v, w) 104 | 105 | # test values after removal 106 | assert v in net.nodes 107 | assert w in net.nodes 
108 | assert net.ncount() == v_c 109 | assert net.ecount() == e_c + 1 110 | assert net.total_edge_weight() == t_w + weight_to_add 111 | assert (v, w) in net.edges 112 | 113 | assert w in net.successors[v] 114 | assert v in net.predecessors[w] 115 | 116 | if not directed: 117 | assert w in net.predecessors[v] 118 | assert v in net.successors[w] 119 | 120 | 121 | def test_import_from_networkx(): 122 | # TODO: add test for weighted networks 123 | from pathpy.classes.network import network_from_networkx 124 | import networkx as nx 125 | 126 | g = nx.generators.barabasi_albert_graph(20, 10) 127 | relabeling = {i: str(i) for i in g} 128 | nx.relabel_nodes(g, relabeling, copy=False) 129 | for i, edge in enumerate(g.edges): 130 | g.edges[edge]['custom'] = i 131 | 132 | net = network_from_networkx(g) 133 | assert net.ncount() == len(g) 134 | assert net.ecount() == len(g.edges) 135 | for edge in net.edges: 136 | assert net.edges[edge]['custom'] == g.edges[edge]['custom'] 137 | 138 | 139 | def test_export_networkx(): 140 | # TODO: test directed graph 141 | from pathpy.classes.network import network_from_networkx 142 | from pathpy.classes.network import network_to_networkx 143 | import networkx as nx 144 | 145 | g = nx.generators.karate_club_graph() 146 | # pathpy.Network will implicitly recast all labels to str, so to have 147 | # a comparable network to start with, do the same here 148 | relabel = {i: str(i) for i in g} 149 | nx.relabel_nodes(g, relabel, copy=False) 150 | for i, edge in enumerate(g.edges): 151 | g.edges[edge]['custom'] = i 152 | g.edges[edge]['weight'] = (i % 4) + 100 153 | 154 | for i, node in enumerate(g.nodes): 155 | g.nodes[node]['custom'] = "{} unique string".format(i) 156 | 157 | net = network_from_networkx(g) 158 | g_back = network_to_networkx(net) 159 | 160 | nx_degrees = g.degree(weight='weight') 161 | 162 | assert len(g_back) == len(g) 163 | assert len(g_back.edges) == len(g.edges) 164 | assert dict(g_back.degree) == dict(g.degree) 165 | for edge in g_back.edges: 166 | assert net.edges[edge]['weight'] == g.edges[edge]['weight'] 167 | assert net.edges[edge]['custom'] == g.edges[edge]['custom'] 168 | assert g_back.edges[edge]['custom'] == g.edges[edge]['custom'] 169 | assert g_back.edges[edge]['weight'] == g.edges[edge]['weight'] 170 | 171 | for node in g_back.nodes: 172 | assert g_back.nodes[node]['custom'] == g.nodes[node]['custom'] 173 | assert nx_degrees[node] == net.nodes[node]['inweight'] 174 | assert nx_degrees[node] == net.nodes[node]['outweight'] 175 | 176 | 177 | def test_read_edges(test_data_directory): 178 | import os 179 | import pathpy 180 | 181 | edge_file = os.path.join(test_data_directory, "example_int.tedges") 182 | 183 | net = pathpy.Network.read_file(edge_file, weighted=True, header=True, directed=True) 184 | assert net.ncount() == 5 185 | assert net.ecount() == 6 186 | 187 | 188 | def test_diagonal_values(): 189 | from pathpy.classes.network import Network 190 | net = Network() 191 | net.add_edge('a', 'b') 192 | net.add_edge('a', 'a') 193 | adj = net.adjacency_matrix().todense() 194 | assert adj.sum() == 3 195 | assert adj[0, 0] == 1 196 | assert adj[1, 1] == 0 197 | assert adj.diagonal().sum() == 1 198 | 199 | # test directed case 200 | net = Network(directed=True) 201 | net.add_edge('a', 'b') 202 | net.add_edge('a', 'a') 203 | adj = net.adjacency_matrix().todense() 204 | assert adj.sum() == 2 205 | assert adj[0, 0] == 1 206 | assert adj[1, 1] == 0 207 | assert adj.diagonal().sum() == 1 208 | 209 |
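To complement the tests above, a compact, hedged sketch of the `Network` API they exercise (the toy nodes, edges and attribute names are illustrative and not taken from the test suite):

```python
import pathpy as pp

net = pp.Network(directed=True)
net.add_edge('a', 'b', weight=2)     # edges carry arbitrary attributes; weight defaults to 1
net.add_edge('b', 'c')
net.add_node('d', color='red')       # isolated node with a custom attribute

print(net.ncount(), net.ecount(), net.total_edge_weight())
adj = net.adjacency_matrix()         # scipy sparse adjacency matrix
print(adj.todense())
```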
-------------------------------------------------------------------------------- /tests/test_OriginDestinationPaths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pathpy as pp 3 | 4 | def test_extract_distribute(test_data_directory, ): 5 | network_path = os.path.join(test_data_directory, 'example_network.edges') 6 | od_path = os.path.join(test_data_directory, 'example_origin_destination.csv') 7 | 8 | # read the network topology 9 | p = pp.Paths.read_edges(network_path, undirected=True) 10 | network = pp.HigherOrderNetwork(p) 11 | 12 | OD = pp.path_extraction.read_origin_destination(od_path) 13 | 14 | paths = pp.path_extraction.paths_from_origin_destination(OD, network) 15 | 16 | assert (paths.paths[3][('A', 'B', 'F', 'H')][1] == 2.0 and 17 | paths.paths[3][('A', 'C', 'G', 'H')][1] == 3.0) or \ 18 | (paths.paths[3][('A', 'B', 'F', 'H')][1] == 3.0 and 19 | paths.paths[3][('A', 'C', 'G', 'H')][1] == 2.0) 20 | assert paths.paths[3][('D', 'B', 'C', 'E')][1] == 7.0 21 | assert paths.paths[2][('A', 'B', 'F')][1] == 3.0 22 | assert paths.paths[2][('B', 'C', 'E')][1] == 3.0 23 | 24 | 25 | def test_extract_single(test_data_directory, ): 26 | network_path = os.path.join(test_data_directory, 'example_network.edges') 27 | od_path = os.path.join(test_data_directory, 'example_origin_destination.csv') 28 | 29 | # read the network topology 30 | p = pp.Paths.read_edges(network_path, undirected=True) 31 | network = pp.HigherOrderNetwork(p) 32 | 33 | OD = pp.path_extraction.read_origin_destination(od_path) 34 | 35 | paths = pp.path_extraction.paths_from_origin_destination(OD, network, 36 | distribute_weight=False) 37 | 38 | assert (paths.paths[3][('A', 'B', 'F', 'H')][1] == 5.0 and 39 | paths.paths[3][('A', 'C', 'G', 'H')][1] == 0.0) or \ 40 | (paths.paths[3][('A', 'B', 'F', 'H')][1] == 0.0 and 41 | paths.paths[3][('A', 'C', 'G', 'H')][1] == 5.0) 42 | assert paths.paths[3][('D', 'B', 'C', 'E')][1] == 7.0 43 | assert paths.paths[2][('A', 'B', 'F')][1] == 3.0 44 | assert paths.paths[2][('B', 'C', 'E')][1] == 3.0 45 | -------------------------------------------------------------------------------- /tests/test_TemporalNetwork.py: -------------------------------------------------------------------------------- 1 | import pathpy as pp 2 | import os 3 | import numpy as np 4 | import sqlite3 5 | from pytest import mark 6 | 7 | def test_read_temporal_file_int(test_data_directory, ): 8 | file_path = os.path.join(test_data_directory, 'example_int.tedges') 9 | t = pp.TemporalNetwork.read_file(file_path) 10 | times = t.ordered_times 11 | expected_times = [0, 2, 4, 5, 6, 8] 12 | assert times == expected_times 13 | 14 | activities = sorted(list(t.activities.values())) 15 | expected_activities = [[], [], [], [], [0, 2, 5], [2], [4], [6], [8]] 16 | assert expected_activities == activities 17 | 18 | 19 | def test_read_temporal_file_time_stamp(test_data_directory, ): 20 | file_path = os.path.join(test_data_directory, 'example_timestamp.tedges') 21 | t = pp.TemporalNetwork.read_file(file_path, timestamp_format="%Y-%m-%d %H:%M") 22 | times = t.ordered_times 23 | time_diffs = [j - i for i, j in zip(times[:-1], times[1:])] 24 | expected_diffs = [10800, 15060, 264960] 25 | # TODO: The actual time number depends on local set by the user 26 | assert time_diffs == expected_diffs 27 | 28 | 29 | def test_filter_temporal_edges(temporal_network_object): 30 | t = temporal_network_object 31 | 32 | def filter_func(v, w, time): 33 | return time % 2 == 0 34 | 35 | filtered = 
t.filter_edges(filter_func) 36 | times = filtered.ordered_times 37 | expected = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20] 38 | assert times == expected 39 | 40 | 41 | def test_get_interpath_times(temporal_network_object): 42 | t = temporal_network_object 43 | inter_time = dict(t.inter_path_times()) 44 | expected = {'e': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 45 | 'b': [4, 3], 'f': [9, 5, 1] 46 | } 47 | assert inter_time == expected 48 | 49 | 50 | def test_shuffle_edges(temporal_network_object): 51 | t = temporal_network_object 52 | 53 | np.random.seed(90) 54 | t1 = t.shuffle_edges(with_replacement=True) 55 | times1 = len(t1.tedges) 56 | expected1 = len(t.tedges) 57 | assert times1 == expected1 58 | 59 | np.random.seed(90) 60 | t2 = t.shuffle_edges(l=4, with_replacement=False) 61 | edges2 = len(t2.tedges) 62 | expected2 = 4 63 | assert edges2 == expected2 64 | 65 | 66 | def test_inter_event_times(temporal_network_object): 67 | time_diffs = temporal_network_object.inter_event_times() 68 | # all time differences are 1 69 | assert (time_diffs == 1).all() 70 | 71 | 72 | def test_inter_path_times(temporal_network_object): 73 | t = temporal_network_object 74 | path_times = dict(t.inter_path_times()) 75 | expected = {'f': [9, 5, 1], 76 | 'e': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 77 | 'b': [4, 3]} 78 | assert path_times == expected 79 | 80 | 81 | def test_temporal_summary(temporal_network_object): 82 | print(temporal_network_object) 83 | 84 | 85 | def test_export_tikz_unfolded_network(temporal_network_object, tmpdir): 86 | t = temporal_network_object # type: pp.TemporalNetwork 87 | file_path = str(tmpdir.mkdir("sub").join("multi_order_state")) 88 | pp.visualisation.export_tikz(t, file_path) 89 | 90 | 91 | def test_from_sqlite_int(test_data_directory, ): 92 | file_path = os.path.join(test_data_directory, 'test_tempnets.db') 93 | con = sqlite3.connect(file_path) 94 | con.row_factory = sqlite3.Row 95 | cursor = con.execute('SELECT source, target, time FROM example_int') 96 | 97 | t = pp.TemporalNetwork.from_sqlite(cursor) 98 | times = t.ordered_times 99 | expected_times = [0, 2, 4, 5, 6, 8] 100 | assert times == expected_times 101 | 102 | activities = sorted(list(t.activities.values())) 103 | expected_activities = [[], [], [], [], [0, 2, 5], [2], [4], [6], [8]] 104 | assert expected_activities == activities 105 | 106 | 107 | def test_from_sqlite_timestamps(test_data_directory, ): 108 | file_path = os.path.join(test_data_directory, 'test_tempnets.db') 109 | con = sqlite3.connect(file_path) 110 | con.row_factory = sqlite3.Row 111 | cursor = con.execute('SELECT source, target, time FROM example_timestamp') 112 | t = pp.TemporalNetwork.from_sqlite(cursor, timestamp_format="%Y-%m-%d %H:%M") 113 | times = t.ordered_times 114 | time_diffs = [j - i for i, j in zip(times[:-1], times[1:])] 115 | expected_diffs = [10800, 15060, 264960] 116 | # TODO: The actual time number depends on local set by the user 117 | assert time_diffs == expected_diffs 118 | 119 | 120 | def test_write_html(temporal_network_object, tmpdir): 121 | file_path = str(tmpdir.mkdir("sub").join("d3_temp.html")) 122 | t = temporal_network_object 123 | pp.visualisation.export_html(t, file_path) 124 | 125 | 126 | @mark.latex 127 | @mark.parametrize('is_dag', (False, True)) 128 | @mark.parametrize('split_dir', (False, True)) 129 | def test_write_tikz(temporal_network_object, tmpdir, is_dag, split_dir): 130 | dir_path = tmpdir 131 | file_path = str(dir_path.join("temp.tikz")) 132 | print(file_path) 133 | t = temporal_network_object 134 | t.write_tikz(file_path, 
dag=is_dag, split_directions=split_dir) 135 | 136 | cmd = "cd {}; pdflatex " \ 137 | " -interaction nonstopmode {} > /dev/null".format(str(dir_path), file_path) 138 | exit_code = os.system(cmd) 139 | print(dir_path) 140 | assert exit_code == 0 141 | -------------------------------------------------------------------------------- /tests/test_centralities.py: -------------------------------------------------------------------------------- 1 | import pathpy as pp 2 | import pytest 3 | 4 | # absolute eigenvalue difference tolerance 5 | EIGEN_ABS_TOL = 1e-2 6 | 7 | 8 | @pytest.mark.parametrize('k, e_sum, e_var', ( 9 | (3, 27.5833333, 0.0085720486), 10 | (2, 55.0, 0.046875), 11 | (1, 55, 0.046875), 12 | )) 13 | def test_closeness_centrality_hon(random_paths, k, e_sum, e_var): 14 | import numpy as np 15 | p = random_paths(50, 0, 8) 16 | hon = pp.HigherOrderNetwork(p, k=k) 17 | closeness = pp.algorithms.centralities.closeness(hon) 18 | np_closeness = np.array(list(closeness.values())) 19 | assert np_closeness.sum() == pytest.approx(e_sum) 20 | assert np_closeness.var() == pytest.approx(e_var) 21 | 22 | 23 | @pytest.mark.parametrize('k, norm, e_sum, e_var, e_max', ( 24 | (2, False, 3.0, 0.296875, 1.5), 25 | (1, False, 2.0, 0.00694444, 0.333333333), 26 | (2, True, 2.0, 0.1319444444, 1), 27 | )) 28 | def test_betweenness_centrality_hon(random_paths, norm, k, e_sum, e_var, e_max): 29 | import numpy as np 30 | p = random_paths(50, 0, 8) 31 | hon = pp.HigherOrderNetwork(p, k=k) 32 | betweenness = pp.algorithms.centralities.betweenness(hon, normalized=norm) 33 | values = np.array(list(betweenness.values())) 34 | assert values.sum() == pytest.approx(e_sum) 35 | assert max(values) == pytest.approx(e_max) 36 | assert values.var() == pytest.approx(e_var) 37 | 38 | 39 | @pytest.mark.xfail 40 | @pytest.mark.parametrize('k, sub, projection, e_sum, e_var', ( 41 | (1, True, 'scaled', 2.823103290, 0.0004701220779), 42 | (1, False, 'scaled', 2.82310329017, 0.00047012207), 43 | (2, False, 'all', 2.030946758666, 0.0168478112), 44 | (2, True, 'all', 2.030946758, 0.0168478112489), 45 | (2, False, 'last', 1.7463870380802424, 0.0077742413305), 46 | (2, False, 'first', 1.7461339874793731, 0.0083696967427), 47 | (2, True, 'last', 1.746387038080242, 0.007774241), 48 | (2, True, 'first', 1.7461339874793727, 0.0083696967427313), 49 | )) 50 | def test_eigen_centrality_hon(random_paths, sub, projection, k, e_sum, e_var): 51 | import numpy as np 52 | p = random_paths(50, 0, 8) 53 | hon = pp.HigherOrderNetwork(p, k=k) 54 | eigen = pp.algorithms.centralities.eigenvector(hon, projection, sub) 55 | values = np.array(list(eigen.values())) 56 | assert values.sum() == pytest.approx(e_sum, abs=EIGEN_ABS_TOL) 57 | assert values.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL) 58 | 59 | 60 | @pytest.mark.parametrize('k, sub, proj, e_sum, e_var', ( 61 | (2, False, 'all', 1, 0.000399240558236), 62 | (1, False, 'scaled', 1, 6.111199022e-05), 63 | (2, False, 'scaled', 1, 0.00039924055823), 64 | (2, False, 'last', 1, 0.00045826544), 65 | (2, False, 'first', 1, 0.000345796913), 66 | (2, True, 'all', 1, 0.000399240558), 67 | (1, True, 'scaled', 1, 6.111199022e-05), 68 | (2, True, 'scaled', 1, 0.000399240558236666), 69 | (2, True, 'last', 1, 0.000458265), 70 | (2, True, 'first', 1, 0.0003457969), 71 | )) 72 | def test_pagerank_centrality_hon(random_paths, sub, proj, k, e_sum, e_var): 73 | import numpy as np 74 | p = random_paths(50, 0, 8) 75 | hon = pp.HigherOrderNetwork(p, k=k) 76 | page = pp.algorithms.centralities.pagerank(hon, 
include_sub_paths=sub, projection=proj) 77 | values = np.array(list(page.values())) 78 | assert values.sum() == pytest.approx(e_sum) 79 | assert values.var() == pytest.approx(e_var) 80 | 81 | 82 | def test_betweenness_centrality_paths(path_from_ngram_file): 83 | p = path_from_ngram_file 84 | betweenness_centrality = pp.algorithms.centralities.betweenness(p, normalized=False) 85 | betweenness = {n: c for n, c in betweenness_centrality.items()} 86 | expected = {'b': 2.0, 'a': 3.0, 'e': 0, 'c': 3.0, 'd': 5.0} 87 | assert betweenness == expected 88 | 89 | 90 | def test_betweenness_centrality_paths_norm(path_from_ngram_file): 91 | p = path_from_ngram_file 92 | betweenness_centrality = pp.algorithms.centralities.betweenness(p, normalized=True) 93 | betweenness = max(c for c in betweenness_centrality.values()) 94 | expected_norm_max = 1 95 | assert pytest.approx(betweenness) == expected_norm_max 96 | 97 | 98 | def test_closeness_centrality_paths(path_from_ngram_file): 99 | p = path_from_ngram_file 100 | closeness_centrality = pp.algorithms.centralities.closeness(p, normalized=False) 101 | closeness_sum = sum(c for c in closeness_centrality.values()) 102 | expected_sum = 9.833333333333332 103 | assert closeness_sum == pytest.approx(expected_sum) 104 | 105 | nodes = {n for n in closeness_centrality} 106 | expected_nodes = {'a', 'b', 'c', 'd', 'e'} 107 | assert nodes == expected_nodes 108 | 109 | 110 | def test_closeness_centrality_paths_norm(path_from_ngram_file): 111 | p = path_from_ngram_file 112 | closeness_centrality = pp.algorithms.centralities.closeness(p, normalized=True) 113 | closeness_max = max(c for c in closeness_centrality.values()) 114 | expected_max = 1 115 | assert closeness_max == pytest.approx(expected_max) 116 | 117 | 118 | def test_visitation_probabilities(path_from_ngram_file): 119 | p = path_from_ngram_file 120 | v_prob = pp.algorithms.centralities.visitation_probabilities(p) 121 | prob_sum = sum(p for p in v_prob.values()) 122 | assert prob_sum == pytest.approx(1) 123 | 124 | max_prob = max(p for p in v_prob.values()) 125 | expected_max = 0.3125 126 | assert max_prob == pytest.approx(expected_max) 127 | 128 | -------------------------------------------------------------------------------- /tests/test_data/edge_frequency.edge: -------------------------------------------------------------------------------- 1 | 1,2,45,3,3,3 2 | 1,3,2,23,2,2 3 | 1,5,5,12,5,2 4 | 3,5,2,11,45,2 5 | 5,3,4,12,2,2 6 | 5,2,1,12,4,1 7 | -------------------------------------------------------------------------------- /tests/test_data/example_int.tedges: -------------------------------------------------------------------------------- 1 | source,target,time 2 | 1,2,0 3 | 1,2,2 4 | 1,3,5 5 | 3,2,6 6 | 2,1,8 7 | 4,5,2 8 | 5,3,4 -------------------------------------------------------------------------------- /tests/test_data/example_network.edges: -------------------------------------------------------------------------------- 1 | A,B 2 | A,C 3 | B,C 4 | B,D 5 | B,F 6 | C,G 7 | C,E 8 | D,F 9 | E,G 10 | F,H 11 | G,H -------------------------------------------------------------------------------- /tests/test_data/example_origin_destination.csv: -------------------------------------------------------------------------------- 1 | A,H,5 2 | B,E,3 3 | D,E,7 4 | A,F,3 -------------------------------------------------------------------------------- /tests/test_data/example_timestamp.tedges: -------------------------------------------------------------------------------- 1 | source,target,time 2 | 1,4,2000-03-04 
12:45 3 | 2,4,2000-03-04 15:45 4 | 5,2,2000-03-04 19:56 5 | 8,2,2000-03-07 21:32 -------------------------------------------------------------------------------- /tests/test_data/ngram_simple.ngram: -------------------------------------------------------------------------------- 1 | a,b,c,d,a,b,2 2 | d,e,d,a,b,4 3 | -------------------------------------------------------------------------------- /tests/test_data/test_tempnets.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uzhdag/pathpy/1ddaf7d710aa3b5ed119de4f810c7c919eda3241/tests/test_data/test_tempnets.db -------------------------------------------------------------------------------- /tests/test_estimation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Feb 20 11:59:22 2015 4 | @author: Ingo Scholtes 5 | 6 | (c) Copyright ETH Zurich, Chair of Systems Design, 2015-2017 7 | """ 8 | 9 | import pathpy as pp 10 | import numpy as _np 11 | import pytest 12 | # mark to be used as decorator on slow functions such that they are only run 13 | # when explicitly called with `$ pytest --runslow` 14 | slow = pytest.mark.slow 15 | 16 | 17 | 18 | @pytest.mark.parametrize('method', ('BIC', 'AIC')) 19 | def test_markov_sequence(method): 20 | _np.random.seed(90) 21 | x = list(map(str, _np.random.choice(range(10), 1000))) 22 | ms = pp.MarkovSequence(x) 23 | detected_order = ms.estimate_order(maxOrder=4, method=method) 24 | assert detected_order == 1, \ 25 | "Error, wrongly detected higher-order correlations" 26 | 27 | 28 | def test_estimate_order_strongly_connected(): 29 | """ 30 | Example with single strongly connected component in first- 31 | and two connected components in second-order network 32 | """ 33 | paths = pp.Paths() 34 | 35 | ngram_list = ['a,b,c', 'b,c,b', 'c,b,a', 36 | 'b,a,b', 'e,b,f', 'b,f,b', 37 | 'f,b,e', 'b,e,b'] 38 | 39 | for ngram in ngram_list: 40 | paths.add_path(ngram) 41 | 42 | g1 = pp.HigherOrderNetwork(paths, k=1) 43 | pp.algorithms.components.reduce_to_gcc(g1) 44 | assert g1.ncount() == 5, "Error, wrong number of nodes in first-order network" 45 | assert g1.ecount() == 8, "Error, wrong number of links in first-order network" 46 | 47 | g2 = pp.HigherOrderNetwork(paths, k=2) 48 | pp.algorithms.components.reduce_to_gcc(g2) 49 | assert g2.ncount() == 4, "Error, wrong number of nodes in second-order network" 50 | assert g2.ecount() == 4, "Error, wrong number of links in second-order network" 51 | 52 | # test mapping of higher-order nodes and paths 53 | assert g2.higher_order_node_to_path('a,b') == ('a', 'b'), \ 54 | "Error: mapping from higher-order node to first-order path failed" 55 | assert g2.higher_order_path_to_first_order(('a,b', 'b,c')) == ('a', 'b', 'c'), \ 56 | "Error: mapping from higher-order path to first-order path failed" 57 | 58 | 59 | def test_temp_net_extraction(temporal_network_object): 60 | t = temporal_network_object 61 | paths = pp.path_extraction.paths_from_temporal_network(t, delta=1) 62 | 63 | assert paths.observation_count == 10, \ 64 | "Extracted wrong number of time-respecting paths" 65 | 66 | 67 | def test_betweenness_preference_empty(): 68 | t = pp.TemporalNetwork() 69 | paths = pp.path_extraction.paths_from_temporal_network(t, delta=3) 70 | assert len(paths.nodes) == 0 71 | 72 | betweenness_pref = pp.algorithms.path_measures.betweenness_preference(paths, 'e', method='MLE') 73 | expected = 0.0 74 | assert betweenness_pref == 
pytest.approx(expected) 75 | 76 | 77 | def test_betweenness_preference_mle(temporal_network_object): 78 | t = temporal_network_object 79 | 80 | # Extract (time-respecting) paths 81 | p = pp.path_extraction.paths_from_temporal_network(t, delta=1) 82 | betweenness_pref = pp.algorithms.path_measures.betweenness_preference(p, 'e', method='MLE') 83 | expected = 1.2954618442383219 84 | assert betweenness_pref == pytest.approx(expected) 85 | 86 | 87 | def test_betweenness_preference_miller(temporal_network_object): 88 | t = temporal_network_object 89 | p = pp.path_extraction.paths_from_temporal_network(t, delta=1) 90 | 91 | betweenness_pref = pp.algorithms.path_measures.betweenness_preference(p, 'e', method='Miller') 92 | expected = 0.99546184423832196 93 | assert betweenness_pref == pytest.approx(expected) 94 | 95 | 96 | def test_betweenness_preference_normalized(temporal_network_object): 97 | t = temporal_network_object 98 | p = pp.path_extraction.paths_from_temporal_network(t, delta=1) 99 | # test normalize 100 | betweenness_pref = pp.algorithms.path_measures.betweenness_preference(p, 'e', normalized=True) 101 | expected_norm = 1 102 | assert betweenness_pref == pytest.approx(expected_norm) 103 | 104 | 105 | def test_slow_down_factor_random(random_paths): 106 | paths = random_paths(90, 90) 107 | slow_down_factor = pp.algorithms.path_measures.slow_down_factor(paths) 108 | expected = 4.05 109 | assert slow_down_factor == pytest.approx(expected, rel=1e-2), \ 110 | "Got slowdown factor %f but expected %f +- 1e-2" % (slow_down_factor, expected) 111 | 112 | 113 | def test_get_distance_matrix_temporal(temporal_network_object): 114 | p = pp.path_extraction.paths_from_temporal_network(temporal_network_object) 115 | shortest_paths_dict = pp.algorithms.shortest_paths.distance_matrix(p) 116 | 117 | path_distances = dict() 118 | for k in shortest_paths_dict: 119 | for p in shortest_paths_dict[k]: 120 | path_distances[(k, p)] = shortest_paths_dict[k][p] 121 | 122 | expected_distances = { 123 | ('c', 'e'): 1, 124 | ('c', 'f'): 2, 125 | ('c', 'c'): 0, 126 | ('b', 'g'): 2, 127 | ('f', 'e'): 1, 128 | ('c', 'b'): 4, 129 | ('a', 'a'): 0, 130 | ('a', 'g'): 2, 131 | ('g', 'g'): 0, 132 | ('e', 'g'): 1, 133 | ('e', 'e'): 0, 134 | ('b', 'b'): 0, 135 | ('e', 'b'): 1, 136 | ('e', 'f'): 1, 137 | ('f', 'b'): 2, 138 | ('a', 'e'): 1, 139 | ('f', 'f'): 0, 140 | ('b', 'e'): 1 141 | } 142 | assert path_distances == expected_distances 143 | 144 | 145 | def test_get_distance_matrix_empty(): 146 | p = pp.Paths() 147 | shortest_paths_dict = pp.algorithms.shortest_paths.distance_matrix(p) 148 | assert len(shortest_paths_dict) == 0 149 | 150 | @slow 151 | def test_entropy_growth_rate_ratio_mle(random_paths): 152 | p = random_paths(100, 500) 153 | mle_ratio = pp.algorithms.path_measures.entropy_growth_rate_ratio(p, method="MLE") 154 | mle_expected = 0.10515408343772015 155 | assert mle_ratio == pytest.approx(mle_expected) 156 | 157 | 158 | @slow 159 | def test_entropy_growth_rate_ratio_miller(random_paths): 160 | p = random_paths(100, 500) 161 | miller_ratio = pp.algorithms.path_measures.entropy_growth_rate_ratio(p, method="Miller") 162 | miller_expected = 0.88685603746914599 163 | assert miller_ratio == pytest.approx(miller_expected) 164 | -------------------------------------------------------------------------------- /tests/test_random_graphs.py: -------------------------------------------------------------------------------- 1 | import pathpy as pp 2 | 3 | import pytest 4 | import numpy as np 5 | 6 | 7 | def 
test_is_graphic_sequence(): 8 | 9 | sequence = [2, 2, 90] 10 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence) is False, \ 11 | 'Wrongly detected graphic sequence' 12 | 13 | sequence = [1, 1] 14 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence), \ 15 | 'Wrongly rejected graphic sequence' 16 | 17 | sequence = [1, 2, 3] 18 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence) is False, \ 19 | 'Wrongly detected graphic sequence' 20 | 21 | sequence = [2, 2] 22 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=True), \ 23 | 'Wrongly rejected graphic sequence' 24 | 25 | sequence = [2] 26 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=True), \ 27 | 'Wrongly rejected graphic sequence' 28 | 29 | sequence = [2] 30 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=True, 31 | multi_edges=True), \ 32 | 'Wrongly rejected graphic sequence' 33 | 34 | sequence = [2] 35 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=False) is False, \ 36 | 'Wrongly detected graphic sequence' 37 | 38 | sequence = [3, 3] 39 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, multi_edges=True, 40 | self_loops=True), \ 41 | 'Wrongly rejected graphic sequence' 42 | 43 | sequence = [1, 3] 44 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=True), \ 45 | 'Wrongly rejected graphic sequence' 46 | 47 | sequence = [1, 2] 48 | assert pp.algorithms.random_graphs.is_graphic_sequence(sequence, self_loops=True) is False, \ 49 | 'Wrongly detected graphic sequence' 50 | 51 | for i in range(10): 52 | g = pp.algorithms.random_graphs.erdoes_renyi_gnp(n=100, p=0.03, self_loops=False) 53 | assert pp.algorithms.random_graphs.is_graphic_sequence([x for x in g.degrees() if x > 0]), \ 54 | 'Wrongly rejected degree sequence of randomly generated graph' 55 | 56 | for i in range(10): 57 | g = pp.algorithms.random_graphs.erdoes_renyi_gnp(n=100, p=0.03, self_loops=True) 58 | # HACK: correct degrees for self_loops. Need to consistently define degrees of self-loops as two in pathpy! 
59 | for e in g.edges: 60 | if e[0] == e[1]: 61 | g.nodes[e[0]]['degree'] += 1 62 | assert pp.algorithms.random_graphs.is_graphic_sequence([x for x in g.degrees() if x > 0], 63 | self_loops=True), \ 64 | 'Wrongly rejected degree sequence of randomly generated graph' 65 | -------------------------------------------------------------------------------- /tests/test_spectral.py: -------------------------------------------------------------------------------- 1 | import pathpy as pp 2 | import pytest 3 | 4 | # absolute eigenvalue difference tolerance 5 | EIGEN_ABS_TOL = 1e-2 6 | 7 | 8 | @pytest.mark.parametrize('k, sub, e_gap', ( 9 | (2, False, 1e-9), 10 | (1, False, 1e-5), 11 | (2, True, 1), 12 | )) 13 | def test_eigen_value_gap(random_paths, k, sub, e_gap): 14 | import numpy as np 15 | p = random_paths(200, 0, 40) 16 | hon = pp.HigherOrderNetwork(p, k=k) 17 | np.random.seed(0) 18 | eigen_gap = pp.algorithms.spectral.eigenvalue_gap(hon, include_sub_paths=sub, lanczos_vectors=90) 19 | assert eigen_gap 20 | 21 | 22 | @pytest.mark.xfail 23 | @pytest.mark.parametrize('k, norm, e_sum, e_var', ( 24 | (3, True, 1, 0.0036494914419765924), 25 | (2, False, 2765.72998141474, 8.661474971012986), 26 | (1, True, 1, 0.04948386659908706), 27 | )) 28 | def test_fiedler_vector_sparse(random_paths, k, norm, e_sum, e_var): 29 | import numpy as np 30 | p = random_paths(90, 0, 20) 31 | hon = pp.HigherOrderNetwork(p, k=k) 32 | fv = pp.algorithms.spectral.fiedler_vector_sparse(hon, normalized=norm) 33 | assert fv.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL) 34 | assert np.sum(fv) == pytest.approx(e_sum, abs=EIGEN_ABS_TOL) 35 | 36 | 37 | @pytest.mark.xfail 38 | @pytest.mark.parametrize('k, e_sum, e_var', ( 39 | (3, 1, 0.003649586067168485), 40 | (2, (1.0000000000000002+0j), 0.0031136096467386416), 41 | (1, (-0.0009514819500764382+0.1190367717310192j), 0.049999999999999996), 42 | )) 43 | def test_fiedler_vector_dense(random_paths, k, e_sum, e_var): 44 | import numpy as np 45 | p = random_paths(90, 0, 20) 46 | hon = pp.HigherOrderNetwork(p, k=k) 47 | fv = pp.algorithms.spectral.fiedler_vector_dense(hon) 48 | assert fv.var() == pytest.approx(e_var, abs=EIGEN_ABS_TOL) 49 | assert np.sum(fv) == pytest.approx(e_sum, abs=EIGEN_ABS_TOL) 50 | 51 | 52 | @pytest.mark.xfail 53 | @pytest.mark.parametrize('k, e_sum', ( 54 | (3, 0.9967398214809227), 55 | (2, 0.24345712528855065), 56 | (1, 0.7143571081268268), 57 | )) 58 | def test_algebraic_connectivity(random_paths, k, e_sum): 59 | import pathpy 60 | p = random_paths(120, 0, 40) 61 | hon = pp.HigherOrderNetwork(p, k=k) 62 | ac = pp.algorithms.spectral.algebraic_connectivity(hon, lanczos_vectors=60, maxiter=40) 63 | assert ac == pytest.approx(e_sum, rel=1e-7) 64 | --------------------------------------------------------------------------------
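Taken together, the test modules above sketch the typical pathpy workflow. A minimal, hedged end-to-end example (the toy paths are illustrative; all calls mirror ones used in the tests):

```python
import pathpy as pp

# observed path statistics
paths = pp.Paths()
paths.add_path('a,b,c')
paths.add_path('b,c,d')
paths.add_path('a,b,c,d')

# first- and second-order models of the path statistics
hon1 = pp.HigherOrderNetwork(paths, k=1)
hon2 = pp.HigherOrderNetwork(paths, k=2)
print(hon2.ncount(), hon2.ecount())

# path-based measures used throughout the test suite
bw = pp.algorithms.centralities.betweenness(paths, normalized=True)
dist = pp.algorithms.shortest_paths.distance_matrix(paths)
print(bw, dist['a']['d'])
```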