├── .gitignore
├── LICENSE
├── Makefile
├── Pipfile
├── Pipfile.lock
├── README.md
├── analysis
│   ├── __init__.py
│   ├── ami.py
│   ├── citation-network-stats.py
│   ├── plot.py
│   ├── stats.py
│   └── wjaccard.py
├── create-representations
│   ├── Cargo.lock
│   ├── Cargo.toml
│   ├── README.md
│   └── src
│       ├── bipartite.rs
│       ├── config.rs
│       ├── hyperedge_similarity.rs
│       ├── hypergraph.rs
│       ├── js_similarity.rs
│       ├── main.rs
│       ├── multilayer.rs
│       ├── network.rs
│       ├── preprocess.rs
│       ├── representation.rs
│       └── unipartite.rs
├── data
│   ├── citations.txt
│   ├── example-paper.txt
│   ├── example.txt
│   ├── figure-1.txt
│   ├── hyperedge-names.csv
│   ├── minimal.txt
│   ├── networks-beyond-pairwise-interactions-references.tex
│   ├── paleo-1-77.txt
│   └── references-weighted.txt
├── hypergraph
│   ├── __init__.py
│   ├── __main__.py
│   ├── components.py
│   ├── main.py
│   ├── network
│   │   ├── __init__.py
│   │   ├── hypergraph.py
│   │   ├── network.py
│   │   └── tree.py
│   ├── optimize_weights.py
│   ├── representation
│   │   ├── __init__.py
│   │   ├── bipartite.py
│   │   ├── multilayer.py
│   │   └── unipartite.py
│   └── transition.py
├── notebooks
│   ├── hypergraph.ipynb
│   └── paleo data.ipynb
├── output
│   └── .gitkeep
└── references
    ├── __init__.py
    ├── __main__.py
    ├── get_citations.py
    ├── parse_references.py
    └── write_hypergraph.py
/.gitignore: --------------------------------------------------------------------------------
1 | 2 | # Created by https://www.toptal.com/developers/gitignore/api/pycharm+all,python,jupyternotebooks 3 | # Edit at https://www.toptal.com/developers/gitignore?templates=pycharm+all,python,jupyternotebooks 4 | 5 | ### JupyterNotebooks ### 6 | # gitignore template for Jupyter Notebooks 7 | # website: http://jupyter.org/ 8 | 9 | .ipynb_checkpoints 10 | */.ipynb_checkpoints/* 11 | 12 | # IPython 13 | profile_default/ 14 | ipython_config.py 15 | 16 | # Remove previous ipynb_checkpoints 17 | # git rm -r .ipynb_checkpoints/ 18 | 19 | ### PyCharm+all ### 20 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 21 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 22 | 23 | # User-specific stuff 24 | .idea/**/workspace.xml 25 | .idea/**/tasks.xml 26 | .idea/**/usage.statistics.xml 27 | .idea/**/dictionaries 28 | .idea/**/shelf 29 | 30 | # Generated files 31 | .idea/**/contentModel.xml 32 | 33 | # Sensitive or high-churn files 34 | .idea/**/dataSources/ 35 | .idea/**/dataSources.ids 36 | .idea/**/dataSources.local.xml 37 | .idea/**/sqlDataSources.xml 38 | .idea/**/dynamic.xml 39 | .idea/**/uiDesigner.xml 40 | .idea/**/dbnavigator.xml 41 | 42 | # Gradle 43 | .idea/**/gradle.xml 44 | .idea/**/libraries 45 | 46 | # Gradle and Maven with auto-import 47 | # When using Gradle or Maven with auto-import, you should exclude module files, 48 | # since they will be recreated, and may cause churn. Uncomment if using 49 | # auto-import.
50 | # .idea/artifacts 51 | # .idea/compiler.xml 52 | # .idea/jarRepositories.xml 53 | # .idea/modules.xml 54 | # .idea/*.iml 55 | # .idea/modules 56 | # *.iml 57 | # *.ipr 58 | 59 | # CMake 60 | cmake-build-*/ 61 | 62 | # Mongo Explorer plugin 63 | .idea/**/mongoSettings.xml 64 | 65 | # File-based project format 66 | *.iws 67 | 68 | # IntelliJ 69 | out/ 70 | 71 | # mpeltonen/sbt-idea plugin 72 | .idea_modules/ 73 | 74 | # JIRA plugin 75 | atlassian-ide-plugin.xml 76 | 77 | # Cursive Clojure plugin 78 | .idea/replstate.xml 79 | 80 | # Crashlytics plugin (for Android Studio and IntelliJ) 81 | com_crashlytics_export_strings.xml 82 | crashlytics.properties 83 | crashlytics-build.properties 84 | fabric.properties 85 | 86 | # Editor-based Rest Client 87 | .idea/httpRequests 88 | 89 | # Android studio 3.1+ serialized cache file 90 | .idea/caches/build_file_checksums.ser 91 | 92 | ### PyCharm+all Patch ### 93 | # Ignores the whole .idea folder and all .iml files 94 | # See https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 95 | 96 | .idea/ 97 | 98 | # Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-249601023 99 | 100 | *.iml 101 | modules.xml 102 | .idea/misc.xml 103 | *.ipr 104 | 105 | # Sonarlint plugin 106 | .idea/sonarlint 107 | 108 | ### Python ### 109 | # Byte-compiled / optimized / DLL files 110 | __pycache__/ 111 | *.py[cod] 112 | *$py.class 113 | 114 | # C extensions 115 | *.so 116 | 117 | # Distribution / packaging 118 | .Python 119 | build/ 120 | develop-eggs/ 121 | dist/ 122 | downloads/ 123 | eggs/ 124 | .eggs/ 125 | lib/ 126 | lib64/ 127 | parts/ 128 | sdist/ 129 | var/ 130 | wheels/ 131 | pip-wheel-metadata/ 132 | share/python-wheels/ 133 | *.egg-info/ 134 | .installed.cfg 135 | *.egg 136 | MANIFEST 137 | 138 | # PyInstaller 139 | # Usually these files are written by a python script from a template 140 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 141 | *.manifest 142 | *.spec 143 | 144 | # Installer logs 145 | pip-log.txt 146 | pip-delete-this-directory.txt 147 | 148 | # Unit test / coverage reports 149 | htmlcov/ 150 | .tox/ 151 | .nox/ 152 | .coverage 153 | .coverage.* 154 | .cache 155 | nosetests.xml 156 | coverage.xml 157 | *.cover 158 | *.py,cover 159 | .hypothesis/ 160 | .pytest_cache/ 161 | pytestdebug.log 162 | 163 | # Translations 164 | *.mo 165 | *.pot 166 | 167 | # Django stuff: 168 | *.log 169 | local_settings.py 170 | db.sqlite3 171 | db.sqlite3-journal 172 | 173 | # Flask stuff: 174 | instance/ 175 | .webassets-cache 176 | 177 | # Scrapy stuff: 178 | .scrapy 179 | 180 | # Sphinx documentation 181 | docs/_build/ 182 | doc/_build/ 183 | 184 | # PyBuilder 185 | target/ 186 | 187 | # Jupyter Notebook 188 | 189 | # IPython 190 | 191 | # pyenv 192 | .python-version 193 | 194 | # pipenv 195 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 196 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 197 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 198 | # install all needed dependencies. 199 | #Pipfile.lock 200 | 201 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 202 | __pypackages__/ 203 | 204 | # Celery stuff 205 | celerybeat-schedule 206 | celerybeat.pid 207 | 208 | # SageMath parsed files 209 | *.sage.py 210 | 211 | # Environments 212 | .env 213 | .venv 214 | env/ 215 | venv/ 216 | ENV/ 217 | env.bak/ 218 | venv.bak/ 219 | 220 | # Spyder project settings 221 | .spyderproject 222 | .spyproject 223 | 224 | # Rope project settings 225 | .ropeproject 226 | 227 | # mkdocs documentation 228 | /site 229 | 230 | # mypy 231 | .mypy_cache/ 232 | .dmypy.json 233 | dmypy.json 234 | 235 | # Pyre type checker 236 | .pyre/ 237 | 238 | # pytype static type analyzer 239 | .pytype/ 240 | 241 | # End of https://www.toptal.com/developers/gitignore/api/pycharm+all,python,jupyternotebooks 242 | 243 | *.ftree 244 | output 245 | results 246 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Anton Eriksson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
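The Makefile that follows drives every experiment through `python -m hypergraph`, one target per hypergraph representation (`-b` bipartite, `-B` non-backtracking bipartite, `-u`/`-U` undirected/directed unipartite, `-m` multilayer, `-M` multilayer similarity, with a trailing `k` adding self-links). For orientation, here is a minimal Python sketch of the same pipeline run programmatically; all calls are taken from `analysis/citation-network-stats.py` further down, while the input file and flag choices here are illustrative assumptions only:

```python
from hypergraph import representation
from hypergraph.components import largest_connected_component
from hypergraph.main import run_infomap
from hypergraph.network import HyperGraph, Tree, remove_simple_hyperedges

# Load one of the bundled example hypergraphs (illustrative input choice).
with open("data/example.txt") as fp:
    hypergraph = HyperGraph.from_iter(fp.readlines())

# Optional preprocessing, as used for the citation-network analysis.
hypergraph = largest_connected_component(hypergraph)
hypergraph = remove_simple_hyperedges(hypergraph)

# Build a multilayer representation and cluster it with Infomap.
multilayer = representation.multilayer(hypergraph, similarity_walk=False, self_links=True)
im = run_infomap(multilayer, directed=True, self_links=True)
tree = Tree.from_infomap(im)

print(tree.effective_assignments)
```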
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | OUTDIR := output 2 | DATA := data 3 | ARGS = $(INPUT) $(OUTDIR) 4 | 5 | # EXAMPLE 6 | .PHONY: example 7 | 8 | example: INPUT := $(DATA)/example.txt 9 | example: FLAGS := -w --teleportation-probability 0 $(ARGS) 10 | example: clean all_representations 11 | 12 | # MINIMAL EXAMPLE 13 | .PHONY: minimal 14 | 15 | minimal: INPUT := $(DATA)/minimal.txt 16 | minimal: FLAGS := -w --teleportation-probability 0.15 $(ARGS) 17 | minimal: clean weighted_representations 18 | 19 | # EXAMPLE FOR PAPER 20 | .PHONY: example_for_paper 21 | 22 | example_for_paper: INPUT := $(DATA)/example-paper.txt 23 | example_for_paper: FLAGS := -w -2 --teleportation-probability 0.15 $(ARGS) 24 | example_for_paper: clean weighted_representations 25 | 26 | # REFERENCES 27 | .PHONY: references references_weighted 28 | 29 | REFS := $(DATA)/references.txt 30 | REFS_WEIGHTED := $(DATA)/references-weighted.txt 31 | TEX_FILE := $(DATA)/networks-beyond-pairwise-interactions-references.tex 32 | 33 | $(REFS): 34 | python -m references --omega log-citations $(TEX_FILE) $(REFS) 35 | 36 | $(REFS_WEIGHTED): 37 | python -m references --omega log-citations --gamma-weighted $(TEX_FILE) $(REFS_WEIGHTED) 38 | 39 | references_weighted: INPUT := $(REFS_WEIGHTED) 40 | references_weighted: 41 | @$(MAKE) clean 42 | @$(MAKE) $(REFS_WEIGHTED) 43 | @$(MAKE) weighted_representations FLAGS="--num-trials 100 --largest-cc $(ARGS)" 44 | 45 | 46 | # SEEDS 47 | SEEDS = 1 2 3 4 5 6 7 8 9 10 48 | .PHONY: seeds $(SEEDS) 49 | 50 | seeds: 51 | @$(MAKE) clean 52 | @$(MAKE) $(REFS) 53 | @$(MAKE) $(SEEDS) 54 | 55 | $(SEEDS): INPUT := $(REFS) 56 | $(SEEDS): 57 | @$(MAKE) all_representations FLAGS="--seed $(@) $(ARGS)" 58 | 59 | # REPRESENTATIONS 60 | RUN := python -m hypergraph 61 | 62 | .PHONY: \ 63 | all_representations \ 64 | weighted_representations \ 65 | bipartite \ 66 | bipartite_non_backtracking \ 67 | unipartite_undirected \ 68 | unipartite_undirected_self_links \ 69 | unipartite_directed \ 70 | unipartite_directed_self_links \ 71 | multilayer \ 72 | multilayer_self_links 73 | 74 | weighted_representations: \ 75 | bipartite \ 76 | bipartite_non_backtracking \ 77 | unipartite_directed \ 78 | unipartite_directed_self_links \ 79 | multilayer \ 80 | multilayer_self_links \ 81 | multilayer_similarity \ 82 | multilayer_similarity_self_links 83 | 84 | all_representations: \ 85 | weighted_representations \ 86 | unipartite_undirected \ 87 | unipartite_undirected_self_links 88 | 89 | bipartite: 90 | $(RUN) -b $(FLAGS) 91 | 92 | bipartite_non_backtracking: 93 | $(RUN) -B $(FLAGS) 94 | 95 | unipartite_undirected: 96 | $(RUN) -u $(FLAGS) 97 | 98 | unipartite_undirected_self_links: 99 | $(RUN) -uk $(FLAGS) 100 | 101 | unipartite_directed: 102 | $(RUN) -U $(FLAGS) 103 | 104 | unipartite_directed_self_links: 105 | $(RUN) -Uk $(FLAGS) 106 | 107 | multilayer: 108 | $(RUN) -m $(FLAGS) 109 | 110 | multilayer_self_links: 111 | $(RUN) -mk $(FLAGS) 112 | 113 | multilayer_similarity: 114 | $(RUN) -M $(FLAGS) 115 | 116 | multilayer_similarity_self_links: 117 | $(RUN) -Mk $(FLAGS) 118 | 119 | # CLEAN 120 | .PHONY: clean 121 | 122 | clean: 123 | $(RM) -r $(OUTDIR)/*.{clu,tree,ftree,net} 124 | $(RM) -r $(OUTDIR)/**/*.{clu,tree,ftree,net} 125 | -------------------------------------------------------------------------------- /Pipfile: 
-------------------------------------------------------------------------------- 1 | [[source]] 2 | name = "pypi" 3 | url = "https://pypi.org/simple" 4 | verify_ssl = true 5 | 6 | [dev-packages] 7 | 8 | [packages] 9 | sklearn = "*" 10 | seaborn = "*" 11 | scholarly = "*" 12 | numba = "*" 13 | infomap = "1.3.0" 14 | 15 | [requires] 16 | python_version = "3.8" 17 | -------------------------------------------------------------------------------- /Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "6baec7958eeb07790b59ea5b802f335746cca140f31d7d6b965ba148b56d0bd6" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.8" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "alabaster": { 20 | "hashes": [ 21 | "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", 22 | "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" 23 | ], 24 | "version": "==0.7.12" 25 | }, 26 | "arrow": { 27 | "hashes": [ 28 | "sha256:e098abbd9af3665aea81bdd6c869e93af4feb078e98468dd351c383af187aac5", 29 | "sha256:ff08d10cda1d36c68657d6ad20d74fbea493d980f8b2d45344e00d6ed2bf6ed4" 30 | ], 31 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 32 | "version": "==0.17.0" 33 | }, 34 | "babel": { 35 | "hashes": [ 36 | "sha256:9d35c22fcc79893c3ecc85ac4a56cde1ecf3f19c540bba0922308a6c06ca6fa5", 37 | "sha256:da031ab54472314f210b0adcff1588ee5d1d1d0ba4dbd07b94dba82bde791e05" 38 | ], 39 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 40 | "version": "==2.9.0" 41 | }, 42 | "beautifulsoup4": { 43 | "hashes": [ 44 | "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", 45 | "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", 46 | "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" 47 | ], 48 | "version": "==4.9.3" 49 | }, 50 | "bibtexparser": { 51 | "hashes": [ 52 | "sha256:0f9ab94e3fc36ee2ee6a3713c5dd7320d4b6ee52bd66ecbab03c6b06675ae410" 53 | ], 54 | "version": "==1.2.0" 55 | }, 56 | "certifi": { 57 | "hashes": [ 58 | "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", 59 | "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" 60 | ], 61 | "version": "==2020.12.5" 62 | }, 63 | "cffi": { 64 | "hashes": [ 65 | "sha256:00a1ba5e2e95684448de9b89888ccd02c98d512064b4cb987d48f4b40aa0421e", 66 | "sha256:00e28066507bfc3fe865a31f325c8391a1ac2916219340f87dfad602c3e48e5d", 67 | "sha256:045d792900a75e8b1e1b0ab6787dd733a8190ffcf80e8c8ceb2fb10a29ff238a", 68 | "sha256:0638c3ae1a0edfb77c6765d487fee624d2b1ee1bdfeffc1f0b58c64d149e7eec", 69 | "sha256:105abaf8a6075dc96c1fe5ae7aae073f4696f2905fde6aeada4c9d2926752362", 70 | "sha256:155136b51fd733fa94e1c2ea5211dcd4c8879869008fc811648f16541bf99668", 71 | "sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c", 72 | "sha256:1d2c4994f515e5b485fd6d3a73d05526aa0fcf248eb135996b088d25dfa1865b", 73 | "sha256:2c24d61263f511551f740d1a065eb0212db1dbbbbd241db758f5244281590c06", 74 | "sha256:51a8b381b16ddd370178a65360ebe15fbc1c71cf6f584613a7ea08bfad946698", 75 | "sha256:594234691ac0e9b770aee9fcdb8fa02c22e43e5c619456efd0d6c2bf276f3eb2", 76 | "sha256:5cf4be6c304ad0b6602f5c4e90e2f59b47653ac1ed9c662ed379fe48a8f26b0c", 77 | 
"sha256:64081b3f8f6f3c3de6191ec89d7dc6c86a8a43911f7ecb422c60e90c70be41c7", 78 | "sha256:6bc25fc545a6b3d57b5f8618e59fc13d3a3a68431e8ca5fd4c13241cd70d0009", 79 | "sha256:798caa2a2384b1cbe8a2a139d80734c9db54f9cc155c99d7cc92441a23871c03", 80 | "sha256:7c6b1dece89874d9541fc974917b631406233ea0440d0bdfbb8e03bf39a49b3b", 81 | "sha256:840793c68105fe031f34d6a086eaea153a0cd5c491cde82a74b420edd0a2b909", 82 | "sha256:8d6603078baf4e11edc4168a514c5ce5b3ba6e3e9c374298cb88437957960a53", 83 | "sha256:9cc46bc107224ff5b6d04369e7c595acb700c3613ad7bcf2e2012f62ece80c35", 84 | "sha256:9f7a31251289b2ab6d4012f6e83e58bc3b96bd151f5b5262467f4bb6b34a7c26", 85 | "sha256:9ffb888f19d54a4d4dfd4b3f29bc2c16aa4972f1c2ab9c4ab09b8ab8685b9c2b", 86 | "sha256:a5ed8c05548b54b998b9498753fb9cadbfd92ee88e884641377d8a8b291bcc01", 87 | "sha256:a7711edca4dcef1a75257b50a2fbfe92a65187c47dab5a0f1b9b332c5919a3fb", 88 | "sha256:af5c59122a011049aad5dd87424b8e65a80e4a6477419c0c1015f73fb5ea0293", 89 | "sha256:b18e0a9ef57d2b41f5c68beefa32317d286c3d6ac0484efd10d6e07491bb95dd", 90 | "sha256:b4e248d1087abf9f4c10f3c398896c87ce82a9856494a7155823eb45a892395d", 91 | "sha256:ba4e9e0ae13fc41c6b23299545e5ef73055213e466bd107953e4a013a5ddd7e3", 92 | "sha256:c6332685306b6417a91b1ff9fae889b3ba65c2292d64bd9245c093b1b284809d", 93 | "sha256:d5ff0621c88ce83a28a10d2ce719b2ee85635e85c515f12bac99a95306da4b2e", 94 | "sha256:d9efd8b7a3ef378dd61a1e77367f1924375befc2eba06168b6ebfa903a5e59ca", 95 | "sha256:df5169c4396adc04f9b0a05f13c074df878b6052430e03f50e68adf3a57aa28d", 96 | "sha256:ebb253464a5d0482b191274f1c8bf00e33f7e0b9c66405fbffc61ed2c839c775", 97 | "sha256:ec80dc47f54e6e9a78181ce05feb71a0353854cc26999db963695f950b5fb375", 98 | "sha256:f032b34669220030f905152045dfa27741ce1a6db3324a5bc0b96b6c7420c87b", 99 | "sha256:f60567825f791c6f8a592f3c6e3bd93dd2934e3f9dac189308426bd76b00ef3b", 100 | "sha256:f803eaa94c2fcda012c047e62bc7a51b0bdabda1cad7a92a522694ea2d76e49f" 101 | ], 102 | "version": "==1.14.4" 103 | }, 104 | "chardet": { 105 | "hashes": [ 106 | "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", 107 | "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" 108 | ], 109 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 110 | "version": "==4.0.0" 111 | }, 112 | "cryptography": { 113 | "hashes": [ 114 | "sha256:0003a52a123602e1acee177dc90dd201f9bb1e73f24a070db7d36c588e8f5c7d", 115 | "sha256:0e85aaae861d0485eb5a79d33226dd6248d2a9f133b81532c8f5aae37de10ff7", 116 | "sha256:594a1db4511bc4d960571536abe21b4e5c3003e8750ab8365fafce71c5d86901", 117 | "sha256:69e836c9e5ff4373ce6d3ab311c1a2eed274793083858d3cd4c7d12ce20d5f9c", 118 | "sha256:788a3c9942df5e4371c199d10383f44a105d67d401fb4304178020142f020244", 119 | "sha256:7e177e4bea2de937a584b13645cab32f25e3d96fc0bc4a4cf99c27dc77682be6", 120 | "sha256:83d9d2dfec70364a74f4e7c70ad04d3ca2e6a08b703606993407bf46b97868c5", 121 | "sha256:84ef7a0c10c24a7773163f917f1cb6b4444597efd505a8aed0a22e8c4780f27e", 122 | "sha256:9e21301f7a1e7c03dbea73e8602905a4ebba641547a462b26dd03451e5769e7c", 123 | "sha256:9f6b0492d111b43de5f70052e24c1f0951cb9e6022188ebcb1cc3a3d301469b0", 124 | "sha256:a69bd3c68b98298f490e84519b954335154917eaab52cf582fa2c5c7efc6e812", 125 | "sha256:b4890d5fb9b7a23e3bf8abf5a8a7da8e228f1e97dc96b30b95685df840b6914a", 126 | "sha256:c366df0401d1ec4e548bebe8f91d55ebcc0ec3137900d214dd7aac8427ef3030", 127 | "sha256:dc42f645f8f3a489c3dd416730a514e7a91a59510ddaadc09d04224c098d3302" 128 | ], 129 | "version": "==3.3.1" 130 | }, 131 | "cycler": { 132 | "hashes": 
[ 133 | "sha256:1d8a5ae1ff6c5cf9b93e8811e581232ad8920aeec647c37316ceac982b08cb2d", 134 | "sha256:cd7b2d1018258d7247a71425e9f26463dfb444d411c39569972f4ce586b0c9d8" 135 | ], 136 | "version": "==0.10.0" 137 | }, 138 | "docutils": { 139 | "hashes": [ 140 | "sha256:0c5b78adfbf7762415433f5515cd5c9e762339e23369dbe8000d84a4bf4ab3af", 141 | "sha256:c2de3a60e9e7d07be26b7f2b00ca0309c207e06c100f9cc2a94931fc75a478fc" 142 | ], 143 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 144 | "version": "==0.16" 145 | }, 146 | "fake-useragent": { 147 | "hashes": [ 148 | "sha256:c104998b750eb097eefc28ae28e92d66397598d2cf41a31aa45d5559ef1adf35" 149 | ], 150 | "version": "==0.1.11" 151 | }, 152 | "free-proxy": { 153 | "hashes": [ 154 | "sha256:2a6bcf25c8ca03d186e171fe384dab299a72073f38d3e3515d068d92d9d1635c", 155 | "sha256:91b667cc89686139695db85b926bcfad4ca1f07a3e0209359182b4f744f425b2" 156 | ], 157 | "markers": "python_version >= '3.6'", 158 | "version": "==1.0.2" 159 | }, 160 | "future": { 161 | "hashes": [ 162 | "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d" 163 | ], 164 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 165 | "version": "==0.18.2" 166 | }, 167 | "idna": { 168 | "hashes": [ 169 | "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", 170 | "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" 171 | ], 172 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 173 | "version": "==2.10" 174 | }, 175 | "imagesize": { 176 | "hashes": [ 177 | "sha256:6965f19a6a2039c7d48bca7dba2473069ff854c36ae6f19d2cde309d998228a1", 178 | "sha256:b1f6b5a4eab1f73479a50fb79fcf729514a900c341d8503d62a62dbc4127a2b1" 179 | ], 180 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 181 | "version": "==1.2.0" 182 | }, 183 | "infomap": { 184 | "hashes": [ 185 | "sha256:a972f1f0fc4728c08c902f90dbb4a9c1ad8fb2c97aafdd80ba0fdda4fa40d518", 186 | "sha256:b7a17ef31242b0d8204fbc630487bfc834310cdbceb89e440586388756e10c01" 187 | ], 188 | "index": "pypi", 189 | "version": "==1.3.0" 190 | }, 191 | "jinja2": { 192 | "hashes": [ 193 | "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", 194 | "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" 195 | ], 196 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 197 | "version": "==2.11.2" 198 | }, 199 | "joblib": { 200 | "hashes": [ 201 | "sha256:75ead23f13484a2a414874779d69ade40d4fa1abe62b222a23cd50d4bc822f6f", 202 | "sha256:7ad866067ac1fdec27d51c8678ea760601b70e32ff1881d4dc8e1171f2b64b24" 203 | ], 204 | "markers": "python_version >= '3.6'", 205 | "version": "==1.0.0" 206 | }, 207 | "kiwisolver": { 208 | "hashes": [ 209 | "sha256:0cd53f403202159b44528498de18f9285b04482bab2a6fc3f5dd8dbb9352e30d", 210 | "sha256:1e1bc12fb773a7b2ffdeb8380609f4f8064777877b2225dec3da711b421fda31", 211 | "sha256:225e2e18f271e0ed8157d7f4518ffbf99b9450fca398d561eb5c4a87d0986dd9", 212 | "sha256:232c9e11fd7ac3a470d65cd67e4359eee155ec57e822e5220322d7b2ac84fbf0", 213 | "sha256:31dfd2ac56edc0ff9ac295193eeaea1c0c923c0355bf948fbd99ed6018010b72", 214 | "sha256:33449715e0101e4d34f64990352bce4095c8bf13bed1b390773fc0a7295967b3", 215 | "sha256:401a2e9afa8588589775fe34fc22d918ae839aaaf0c0e96441c0fdbce6d8ebe6", 216 | "sha256:44a62e24d9b01ba94ae7a4a6c3fb215dc4af1dde817e7498d901e229aaf50e4e", 217 | 
"sha256:50af681a36b2a1dee1d3c169ade9fdc59207d3c31e522519181e12f1b3ba7000", 218 | "sha256:563c649cfdef27d081c84e72a03b48ea9408c16657500c312575ae9d9f7bc1c3", 219 | "sha256:5989db3b3b34b76c09253deeaf7fbc2707616f130e166996606c284395da3f18", 220 | "sha256:5a7a7dbff17e66fac9142ae2ecafb719393aaee6a3768c9de2fd425c63b53e21", 221 | "sha256:5c3e6455341008a054cccee8c5d24481bcfe1acdbc9add30aa95798e95c65621", 222 | "sha256:5f6ccd3dd0b9739edcf407514016108e2280769c73a85b9e59aa390046dbf08b", 223 | "sha256:72c99e39d005b793fb7d3d4e660aed6b6281b502e8c1eaf8ee8346023c8e03bc", 224 | "sha256:78751b33595f7f9511952e7e60ce858c6d64db2e062afb325985ddbd34b5c131", 225 | "sha256:834ee27348c4aefc20b479335fd422a2c69db55f7d9ab61721ac8cd83eb78882", 226 | "sha256:8be8d84b7d4f2ba4ffff3665bcd0211318aa632395a1a41553250484a871d454", 227 | "sha256:950a199911a8d94683a6b10321f9345d5a3a8433ec58b217ace979e18f16e248", 228 | "sha256:a357fd4f15ee49b4a98b44ec23a34a95f1e00292a139d6015c11f55774ef10de", 229 | "sha256:a53d27d0c2a0ebd07e395e56a1fbdf75ffedc4a05943daf472af163413ce9598", 230 | "sha256:acef3d59d47dd85ecf909c359d0fd2c81ed33bdff70216d3956b463e12c38a54", 231 | "sha256:b38694dcdac990a743aa654037ff1188c7a9801ac3ccc548d3341014bc5ca278", 232 | "sha256:b9edd0110a77fc321ab090aaa1cfcaba1d8499850a12848b81be2222eab648f6", 233 | "sha256:c08e95114951dc2090c4a630c2385bef681cacf12636fb0241accdc6b303fd81", 234 | "sha256:c5518d51a0735b1e6cee1fdce66359f8d2b59c3ca85dc2b0813a8aa86818a030", 235 | "sha256:c8fd0f1ae9d92b42854b2979024d7597685ce4ada367172ed7c09edf2cef9cb8", 236 | "sha256:ca3820eb7f7faf7f0aa88de0e54681bddcb46e485beb844fcecbcd1c8bd01689", 237 | "sha256:cf8b574c7b9aa060c62116d4181f3a1a4e821b2ec5cbfe3775809474113748d4", 238 | "sha256:d3155d828dec1d43283bd24d3d3e0d9c7c350cdfcc0bd06c0ad1209c1bbc36d0", 239 | "sha256:f8d6f8db88049a699817fd9178782867bf22283e3813064302ac59f61d95be05", 240 | "sha256:fd34fbbfbc40628200730bc1febe30631347103fc8d3d4fa012c21ab9c11eca9" 241 | ], 242 | "markers": "python_version >= '3.6'", 243 | "version": "==1.3.1" 244 | }, 245 | "llvmlite": { 246 | "hashes": [ 247 | "sha256:10f444ab648fb4b0ca266d0dcd201892aa8051db11f5dc98dc79631fc6bbf528", 248 | "sha256:4b510150a5cba35f3014ce7614c4b4d2b8a5aeeebe930693825711d66c8f127f", 249 | "sha256:4d1ed8d1d20cf57fdfff8560740283c28f44b2dd6c3749e4677c3e19b914da0a", 250 | "sha256:6365f3bd781512506761e081ae88722f7acdd389ae486512d3612cdbbaf1b3f4", 251 | "sha256:6383bf98f71c0ccafb20ed1fc560127c26b8db9a2f3aeb09d2be4ae26c3d2674", 252 | "sha256:733c8a191fa8294abb4f6a699306339b19afce84c6fc29646b5c40be92fdee41", 253 | "sha256:75120207100c87ecf0a4bf297cd7da2ff04bf2a97aecfa2d327723f83e457779", 254 | "sha256:80e51d5aa02ad72da9870e89d21f9b152b0220ca551b4596a6c0614bcde336fc", 255 | "sha256:822975d3ad2861d163ce7b1474e32e6ca7c6a6e76143c461ffc43aedfb610857", 256 | "sha256:8381b5530b4064a913e0bf1fb5cdd714ddd1834e0496a9343c905be5683e013a", 257 | "sha256:9166ed996df3e345409bd4d98bae58e0b5e85eb2f4c32b186ff5c9ae93448da5", 258 | "sha256:aa844f9c0961799530915b45545c287bec1970399da27629a8d9e762ab55de9f", 259 | "sha256:c541226f3ceb5bd311ef4786ad0ccfff2ed10fa601b4788b7fe8164c16719ba0", 260 | "sha256:d5fcb329c3a8c69f280b57f91833f8a939e6688eebd4614cf6d3e04424ef3330", 261 | "sha256:d99059da5630d4c38b155ef0cccd34932a8d16e2c5d18b29ec6d6ec06ef3c8b7", 262 | "sha256:f36f1ee5905c5e91254376db7df9163aa7793cfd79220a98ef3c9b59895f0008" 263 | ], 264 | "markers": "python_version >= '3.6'", 265 | "version": "==0.35.0" 266 | }, 267 | "lxml": { 268 | "hashes": [ 269 | 
"sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d", 270 | "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37", 271 | "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01", 272 | "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2", 273 | "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644", 274 | "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75", 275 | "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80", 276 | "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2", 277 | "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780", 278 | "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98", 279 | "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308", 280 | "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf", 281 | "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388", 282 | "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d", 283 | "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3", 284 | "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8", 285 | "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af", 286 | "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2", 287 | "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e", 288 | "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939", 289 | "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03", 290 | "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d", 291 | "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a", 292 | "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5", 293 | "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a", 294 | "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711", 295 | "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf", 296 | "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089", 297 | "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505", 298 | "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b", 299 | "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f", 300 | "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc", 301 | "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e", 302 | "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931", 303 | "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc", 304 | "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe", 305 | "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e" 306 | ], 307 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 308 | "version": "==4.6.2" 309 | }, 310 | "markupsafe": { 311 | "hashes": [ 312 | "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", 313 | "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", 314 | "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", 315 | "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", 316 | 
"sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", 317 | "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", 318 | "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", 319 | "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", 320 | "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", 321 | "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", 322 | "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", 323 | "sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", 324 | "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", 325 | "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", 326 | "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", 327 | "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", 328 | "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", 329 | "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", 330 | "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", 331 | "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", 332 | "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 333 | "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", 334 | "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", 335 | "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", 336 | "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", 337 | "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", 338 | "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", 339 | "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", 340 | "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", 341 | "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", 342 | "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", 343 | "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", 344 | "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be" 345 | ], 346 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 347 | "version": "==1.1.1" 348 | }, 349 | "matplotlib": { 350 | "hashes": [ 351 | "sha256:09225edca87a79815822eb7d3be63a83ebd4d9d98d5aa3a15a94f4eee2435954", 352 | "sha256:0caa687fce6174fef9b27d45f8cc57cbc572e04e98c81db8e628b12b563d59a2", 353 | "sha256:27c9393fada62bd0ad7c730562a0fecbd3d5aaa8d9ed80ba7d3ebb8abc4f0453", 354 | "sha256:2c2c5041608cb75c39cbd0ed05256f8a563e144234a524c59d091abbfa7a868f", 355 | "sha256:2d31aff0c8184b05006ad756b9a4dc2a0805e94d28f3abc3187e881b6673b302", 356 | "sha256:3a4c3e9be63adf8e9b305aa58fb3ec40ecc61fd0f8fd3328ce55bc30e7a2aeb0", 357 | "sha256:5111d6d47a0f5b8f3e10af7a79d5e7eb7e73a22825391834734274c4f312a8a0", 358 | "sha256:5ed3d3342698c2b1f3651f8ea6c099b0f196d16ee00e33dc3a6fee8cb01d530a", 359 | "sha256:6ffd2d80d76df2e5f9f0c0140b5af97e3b87dd29852dcdb103ec177d853ec06b", 360 | "sha256:746897fbd72bd462b888c74ed35d812ca76006b04f717cd44698cdfc99aca70d", 361 | "sha256:756ee498b9ba35460e4cbbd73f09018e906daa8537fff61da5b5bf8d5e9de5c7", 362 | "sha256:7ad44f2c74c50567c694ee91c6fa16d67e7c8af6f22c656b80469ad927688457", 363 | 
"sha256:83e6c895d93fdf93eeff1a21ee96778ba65ef258e5d284160f7c628fee40c38f", 364 | "sha256:9b03722c89a43a61d4d148acfc89ec5bb54cd0fd1539df25b10eb9c5fa6c393a", 365 | "sha256:a4fe54eab2c7129add75154823e6543b10261f9b65b2abe692d68743a4999f8c", 366 | "sha256:b1b60c6476c4cfe9e5cf8ab0d3127476fd3d5f05de0f343a452badaad0e4bdec", 367 | "sha256:b26c472847911f5a7eb49e1c888c31c77c4ddf8023c1545e0e8e0367ba74fb15", 368 | "sha256:b2a5e1f637a92bb6f3526cc54cc8af0401112e81ce5cba6368a1b7908f9e18bc", 369 | "sha256:b7b09c61a91b742cb5460b72efd1fe26ef83c1c704f666e0af0df156b046aada", 370 | "sha256:b8ba2a1dbb4660cb469fe8e1febb5119506059e675180c51396e1723ff9b79d9", 371 | "sha256:c092fc4673260b1446b8578015321081d5db73b94533fe4bf9b69f44e948d174", 372 | "sha256:c586ac1d64432f92857c3cf4478cfb0ece1ae18b740593f8a39f2f0b27c7fda5", 373 | "sha256:d082f77b4ed876ae94a9373f0db96bf8768a7cca6c58fc3038f94e30ffde1880", 374 | "sha256:e71cdd402047e657c1662073e9361106c6981e9621ab8c249388dfc3ec1de07b", 375 | "sha256:eb6b6700ea454bb88333d98601e74928e06f9669c1ea231b4c4c666c1d7701b4" 376 | ], 377 | "markers": "python_version >= '3.6'", 378 | "version": "==3.3.3" 379 | }, 380 | "numba": { 381 | "hashes": [ 382 | "sha256:0115d0a69b3eacaa7d762b5c6b5f03179bb848470af7188785c03b2e1b3ca857", 383 | "sha256:17c799904ab890107895e376a52912b0bf8c05d60930acd6761d48ad3ee4f155", 384 | "sha256:1e60e29efe9f9b6f4378c3890a61701d961e76990ecfce4f0dd59bc728089f7d", 385 | "sha256:227c766ccc4730766a225d27b047b8099857fc5000d323e182d95fa4cd21ad13", 386 | "sha256:2edfdaff425c3ca88f67c8560fb1566de323259706b2af6c1116542d2a5a642d", 387 | "sha256:44661c5bd85e3d3619be0a40eedee34e397e9ccb3d4c458b70e10bf95d1ce933", 388 | "sha256:4a99d8110f92f1c03fb63d676083c0512c725b196b5513295808ef7402e4854a", 389 | "sha256:668bd792635914160e42f7fa900d9daa013bdaa9d6dae9f557454ac5bd084ba7", 390 | "sha256:774aae8b3cd90338a79bd2cabd4e2c28d470102019ecd7913d9f71dbdff36c04", 391 | "sha256:77b726448d778cb8637a50c3be151a14a7e698a4a7b1a698ac34160482505da5", 392 | "sha256:a19e543a254caca74acd494438ca30292854e0291e5e91a2e54b50714b4428c3", 393 | "sha256:a489119db86896d23b608bb77c2702cc72289d1281bcf123f4bc4cdec5e72879", 394 | "sha256:b213436ee6f8c18a92d5bc2e6129111c47e1b1cec890ddf8d7ae0b38f62da70e", 395 | "sha256:c6a8b52b260549a0496ee5a0e785153ddc26569c824da39775e762711ef53938", 396 | "sha256:e8e9274bda21782928bcdf4919cd1854fa1c0962461f385f6f5c686aeceed847", 397 | "sha256:ed9a3704827055c0882d9aff2f8785bcd9a5fe7eae044459cc0d5f3e0a80706b" 398 | ], 399 | "index": "pypi", 400 | "version": "==0.52.0" 401 | }, 402 | "numpy": { 403 | "hashes": [ 404 | "sha256:012426a41bc9ab63bb158635aecccc7610e3eff5d31d1eb43bc099debc979d94", 405 | "sha256:06fab248a088e439402141ea04f0fffb203723148f6ee791e9c75b3e9e82f080", 406 | "sha256:0eef32ca3132a48e43f6a0f5a82cb508f22ce5a3d6f67a8329c81c8e226d3f6e", 407 | "sha256:1ded4fce9cfaaf24e7a0ab51b7a87be9038ea1ace7f34b841fe3b6894c721d1c", 408 | "sha256:2e55195bc1c6b705bfd8ad6f288b38b11b1af32f3c8289d6c50d47f950c12e76", 409 | "sha256:2ea52bd92ab9f768cc64a4c3ef8f4b2580a17af0a5436f6126b08efbd1838371", 410 | "sha256:36674959eed6957e61f11c912f71e78857a8d0604171dfd9ce9ad5cbf41c511c", 411 | "sha256:384ec0463d1c2671170901994aeb6dce126de0a95ccc3976c43b0038a37329c2", 412 | "sha256:39b70c19ec771805081578cc936bbe95336798b7edf4732ed102e7a43ec5c07a", 413 | "sha256:400580cbd3cff6ffa6293df2278c75aef2d58d8d93d3c5614cd67981dae68ceb", 414 | "sha256:43d4c81d5ffdff6bae58d66a3cd7f54a7acd9a0e7b18d97abb255defc09e3140", 415 | "sha256:50a4a0ad0111cc1b71fa32dedd05fa239f7fb5a43a40663269bb5dc7877cfd28", 416 | 
"sha256:603aa0706be710eea8884af807b1b3bc9fb2e49b9f4da439e76000f3b3c6ff0f", 417 | "sha256:6149a185cece5ee78d1d196938b2a8f9d09f5a5ebfbba66969302a778d5ddd1d", 418 | "sha256:759e4095edc3c1b3ac031f34d9459fa781777a93ccc633a472a5468587a190ff", 419 | "sha256:7fb43004bce0ca31d8f13a6eb5e943fa73371381e53f7074ed21a4cb786c32f8", 420 | "sha256:811daee36a58dc79cf3d8bdd4a490e4277d0e4b7d103a001a4e73ddb48e7e6aa", 421 | "sha256:8b5e972b43c8fc27d56550b4120fe6257fdc15f9301914380b27f74856299fea", 422 | "sha256:99abf4f353c3d1a0c7a5f27699482c987cf663b1eac20db59b8c7b061eabd7fc", 423 | "sha256:a0d53e51a6cb6f0d9082decb7a4cb6dfb33055308c4c44f53103c073f649af73", 424 | "sha256:a12ff4c8ddfee61f90a1633a4c4afd3f7bcb32b11c52026c92a12e1325922d0d", 425 | "sha256:a4646724fba402aa7504cd48b4b50e783296b5e10a524c7a6da62e4a8ac9698d", 426 | "sha256:a76f502430dd98d7546e1ea2250a7360c065a5fdea52b2dffe8ae7180909b6f4", 427 | "sha256:a9d17f2be3b427fbb2bce61e596cf555d6f8a56c222bd2ca148baeeb5e5c783c", 428 | "sha256:ab83f24d5c52d60dbc8cd0528759532736b56db58adaa7b5f1f76ad551416a1e", 429 | "sha256:aeb9ed923be74e659984e321f609b9ba54a48354bfd168d21a2b072ed1e833ea", 430 | "sha256:c843b3f50d1ab7361ca4f0b3639bf691569493a56808a0b0c54a051d260b7dbd", 431 | "sha256:cae865b1cae1ec2663d8ea56ef6ff185bad091a5e33ebbadd98de2cfa3fa668f", 432 | "sha256:cc6bd4fd593cb261332568485e20a0712883cf631f6f5e8e86a52caa8b2b50ff", 433 | "sha256:cf2402002d3d9f91c8b01e66fbb436a4ed01c6498fffed0e4c7566da1d40ee1e", 434 | "sha256:d051ec1c64b85ecc69531e1137bb9751c6830772ee5c1c426dbcfe98ef5788d7", 435 | "sha256:d6631f2e867676b13026e2846180e2c13c1e11289d67da08d71cacb2cd93d4aa", 436 | "sha256:dbd18bcf4889b720ba13a27ec2f2aac1981bd41203b3a3b27ba7a33f88ae4827", 437 | "sha256:df609c82f18c5b9f6cb97271f03315ff0dbe481a2a02e56aeb1b1a985ce38e60" 438 | ], 439 | "markers": "python_version >= '3.6'", 440 | "version": "==1.19.5" 441 | }, 442 | "packaging": { 443 | "hashes": [ 444 | "sha256:24e0da08660a87484d1602c30bb4902d74816b6985b93de36926f5bc95741858", 445 | "sha256:78598185a7008a470d64526a8059de9aaa449238f280fc9eb6b13ba6c4109093" 446 | ], 447 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 448 | "version": "==20.8" 449 | }, 450 | "pandas": { 451 | "hashes": [ 452 | "sha256:0be6102dd99910513e75ed6536284743ead810349c51bdeadd2a5b6649f30abb", 453 | "sha256:272675a98fa4954b9fc0933df775596fc942e50015d7e75d8f19548808a2bfdf", 454 | "sha256:2d8b4f532db37418121831a461fd107d826c240b098f52e7a1b4ab3d5aaa4fb2", 455 | "sha256:33318fa24b192b1a4684347ff76679a7267fd4e547da9f71556a5914f0dc10e7", 456 | "sha256:3bc6d2be03cb75981d8cbeda09503cd9d6d699fc0dc28a65e197165ad527b7b8", 457 | "sha256:43482789c55cbabeed9482263cfc98a11e8fcae900cb63ef038948acb4a72570", 458 | "sha256:616478c1bd8fe1e600f521ae2da434e021c11e7a4e5da3451d02906143d3629a", 459 | "sha256:6c1a57e4d0d6f9633a07817c44e6b36d81c265fe4c52d0c0505513a2d0f7953c", 460 | "sha256:7904ee438549b5223ce8dc008772458dd7c5cf0ccc64cf903e81202400702235", 461 | "sha256:7b54c14130a3448d81eed1348f52429c23e27188d9db6e6d4afeae792bc49c11", 462 | "sha256:8f92b07cdbfa3704d85b4264e52c216cafe6c0059b0d07cdad8cb29e0b90f2b8", 463 | "sha256:91fd0b94e7b98528177a05e6f65efea79d7ef9dec15ee48c7c69fc39fdd87235", 464 | "sha256:9c6692cea6d56da8650847172bdb148622f545e7782d17995822434c79d7a211", 465 | "sha256:9e18631d996fe131de6cb31a8bdae18965cc8f39eb23fdfbbf42808ecc63dabf", 466 | "sha256:cba93d4fd3b0a42858b2b599495aff793fb5d94587979f45a14177d1217ba446", 467 | "sha256:e03386615b970b8b41da6a68afe717626741bb2431cec993640685614c0680e4", 468 | 
"sha256:f8b87d2f541cd9bc4ecfe85a561abac85c33fe4de4ce70cca36b2768af2611f5" 469 | ], 470 | "markers": "python_full_version >= '3.7.1'", 471 | "version": "==1.2.0" 472 | }, 473 | "pillow": { 474 | "hashes": [ 475 | "sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6", 476 | "sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865", 477 | "sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174", 478 | "sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032", 479 | "sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a", 480 | "sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e", 481 | "sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378", 482 | "sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17", 483 | "sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c", 484 | "sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913", 485 | "sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7", 486 | "sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0", 487 | "sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820", 488 | "sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba", 489 | "sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2", 490 | "sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b", 491 | "sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9", 492 | "sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234", 493 | "sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d", 494 | "sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5", 495 | "sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206", 496 | "sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9", 497 | "sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8", 498 | "sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59", 499 | "sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d", 500 | "sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a", 501 | "sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b", 502 | "sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d" 503 | ], 504 | "markers": "python_version >= '3.6'", 505 | "version": "==8.1.0" 506 | }, 507 | "pycparser": { 508 | "hashes": [ 509 | "sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", 510 | "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" 511 | ], 512 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 513 | "version": "==2.20" 514 | }, 515 | "pygments": { 516 | "hashes": [ 517 | "sha256:ccf3acacf3782cbed4a989426012f1c535c9a90d3a7fc3f16d231b9372d2b716", 518 | "sha256:f275b6c0909e5dafd2d6269a656aa90fa58ebf4a74f8fcf9053195d226b24a08" 519 | ], 520 | "markers": "python_version >= '3.5'", 521 | "version": "==2.7.3" 522 | }, 523 | "pyopenssl": { 524 | "hashes": [ 525 | "sha256:4c231c759543ba02560fcd2480c48dcec4dae34c9da7d3747c508227e0624b51", 526 | "sha256:818ae18e06922c066f777a33f1fca45786d85edfe71cd043de6379337a7f274b" 527 | ], 528 | "version": "==20.0.1" 529 | }, 530 | "pyparsing": { 531 | "hashes": [ 532 | "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", 533 
| "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" 534 | ], 535 | "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", 536 | "version": "==2.4.7" 537 | }, 538 | "pysocks": { 539 | "hashes": [ 540 | "sha256:08e69f092cc6dbe92a0fdd16eeb9b9ffbc13cadfe5ca4c7bd92ffb078b293299", 541 | "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", 542 | "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0" 543 | ], 544 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 545 | "version": "==1.7.1" 546 | }, 547 | "python-dateutil": { 548 | "hashes": [ 549 | "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c", 550 | "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a" 551 | ], 552 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 553 | "version": "==2.8.1" 554 | }, 555 | "python-dotenv": { 556 | "hashes": [ 557 | "sha256:0c8d1b80d1a1e91717ea7d526178e3882732420b03f08afea0406db6402e220e", 558 | "sha256:587825ed60b1711daea4832cf37524dfd404325b7db5e25ebe88c495c9f807a0" 559 | ], 560 | "version": "==0.15.0" 561 | }, 562 | "pytz": { 563 | "hashes": [ 564 | "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4", 565 | "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5" 566 | ], 567 | "version": "==2020.5" 568 | }, 569 | "requests": { 570 | "extras": [ 571 | "security" 572 | ], 573 | "hashes": [ 574 | "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", 575 | "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" 576 | ], 577 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", 578 | "version": "==2.25.1" 579 | }, 580 | "scholarly": { 581 | "hashes": [ 582 | "sha256:e0058ee92f822ad2066cc9c6397b81f9abda50275cade31fdd97dfdd8bad1dbf", 583 | "sha256:ef73cf01890f9b44ef1ca6ba914731e696ae6dd159385a058aa078a9e29b604d" 584 | ], 585 | "index": "pypi", 586 | "version": "==1.0.2" 587 | }, 588 | "scikit-learn": { 589 | "hashes": [ 590 | "sha256:076369634ee72b5a5941440661e2f306ff4ac30903802dc52031c7e9199ac640", 591 | "sha256:18f7131e62265bf2691ed1d0303c640313894ccfe4278427478c6b2f45094b53", 592 | "sha256:26f66b3726b54dfb76ea51c5d9c2431ed17ebc066cb4527662b9e851a3e7ba61", 593 | "sha256:2951f87d35e72f007701c6e028aa230f6df6212a3194677c0c950486066a454d", 594 | "sha256:2a5348585aa793bc8cc5a72f8e9067c9380834b0aadbd55f924843b071f13282", 595 | "sha256:3eeff086f7329521d27249a082ea3c48c085cedb110db5f65968ab55c3ba2e09", 596 | "sha256:4395e91b3548005f4a645018435b5a94f8cce232b5b70753020e606c6a750656", 597 | "sha256:44e452ea8491225c5783d49577aad0f36202dfd52aec7f82c0fdfe5fbd5f7400", 598 | "sha256:490436b44b3a1957cb625e871764b0aa330b34cc416aea4abc6c38ca63d0d682", 599 | "sha256:5e6e3c042cea83f2e20a45e563b8eabc1f8f72446251fe23ebefdf111a173a33", 600 | "sha256:66f27bf21202a850bcd7b6303916e4907f6e22ec59a14974ede4955aed5c7ed0", 601 | "sha256:743b6edd98c98991be46c08e6b21df3861d5ae915f91d59f988384d93f7263e7", 602 | "sha256:758619e49cd7c17282e6cc60d5cc73c02c072b47c9a10010bb3bb47e0d976e50", 603 | "sha256:7f654befc5ad413690cc58f3f34a3e906caf825195ce0fda00a8e9565e1403e6", 604 | "sha256:800aaf63f8838c00e85db2267dd226f89858594843fd03932a9eda95746d2c40", 605 | "sha256:80ca024154b84b6ac4cfc86930ba13fdc348a209753bf2c16129db6f9eb8a80b", 606 | "sha256:890d7d588f65acb0c4f6c083347c9076916bda5e6bd8400f06244b1afc1009af", 607 | 
"sha256:905d8934d1e27a686698864a5863ff2c0e13a2ae1adb78a8a848aacc8a49927d", 608 | "sha256:afeb06dc69847927634e58579b9cdc72e1390b79497336b2324b1b173f33bd47", 609 | "sha256:b0d13fd56d26cf3de0314a4fd48037108c638fe126d813f5c1222bb0f08b6a76", 610 | "sha256:c08b27cb78ee8d2dc781a7affed09859441f5b624f9f92da59ac0791c8774dfc", 611 | "sha256:c912247e42114f389858ae05d63f4359d4e667ea72aaabee191aee9ad3f9774a", 612 | "sha256:d7fe05fcb44eadd6d6c874c768f085f5de1239db3a3b7be4d3d23d12e4120589", 613 | "sha256:d819d625832fb2969911a243e009cfa135cb8ef1e150866e417d6e9d75290087", 614 | "sha256:e534f5f3796db6781c87e9835dcd51b7854c8c5a379c9210b93605965c1941fd" 615 | ], 616 | "markers": "python_version >= '3.6'", 617 | "version": "==0.24.0" 618 | }, 619 | "scipy": { 620 | "hashes": [ 621 | "sha256:155225621df90fcd151e25d51c50217e412de717475999ebb76e17e310176981", 622 | "sha256:1bc5b446600c4ff7ab36bade47180673141322f0febaa555f1c433fe04f2a0e3", 623 | "sha256:2f1c2ebca6fd867160e70102200b1bd07b3b2d31a3e6af3c58d688c15d0d07b7", 624 | "sha256:313785c4dab65060f9648112d025f6d2fec69a8a889c714328882d678a95f053", 625 | "sha256:31ab217b5c27ab429d07428a76002b33662f98986095bbce5d55e0788f7e8b15", 626 | "sha256:3d4303e3e21d07d9557b26a1707bb9fc065510ee8501c9bf22a0157249a82fd0", 627 | "sha256:4f1d9cc977ac6a4a63c124045c1e8bf67ec37098f67c699887a93736961a00ae", 628 | "sha256:58731bbe0103e96b89b2f41516699db9b63066e4317e31b8402891571f6d358f", 629 | "sha256:8629135ee00cc2182ac8be8e75643b9f02235942443732c2ed69ab48edcb6614", 630 | "sha256:876badc33eec20709d4e042a09834f5953ebdac4088d45a4f3a1f18b56885718", 631 | "sha256:8840a9adb4ede3751f49761653d3ebf664f25195fdd42ada394ffea8903dd51d", 632 | "sha256:aef3a2dbc436bbe8f6e0b635f0b5fe5ed024b522eee4637dbbe0b974129ca734", 633 | "sha256:b8af26839ae343655f3ca377a5d5e5466f1d3b3ac7432a43449154fe958ae0e0", 634 | "sha256:c0911f3180de343643f369dc5cfedad6ba9f939c2d516bddea4a6871eb000722", 635 | "sha256:cb6dc9f82dfd95f6b9032a8d7ea70efeeb15d5b5fd6ed4e8537bb3c673580566", 636 | "sha256:cdbc47628184a0ebeb5c08f1892614e1bd4a51f6e0d609c6eed253823a960f5b", 637 | "sha256:d902d3a5ad7f28874c0a82db95246d24ca07ad932741df668595fe00a4819870", 638 | "sha256:eb7928275f3560d47e5538e15e9f32b3d64cd30ea8f85f3e82987425476f53f6", 639 | "sha256:f68d5761a2d2376e2b194c8e9192bbf7c51306ca176f1a0889990a52ef0d551f" 640 | ], 641 | "markers": "python_version >= '3.7'", 642 | "version": "==1.6.0" 643 | }, 644 | "seaborn": { 645 | "hashes": [ 646 | "sha256:44e78eaed937c5a87fc7a892c329a7cc091060b67ebd1d0d306b446a74ba01ad", 647 | "sha256:4e1cce9489449a1c6ff3c567f2113cdb41122f727e27a984950d004a88ef3c5c" 648 | ], 649 | "index": "pypi", 650 | "version": "==0.11.1" 651 | }, 652 | "selenium": { 653 | "hashes": [ 654 | "sha256:2d7131d7bc5a5b99a2d9b04aaf2612c411b03b8ca1b1ee8d3de5845a9be2cb3c", 655 | "sha256:deaf32b60ad91a4611b98d8002757f29e6f2c2d5fcaf202e1c9ad06d6772300d" 656 | ], 657 | "version": "==3.141.0" 658 | }, 659 | "six": { 660 | "hashes": [ 661 | "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", 662 | "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" 663 | ], 664 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", 665 | "version": "==1.15.0" 666 | }, 667 | "sklearn": { 668 | "hashes": [ 669 | "sha256:e23001573aa194b834122d2b9562459bf5ae494a2d59ca6b8aa22c85a44c0e31" 670 | ], 671 | "index": "pypi", 672 | "version": "==0.0" 673 | }, 674 | "snowballstemmer": { 675 | "hashes": [ 676 | "sha256:209f257d7533fdb3cb73bdbd24f436239ca3b2fa67d56f6ff88e86be08cc5ef0", 677 | 
"sha256:df3bac3df4c2c01363f3dd2cfa78cce2840a79b9f1c2d2de9ce8d31683992f52" 678 | ], 679 | "version": "==2.0.0" 680 | }, 681 | "soupsieve": { 682 | "hashes": [ 683 | "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851", 684 | "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e" 685 | ], 686 | "markers": "python_version >= '3.0'", 687 | "version": "==2.1" 688 | }, 689 | "sphinx": { 690 | "hashes": [ 691 | "sha256:77dec5ac77ca46eee54f59cf477780f4fb23327b3339ef39c8471abb829c1285", 692 | "sha256:b8aa4eb5502c53d3b5ca13a07abeedacd887f7770c198952fd5b9530d973e767" 693 | ], 694 | "markers": "python_version >= '3.5'", 695 | "version": "==3.4.2" 696 | }, 697 | "sphinx-rtd-theme": { 698 | "hashes": [ 699 | "sha256:eda689eda0c7301a80cf122dad28b1861e5605cbf455558f3775e1e8200e83a5", 700 | "sha256:fa6bebd5ab9a73da8e102509a86f3fcc36dec04a0b52ea80e5a033b2aba00113" 701 | ], 702 | "version": "==0.5.1" 703 | }, 704 | "sphinxcontrib-applehelp": { 705 | "hashes": [ 706 | "sha256:806111e5e962be97c29ec4c1e7fe277bfd19e9652fb1a4392105b43e01af885a", 707 | "sha256:a072735ec80e7675e3f432fcae8610ecf509c5f1869d17e2eecff44389cdbc58" 708 | ], 709 | "markers": "python_version >= '3.5'", 710 | "version": "==1.0.2" 711 | }, 712 | "sphinxcontrib-devhelp": { 713 | "hashes": [ 714 | "sha256:8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e", 715 | "sha256:ff7f1afa7b9642e7060379360a67e9c41e8f3121f2ce9164266f61b9f4b338e4" 716 | ], 717 | "markers": "python_version >= '3.5'", 718 | "version": "==1.0.2" 719 | }, 720 | "sphinxcontrib-htmlhelp": { 721 | "hashes": [ 722 | "sha256:3c0bc24a2c41e340ac37c85ced6dafc879ab485c095b1d65d2461ac2f7cca86f", 723 | "sha256:e8f5bb7e31b2dbb25b9cc435c8ab7a79787ebf7f906155729338f3156d93659b" 724 | ], 725 | "markers": "python_version >= '3.5'", 726 | "version": "==1.0.3" 727 | }, 728 | "sphinxcontrib-jsmath": { 729 | "hashes": [ 730 | "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", 731 | "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" 732 | ], 733 | "markers": "python_version >= '3.5'", 734 | "version": "==1.0.1" 735 | }, 736 | "sphinxcontrib-qthelp": { 737 | "hashes": [ 738 | "sha256:4c33767ee058b70dba89a6fc5c1892c0d57a54be67ddd3e7875a18d14cba5a72", 739 | "sha256:bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6" 740 | ], 741 | "markers": "python_version >= '3.5'", 742 | "version": "==1.0.3" 743 | }, 744 | "sphinxcontrib-serializinghtml": { 745 | "hashes": [ 746 | "sha256:eaa0eccc86e982a9b939b2b82d12cc5d013385ba5eadcc7e4fed23f4405f77bc", 747 | "sha256:f242a81d423f59617a8e5cf16f5d4d74e28ee9a66f9e5b637a18082991db5a9a" 748 | ], 749 | "markers": "python_version >= '3.5'", 750 | "version": "==1.1.4" 751 | }, 752 | "stem": { 753 | "hashes": [ 754 | "sha256:a0b48ea6224e95f22aa34c0bc3415f0eb4667ddeae3dfb5e32a6920c185568c2" 755 | ], 756 | "version": "==1.8.0" 757 | }, 758 | "threadpoolctl": { 759 | "hashes": [ 760 | "sha256:38b74ca20ff3bb42caca8b00055111d74159ee95c4370882bbff2b93d24da725", 761 | "sha256:ddc57c96a38beb63db45d6c159b5ab07b6bced12c45a1f07b2b92f272aebfa6b" 762 | ], 763 | "markers": "python_version >= '3.5'", 764 | "version": "==2.1.0" 765 | }, 766 | "urllib3": { 767 | "hashes": [ 768 | "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08", 769 | "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473" 770 | ], 771 | "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", 772 | 
"version": "==1.26.2" 773 | } 774 | }, 775 | "develop": {} 776 | } 777 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Mapping flows on hypergraphs 2 | 3 | ## Requirements 4 | 5 | Requirements are specified in the included `Pipfile`: 6 | 7 | - Python 3.8 8 | - Infomap Python package 1.3.0 9 | 10 | Install with `pipenv`: 11 | 12 | ```bash 13 | pipenv install 14 | ``` 15 | 16 | ## Running 17 | 18 | Main entrypoint is in `hypergraph/main.py`. 19 | 20 | Activate the environment with 21 | ```bash 22 | pipenv shell 23 | ``` 24 | 25 | Then, run the code 26 | ```bash 27 | python -m hypergraph --help 28 | ``` 29 | 30 | See the `Makefile` for example usage. 31 | 32 | ## Author 33 | Anton Eriksson 34 | -------------------------------------------------------------------------------- /analysis/__init__.py: -------------------------------------------------------------------------------- 1 | from .ami import * 2 | from .plot import * 3 | from .stats import * 4 | from .wjaccard import * 5 | -------------------------------------------------------------------------------- /analysis/ami.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import combinations_with_replacement, starmap 3 | from operator import attrgetter 4 | from typing import Sequence, List 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.metrics import adjusted_mutual_info_score 9 | 10 | from hypergraph.network import Level, TreeNode, Tree 11 | 12 | Labels = List[int] 13 | 14 | 15 | def module_level(nodes: Sequence[TreeNode], level: Level = Level.LEAF_MODULE) -> Labels: 16 | return [node.level(level) 17 | for node in sorted(nodes, key=attrgetter("state_id"))] 18 | 19 | 20 | def ami(networks: Sequence[Tree], **kwargs) -> pd.DataFrame: 21 | ami_ = np.zeros(shape=(len(networks),) * 2) 22 | 23 | index = defaultdict(lambda: len(index)) 24 | 25 | for network1, network2 in combinations_with_replacement(networks, 2): 26 | j = index[network1.pretty_filename] 27 | i = index[network2.pretty_filename] 28 | 29 | labels1, labels2 = starmap(module_level, ((network1.nodes, kwargs), (network2.nodes, kwargs))) 30 | 31 | if len(labels1) != len(labels2): 32 | raise RuntimeWarning("Different sets of labels") 33 | 34 | ami_[i, j] = adjusted_mutual_info_score(labels1, labels2) 35 | 36 | return pd.DataFrame(data=ami_, columns=list(index.keys())) 37 | -------------------------------------------------------------------------------- /analysis/citation-network-stats.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | from collections import defaultdict 4 | from statistics import mode, median, mean 5 | 6 | from hypergraph import representation 7 | from hypergraph.components import largest_connected_component 8 | from hypergraph.main import run_infomap 9 | from hypergraph.network import HyperGraph, remove_simple_hyperedges, Tree 10 | 11 | 12 | def get_effective_assignments(hypergraph, self_links, similarity): 13 | pickle_filename = f"multilayer{'_self_links' if self_links else ''}{'_similarity' if similarity else ''}.pickle" 14 | 15 | if not os.path.isfile(pickle_filename): 16 | 17 | multilayer = representation.multilayer(hypergraph, similarity_walk=similarity, self_links=self_links) 18 | 19 | im = run_infomap(multilayer, directed=True, self_links=self_links) 20 | 
21 | tree = Tree.from_infomap(im) 22 | 23 | effective_assignments = tree.effective_assignments 24 | 25 | with open(pickle_filename, "wb") as fp: 26 | pickle.dump(effective_assignments, fp) 27 | else: 28 | with open(pickle_filename, "rb") as fp: 29 | effective_assignments = pickle.load(fp) 30 | return effective_assignments 31 | 32 | 33 | def main(file): 34 | hypergraph = HyperGraph.from_iter(file.readlines()) 35 | hypergraph = largest_connected_component(hypergraph) 36 | hypergraph = remove_simple_hyperedges(hypergraph) 37 | 38 | print(f"Num nodes: {len(hypergraph.nodes)}") 39 | 40 | num_coauthors = [len(edge.nodes) for edge in hypergraph.edges] 41 | 42 | print(f"Median coauthors {median(num_coauthors)}") 43 | print(f"Mean coauthors {mean(num_coauthors)}") 44 | 45 | contributions = defaultdict(int) 46 | 47 | for _, node, _ in hypergraph.weights: 48 | contributions[node.id] += 1 49 | 50 | print(f"Mean contributions: {mean(contributions.values())}") 51 | print(f"Median contributions: {median(contributions.values())}") 52 | print(f"Mode contributions: {mode(contributions.values())}") 53 | 54 | effective_assignments = defaultdict(list) 55 | 56 | for self_links in (True, False): 57 | for similarity in (False, True): 58 | for name, assignments in get_effective_assignments(hypergraph, self_links, similarity).items(): 59 | effective_assignments[name].append(assignments) 60 | 61 | effective_assignments = {name: assignments for name, assignments in effective_assignments.items() 62 | if any(map(lambda x: x > 1, assignments))} 63 | 64 | effective_assignments = dict(sorted(effective_assignments.items(), key=lambda x: mean(x[1]), reverse=True)) 65 | 66 | print(f"Num overlapping: {len(effective_assignments)}") 67 | 68 | for name, assignments in effective_assignments.items(): 69 | print(f"{name:23}", assignments) 70 | 71 | mean_assignments = { 72 | i: mean(a[i] for a in effective_assignments.values()) 73 | for i in range(4) 74 | } 75 | 76 | print(f"Mean assignments: {mean_assignments = }") 77 | 78 | 79 | if __name__ == "__main__": 80 | with open("../data/references-weighted.txt") as fp: 81 | main(fp) 82 | -------------------------------------------------------------------------------- /analysis/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import seaborn as sns 3 | 4 | 5 | def plot_heatmap(data, **kwargs): 6 | return sns.heatmap(data, 7 | vmax=1, 8 | vmin=0.8, 9 | mask=np.triu(np.ones_like(data, dtype=bool), k=1), 10 | cmap=sns.color_palette("viridis", as_cmap=True), 11 | annot=True, 12 | annot_kws={"fontsize": 8}, 13 | fmt=".2g", 14 | square=True, 15 | linewidths=.5, 16 | **kwargs) 17 | -------------------------------------------------------------------------------- /analysis/stats.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import defaultdict 3 | from itertools import takewhile, dropwhile 4 | from statistics import mean, variance 5 | from typing import Sequence 6 | 7 | import pandas as pd 8 | 9 | from hypergraph.network import Tree 10 | 11 | 12 | def summarize(networks: Sequence[Tree]) -> pd.DataFrame: 13 | summary = defaultdict(list) 14 | 15 | for network in networks: 16 | states_filename = os.path.splitext(network.filename)[0] + "_states.net" 17 | 18 | with open(states_filename) as states_fp: 19 | states_lines = states_fp.readlines() 20 | 21 | num_states = len(list(takewhile(lambda line: not line.startswith("*Links"), 22 | dropwhile(lambda line: not 
line.startswith("# stateId physicalId"), 23 | states_lines)))) - 1 24 | 25 | num_links = len(list(dropwhile(lambda line: not line.startswith("*Links"), states_lines))) - 1 26 | 27 | summary["network"].append(network.pretty_filename) 28 | summary["num states"].append(num_states) 29 | summary["num links"].append(num_links) 30 | summary["levels"].append(network.levels) 31 | summary["top modules"].append(network.num_top_modules) 32 | summary["leaf modules"].append(network.num_leaf_modules) 33 | summary["codelength"].append(network.codelength) 34 | summary["variance"].append(variance(network.codelengths)) 35 | summary["completed in"].append(network.completed_in) 36 | summary["mean assignments"].append(mean(network.assignments.values())) 37 | summary["mean eff. assignments"].append(mean(network.effective_assignments.values())) 38 | 39 | return pd.DataFrame(data=summary) 40 | -------------------------------------------------------------------------------- /analysis/wjaccard.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from collections import defaultdict, namedtuple 3 | from itertools import combinations_with_replacement, product 4 | from typing import Sequence, Dict, Tuple, Any, Iterable 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from hypergraph.network import TreeNode, Tree 10 | 11 | 12 | def assignment_id(path: Tuple[int]) -> Iterable[int]: 13 | assignment_id_ = defaultdict(lambda: len(assignment_id_)) 14 | 15 | for i in range(1, len(path)): 16 | yield assignment_id_[path[0:i]] 17 | 18 | 19 | Partition = namedtuple("Partition", "assignments, cluster_sizes") 20 | 21 | 22 | def make_partition(nodes: Sequence[TreeNode]) -> Partition: 23 | assignments = {node.state_id: tuple(assignment_id(node.path)) 24 | for node in nodes} 25 | 26 | cluster_sizes = defaultdict(int) 27 | 28 | for node in assignments: 29 | for assignment in assignments[node]: 30 | cluster_sizes[assignment] += 1 31 | 32 | return Partition(assignments, dict(cluster_sizes)) 33 | 34 | 35 | def dict_iter_values(dict1: Dict[Any, Any], dict2: Dict[Any, Any]): 36 | for key in dict1: 37 | yield dict1[key], dict2[key] 38 | 39 | 40 | def weighted_jaccard_distance(p1: Partition, p2: Partition) -> float: 41 | intersections = defaultdict(int) 42 | 43 | for assignments1, assignments2 in dict_iter_values(p1.assignments, p2.assignments): 44 | for assignment1, assignment2 in product(assignments1, assignments2): 45 | intersections[assignment1, assignment2] += 1 46 | 47 | intersections = dict(intersections) 48 | 49 | max_similarities_1 = defaultdict(float) 50 | max_similarities_2 = defaultdict(float) 51 | 52 | for (assignment1, assignment2), intersection in intersections.items(): 53 | union = p1.cluster_sizes[assignment1] + p2.cluster_sizes[assignment2] - intersection 54 | if union == 0: 55 | continue 56 | 57 | similarity = intersection / union 58 | 59 | max_similarities_1[assignment1] = max(similarity, max_similarities_1[assignment1]) 60 | max_similarities_2[assignment2] = max(similarity, max_similarities_2[assignment2]) 61 | 62 | s1 = np.inner(*zip(*dict_iter_values(p1.cluster_sizes, max_similarities_1))) / sum(p1.cluster_sizes.values()) 63 | s2 = np.inner(*zip(*dict_iter_values(p2.cluster_sizes, max_similarities_2))) / sum(p2.cluster_sizes.values()) 64 | 65 | return 1 - 0.5 * s1 - 0.5 * s2 66 | 67 | 68 | def wjaccard(filename1: str, filename2: str, cmd: str = "wjaccarddist") -> float: 69 | result = subprocess.run([cmd, filename1, filename2], stdout=subprocess.PIPE, 
stderr=subprocess.PIPE) 70 | 71 | try: 72 | result.check_returncode() 73 | except subprocess.CalledProcessError: 74 | raise RuntimeError(result.stderr.decode("utf-8")) 75 | 76 | return float(result.stdout) 77 | 78 | 79 | def weighted_jaccard_dist(networks: Sequence[Tree]) -> pd.DataFrame: 80 | dist = np.zeros(shape=(len(networks),) * 2) 81 | 82 | index = {network.pretty_filename: i 83 | for i, network in enumerate(networks)} 84 | 85 | partitions = {index[network.pretty_filename]: make_partition(network.nodes) 86 | for network in networks} 87 | 88 | for network1, network2 in combinations_with_replacement(networks, 2): 89 | j = index[network1.pretty_filename] 90 | i = index[network2.pretty_filename] 91 | 92 | dist[i, j] = 1 - weighted_jaccard_distance(partitions[i], partitions[j]) 93 | 94 | return pd.DataFrame(data=dist, columns=list(index.keys())) 95 | -------------------------------------------------------------------------------- /create-representations/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "create-representations" 5 | version = "0.1.0" 6 | dependencies = [ 7 | "itertools", 8 | ] 9 | 10 | [[package]] 11 | name = "either" 12 | version = "1.6.1" 13 | source = "registry+https://github.com/rust-lang/crates.io-index" 14 | checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 15 | 16 | [[package]] 17 | name = "itertools" 18 | version = "0.9.0" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" 21 | dependencies = [ 22 | "either", 23 | ] 24 | -------------------------------------------------------------------------------- /create-representations/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "create-representations" 3 | version = "0.1.0" 4 | authors = ["Anton Eriksson "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | itertools = "0.9.0" -------------------------------------------------------------------------------- /create-representations/README.md: -------------------------------------------------------------------------------- 1 | # Create network representation from hypergraph 2 | 3 | Fast Rust implementation of the Mapping hypergraphs Python code. 4 | 5 | This is useful for creating network representations of large hypergraphs, 6 | for example the paleo marine faunas hypergraph. 7 | 8 | Note that since each layer in the multilayer representation is fully connected, the resulting 9 | files are quite large (~GB). 10 | 11 | ## Usage 12 | 13 | Build in release mode (much faster than a debug build): 14 | 15 | ```bash 16 | cargo build --release 17 | ``` 18 | 19 | Run with: 20 | ``` 21 | cargo run -- representation hypergraph outfile 22 | ``` 23 | Where `representation` can be any of `-[b|B|u|U|m|M|hs|HS]` and the hypergraph 24 | is in the same format as in the main repository.
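For example, to write the lazy multilayer representation of the bundled example hypergraph to a file (flags are listed in the table below; paths are illustrative):

```bash
cargo run --release -- -m ../data/example.txt ../output/example-multilayer.net
```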
25 | 26 | | Flag | Network representation | 27 | |--------|-------------------------------| 28 | | `-b` | bipartite | 29 | | `-B` | bipartite non-lazy | 30 | | `-u` | unipartite | 31 | | `-U` | unipartite non-lazy | 32 | | `-m` | multilayer | 33 | | `-M` | multilayer non-lazy | 34 | | `-hs` | hyperedge-similarity | 35 | | `-HS` | hyperedge-similarity non-lazy | 36 | 37 | See [config.rs](src/config.rs) for more information. 38 | 39 | ## Author 40 | Anton Eriksson -------------------------------------------------------------------------------- /create-representations/src/bipartite.rs: -------------------------------------------------------------------------------- 1 | use crate::config::RandomWalk; 2 | use crate::hypergraph::{EdgeId, HyperGraph, Node, NodeId}; 3 | use crate::network::{Link, StateNode}; 4 | use crate::preprocess::PreprocessResult; 5 | use crate::representation::NetworkRepresentation; 6 | use std::collections::HashMap; 7 | use std::fs::File; 8 | use std::io::BufWriter; 9 | use std::io::Write; 10 | 11 | pub struct Bipartite; 12 | 13 | impl NetworkRepresentation for Bipartite { 14 | fn create( 15 | hypergraph: &HyperGraph, 16 | preprocessed: &PreprocessResult, 17 | random_walk: RandomWalk, 18 | outfile: &str, 19 | ) -> std::io::Result<()> { 20 | println!("Generating {} bipartite...", random_walk.to_string()); 21 | 22 | let PreprocessResult { d, gamma, pi, .. } = preprocessed; 23 | 24 | let bipartite_start_id = hypergraph 25 | .nodes 26 | .iter() 27 | .max_by_key(|node| node.id) 28 | .unwrap() 29 | .id 30 | + 1; 31 | 32 | let features: Vec = hypergraph 33 | .edges 34 | .iter() 35 | .enumerate() 36 | .map(|(i, edge)| Node { 37 | id: bipartite_start_id + i, 38 | name: format!("\"Hyperedge {}\"", edge.id), 39 | }) 40 | .collect(); 41 | 42 | let edge_id_to_feature_id: HashMap = hypergraph 43 | .edges 44 | .iter() 45 | .enumerate() 46 | .map(|(i, edge)| (edge.id, bipartite_start_id + i)) 47 | .collect(); 48 | 49 | let mut links = vec![]; 50 | 51 | let mut f = BufWriter::new(File::create(outfile)?); 52 | 53 | writeln!(f, "*Vertices")?; 54 | 55 | for node in hypergraph.nodes.iter().chain(&features) { 56 | writeln!(f, "{}", node.to_string())?; 57 | } 58 | 59 | if random_walk == RandomWalk::Lazy { 60 | for edge in &hypergraph.edges { 61 | for node in &edge.nodes { 62 | let P_ue = edge.omega / d[&node]; 63 | let P_ev = gamma[&(edge.id, *node)]; 64 | 65 | if P_ue * P_ev < 1e-10 { 66 | continue; 67 | } 68 | 69 | let feature_id = edge_id_to_feature_id[&edge.id]; 70 | 71 | links.push(Link { 72 | source: *node, 73 | target: feature_id, 74 | weight: pi[node] * P_ue, 75 | }); 76 | 77 | links.push(Link { 78 | source: feature_id, 79 | target: *node, 80 | weight: P_ev, 81 | }); 82 | } 83 | } 84 | 85 | writeln!(f, "*Bipartite {}", bipartite_start_id)?; 86 | } else { 87 | // NonLazy 88 | let mut states: Vec = hypergraph 89 | .nodes 90 | .iter() 91 | .enumerate() 92 | .map(|(i, node)| StateNode { 93 | state_id: i, 94 | node_id: node.id, 95 | }) 96 | .collect(); 97 | 98 | let node_id_to_state_id: HashMap = states 99 | .iter() 100 | .map(|state| (state.node_id, state.state_id)) 101 | .collect(); 102 | 103 | let mut last_state_id = states 104 | .iter() 105 | .max_by_key(|node| node.state_id) 106 | .unwrap() 107 | .state_id; 108 | 109 | let bipartite_state_start_id = last_state_id + 1; 110 | 111 | let mut links = vec![]; 112 | 113 | for edge in &hypergraph.edges { 114 | let feature_id = edge_id_to_feature_id[&edge.id]; 115 | 116 | let states_in_edge: Vec<_> = edge 117 | .nodes 118 | .iter() 119 | 
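// map each physical node in this hyperedge to the state id created for it above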
.map(|node| node_id_to_state_id[node]) 120 | .collect(); 121 | 122 | let feature_states: Vec = states_in_edge 123 | .iter() 124 | .enumerate() 125 | .map(|(i, _)| StateNode { 126 | state_id: last_state_id + i + 1, 127 | node_id: feature_id, 128 | }) 129 | .collect(); 130 | 131 | last_state_id += feature_states.len(); 132 | 133 | states.extend(&feature_states); 134 | 135 | for (i, node) in edge.nodes.iter().enumerate() { 136 | let P_ue = edge.omega / d[&node]; 137 | let P_ev = gamma[&(edge.id, *node)]; 138 | 139 | if P_ue * P_ev < 1e-10 { 140 | continue; 141 | } 142 | 143 | let state_id = node_id_to_state_id[node]; 144 | let target_feature_state_id = &feature_states[i].state_id; 145 | 146 | links.push(Link { 147 | source: state_id, 148 | target: *target_feature_state_id, 149 | weight: pi[node] * P_ue, 150 | }); 151 | 152 | for source_feature_state in &feature_states { 153 | if source_feature_state.state_id != *target_feature_state_id { 154 | links.push(Link { 155 | source: source_feature_state.state_id, 156 | target: state_id, 157 | weight: P_ev, 158 | }); 159 | } 160 | } 161 | } 162 | } 163 | 164 | writeln!(f, "*States")?; 165 | 166 | for state in &states { 167 | writeln!(f, "{}", state.to_string())?; 168 | } 169 | 170 | writeln!(f, "*Bipartite {}", bipartite_state_start_id)?; 171 | 172 | for link in &links { 173 | writeln!(f, "{}", link.to_string())?; 174 | } 175 | } 176 | 177 | for link in &links { 178 | writeln!(f, "{}", link.to_string())?; 179 | } 180 | 181 | Ok(()) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /create-representations/src/config.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::str::FromStr; 3 | 4 | #[derive(Copy, Clone, PartialEq)] 5 | pub enum RandomWalk { 6 | Lazy, 7 | NonLazy, 8 | } 9 | 10 | impl ToString for RandomWalk { 11 | fn to_string(&self) -> String { 12 | String::from(match self { 13 | Self::Lazy => "lazy", 14 | Self::NonLazy => "non-lazy", 15 | }) 16 | } 17 | } 18 | 19 | #[derive(Copy, Clone)] 20 | pub enum Representation { 21 | Bipartite(RandomWalk), 22 | Unipartite(RandomWalk), 23 | Multilayer(RandomWalk), 24 | HyperEdgeSimilarity(RandomWalk), 25 | } 26 | 27 | impl FromStr for Representation { 28 | type Err = &'static str; 29 | 30 | fn from_str(s: &str) -> Result { 31 | if !s.starts_with('-') { 32 | return Err("Invalid argument"); 33 | } 34 | 35 | use RandomWalk::*; 36 | use Representation::*; 37 | 38 | match s { 39 | "-b" => Ok(Bipartite(Lazy)), 40 | "-B" => Ok(Bipartite(NonLazy)), 41 | "-u" => Ok(Unipartite(Lazy)), 42 | "-U" => Ok(Unipartite(NonLazy)), 43 | "-m" => Ok(Multilayer(Lazy)), 44 | "-M" => Ok(Multilayer(NonLazy)), 45 | "-hs" => Ok(HyperEdgeSimilarity(Lazy)), 46 | "-HS" => Ok(HyperEdgeSimilarity(NonLazy)), 47 | _ => Err("No such representation"), 48 | } 49 | } 50 | } 51 | 52 | pub struct Config { 53 | pub file: String, 54 | pub representation: Representation, 55 | pub outfile: String, 56 | } 57 | 58 | impl Config { 59 | pub fn new(mut args: std::env::Args) -> Result { 60 | args.next(); 61 | 62 | let representation = match args.next() { 63 | Some(arg) => arg.parse()?, 64 | None => return Err("Missing representation"), 65 | }; 66 | 67 | let file = match args.next() { 68 | Some(arg) => fs::read_to_string(arg).expect("Cannot open file"), 69 | None => return Err("Missing filename"), 70 | }; 71 | 72 | let outfile = match args.next() { 73 | Some(arg) => arg, 74 | None => return Err("Missing outfile"), 75 | }; 76 | 77 | Ok(Self { 
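// note: `file` holds the hypergraph file's contents (read above), not its path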
78 | file, 79 | representation, 80 | outfile, 81 | }) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /create-representations/src/hyperedge_similarity.rs: -------------------------------------------------------------------------------- 1 | use crate::config::RandomWalk; 2 | use crate::hypergraph::{EdgeId, HyperEdge, HyperGraph}; 3 | use crate::js_similarity::js_similarity; 4 | use crate::network::{LayerId, MultilayerLink}; 5 | use crate::preprocess::PreprocessResult; 6 | use crate::representation::NetworkRepresentation; 7 | use itertools::*; 8 | use std::collections::HashMap; 9 | use std::fs::File; 10 | use std::io::BufWriter; 11 | use std::io::Write; 12 | 13 | pub struct HyperEdgeSimilarity; 14 | 15 | impl NetworkRepresentation for HyperEdgeSimilarity { 16 | fn create( 17 | hypergraph: &HyperGraph, 18 | preprocessed: &PreprocessResult, 19 | random_walk: RandomWalk, 20 | outfile: &str, 21 | ) -> std::io::Result<()> { 22 | println!( 23 | "Generating {} hyperedge-similarity...", 24 | random_walk.to_string() 25 | ); 26 | 27 | let PreprocessResult { 28 | E, 29 | gamma, 30 | delta, 31 | pi_alpha, 32 | .. 33 | } = preprocessed; 34 | 35 | let edge_by_id: HashMap = hypergraph 36 | .edges 37 | .iter() 38 | .map(|edge| (edge.id, edge)) 39 | .collect(); 40 | 41 | let mut f = BufWriter::new(File::create(outfile)?); 42 | 43 | let D: HashMap<(LayerId, LayerId), f64> = iproduct!(&hypergraph.edges, &hypergraph.edges) 44 | .map(|(alpha, beta)| { 45 | ( 46 | (alpha.id, beta.id), 47 | js_similarity(&alpha, &beta, &gamma) * beta.omega, 48 | ) 49 | }) 50 | .collect(); 51 | 52 | writeln!(f, "*Vertices")?; 53 | 54 | for node in &hypergraph.nodes { 55 | writeln!(f, "{}", node.to_string())?; 56 | } 57 | 58 | writeln!(f, "*Multilayer")?; 59 | 60 | let mut links = vec![]; 61 | 62 | let is_lazy = random_walk == RandomWalk::Lazy; 63 | 64 | for alpha in &hypergraph.edges { 65 | for u in &alpha.nodes { 66 | let pi_alpha_u = pi_alpha[&(alpha.id, *u)]; 67 | 68 | let E_u: Vec<&HyperEdge> = E[u].iter().map(|e| edge_by_id[e]).collect(); 69 | 70 | let S_alpha: f64 = E_u.iter().map(|beta| D[&(alpha.id, beta.id)]).sum(); 71 | 72 | for beta in E_u { 73 | let D_alpha_beta = D[&(alpha.id, beta.id)]; 74 | 75 | for v in &beta.nodes { 76 | if !is_lazy && u == v { 77 | continue; 78 | } 79 | 80 | let delta_e = if is_lazy { 81 | delta[&beta.id] 82 | } else { 83 | delta[&beta.id] - gamma[&(beta.id, *u)] 84 | }; 85 | 86 | let P_uv = D_alpha_beta / S_alpha * gamma[&(beta.id, *v)] / delta_e; 87 | 88 | if P_uv < 1e-10 { 89 | continue; 90 | } 91 | 92 | links.push(MultilayerLink { 93 | layer1: alpha.id, 94 | source: *u, 95 | layer2: beta.id, 96 | target: *v, 97 | weight: pi_alpha_u * P_uv, 98 | }); 99 | } 100 | } 101 | } 102 | 103 | for link in &links { 104 | writeln!(f, "{}", link.to_string())?; 105 | } 106 | 107 | links.clear(); 108 | } 109 | 110 | Ok(()) 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /create-representations/src/hypergraph.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error; 2 | use std::str::FromStr; 3 | use std::string::ToString; 4 | 5 | pub type NodeId = usize; 6 | pub type EdgeId = usize; 7 | 8 | #[derive(Clone)] 9 | pub struct Node { 10 | pub id: NodeId, 11 | pub name: String, 12 | } 13 | 14 | impl ToString for Node { 15 | fn to_string(&self) -> String { 16 | format!("{} {}", self.id, self.name) 17 | } 18 | } 19 | 20 | impl FromStr for Node { 21 | type Err 
= Box<dyn Error>; 22 | 23 | fn from_str(s: &str) -> Result<Self, Self::Err> { 24 | let split: Vec<&str> = s.splitn(2, ' ').collect(); 25 | 26 | let id: NodeId = split[0].parse()?; 27 | let name: String = String::from(split[1]); 28 | 29 | Ok(Self { id, name }) 30 | } 31 | } 32 | 33 | #[derive(Clone)] 34 | pub struct HyperEdge { 35 | pub id: EdgeId, 36 | pub nodes: Vec<NodeId>, 37 | pub omega: f64, 38 | } 39 | 40 | impl FromStr for HyperEdge { 41 | type Err = Box<dyn Error>; 42 | 43 | fn from_str(s: &str) -> Result<Self, Self::Err> { 44 | let split: Vec<&str> = s.split_whitespace().collect(); 45 | 46 | let id: EdgeId = split.first().unwrap().parse()?; 47 | let omega: f64 = split.last().unwrap().parse()?; 48 | 49 | let nodes: Vec<NodeId> = split[1..split.len() - 1] 50 | .iter() 51 | .map(|node| node.parse().unwrap()) 52 | .collect(); 53 | 54 | Ok(Self { id, nodes, omega }) 55 | } 56 | } 57 | 58 | #[derive(Copy, Clone)] 59 | pub struct Gamma { 60 | pub edge: EdgeId, 61 | pub node: NodeId, 62 | pub gamma: f64, 63 | } 64 | 65 | impl FromStr for Gamma { 66 | type Err = Box<dyn Error>; 67 | 68 | fn from_str(s: &str) -> Result<Self, Self::Err> { 69 | let split: Vec<&str> = s.split_whitespace().collect(); 70 | 71 | Ok(Self { 72 | edge: split[0].parse()?, 73 | node: split[1].parse()?, 74 | gamma: split[2].parse()?, 75 | }) 76 | } 77 | } 78 | 79 | enum Context { 80 | Vertices, 81 | HyperEdges, 82 | Weights, 83 | } 84 | 85 | impl FromStr for Context { 86 | type Err = (); 87 | 88 | fn from_str(s: &str) -> Result<Self, Self::Err> { 89 | let lower = s.to_lowercase(); 90 | 91 | use Context::*; 92 | 93 | if lower.starts_with("*vertices") { 94 | Ok(Vertices) 95 | } else if lower.starts_with("*hyperedges") { 96 | Ok(HyperEdges) 97 | } else if lower.starts_with("*weights") { 98 | Ok(Weights) 99 | } else { 100 | Err(()) 101 | } 102 | } 103 | } 104 | 105 | #[derive(Clone)] 106 | pub struct HyperGraph { 107 | pub nodes: Vec<Node>, 108 | pub edges: Vec<HyperEdge>, 109 | pub weights: Vec<Gamma>, 110 | } 111 | 112 | impl HyperGraph { 113 | pub fn new(file: &str) -> Self { 114 | use Context::*; 115 | 116 | let mut nodes: Vec<Node> = vec![]; 117 | let mut edges: Vec<HyperEdge> = vec![]; 118 | let mut weights: Vec<Gamma> = vec![]; 119 | 120 | let mut context = None; 121 | 122 | for line in file.lines() { 123 | if line.starts_with('#') { 124 | continue; 125 | } 126 | 127 | if line.starts_with('*') { 128 | context = line.parse().ok(); 129 | continue; 130 | } 131 | 132 | match context { 133 | Some(Vertices) => nodes.push(line.parse().unwrap()), 134 | Some(HyperEdges) => edges.push(line.parse().unwrap()), 135 | Some(Weights) => weights.push(line.parse().unwrap()), 136 | None => (), 137 | } 138 | } 139 | 140 | Self { 141 | nodes, 142 | edges, 143 | weights, 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /create-representations/src/js_similarity.rs: -------------------------------------------------------------------------------- 1 | use crate::hypergraph::{HyperEdge, NodeId}; 2 | use crate::preprocess::NodeWeights; 3 | use itertools::*; 4 | use std::collections::HashMap; 5 | 6 | #[inline] 7 | fn kl_divergence(p: &[f64], q: &[f64]) -> f64 { 8 | debug_assert_eq!(p.len(), q.len()); 9 | 10 | -p.iter() 11 | .zip(q.iter()) 12 | .map(|(p_i, q_i)| { 13 | if *p_i > 0.0 { 14 | p_i * f64::log2(q_i / p_i) 15 | } else { 16 | 0.0 17 | } 18 | }) 19 | .sum::<f64>() 20 | } 21 | 22 | #[inline] 23 | fn js_divergence(p: &[f64], q: &[f64]) -> f64 { 24 | debug_assert_eq!(p.len(), q.len()); 25 | 26 | let mix: Vec<f64> = p 27 | .iter() 28 | .zip(q.iter()) 29 | .map(|(p_i, q_i)| 0.5 * (p_i + q_i)) 30 | .collect(); 31 | 32 | let jsd = 0.5 * kl_divergence(&p, &mix) + 0.5 * kl_divergence(&q, &mix); 33 | 34 | debug_assert!(jsd >= 0.0, "jsd = {}", jsd); 35 | debug_assert!(jsd <= 1.0 + f64::EPSILON, "jsd = {}", jsd); 36 | 37 | jsd 38 | } 39 | 40 | #[inline] 41 | fn normalize(x: &mut [f64]) { 42 | let sum: f64 = x.iter().sum(); 43 | 44 | debug_assert!(sum > 0.0); 45 | 46 | x.iter_mut().for_each(|x_i| *x_i /= sum); 47 | } 48 | 49 | #[inline] 50 | pub fn js_similarity(alpha: &HyperEdge, beta: &HyperEdge, gamma: &NodeWeights) -> f64 { 51 | let node_index: HashMap<NodeId, usize> = alpha 52 | .nodes 53 | .iter() 54 | .chain(beta.nodes.iter()) 55 | .unique() 56 | .enumerate() 57 | .map(|(i, node_id)| (*node_id, i)) 58 | .collect(); 59 | 60 | let num_nodes = node_index.len(); 61 | 62 | debug_assert_ne!(num_nodes, 0); 63 | 64 | let mut X = vec![vec![0.0; num_nodes]; 2]; 65 | 66 | [&alpha, &beta].iter().enumerate().for_each(|(i, edge)| { 67 | edge.nodes 68 | .iter() 69 | .map(|node| (&node_index[node], node)) 70 | .for_each(|(j, node)| X[i][*j] = gamma[&(edge.id, *node)]); 71 | }); 72 | 73 | normalize(&mut X[0]); 74 | normalize(&mut X[1]); 75 | 76 | debug_assert!((X[0].iter().sum::<f64>() - 1.0).abs() < f64::EPSILON); 77 | debug_assert!((X[1].iter().sum::<f64>() - 1.0).abs() < f64::EPSILON); 78 | 79 | 1.0 - js_divergence(&X[0], &X[1]) 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use super::*; 85 | 86 | #[test] 87 | fn test_js_similarity() { 88 | let mut gamma: NodeWeights = HashMap::new(); 89 | gamma.insert((0, 1), 1.0); 90 | gamma.insert((0, 2), 1.0); 91 | gamma.insert((0, 3), 1.0); 92 | 93 | let alpha = HyperEdge { 94 | id: 0, 95 | nodes: vec![1, 2], 96 | omega: 0.0, 97 | }; 98 | let beta = HyperEdge { 99 | id: 0, 100 | nodes: vec![1, 2], 101 | omega: 0.0, 102 | }; 103 | 104 | assert!((js_similarity(&alpha, &beta, &gamma) - 1.0).abs() < f64::EPSILON); 105 | } 106 | 107 | #[test] 108 | fn test_js_divergence() { 109 | let p = [0.5, 0.5]; 110 | let q = [0.5, 0.5]; 111 | 112 | assert!(js_divergence(&p, &q).abs() < f64::EPSILON); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /create-representations/src/main.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_snake_case)] 2 | 3 | use crate::config::Config; 4 | use crate::hypergraph::HyperGraph; 5 | use crate::preprocess::Preprocess; 6 | use std::error::Error; 7 | use std::{env, process}; 8 | 9 | mod bipartite; 10 | mod config; 11 | mod hyperedge_similarity; 12 | mod hypergraph; 13 | mod js_similarity; 14 | mod multilayer; 15 | mod network; 16 | mod preprocess; 17 | mod representation; 18 | mod unipartite; 19 | 20 | fn run(config: Config) -> Result<(), Box<dyn Error>> { 21 | let Config { 22 | file, 23 | representation, 24 | outfile, 25 | } = config; 26 | 27 | let hypergraph = HyperGraph::new(&file); 28 | 29 | representation.create(&hypergraph, &Preprocess::run(&hypergraph), &outfile)?; 30 | 31 | println!("Done!"); 32 | 33 | Ok(()) 34 | } 35 | 36 | fn main() { 37 | let config = Config::new(env::args()).unwrap_or_else(|err| { 38 | eprintln!("Error: {}", err); 39 | process::exit(1); 40 | }); 41 | 42 | if let Err(err) = run(config) { 43 | eprintln!("Error: {}", err); 44 | process::exit(1); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /create-representations/src/multilayer.rs: -------------------------------------------------------------------------------- 1 | use crate::config::RandomWalk; 2 | use crate::hypergraph::{EdgeId, HyperGraph}; 3 | use crate::network::MultilayerLink; 4 | use
crate::preprocess::PreprocessResult; 5 | use crate::representation::NetworkRepresentation; 6 | use std::collections::HashMap; 7 | use std::fs::File; 8 | use std::io::BufWriter; 9 | use std::io::Write; 10 | 11 | pub struct Multilayer; 12 | 13 | impl NetworkRepresentation for Multilayer { 14 | fn create( 15 | hypergraph: &HyperGraph, 16 | preprocessed: &PreprocessResult, 17 | random_walk: RandomWalk, 18 | outfile: &str, 19 | ) -> std::io::Result<()> { 20 | println!("Generating {} multilayer...", random_walk.to_string()); 21 | 22 | let PreprocessResult { 23 | E, 24 | d, 25 | gamma, 26 | delta, 27 | pi_alpha, 28 | .. 29 | } = preprocessed; 30 | 31 | let edge_by_id: HashMap = hypergraph 32 | .edges 33 | .iter() 34 | .map(|edge| (edge.id, edge)) 35 | .collect(); 36 | 37 | let mut f = BufWriter::new(File::create(outfile)?); 38 | 39 | writeln!(f, "*Vertices")?; 40 | 41 | for node in &hypergraph.nodes { 42 | writeln!(f, "{}", node.to_string())?; 43 | } 44 | 45 | writeln!(f, "*Multilayer")?; 46 | 47 | let mut links = vec![]; 48 | 49 | let is_lazy = random_walk == RandomWalk::Lazy; 50 | 51 | for alpha in &hypergraph.edges { 52 | for u in &alpha.nodes { 53 | let d_u = d[u]; 54 | let pi_alpha_u = pi_alpha[&(alpha.id, *u)]; 55 | 56 | for beta in E[u].iter().map(|e| edge_by_id[e]) { 57 | for v in &beta.nodes { 58 | if !is_lazy && u == v { 59 | continue; 60 | } 61 | 62 | let delta_e = if is_lazy { 63 | delta[&beta.id] 64 | } else { 65 | delta[&beta.id] - gamma[&(beta.id, *u)] 66 | }; 67 | 68 | let P_uv = beta.omega / d_u * gamma[&(beta.id, *v)] / delta_e; 69 | 70 | if P_uv < 1e-10 { 71 | continue; 72 | } 73 | 74 | links.push(MultilayerLink { 75 | layer1: alpha.id, 76 | source: *u, 77 | layer2: beta.id, 78 | target: *v, 79 | weight: pi_alpha_u * P_uv, 80 | }); 81 | } 82 | } 83 | } 84 | 85 | for link in &links { 86 | writeln!(f, "{}", link.to_string())?; 87 | } 88 | 89 | links.clear(); 90 | } 91 | 92 | Ok(()) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /create-representations/src/network.rs: -------------------------------------------------------------------------------- 1 | use std::string::ToString; 2 | 3 | pub type NodeId = usize; 4 | pub type LayerId = usize; 5 | 6 | pub struct Link { 7 | pub source: NodeId, 8 | pub target: NodeId, 9 | pub weight: f64, 10 | } 11 | 12 | impl ToString for Link { 13 | fn to_string(&self) -> String { 14 | format!("{} {} {}", self.source, self.target, self.weight) 15 | } 16 | } 17 | 18 | #[derive(Copy, Clone)] 19 | pub struct StateNode { 20 | pub state_id: NodeId, 21 | pub node_id: NodeId, 22 | } 23 | 24 | impl ToString for StateNode { 25 | fn to_string(&self) -> String { 26 | format!("{} {}", self.state_id, self.node_id) 27 | } 28 | } 29 | 30 | pub struct MultilayerLink { 31 | pub layer1: LayerId, 32 | pub source: NodeId, 33 | pub layer2: LayerId, 34 | pub target: NodeId, 35 | pub weight: f64, 36 | } 37 | 38 | impl ToString for MultilayerLink { 39 | fn to_string(&self) -> String { 40 | format!( 41 | "{} {} {} {} {}", 42 | self.layer1, self.source, self.layer2, self.target, self.weight 43 | ) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /create-representations/src/preprocess.rs: -------------------------------------------------------------------------------- 1 | use crate::hypergraph::{EdgeId, HyperGraph, NodeId}; 2 | use std::collections::{HashMap, HashSet}; 3 | 4 | pub type EdgeSet = HashMap>; 5 | pub type NodeStrengths = HashMap; 6 | pub type NodeWeights = 
HashMap<(EdgeId, NodeId), f64>; 7 | pub type HyperEdgeStrengths = HashMap; 8 | pub type VisitRates = HashMap; 9 | pub type StateVisitRates = HashMap<(EdgeId, NodeId), f64>; 10 | 11 | #[derive(Clone)] 12 | pub struct PreprocessResult { 13 | pub E: EdgeSet, 14 | pub d: NodeStrengths, 15 | pub gamma: NodeWeights, 16 | pub delta: HyperEdgeStrengths, 17 | pub pi: VisitRates, 18 | pub pi_alpha: StateVisitRates, 19 | } 20 | 21 | pub struct Preprocess; 22 | 23 | impl Preprocess { 24 | pub fn run(hypergraph: &HyperGraph) -> PreprocessResult { 25 | println!("Preprocessing..."); 26 | let mut E: EdgeSet = HashMap::new(); 27 | let mut d: NodeStrengths = HashMap::new(); 28 | 29 | for edge in &hypergraph.edges { 30 | for node in &edge.nodes { 31 | E.entry(*node).or_insert_with(HashSet::new).insert(edge.id); 32 | 33 | *d.entry(*node).or_insert(0.0) += edge.omega; 34 | } 35 | } 36 | 37 | // insert disconnected nodes 38 | for node in &hypergraph.nodes { 39 | E.entry(node.id).or_insert_with(HashSet::new); 40 | 41 | d.entry(node.id).or_insert(0.0); 42 | } 43 | 44 | let mut delta: HyperEdgeStrengths = HashMap::new(); 45 | let mut gamma: NodeWeights = HashMap::new(); 46 | 47 | for weight in &hypergraph.weights { 48 | *delta.entry(weight.edge).or_insert(0.0) += weight.gamma; 49 | 50 | gamma.insert((weight.edge, weight.node), weight.gamma); 51 | } 52 | 53 | // insert missing gamma's 54 | const DEFAULT_GAMMA: f64 = 1.0; 55 | 56 | for edge in &hypergraph.edges { 57 | for node in &edge.nodes { 58 | if !gamma.contains_key(&(edge.id, *node)) { 59 | *delta.entry(edge.id).or_insert(0.0) += DEFAULT_GAMMA; 60 | } 61 | 62 | gamma.entry((edge.id, *node)).or_insert(DEFAULT_GAMMA); 63 | } 64 | } 65 | 66 | let mut pi: VisitRates = HashMap::new(); 67 | 68 | let omega: HashMap = hypergraph 69 | .edges 70 | .iter() 71 | .map(|edge| (edge.id, edge.omega)) 72 | .collect(); 73 | 74 | for node in &hypergraph.nodes { 75 | let pi_u: f64 = E[&node.id] 76 | .iter() 77 | .map(|edge_id| omega[&edge_id] * gamma[&(*edge_id, node.id)]) 78 | .sum(); 79 | 80 | pi.insert(node.id, pi_u); 81 | } 82 | 83 | let mut pi_alpha: StateVisitRates = HashMap::new(); 84 | 85 | for edge in &hypergraph.edges { 86 | let omega_e = omega[&edge.id]; 87 | 88 | for node in &edge.nodes { 89 | pi_alpha.insert((edge.id, *node), omega_e * gamma[&(edge.id, *node)]); 90 | } 91 | } 92 | 93 | PreprocessResult { 94 | E, 95 | d, 96 | gamma, 97 | delta, 98 | pi, 99 | pi_alpha, 100 | } 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /create-representations/src/representation.rs: -------------------------------------------------------------------------------- 1 | use crate::bipartite::Bipartite; 2 | use crate::config::{RandomWalk, Representation}; 3 | use crate::hyperedge_similarity::HyperEdgeSimilarity; 4 | use crate::hypergraph::HyperGraph; 5 | use crate::multilayer::Multilayer; 6 | use crate::preprocess::PreprocessResult; 7 | use crate::unipartite::Unipartite; 8 | 9 | pub trait NetworkRepresentation { 10 | fn create( 11 | hypergraph: &HyperGraph, 12 | preprocessed: &PreprocessResult, 13 | random_walk: RandomWalk, 14 | outfile: &str, 15 | ) -> std::io::Result<()> 16 | where 17 | Self: Sized; 18 | } 19 | 20 | impl Representation { 21 | pub fn create( 22 | self, 23 | hypergraph: &HyperGraph, 24 | preprocessed: &PreprocessResult, 25 | outfile: &str, 26 | ) -> std::io::Result<()> { 27 | match self { 28 | Representation::Bipartite(random_walk) => { 29 | Bipartite::create(hypergraph, preprocessed, random_walk, outfile)? 
30 | } 31 | Representation::Unipartite(random_walk) => { 32 | Unipartite::create(hypergraph, preprocessed, random_walk, outfile)? 33 | } 34 | Representation::Multilayer(random_walk) => { 35 | Multilayer::create(hypergraph, preprocessed, random_walk, outfile)? 36 | } 37 | Representation::HyperEdgeSimilarity(random_walk) => { 38 | HyperEdgeSimilarity::create(hypergraph, preprocessed, random_walk, outfile)? 39 | } 40 | } 41 | 42 | Ok(()) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /create-representations/src/unipartite.rs: -------------------------------------------------------------------------------- 1 | use crate::config::RandomWalk; 2 | use crate::hypergraph::{HyperGraph, NodeId}; 3 | use crate::preprocess::PreprocessResult; 4 | use crate::representation::NetworkRepresentation; 5 | use itertools::*; 6 | use std::collections::HashMap; 7 | use std::fs::File; 8 | use std::io::BufWriter; 9 | use std::io::Write; 10 | 11 | pub struct Unipartite; 12 | 13 | impl NetworkRepresentation for Unipartite { 14 | fn create( 15 | hypergraph: &HyperGraph, 16 | preprocessed: &PreprocessResult, 17 | random_walk: RandomWalk, 18 | outfile: &str, 19 | ) -> std::io::Result<()> { 20 | println!("Generating {} unipartite...", random_walk.to_string()); 21 | 22 | let PreprocessResult { 23 | d, 24 | gamma, 25 | delta, 26 | pi, 27 | .. 28 | } = preprocessed; 29 | 30 | let mut links: HashMap<(NodeId, NodeId), _> = HashMap::new(); 31 | 32 | let is_lazy = random_walk == RandomWalk::Lazy; 33 | 34 | for edge in &hypergraph.edges { 35 | for (u, v) in iproduct!(&edge.nodes, &edge.nodes) { 36 | if !is_lazy && u == v { 37 | continue; 38 | } 39 | 40 | let delta_e = if is_lazy { 41 | delta[&edge.id] 42 | } else { 43 | delta[&edge.id] - gamma[&(edge.id, *u)] 44 | }; 45 | 46 | let P_uv = edge.omega / d[u] * gamma[&(edge.id, *v)] / delta_e; 47 | 48 | if P_uv < 1e-10 { 49 | continue; 50 | } 51 | 52 | *links.entry((*u, *v)).or_insert(0.0) += pi[u] * P_uv; 53 | } 54 | } 55 | 56 | let mut f = BufWriter::new(File::create(outfile)?); 57 | 58 | writeln!(f, "*Vertices")?; 59 | 60 | for node in &hypergraph.nodes { 61 | writeln!(f, "{}", node.to_string())?; 62 | } 63 | 64 | writeln!(f, "*Links")?; 65 | 66 | for ((source, target), weight) in links { 67 | writeln!(f, "{} {} {}", source, target, weight)?; 68 | } 69 | 70 | Ok(()) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /data/example-paper.txt: -------------------------------------------------------------------------------- 1 | *Vertices 2 | # id name 3 | 1 "a" 4 | 2 "b" 5 | 3 "c" 6 | 4 "d" 7 | 5 "e" 8 | 6 "f" 9 | 7 "i" 10 | 8 "h" 11 | 9 "g" 12 | 10 "j" 13 | *Hyperedges 14 | # id nodes... omega 15 | # top 16 | 1 1 2 3 3 17 | # left 18 | 2 4 5 6 1 19 | # middle 20 | 3 3 6 9 2 21 | # right 22 | 4 7 8 9 3 23 | # right overlapping 24 | 5 7 8 9 10 3 25 | *Weights 26 | 1 1 2 27 | 1 3 2 28 | 2 4 2 29 | 2 6 2 30 | 3 3 2 31 | 3 6 2 32 | 4 7 2 33 | 4 9 2 34 | 5 7 2 35 | 5 10 2 36 | -------------------------------------------------------------------------------- /data/example.txt: -------------------------------------------------------------------------------- 1 | *Vertices 2 | # id [name] 3 | 1 "a" 4 | 2 "b" 5 | 3 "c" 6 | 4 "d" 7 | 5 "f" 8 | *Hyperedges 9 | # id nodes... 
omega 10 | 1 1 2 3 1 11 | 2 3 4 5 1 12 | *Weights 13 | # edge node gamma 14 | 1 1 1 15 | 1 2 1 16 | 1 3 2 17 | 2 3 2 18 | 2 4 1 19 | 2 5 1 20 | -------------------------------------------------------------------------------- /data/figure-1.txt: -------------------------------------------------------------------------------- 1 | *Vertices 2 | # id [name] 3 | 1 "a" 4 | 2 "b" 5 | 3 "c" 6 | 4 "d" 7 | 5 "f" 8 | *Hyperedges 9 | # id nodes... omega 10 | 1 1 2 3 4 20 11 | 2 2 4 5 20 12 | *Weights 13 | # edge node gamma 14 | 1 1 1 15 | 1 2 2 16 | 1 3 1 17 | 1 4 1 18 | 2 2 2 19 | 2 4 1 20 | 2 5 1 21 | -------------------------------------------------------------------------------- /data/hyperedge-names.csv: -------------------------------------------------------------------------------- 1 | 1,Aalenian 2 | 2,Aeronian 3 | 3,Albian 4 | 4,Anisian 5 | 5,Aptian 6 | 6,Artinskian 7 | 7,Asselian 8 | 8,Bajocian 9 | 9,Barremian 10 | 10,Bashkirian 11 | 11,Bathonian 12 | 12,Berriasian 13 | 13,Callovian 14 | 14,Campanian 15 | 15,Capitanian 16 | 16,Carnian 17 | 17,Cenomanian 18 | 18,Changhsingian 19 | 19,Coniacian 20 | 20,Dapingian 21 | 21,Darriwilian 22 | 22,Drumian 23 | 23,Eifelian 24 | 24,Emsian 25 | 25,Famennian 26 | 26,Floian 27 | 27,Fortunian 28 | 28,Frasnian 29 | 29,Givetian 30 | 30,Gorstian 31 | 31,Guzhangian 32 | 32,Gzhelian 33 | 33,Hauterivian 34 | 34,Hettangian 35 | 35,Hirnantian 36 | 36,Homerian 37 | 37,Induan 38 | 38,Jiangshanian 39 | 39,Kasimovian 40 | 40,Katian 41 | 41,Kimmeridgian 42 | 42,Kungurian 43 | 43,Ladinian 44 | 44,Lochkovian 45 | 45,Ludfordian 46 | 46,Maastrichtian 47 | 47,Moscovian 48 | 48,Norian 49 | 49,Olenekian 50 | 50,Oxfordian 51 | 51,Paibian 52 | 52,Pliensbachian 53 | 53,Pragian 54 | 54,Pridoli 55 | 55,Rhaetian 56 | 56,Rhuddanian 57 | 57,Roadian 58 | 58,Sakmarian 59 | 59,Sandbian 60 | 60,Santonian 61 | 61,Serpukhovian 62 | 62,Sheinwoodian 63 | 63,Sinemurian 64 | 64,Stage 2 65 | 65,Stage 3 66 | 66,Stage 4 67 | 67,Stage 5 68 | 68,Telychian 69 | 69,Tithonian 70 | 70,Toarcian 71 | 71,Tournaisian 72 | 72,Tremadocian 73 | 73,Turonian 74 | 74,Valanginian 75 | 75,Visean 76 | 76,Wordian 77 | 77,Wuchiapingian 78 | -------------------------------------------------------------------------------- /data/minimal.txt: -------------------------------------------------------------------------------- 1 | *Hyperedges 2 | # edge nodes... 
omega 3 | 1 1 2 1 4 | 2 1 2 3 1 5 | *Weights 6 | 2 2 2 7 | -------------------------------------------------------------------------------- /hypergraph/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapequation/mapping-hypergraphs/f81425c6bbd45b0537f8a0da2ddc24ec79730593/hypergraph/__init__.py -------------------------------------------------------------------------------- /hypergraph/__main__.py: -------------------------------------------------------------------------------- 1 | from hypergraph.main import main 2 | 3 | if __name__ == "__main__": 4 | main() 5 | -------------------------------------------------------------------------------- /hypergraph/components.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import combinations 3 | 4 | import numpy as np 5 | from scipy.sparse.csgraph import connected_components 6 | 7 | from hypergraph.network import HyperGraph 8 | 9 | 10 | def unipartite_projection(hypergraph: HyperGraph): 11 | adj = np.zeros(shape=(len(hypergraph.nodes),) * 2, dtype=int) 12 | 13 | id_to_index_map = defaultdict(lambda: len(id_to_index_map)) 14 | 15 | for edge in hypergraph.edges: 16 | for source, target in combinations(edge.nodes, 2): 17 | source_id = id_to_index_map[source.id] 18 | target_id = id_to_index_map[target.id] 19 | adj[source_id, target_id] = 1 20 | adj[target_id, source_id] = 1 21 | 22 | index_to_id_map = {index: id_ for id_, index in id_to_index_map.items()} 23 | 24 | return adj, index_to_id_map 25 | 26 | 27 | def largest_connected_component(hypergraph: HyperGraph) -> HyperGraph: 28 | adj, index_to_id_map = unipartite_projection(hypergraph) 29 | n_components, labels = connected_components(adj, directed=False) 30 | 31 | if n_components == 1: 32 | return hypergraph 33 | 34 | label_counts = defaultdict(int) 35 | 36 | for label in labels: 37 | label_counts[label] += 1 38 | 39 | largest_label = max(label_counts, key=label_counts.get) 40 | 41 | nodes_by_id = {node.id: node for node in hypergraph.nodes} 42 | 43 | nodes = sorted(nodes_by_id[index_to_id_map[index]] 44 | for index, label in enumerate(labels) 45 | if label == largest_label) 46 | 47 | node_ids = {node.id for node in nodes} 48 | 49 | edges = sorted(edge for edge in hypergraph.edges 50 | if any(node.id in node_ids for node in edge.nodes)) 51 | 52 | edge_ids = {edge.id for edge in edges} 53 | 54 | weights = sorted(weight for weight in hypergraph.weights 55 | if weight.edge in edge_ids and weight.node.id in node_ids) 56 | 57 | return HyperGraph(nodes, edges, weights) 58 | -------------------------------------------------------------------------------- /hypergraph/main.py: -------------------------------------------------------------------------------- 1 | from os import path 2 | from typing import Optional 3 | 4 | from infomap import Infomap 5 | 6 | from hypergraph import representation 7 | from hypergraph.components import largest_connected_component 8 | from hypergraph.network import HyperGraph, Network, remove_simple_hyperedges, Tree, StateNetwork 9 | 10 | _DEFAULT_SEED = 123 11 | _DEFAULT_TELEPORTATION_PROB = 0.15 12 | 13 | 14 | def run_infomap(network: Network, 15 | basename: Optional[str] = None, 16 | outdir: Optional[str] = None, 17 | args: Optional[str] = None, 18 | directed: bool = True, 19 | self_links: bool = False, 20 | no_infomap: bool = False, 21 | two_level: bool = False, 22 | output_states: bool = True, 23 | seed: 
int = _DEFAULT_SEED, 24 | num_trials: int = 20, 25 | silent: bool = True, 26 | teleportation_probability: float = _DEFAULT_TELEPORTATION_PROB, 27 | **_) -> Infomap: 28 | default_args = f" --num-trials {num_trials if not no_infomap else 1}" 29 | default_args += " --silent" if silent else "" 30 | default_args += " --directed" if directed else "" 31 | default_args += " --include-self-links" if self_links else "" 32 | default_args += " --two-level" if two_level else "" 33 | default_args += " --no-infomap" if no_infomap else "" 34 | default_args += f" --seed {seed}" 35 | default_args += f" --teleportation-probability {teleportation_probability}" 36 | 37 | filename = None 38 | 39 | if basename is not None: 40 | filename = basename + (f"_seed_{seed}" if seed != _DEFAULT_SEED else "") 41 | default_args += f" --out-name {filename} " 42 | 43 | if outdir is not None: 44 | if output_states: 45 | default_args += " -o states " 46 | 47 | default_args += outdir 48 | 49 | print("[infomap] running infomap...") 50 | im = Infomap((args if args else '') + default_args) 51 | network.apply(im) 52 | im.run() 53 | 54 | if filename is not None: 55 | outname = path.join(outdir, filename) + ".ftree" 56 | im.write_flow_tree(outname, states=True) 57 | 58 | with open(outname, "r") as fp: 59 | original = fp.read() 60 | with open(outname, "w") as fp: 61 | fp.write(f"# codelengths {','.join(map(str, im.codelengths))}\n") 62 | fp.write(f"# num leaf modules {im.num_leaf_modules}\n") 63 | fp.write(original) 64 | 65 | print(f"[infomap] codelength {im.codelength}") 66 | print(f"[infomap] num top modules {im.num_top_modules}") 67 | 68 | return im 69 | 70 | 71 | def run(file, 72 | outdir="output", 73 | outfile=None, 74 | multilayer=False, 75 | multilayer_similarity=False, 76 | bipartite=False, 77 | bipartite_non_backtracking=False, 78 | unipartite_undirected=False, 79 | unipartite_directed=False, 80 | self_links=False, 81 | write_network=False, 82 | largest_cc=False, 83 | pre_cluster_multilayer=False, 84 | **kwargs) -> Optional[Network]: 85 | hypergraph = HyperGraph.from_iter(file.readlines()) 86 | 87 | if largest_cc: 88 | hypergraph = largest_connected_component(hypergraph) 89 | 90 | hypergraph = remove_simple_hyperedges(hypergraph) 91 | 92 | args = None 93 | 94 | if multilayer or multilayer_similarity: 95 | network = representation.multilayer(hypergraph, multilayer_similarity, self_links=self_links) 96 | 97 | basename = outfile if outfile else "multilayer" 98 | basename += "_similarity" if multilayer_similarity else "" 99 | basename += "_self_links" if self_links else "" 100 | 101 | if pre_cluster_multilayer: 102 | unipartite = representation.unipartite(hypergraph, directed=True, self_links=self_links) 103 | 104 | unipartite_basename = "multilayer_flattened" 105 | 106 | # Optimize the unipartite projection 107 | run_infomap(unipartite, unipartite_basename, path.join(outdir, "multilayer"), self_links=self_links, 108 | output_states=False, **kwargs) 109 | 110 | unipartite_tree = Tree.from_file(path.join(path.join(outdir, "multilayer"), unipartite_basename + ".ftree")) 111 | 112 | # Run infomap without optimizing to get the tree and state network 113 | run_infomap(network, basename, outdir, self_links=self_links, output_states=True, 114 | args="--no-infomap", num_trials=1, **kwargs) 115 | 116 | multilayer_tree = Tree.from_file(path.join(outdir, basename + ".ftree")) 117 | multilayer_tree.match_ids((unipartite_tree,)) 118 | 119 | unipartite_tree.write() 120 | args = f"--cluster-data {unipartite_tree.filename} -F" 121 | 122 | 
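# Load the state network written by the --no-infomap run above so that the
# final optimization can start from the flattened unipartite partition via --cluster-data.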
network = StateNetwork.from_file(path.join(outdir, basename + "_states.net")) 123 | 124 | elif bipartite or bipartite_non_backtracking: 125 | args = "--bipartite-teleportation" 126 | 127 | network = representation.bipartite(hypergraph, bipartite_non_backtracking) 128 | 129 | basename = outfile if outfile else "bipartite" 130 | basename += "_non_backtracking" if bipartite_non_backtracking else "" 131 | 132 | elif unipartite_undirected or unipartite_directed: 133 | network = representation.unipartite(hypergraph, unipartite_directed, self_links) 134 | 135 | basename = outfile if outfile else "unipartite" 136 | basename += "_directed" if unipartite_directed else "_undirected" 137 | basename += "_self_links" if self_links else "" 138 | 139 | else: 140 | return 141 | 142 | if write_network: 143 | network_filename = path.join(outdir, basename) + ".net" 144 | with open(network_filename, "w") as fp: 145 | network.write(fp) 146 | 147 | run_infomap(network, 148 | basename, 149 | outdir, 150 | args=args, 151 | directed=not unipartite_undirected, 152 | self_links=self_links, 153 | **kwargs) 154 | 155 | return network 156 | 157 | 158 | def main(): 159 | from argparse import ArgumentParser, FileType, RawDescriptionHelpFormatter 160 | from textwrap import dedent 161 | import sys 162 | 163 | description = dedent(""" 164 | Create maps from hypergraphs with edge-dependent vertex weights. 165 | 166 | First, represent the hypergraph as any of 167 | the formats specified under "representation". 168 | 169 | Then, Infomap finds the community structure in the network 170 | representation and outputs the result in "outdir". 171 | 172 | For hypergraph input format, see: data/example.txt 173 | """) 174 | 175 | # noinspection PyTypeChecker 176 | parser = ArgumentParser(prog="hypergraph", 177 | description=description, 178 | formatter_class=RawDescriptionHelpFormatter) 179 | 180 | parser.add_argument("file", type=FileType("r"), default=sys.stdin, help="the hypergraph file") 181 | parser.add_argument("outdir", nargs="?", default="output", help="directory to write output to") 182 | 183 | parser.add_argument("--largest-cc", action="store_true", 184 | help="only include largest connected component") 185 | parser.add_argument("-w", "--write-network", action="store_true", help="write network representation to file") 186 | parser.add_argument("-k", "--self-links", action="store_true", 187 | help="include self links (does not apply to bipartite representations)") 188 | parser.add_argument("-2", "--two-level", action="store_true", 189 | help="only search for two-level partitions") 190 | parser.add_argument("--no-infomap", action="store_true", help="do not run Infomap") 191 | parser.add_argument("--num-trials", default=20, type=int, help="number of times to run Infomap") 192 | parser.add_argument("-s", "--seed", default=_DEFAULT_SEED, type=int, help="random seed") 193 | parser.add_argument("-p", "--teleportation-probability", default=_DEFAULT_TELEPORTATION_PROB, 194 | type=float, help="probability to teleport in each step") 195 | parser.add_argument("-o", "--outfile") 196 | 197 | output = parser.add_argument_group("representation") 198 | options = output.add_mutually_exclusive_group(required=True) 199 | options.add_argument("-m", "--multilayer", action="store_true") 200 | options.add_argument("-M", "--multilayer-similarity", action="store_true") 201 | options.add_argument("-b", "--bipartite", action="store_true") 202 | options.add_argument("-B", "--bipartite-non-backtracking", action="store_true") 203 | options.add_argument("-u",
"--unipartite-undirected", action="store_true") 204 | options.add_argument("-U", "--unipartite-directed", action="store_true") 205 | 206 | args = parser.parse_args() 207 | 208 | run(**vars(args)) 209 | 210 | 211 | if __name__ == "__main__": 212 | main() 213 | -------------------------------------------------------------------------------- /hypergraph/network/__init__.py: -------------------------------------------------------------------------------- 1 | from .hypergraph import * 2 | from .network import * 3 | from .tree import * 4 | -------------------------------------------------------------------------------- /hypergraph/network/hypergraph.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import namedtuple, defaultdict 3 | from dataclasses import dataclass 4 | from operator import methodcaller 5 | from typing import Iterable, List, Tuple, Sequence, Mapping, Dict, Set, TextIO 6 | 7 | from .network import Node 8 | 9 | HyperEdge = namedtuple("HyperEdge", "id, nodes, omega") 10 | Gamma = namedtuple("Gamma", "edge, node, gamma") 11 | 12 | 13 | class DefaultNodeDict(defaultdict): 14 | def __missing__(self, key): 15 | if self.default_factory is None: 16 | raise KeyError(key) 17 | 18 | ret = self[key] = self.default_factory(key) 19 | return ret 20 | 21 | 22 | @dataclass 23 | class HyperGraph: 24 | """ 25 | Format: 26 | 27 | *Vertices # optional 28 | # id name 29 | 1 "a" 30 | 2 "b" 31 | 3 "c" 32 | 4 "d" 33 | 5 "f" 34 | *Hyperedges 35 | # id nodes... omega 36 | 1 1 2 3 10 # e1 37 | 2 3 4 5 20 # e2 38 | *Weights # optional, missing weights defaults to gamma = 1.0 39 | # edge node gamma 40 | 1 1 1 # gamma_e1(a) 41 | 1 2 1 # gamma_e1(b) 42 | 1 3 2 # gamma_e1(c) 43 | 2 3 1 # gamma_e2(c) 44 | 2 4 1 # gamma_e2(d) 45 | 2 5 2 # gamma_e2(f) 46 | """ 47 | nodes: List[Node] 48 | edges: List[HyperEdge] 49 | weights: List[Gamma] 50 | 51 | def __iter__(self): 52 | return iter((self.nodes, self.edges, self.weights)) 53 | 54 | def write(self, fp: TextIO): 55 | fp.write("*Vertices\n") 56 | fp.write("# id name\n") 57 | fp.writelines(f"{node.id} \"{node.name}\"\n" 58 | for node in sorted(self.nodes)) 59 | 60 | fp.write("*Hyperedges\n") 61 | fp.write("# id nodes... 
omega\n") 62 | fp.writelines(f"{edge.id} {' '.join(map(str, (node.id for node in edge.nodes)))} {edge.omega}\n" 63 | for edge in sorted(self.edges)) 64 | 65 | fp.write("*Weights\n") 66 | fp.write("# edge node gamma\n") 67 | fp.writelines(f"{weight.edge} {weight.node.id} {weight.gamma}\n" 68 | for weight in sorted(self.weights) 69 | if weight.gamma != 1) 70 | 71 | @classmethod 72 | def from_iter(cls, lines: Iterable[str]): 73 | nodes_lines, edges_lines, weights_lines = read(lines) 74 | 75 | nodes = parse_nodes(nodes_lines) 76 | 77 | if len(nodes) == 0: 78 | nodes = DefaultNodeDict(lambda node_id: Node(node_id, str(node_id))) 79 | 80 | edges = parse_edges(edges_lines, nodes) 81 | 82 | nodes = filter_dangling(nodes, edges) 83 | 84 | if len(weights_lines): 85 | weights = parse_weights(weights_lines, nodes) 86 | 87 | weights.extend(missing_default_weights(edges, weights)) 88 | else: 89 | weights = default_weights(edges) 90 | 91 | return cls(list(nodes.values()), edges, weights) 92 | 93 | 94 | def read(lines) -> Tuple[List[str], List[str], List[str]]: 95 | lines = (line.strip() for line in lines) 96 | lines = (line for line in lines if not line.startswith("#")) 97 | 98 | nodes = [] 99 | edges = [] 100 | weights = [] 101 | 102 | context = None 103 | 104 | for line in lines: 105 | if line.startswith('*'): 106 | context = line.lower() 107 | continue 108 | elif context == "*vertices": 109 | nodes.append(line) 110 | elif context == "*hyperedges": 111 | edges.append(line) 112 | elif context == "*weights": 113 | weights.append(line) 114 | 115 | return nodes, edges, weights 116 | 117 | 118 | def filter_dangling(nodes: Mapping[int, Node], edges: Sequence[HyperEdge]) -> Dict[int, Node]: 119 | referenced_nodes = {node.id for edge in edges for node in edge.nodes} 120 | 121 | return {node.id: node for node in nodes.values() 122 | if node.id in referenced_nodes} 123 | 124 | 125 | def parse_nodes(lines: Sequence[str]) -> Dict[int, Node]: 126 | nodes = {} 127 | 128 | for line in lines: 129 | m = re.match(r"(\d+) \"(.+)\"", line) 130 | if m: 131 | node_id, name = m.groups() 132 | node_id = int(node_id) 133 | nodes[node_id] = Node(node_id, name) 134 | 135 | return nodes 136 | 137 | 138 | def parse_edges(lines: Sequence[str], nodes: Mapping[int, Node]) -> List[HyperEdge]: 139 | lines_ = (tuple(map(int, first)) + (float(omega),) 140 | for *first, omega in map(methodcaller("split"), lines)) 141 | 142 | return [HyperEdge(edge_id, frozenset(nodes[node_id] for node_id in node_ids), omega) 143 | for edge_id, *node_ids, omega in lines_] 144 | 145 | 146 | def parse_weights(lines: Sequence[str], nodes: Mapping[int, Node]) -> List[Gamma]: 147 | lines_ = (tuple(map(int, ids)) + (float(gamma),) 148 | for *ids, gamma in map(methodcaller("split"), lines)) 149 | 150 | return [Gamma(edge, nodes[node_id], gamma) 151 | for edge, node_id, gamma in lines_] 152 | 153 | 154 | def default_weights(edges: Sequence[HyperEdge], weight: float = 1.0) -> List[Gamma]: 155 | return [Gamma(edge.id, node, weight) 156 | for edge in edges for node in edge.nodes] 157 | 158 | 159 | def missing_default_weights(edges: Sequence[HyperEdge], weights: Sequence[Gamma], weight: float = 1.0) -> List[Gamma]: 160 | return [Gamma(edge, node, weight) for edge, node in missing_weights(edges, weights)] 161 | 162 | 163 | def missing_weights(edges: Sequence[HyperEdge], weights: Sequence[Gamma]) -> Set[Tuple[int, Node]]: 164 | found_weights = {(weight.edge, weight.node) for weight in weights} 165 | 166 | all_nodes = {(edge.id, node) for edge in edges for node in 
edge.nodes} 167 | 168 | return all_nodes - found_weights 169 | 170 | 171 | def remove_simple_hyperedges(hypergraph: HyperGraph) -> HyperGraph: 172 | nodes, edges, weights = hypergraph 173 | 174 | edges_ = [edge for edge in edges if len(edge.nodes) > 1] 175 | nodes_ = set() 176 | weights_ = [] 177 | 178 | for edge in edges_: 179 | nodes_.update(edge.nodes) 180 | weights_.extend(weight for weight in weights 181 | if weight.edge == edge.id) 182 | 183 | return HyperGraph(list(nodes_), edges_, weights_) 184 | -------------------------------------------------------------------------------- /hypergraph/network/network.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from dataclasses import dataclass 3 | from operator import attrgetter 4 | from typing import Tuple, List, TextIO, Iterable 5 | 6 | Node = namedtuple("Node", "id, name") 7 | StateNode = namedtuple("StateNode", "state_id, node_id") 8 | 9 | Link = Tuple[int, int, float] 10 | MultiLayerLink = Tuple[Tuple[int, int], Tuple[int, int], float] 11 | 12 | 13 | @dataclass 14 | class Network: 15 | nodes: List[Node] 16 | links: List[Link] 17 | 18 | def apply(self, infomap): 19 | infomap.add_nodes(self.nodes) 20 | infomap.add_links(self.links) 21 | 22 | def write(self, fp: TextIO): 23 | self._write_nodes(fp) 24 | self._write_links(fp) 25 | 26 | def _write_nodes(self, fp: TextIO): 27 | fp.write("*Vertices\n") 28 | fp.writelines(f"{node.id} \"{node.name}\"\n" for node in sorted(self.nodes)) 29 | 30 | def _write_links(self, fp: TextIO): 31 | fp.write("*Edges\n") 32 | fp.writelines(f"{source} {target} {w}\n" 33 | for source, target, w in self.links) 34 | 35 | @classmethod 36 | def from_iter(cls, lines: Iterable[str]): # -> Union[Network, StateNetwork] 37 | nodes, states, links = [], [], [] 38 | 39 | context = None 40 | 41 | for line in lines: 42 | line = line.strip() 43 | 44 | if line.startswith("*"): 45 | context = line 46 | continue 47 | if line.startswith("#"): 48 | continue 49 | 50 | if context == "*Vertices": 51 | split_index = line.index(" ") 52 | id_, name = line[:split_index], line[split_index + 1:] 53 | name = name.strip("\"") 54 | nodes.append(Node(int(id_), name)) 55 | elif context == "*States": 56 | state_id, node_id = map(int, line.split()) 57 | states.append(StateNode(state_id, node_id)) 58 | elif context == "*Links": 59 | source, target, weight = line.split() 60 | links.append((int(source), int(target), float(weight))) 61 | 62 | if len(states): 63 | return StateNetwork(nodes, links, states) 64 | 65 | return cls(nodes, links) 66 | 67 | @classmethod 68 | def from_file(cls, filename: str): 69 | with open(filename) as fp: 70 | return cls.from_iter(fp.readlines()) 71 | 72 | 73 | @dataclass 74 | class StateNetwork(Network): 75 | states: List[StateNode] 76 | 77 | def apply(self, infomap): 78 | infomap.set_names(self.nodes) 79 | infomap.add_state_nodes(self.states) 80 | infomap.add_links(self.links) 81 | 82 | def write(self, fp: TextIO): 83 | self._write_nodes(fp) 84 | self._write_states(fp) 85 | self._write_links(fp) 86 | 87 | def _write_states(self, fp: TextIO): 88 | fp.write("*States\n") 89 | fp.writelines(f"{state.state_id} {state.node_id}\n" 90 | for state in sorted(self.states)) 91 | 92 | 93 | @dataclass 94 | class BipartiteNetwork(Network): 95 | features: List[Node] 96 | 97 | def apply(self, infomap): 98 | super().apply(infomap) 99 | infomap.add_nodes(self.features) 100 | infomap.bipartite_start_id = self.bipartite_start_id 101 | 102 | @property 103 | def 
bipartite_start_id(self) -> int: 104 | return min(map(attrgetter("id"), self.features)) 105 | 106 | def _write_nodes(self, fp: TextIO): 107 | super()._write_nodes(fp) 108 | fp.writelines(f"{node.id} \"{node.name}\"\n" 109 | for node in sorted(self.features)) 110 | 111 | def _write_links(self, fp: TextIO): 112 | fp.write(f"*Bipartite {self.bipartite_start_id}\n") 113 | fp.writelines(f"{source} {target} {w}\n" 114 | for source, target, w in self.links) 115 | 116 | 117 | @dataclass 118 | class BipartiteStateNetwork(BipartiteNetwork, StateNetwork): 119 | @property 120 | def bipartite_start_id(self) -> int: 121 | feature_start_id = super().bipartite_start_id 122 | return min(state_id for state_id, node_id in self.states 123 | if node_id == feature_start_id) 124 | 125 | def apply(self, infomap): 126 | infomap.set_names(self.nodes) 127 | infomap.set_names(self.features) 128 | infomap.add_state_nodes(self.states) 129 | infomap.add_links(self.links) 130 | infomap.bipartite_start_id = self.bipartite_start_id 131 | 132 | 133 | @dataclass 134 | class MultilayerNetwork(Network): 135 | links: List[MultiLayerLink] 136 | 137 | def apply(self, infomap): 138 | infomap.set_names(self.nodes) 139 | infomap.add_multilayer_links(self.links) 140 | 141 | def _write_links(self, fp: TextIO): 142 | fp.write("*Multilayer\n") 143 | fp.writelines(f"{e1} {u} {e2} {v} {w}\n" 144 | for (e1, u), (e2, v), w in self.links) 145 | -------------------------------------------------------------------------------- /hypergraph/network/tree.py: -------------------------------------------------------------------------------- 1 | import math 2 | import os 3 | import re 4 | from collections import defaultdict 5 | from dataclasses import dataclass 6 | from enum import Enum 7 | from itertools import filterfalse, takewhile, dropwhile 8 | from operator import attrgetter 9 | from typing import Tuple, Optional, Iterable, List, Callable, Dict, Sequence, TextIO, Mapping 10 | 11 | from infomap import Infomap 12 | from scipy.stats import entropy 13 | 14 | from hypergraph.network import StateNetwork 15 | 16 | Path = Tuple[int, ...] 
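# A Path holds a node's colon-separated module address from a .tree file:
# "1:2:3" is stored as (1, 2, 3), where the prefix identifies the module
# and the last entry indexes the node within its leaf module.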
17 | 18 | 19 | class Level(Enum): 20 | TOP_MODULE = 0 21 | LEAF_MODULE = -2 22 | 23 | 24 | @dataclass 25 | class TreeNode: 26 | path: Path 27 | flow: float 28 | name: str 29 | id: int 30 | state_id: Optional[int] = None 31 | layer_id: Optional[int] = None 32 | 33 | def write(self, fp: TextIO): 34 | fp.write("{} {} \"{}\"".format(":".join(map(str, self.path)), self.flow, self.name)) 35 | if self.state_id is not None: 36 | fp.write(f" {self.state_id}") 37 | fp.write(f" {self.id}") 38 | if self.layer_id is not None: 39 | fp.write(f" {self.layer_id}") 40 | fp.write("\n") 41 | 42 | @property 43 | def top_module(self) -> int: 44 | return self.path[0] 45 | 46 | @property 47 | def leaf_module(self) -> str: 48 | return ":".join(map(str, self.path[:-1])) 49 | 50 | def level(self, level: Level): 51 | # https://bugs.python.org/issue30545 52 | if level.value == Level.TOP_MODULE.value: 53 | return self.top_module 54 | elif level.value == Level.LEAF_MODULE.value: 55 | return self.leaf_module 56 | else: 57 | raise NotImplementedError(f"Must be either top or leaf module, not {level=}.") 58 | 59 | @classmethod 60 | def from_str(cls, line: str): 61 | line = line.strip() 62 | 63 | name, state_id, layer_id = None, None, None 64 | 65 | name_begin, name_end = line.index("\""), line.rindex("\"") 66 | 67 | if name_begin >= name_end: 68 | raise RuntimeError(f"Could not parse name from line \"{line}\"") 69 | 70 | name = line[name_begin + 1:name_end] 71 | 72 | line = line[:name_begin - 1] + line[name_end + 1:] 73 | 74 | path, flow, *ids = line.split() 75 | path, flow, ids = tuple(map(int, path.split(":"))), float(flow), tuple(map(int, ids)) 76 | 77 | if len(ids) == 2: 78 | state_id, node_id = ids 79 | elif len(ids) == 3: 80 | state_id, node_id, layer_id = ids 81 | else: 82 | node_id = ids[0] 83 | 84 | return TreeNode(path, flow, name if name else str(node_id), node_id, state_id, layer_id) 85 | 86 | 87 | def is_feature_node(line: str) -> bool: 88 | return "hyperedge" in line.lower() 89 | 90 | 91 | def pretty_filename(filename: str) -> str: 92 | basename = os.path.basename(filename) 93 | name, _ = os.path.splitext(basename) 94 | name = re.sub(r"_seed_\d+$", "", name) 95 | 96 | representation, *kind = name.replace("_", " ").split() 97 | kind = " ".join(kind) 98 | 99 | if "backtracking" in kind: 100 | kind = "non-bt" 101 | 102 | return f"{representation} ({kind})" if kind else representation 103 | 104 | 105 | @dataclass 106 | class Tree: 107 | header: Optional[str] 108 | nodes: List[TreeNode] 109 | is_bipartite: bool = False 110 | is_multilayer: bool = False 111 | filename: Optional[str] = None 112 | levels: Optional[int] = None 113 | num_top_modules: Optional[int] = None 114 | num_leaf_modules: Optional[int] = None 115 | codelength: Optional[float] = None 116 | codelengths: Optional[Tuple[float, ...]] = None 117 | completed_in: Optional[float] = None 118 | 119 | @property 120 | def pretty_filename(self) -> str: 121 | if not self.filename: 122 | return str(self) 123 | 124 | return pretty_filename(self.filename) 125 | 126 | def write(self, fp: Optional[TextIO] = None): 127 | did_open = fp is None 128 | 129 | if did_open: 130 | fp = open(self.filename, "w") 131 | 132 | if self.header is not None: 133 | fp.write(self.header) 134 | 135 | for node in self.nodes: 136 | node.write(fp) 137 | 138 | if did_open: 139 | fp.close() 140 | 141 | @classmethod 142 | def parse_header(cls, lines: Iterable[str]) \ 143 | -> Tuple[float, 144 | Optional[Tuple[float, ...]], 145 | float, 146 | int, 147 | int, 148 | Optional[int], 149 | Optional[str]]: 150
| # the header is the comment preamble at the top of the file; 151 | # list(takewhile(...)) cannot raise StopIteration, so no try/except is needed 152 | header = list(takewhile(lambda line: line.startswith("#"), lines)) 153 | 154 | 155 | if len(header) == 0: 156 | return 0.0, None, 0.0, 0, 0, None, None 157 | 158 | line = next(filter(lambda line: line.startswith("# codelengths"), header), "").split() 159 | codelengths = tuple(map(float, line[2].split(","))) 160 | 161 | line = next(filter(lambda line: line.startswith("# num leaf modules"), header), "").split() 162 | num_leaf_modules = int(line[4]) 163 | 164 | # completed in 2.49655 s 165 | line = next(filter(lambda line: line.startswith("# completed in"), header), "").split() 166 | completed_in = float(line[3]) 167 | 168 | # partitioned into 4 levels with 286 top modules 169 | line = next(filter(lambda line: line.startswith("# partitioned into"), header), "").split() 170 | levels, num_top_modules = int(line[3]), int(line[6]) 171 | 172 | # codelength 3.11764 bits 173 | line = next(filter(lambda line: line.startswith("# codelength "), header), "").split() 174 | codelength = float(line[2]) 175 | 176 | return codelength, codelengths, completed_in, levels, num_top_modules, num_leaf_modules, "".join(header) 177 | 178 | @classmethod 179 | def from_file(cls, filename: str, **kwargs): # -> Tree 180 | with open(filename) as fp: 181 | lines = fp.readlines() 182 | 183 | codelength, codelengths, completed_in, levels, num_top_modules, num_leaf_modules, header = \ 184 | cls.parse_header(lines) 185 | 186 | return cls.from_iter(lines, 187 | header, 188 | filename=filename, 189 | levels=levels, 190 | num_top_modules=num_top_modules, 191 | num_leaf_modules=num_leaf_modules, 192 | codelength=codelength, 193 | codelengths=codelengths, 194 | completed_in=completed_in, 195 | **kwargs) 196 | 197 | @classmethod 198 | def from_files(cls, filenames: Sequence[str]): # -> List[Tree] 199 | return [cls.from_file(name, is_multilayer="multilayer" in name, is_bipartite="bipartite" in name) 200 | for name in filenames] 201 | 202 | @classmethod 203 | def from_iter(cls, 204 | iterable: Iterable[str], 205 | header: Optional[str], 206 | node_filter: Optional[Callable[[str], bool]] = is_feature_node, 207 | **kwargs): 208 | no_commented_lines = dropwhile(lambda line: line.startswith("#"), iterable) 209 | nodes = takewhile(lambda line: not line.startswith("*"), no_commented_lines) 210 | 211 | if node_filter: 212 | nodes = filterfalse(node_filter, nodes) 213 | 214 | return cls(header, list(map(TreeNode.from_str, nodes)), **kwargs) 215 | 216 | @classmethod 217 | def from_infomap(cls, im: Infomap, states=True, **kwargs): # -> Tree: 218 | tmp_filename = "/tmp/1nf0m4p.tree" 219 | 220 | im.write_tree(tmp_filename, states) 221 | 222 | self = cls.from_file(tmp_filename, **kwargs) 223 | 224 | os.remove(tmp_filename) 225 | 226 | return self 227 | 228 | @property 229 | def assignments(self) -> Mapping[str, int]: 230 | assignments_ = defaultdict(set) 231 | 232 | for node in self.nodes: 233 | assignments_[node.name].add(node.leaf_module) 234 | 235 | return {name: len(assignments) 236 | for name, assignments in assignments_.items()} 237 | 238 | @property 239 | def effective_assignments(self) -> Mapping[str, float]: 240 | assignments_ = defaultdict(lambda: defaultdict(int)) 241 | 242 | for node in self.nodes: 243 | assignments_[node.name][node.leaf_module] += 1 244 | 245 | return {name: 2 ** entropy(list(node_assignments.values()), base=2) 246 | for name, node_assignments in assignments_.items()} 247 | 248 | def initial_partition(self, network: StateNetwork) -> Dict[int, int]: 249 | tree_nodes = 
{node.id: node for node in self.nodes} 250 | 251 | return {state_id: tree_nodes[node_id].top_module 252 | for state_id, node_id in network.states 253 | if node_id in tree_nodes} 254 | 255 | def cluster_data(self, network: StateNetwork): # -> Tree 256 | tree_nodes = {node.id: node for node in self.nodes} 257 | 258 | path = make_indexed_path() 259 | 260 | zero_flow = 0.0 261 | 262 | mapped_nodes = (TreeNode(path(tree_nodes[node_id]), 263 | zero_flow, 264 | tree_nodes[node_id].name, 265 | node_id, 266 | state_id) 267 | for state_id, node_id in network.states 268 | if node_id in tree_nodes) 269 | 270 | return Tree(None, sorted(mapped_nodes, key=attrgetter("path")))  # no header for derived cluster data 271 | 272 | def physical_nodes(self, level=1): 273 | phys_nodes: Dict[str, Dict[int, TreeNode]] = defaultdict(dict) 274 | 275 | for node in self.nodes: 276 | module = ":".join(map(str, node.path[0:level])) 277 | if node.id not in phys_nodes[module]: 278 | phys_nodes[module][node.id] = TreeNode(node.path[0:level], node.flow, node.name, node.id) 279 | 280 | else: 281 | phys_node = phys_nodes[module][node.id] 282 | phys_node.flow += node.flow 283 | 284 | return dict(phys_nodes) 285 | 286 | def match_ids(self, networks) -> None: 287 | for network in networks: 288 | if network is self: 289 | continue 290 | 291 | if network.is_multilayer: 292 | self._match_multilayer_ids(network) 293 | else: 294 | self._match_network_ids(network) 295 | 296 | def _match_multilayer_ids(self, network) -> None: 297 | state_ids = {(node.id, node.layer_id): node.state_id for node in self.nodes} 298 | 299 | for node in network.nodes: 300 | node.state_id = state_ids[node.id, node.layer_id] 301 | 302 | network_state_ids = {node.state_id for node in network.nodes} 303 | 304 | missing_nodes = (node for node in self.nodes 305 | if node.state_id not in network_state_ids) 306 | 307 | first_free_module_id = max(map(attrgetter("top_module"), network.nodes)) + 1 308 | 309 | network.nodes.extend(TreeNode((first_free_module_id + i, 1), 310 | 0, 311 | missing_node.name, 312 | missing_node.id, 313 | missing_node.state_id) 314 | for i, missing_node in enumerate(missing_nodes)) 315 | 316 | def _match_network_ids(self, network) -> None: 317 | self_state_nodes = defaultdict(list) 318 | 319 | for node in self.nodes: 320 | self_state_nodes[node.id].append(node) 321 | 322 | self_state_nodes = dict(self_state_nodes) 323 | 324 | missing_nodes = [] 325 | 326 | # 0. we need to set the leaf node index correctly 327 | path = make_indexed_path(network.nodes) 328 | 329 | for node in network.nodes: 330 | # 1. set the state id of the already existing node 331 | # 2. add nodes for each remaining state node 332 | # 3. 
divide the flow between them 333 | state_nodes = self_state_nodes[node.id] 334 | 335 | first, *remaining = state_nodes 336 | node.state_id = first.state_id 337 | 338 | divide_flow = not math.isclose(node.flow, sum(node.flow for node in state_nodes), rel_tol=0.01) 339 | 340 | if divide_flow: 341 | node.flow /= len(state_nodes) 342 | else: 343 | node.flow = first.flow 344 | 345 | missing_nodes.extend(TreeNode(path(node), 346 | node.flow if divide_flow else state_node.flow, 347 | node.name, 348 | node.id, 349 | state_node.state_id) 350 | for state_node in remaining) 351 | 352 | network.nodes.extend(missing_nodes) 353 | network.nodes.sort(key=attrgetter("path")) 354 | 355 | 356 | def make_indexed_path(nodes: Optional[Iterable[TreeNode]] = None) -> Callable[[TreeNode], Path]: 357 | leaf_index = defaultdict(int) 358 | 359 | def path(node: TreeNode) -> Path: 360 | module = node.path[:-1] 361 | leaf_index[module] += 1 362 | return *module, leaf_index[module] 363 | 364 | if nodes: 365 | for node in nodes: 366 | path(node) 367 | 368 | return path 369 | -------------------------------------------------------------------------------- /hypergraph/optimize_weights.py: -------------------------------------------------------------------------------- 1 | from itertools import product 2 | from multiprocessing import Pool 3 | 4 | import numpy as np 5 | 6 | from hypergraph import representation 7 | from hypergraph.main import run_infomap 8 | from hypergraph.network import HyperGraph, Node, HyperEdge, Gamma 9 | 10 | 11 | def run_task(hypergraph, 12 | kind, 13 | self_links: bool = False, 14 | similarity_walk: bool = False, 15 | non_backtracking: bool = False): 16 | if kind == "unipartite": 17 | network = representation.unipartite(hypergraph, directed=True, self_links=self_links) 18 | elif kind == "bipartite": 19 | network = representation.bipartite(hypergraph, non_backtracking=non_backtracking) 20 | elif kind == "multilayer": 21 | network = representation.multilayer(hypergraph, similarity_walk=similarity_walk, self_links=self_links) 22 | else: 23 | raise RuntimeError(f"No such {kind = }") 24 | 25 | args = "--bipartite-teleportation" if kind == "bipartite" else None 26 | 27 | im = run_infomap(network, args=args, self_links=self_links, two_level=True) 28 | 29 | return im.codelength, im.num_top_modules 30 | 31 | 32 | def main(): 33 | nodes = [ 34 | Node(1, "a"), 35 | Node(2, "b"), 36 | Node(3, "c"), 37 | Node(4, "d"), 38 | Node(5, "e"), 39 | Node(6, "f"), 40 | Node(7, "i"), 41 | Node(8, "h"), 42 | Node(9, "g"), 43 | Node(10, "j") 44 | ] 45 | 46 | weights = [ 47 | Gamma(1, nodes[1 - 1], 2), 48 | Gamma(1, nodes[2 - 1], 1), 49 | Gamma(1, nodes[3 - 1], 2), 50 | 51 | Gamma(2, nodes[4 - 1], 2), 52 | Gamma(2, nodes[5 - 1], 1), 53 | Gamma(2, nodes[6 - 1], 2), 54 | 55 | Gamma(3, nodes[3 - 1], 2), 56 | Gamma(3, nodes[6 - 1], 2), 57 | Gamma(3, nodes[9 - 1], 1), 58 | 59 | Gamma(4, nodes[7 - 1], 2), 60 | Gamma(4, nodes[8 - 1], 1), 61 | Gamma(4, nodes[9 - 1], 2), 62 | 63 | Gamma(5, nodes[7 - 1], 2), 64 | Gamma(5, nodes[8 - 1], 1), 65 | Gamma(5, nodes[9 - 1], 1), 66 | Gamma(5, nodes[10 - 1], 2) 67 | ] 68 | 69 | hyperedges = { 70 | # top 71 | 1: [1, 2, 3], 72 | # left 73 | 2: [4, 5, 6], 74 | # middle 75 | 3: [3, 6, 9], 76 | # right 77 | 4: [7, 8, 9], 78 | # right overlapping 79 | 5: [7, 8, 9, 10] 80 | } 81 | 82 | # edges to optimize 83 | edge_ids = [1, 2, 4] 84 | 85 | nodes_by_edge = {edge: frozenset(nodes[i - 1] for i in node_ids) 86 | for edge, node_ids in hyperedges.items()} 87 | 88 | hypergraph = HyperGraph(nodes, [], 
weights) 89 | 90 | num_trials = 0 91 | 92 | index = {name: i for i, name in enumerate([ 93 | "bipartite", "bipartite non bt", 94 | "unipartite", "unipartite self links", 95 | "multilayer", "multilayer self links", 96 | "multilayer similarity", "multilayer similarity self links" 97 | ])} 98 | 99 | solutions = [] 100 | 101 | num_non_trivial_solutions = 0 102 | multilayer_similarity_oks = 0 103 | num_multilayer_better = 0 104 | num_multilayer_self_links_better = 0 105 | 106 | with Pool(processes=8) as pool: 107 | for omegas in product(np.linspace(1, 3, 3), repeat=len(edge_ids)): 108 | omega = dict(zip(edge_ids, omegas)) 109 | 110 | # known fix-points 111 | omega[3] = 2.0 112 | omega[5] = 3.0 113 | 114 | hypergraph.edges = [HyperEdge(edge, nodes_by_edge[edge], omega[edge]) 115 | for edge in hyperedges] 116 | 117 | tasks = (  # positional args: kind, self_links, similarity_walk, non_backtracking 118 | (hypergraph, "bipartite", False, False, False), 119 | (hypergraph, "bipartite", False, False, True), 120 | (hypergraph, "unipartite", False), 121 | (hypergraph, "unipartite", True), 122 | (hypergraph, "multilayer", False), 123 | (hypergraph, "multilayer", True), 124 | (hypergraph, "multilayer", False, True), 125 | (hypergraph, "multilayer", True, True), 126 | ) 127 | 128 | results = pool.starmap(run_task, tasks) 129 | 130 | num_trials += 1 131 | 132 | codelengths, num_top_modules = zip(*results) 133 | 134 | all_non_trivial_solutions = all(map(lambda x: x > 1, num_top_modules)) 135 | 136 | if not all_non_trivial_solutions: 137 | continue 138 | 139 | num_non_trivial_solutions += 1 140 | 141 | multilayer_similarity_ok = num_top_modules[index["multilayer similarity"]] > 3 and \ 142 | num_top_modules[index["multilayer similarity self links"]] > 3 143 | 144 | if not multilayer_similarity_ok: 145 | continue 146 | 147 | multilayer_similarity_oks += 1 148 | 149 | codelength_better = abs(codelengths[index["multilayer"]] - codelengths[index["unipartite"]]) > 1e-7 150 | self_links_codelength_better = abs(codelengths[index["multilayer self links"]] - codelengths[index["unipartite self links"]]) > 1e-7 151 | 152 | same_num_top_modules = num_top_modules[index["multilayer"]] == num_top_modules[index["unipartite"]] 153 | same_num_top_modules_self_links = num_top_modules[index["multilayer self links"]] == num_top_modules[index["unipartite self links"]] 154 | 155 | multilayer_better = codelength_better and same_num_top_modules 156 | multilayer_self_links_better = self_links_codelength_better and same_num_top_modules_self_links 157 | 158 | num_multilayer_better += 1 if multilayer_better else 0 159 | num_multilayer_self_links_better += 1 if multilayer_self_links_better else 0 160 | 161 | if multilayer_better or multilayer_self_links_better: 162 | solutions.append(omega) 163 | 164 | for solution in solutions: 165 | print(solution) 166 | 167 | print(f"{len(solutions)}/{num_trials} = {len(solutions) / num_trials:.1%}") 168 | print(f"{num_non_trivial_solutions = }") 169 | print(f"{multilayer_similarity_oks = }") 170 | print(f"{num_multilayer_better = }") 171 | print(f"{num_multilayer_self_links_better = }") 172 | 173 | 174 | if __name__ == "__main__": 175 | main() 176 | -------------------------------------------------------------------------------- /hypergraph/representation/__init__.py: -------------------------------------------------------------------------------- 1 | from .bipartite import create_network as bipartite 2 | from .multilayer import create_network as multilayer 3 | from .unipartite import create_network as unipartite 4 | 
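The three aliases above share the same call pattern: build a HyperGraph, pass it to one of the create_network functions, and write the resulting network object. A minimal usage sketch (the input and output paths here are only examples, not fixed by the package):

from hypergraph import representation
from hypergraph.network import HyperGraph

with open("data/example.txt") as fp:
    hypergraph = HyperGraph.from_iter(fp.readlines())

# each keyword mirrors the corresponding create_network signature
bipartite = representation.bipartite(hypergraph, non_backtracking=True)
unipartite = representation.unipartite(hypergraph, directed=True, self_links=False)
multilayer = representation.multilayer(hypergraph, similarity_walk=False, self_links=False)

with open("output/bipartite.net", "w") as fp:
    bipartite.write(fp)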
-------------------------------------------------------------------------------- /hypergraph/representation/bipartite.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from operator import attrgetter 3 | from typing import Union 4 | 5 | from hypergraph.network import HyperGraph, StateNode, Node, BipartiteNetwork, BipartiteStateNetwork 6 | from hypergraph.transition import gamma, d, pi 7 | 8 | 9 | def create_network(hypergraph: HyperGraph, non_backtracking: bool) -> Union[BipartiteNetwork, BipartiteStateNetwork]: 10 | nodes, edges, weights = hypergraph 11 | 12 | print("[bipartite] creating bipartite...") 13 | 14 | gamma_ = gamma(weights) 15 | d_ = d(edges) 16 | pi_ = pi(edges, weights) 17 | 18 | bipartite_start_id = max(map(attrgetter("id"), nodes)) + 1 19 | 20 | features = [Node(bipartite_start_id + i, f"Hyperedge {edge.id}") 21 | for i, edge in enumerate(edges)] 22 | 23 | edge_to_feature_id = {edge.id: bipartite_start_id + i 24 | for i, edge in enumerate(edges)} 25 | 26 | links = defaultdict(float) 27 | 28 | if non_backtracking: 29 | get_state_id = defaultdict(lambda: len(get_state_id) + 1) 30 | 31 | states = [StateNode(get_state_id[node.id], node.id) for node in sorted(nodes)] 32 | 33 | for edge in edges: 34 | feature_id = edge_to_feature_id[edge.id] 35 | 36 | state_ids = (get_state_id[node.id] for node in edge.nodes) 37 | 38 | feature_states = [StateNode(get_state_id[feature_id, state_id], feature_id) 39 | for state_id in state_ids] 40 | 41 | states.extend(feature_states) 42 | 43 | for node in edge.nodes: 44 | P_ue = edge.omega / d_(node) 45 | P_ev = gamma_(edge, node) 46 | 47 | if P_ue * P_ev < 1e-10: 48 | continue 49 | 50 | state_id = get_state_id[node.id] 51 | target_feature_state_id = get_state_id[feature_id, state_id] 52 | 53 | links[state_id, target_feature_state_id] = pi_(node) * P_ue 54 | 55 | for source_feature_state_id, node_id in feature_states: 56 | if source_feature_state_id != target_feature_state_id: 57 | links[source_feature_state_id, state_id] = P_ev 58 | 59 | links = [(source, target, weight) 60 | for (source, target), weight in sorted(links.items())] 61 | 62 | return BipartiteStateNetwork(nodes, links, states, features) 63 | 64 | else: 65 | for edge in edges: 66 | for node in edge.nodes: 67 | P_ue = edge.omega / d_(node) 68 | P_ev = gamma_(edge, node) 69 | 70 | if P_ue * P_ev < 1e-10: 71 | continue 72 | 73 | feature_id = edge_to_feature_id[edge.id] 74 | 75 | links[node.id, feature_id] = pi_(node) * P_ue 76 | links[feature_id, node.id] = P_ev 77 | 78 | links = [(source, target, weight) 79 | for (source, target), weight in sorted(links.items())] 80 | 81 | return BipartiteNetwork(nodes, links, features) 82 | -------------------------------------------------------------------------------- /hypergraph/representation/multilayer.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from itertools import product 3 | from typing import Callable 4 | 5 | import numpy as np 6 | from scipy.stats import entropy 7 | from sklearn.preprocessing import normalize 8 | 9 | from hypergraph.network import HyperGraph, MultilayerNetwork, HyperEdge, Node 10 | from hypergraph.transition import pi_alpha, gamma, delta, E, d 11 | 12 | 13 | def create_random_walk(hypergraph: HyperGraph, self_links: bool) -> MultilayerNetwork: 14 | nodes, edges, weights = hypergraph 15 | 16 | gamma_ = gamma(weights) 17 | pi_alpha_ = pi_alpha(weights) 18 | delta_ = 
delta(weights) 19 | d_ = d(edges) 20 | E_ = E(edges) 21 | 22 | links = [] 23 | 24 | for alpha, beta in product(edges, edges): 25 | for u, v in product(alpha.nodes, beta.nodes): 26 | if not self_links and u.id == v.id: 27 | continue 28 | 29 | if beta.id not in E_(u, v): 30 | continue 31 | 32 | delta_e = delta_(beta) if self_links else delta_(beta) - gamma_(beta, u) 33 | 34 | P_uv = beta.omega / d_(u) * gamma_(beta, v) / delta_e 35 | 36 | if P_uv < 1e-10: 37 | continue 38 | 39 | weight = pi_alpha_(alpha, u) * P_uv 40 | 41 | links.append((alpha.id, u.id, beta.id, v.id, weight)) 42 | 43 | links = [((e1, u), (e2, v), w) 44 | for e1, u, e2, v, w in sorted(links)] 45 | 46 | return MultilayerNetwork(nodes, links) 47 | 48 | 49 | SimilarityMetric = Callable[[HyperEdge, HyperEdge], float] 50 | 51 | 52 | def jaccard_index(e1: HyperEdge, e2: HyperEdge) -> float: 53 | return len(e1.nodes & e2.nodes) / len(e1.nodes | e2.nodes) 54 | 55 | 56 | def sorensen_coeff(e1: HyperEdge, e2: HyperEdge) -> float: 57 | return 2 * len(e1.nodes & e2.nodes) / (len(e1.nodes) + len(e2.nodes)) 58 | 59 | 60 | def overlap_coeff(e1: HyperEdge, e2: HyperEdge) -> float: 61 | return len(e1.nodes & e2.nodes) / min(len(e1.nodes), len(e2.nodes)) 62 | 63 | 64 | def make_js_similarity(gamma: Callable[[HyperEdge, Node], float]) -> SimilarityMetric: 65 | def js_divergence(p: np.array, q: np.array) -> float: 66 | mix = 0.5 * (p + q) 67 | 68 | jsd = 0.5 * entropy(p, mix, base=2) + 0.5 * entropy(q, mix, base=2) 69 | 70 | if jsd < 0 or jsd > 1: 71 | raise RuntimeWarning("JSD out of bounds") 72 | 73 | return jsd 74 | 75 | def js_similarity(e1: HyperEdge, e2: HyperEdge) -> float: 76 | num_nodes = len(set(node.id for node in e1.nodes | e2.nodes)) 77 | 78 | j = defaultdict(lambda: len(j)) 79 | 80 | X = np.zeros(shape=(2, num_nodes)) 81 | 82 | for i, edge in enumerate([e1, e2]): 83 | for node in edge.nodes: 84 | X[i, j[node.id]] = gamma(edge, node) 85 | 86 | normalize(X, axis=1, norm="l1", copy=False) 87 | 88 | return 1 - js_divergence(X[0], X[1]) 89 | 90 | return js_similarity 91 | 92 | 93 | def create_similarity_walk(hypergraph: HyperGraph, self_links: bool) -> MultilayerNetwork: 94 | nodes, edges, weights = hypergraph 95 | 96 | gamma_ = gamma(weights) 97 | pi_alpha_ = pi_alpha(weights) 98 | delta_ = delta(weights) 99 | E_ = E(edges) 100 | edges_ = {edge.id: edge for edge in edges} 101 | 102 | links = [] 103 | 104 | similarity = make_js_similarity(gamma_) 105 | 106 | for alpha, beta in product(edges, edges): 107 | for u, v in product(alpha.nodes, beta.nodes): 108 | if not self_links and u.id == v.id: 109 | continue 110 | 111 | if beta.id not in E_(u, v): 112 | continue 113 | 114 | E_u = {edges_[edge] for edge in E_(u)} 115 | 116 | S_alpha = sum(similarity(alpha, beta_) * beta_.omega for beta_ in E_u) 117 | D_alpha_beta = similarity(alpha, beta) * beta.omega 118 | 119 | delta_e = delta_(beta) if self_links else delta_(beta) - gamma_(beta, u) 120 | 121 | P_uv = D_alpha_beta / S_alpha * gamma_(beta, v) / delta_e 122 | 123 | if P_uv < 1e-10: 124 | continue 125 | 126 | weight = pi_alpha_(alpha, u) * P_uv 127 | 128 | links.append((alpha.id, u.id, beta.id, v.id, weight)) 129 | 130 | links = [((e1, u), (e2, v), w) 131 | for e1, u, e2, v, w in sorted(links)] 132 | 133 | return MultilayerNetwork(nodes, links) 134 | 135 | 136 | def create_network(hypergraph: HyperGraph, similarity_walk: bool, **kwargs) -> MultilayerNetwork: 137 | print("[multilayer] creating multilayer...") 138 | 139 | if similarity_walk: 140 | return create_similarity_walk(hypergraph, 
**kwargs) 141 | 142 | return create_random_walk(hypergraph, **kwargs) 143 | -------------------------------------------------------------------------------- /hypergraph/representation/unipartite.py: -------------------------------------------------------------------------------- 1 | from itertools import combinations_with_replacement, product 2 | 3 | from hypergraph.network import HyperGraph, Network 4 | from hypergraph.transition import w, P, pi 5 | 6 | 7 | def create_network(hypergraph: HyperGraph, directed: bool, self_links: bool) -> Network: 8 | nodes, edges, weights = hypergraph 9 | 10 | print("[unipartite] creating unipartite...") 11 | 12 | if directed: 13 | links = [] 14 | 15 | P_ = P(edges, weights) 16 | pi_ = pi(edges, weights) 17 | 18 | for u, v in product(nodes, repeat=2): 19 | weight = pi_(u) * P_(u, v, self_links) 20 | 21 | if weight < 1e-10: 22 | continue 23 | 24 | links.append((u.id, v.id, weight)) 25 | 26 | else: 27 | w_ = w(edges, weights) 28 | 29 | links = [] 30 | 31 | for u, v in combinations_with_replacement(nodes, 2): 32 | weight = w_(u, v, True) 33 | 34 | if weight < 1e-10: 35 | continue 36 | 37 | links.append((u.id, v.id, weight)) 38 | 39 | return Network(nodes, sorted(links)) 40 | -------------------------------------------------------------------------------- /hypergraph/transition.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from typing import Iterable, Set, Optional, Callable 3 | 4 | from hypergraph.network import Node, HyperEdge, Gamma 5 | 6 | 7 | def E(edges: Iterable[HyperEdge]) -> Callable[[Node, Optional[Node]], Set[int]]: 8 | """ 9 | Set of hyperedges incident to vertex v. 10 | 11 | .. math:: E(v) = \{ e \in E : v \in e \} 12 | 13 | Set of hyperedges incident to both vertices u and v. 14 | 15 | .. math:: E(u, v) = \{ e \in E : u \in e, v \in e \} 16 | """ 17 | edges_ = defaultdict(set) 18 | 19 | for edge, nodes, _ in edges: 20 | for node in nodes: 21 | edges_[node.id].add(edge) 22 | 23 | edges_ = dict(edges_) 24 | 25 | def inner(u: Node, v: Optional[Node] = None) -> Set[int]: 26 | if v: 27 | return edges_[u.id] & edges_[v.id] 28 | 29 | return edges_[u.id] 30 | 31 | return inner 32 | 33 | 34 | def d(edges: Iterable[HyperEdge]) -> Callable[[Node], float]: 35 | """ 36 | Degree of vertex v. 37 | 38 | .. math:: d(v) = \sum_{e \in E(v)} \omega(e) 39 | """ 40 | E_ = E(edges) 41 | 42 | def inner(v: Node) -> float: 43 | return sum(omega for edge, _, omega in edges 44 | if edge in E_(v)) 45 | 46 | return inner 47 | 48 | 49 | def delta(weights: Iterable[Gamma]) -> Callable[[HyperEdge], float]: 50 | """ 51 | Degree of hyperedge e. 52 | 53 | .. math:: \delta(e) = \sum_{v \in e} \gamma_e(v) 54 | """ 55 | delta_ = defaultdict(float) 56 | 57 | for edge, _, gamma in weights: 58 | delta_[edge] += gamma 59 | 60 | delta_ = dict(delta_) 61 | 62 | def inner(e: HyperEdge) -> float: 63 | return delta_[e.id] 64 | 65 | return inner 66 | 67 | 68 | def gamma(weights: Iterable[Gamma]) -> Callable[[HyperEdge, Node], float]: 69 | """ 70 | Edge-(in)dependent vertex weight. 71 | 72 | .. 
math:: \gamma_e(v) 73 | """ 74 | gamma_ = {(edge, node.id): gamma_ 75 | for edge, node, gamma_ in weights} 76 | 77 | def inner(e: HyperEdge, v: Node) -> float: 78 | return gamma_[e.id, v.id] 79 | 80 | return inner 81 | 82 | 83 | def pi(edges: Iterable[HyperEdge], weights: Iterable[Gamma]): 84 | E_ = E(edges) 85 | gamma_ = gamma(weights) 86 | edges_ = {edge.id: edge for edge in edges} 87 | 88 | def inner(u: Node) -> float: 89 | E_u = {edges_[edge_id] for edge_id in E_(u)} 90 | 91 | return sum(e.omega * gamma_(e, u) 92 | for e in E_u) 93 | 94 | return inner 95 | 96 | 97 | def pi_alpha(weights: Iterable[Gamma]) -> Callable[[HyperEdge, Node], float]: 98 | gamma_ = gamma(weights) 99 | 100 | def inner(e: HyperEdge, u: Node) -> float: 101 | if u not in e.nodes: 102 | return 0.0 103 | 104 | return e.omega * gamma_(e, u) 105 | 106 | return inner 107 | 108 | 109 | def P(edges: Iterable[HyperEdge], weights: Iterable[Gamma]) -> Callable[[Node, Node, bool], float]: 110 | print("[transition] pre-calculating probabilities...") 111 | gamma_ = gamma(weights) 112 | delta_ = delta(weights) 113 | d_ = d(edges) 114 | E_ = E(edges) 115 | edges_ = {edge.id: edge for edge in edges} 116 | 117 | def inner(u: Node, v: Node, self_links: bool = False) -> float: 118 | E_u_v = (edges_[edge_id] for edge_id in E_(u, v)) 119 | 120 | delta_e = lambda e: delta_(e) if self_links else delta_(e) - gamma_(e, u) 121 | 122 | return sum(e.omega / d_(u) * gamma_(e, v) / delta_e(e) 123 | for e in E_u_v) 124 | 125 | return inner 126 | 127 | 128 | def w(edges: Iterable[HyperEdge], weights: Iterable[Gamma]) -> Callable[[Node, Node, bool], float]: 129 | """ 130 | Weight for going between vertex u to v in a unipartite representation 131 | of a hypergraph with edge-independent vertex weights. 132 | 133 | Assumes edge-independent vertex weights. 134 | 135 | .. 
math:: 136 | 137 | w_{u,v} = \sum_{e \in E(u,v) } \frac{ \omega(e) \gamma(u) \gamma(v) }{ \delta(e) } 138 | """ 139 | print("[transition] pre-calculating probabilities...") 140 | gamma_ = gamma(weights) 141 | delta_ = delta(weights) 142 | E_ = E(edges) 143 | edges_ = {edge.id: edge for edge in edges} 144 | 145 | def inner(u: Node, v: Node, self_links: bool = False) -> float: 146 | E_u_v = (edges_[edge_id] for edge_id in E_(u, v)) 147 | 148 | delta_e = lambda e: delta_(e) if self_links else delta_(e) - gamma_(e, u) 149 | 150 | return sum(e.omega * gamma_(e, u) * gamma_(e, v) / delta_e(e) 151 | for e in E_u_v) 152 | 153 | return inner 154 | -------------------------------------------------------------------------------- /output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapequation/mapping-hypergraphs/f81425c6bbd45b0537f8a0da2ddc24ec79730593/output/.gitkeep -------------------------------------------------------------------------------- /references/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapequation/mapping-hypergraphs/f81425c6bbd45b0537f8a0da2ddc24ec79730593/references/__init__.py -------------------------------------------------------------------------------- /references/__main__.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser, FileType 2 | 3 | from references.parse_references import parse 4 | from references.write_hypergraph import write_hypergraph, \ 5 | gamma_unweighted, gamma_weighted, \ 6 | omega_unweighted, omega_weighted, omega_citations, omega_log_citations 7 | 8 | if __name__ == "__main__": 9 | parser = ArgumentParser() 10 | 11 | parser.add_argument("infile", type=FileType("r")) 12 | parser.add_argument("outfile", type=FileType("w")) 13 | parser.add_argument("--gamma-weighted", dest="gamma_function", 14 | default=gamma_unweighted, const=gamma_weighted, action="store_const") 15 | parser.add_argument("--omega", choices=("unweighted", "weighted", "citations", "log-citations"), 16 | default="weighted") 17 | parser.add_argument("--verbose", action="store_true") 18 | 19 | args = parser.parse_args() 20 | 21 | omega_functions = { 22 | "unweighted": omega_unweighted, 23 | "weighted": omega_weighted, 24 | "citations": omega_citations, 25 | "log-citations": omega_log_citations 26 | } 27 | 28 | write_hypergraph(parse(args.infile, args.verbose), 29 | args.outfile, 30 | gamma_function=args.gamma_function, 31 | omega_function=omega_functions[args.omega]) 32 | -------------------------------------------------------------------------------- /references/get_citations.py: -------------------------------------------------------------------------------- 1 | from itertools import dropwhile 2 | from unicodedata import normalize 3 | 4 | from scholarly import scholarly, ProxyGenerator 5 | 6 | pg = ProxyGenerator() 7 | 8 | pg.Tor_Internal(tor_cmd="/usr/local/bin/tor") 9 | 10 | scholarly.use_proxy(pg) 11 | 12 | 13 | def get_google_scholar_citations(title: str, first_author: str) -> int: 14 | print(f"{first_author}: {title} -> ", end="") 15 | 16 | *_, last_name = first_author.split() 17 | 18 | query = f"{title.rstrip('.')} {last_name}" 19 | 20 | try: 21 | results = scholarly.search_pubs(query, patents=False) 22 | except Exception: 23 | print("no match") 24 | return -1 25 | 26 | for result in results: 27 | found_author = normalize("NFD", result.bib["author"][0]) \ 28 
| .encode("ascii", "ignore") \ 29 | .decode("ascii") 30 | 31 | if last_name.lower() in found_author.lower(): 32 | print(f"{result.bib['title']}: {result.bib['author'][0]}") 33 | return int(result.bib["cites"]) 34 | 35 | print("no match") 36 | return -1 37 | 38 | 39 | filename = "data/citations.txt" 40 | citations = {} 41 | 42 | with open(filename) as fp: 43 | lines = dropwhile(lambda line: not line.startswith("*Hyperedges"), fp.readlines()) 44 | next(lines) 45 | for line in lines: 46 | edge_id, *node_ids, omega = line.split() 47 | citations[int(edge_id)] = int(omega) 48 | 49 | 50 | def get_citations(title: str, first_author: str, edge_id: int) -> int: 51 | try: 52 | return citations[edge_id] 53 | except KeyError: 54 | return get_google_scholar_citations(title, first_author) 55 | -------------------------------------------------------------------------------- /references/parse_references.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import namedtuple 3 | from unicodedata import normalize 4 | 5 | Article = namedtuple("Article", "authors, title") 6 | 7 | 8 | def parse(texfile, verbose=False): 9 | tex = texfile.read() 10 | refs = list(map(lambda r: r.split("\n"), 11 | tex.split("\n\n"))) 12 | 13 | references = [] 14 | 15 | authors_map = { 16 | "arenas": "alex arenas", 17 | "barabasi": "albert-laszlo barabasi", 18 | "newman": "m e j newman", 19 | "latora": "vito latora", 20 | "sendina-nadal": "irene sendina-nadal", 21 | "diaz-guilera": "albert diaz-guilera", 22 | 'criado': 'regino criado', 23 | 'pikovsky': 'arkady s pikovsky', 24 | 'edelsbrunner': 'herbert edelsbrunner', 25 | 'ghoshal': 'gourab ghoshal', 26 | 'moreno': 'yamir moreno', 27 | 'alon': 'uri alon', 28 | 'zou': 'yong zou', 29 | 'rosenblum': 'michael g rosenblum', 30 | 'murali': 't m murali', 31 | 'guan': 'shuguang guan', 32 | 'mendes': 'j f f mendes', 33 | 'chavez': 'mario chavez', 34 | 'abbott': 'laurence f abbott', 35 | 'kurths': 'jurgen kurths', 36 | 'maynard smith': 'john maynard smith', 37 | 'romance': 'miguel romance', 38 | 'boccaletti': 'stefano boccaletti', 39 | 'strogatz': 'steven h strogatz', 40 | 'axelrod': 'robert axelrod', 41 | 'ghrist': 'robert ghrist', 42 | 'dorogovtsev': 'sergey n dorogovtsev', 43 | 'krawiecki': 'andrzej krawiecki', 44 | 'foster': 'brian l foster', 45 | 'berec': 'vesna berec', 46 | 'wang': 'zhen wang', 47 | 'mangan': 'shmoolik mangan', 48 | "miller mcpherson": "j miller mcpherson", 49 | "sole": "ricard v sole", 50 | "shen-orr": "shai s shen-orr", 51 | "benson": "austin r benson", 52 | "gleich": "david f gleich", 53 | "leskovec": "jurij leskovec", 54 | "chodrow": "philip s chodrow", 55 | "duval": "art m duval", 56 | "granovetter": "mark s granovetter", 57 | "philippa pattison": "philippa e pattison", 58 | "robins": "garry l robins", 59 | "frank": "loren m frank", 60 | "ball": "frank g ball", 61 | "aldous": "david j aldous", 62 | "sizemore": "ann e sizemore", 63 | "torres": "joaquin j torres", 64 | "mirasso": "claudio r mirasso", 65 | "perc": "matjaz perc", 66 | "floria": "luis mario floria", 67 | "vandermeer": "john h vandermeer", 68 | "klivans": "caroline j klivans" 69 | } 70 | 71 | for ref in refs: 72 | authors = None 73 | title = None 74 | bibitem_parsed = False 75 | authors_parsed = False 76 | title_parsed = False 77 | 78 | for line in ref: 79 | if line.startswith(r"\bibitem"): 80 | # is reference label in the first line? 
81 | bibitem_parsed = re.search(r"\[.+]{\S+?}", line) 82 | continue 83 | 84 | if not bibitem_parsed: 85 | if re.search(r"[^\\]+]{\S+?}$", line): 86 | # label was in the second line 87 | # next line should be author list 88 | bibitem_parsed = True 89 | continue 90 | 91 | if authors_parsed or line.startswith(r"\newblock"): 92 | authors_parsed = True 93 | 94 | if not authors_parsed: 95 | if authors: 96 | authors += " " + line 97 | else: 98 | authors = line 99 | 100 | elif not title_parsed and line.startswith(r"\newblock"): 101 | match = re.match(r"\\newblock (.+)$", line) 102 | if match: 103 | title_parsed = True 104 | title = match[1] \ 105 | .strip() \ 106 | .replace("{", "") \ 107 | .replace("}", "") \ 108 | .replace(r"\emph", "") 109 | 110 | if not authors_parsed or not title_parsed: 111 | raise RuntimeError("Could not parse authors or title") 112 | 113 | if authors: 114 | # normalize line 115 | authors = normalize("NFD", authors) \ 116 | .encode("ascii", "ignore") \ 117 | .decode("ascii") 118 | 119 | authors = authors.strip() \ 120 | .lower() \ 121 | .rstrip(".") \ 122 | .replace("~", " ") \ 123 | .replace(".", " ") 124 | 125 | authors = re.sub(r",? and ", r", ", authors) 126 | authors = re.sub(r",? et\.? al\.?", "", authors) 127 | 128 | # remove latex stuff 129 | authors = re.sub(r"\s+", " ", authors) 130 | authors = re.sub(r"\\v{z}", "z", authors) 131 | authors = re.sub(r"{\\.(.)+?}", r"\1", authors) 132 | authors = re.sub(r"{(.+?)}", r"\1", authors) 133 | authors = re.sub(r"\\.", "", authors) 134 | 135 | author_list = authors.split(", ") 136 | 137 | # re-map common variants 138 | for i, name in enumerate(author_list): 139 | for last_name, complete_name in authors_map.items(): 140 | if name != complete_name \ 141 | and name[0] == complete_name[0] \ 142 | and name.endswith(" " + last_name): 143 | author_list[i] = complete_name 144 | if verbose: 145 | print(f"{name} -> {complete_name}") 146 | break 147 | 148 | references.append(Article(tuple(author_list), title)) 149 | 150 | unique_authors = set() 151 | 152 | for coauthors, _ in references: 153 | unique_authors.update(coauthors) 154 | 155 | if verbose: 156 | print("-----------------------------") 157 | print(f"Parsed {len(references)}/{len(refs)} references") 158 | print(f"Authors: {len(unique_authors)}") 159 | 160 | return references 161 | -------------------------------------------------------------------------------- /references/write_hypergraph.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Tuple, Sequence, Union 3 | 4 | from references.get_citations import get_citations 5 | 6 | Weight = Union[int, float] 7 | 8 | 9 | def vertex(node_id: int, name: str) -> str: 10 | return f"{node_id} \"{name}\"\n" 11 | 12 | 13 | def hyperedge(edge_id: str, node_ids: Sequence[int], omega: Weight) -> str: 14 | ids = " ".join(map(str, node_ids)) 15 | return f"{edge_id} {ids} {omega}\n" 16 | 17 | 18 | def weight(edge_id: int, node_id: int, gamma: Weight) -> str: 19 | return f"{edge_id} {node_id} {gamma}\n" 20 | 21 | 22 | def omega_unweighted(*_) -> Weight: 23 | return 1 24 | 25 | 26 | def omega_weighted(node_ids: Tuple[int, ...], *_) -> Weight: 27 | return len(node_ids) 28 | 29 | 30 | def omega_citations(_, *args) -> Weight: 31 | return get_citations(*args) + 1 32 | 33 | 34 | def omega_log_citations(_, *args) -> Weight: 35 | return math.log(get_citations(*args) + 1) + 1 36 | 37 | 38 | def gamma_unweighted(n: int, **_) -> Tuple[Weight, ...]: 39 | return (1,) * n 40 | 41 | 42 | 
def gamma_weighted(n: int, equal_contributions=False) -> Tuple[Weight, ...]: 43 | if n > 1 and equal_contributions: 44 | return (1,) * n 45 | elif n == 1: 46 | return 2, 47 | else: 48 | return (2,) + (1,) * (n - 2) + (2,) 49 | 50 | 51 | def is_sorted(authors: Sequence[str]): 52 | return sorted(authors) == authors 53 | 54 | 55 | def write_hypergraph(hypergraph, 56 | outfile, 57 | omega_function=omega_unweighted, 58 | gamma_function=gamma_unweighted): 59 | unique_nodes = {node for nodes in hypergraph for node in nodes.authors} 60 | nodes = {name: i + 1 for i, name in enumerate(sorted(unique_nodes))} 61 | last_names = {i: name.split()[-1] for name, i in nodes.items()} 62 | 63 | outfile.write("*Vertices\n") 64 | outfile.writelines(vertex(node_id, name) for name, node_id in nodes.items()) 65 | 66 | edges = {i + 1: tuple(nodes[name] for name in node.authors) 67 | for i, node in enumerate(hypergraph)} 68 | 69 | articles = {i + 1: (node.title, node.authors[0]) 70 | for i, node in enumerate(hypergraph)} 71 | 72 | outfile.write("*Hyperedges\n") 73 | outfile.writelines(hyperedge(edge_id, edge, omega_function(edge, *articles[edge_id], edge_id)) 74 | for edge_id, edge in edges.items()) 75 | 76 | outfile.write("*Weights\n") 77 | outfile.writelines(weight(edge_id, node_id, gamma) 78 | for edge_id, node_ids in edges.items() 79 | for node_id, gamma in zip(node_ids, 80 | gamma_function(len(node_ids), 81 | equal_contributions=is_sorted( 82 | [last_names[node_id] for node_id in node_ids])))) 83 | --------------------------------------------------------------------------------
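Taken together, the references package forms a small pipeline: parse_references turns a LaTeX bibliography into Article tuples, get_citations fills in citation counts, and write_hypergraph emits the *Vertices/*Hyperedges/*Weights file read by the hypergraph package. A minimal sketch of the same wiring as references/__main__.py, using the weighted gamma and omega functions (the output path is only an example):

from references.parse_references import parse
from references.write_hypergraph import write_hypergraph, gamma_weighted, omega_weighted

with open("data/networks-beyond-pairwise-interactions-references.tex") as infile, \
        open("output/references-weighted.txt", "w") as outfile:
    articles = parse(infile, verbose=True)
    write_hypergraph(articles, outfile,
                     gamma_function=gamma_weighted,
                     omega_function=omega_weighted)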