├── .gitignore ├── LICENSE ├── README.md ├── demo_NGMs.ipynb ├── environment.yml ├── images ├── NGM-inference.png ├── NGM-learning.png ├── NGM-sampling.png ├── graphical-view.png ├── neural-view-projection-modules.png └── neural-view.png ├── ngm ├── __init__.py ├── main.py ├── main_generic.py └── utils │ ├── __init__.py │ ├── data_processing.py │ ├── ggm.py │ ├── metrics.py │ ├── neural_view.py │ └── uGLAD │ ├── __init__.py │ ├── glad │ ├── __init__.py │ ├── glad.py │ ├── glad_params.py │ └── torch_sqrtm.py │ ├── main.py │ └── utils │ ├── __init__.py │ ├── metrics.py │ └── prepare_data.py └── setup.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Harsh Shrivastava 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Neural Graphical Models 2 | `Neural Graphical Models` (NGMs) attempt to represent complex feature dependencies with reasonable computational costs. Specifically, given a graph, we capture the dependency structure between the features along with their complex function representations by using neural networks as a multi-task learning framework. We provide efficient learning, inference and sampling algorithms for NGMs. Moreover, NGMs can fit generic graph structures including directed, undirected and mixed-edge graphs as well as support mixed input data types. 3 | 4 | Key benefits & features: 5 | - Facilitate rich representations of complex underlying distributions. 6 | - Support various relationship types including directed, undirected, mixed-edge graphs. 7 | - Fast and efficient algorithms for learning, inference and sampling. 8 | - Direct access to the learned underlying distributions for analysis. 9 | - Handle different input data types like categorical, images & generic embedding representations. 10 | - Fast and scalable, supports batch learning with GPU support. 11 | 12 | 13 | ### High level overview 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | ### Algorithms 22 | 23 |
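NGMs come with learning, inference and sampling routines; `learning` and `inference` are implemented in `ngm/main.py` and demonstrated end-to-end in `demo_NGMs.ipynb`. The snippet below is a minimal usage sketch rather than code from the repository: it assumes the function signatures documented in `ngm/main.py` and the synthetic-GGM helper `get_data` from `ngm/utils/ggm.py`, and the feature names, graph construction and hyperparameter values are purely illustrative.

```python
import networkx as nx
import numpy as np
import pandas as pd

import ngm.main as ngm_main
import ngm.utils.ggm as ggm

# Simulate a chain-structured Gaussian graphical model and draw samples from it.
Xb, theta_true = ggm.get_data(num_nodes=10, sparsity=[0.1, 0.2],
                              num_samples=500, typeG='CHAIN')
names = [f'x{i}' for i in range(10)]
X = pd.DataFrame(Xb[0], columns=names)

# Conditional independence graph (here read off the true precision matrix;
# in practice it would come from a graph recovery method such as uGLAD).
adj = (np.abs(theta_true[0]) > 1e-6).astype(int) - np.eye(10, dtype=int)
G = nx.relabel_nodes(nx.from_numpy_array(adj), dict(enumerate(names)))

# Learning: fit the MLP `neural' view while penalizing input-output paths
# that are absent in G. Returns [model, scaler, feature_means].
model_NGM = ngm_main.learning(G, X, lambd=1.0, hidden_dim=20,
                              epochs=1200, k_fold=3)

# Inference: observe x0, mark the remaining features as unknown ('u'),
# and recover them by gradient descent over the input of the frozen MLP.
evidence = {n: 'u' for n in names}
evidence['x0'] = 0.5
Xpred = ngm_main.inference(model_NGM, evidence, unknown_val='u')
```

For anything beyond this sketch (for example the sampling routine or mixed input data types), the demo notebook is the authoritative reference.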

24 | 25 | 26 | 27 |

28 | 29 | ## Setup 30 | The `setup.sh` file contains the complete procedure for creating a conda environment to run the NGM model. Run `bash setup.sh`. 31 | In case of dependency conflicts, one can alternatively use the command `conda env create --name ngm --file=environment.yml`. 32 | 33 | ## Demo on representing Gaussian Graphical Models (GGMs) using NGMs 34 | A minimalist working example of NGMs is given in `demo_NGMs.ipynb`. It is a good entry point for understanding the code structure as well as NGMs themselves. 35 | 36 | ## Citation 37 | If you find this method useful, kindly cite the following associated papers: 38 | - `Neural Graphical Models`: [arxiv](https://arxiv.org/abs/2210.00453) 39 | 40 | @article{shrivastava2022neural, 41 | title={Neural Graphical Models}, 42 | author={Shrivastava, Harsh and Chajewska, Urszula}, 43 | journal={arXiv preprint arXiv:2210.00453}, 44 | year={2022} 45 | } 46 | 47 | 48 | - `uGLAD`: Sparse graph recovery by optimizing deep unrolled networks. [arxiv]() 49 | 50 | @inproceedings{ 51 | shrivastava2022a, 52 | title={A deep learning approach to recover conditional independence graphs}, 53 | author={Harsh Shrivastava and Urszula Chajewska and Robin Abraham and Xinshi Chen}, 54 | booktitle={NeurIPS 2022 Workshop: New Frontiers in Graph Learning}, 55 | year={2022}, 56 | url={https://openreview.net/forum?id=kEwzoI3Am4c} 57 | } 58 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ngm 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - argon2-cffi=21.3.0=pyhd8ed1ab_0 11 | - argon2-cffi-bindings=21.2.0=py38h0a891b7_2 12 | - asttokens=2.0.5=pyhd8ed1ab_0 13 | - atk-1.0=2.36.0=h516909a_2 14 | - attrs=22.1.0=pyh71513ae_1 15 | - backcall=0.2.0=pyh9f0ad1d_0 16 | - backports=1.0=py_2 17 | - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 18 | - beautifulsoup4=4.11.1=pyha770c72_0 19 | - blas=1.0=mkl 20 | - bleach=5.0.1=pyhd8ed1ab_0 21 | - bottleneck=1.3.4=py38hce1f21e_0 22 | - brotli=1.0.9=h166bdaf_7 23 | - brotli-bin=1.0.9=h166bdaf_7 24 | - brotlipy=0.7.0=py38h27cfd23_1003 25 | - bzip2=1.0.8=h7b6447c_0 26 | - ca-certificates=2022.6.15=ha878542_0 27 | - cairo=1.16.0=h18b612c_1001 28 | - certifi=2022.6.15=py38h578d9bd_0 29 | - cffi=1.15.0=py38hd667e15_1 30 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 31 | - cryptography=37.0.1=py38h9ce1e76_0 32 | - cudatoolkit=10.2.89=hfd86e86_1 33 | - cycler=0.11.0=pyhd8ed1ab_0 34 | - dbus=1.13.18=hb2f20db_0 35 | - debugpy=1.6.0=py38hfa26641_0 36 | - decorator=5.1.1=pyhd8ed1ab_0 37 | - defusedxml=0.7.1=pyhd8ed1ab_0 38 | - entrypoints=0.4=pyhd8ed1ab_0 39 | - executing=0.9.1=pyhd8ed1ab_0 40 | - expat=2.4.8=h27087fc_0 41 | - ffmpeg=4.3=hf484d3e_0 42 | - flit-core=3.7.1=pyhd8ed1ab_0 43 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 44 | - font-ttf-inconsolata=3.000=h77eed37_0 45 | - font-ttf-source-code-pro=2.038=h77eed37_0 46 | - font-ttf-ubuntu=0.83=hab24e00_0 47 | - fontconfig=2.14.0=h8e229c2_0 48 | - fonts-conda-ecosystem=1=0 49 | - fonts-conda-forge=1=0 50 | - fonttools=4.25.0=pyhd3eb1b0_0 51 | - freetype=2.11.0=h70c0345_0 52 | - fribidi=1.0.10=h36c2ea0_0 53 | - gdk-pixbuf=2.42.8=h433bba3_0 54 | - giflib=5.2.1=h7b6447c_0 55 | - glib=2.69.1=h4ff587b_1 56 | - gmp=6.2.1=h295c915_3 57 | - gnutls=3.6.15=he1e5248_0 58 | - gobject-introspection=1.72.0=py38hbb6d50b_0 59 | - graphite2=1.3.14=h295c915_1 60 | - graphviz=2.50.0=h3cd0ef9_0 61 | -
gst-plugins-base=1.14.0=hbbd80ab_1 62 | - gstreamer=1.14.0=h28cd5cc_2 63 | - gtk2=2.24.33=h73c1081_2 64 | - gts=0.7.6=h08bb679_0 65 | - harfbuzz=4.3.0=hd55b92a_0 66 | - icu=58.2=hf484d3e_1000 67 | - idna=3.3=pyhd3eb1b0_0 68 | - importlib-metadata=4.11.4=py38h578d9bd_0 69 | - importlib_resources=5.9.0=pyhd8ed1ab_0 70 | - intel-openmp=2021.4.0=h06a4308_3561 71 | - ipykernel=6.15.1=pyh210e3f2_0 72 | - ipython=8.4.0=py38h578d9bd_0 73 | - ipython_genutils=0.2.0=py_1 74 | - jedi=0.18.1=py38h578d9bd_1 75 | - jinja2=3.1.2=pyhd8ed1ab_1 76 | - jpeg=9e=h7f8727e_0 77 | - jsonschema=4.9.1=pyhd8ed1ab_0 78 | - jupyter_client=7.0.6=pyhd8ed1ab_0 79 | - jupyter_core=4.11.1=py38h578d9bd_0 80 | - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0 81 | - kiwisolver=1.4.2=py38h295c915_0 82 | - lame=3.100=h7b6447c_0 83 | - lcms2=2.12=h3be6417_0 84 | - ld_impl_linux-64=2.38=h1181459_1 85 | - libbrotlicommon=1.0.9=h166bdaf_7 86 | - libbrotlidec=1.0.9=h166bdaf_7 87 | - libbrotlienc=1.0.9=h166bdaf_7 88 | - libffi=3.3=he6710b0_2 89 | - libgcc-ng=11.2.0=h1234567_1 90 | - libgd=2.3.3=h695aa2c_1 91 | - libgfortran-ng=7.5.0=ha8ba4b0_17 92 | - libgfortran4=7.5.0=ha8ba4b0_17 93 | - libgomp=11.2.0=h1234567_1 94 | - libiconv=1.16=h7f8727e_2 95 | - libidn2=2.3.2=h7f8727e_0 96 | - libpng=1.6.37=hbc83047_0 97 | - librsvg=2.54.4=h19fe530_0 98 | - libsodium=1.0.18=h36c2ea0_1 99 | - libstdcxx-ng=11.2.0=h1234567_1 100 | - libtasn1=4.16.0=h27cfd23_0 101 | - libtiff=4.2.0=h2818925_1 102 | - libtool=2.4.6=h9c3ff4c_1008 103 | - libunistring=0.9.10=h27cfd23_0 104 | - libuuid=2.32.1=h7f98852_1000 105 | - libwebp=1.2.2=h55f646e_0 106 | - libwebp-base=1.2.2=h7f8727e_0 107 | - libxcb=1.13=h7f98852_1004 108 | - libxml2=2.9.14=h74e7548_0 109 | - lz4-c=1.9.3=h295c915_1 110 | - markupsafe=2.1.1=py38h0a891b7_1 111 | - matplotlib=3.5.2=py38h578d9bd_1 112 | - matplotlib-base=3.5.2=py38h826bfd8_0 113 | - matplotlib-inline=0.1.3=pyhd8ed1ab_0 114 | - mistune=0.8.4=py38h497a2fe_1005 115 | - mkl=2021.4.0=h06a4308_640 116 | - mkl-service=2.4.0=py38h7f8727e_0 117 | - mkl_fft=1.3.1=py38hd3c417c_0 118 | - mkl_random=1.2.2=py38h51133e4_0 119 | - munkres=1.1.4=pyh9f0ad1d_0 120 | - nbclient=0.6.6=pyhd8ed1ab_0 121 | - nbconvert=6.5.0=pyhd8ed1ab_0 122 | - nbconvert-core=6.5.0=pyhd8ed1ab_0 123 | - nbconvert-pandoc=6.5.0=pyhd8ed1ab_0 124 | - nbformat=5.4.0=pyhd8ed1ab_0 125 | - ncurses=6.3=h5eee18b_3 126 | - nest-asyncio=1.5.5=pyhd8ed1ab_0 127 | - nettle=3.7.3=hbbd107a_1 128 | - networkx=2.7.1=pyhd3eb1b0_0 129 | - notebook=6.4.12=pyha770c72_0 130 | - numexpr=2.8.1=py38h807cd23_2 131 | - openh264=2.1.1=h4ff587b_0 132 | - openssl=1.1.1q=h7f8727e_0 133 | - packaging=21.3=pyhd8ed1ab_0 134 | - pandas=1.4.2=py38h295c915_0 135 | - pandoc=2.18=ha770c72_0 136 | - pandocfilters=1.5.0=pyhd8ed1ab_0 137 | - pango=1.50.7=h05da053_0 138 | - parso=0.8.3=pyhd8ed1ab_0 139 | - pcre=8.45=h9c3ff4c_0 140 | - pexpect=4.8.0=pyh9f0ad1d_2 141 | - pickleshare=0.7.5=py_1003 142 | - pillow=9.2.0=py38hace64e9_1 143 | - pip=22.1.2=py38h06a4308_0 144 | - pixman=0.38.0=h516909a_1003 145 | - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 146 | - prometheus_client=0.14.1=pyhd8ed1ab_0 147 | - prompt-toolkit=3.0.30=pyha770c72_0 148 | - psutil=5.9.1=py38h0a891b7_0 149 | - pthread-stubs=0.4=h36c2ea0_1001 150 | - ptyprocess=0.7.0=pyhd3deb0d_0 151 | - pure_eval=0.2.2=pyhd8ed1ab_0 152 | - pycparser=2.21=pyhd8ed1ab_0 153 | - pygments=2.12.0=pyhd8ed1ab_0 154 | - pygraphviz=1.9=py38h86b1bdd_0 155 | - pyopenssl=22.0.0=pyhd3eb1b0_0 156 | - pyparsing=3.0.9=pyhd8ed1ab_0 157 | - pyqt=5.9.2=py38h05f1152_4 158 | - 
pyrsistent=0.18.1=py38h0a891b7_1 159 | - pysocks=1.7.1=py38h06a4308_0 160 | - python=3.8.13=h12debd9_0 161 | - python-dateutil=2.8.2=pyhd8ed1ab_0 162 | - python-fastjsonschema=2.16.1=pyhd8ed1ab_0 163 | - python_abi=3.8=2_cp38 164 | - pytorch=1.12.0=py3.8_cuda10.2_cudnn7.6.5_0 165 | - pytorch-mutex=1.0=cuda 166 | - pytz=2022.1=py38h06a4308_0 167 | - pyzmq=19.0.2=py38ha71036d_2 168 | - qt=5.9.7=h5867ecd_1 169 | - readline=8.1.2=h7f8727e_1 170 | - requests=2.28.1=py38h06a4308_0 171 | - scipy=1.7.3=py38hc147768_0 172 | - send2trash=1.8.0=pyhd8ed1ab_0 173 | - setuptools=61.2.0=py38h06a4308_0 174 | - sip=4.19.13=py38h295c915_0 175 | - six=1.16.0=pyh6c4a22f_0 176 | - soupsieve=2.3.2.post1=pyhd8ed1ab_0 177 | - sqlite=3.39.0=h5082296_0 178 | - stack_data=0.3.0=pyhd8ed1ab_0 179 | - terminado=0.15.0=py38h578d9bd_0 180 | - tinycss2=1.1.1=pyhd8ed1ab_0 181 | - tk=8.6.12=h1ccaba5_0 182 | - torchvision=0.13.0=py38_cu102 183 | - tornado=6.1=py38h0a891b7_3 184 | - traitlets=5.3.0=pyhd8ed1ab_0 185 | - typing_extensions=4.1.1=pyh06a4308_0 186 | - urllib3=1.26.11=py38h06a4308_0 187 | - wcwidth=0.2.5=pyh9f0ad1d_2 188 | - webencodings=0.5.1=py_1 189 | - wheel=0.37.1=pyhd3eb1b0_0 190 | - xorg-kbproto=1.0.7=h7f98852_1002 191 | - xorg-libice=1.0.10=h7f98852_0 192 | - xorg-libsm=1.2.3=hd9c2040_1000 193 | - xorg-libx11=1.7.2=h7f98852_0 194 | - xorg-libxau=1.0.9=h7f98852_0 195 | - xorg-libxdmcp=1.1.3=h7f98852_0 196 | - xorg-libxext=1.3.4=h7f98852_1 197 | - xorg-libxrender=0.9.10=h7f98852_1003 198 | - xorg-renderproto=0.11.1=h7f98852_1002 199 | - xorg-xextproto=7.3.0=h7f98852_1002 200 | - xorg-xproto=7.0.31=h7f98852_1007 201 | - xz=5.2.5=h7f8727e_1 202 | - zeromq=4.3.4=h9c3ff4c_1 203 | - zipp=3.8.1=pyhd8ed1ab_0 204 | - zlib=1.2.12=h7f8727e_2 205 | - zstd=1.5.2=ha4553b6_0 206 | - pip: 207 | - joblib==1.1.0 208 | - jsonpickle==2.2.0 209 | - numpy==1.22.4 210 | - pyvis==0.2.1 211 | - scikit-learn==1.1.1 212 | - threadpoolctl==3.1.0 213 | prefix: /home/harshx/anaconda3/envs/ngm 214 | -------------------------------------------------------------------------------- /images/NGM-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-inference.png -------------------------------------------------------------------------------- /images/NGM-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-learning.png -------------------------------------------------------------------------------- /images/NGM-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-sampling.png -------------------------------------------------------------------------------- /images/graphical-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/graphical-view.png -------------------------------------------------------------------------------- /images/neural-view-projection-modules.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/neural-view-projection-modules.png -------------------------------------------------------------------------------- /images/neural-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/neural-view.png -------------------------------------------------------------------------------- /ngm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/__init__.py -------------------------------------------------------------------------------- /ngm/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural graphical models for the conditional 3 | independence graphs. The conditional independence 4 | graphs show the partial correlations between the 5 | nodes (features). 6 | 7 | Functions for NGMs: 8 | 1. Learning 9 | 2. Inference 10 | 3. Sampling 11 | 12 | Note that this implementation is for 13 | 1. Undirected graphs. 14 | 2. Input data should be real valued. 15 | 16 | TODO: Implementation for the directed graphs. 17 | TODO: Extend to images and categorical variables. 18 | """ 19 | import copy 20 | import networkx as nx 21 | import numpy as np 22 | import pandas as pd 23 | from sklearn.model_selection import KFold 24 | import sys 25 | import torch 26 | import torch.nn as nn 27 | 28 | # local imports 29 | import ngm.utils.neural_view as neural_view 30 | import ngm.utils.data_processing as dp 31 | 32 | 33 | ###################################################################### 34 | # Functions for NGM learning 35 | ###################################################################### 36 | 37 | def product_weights_MLP(model): 38 | """ 39 | Reads the input model (MLP) and returns the normalized 40 | product of the neural network weight matrices. 41 | """ 42 | for i, (n, p) in enumerate(model.MLP.named_parameters()): 43 | if i==0: 44 | if 'weight' in n: 45 | W = torch.abs(p).t() # DxH 46 | # Normalizing the weight using L2-norm 47 | W = torch.nn.functional.normalize(W) 48 | else: # i > 0 49 | if 'weight' in n: 50 | curr_W = torch.abs(p).t() 51 | # Normalizing the current weight using L2-norm 52 | curr_W = torch.nn.functional.normalize(curr_W) 53 | W = torch.matmul(W, curr_W) 54 | # Normalizing the running weight product using L2-norm 55 | W = torch.nn.functional.normalize(W) 56 | return W 57 | 58 | 59 | def forward_NGM(X, model, S, structure_penalty='hadamard', lambd=0.1): 60 | """Pass the input X through the NGM model 61 | to obtain the X_pred. 62 | 63 | LOSS = reg_loss + lambd * structure_loss 64 | 65 | The 'hadamard' ||prodW * Sc|| is more theoretically sound as it just 66 | focuses on the terms needed to zero out and completely drop the 67 | non-zero terms. 68 | The 'diff' ||prodW-S|| also tries to make the non-zero terms go to 1. 69 | 70 | Args: 71 | X (torch.Tensor BxD): Input data 72 | model (torch.nn.object): The MLP model for NGM's `neural' view 73 | S (pd.DataFrame): Adjacency matrix from graph G 74 | structure_penalty (str): 'hadamard':||prodW * Sc||, 'diff':||prodW-S|| 75 | lambd (float): reg_loss + lambd * structure_loss 76 | Recommended lambd=1 as the losses are scaled to the same range. 
77 | 78 | Returns: 79 | (list): [ 80 | Xp (torch.Tensor BxD): The predicted X 81 | loss (torch.scalar): The NGM loss 82 | reg_loss (torch.scalar): The regression term loss 83 | structure_loss (torch.scalar): The structure penalty loss 84 | ] 85 | """ 86 | # 1. Running the NGM model 87 | Xp = model.MLP(X) 88 | # 2. Calculate the regression loss 89 | mse = nn.MSELoss() 90 | reg_loss = mse(Xp, X) 91 | # 3. Calculate the structure loss 92 | # 3.1 Get the frame of the graph structure 93 | if structure_penalty=='hadamard': 94 | # Get the complement of S (binarized) 95 | Sg = (S==0).astype(int) 96 | Sg = dp.convertToTorch(np.array(Sg), req_grad=False) 97 | elif structure_penalty=='diff': 98 | # Binarize the adjacency matrix S 99 | Sg = (S!=0).astype(int) 100 | Sg = dp.convertToTorch(np.array(Sg), req_grad=False) 101 | else: 102 | print(f'Structure penalty {structure_penalty} is not defined') 103 | sys.exit(0) 104 | # 3.2 Initialize the structure loss 105 | structure_loss = torch.zeros(1)[0] 106 | if lambd > 0: 107 | # 3.3 Get the product of weights (L2 normalized) of the MLP 108 | prod_W = product_weights_MLP(model) 109 | D = prod_W.shape[-1] 110 | # 3.4 Calculate the penalty 111 | if structure_penalty=='hadamard': 112 | # Using the L2 norm for high structure penalty 113 | structure_loss = torch.linalg.norm(prod_W*Sg, ord=2) 114 | elif structure_penalty=='diff': 115 | struct_mse = nn.MSELoss() 116 | structure_loss = struct_mse(prod_W, Sg) 117 | # 3.5 Scale the structure loss 118 | structure_loss = structure_loss/(D**2) 119 | # Adding the log scaling 120 | structure_loss = torch.log(structure_loss) 121 | # 4. Calculate the total loss = reg_loss + lambd * struct_loss 122 | loss = reg_loss + lambd * structure_loss 123 | 124 | return Xp, loss, reg_loss, structure_loss 125 | 126 | 127 | def learning( 128 | G, 129 | X, 130 | lambd=1.0, 131 | hidden_dim=20, 132 | epochs=1200, 133 | lr=0.001, 134 | norm_type='min_max', 135 | k_fold=1, 136 | structure_penalty='hadamard', 137 | VERBOSE=True 138 | ): 139 | """Learn the distribution over a conditional independence graph. 140 | 1. Fit a MLP (autoencoder) to learn the data representation from X->X. 141 | 2. The input-output path of dependence structure of the MLP 142 | should match the conditional independence structure of the 143 | input graph. This is achieved using a regularization term. 144 | 3. Return the learned model representing the NGM 145 | 146 | Normalize X and select the best model using K-fold CV. 147 | 148 | Fit the MLP on the input data X to get the `neural' view of NGM 149 | while maintaining the conditional independence structure defined 150 | by the complement structure matrix Sc. Does cross-validation to 151 | get better generalization. 152 | 153 | Args: 154 | G (nx.Graph): Conditional independence graph. 155 | X (pd.DataFrame): Samples(M) x Features(D). 156 | lambd (float): reg_loss + lambd * structure_loss 157 | Recommended lambd=1 as the losses are scaled to the same range. 158 | hidden_dim (int): The size of the hidden unit of the MLP. 159 | Each layer will have the same value. 160 | epochs (int): The training epochs number. 161 | lr (float): Learning rate for the optimizer. 162 | norm_type (str): min_max/mean 163 | k_fold (int): #splits for the k-fold CV. 164 | structure_penalty (str): 'hadamard':||prodW * Sc||, 'diff':||prodW-S|| 165 | VERBOSE (bool): if True, prints to output. 
166 | 167 | Returns: 168 | model_NGM (list): [ 169 | model (torch.nn.object): A MLP model for NGM's `neural' view, 170 | scaler (sklearn object): Learned normalizer for the input data, 171 | feature_means (pd.Series): [feature:mean val] 172 | ] 173 | """ 174 | # Get the graph structure 175 | S = nx.to_pandas_adjacency(G) 176 | # Arrange the columns of X to match the adjacency matrix 177 | X = X[S.columns] 178 | feature_means = X.mean() 179 | print(f'Means of selected features {feature_means, len(feature_means)}') 180 | # Normalize the data 181 | print(f'Normalizing the data: {norm_type}') 182 | X, scaler = dp.process_data_for_CI_graph(X, norm_type) 183 | # Converting the data to torch 184 | X = dp.convertToTorch(np.array(X), req_grad=False) 185 | M, D = X.shape 186 | # Splitting into k-fold for cross-validation 187 | n_splits = k_fold if k_fold > 1 else 2 188 | kf = KFold(n_splits=n_splits, shuffle=True) 189 | # For each fold, collect the best model and the test-loss value 190 | results_Kfold = {} 191 | for _k, (train, test) in enumerate(kf.split(X)): 192 | if _k >= k_fold: # No CV if k_fold=1 193 | continue 194 | if VERBOSE: print(f'Fold num {_k}') 195 | X_train, X_test = X[train], X[test] # KxD, (M-K)xD 196 | 197 | # Initialize the MLP model 198 | if VERBOSE: print(f'Initializing the NGM model') 199 | model = neural_view.DNN(I=D, H=hidden_dim, O=D) 200 | optimizer = neural_view.get_optimizers(model, lr=lr) 201 | 202 | # TODO: Add base initialization only on the regression loss 203 | # model = base_initialization_NGM(model, X_train) 204 | 205 | # Defining optimization & model tracking parameters 206 | best_test_loss = np.inf 207 | PRINT = int(epochs/10) # will print only 10 times 208 | lambd_increase = int(epochs/10) 209 | # updating with the best model and loss for the current fold 210 | results_Kfold[_k] = {} 211 | 212 | # Training the NGM model 213 | for e in range(epochs): 214 | # TODO: Keep increasing the lambd penalty as epochs proceed 215 | # if not e % lambd_increase: 216 | # lambd *= 10 # increase in lambd value 217 | # print(f'epoch={e}, lambda={lambd}') 218 | # reset the grads to zero 219 | optimizer.zero_grad() 220 | # calculate the loss for train data 221 | _, loss_train, reg_loss_train, struct_loss_train = forward_NGM( 222 | X_train, 223 | model, 224 | S, 225 | structure_penalty, 226 | lambd=lambd 227 | ) 228 | with torch.no_grad(): # prediction on test 229 | _, loss_test, reg_loss_test, struct_loss_test = forward_NGM( 230 | X_test, 231 | model, 232 | S, 233 | structure_penalty, 234 | lambd=lambd 235 | ) 236 | # calculate the backward gradients 237 | loss_train.backward() 238 | # updating the optimizer params with the grads 239 | optimizer.step() 240 | # Printing output 241 | if not e%PRINT and VERBOSE: 242 | print(f'\nFold {_k}: epoch:{e}/{epochs}') 243 | print(f'Train: loss={dp.t2np(loss_train)}, reg={dp.t2np(reg_loss_train)}, struct={dp.t2np(struct_loss_train)}') 244 | print(f'Test: loss={dp.t2np(loss_test)}, reg={dp.t2np(reg_loss_test)}, struct={dp.t2np(struct_loss_test)}') 245 | # Updating the best model for this fold 246 | _loss_test = dp.t2np(loss_test) 247 | if _loss_test < best_test_loss: # and e%10==9: 248 | results_Kfold[_k]['best_model_updates'] = f'Fold {_k}: epoch:{e}/{epochs}:\n\ 249 | Train: loss={dp.t2np(loss_train)}, reg={dp.t2np(reg_loss_train)}, struct={dp.t2np(struct_loss_train)}\n\ 250 | Test: loss={dp.t2np(loss_test)}, reg={dp.t2np(reg_loss_test)}, struct={dp.t2np(struct_loss_test)}' 251 | # if VERBOSE and not e%PRINT or e==epochs-1: 252 | # 
print(f'Fold {_k}: epoch:{e}/{epochs}: Updating the best model with test loss={_loss_test}') 253 | best_model_kfold = copy.deepcopy(model) 254 | best_test_loss = _loss_test 255 | # else: # loss increasing, reset the model to the previous best 256 | # # print('re-setting to the previous best model') 257 | # model = best_model_kfold 258 | # optimizer = neural_view.get_optimizers(model, lr=lr) 259 | results_Kfold[_k]['test_loss'] = best_test_loss 260 | results_Kfold[_k]['model'] = best_model_kfold 261 | if VERBOSE: print('\n') 262 | # Select the model from the results Kfold dictionary 263 | # with the best score on the test fold. 264 | best_loss = np.inf 265 | for _k in results_Kfold.keys(): 266 | curr_loss = results_Kfold[_k]['test_loss'] 267 | if curr_loss < best_loss: 268 | model = results_Kfold[_k]['model'] 269 | best_loss = curr_loss 270 | best_model_details = results_Kfold[_k]["best_model_updates"] 271 | 272 | print(f'Best model selected: {best_model_details}') 273 | # Checking the structure of the prodW and Sc 274 | prod_W = dp.t2np(product_weights_MLP(model)) 275 | # print(f'Structure Check: prodW={prod_W}, S={(np.array(S)!=0).astype(int)}') 276 | return [model, scaler, feature_means] 277 | 278 | 279 | ###################################################################### 280 | # Functions to run inference over the learned NGM 281 | ###################################################################### 282 | 283 | def inference( 284 | model_NGM, 285 | node_feature_dict, 286 | unknown_val='u', 287 | lr=0.001, 288 | max_itr=1000, 289 | VERBOSE=True, 290 | reg_loss_th=1e-6 291 | ): 292 | """Algorithm to run the feature inference among the nodes of the 293 | NGM learned over the conditional independence graph. 294 | 295 | We only optimize for the regression of the known values as that 296 | is the only ground truth information we have and the prediction 297 | should be able to recover the observed datapoints. 298 | Regression: Xp = f(Xi) 299 | Input Xi = {Xi[k] (fixed), Xi[u] (learned)} 300 | Reg loss for inference = ||Xp[k] - Xi[k]||^2_2 301 | 302 | Run gradient descent over the input, which modifies the unobserved 303 | features to minimize the inference regression loss. 304 | 305 | Args: 306 | model_NGM (list): [ 307 | model (torch.nn.object): A MLP model for NGM's `neural' view, 308 | scaler (sklearn object): Learned normalizer for the input data, 309 | feature_means (pd.Series): [feature:mean val] 310 | ] 311 | node_feature_dict (dict): {'name':value}. 312 | unknown_val (str): The marker for the unknown value. 313 | lr (float): Learning rate for the optimizer. 314 | max_itr (int): For the convergence. 315 | VERBOSE (bool): enable/disable print statements. 316 | reg_loss_th (float): The threshold for reg loss convergence. 317 | 318 | Returns: 319 | Xpred (pd.DataFrame): Predictions for the unobserved features. 
320 | {'feature name': pred-value} 321 | """ 322 | # Get the NGM params 323 | model, scaler, feature_means = model_NGM 324 | # Get the feature names and input dimension 325 | D = len(feature_means) 326 | feature_names = feature_means.index 327 | # Freeze the model weights 328 | for p in model.parameters(): 329 | p.requires_grad = False 330 | # Initializin the input vector Xi 331 | _Xi = feature_means.copy() 332 | # TODO: Try min and max init as well 333 | # Assign the known (observed) values to the Xi 334 | for _n, v in node_feature_dict.items(): 335 | if v!=unknown_val: 336 | _Xi[_n] = v 337 | # Normalize the values of Xi using the scaler 338 | _Xi = scaler.transform(dp.series2df(_Xi))[0] 339 | # Convert to dataseries to maintain the column name associations 340 | _Xi = pd.Series( 341 | {n:v for n, v in zip(feature_names, _Xi)}, 342 | index=feature_names 343 | ) 344 | # Creating the feature list with unobserved (unkonwn) tensors as learnable. 345 | # and observed (known) tensors as fixed 346 | feature_tensors = [] # List of feature tensors 347 | # Setting the optimization parameters 348 | optimizer_parameters = [] 349 | for i, _n in enumerate(feature_names): 350 | _xi = torch.as_tensor(_Xi[_n]) 351 | # set the value to learnable or not 352 | _xi.requires_grad = node_feature_dict[_n]==unknown_val 353 | feature_tensors.append(_xi) 354 | if node_feature_dict[_n]==unknown_val: 355 | optimizer_parameters.append(_xi) 356 | # Init a mask for the known & unknown values 357 | mask_known = torch.zeros(1, D) 358 | mask_unknown = torch.zeros(1, D) 359 | for i, _n in enumerate(feature_names): 360 | if node_feature_dict[_n]==unknown_val: 361 | mask_unknown[0][i] = 1 362 | else: 363 | mask_known[0][i] = 1 364 | # Define the optimizer 365 | optimizer = torch.optim.Adam( 366 | optimizer_parameters, 367 | lr=lr, 368 | betas=(0.9, 0.999), 369 | eps=1e-08, 370 | # weight_decay=0 371 | ) 372 | # Minimizing for the regression loss for the known values. 
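# Note: only the tensors flagged as unknown were registered with the optimizer
# above, so the gradient steps can move just those entries; the observed entries
# stay fixed and serve as the targets that the MLP's predictions must reproduce.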
373 | itr = 0 374 | curr_reg_loss = np.inf 375 | PRINT = int(max_itr/10) + 1 # will print only 10 times 376 | mse = nn.MSELoss() # regression loss 377 | best_reg_loss = np.inf 378 | while curr_reg_loss > reg_loss_th and itr 0 else 1 385 | return scale_wt_G 386 | 387 | scale_wt_G1 = get_scaling_wt(G1) 388 | scale_wt_G2 = get_scaling_wt(G2) 389 | 390 | plt.figure(figsize=(24, 24)) 391 | plt.subplot(221) 392 | # plt.figure(1, figsize=(fig_size, fig_size)) 393 | plot_graph_compare(G1_int, pos, title=t1+': Edges present in both graphs', scale_wt=scale_wt_G1, intensity=3) 394 | plt.subplot(222)#, figsize=(fig_size, fig_size)) 395 | plot_graph_compare(G2_int, pos, title=t2+': Edges present in both graphs', scale_wt=scale_wt_G2) 396 | plt.subplot(223)#, figsize=(fig_size, fig_size)) 397 | plot_graph_compare(G1_unique, title=t1+': Unique edges', scale_wt=scale_wt_G1, intensity=3) 398 | plt.subplot(224)#, figsize=(fig_size, fig_size)) 399 | # G2_unique.remove_nodes_from(['no_mmorb', 'attend']) 400 | plot_graph_compare(G2_unique, title=t2+': Unique edges', scale_wt=scale_wt_G2)#, get_image_bytes=True) 401 | 402 | plt.savefig('compare_graphs', bbox_inches='tight') 403 | # Saving the figure in-memory 404 | buf = io.BytesIO() 405 | plt.savefig(buf) 406 | # getting the image in bytes 407 | buf.seek(0) 408 | image_bytes = buf.getvalue() # Image.open(buf, mode='r') 409 | buf.close() 410 | # closing the plt 411 | plt.close() 412 | return image_bytes 413 | -------------------------------------------------------------------------------- /ngm/utils/ggm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains functions for using NGMs to model 3 | Gaussian Grapical models. 4 | """ 5 | import matplotlib.pyplot as plt 6 | import networkx as nx 7 | import numpy as np 8 | import pandas as pd 9 | import io, sys 10 | from scipy.stats import multivariate_normal 11 | 12 | # Local imports 13 | import ngm.utils.data_processing as dp 14 | 15 | 16 | def get_data( 17 | num_nodes, 18 | sparsity, 19 | num_samples, 20 | batch_size=1, 21 | typeG='CHAIN', 22 | w_min=0.5, 23 | w_max=1.0, 24 | eig_offset=0.1, 25 | ): 26 | """Prepare true adj matrices as theta and then sample from 27 | Gaussian to get the corresponding samples. 28 | 29 | Args: 30 | num_nodes (int): The number of nodes in graph 31 | sparsity ([float, float]): The [min, max] probability of edges 32 | num_samples (int): The number of samples to simulate 33 | batch_size (int, optional): The number of batches 34 | typeG (str): RANDOM/GRID/CHAIN 35 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 36 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 37 | 38 | Returns: 39 | Xb (BxMxD): The sample data 40 | trueTheta (BxDxD): The true precision matrices 41 | """ 42 | Xb, trueTheta = [], [] 43 | for b in range(batch_size): 44 | # I - Getting the true edge connections 45 | edge_connections = generateGraph( 46 | num_nodes, 47 | sparsity, 48 | typeG=typeG 49 | ) 50 | # II - Gettings samples from fitting a Gaussian distribution 51 | # sample the entry of the matrix 52 | 53 | X, true_theta = simulateGaussianSamples( 54 | num_nodes, 55 | edge_connections, 56 | num_samples, 57 | u=eig_offset, 58 | w_min=w_min, 59 | w_max=w_max 60 | ) 61 | # collect the batch data 62 | Xb.append(X) 63 | trueTheta.append(true_theta) 64 | return np.array(Xb), np.array(trueTheta) 65 | 66 | 67 | def generateGraph(num_nodes, sparsity, typeG='RANDOM', seed=None): 68 | """Generate a random erdos-renyi graph with a given 69 | sparsity. 
70 | 71 | Args: 72 | num_nodes (int): The number of nodes in the graph 73 | sparsity ([float, float]): The [min, max] probability of edges 74 | seed (int, optional): set the numpy random seed 75 | typeG (str): RANDOM/GRID/CHAIN 76 | 77 | Returns: 78 | edge_connections (2D np array (float)): Adj matrix 79 | """ 80 | if typeG == 'RANDOM': 81 | min_s, max_s = sparsity 82 | s = np.random.uniform(min_s, max_s, 1)[0] 83 | G = nx.generators.random_graphs.gnp_random_graph( 84 | num_nodes, 85 | s, 86 | seed=seed, 87 | directed=False 88 | ) 89 | elif typeG == 'CHAIN': 90 | G = nx.generators.path_graph(num_nodes) 91 | else: 92 | print(f'Type of graph {typeG} not found.') 93 | sys.exit(0) 94 | edge_connections = nx.adjacency_matrix(G).todense() 95 | return edge_connections 96 | 97 | 98 | def simulateGaussianSamples( 99 | num_nodes, 100 | edge_connections, 101 | num_samples, 102 | seed=None, 103 | u=0.1, 104 | w_min=0.5, 105 | w_max=1.0, 106 | ): 107 | """Simulating num_samples from a Gaussian distribution. The 108 | precision matrix of the Gaussian is determined using the 109 | edge_connections. Randomly assign +/-ve signs to entries. 110 | 111 | Args: 112 | num_nodes (int): The number of nodes in the DAG 113 | edge_connections (2D np array (float)): Adj matrix 114 | num_sample (int): The number of samples 115 | seed (int, optional): set the numpy random seed 116 | u (float): Min eigenvalue offset for the precision matrix 117 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 118 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 119 | 120 | Returns: 121 | X (2D np array (float)): num_samples x num_nodes 122 | precision_mat (2D np array (float)): num_nodes x num_nodes 123 | """ 124 | # zero mean of Gaussian distribution 125 | mean_value = 0 126 | mean_normal = np.ones(num_nodes) * mean_value 127 | # Setting the random seed 128 | if seed: np.random.seed(seed) 129 | # uniform entry matrix [w_min, w_max] 130 | U = np.matrix(np.random.random((num_nodes, num_nodes)) 131 | * (w_max - w_min) + w_min) 132 | theta = np.multiply(edge_connections, U) 133 | # making it symmetric 134 | theta = (theta + theta.T)/2 + np.eye(num_nodes) 135 | # Randomly assign +/-ve signs 136 | gs = nx.Graph() 137 | gs.add_weighted_edges_from( 138 | (u,v,np.random.choice([+1, -1], 1)[0]) 139 | for u,v in nx.complete_graph(num_nodes).edges() 140 | ) 141 | signs = nx.adjacency_matrix(gs).todense() 142 | theta = np.multiply(theta, signs) # update theta with the signs 143 | smallest_eigval = np.min(np.linalg.eigvals(theta)) 144 | # Just in case : to avoid numerical error in case an 145 | # epsilon complex component present 146 | smallest_eigval = smallest_eigval.real 147 | # making the min eigenvalue as u 148 | precision_mat = theta + np.eye(num_nodes)*(u - smallest_eigval) 149 | # print(f'Smallest eval: {np.min(np.linalg.eigvals(precision_mat))}') 150 | # getting the covariance matrix (avoid the use of pinv) 151 | cov = np.linalg.inv(precision_mat) 152 | # get the samples 153 | if seed: np.random.seed(seed) 154 | # Sampling data from multivariate normal distribution 155 | data = np.random.multivariate_normal( 156 | mean=mean_normal, 157 | cov=cov, 158 | size=num_samples 159 | ) 160 | return data, precision_mat # MxD, DxD 161 | 162 | 163 | def get_partial_correlations(precision): 164 | """Get the partial correlation matrix from the 165 | precision matrix. 
It applies the following 166 | 167 | Formula: rho_ij = -p_ij/sqrt(p_ii * p_jj) 168 | 169 | Args: 170 | precision (2D np.array): The precision matrix 171 | 172 | Returns: 173 | rho (2D np.array): The partial correlations 174 | """ 175 | precision = np.array(precision) 176 | D = precision.shape[0] 177 | rho = np.zeros((D, D)) 178 | for i in range(D): # rows 179 | for j in range(D): # columns 180 | if i==j: # diagonal elements 181 | rho[i][j] = 1 182 | elif j < i: # symmetric 183 | rho[i][j] = rho[j][i] 184 | else: # i > j 185 | num = -1*precision[i][j] 186 | den = np.sqrt(precision[i][i]*precision[j][j]) 187 | rho[i][j] = num/den 188 | return rho 189 | 190 | 191 | # Plot the graph 192 | def graph_from_partial_correlations( 193 | rho, 194 | names, # node names 195 | sparsity=1, 196 | title='', 197 | fig_size=12, 198 | PLOT=True, 199 | save_file=None, 200 | roundOFF=5 201 | ): 202 | G = nx.Graph() 203 | G.add_nodes_from(names) 204 | D = rho.shape[-1] 205 | 206 | # determining the threshold to maintain the sparsity level of the graph 207 | def upper_tri_indexing(A): 208 | m = A.shape[0] 209 | r,c = np.triu_indices(m,1) 210 | return A[r,c] 211 | 212 | rho_upper = upper_tri_indexing(np.abs(rho)) 213 | num_non_zeros = int(sparsity*len(rho_upper)) 214 | rho_upper.sort() 215 | th = rho_upper[-num_non_zeros] 216 | print(f'Sparsity {sparsity} using threshold {th}') 217 | th_pos, th_neg = th, -1*th 218 | 219 | graph_edge_list = [] 220 | for i in range(D): 221 | for j in range(i+1, D): 222 | if rho[i,j] > th_pos: 223 | G.add_edge(names[i], names[j], color='green', weight=round(rho[i,j], roundOFF), label='+') 224 | _edge = '('+names[i]+', '+names[j]+', '+str(round(rho[i,j], roundOFF))+', green)' 225 | graph_edge_list.append(_edge) 226 | elif rho[i,j] < th_neg: 227 | G.add_edge(names[i], names[j], color='red', weight=round(rho[i,j], roundOFF), label='-') 228 | _edge = '('+names[i]+', '+names[j]+', '+str(round(rho[i,j], roundOFF))+', red)' 229 | graph_edge_list.append(_edge) 230 | 231 | # if PLOT: print(f'graph edges {graph_edge_list, len(graph_edge_list)}') 232 | 233 | edge_colors = [G.edges[e]['color'] for e in G.edges] 234 | edge_width = np.array([abs(G.edges[e]['weight']) for e in G.edges]) 235 | # Scaling the intensity of the edge_weights for viewing purposes 236 | if len(edge_width) > 0: 237 | edge_width = edge_width/np.max(np.abs(edge_width)) 238 | image_bytes = None 239 | if PLOT: 240 | fig = plt.figure(1, figsize=(fig_size,fig_size)) 241 | plt.title(title) 242 | n_edges = len(G.edges()) 243 | pos = nx.spring_layout(G, scale=0.2, k=1/np.sqrt(n_edges+10)) 244 | # pos = nx.nx_agraph.graphviz_layout(G, prog='fdp') #'fdp', 'sfdp', 'neato' 245 | nx.draw_networkx_nodes(G, pos, node_color='grey', node_size=100) 246 | nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=edge_width) 247 | y_off = 0.008 248 | nx.draw_networkx_labels(G, pos = {k:([v[0], v[1]+y_off]) for k,v in pos.items()}) 249 | plt.title(f'{title}', fontsize=20) 250 | plt.margins(0.15) 251 | plt.tight_layout() 252 | # saving the file 253 | if save_file: 254 | plt.savefig(save_file, bbox_inches='tight') 255 | # Saving the figure in-memory 256 | buf = io.BytesIO() 257 | plt.savefig(buf) 258 | # getting the image in bytes 259 | buf.seek(0) 260 | image_bytes = buf.getvalue() # Image.open(buf, mode='r') 261 | buf.close() 262 | # closing the plt 263 | plt.close(fig) 264 | return G, image_bytes, graph_edge_list 265 | 266 | 267 | def viz_graph_from_precision(theta, column_names, sparsity=0.1, title=''): 268 | rho = 
get_partial_correlations(theta) 269 | Gr, _, _ = graph_from_partial_correlations( 270 | rho, 271 | column_names, 272 | sparsity=sparsity 273 | ) 274 | print(f'Num nodes: {len(Gr.nodes)}') 275 | Gv = dp.get_interactive_graph(Gr, title, node_PREFIX=None) 276 | return Gr, Gv 277 | 278 | 279 | ###################################################################### 280 | # Functions to analyse the marginal and conditional distributions 281 | ###################################################################### 282 | 283 | def get_distribution_function(target, source, model_GGM, Xi, count=100): 284 | """Plot the function target=GGM(source) or Xp=f(Xi). 285 | Vary the range of the source and collect the values of the 286 | target variable. We keep the rest of the targets & sources 287 | constant given in Xi (input to the GGM). 288 | 289 | Args: 290 | target (str/int/float): The feature of interest 291 | source (str/int/float): The feature having a direct connection 292 | with the target in the neural view of NGM. 293 | model_GGM (list): [ 294 | mean (pd.Series) = {feature: mean value} 295 | cov (2D np.array) = Covariance matrix between features 296 | scaler (list of pd.Series): [data_min_, data_max_] 297 | ] 298 | Xi (pd.DataFrame): Initial values of the input to the model. 299 | All the values except the source nodes remain constant 300 | while varying the input over the range of source feature. 301 | count (int): The number of points to evaluate f(x) in the range. 302 | 303 | Returns: 304 | x_vals (np.array): range of source values 305 | fx_vals (np.array): predicted f(source) values for the target 306 | """ 307 | mean, cov, scaler = model_GGM 308 | data_min_, data_max_ = scaler 309 | column_names = Xi.columns 310 | print(f'target={target}, source={source}') 311 | # Get the min and max range of the source 312 | source_idx = Xi.columns.get_loc(source) 313 | source_min = data_min_[source_idx] 314 | source_max = data_max_[source_idx] 315 | # Get the min and max range of the target 316 | target_idx = Xi.columns.get_loc(target) 317 | target_min = data_min_[target_idx] 318 | target_max = data_max_[target_idx] 319 | # print(f'Source {source} at index {source_idx}: range ({source_min}, {source_max})') 320 | # Get the range of the source and target values 321 | x_vals = np.linspace(source_min, source_max, count) 322 | y_vals = np.linspace(target_min, target_max, count) 323 | # Collect the fx_vals 324 | fx_vals = [] 325 | # For each x_val, find the expected value of y from the pdf 326 | for _x in x_vals: # expected_value calculation 327 | # Set the source value 328 | Xi[source] = _x 329 | # Replicate the Xi entries to have count rows 330 | Xi_batch = pd.DataFrame(np.repeat(Xi.values, count, axis=0), columns=column_names) 331 | # Get the range of possible target values 332 | Xi_batch[target] = y_vals 333 | # Get the probabilitites using the probability density function 334 | py = multivariate_normal.pdf(Xi_batch, mean=mean, cov=cov) 335 | # Normalize the probabilities to make it proportional to conditional 336 | # distribution p(target, source| X{remaining}) = p(S, T, {Xr})/p({Xr}) 337 | py = py/np.sum(py) 338 | _y = np.dot(py, y_vals) # Direct expectation calculation 339 | # Choose the y based on sample count 340 | # _y = np.random.choice(y_vals, count, p=py) 341 | fx_vals.append(_y) 342 | return x_vals, fx_vals 343 | 344 | 345 | def analyse_feature(target_feature, model_GGM, G, Xi=[]): 346 | """Analyse the feature of interest with regards to the 347 | underlying multivariate Gaussian distribution 
defining 348 | the conditional independence graph G. 349 | 350 | Args: 351 | target_feature (str/int/float): The feature of interest, should 352 | be present as one of the nodes in graph G 353 | model_GGM (list): [ 354 | mean (pd.Series) = {feature: mean value} 355 | cov (2D np.array) = Covariance matrix between features 356 | scaler (list of pd.Series): [data_min_, data_max_] 357 | ] 358 | G (nx.Graph): Conditional independence graph. 359 | Xi (pd.DataFrame): Initial input sample. 360 | 361 | Returns: 362 | None (Plots the dependency functions) 363 | """ 364 | mean, cov, scaler = model_GGM 365 | model_features = mean.index 366 | # Preliminary check for the presence of target feature 367 | if target_feature not in model_features: 368 | print(f'Error: Input feature {target_feature} not in model features') 369 | sys.exit(0) 370 | # Drop the nodes not in the model from the graph 371 | common_features = set(G.nodes()).intersection(model_features) 372 | features_dropped = G.nodes() - common_features 373 | print(f'Features dropped from graph: {features_dropped}') 374 | G = G.subgraph(list(common_features)) 375 | # 1. Get the neighbors (the dependent vars in CI graph) of the target 376 | # feature from Graph G. 377 | target_nbrs = G[target_feature] 378 | # 2. Set the initial values of the nodes. 379 | if len(Xi)==0: 380 | Xi = mean 381 | Xi = dp.series2df(Xi) 382 | # Arrange the columns based on the model_feature names for compatibility 383 | Xi = Xi[model_features] 384 | # 3. Getting the plots by varying each nbr node and getting the regression 385 | # values for the target node. 386 | plot_dict = {target_feature:{}} 387 | for nbr in target_nbrs.keys(): 388 | x, fx = get_distribution_function( 389 | target_feature, 390 | nbr, 391 | model_GGM, 392 | Xi 393 | ) 394 | title = f'GGM: {target_feature} (y-axis) vs {nbr} (x-axis)' 395 | plot_dict[target_feature][nbr] = [x, fx, title] 396 | dp.function_plots_for_target(plot_dict) 397 | return None -------------------------------------------------------------------------------- /ngm/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | from pprint import pprint 4 | 5 | def get_auc(y, scores): 6 | y = np.array(y).astype(int) 7 | fpr, tpr, thresholds = metrics.roc_curve(y, scores) 8 | roc_auc = metrics.auc(fpr, tpr) 9 | aupr = metrics.average_precision_score(y, scores) 10 | return roc_auc, aupr 11 | 12 | def reportMetrics(trueG, G, beta=1): 13 | """Compute various metrics 14 | Args: 15 | trueG (2D numpy arr[floats]): ground truth precision matrix 16 | G (2D numpy arr[floats]): predicted precsion mat 17 | beta (int, optional): beta for the Fbeta score 18 | Returns: 19 | Dict: {fdr (float): (false positive) / prediction positive = FP/P 20 | tpr (float): (true positive) / condition positive = TP/T 21 | fpr (float): (false positive) / condition negative = FP/F 22 | shd (int): undirected extra + undirected missing = E+M 23 | nnz (int): number of non-zeros for trueG and predG 24 | ps (float): probability of success, sign match 25 | Fbeta (float): F-score with beta 26 | aupr (float): area under the precision-recall curve 27 | auc (float): area under the ROC curve} 28 | """ 29 | trueG = trueG.real 30 | G =G.real 31 | # trueG and G are numpy arrays 32 | # convert all non-zeros in G to 1 33 | d = G.shape[-1] 34 | 35 | # changing to 1/0 for TP and FP calculations 36 | G_binary = np.where(G!=0, 1, 0) 37 | trueG_binary = np.where(trueG!=0, 1, 0) 38 | # extract the 
upper diagonal matrix 39 | indices_triu = np.triu_indices(d, 1) 40 | trueEdges = trueG_binary[indices_triu] #np.triu(G_true_binary, 1) 41 | predEdges = G_binary[indices_triu] #np.triu(G_binary, 1) 42 | # Getting AUROC value 43 | predEdges_auc = G[indices_triu] #np.triu(G_true_binary, 1) 44 | auc, aupr = get_auc(trueEdges, np.absolute(predEdges_auc)) 45 | # Now, we have the edge array for comparison 46 | # true pos = pred is 1 and true is 1 47 | TP = np.sum(trueEdges * predEdges) # true_pos 48 | # False pos = pred is 1 and true is 0 49 | mismatches = np.logical_xor(trueEdges, predEdges) 50 | FP = np.sum(mismatches * predEdges) 51 | # Find all mismatches with Xor and then just select the ones with pred as 1 52 | # P = Number of pred edges : nnzPred 53 | P = np.sum(predEdges) 54 | nnzPred = P 55 | # T = Number of True edges : nnzTrue 56 | T = np.sum(trueEdges) 57 | nnzTrue = T 58 | # F = Number of non-edges in true graph 59 | F = len(trueEdges) - T 60 | # SHD = total number of mismatches 61 | SHD = np.sum(mismatches) 62 | # FDR = False discovery rate 63 | FDR = FP/P 64 | # TPR = True positive rate 65 | TPR = TP/T 66 | # FPR = False positive rate 67 | FPR = FP/F 68 | # False negative = pred is 0 and true is 1 69 | FN = np.sum(mismatches * trueEdges) 70 | # F beta score 71 | num = (1+beta**2)*TP 72 | den = ((1+beta**2)*TP + beta**2 * FN + FP) 73 | Fbeta = num/den 74 | # precision 75 | precision = TP/(TP+FP) 76 | # recall 77 | recall = TP/(TP+FN) 78 | return {'FDR': FDR, 'TPR': TPR, 'FPR': FPR, 'SHD': SHD, 'nnzTrue': nnzTrue, 79 | 'nnzPred': nnzPred, 'precision': precision, 'recall': recall, 80 | 'Fbeta': Fbeta, 'aupr': aupr, 'auc': auc} 81 | 82 | def summarize_compare_theta(compare_dict_list, method_name='Method Name'): 83 | avg_results = {} 84 | for key in compare_dict_list[0].keys(): 85 | avg_results[key] = [] 86 | 87 | total_runs = len(compare_dict_list) 88 | for cd in compare_dict_list: 89 | for key in cd.keys(): 90 | avg_results[key].append(cd[key]) 91 | # getting the mean and std dev 92 | for key in avg_results.keys(): 93 | avk = avg_results[key] 94 | avg_results[key] = (np.mean(avk), np.std(avk)) 95 | print(f'Avg results for {method_name}\n') 96 | pprint(avg_results) 97 | print(f'\nTotal runs {total_runs}\n\n') 98 | return avg_results -------------------------------------------------------------------------------- /ngm/utils/neural_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for neural graphical models 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | class DNN(torch.nn.Module): 9 | """The DNN architecture to map the input to input. 10 | """ 11 | def __init__(self, I, H, O, USE_CUDA=False): 12 | """Initializing the MLP for the regression 13 | network. 
14 | 15 | Args: 16 | I (int): The input dimension 17 | H (int): The hidden layer dimension 18 | O (int): The output layer dimension 19 | USE_CUDA (bool): Flag to enable GPU 20 | """ 21 | super(DNN, self).__init__() # init the nn.module 22 | self.dtype = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor 23 | self.I, self.H, self.O = I, H, O 24 | self.MLP = self.getMLP() 25 | 26 | def getMLP(self): 27 | l1 = nn.Linear(self.I, self.H).type(self.dtype) 28 | l2 = nn.Linear(self.H, self.H).type(self.dtype) 29 | # l3 = nn.Linear(self.H, self.H).type(self.dtype) 30 | # l4 = nn.Linear(self.H, self.H).type(self.dtype) 31 | l5 = nn.Linear(self.H, self.O).type(self.dtype) 32 | return nn.Sequential( 33 | l1, nn.ReLU(), #nn.Tanh(), #, 34 | l2, nn.ReLU(), #nn.Tanh(), #nn.ReLU(), #nn.Tanh(), 35 | # l3, nn.ReLU(), 36 | # l4, nn.ReLU(), 37 | l5#, nn.ReLU()#, nn.Sigmoid() 38 | ).type(self.dtype) 39 | 40 | 41 | def get_optimizers(model, lr=0.002, use_optimizer='adam'): 42 | if use_optimizer == 'adam': 43 | optimizer = torch.optim.Adam( 44 | model.parameters(), 45 | lr=lr, 46 | betas=(0.9, 0.999), 47 | eps=1e-08, 48 | # weight_decay=0 49 | ) 50 | else: 51 | print('Optimizer not found!') 52 | return optimizer 53 | -------------------------------------------------------------------------------- /ngm/utils/uGLAD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/utils/uGLAD/__init__.py -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/utils/uGLAD/glad/__init__.py -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/glad.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ngm.utils.uGLAD.glad.torch_sqrtm import MatrixSquareRoot 3 | 4 | torch_sqrtm = MatrixSquareRoot.apply 5 | 6 | def get_optimizers(model_glad, lr_glad=0.002, use_optimizer='adam'): 7 | if use_optimizer == 'adam': 8 | optimizer_glad = torch.optim.Adam( 9 | model_glad.parameters(), 10 | lr=lr_glad, 11 | betas=(0.9, 0.999), 12 | eps=1e-08, 13 | # weight_decay=0 14 | ) 15 | else: 16 | print('Optimizer not found!') 17 | return optimizer_glad 18 | 19 | 20 | def batch_matrix_sqrt(A): 21 | # A should be PSD 22 | # if shape of A is 2D, i.e. a single matrix 23 | if len(A.shape)==2: 24 | return torch_sqrtm(A) 25 | else: 26 | n = A.shape[0] 27 | sqrtm_torch = torch.zeros(A.shape).type_as(A) 28 | for i in range(n): 29 | sqrtm_torch[i] = torch_sqrtm(A[i]) 30 | return sqrtm_torch 31 | 32 | 33 | def get_frobenius_norm(A, single=False): 34 | if single: 35 | return torch.sum(A**2) 36 | return torch.mean(torch.sum(A**2, (1,2))) 37 | 38 | 39 | def glad(Sb, model, lambda_init=1, L=15, INIT_DIAG=0, USE_CUDA = False): 40 | """Unrolling the Alternating Minimization algorithm which takes in the 41 | sample covariance (batch mode), runs the iterations of the AM updates and 42 | returns the precision matrix. The hyperparameters are modeled as small 43 | neural networks which are to be learned from the backprop signal of the 44 | loss function. 
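Each unrolled cell computes b = (1/lambda_k) S - theta_k and the closed-form AM update theta_{k+1} = 0.5 * (-b + (b^T b + (4/lambda_k) I)^{1/2}), then applies the learned entrywise soft-thresholding (eta_forward) and updates lambda_k from the change introduced by the thresholding step (lambda_forward); see the loop below.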
45 | 46 | Args: 47 | Sb (3D torch tensor (float)): Covariance (batch x dim x dim) 48 | model (class object): The GLAD neural network parameters 49 | (theta_init, rho, lambda) 50 | lambda_init (float): The initial lambda value 51 | L (int): The number of unrolled iterations 52 | INIT_DIAG (int): if 0 - Initial theta as (S + theta_init_offset * I)^-1 53 | if 1 - Initial theta as (diag(S)+theta_init_offset*I)^-1 54 | USE_CUDA (bool): `True` if GPUs present else `False` 55 | 56 | Returns: 57 | theta_pred (3D torch tensor (float)): The output precision matrix 58 | (batch x dim x dim) 59 | loss (torch scalar (float)): The graphical lasso objective function 60 | """ 61 | D = Sb.shape[-1] # dimension of matrix 62 | # if batch is 1, then reshaping Sb 63 | if len(Sb.shape)==2: 64 | Sb = Sb.reshape(1, Sb.shape[0], Sb.shape[1]) 65 | # Initializing the theta 66 | if INIT_DIAG == 1: 67 | #print('extract batchwise diagonals, add offset and take inverse') 68 | batch_diags = 1/(torch.diagonal(Sb, offset=0, dim1=-2, dim2=-1) 69 | + model.theta_init_offset) 70 | theta_init = torch.diag_embed(batch_diags) 71 | else: 72 | #print('(S+theta_offset*I)^-1 is used') 73 | theta_init = torch.inverse(Sb+model.theta_init_offset * 74 | torch.eye(D).expand_as(Sb).type_as(Sb)) 75 | 76 | theta_pred = theta_init#[ridx] 77 | identity_mat = torch.eye(Sb.shape[-1]).expand_as(Sb) 78 | # diagonal mask 79 | # mask = torch.eye(Sb.shape[-1], Sb.shape[-1]).byte() 80 | # dim = Sb.shape[-1] 81 | # mask1 = torch.ones(dim, dim) - torch.eye(dim, dim) 82 | if USE_CUDA == True: 83 | identity_mat = identity_mat.cuda() 84 | # mask = mask.cuda() 85 | # mask1 = mask1.cuda() 86 | 87 | zero = torch.Tensor([0]) 88 | dtype = torch.FloatTensor 89 | if USE_CUDA == True: 90 | zero = zero.cuda() 91 | dtype = torch.cuda.FloatTensor 92 | 93 | lambda_k = model.lambda_forward(zero + lambda_init, zero, k=0) 94 | for k in range(L): 95 | # GLAD CELL 96 | b = 1.0/lambda_k * Sb - theta_pred 97 | b2_4ac = torch.matmul(b.transpose(-1, -2), b) + 4.0/lambda_k * identity_mat 98 | sqrt_term = batch_matrix_sqrt(b2_4ac) 99 | theta_k1 = 1.0/2*(-1*b+sqrt_term) 100 | 101 | theta_pred = model.eta_forward(theta_k1, Sb, k, theta_pred) 102 | # update the lambda 103 | lambda_k = model.lambda_forward(torch.Tensor( 104 | [get_frobenius_norm(theta_pred-theta_k1)] 105 | ).type(dtype), lambda_k, k) 106 | return theta_pred -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/glad_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class glad_params(torch.nn.Module): 5 | """The AM hyperparameters are parameterized in the glad_params. 6 | rho, lambda and theta_init_offset are learnable. 
7 | """ 8 | def __init__(self, theta_init_offset, nF, H, USE_CUDA=False): 9 | """Initializing the GLAD model 10 | 11 | Args: 12 | theta_init_offset (float): The initial eigenvalue offset, set to a high value > 0.1 13 | nF (int): The number of input features for the entrywise thresholding 14 | H (int): The hidden layer size to be used for the NNs 15 | USE_CUDA (bool): Use GPU if True else CPU 16 | """ 17 | super(glad_params, self).__init__() 18 | self.dtype = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor 19 | self.theta_init_offset = nn.Parameter( 20 | torch.Tensor( 21 | [theta_init_offset] 22 | ).type(self.dtype) 23 | ) 24 | self.nF = nF # number of input features 25 | self.H = H # hidden layer size 26 | self.rho_l1 = self.rhoNN() 27 | self.lambda_f = self.lambdaNN() 28 | self.zero = torch.Tensor([0]).type(self.dtype) 29 | 30 | def rhoNN(self):# per iteration NN 31 | l1 = nn.Linear(self.nF, self.H).type(self.dtype) 32 | lH1 = nn.Linear(self.H, self.H).type(self.dtype) 33 | l2 = nn.Linear(self.H, 1).type(self.dtype) 34 | return nn.Sequential(l1, nn.Tanh(), 35 | lH1, nn.Tanh(), 36 | l2, nn.Sigmoid()).type(self.dtype) 37 | 38 | def lambdaNN(self): 39 | l1 = nn.Linear(2, self.H).type(self.dtype) 40 | l2 = nn.Linear(self.H, 1).type(self.dtype) 41 | return nn.Sequential(l1, nn.Tanh(), 42 | l2, nn.Sigmoid()).type(self.dtype) 43 | 44 | def eta_forward(self, X, S, k, F3=[]): 45 | batch_size, shape1, shape2 = X.shape 46 | Xr = X.reshape(batch_size, -1, 1) 47 | Sr = S.reshape(batch_size, -1, 1) 48 | feature_vector = torch.cat((Xr, Sr), -1) 49 | if len(F3)>0: 50 | F3r = F3.reshape(batch_size, -1, 1) 51 | feature_vector = torch.cat((feature_vector, F3r), -1) 52 | # elementwise thresholding 53 | rho_val = self.rho_l1(feature_vector).reshape(X.shape) 54 | return torch.sign(X)*torch.max(self.zero, torch.abs(X)-rho_val) 55 | 56 | def lambda_forward(self, normF, prev_lambda, k=0): 57 | feature_vector = torch.Tensor([normF, prev_lambda]).type(self.dtype) 58 | return self.lambda_f(feature_vector) 59 | 60 | 61 | -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/torch_sqrtm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | import numpy as np 5 | import scipy.linalg 6 | 7 | class MatrixSquareRoot(Function): 8 | """Square root of a positive definite matrix. 9 | NOTE: matrix square root is not differentiable for matrices with 10 | zero eigenvalues. 
11 | """ 12 | @staticmethod 13 | def forward(ctx, input): 14 | itr_TH = 10 # number of iterations threshold 15 | dim = input.shape[0] 16 | norm = torch.norm(input)#.double()) 17 | #Y = input.double()/norm 18 | Y = input/norm 19 | I = torch.eye(dim,dim,device=input.device)#.double() 20 | Z = torch.eye(dim,dim,device=input.device)#.double() 21 | #print('Check: ', Y.type(), I.type(), Z.type()) 22 | for i in range(itr_TH): 23 | T = 0.5*(3.0*I - Z.mm(Y)) 24 | Y = Y.mm(T) 25 | Z = T.mm(Z) 26 | sqrtm = Y*torch.sqrt(norm) 27 | # ctx.mark_dirty(Y,I,Z) 28 | ctx.save_for_backward(sqrtm) 29 | return sqrtm 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | itr_TH = 10 # number of iterations threshold 34 | grad_input = None 35 | sqrtm, = ctx.saved_tensors 36 | dim = sqrtm.shape[0] 37 | norm = torch.norm(sqrtm) 38 | A = sqrtm/norm 39 | I = torch.eye(dim, dim, device=sqrtm.device)#.double() 40 | #Q = grad_output.double()/norm 41 | Q = grad_output/norm 42 | for i in range(itr_TH): 43 | Q = 0.5*(Q.mm(3.0*I-A.mm(A))-A.t().mm(A.t().mm(Q)-Q.mm(A))) 44 | A = 0.5*A.mm(3.0*I-A.mm(A)) 45 | grad_input = 0.5*Q 46 | return grad_input 47 | sqrtm = MatrixSquareRoot.apply 48 | 49 | 50 | def original_main(): 51 | from torch.autograd import gradcheck 52 | k = torch.randn(20, 10).double() 53 | # Create a positive definite matrix 54 | pd_mat = k.t().matmul(k) 55 | pd_mat = Variable(pd_mat, requires_grad=True) 56 | test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 57 | print(test) 58 | 59 | def single_main(): 60 | from torch.autograd import gradcheck 61 | n = 1 62 | A = torch.randn( 20, 10).double() 63 | # Create a positive definite matrix 64 | pd_mat = A.t().matmul(A) 65 | pd_mat = Variable(pd_mat, requires_grad=True) 66 | test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 67 | print(test) 68 | 69 | #sqrtm_scipy = np.zeros_like(A) 70 | print('err: ', pd_mat) 71 | sqrtm_scipy = scipy.linalg.sqrtm(pd_mat.detach().numpy().astype(np.float_)) 72 | # for i in range(n): 73 | # sqrtm_scipy[i] = sqrtm(pd_mat[i].detach().numpy()) 74 | sqrtm_torch = sqrtm(pd_mat) 75 | print('sqrtm torch: ', sqrtm_torch) 76 | print('scipy', sqrtm_scipy) 77 | print('Difference: ', np.linalg.norm(sqrtm_scipy - sqrtm_torch.detach().numpy())) 78 | 79 | def main():# batch 80 | from torch.autograd import gradcheck 81 | n = 2 82 | A = torch.randn(n, 4, 5).double() 83 | A.requires_grad = True 84 | # Create a positive definite matrix 85 | #pd_mat = A.t().matmul(A) 86 | pd_mat = torch.matmul(A.transpose(-1, -2), A) 87 | pd_mat = Variable(pd_mat, requires_grad=True) 88 | pd_mat.type = torch.FloatTensor 89 | print('err: ', pd_mat.shape, pd_mat.type) 90 | #test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 91 | #print(test) 92 | 93 | sqrtm_scipy = np.zeros_like(pd_mat.detach().numpy()) 94 | #sqrtm_scipy = scipy.linalg.sqrtm(pd_mat.detach().numpy().astype(np.float_)) 95 | for i in range(n): 96 | sqrtm_scipy[i] = scipy.linalg.sqrtm(pd_mat[i].detach().numpy().astype(np.float)) 97 | # batch implementation 98 | sqrtm_torch = torch.zeros(pd_mat.shape) 99 | for i in range(n): 100 | print('custom implementation', pd_mat[i].type()) 101 | sqrtm_torch[i] = sqrtm(pd_mat[i].type(torch.FloatTensor)) 102 | #sqrtm_torch = sqrtm(pd_mat) 103 | print('sqrtm torch: ', sqrtm_torch) 104 | print('scipy', sqrtm_scipy) 105 | print('Difference: ', np.linalg.norm(sqrtm_scipy - sqrtm_torch.detach().numpy())) 106 | 107 | if __name__ == '__main__': 108 | main() 109 | 110 | -------------------------------------------------------------------------------- 
/ngm/utils/uGLAD/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | The main file to train/test the uGLAD algorithm. 3 | Contains code to generate data, run training and the 4 | loss function. 5 | """ 6 | import copy 7 | from tabnanny import check 8 | import numpy as np 9 | import pandas as pd 10 | from sklearn import covariance 11 | from sklearn.model_selection import KFold 12 | import sys 13 | from time import time 14 | import torch 15 | 16 | # Helper functions for uGLAD 17 | from ngm.utils.uGLAD.glad.glad_params import glad_params 18 | from ngm.utils.uGLAD.glad import glad 19 | from ngm.utils.uGLAD.utils.metrics import reportMetrics 20 | 21 | import ngm.utils.uGLAD.utils.prepare_data as prepare_data 22 | 23 | ############### Wrapper class to match sklearn package ############# 24 | class uGLAD_GL(object): 25 | def __init__(self): 26 | """Wrapper class to match the sklearn GraphicalLassoCV 27 | output signature. Initializing the uGLAD model. 28 | """ 29 | super(uGLAD_GL, self).__init__() 30 | self.covariance_ = None 31 | self.precision_ = None 32 | self.model_glad = None 33 | 34 | def fit( 35 | self, 36 | X, 37 | true_theta=None, 38 | eval_offset=0.1, 39 | centered=False, 40 | epochs=250, 41 | lr=0.002, 42 | INIT_DIAG=0, 43 | L=15, 44 | verbose=True, 45 | k_fold=3, 46 | mode='direct', 47 | cov=False 48 | ): 49 | """Takes in the samples X and returns 50 | a uGLAD model which stores the corresponding 51 | covariance and precision matrices. 52 | 53 | Args: 54 | X (2D np array): num_samples x dimension 55 | true_theta (2D np array): dim x dim of the 56 | true precision matrix 57 | eval_offset (float): eigenval adjustment in 58 | case the cov is ill-conditioned 59 | centered (bool): Whether samples are mean 60 | adjusted or not. 
True/False 61 | epochs (int): Training epochs 62 | lr (float): Learning rate of glad for the adam optimizer 63 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 64 | L (int): Num of unrolled iterations of GLAD 65 | verbose (bool): Print training output 66 | k_fold (int): num batches in missing mode 67 | num splits for k-fold in CV mode, 68 | k=0 will run the direct mode 69 | mode (str): direct/cv/missing 70 | cov (bool): If True, X = covariance matrix (DxD) 71 | 72 | Returns: 73 | compare_theta (dict): Dictionary of comparison metrics 74 | between the predicted and true precision matrix 75 | """ 76 | print(f'Running uGLAD') 77 | start = time() 78 | if not cov: 79 | print(f'Processing the input table for basic compatibility check') 80 | X = prepare_data.process_table( 81 | pd.DataFrame(X), 82 | NORM='min_max', 83 | VERBOSE=verbose 84 | ) 85 | X = np.array(X) 86 | # Running the uGLAD model 87 | M, D = X.shape 88 | # Reshaping due to GLAD algorithm requirements 89 | Xb = X.reshape(1, M, D) 90 | true_theta_b = None 91 | if true_theta is not None: 92 | true_theta_b = true_theta.reshape(1, D, D) 93 | if mode=='missing': 94 | print(f'Handling missing data') 95 | pred_theta, compare_theta, model_glad = run_uGLAD_missing( 96 | Xb, 97 | trueTheta=true_theta_b, 98 | eval_offset=eval_offset, 99 | EPOCHS=epochs, 100 | lr=lr, 101 | INIT_DIAG=INIT_DIAG, 102 | L=L, 103 | VERBOSE=verbose, 104 | K_batch=k_fold 105 | ) 106 | elif mode=='cv' and k_fold>=0: 107 | print(f'CV mode: {k_fold}-fold') 108 | pred_theta, compare_theta, model_glad = run_uGLAD_CV( 109 | Xb, 110 | trueTheta=true_theta_b, 111 | eval_offset=eval_offset, 112 | EPOCHS=epochs, 113 | lr=lr, 114 | INIT_DIAG=INIT_DIAG, 115 | L=L, 116 | VERBOSE=verbose, 117 | k_fold=k_fold 118 | ) 119 | elif mode=='direct': 120 | print(f'Direct Mode') 121 | pred_theta, compare_theta, model_glad = run_uGLAD_direct( 122 | Xb, 123 | trueTheta=true_theta_b, 124 | eval_offset=eval_offset, 125 | EPOCHS=epochs, 126 | lr=lr, 127 | INIT_DIAG=INIT_DIAG, 128 | L=L, 129 | VERBOSE=verbose, 130 | cov=cov 131 | ) 132 | else: 133 | print(f'ERROR Please enter K-fold value in valid range [0, ), currently entered {k_fold}; Check mode {mode}') 134 | print(f'Note that cov={cov} only valid for mode=direct') 135 | sys.exit(0) 136 | # np.dot((X-mu)T, (X-mu)) / X.shape[0] 137 | self.covariance_ = covariance.empirical_covariance( 138 | X, 139 | assume_centered=centered 140 | ) 141 | self.precision_ = pred_theta[0].detach().numpy() 142 | self.model_glad = model_glad 143 | print(f'Total runtime: {time()-start} secs\n') 144 | return compare_theta 145 | 146 | 147 | class uGLAD_multitask(object): 148 | def __init__(self): 149 | """Initializing the uGLAD model in multi-task 150 | mode. It saves the covariance and predicted 151 | precision matrices for the input batch of data 152 | """ 153 | super(uGLAD_multitask, self).__init__() 154 | self.covariance_ = [] 155 | self.precision_ = [] 156 | self.model_glad = None 157 | 158 | def fit( 159 | self, 160 | Xb, 161 | true_theta_b=None, 162 | eval_offset=0.1, 163 | centered=False, 164 | epochs=250, 165 | lr=0.002, 166 | INIT_DIAG=0, 167 | L=15, 168 | verbose=True, 169 | ): 170 | """Takes in the samples X and returns 171 | a uGLAD model which stores the corresponding 172 | covariance and precision matrices. 
173 | 174 | Args: 175 | Xb (list of 2D np.array): batch * [num_samples' x dimension] 176 | NOTE: num_samples can be different for different data 177 | true_theta (3D np.array): batch x dim x dim of the 178 | true precision matrix 179 | eval_offset (float): eigenval adjustment in 180 | case the cov is ill-conditioned 181 | centered (bool): Whether samples are mean 182 | adjusted or not. True/False 183 | epochs (int): Training epochs 184 | lr (float): Learning rate of glad for the adam optimizer 185 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 186 | L (int): Num of unrolled iterations of GLAD 187 | verbose (bool): Print training output 188 | 189 | Returns: 190 | compare_theta (list[dict]): Dictionary of comparison metrics 191 | between the predicted and true precision matrices 192 | """ 193 | print(f'Running uGLAD in multi-task mode') 194 | start = time() 195 | print(f'Processing the input table for basic compatibility check') 196 | processed_Xb = [] 197 | for X in Xb: 198 | X = prepare_data.process_table( 199 | pd.DataFrame(X), 200 | NORM='min_max', 201 | VERBOSE=verbose 202 | ) 203 | processed_Xb.append(np.array(X)) 204 | Xb = processed_Xb 205 | # Running the uGLAD model 206 | pred_theta, compare_theta, model_glad = run_uGLAD_multitask( 207 | Xb, 208 | trueTheta=true_theta_b, 209 | eval_offset=eval_offset, 210 | EPOCHS=epochs, 211 | lr=lr, 212 | INIT_DIAG=INIT_DIAG, 213 | L=L, 214 | VERBOSE=verbose, 215 | ) 216 | 217 | # np.dot((X-mu)T, (X-mu)) / X.shape[0] 218 | self.covariance_ = [] 219 | for b in range(len(Xb)): 220 | self.covariance_.append( 221 | covariance.empirical_covariance( 222 | Xb[b], 223 | assume_centered=centered 224 | ) 225 | ) 226 | self.covariance_ = np.array(self.covariance_) 227 | self.precision_ = pred_theta.detach().numpy() 228 | self.model_glad = model_glad 229 | print(f'Total runtime: {time()-start} secs\n') 230 | return compare_theta 231 | ##################################################################### 232 | 233 | 234 | #################### Functions to prepare model ###################### 235 | def init_uGLAD(lr, theta_init_offset=1.0, nF=3, H=3): 236 | """Initialize the GLAD model parameters and the optimizer 237 | to be used. 238 | 239 | Args: 240 | lr (float): Learning rate of glad for the adam optimizer 241 | theta_init_offset (float): Initialization diagonal offset 242 | for the pred theta (adjust eigenvalue) 243 | nF (int): #input features for the entrywise thresholding 244 | H (int): The hidden layer size to be used for the NNs 245 | 246 | Returns: 247 | model: class object 248 | optimizer: class object 249 | """ 250 | model = glad_params( 251 | theta_init_offset=theta_init_offset, 252 | nF=nF, 253 | H=H 254 | ) 255 | optimizer = glad.get_optimizers(model, lr_glad=lr) 256 | return model, optimizer 257 | 258 | 259 | def forward_uGLAD(Sb, model_glad, L=15, INIT_DIAG=0, loss_Sb=None): 260 | """Run the input through the unsupervised GLAD algorithm. 261 | It executes the following steps in batch mode 262 | 1. Run the GLAD model to get predicted precision matrix 263 | 2. Calculate the glasso-loss 264 | 265 | Args: 266 | Sb (torch.Tensor BxDxD): The input covariance matrix 267 | model_glad (dict): Contains the learnable params 268 | L (int): Num of unrolled iterations of GLAD 269 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 270 | loss_Sb (torch.Tensor BxDxD): The input covariance matrix 271 | against which loss is calculated. 
If None, then use 272 | the input covariance matrix Sb 273 | 274 | Returns: 275 | predTheta (torch.Tensor BxDxD): The predicted theta 276 | loss (torch.scalar): The glasso loss 277 | """ 278 | # 1. Running the GLAD model 279 | predTheta = glad.glad(Sb, model_glad, L=L, INIT_DIAG=INIT_DIAG) 280 | # 2. Calculate the glasso-loss 281 | if loss_Sb is None: 282 | loss = loss_uGLAD(predTheta, Sb) 283 | else: 284 | loss = loss_uGLAD(predTheta, loss_Sb) 285 | return predTheta, loss 286 | 287 | 288 | def loss_uGLAD(theta, S): 289 | """The objective function of the graphical lasso which is 290 | the loss function for the unsupervised learning of glad 291 | loss-glasso = 1/M(-log|theta| + <S, theta>) 292 | 293 | NOTE: We fix the batch size B=1 for `uGLAD` 294 | 295 | Args: 296 | theta (tensor 3D): precision matrix BxDxD 297 | S (tensor 3D): covariance matrix BxDxD (dim=D) 298 | 299 | Returns: 300 | loss (tensor 1D): the loss value of the obj function 301 | """ 302 | B, D, _ = S.shape 303 | t1 = -1*torch.logdet(theta) 304 | # Batch Matrix multiplication: torch.bmm 305 | t21 = torch.einsum("bij, bjk -> bik", S, theta) 306 | # getting the trace (batch mode) 307 | t2 = torch.einsum('jii->j', t21) 308 | # print(t1, torch.det(theta), t2) 309 | # regularization term 310 | # tr = 1e-02 * torch.sum(torch.abs(theta)) 311 | glasso_loss = torch.sum(t1+t2)/B # sum over the batch 312 | return glasso_loss 313 | 314 | 315 | def run_uGLAD_direct( 316 | Xb, 317 | trueTheta=None, 318 | eval_offset=0.1, 319 | EPOCHS=250, 320 | lr=0.002, 321 | INIT_DIAG=0, 322 | L=15, 323 | VERBOSE=True, 324 | cov=False 325 | ): 326 | """Running the uGLAD algorithm in direct mode 327 | 328 | Args: 329 | Xb (np.array 1xMxD): The input sample matrix 330 | trueTheta (np.array 1xDxD): The corresponding 331 | true graphs for reporting metrics or None 332 | eval_offset (float): eigenvalue offset for 333 | covariance matrix adjustment 334 | lr (float): Learning rate of glad for the adam optimizer 335 | INIT_DIAG (int): 0/1 for initialization strategy of GLAD 336 | L (int): Num of unrolled iterations of GLAD 337 | EPOCHS (int): The number of training epochs 338 | VERBOSE (bool): if True, prints to sys.out 339 | cov (bool): if True, Xb = cov matrix (1xDxD) 340 | 341 | Returns: 342 | predTheta (torch.Tensor 1xDxD): Predicted graphs 343 | compare_theta (dict): returns comparison metrics if 344 | true precision matrix is provided 345 | model_glad (class object): Returns the learned glad model 346 | """ 347 | # Calculating the batch covariance 348 | if cov: 349 | Sb = [] 350 | for X in Xb: 351 | Sb.append(prepare_data.adjustCov( 352 | X, 353 | offset=eval_offset, 354 | max_con=np.inf # Usually no need to adjust the 355 | # covariance matrix if calculated entrywise 356 | )) 357 | Sb = np.array(Sb) 358 | else: 359 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) # BxDxD 360 | # Converting the data to torch 361 | Xb = prepare_data.convertToTorch(Xb, req_grad=False) 362 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 363 | if trueTheta is not None: 364 | trueTheta = prepare_data.convertToTorch( 365 | trueTheta, 366 | req_grad=False 367 | ) 368 | B, _, _ = Xb.shape 369 | # NOTE: We fix the batch size B=1 for `uGLAD` 370 | # model and optimizer for uGLAD 371 | model_glad, optimizer_glad = init_uGLAD( 372 | lr=lr, 373 | theta_init_offset=1.0, 374 | nF=3, 375 | H=3 376 | ) 377 | PRINT_EVERY = int(EPOCHS/10) 378 | # print max 10 times per training 379 | # Optimizing for the glasso loss 380 | for e in range(EPOCHS): 381 | # reset the grads to zero 382 |
optimizer_glad.zero_grad() 383 | # calculate the loss 384 | predTheta, loss = forward_uGLAD( 385 | Sb, 386 | model_glad, 387 | L=L, 388 | INIT_DIAG=INIT_DIAG 389 | ) 390 | # calculate the backward gradients 391 | loss.backward() 392 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{loss.detach().numpy()}') 393 | # updating the optimizer params with the grads 394 | optimizer_glad.step() 395 | # reporting the metrics if true thetas provided 396 | compare_theta = None 397 | if trueTheta is not None: 398 | for b in range(B): 399 | compare_theta = reportMetrics( 400 | trueTheta[b].detach().numpy(), 401 | predTheta[b].detach().numpy() 402 | ) 403 | print(f'Compare - {compare_theta}') 404 | return predTheta, compare_theta, model_glad 405 | 406 | 407 | def run_uGLAD_CV( 408 | Xb, 409 | trueTheta=None, 410 | eval_offset=0.1, 411 | EPOCHS=250, 412 | lr=0.002, 413 | INIT_DIAG=0, 414 | L=15, 415 | VERBOSE=True, 416 | k_fold=5 417 | ): 418 | """Running the uGLAD algorithm and select the best 419 | model using 5-fold CV. 420 | 421 | Args: 422 | Xb (np.array 1xMxD): The input sample matrix 423 | trueTheta (np.array 1xDxD): The corresponding 424 | true graphs for reporting metrics or None 425 | eval_offset (float): eigenvalue offset for 426 | covariance matrix adjustment 427 | EPOCHS (int): The number of training epochs 428 | lr (float): Learning rate of glad for the adam optimizer 429 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 430 | L (int): Num of unrolled iterations of GLAD 431 | VERBOSE (bool): if True, prints to sys.out 432 | k_fold (int): #splits for k-fold CV 433 | 434 | Returns: 435 | predTheta (torch.Tensor 1xDxD): Predicted graphs 436 | compare_theta (dict): returns comparison metrics if 437 | true precision matrix is provided 438 | model_glad (class object): Returns the learned glad model 439 | """ 440 | # Batch size is fixed to 1 441 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 442 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 443 | # Splitting into k-fold for cross-validation 444 | kf = KFold(n_splits=k_fold) 445 | # For each fold, collect the best model and the glasso-loss value 446 | results_Kfold = {} 447 | for _k, (train, test) in enumerate(kf.split(Xb[0])): 448 | if VERBOSE: print(f'Fold num {_k}') 449 | Xb_train = np.expand_dims(Xb[0][train], axis=0) # 1 x Mtrain x D 450 | Xb_test = np.expand_dims(Xb[0][test], axis=0) # 1 x Mtest x D 451 | # Calculating the batch covariance 452 | Sb_train = prepare_data.getCovariance(Xb_train, offset=eval_offset) # BxDxD 453 | Sb_test = prepare_data.getCovariance(Xb_test, offset=eval_offset) # BxDxD 454 | # Converting the data to torch 455 | Sb_train = prepare_data.convertToTorch(Sb_train, req_grad=False) 456 | Sb_test = prepare_data.convertToTorch(Sb_test, req_grad=False) 457 | if trueTheta is not None: 458 | trueTheta = prepare_data.convertToTorch( 459 | trueTheta, 460 | req_grad=False 461 | ) 462 | B, M, D = Xb_train.shape 463 | # NOTE: We fix the batch size B=1 for `uGLAD' 464 | # model and optimizer for uGLAD 465 | model_glad, optimizer_glad = init_uGLAD( 466 | lr=lr, 467 | theta_init_offset=1.0, 468 | nF=3, 469 | H=3 470 | ) 471 | # Optimizing for the glasso loss 472 | best_test_loss = np.inf 473 | PRINT_EVERY = int(EPOCHS/10) 474 | # print max 10 times per training 475 | for e in range(EPOCHS): 476 | # reset the grads to zero 477 | optimizer_glad.zero_grad() 478 | # calculate the loss for test and precision matrix for train 479 | predTheta, loss_train = forward_uGLAD( 480 | Sb_train, 481 | 
model_glad, 482 | L=L, 483 | INIT_DIAG=INIT_DIAG 484 | ) 485 | with torch.no_grad(): 486 | _, loss_test = forward_uGLAD( 487 | Sb_test, 488 | model_glad, 489 | L=L, 490 | INIT_DIAG=INIT_DIAG 491 | ) 492 | # calculate the backward gradients 493 | loss_train.backward() 494 | # updating the optimizer params with the grads 495 | optimizer_glad.step() 496 | # Printing output 497 | _loss = loss_test.detach().numpy() 498 | if not e%PRINT_EVERY and VERBOSE: print(f'Fold {_k}: epoch:{e}/{EPOCHS} test-loss:{_loss}') 499 | # Updating the best model for this fold 500 | if _loss < best_test_loss: # and e%10==9: 501 | if VERBOSE and not e%PRINT_EVERY: 502 | print(f'Fold {_k}: epoch:{e}/{EPOCHS}: Updating the best model with test-loss {_loss}') 503 | best_model_kfold = copy.deepcopy(model_glad) 504 | best_test_loss = _loss 505 | # updating with the best model and loss for the current fold 506 | results_Kfold[_k] = {} 507 | results_Kfold[_k]['test_loss'] = best_test_loss 508 | results_Kfold[_k]['model'] = best_model_kfold 509 | if VERBOSE: print('\n') 510 | 511 | # Strategy I: Select the best model from the results Kfold dictionary 512 | # with the best score on the test fold. 513 | # print(f'Using Strategy I to select the best model') 514 | best_loss = np.inf 515 | for _k in results_Kfold.keys(): 516 | curr_loss = results_Kfold[_k]['test_loss'] 517 | if curr_loss < best_loss: 518 | model_glad = results_Kfold[_k]['model'] 519 | best_loss = curr_loss 520 | 521 | # Run the best model on the complete data to retrieve the 522 | # final predTheta (precision matrix) 523 | with torch.no_grad(): 524 | predTheta, total_loss = forward_uGLAD( 525 | Sb, 526 | model_glad, 527 | L=L, 528 | INIT_DIAG=INIT_DIAG) 529 | 530 | # reporting the metrics if true theta is provided 531 | compare_theta = None 532 | if trueTheta is not None: 533 | for b in range(B): 534 | compare_theta = reportMetrics( 535 | trueTheta[b].detach().numpy(), 536 | predTheta[b].detach().numpy() 537 | ) 538 | print(f'Comparison - {compare_theta}') 539 | return predTheta, compare_theta, model_glad 540 | 541 | 542 | def run_uGLAD_missing( 543 | Xb, 544 | trueTheta=None, 545 | eval_offset=0.1, 546 | EPOCHS=250, 547 | lr=0.002, 548 | INIT_DIAG=0, 549 | L=15, 550 | VERBOSE=True, 551 | K_batch=3 552 | ): 553 | """Running the uGLAD algorithm in missing data mode. We do a 554 | row-subsample of the input data and then train using multi-task 555 | learning approach to obtain the final precision matrix. 
556 | 557 | Args: 558 | Xb (np.array 1xMxD): The input sample matrix with 559 | missing entries as np.NaNs 560 | trueTheta (np.array 1xDxD): The corresponding 561 | true graphs for reporting metrics or None 562 | eval_offset (float): eigenvalue offset for 563 | covariance matrix adjustment 564 | EPOCHS (int): The number of training epochs 565 | lr (float): Learning rate of glad for the adam optimizer 566 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 567 | L (int): Num of unrolled iterations of GLAD 568 | VERBOSE (bool): if True, prints to sys.out 569 | K_batch (int): number of row-sumsampled batch for 570 | multi-task learning (For less conflict in deciding the 571 | sign of final precision matrix, choose K as a odd value) 572 | 573 | Returns: 574 | predTheta (torch.Tensor 1xDxD): Predicted graphs 575 | compare_theta (dict): returns comparison metrics if 576 | true precision matrix is provided 577 | model_glad (class object): Returns the learned glad model 578 | """ 579 | # Batch size is fixed to 1 580 | if K_batch == 0: K_batch = 3 # setting the default 581 | # Step I: Do statistical mean imputation 582 | Xb = mean_imputation(Xb) 583 | 584 | # Step II: Getting the batches and preparing data for uGLAD 585 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 586 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 587 | # Splitting into k-fold for getting row-subsampled batches 588 | kf = KFold(n_splits=K_batch) 589 | print(f'Creating K={K_batch} row-subsampled batches') 590 | # Collect all the subsample in batch form: K x M' x D 591 | X_K = np.array([Xb[0][Idx] for Idx, _ in kf.split(Xb[0])]) 592 | # Calculating the batch covariance 593 | S_K = prepare_data.getCovariance(X_K, offset=eval_offset) # BxDxD 594 | # Converting the data to torch 595 | S_K = prepare_data.convertToTorch(S_K, req_grad=False) 596 | # Initialize the model and prepare theta if provided 597 | if trueTheta is not None: 598 | trueTheta = prepare_data.convertToTorch( 599 | trueTheta, 600 | req_grad=False 601 | ) 602 | # model and optimizer for uGLAD 603 | model_glad, optimizer_glad = init_uGLAD( 604 | lr=lr, 605 | theta_init_offset=1.0, 606 | nF=3, 607 | H=3 608 | ) 609 | # STEP III: Optimizing for the glasso loss 610 | PRINT_EVERY = int(EPOCHS/10) 611 | # print max 10 times per training 612 | for e in range(EPOCHS): 613 | # reset the grads to zero 614 | optimizer_glad.zero_grad() 615 | # calculate the loss and precision matrix 616 | predTheta, loss = forward_uGLAD( 617 | S_K, 618 | model_glad, 619 | L=L, 620 | INIT_DIAG=INIT_DIAG, 621 | loss_Sb=Sb 622 | ) 623 | # calculate the backward gradients 624 | loss.backward() 625 | # updating the optimizer params with the grads 626 | optimizer_glad.step() 627 | # Printing output 628 | _loss = loss.detach().numpy() 629 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{_loss}') 630 | 631 | # STEP IV: Getting the final precision matrix 632 | print(f'Getting the final precision matrix using the consensus strategy') 633 | predTheta = get_final_precision_from_batch(predTheta, type='min') 634 | 635 | # reporting the metrics if true theta is provided 636 | compare_theta = None 637 | if trueTheta is not None: 638 | compare_theta = reportMetrics( 639 | trueTheta[0].detach().numpy(), 640 | predTheta[0].detach().numpy() 641 | ) 642 | print(f'Comparison - {compare_theta}') 643 | 644 | return predTheta, compare_theta, model_glad 645 | 646 | def mean_imputation(Xb): 647 | """Replace nans of the input data by column 648 | means 649 | 650 | Args: 651 | Xb 
(torch.Tensor 1xMxD): The input sample matrix with 652 | missing entries as np.NaNs 653 | 654 | Returns: 655 | Xb (torch.Tensor 1xMxD): Mean imputed matrix 656 | """ 657 | Xb = Xb[0] 658 | # Mean of columns (ignoring NaNs) 659 | col_mean = np.nanmean(Xb, axis=0) 660 | #Find indices that you need to replace 661 | inds = np.where(np.isnan(Xb)) 662 | # Place column means in the indices. Align the arrays using take 663 | Xb[inds] = np.take(col_mean, inds[1]) 664 | # Check if any column is full of NaNs, raise sys.exit() 665 | if np.isnan(np.sum(Xb)): 666 | print(f'ERROR: One or more columns have all NaNs') 667 | sys.exit(0) 668 | # Reshaping Xb with an extra dimension for compatability with glad 669 | Xb = np.expand_dims(Xb, axis=0) 670 | return Xb 671 | 672 | def get_final_precision_from_batch(predTheta, type='min'): 673 | """The predTheta contains a batch of K precision 674 | matrices. This function calculates the final 675 | precision matrix by following the consensus 676 | strategy 677 | 678 | \Theta^{f}_{i,j} = max-count(sign(\Theta^K_{i,j})) 679 | * min/mean{|\Theta^K_{i,j}|} 680 | (`min` is the recommended setting) 681 | 682 | Args: 683 | predTheta (torch.Tensor KxDxD): Predicted graphs 684 | with batch_size = K 685 | type (str): min/mean to get the entry values 686 | 687 | Returns: 688 | predTheta (torch.Tensor 1xDxD): Final precision matrix 689 | """ 690 | K, _, D = predTheta.shape 691 | # get the value term 692 | if type=='min': 693 | value_term = torch.min(torch.abs(predTheta), 0)[0] 694 | elif type=='mean': 695 | value_term = torch.mean(torch.abs(predTheta), 0)[0] 696 | else: 697 | print(f'Enter valid type min/mean, currently {type}') 698 | sys.exit(0) 699 | # get the sign term 700 | max_count_sign = torch.sum(torch.sign(predTheta), 0) 701 | # If sign is 0, then assign +1 702 | max_count_sign[max_count_sign>=0] = 1 703 | max_count_sign[max_count_sign<0] = -1 704 | # Get the final precision matrix 705 | predTheta = max_count_sign * value_term 706 | return predTheta.reshape(1, D, D) 707 | 708 | 709 | def run_uGLAD_multitask( 710 | Xb, 711 | trueTheta=None, 712 | eval_offset=0.1, 713 | EPOCHS=250, 714 | lr=0.002, 715 | INIT_DIAG=0, 716 | L=15, 717 | VERBOSE=True, 718 | ): 719 | """Running the uGLAD algorithm in multitask mode. 
We 720 | train using multi-task learning approach to obtain 721 | the final precision matrices for the batch of input data 722 | 723 | Args: 724 | Xb (list of 2D np.array): The input sample matrix K * [M' x D] 725 | NOTE: num_samples can be different for different data 726 | trueTheta (np.array KxDxD): The corresponding 727 | true graphs for reporting metrics or None 728 | eval_offset (float): eigenvalue offset for 729 | covariance matrix adjustment 730 | EPOCHS (int): The number of training epochs 731 | lr (float): Learning rate of glad for the adam optimizer 732 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 733 | L (int): Num of unrolled iterations of GLAD 734 | VERBOSE (bool): if True, prints to sys.out 735 | 736 | Returns: 737 | predTheta (torch.Tensor BxDxD): Predicted graphs 738 | compare_theta (dict): returns comparison metrics if 739 | true precision matrix is provided 740 | model_glad (class object): Returns the learned glad model 741 | """ 742 | K = len(Xb) 743 | # Getting the batches and preparing data for uGLAD 744 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 745 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 746 | # Initialize the model and prepare theta if provided 747 | if trueTheta is not None: 748 | trueTheta = prepare_data.convertToTorch( 749 | trueTheta, 750 | req_grad=False 751 | ) 752 | # model and optimizer for uGLAD 753 | model_glad, optimizer_glad = init_uGLAD( 754 | lr=lr, 755 | theta_init_offset=1.0, 756 | nF=3, 757 | H=3 758 | ) 759 | # Optimizing for the glasso loss 760 | PRINT_EVERY = int(EPOCHS/10) 761 | # print max 10 times per training 762 | for e in range(EPOCHS): 763 | # reset the grads to zero 764 | optimizer_glad.zero_grad() 765 | # calculate the loss and precision matrix 766 | predTheta, loss = forward_uGLAD( 767 | Sb, 768 | model_glad, 769 | L=L, 770 | INIT_DIAG=INIT_DIAG 771 | ) 772 | # calculate the backward gradients 773 | loss.backward() 774 | # updating the optimizer params with the grads 775 | optimizer_glad.step() 776 | # Printing output 777 | _loss = loss.detach().numpy() 778 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{_loss}') 779 | 780 | # reporting the metrics if true theta is provided 781 | compare_theta = [] 782 | if trueTheta is not None: 783 | for b in range(K): 784 | rM = reportMetrics( 785 | trueTheta[b].detach().numpy(), 786 | predTheta[b].detach().numpy() 787 | ) 788 | print(f'Metrics for graph {b}: {rM}\n') 789 | compare_theta.append(rM) 790 | return predTheta, compare_theta, model_glad 791 | ###################################################################### 792 | 793 | # DO NOT USE 794 | def post_threshold(theta, s=80.0): 795 | """Apply post-hoc thresholding to zero out the 796 | entries based on the input sparsity percentile. 797 | Usually we take conservative value of sparsity 798 | percentage, so that we do not miss important 799 | edges. 
800 | 801 | Args: 802 | theta (2d np array): The DxD precision matrix 803 | s (float): Percentile sparsity desired 804 | 805 | Returns: 806 | theta (2d np array): The DxD precision matrix 807 | """ 808 | # getting the threshold for s percentile 809 | cutoff = np.percentile(np.abs(theta), s) 810 | theta[np.abs(theta)=i: 164 | if dtype[fi]=='c' and dtype[fj]=='c': 165 | cov[i, j] = cramers_v(df[fi], df[fj]) 166 | elif dtype[fi]=='c' and dtype[fj]=='r': 167 | cov[i, j] = correlation_ratio(df[fi], df[fj]) 168 | elif dtype[fi]=='r' and dtype[fj]=='c': 169 | cov[i, j] = correlation_ratio(df[fj], df[fi]) 170 | elif dtype[fi]=='r' and dtype[fj]=='r': 171 | cov[i, j] = pearsonr(df[fi], df[fj])[0] 172 | cov[j, i] = cov[i, j] # cov is symmetric 173 | # Convert to pd.Dataframe 174 | cov = pd.DataFrame(cov, index=features, columns=features) 175 | return cov 176 | 177 | 178 | def convertToTorch(data, req_grad=False, use_cuda=False): 179 | """Convert data from numpy to torch variable, if the req_grad 180 | flag is on then the gradient calculation is turned on. 181 | """ 182 | if not torch.is_tensor(data): 183 | dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 184 | data = torch.from_numpy(data.astype(np.float, copy=False)).type(dtype) 185 | data.requires_grad = req_grad 186 | return data 187 | 188 | 189 | def eigVal_conditionNum(A): 190 | """Calculates the eigenvalues and the condition 191 | number of the input matrix A 192 | 193 | condition number = max(|eig|)/min(|eig|) 194 | """ 195 | eig = [v.real for v in np.linalg.eigvals(A)] 196 | condition_number = max(np.abs(eig)) / min(np.abs(eig)) 197 | return eig, condition_number 198 | 199 | 200 | def check_symmetric(a, rtol=1e-05, atol=1e-08): 201 | return np.allclose(a, a.T, rtol=rtol, atol=atol) 202 | 203 | def adjustCov(S, offset=0.1, min_eig=1e-6, max_con=1e5): 204 | # calculate the eigenvalue of the covariance S 205 | eig, con = eigVal_conditionNum(S) 206 | if min(eig)<=min_eig and con>max_con: 207 | # adjust the eigenvalue 208 | print(f'Adjust the eval: min {min(eig)}, con {con}') 209 | S += np.eye(S.shape[-1]) * (offset-min(eig)) 210 | eig, con = eigVal_conditionNum(S) 211 | print(f'new eval: min {min(eig)}, con {con}') 212 | return S 213 | 214 | def getCovariance(Xb, offset=0.1): 215 | """Calculate the batch covariance matrix 216 | 217 | Args: 218 | Xb (3D np array): The input sample matrices (B x M x D) 219 | offset (float): The eigenvalue offset in case of bad 220 | condition number 221 | Returns: 222 | Sb (3D np array): Covariance matrices (B x D x D) 223 | """ 224 | Sb = [] 225 | for X in Xb: 226 | S = covariance.empirical_covariance(X, assume_centered=False) 227 | Sb.append(adjustCov(S, offset)) 228 | return np.array(Sb) 229 | 230 | 231 | def generateRandomGraph(num_nodes, sparsity, seed=None): 232 | """Generate a random erdos-renyi graph with a given 233 | sparsity. 
234 | 235 | Args: 236 | num_nodes (int): The number of nodes in the graph 237 | sparsity ([float, float]): The [min, max] probability of edges 238 | seed (int, optional): set the numpy random seed 239 | 240 | Returns: 241 | edge_connections (2D np array (float)): Adj matrix 242 | """ 243 | # if seed: np.random.seed(seed) 244 | min_s, max_s = sparsity 245 | s = np.random.uniform(min_s, max_s, 1)[0] 246 | G = nx.generators.random_graphs.gnp_random_graph( 247 | num_nodes, 248 | s, 249 | seed=seed, 250 | directed=False 251 | ) 252 | edge_connections = nx.adjacency_matrix(G).todense() 253 | return edge_connections 254 | 255 | 256 | def simulateGaussianSamples( 257 | num_nodes, 258 | edge_connections, 259 | num_samples, 260 | seed=None, 261 | u=0.1, 262 | w_min=0.5, 263 | w_max=1.0, 264 | ): 265 | """Simulating num_samples from a Gaussian distribution. The 266 | precision matrix of the Gaussian is determined using the 267 | edge_connections 268 | 269 | Args: 270 | num_nodes (int): The number of nodes in the DAG 271 | edge_connections (2D np array (float)): Adj matrix 272 | num_sample (int): The number of samples 273 | seed (int, optional): set the numpy random seed 274 | u (float): Min eigenvalue offset for the precision matrix 275 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 276 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 277 | 278 | Returns: 279 | X (2D np array (float)): num_samples x num_nodes 280 | precision_mat (2D np array (float)): num_nodes x num_nodes 281 | """ 282 | # zero mean of Gaussian distribution 283 | mean_value = 0 284 | mean_normal = np.ones(num_nodes) * mean_value 285 | # Setting the random seed 286 | if seed: np.random.seed(seed) 287 | # uniform entry matrix [w_min, w_max] 288 | U = np.matrix(np.random.random((num_nodes, num_nodes)) 289 | * (w_max - w_min) + w_min) 290 | theta = np.multiply(edge_connections, U) 291 | # making it symmetric 292 | theta = (theta + theta.T)/2 + np.eye(num_nodes) 293 | smallest_eigval = np.min(np.linalg.eigvals(theta)) 294 | # Just in case : to avoid numerical error in case an 295 | # epsilon complex component present 296 | smallest_eigval = smallest_eigval.real 297 | # making the min eigenvalue as u 298 | precision_mat = theta + np.eye(num_nodes)*(u - smallest_eigval) 299 | # print(f'Smallest eval: {np.min(np.linalg.eigvals(precision_mat))}') 300 | # getting the covariance matrix (avoid the use of pinv) 301 | cov = np.linalg.inv(precision_mat) 302 | # get the samples 303 | if seed: np.random.seed(seed) 304 | # Sampling data from multivariate normal distribution 305 | data = np.random.multivariate_normal( 306 | mean=mean_normal, 307 | cov=cov, 308 | size=num_samples 309 | ) 310 | return data, precision_mat # MxD, DxD 311 | 312 | ############## Functions to check the input ######## 313 | 314 | # Processing the input data to be compatiable for the sparse graph recovery models 315 | def process_table( 316 | table, 317 | NORM='no', 318 | MIN_VARIANCE=0.0, 319 | msg='', 320 | COND_NUM=np.inf, 321 | eigval_th=1e-3, 322 | VERBOSE=True 323 | ): 324 | """Processing the input data to be compatiable for the 325 | sparse graph recovery models. Checks for the following 326 | issues in the input tabular data (real values only). 327 | Note: The order is important. Repeat the function 328 | twice: process_table(process_table(table)) to ensure 329 | the below conditions are satisfied. 330 | 1. Remove all the rows with zero entries 331 | 2. Fill Nans with column mean 332 | 3. 
Remove columns containing only a single entry 333 | 4. Remove columns with duplicate values 334 | 5. Remove columns with low variance after centering 335 | The above steps are taken in order to ensure that the 336 | input matrix is well-conditioned. 337 | Args: 338 | table (pd.DataFrame): The input table with headers 339 | NORM (str): min_max/mean/no 340 | MIN_VARIANCE (float): Drop the columns below this 341 | variance threshold 342 | COND_NUM (int): The max condition number allowed 343 | eigval_th (float): Min eigval threshold. Making sure 344 | that the min eigval is above this threshold by 345 | droppping highly correlated columns 346 | Returns: 347 | table (pd.DataFrame): The processed table with headers 348 | """ 349 | start = time() 350 | if VERBOSE: 351 | print(f'{msg}: Processing the input table for basic compatibility check') 352 | print(f'{msg}: The input table has sample {table.shape[0]} and features {table.shape[1]}') 353 | 354 | total_samples = table.shape[0] 355 | 356 | # typecast the table to floats 357 | table = table._convert(numeric=True) 358 | 359 | # 1. Removing all the rows with zero entries as the samples are missing 360 | table = table.loc[~(table==0).all(axis=1)] 361 | if VERBOSE: print(f'{msg}: Total zero samples dropped {total_samples - table.shape[0]}') 362 | 363 | # 2. Fill nan's with mean of columns 364 | table = table.fillna(table.mean()) 365 | 366 | # 3. Remove columns containing only a single value 367 | single_value_columns = [] 368 | for col in table.columns: 369 | if len(table[col].unique()) == 1: 370 | single_value_columns.append(col) 371 | table.drop(single_value_columns, inplace=True, axis=1) 372 | if VERBOSE: print(f'{msg}: Single value columns dropped: total {len(single_value_columns)}, columns {single_value_columns}') 373 | 374 | # Normalization of the input table 375 | table = normalize_table(table, NORM) 376 | 377 | # Analysing the input table's covariance matrix condition number 378 | analyse_condition_number(table, 'Input', VERBOSE) 379 | 380 | # 4. Remove columns with duplicate values 381 | all_columns = table.columns 382 | table = table.T.drop_duplicates().T 383 | duplicate_columns = list(set(all_columns) - set(table.columns)) 384 | if VERBOSE: print(f'{msg}: Duplicates dropped: total {len(duplicate_columns)}, columns {duplicate_columns}') 385 | 386 | # 5. Columns having similar variance have a slight chance that they might be almost duplicates 387 | # which can affect the condition number of the covariance matrix. 388 | # Also columns with low variance are less informative 389 | table_var = table.var().sort_values(ascending=True) 390 | # print(f'{msg}: Variance of the columns {table_var.to_string()}') 391 | # Dropping the columns with variance < MIN_VARIANCE 392 | low_variance_columns = list(table_var[table_var COND_NUM: # ill-conditioned matrix 403 | if VERBOSE: 404 | print(f'{msg}: {itr} Condition number is high {con}. \ 405 | Dropping the highly correlated features in the cov-table') 406 | # Find the number of eig vals < eigval_th for the cov_table matrix. 407 | # Rough indicator of the lower bound num of features that are highly correlated. 
408 | eig = np.array(sorted(eig)) 409 | lb_ill_cond_features = len(eig[eig {eigval_th} and current cond num {con}') 413 | if con > COND_NUM: 414 | lb_ill_cond_features = 1 415 | else: 416 | break 417 | highly_correlated_features = get_highly_correlated_features(cov_table) 418 | # Extracting the minimum num of features making the cov_table ill-conditioned 419 | highly_correlated_features = highly_correlated_features[ 420 | :min(lb_ill_cond_features, len(highly_correlated_features)) 421 | ] 422 | # The corresponding column names 423 | highly_correlated_columns = table.columns[highly_correlated_features] 424 | if VERBOSE: print(f'{msg} {itr}: Highly Correlated features dropped {highly_correlated_columns}, \ 425 | {len(highly_correlated_columns)}') 426 | # Dropping the columns 427 | table.drop(highly_correlated_columns, inplace=True, axis=1) 428 | # Analysing the processed table's covariance matrix condition number 429 | cov_table, eig, con = analyse_condition_number( 430 | table, 431 | f'{msg} {itr}: Corr features dropped', 432 | VERBOSE, 433 | ) 434 | # Increasing the iteration number 435 | itr += 1 436 | if VERBOSE: 437 | print(f'{msg}: The processed table has sample {table.shape[0]} and features {table.shape[1]}') 438 | print(f'{msg}: Total time to process the table {np.round(time()-start, 3)} secs') 439 | return table 440 | 441 | 442 | def get_highly_correlated_features(input_cov): 443 | """Taking the covariance of the input covariance matrix 444 | to find the highly correlated features that makes the 445 | input cov matrix ill-conditioned. 446 | Args: 447 | input_cov (2D np.array): DxD matrix 448 | Returns: 449 | features_to_drop (np.array): List of indices to drop 450 | """ 451 | cov2 = covariance.empirical_covariance(input_cov) 452 | # mask the diagonal 453 | np.fill_diagonal(cov2, 0) 454 | # Get the threshold for top 10% 455 | cov_upper = upper_tri_indexing(np.abs(cov2)) 456 | sorted_cov_upper = [i for i in sorted(enumerate(cov_upper), key=lambda x:x[1], reverse=True)] 457 | th = sorted_cov_upper[int(0.1*len(sorted_cov_upper))][1] 458 | # Getting the feature correlation dictionary 459 | high_indices = np.transpose(np.nonzero(np.abs(cov2) >= th)) 460 | high_indices_dict = {} 461 | for i in high_indices: # the upper triangular part 462 | if i[0] in high_indices_dict: 463 | high_indices_dict[i[0]].append(i[1]) 464 | else: 465 | high_indices_dict[i[0]] = [i[1]] 466 | # sort the features based on the number of other correlated features. 
467 | top_correlated_features = [[f, len(v)] for (f, v) in high_indices_dict.items()] 468 | top_correlated_features.sort(key=lambda x: x[1], reverse=True) 469 | top_correlated_features = np.array(top_correlated_features) 470 | features_to_drop = top_correlated_features[:, 0] 471 | return features_to_drop 472 | 473 | 474 | def upper_tri_indexing(A): 475 | m = A.shape[0] 476 | r,c = np.triu_indices(m,1) 477 | return A[r,c] 478 | 479 | 480 | def analyse_condition_number(table, MESSAGE='', VERBOSE=True): 481 | S = covariance.empirical_covariance(table, assume_centered=False) 482 | eig, con = eig_val_condition_num(S) 483 | if VERBOSE: print(f'{MESSAGE} covariance matrix: The condition number {con} and min eig {min(eig)} max eig {max(eig)}') 484 | return S, eig, con 485 | 486 | 487 | def eig_val_condition_num(A): 488 | """Calculates the eigenvalues and the condition 489 | number of the input matrix A 490 | 491 | condition number = max(|eig|)/min(|eig|) 492 | """ 493 | eig = [v.real for v in np.linalg.eigvals(A)] 494 | condition_number = max(np.abs(eig)) / min(np.abs(eig)) 495 | return eig, condition_number 496 | 497 | 498 | def normalize_table(df, typeN): 499 | if typeN == 'min_max': 500 | return (df-df.min())/(df.max()-df.min()) 501 | elif typeN == 'mean': 502 | return (df-df.mean())/df.std() 503 | else: 504 | print(f'No Norm applied : Type entered {typeN}') 505 | return df -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | # Update conda environment. 2 | conda update -n base conda; 3 | conda update --all; 4 | 5 | # Create conda environment. 6 | conda create -n ngm python=3.8 -y; 7 | conda activate ngm; 8 | conda install -c conda-forge notebook -y; 9 | python -m ipykernel install --user --name ngm; 10 | 11 | # install pytorch (1.9.0 version) 12 | conda install numpy -y; 13 | # conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch -y; 14 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch -y; 15 | 16 | # Install packages from conda-forge. 17 | conda install -c conda-forge matplotlib -y; 18 | 19 | # Install packages from anaconda. 20 | # conda install -c anaconda pandas networkx scipy -y; 21 | # Alternate to anaconda channel 22 | conda install -c conda-forge pandas networkx scipy -y; 23 | 24 | # Install pygraphviz (Optional) 25 | conda install --channel conda-forge graphviz pygraphviz -y; 26 | 27 | # Install pip packages 28 | pip3 install -U scikit-learn; 29 | 30 | # Install packages from pip. (Optional) 31 | pip install pyvis; 32 | pip install --upgrade scipy networkx; 33 | 34 | # Create environment.yml. 35 | conda env export > environment.yml; 36 | --------------------------------------------------------------------------------
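To close the loop on the files above, here is a minimal end-to-end sketch: simulate a sparse Gaussian graphical model with the helpers from prepare_data.py, then recover its precision matrix with the uGLAD_GL wrapper from main.py in direct mode. The function names and signatures are taken from the sources above; the graph size, sparsity range and sample count are arbitrary example values, and the snippet assumes the conda environment from setup.sh is active with the ngm package on the Python path.

import numpy as np

from ngm.utils.uGLAD.main import uGLAD_GL
from ngm.utils.uGLAD.utils.prepare_data import generateRandomGraph, simulateGaussianSamples

# Ground truth: a random sparse graph and Gaussian samples whose precision matrix respects it
adj = generateRandomGraph(num_nodes=10, sparsity=[0.1, 0.2], seed=7)
X, true_theta = simulateGaussianSamples(10, adj, num_samples=500, seed=7)
true_theta = np.asarray(true_theta)  # ensure a plain ndarray in case the simulator returns an np.matrix

# Fit uGLAD in direct mode; comparison metrics are returned because true_theta is provided
model = uGLAD_GL()
metrics = model.fit(X, true_theta=true_theta, mode='direct', verbose=False)

print(metrics)                 # recovery metrics vs. the true precision matrix
print(model.precision_.shape)  # recovered D x D precision matrix
print(np.round(model.precision_, 2))

The same wrapper exposes mode='cv' and mode='missing' for k-fold model selection and for inputs containing NaNs, with the k_fold argument controlling the number of splits or row-subsampled batches.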