├── .gitignore ├── LICENSE ├── README.md ├── demo_NGMs.ipynb ├── environment.yml ├── images ├── NGM-inference.png ├── NGM-learning.png ├── NGM-sampling.png ├── graphical-view.png ├── neural-view-projection-modules.png └── neural-view.png ├── ngm ├── __init__.py ├── main.py ├── main_generic.py └── utils │ ├── __init__.py │ ├── data_processing.py │ ├── ggm.py │ ├── metrics.py │ ├── neural_view.py │ └── uGLAD │ ├── __init__.py │ ├── glad │ ├── __init__.py │ ├── glad.py │ ├── glad_params.py │ └── torch_sqrtm.py │ ├── main.py │ └── utils │ ├── __init__.py │ ├── metrics.py │ └── prepare_data.py └── setup.sh /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Harsh Shrivastava 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Neural Graphical Models 2 | `Neural Graphical Models` (NGMs) attempt to represent complex feature dependencies with reasonable computational costs. Specifically, given a graph, we capture the dependency structure between the features along with their complex function representations by using neural networks as a multi-task learning framework. We provide efficient learning, inference and sampling algorithms for NGMs. Moreover, NGMs can fit generic graph structures including directed, undirected and mixed-edge graphs as well as support mixed input data types. 3 | 4 | Key benefits & features: 5 | - Facilitate rich representations of complex underlying distributions. 6 | - Support various relationship types including directed, undirected, mixed-edge graphs. 7 | - Fast and efficient algorithms for learning, inference and sampling. 8 | - Direct access to the learned underlying distributions for analysis. 9 | - Handle different input data types like categorical, images & generic embedding representations. 10 | - Fast and scalable, supports batch learning with GPU support. 11 | 12 | 13 | ### High level overview 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | ### Algorithms 22 | 23 |
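NGMs come with learning, inference and sampling routines; `learning` and `inference` are implemented in `ngm/main.py` and demonstrated end-to-end in `demo_NGMs.ipynb`. The snippet below is a minimal usage sketch rather than code from the repository: it assumes the function signatures documented in `ngm/main.py` and the synthetic-GGM helper `get_data` from `ngm/utils/ggm.py`, and the feature names, graph construction and hyperparameter values are purely illustrative.

```python
import networkx as nx
import numpy as np
import pandas as pd

import ngm.main as ngm_main
import ngm.utils.ggm as ggm

# Simulate a chain-structured Gaussian graphical model and draw samples from it.
Xb, theta_true = ggm.get_data(num_nodes=10, sparsity=[0.1, 0.2],
                              num_samples=500, typeG='CHAIN')
names = [f'x{i}' for i in range(10)]
X = pd.DataFrame(Xb[0], columns=names)

# Conditional independence graph (here read off the true precision matrix;
# in practice it would come from a graph recovery method such as uGLAD).
adj = (np.abs(theta_true[0]) > 1e-6).astype(int) - np.eye(10, dtype=int)
G = nx.relabel_nodes(nx.from_numpy_array(adj), dict(enumerate(names)))

# Learning: fit the MLP `neural' view while penalizing input-output paths
# that are absent in G. Returns [model, scaler, feature_means].
model_NGM = ngm_main.learning(G, X, lambd=1.0, hidden_dim=20,
                              epochs=1200, k_fold=3)

# Inference: observe x0, mark the remaining features as unknown ('u'),
# and recover them by gradient descent over the input of the frozen MLP.
evidence = {n: 'u' for n in names}
evidence['x0'] = 0.5
Xpred = ngm_main.inference(model_NGM, evidence, unknown_val='u')
```

For anything beyond this sketch (for example the sampling routine or mixed input data types), the demo notebook is the authoritative reference.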

24 | 25 | 26 | 27 |

28 | 29 | ## Setup 30 | The `setup.sh` file contains the complete procedure for creating a conda environment to run the NGM model. Run `bash setup.sh`. 31 | In case of dependency conflicts, one can alternatively use the command `conda env create --name ngm --file=environment.yml`. 32 | 33 | ## Demo on representing Gaussian Graphical Models (GGMs) using NGMs 34 | A minimalist working example of NGMs is given in `demo_NGMs.ipynb`. It is a good entry point for understanding the code structure as well as NGMs themselves. 35 | 36 | ## Citation 37 | If you find this method useful, kindly cite the following associated papers: 38 | - `Neural Graphical Models`: [arxiv](https://arxiv.org/abs/2210.00453) 39 | 40 | @article{shrivastava2022neural, 41 | title={Neural Graphical Models}, 42 | author={Shrivastava, Harsh and Chajewska, Urszula}, 43 | journal={arXiv preprint arXiv:2210.00453}, 44 | year={2022} 45 | } 46 | 47 | 48 | - `uGLAD`: Sparse graph recovery by optimizing deep unrolled networks. [arxiv]() 49 | 50 | @inproceedings{ 51 | shrivastava2022a, 52 | title={A deep learning approach to recover conditional independence graphs}, 53 | author={Harsh Shrivastava and Urszula Chajewska and Robin Abraham and Xinshi Chen}, 54 | booktitle={NeurIPS 2022 Workshop: New Frontiers in Graph Learning}, 55 | year={2022}, 56 | url={https://openreview.net/forum?id=kEwzoI3Am4c} 57 | } 58 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: ngm 2 | channels: 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - argon2-cffi=21.3.0=pyhd8ed1ab_0 11 | - argon2-cffi-bindings=21.2.0=py38h0a891b7_2 12 | - asttokens=2.0.5=pyhd8ed1ab_0 13 | - atk-1.0=2.36.0=h516909a_2 14 | - attrs=22.1.0=pyh71513ae_1 15 | - backcall=0.2.0=pyh9f0ad1d_0 16 | - backports=1.0=py_2 17 | - backports.functools_lru_cache=1.6.4=pyhd8ed1ab_0 18 | - beautifulsoup4=4.11.1=pyha770c72_0 19 | - blas=1.0=mkl 20 | - bleach=5.0.1=pyhd8ed1ab_0 21 | - bottleneck=1.3.4=py38hce1f21e_0 22 | - brotli=1.0.9=h166bdaf_7 23 | - brotli-bin=1.0.9=h166bdaf_7 24 | - brotlipy=0.7.0=py38h27cfd23_1003 25 | - bzip2=1.0.8=h7b6447c_0 26 | - ca-certificates=2022.6.15=ha878542_0 27 | - cairo=1.16.0=h18b612c_1001 28 | - certifi=2022.6.15=py38h578d9bd_0 29 | - cffi=1.15.0=py38hd667e15_1 30 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 31 | - cryptography=37.0.1=py38h9ce1e76_0 32 | - cudatoolkit=10.2.89=hfd86e86_1 33 | - cycler=0.11.0=pyhd8ed1ab_0 34 | - dbus=1.13.18=hb2f20db_0 35 | - debugpy=1.6.0=py38hfa26641_0 36 | - decorator=5.1.1=pyhd8ed1ab_0 37 | - defusedxml=0.7.1=pyhd8ed1ab_0 38 | - entrypoints=0.4=pyhd8ed1ab_0 39 | - executing=0.9.1=pyhd8ed1ab_0 40 | - expat=2.4.8=h27087fc_0 41 | - ffmpeg=4.3=hf484d3e_0 42 | - flit-core=3.7.1=pyhd8ed1ab_0 43 | - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 44 | - font-ttf-inconsolata=3.000=h77eed37_0 45 | - font-ttf-source-code-pro=2.038=h77eed37_0 46 | - font-ttf-ubuntu=0.83=hab24e00_0 47 | - fontconfig=2.14.0=h8e229c2_0 48 | - fonts-conda-ecosystem=1=0 49 | - fonts-conda-forge=1=0 50 | - fonttools=4.25.0=pyhd3eb1b0_0 51 | - freetype=2.11.0=h70c0345_0 52 | - fribidi=1.0.10=h36c2ea0_0 53 | - gdk-pixbuf=2.42.8=h433bba3_0 54 | - giflib=5.2.1=h7b6447c_0 55 | - glib=2.69.1=h4ff587b_1 56 | - gmp=6.2.1=h295c915_3 57 | - gnutls=3.6.15=he1e5248_0 58 | - gobject-introspection=1.72.0=py38hbb6d50b_0 59 | - graphite2=1.3.14=h295c915_1 60 | - graphviz=2.50.0=h3cd0ef9_0 61 | -
gst-plugins-base=1.14.0=hbbd80ab_1 62 | - gstreamer=1.14.0=h28cd5cc_2 63 | - gtk2=2.24.33=h73c1081_2 64 | - gts=0.7.6=h08bb679_0 65 | - harfbuzz=4.3.0=hd55b92a_0 66 | - icu=58.2=hf484d3e_1000 67 | - idna=3.3=pyhd3eb1b0_0 68 | - importlib-metadata=4.11.4=py38h578d9bd_0 69 | - importlib_resources=5.9.0=pyhd8ed1ab_0 70 | - intel-openmp=2021.4.0=h06a4308_3561 71 | - ipykernel=6.15.1=pyh210e3f2_0 72 | - ipython=8.4.0=py38h578d9bd_0 73 | - ipython_genutils=0.2.0=py_1 74 | - jedi=0.18.1=py38h578d9bd_1 75 | - jinja2=3.1.2=pyhd8ed1ab_1 76 | - jpeg=9e=h7f8727e_0 77 | - jsonschema=4.9.1=pyhd8ed1ab_0 78 | - jupyter_client=7.0.6=pyhd8ed1ab_0 79 | - jupyter_core=4.11.1=py38h578d9bd_0 80 | - jupyterlab_pygments=0.2.2=pyhd8ed1ab_0 81 | - kiwisolver=1.4.2=py38h295c915_0 82 | - lame=3.100=h7b6447c_0 83 | - lcms2=2.12=h3be6417_0 84 | - ld_impl_linux-64=2.38=h1181459_1 85 | - libbrotlicommon=1.0.9=h166bdaf_7 86 | - libbrotlidec=1.0.9=h166bdaf_7 87 | - libbrotlienc=1.0.9=h166bdaf_7 88 | - libffi=3.3=he6710b0_2 89 | - libgcc-ng=11.2.0=h1234567_1 90 | - libgd=2.3.3=h695aa2c_1 91 | - libgfortran-ng=7.5.0=ha8ba4b0_17 92 | - libgfortran4=7.5.0=ha8ba4b0_17 93 | - libgomp=11.2.0=h1234567_1 94 | - libiconv=1.16=h7f8727e_2 95 | - libidn2=2.3.2=h7f8727e_0 96 | - libpng=1.6.37=hbc83047_0 97 | - librsvg=2.54.4=h19fe530_0 98 | - libsodium=1.0.18=h36c2ea0_1 99 | - libstdcxx-ng=11.2.0=h1234567_1 100 | - libtasn1=4.16.0=h27cfd23_0 101 | - libtiff=4.2.0=h2818925_1 102 | - libtool=2.4.6=h9c3ff4c_1008 103 | - libunistring=0.9.10=h27cfd23_0 104 | - libuuid=2.32.1=h7f98852_1000 105 | - libwebp=1.2.2=h55f646e_0 106 | - libwebp-base=1.2.2=h7f8727e_0 107 | - libxcb=1.13=h7f98852_1004 108 | - libxml2=2.9.14=h74e7548_0 109 | - lz4-c=1.9.3=h295c915_1 110 | - markupsafe=2.1.1=py38h0a891b7_1 111 | - matplotlib=3.5.2=py38h578d9bd_1 112 | - matplotlib-base=3.5.2=py38h826bfd8_0 113 | - matplotlib-inline=0.1.3=pyhd8ed1ab_0 114 | - mistune=0.8.4=py38h497a2fe_1005 115 | - mkl=2021.4.0=h06a4308_640 116 | - mkl-service=2.4.0=py38h7f8727e_0 117 | - mkl_fft=1.3.1=py38hd3c417c_0 118 | - mkl_random=1.2.2=py38h51133e4_0 119 | - munkres=1.1.4=pyh9f0ad1d_0 120 | - nbclient=0.6.6=pyhd8ed1ab_0 121 | - nbconvert=6.5.0=pyhd8ed1ab_0 122 | - nbconvert-core=6.5.0=pyhd8ed1ab_0 123 | - nbconvert-pandoc=6.5.0=pyhd8ed1ab_0 124 | - nbformat=5.4.0=pyhd8ed1ab_0 125 | - ncurses=6.3=h5eee18b_3 126 | - nest-asyncio=1.5.5=pyhd8ed1ab_0 127 | - nettle=3.7.3=hbbd107a_1 128 | - networkx=2.7.1=pyhd3eb1b0_0 129 | - notebook=6.4.12=pyha770c72_0 130 | - numexpr=2.8.1=py38h807cd23_2 131 | - openh264=2.1.1=h4ff587b_0 132 | - openssl=1.1.1q=h7f8727e_0 133 | - packaging=21.3=pyhd8ed1ab_0 134 | - pandas=1.4.2=py38h295c915_0 135 | - pandoc=2.18=ha770c72_0 136 | - pandocfilters=1.5.0=pyhd8ed1ab_0 137 | - pango=1.50.7=h05da053_0 138 | - parso=0.8.3=pyhd8ed1ab_0 139 | - pcre=8.45=h9c3ff4c_0 140 | - pexpect=4.8.0=pyh9f0ad1d_2 141 | - pickleshare=0.7.5=py_1003 142 | - pillow=9.2.0=py38hace64e9_1 143 | - pip=22.1.2=py38h06a4308_0 144 | - pixman=0.38.0=h516909a_1003 145 | - pkgutil-resolve-name=1.3.10=pyhd8ed1ab_0 146 | - prometheus_client=0.14.1=pyhd8ed1ab_0 147 | - prompt-toolkit=3.0.30=pyha770c72_0 148 | - psutil=5.9.1=py38h0a891b7_0 149 | - pthread-stubs=0.4=h36c2ea0_1001 150 | - ptyprocess=0.7.0=pyhd3deb0d_0 151 | - pure_eval=0.2.2=pyhd8ed1ab_0 152 | - pycparser=2.21=pyhd8ed1ab_0 153 | - pygments=2.12.0=pyhd8ed1ab_0 154 | - pygraphviz=1.9=py38h86b1bdd_0 155 | - pyopenssl=22.0.0=pyhd3eb1b0_0 156 | - pyparsing=3.0.9=pyhd8ed1ab_0 157 | - pyqt=5.9.2=py38h05f1152_4 158 | - 
pyrsistent=0.18.1=py38h0a891b7_1 159 | - pysocks=1.7.1=py38h06a4308_0 160 | - python=3.8.13=h12debd9_0 161 | - python-dateutil=2.8.2=pyhd8ed1ab_0 162 | - python-fastjsonschema=2.16.1=pyhd8ed1ab_0 163 | - python_abi=3.8=2_cp38 164 | - pytorch=1.12.0=py3.8_cuda10.2_cudnn7.6.5_0 165 | - pytorch-mutex=1.0=cuda 166 | - pytz=2022.1=py38h06a4308_0 167 | - pyzmq=19.0.2=py38ha71036d_2 168 | - qt=5.9.7=h5867ecd_1 169 | - readline=8.1.2=h7f8727e_1 170 | - requests=2.28.1=py38h06a4308_0 171 | - scipy=1.7.3=py38hc147768_0 172 | - send2trash=1.8.0=pyhd8ed1ab_0 173 | - setuptools=61.2.0=py38h06a4308_0 174 | - sip=4.19.13=py38h295c915_0 175 | - six=1.16.0=pyh6c4a22f_0 176 | - soupsieve=2.3.2.post1=pyhd8ed1ab_0 177 | - sqlite=3.39.0=h5082296_0 178 | - stack_data=0.3.0=pyhd8ed1ab_0 179 | - terminado=0.15.0=py38h578d9bd_0 180 | - tinycss2=1.1.1=pyhd8ed1ab_0 181 | - tk=8.6.12=h1ccaba5_0 182 | - torchvision=0.13.0=py38_cu102 183 | - tornado=6.1=py38h0a891b7_3 184 | - traitlets=5.3.0=pyhd8ed1ab_0 185 | - typing_extensions=4.1.1=pyh06a4308_0 186 | - urllib3=1.26.11=py38h06a4308_0 187 | - wcwidth=0.2.5=pyh9f0ad1d_2 188 | - webencodings=0.5.1=py_1 189 | - wheel=0.37.1=pyhd3eb1b0_0 190 | - xorg-kbproto=1.0.7=h7f98852_1002 191 | - xorg-libice=1.0.10=h7f98852_0 192 | - xorg-libsm=1.2.3=hd9c2040_1000 193 | - xorg-libx11=1.7.2=h7f98852_0 194 | - xorg-libxau=1.0.9=h7f98852_0 195 | - xorg-libxdmcp=1.1.3=h7f98852_0 196 | - xorg-libxext=1.3.4=h7f98852_1 197 | - xorg-libxrender=0.9.10=h7f98852_1003 198 | - xorg-renderproto=0.11.1=h7f98852_1002 199 | - xorg-xextproto=7.3.0=h7f98852_1002 200 | - xorg-xproto=7.0.31=h7f98852_1007 201 | - xz=5.2.5=h7f8727e_1 202 | - zeromq=4.3.4=h9c3ff4c_1 203 | - zipp=3.8.1=pyhd8ed1ab_0 204 | - zlib=1.2.12=h7f8727e_2 205 | - zstd=1.5.2=ha4553b6_0 206 | - pip: 207 | - joblib==1.1.0 208 | - jsonpickle==2.2.0 209 | - numpy==1.22.4 210 | - pyvis==0.2.1 211 | - scikit-learn==1.1.1 212 | - threadpoolctl==3.1.0 213 | prefix: /home/harshx/anaconda3/envs/ngm 214 | -------------------------------------------------------------------------------- /images/NGM-inference.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-inference.png -------------------------------------------------------------------------------- /images/NGM-learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-learning.png -------------------------------------------------------------------------------- /images/NGM-sampling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/NGM-sampling.png -------------------------------------------------------------------------------- /images/graphical-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/graphical-view.png -------------------------------------------------------------------------------- /images/neural-view-projection-modules.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/neural-view-projection-modules.png -------------------------------------------------------------------------------- /images/neural-view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/images/neural-view.png -------------------------------------------------------------------------------- /ngm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/__init__.py -------------------------------------------------------------------------------- /ngm/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neural graphical models for the conditional 3 | independence graphs. The conditional independence 4 | graphs show the partial correlations between the 5 | nodes (features). 6 | 7 | Functions for NGMs: 8 | 1. Learning 9 | 2. Inference 10 | 3. Sampling 11 | 12 | Note that this implementation is for 13 | 1. Undirected graphs. 14 | 2. Input data should be real valued. 15 | 16 | TODO: Implementation for the directed graphs. 17 | TODO: Extend to images and categorical variables. 18 | """ 19 | import copy 20 | import networkx as nx 21 | import numpy as np 22 | import pandas as pd 23 | from sklearn.model_selection import KFold 24 | import sys 25 | import torch 26 | import torch.nn as nn 27 | 28 | # local imports 29 | import ngm.utils.neural_view as neural_view 30 | import ngm.utils.data_processing as dp 31 | 32 | 33 | ###################################################################### 34 | # Functions for NGM learning 35 | ###################################################################### 36 | 37 | def product_weights_MLP(model): 38 | """ 39 | Reads the input model (MLP) and returns the normalized 40 | product of the neural network weight matrices. 41 | """ 42 | for i, (n, p) in enumerate(model.MLP.named_parameters()): 43 | if i==0: 44 | if 'weight' in n: 45 | W = torch.abs(p).t() # DxH 46 | # Normalizing the weight using L2-norm 47 | W = torch.nn.functional.normalize(W) 48 | else: # i > 0 49 | if 'weight' in n: 50 | curr_W = torch.abs(p).t() 51 | # Normalizing the current weight using L2-norm 52 | curr_W = torch.nn.functional.normalize(curr_W) 53 | W = torch.matmul(W, curr_W) 54 | # Normalizing the running weight product using L2-norm 55 | W = torch.nn.functional.normalize(W) 56 | return W 57 | 58 | 59 | def forward_NGM(X, model, S, structure_penalty='hadamard', lambd=0.1): 60 | """Pass the input X through the NGM model 61 | to obtain the X_pred. 62 | 63 | LOSS = reg_loss + lambd * structure_loss 64 | 65 | The 'hadamard' ||prodW * Sc|| is more theoretically sound as it just 66 | focuses on the terms needed to zero out and completely drop the 67 | non-zero terms. 68 | The 'diff' ||prodW-S|| also tries to make the non-zero terms go to 1. 69 | 70 | Args: 71 | X (torch.Tensor BxD): Input data 72 | model (torch.nn.object): The MLP model for NGM's `neural' view 73 | S (pd.DataFrame): Adjacency matrix from graph G 74 | structure_penalty (str): 'hadamard':||prodW * Sc||, 'diff':||prodW-S|| 75 | lambd (float): reg_loss + lambd * structure_loss 76 | Recommended lambd=1 as the losses are scaled to the same range. 
77 | 78 | Returns: 79 | (list): [ 80 | Xp (torch.Tensor BxD): The predicted X 81 | loss (torch.scalar): The NGM loss 82 | reg_loss (torch.scalar): The regression term loss 83 | structure_loss (torch.scalar): The structure penalty loss 84 | ] 85 | """ 86 | # 1. Running the NGM model 87 | Xp = model.MLP(X) 88 | # 2. Calculate the regression loss 89 | mse = nn.MSELoss() 90 | reg_loss = mse(Xp, X) 91 | # 3. Calculate the structure loss 92 | # 3.1 Get the frame of the graph structure 93 | if structure_penalty=='hadamard': 94 | # Get the complement of S (binarized) 95 | Sg = (S==0).astype(int) 96 | Sg = dp.convertToTorch(np.array(Sg), req_grad=False) 97 | elif structure_penalty=='diff': 98 | # Binarize the adjacency matrix S 99 | Sg = (S!=0).astype(int) 100 | Sg = dp.convertToTorch(np.array(Sg), req_grad=False) 101 | else: 102 | print(f'Structure penalty {structure_penalty} is not defined') 103 | sys.exit(0) 104 | # 3.2 Initialize the structure loss 105 | structure_loss = torch.zeros(1)[0] 106 | if lambd > 0: 107 | # 3.3 Get the product of weights (L2 normalized) of the MLP 108 | prod_W = product_weights_MLP(model) 109 | D = prod_W.shape[-1] 110 | # 3.4 Calculate the penalty 111 | if structure_penalty=='hadamard': 112 | # Using the L2 norm for high structure penalty 113 | structure_loss = torch.linalg.norm(prod_W*Sg, ord=2) 114 | elif structure_penalty=='diff': 115 | struct_mse = nn.MSELoss() 116 | structure_loss = struct_mse(prod_W, Sg) 117 | # 3.5 Scale the structure loss 118 | structure_loss = structure_loss/(D**2) 119 | # Adding the log scaling 120 | structure_loss = torch.log(structure_loss) 121 | # 4. Calculate the total loss = reg_loss + lambd * struct_loss 122 | loss = reg_loss + lambd * structure_loss 123 | 124 | return Xp, loss, reg_loss, structure_loss 125 | 126 | 127 | def learning( 128 | G, 129 | X, 130 | lambd=1.0, 131 | hidden_dim=20, 132 | epochs=1200, 133 | lr=0.001, 134 | norm_type='min_max', 135 | k_fold=1, 136 | structure_penalty='hadamard', 137 | VERBOSE=True 138 | ): 139 | """Learn the distribution over a conditional independence graph. 140 | 1. Fit a MLP (autoencoder) to learn the data representation from X->X. 141 | 2. The input-output path of dependence structure of the MLP 142 | should match the conditional independence structure of the 143 | input graph. This is achieved using a regularization term. 144 | 3. Return the learned model representing the NGM 145 | 146 | Normalize X and select the best model using K-fold CV. 147 | 148 | Fit the MLP on the input data X to get the `neural' view of NGM 149 | while maintaining the conditional independence structure defined 150 | by the complement structure matrix Sc. Does cross-validation to 151 | get better generalization. 152 | 153 | Args: 154 | G (nx.Graph): Conditional independence graph. 155 | X (pd.DataFrame): Samples(M) x Features(D). 156 | lambd (float): reg_loss + lambd * structure_loss 157 | Recommended lambd=1 as the losses are scaled to the same range. 158 | hidden_dim (int): The size of the hidden unit of the MLP. 159 | Each layer will have the same value. 160 | epochs (int): The training epochs number. 161 | lr (float): Learning rate for the optimizer. 162 | norm_type (str): min_max/mean 163 | k_fold (int): #splits for the k-fold CV. 164 | structure_penalty (str): 'hadamard':||prodW * Sc||, 'diff':||prodW-S|| 165 | VERBOSE (bool): if True, prints to output. 
166 | 167 | Returns: 168 | model_NGM (list): [ 169 | model (torch.nn.object): A MLP model for NGM's `neural' view, 170 | scaler (sklearn object): Learned normalizer for the input data, 171 | feature_means (pd.Series): [feature:mean val] 172 | ] 173 | """ 174 | # Get the graph structure 175 | S = nx.to_pandas_adjacency(G) 176 | # Arrange the columns of X to match the adjacency matrix 177 | X = X[S.columns] 178 | feature_means = X.mean() 179 | print(f'Means of selected features {feature_means, len(feature_means)}') 180 | # Normalize the data 181 | print(f'Normalizing the data: {norm_type}') 182 | X, scaler = dp.process_data_for_CI_graph(X, norm_type) 183 | # Converting the data to torch 184 | X = dp.convertToTorch(np.array(X), req_grad=False) 185 | M, D = X.shape 186 | # Splitting into k-fold for cross-validation 187 | n_splits = k_fold if k_fold > 1 else 2 188 | kf = KFold(n_splits=n_splits, shuffle=True) 189 | # For each fold, collect the best model and the test-loss value 190 | results_Kfold = {} 191 | for _k, (train, test) in enumerate(kf.split(X)): 192 | if _k >= k_fold: # No CV if k_fold=1 193 | continue 194 | if VERBOSE: print(f'Fold num {_k}') 195 | X_train, X_test = X[train], X[test] # KxD, (M-K)xD 196 | 197 | # Initialize the MLP model 198 | if VERBOSE: print(f'Initializing the NGM model') 199 | model = neural_view.DNN(I=D, H=hidden_dim, O=D) 200 | optimizer = neural_view.get_optimizers(model, lr=lr) 201 | 202 | # TODO: Add base initialization only on the regression loss 203 | # model = base_initialization_NGM(model, X_train) 204 | 205 | # Defining optimization & model tracking parameters 206 | best_test_loss = np.inf 207 | PRINT = int(epochs/10) # will print only 10 times 208 | lambd_increase = int(epochs/10) 209 | # updating with the best model and loss for the current fold 210 | results_Kfold[_k] = {} 211 | 212 | # Training the NGM model 213 | for e in range(epochs): 214 | # TODO: Keep increasing the lambd penalty as epochs proceed 215 | # if not e % lambd_increase: 216 | # lambd *= 10 # increase in lambd value 217 | # print(f'epoch={e}, lambda={lambd}') 218 | # reset the grads to zero 219 | optimizer.zero_grad() 220 | # calculate the loss for train data 221 | _, loss_train, reg_loss_train, struct_loss_train = forward_NGM( 222 | X_train, 223 | model, 224 | S, 225 | structure_penalty, 226 | lambd=lambd 227 | ) 228 | with torch.no_grad(): # prediction on test 229 | _, loss_test, reg_loss_test, struct_loss_test = forward_NGM( 230 | X_test, 231 | model, 232 | S, 233 | structure_penalty, 234 | lambd=lambd 235 | ) 236 | # calculate the backward gradients 237 | loss_train.backward() 238 | # updating the optimizer params with the grads 239 | optimizer.step() 240 | # Printing output 241 | if not e%PRINT and VERBOSE: 242 | print(f'\nFold {_k}: epoch:{e}/{epochs}') 243 | print(f'Train: loss={dp.t2np(loss_train)}, reg={dp.t2np(reg_loss_train)}, struct={dp.t2np(struct_loss_train)}') 244 | print(f'Test: loss={dp.t2np(loss_test)}, reg={dp.t2np(reg_loss_test)}, struct={dp.t2np(struct_loss_test)}') 245 | # Updating the best model for this fold 246 | _loss_test = dp.t2np(loss_test) 247 | if _loss_test < best_test_loss: # and e%10==9: 248 | results_Kfold[_k]['best_model_updates'] = f'Fold {_k}: epoch:{e}/{epochs}:\n\ 249 | Train: loss={dp.t2np(loss_train)}, reg={dp.t2np(reg_loss_train)}, struct={dp.t2np(struct_loss_train)}\n\ 250 | Test: loss={dp.t2np(loss_test)}, reg={dp.t2np(reg_loss_test)}, struct={dp.t2np(struct_loss_test)}' 251 | # if VERBOSE and not e%PRINT or e==epochs-1: 252 | # 
print(f'Fold {_k}: epoch:{e}/{epochs}: Updating the best model with test loss={_loss_test}') 253 | best_model_kfold = copy.deepcopy(model) 254 | best_test_loss = _loss_test 255 | # else: # loss increasing, reset the model to the previous best 256 | # # print('re-setting to the previous best model') 257 | # model = best_model_kfold 258 | # optimizer = neural_view.get_optimizers(model, lr=lr) 259 | results_Kfold[_k]['test_loss'] = best_test_loss 260 | results_Kfold[_k]['model'] = best_model_kfold 261 | if VERBOSE: print('\n') 262 | # Select the model from the results Kfold dictionary 263 | # with the best score on the test fold. 264 | best_loss = np.inf 265 | for _k in results_Kfold.keys(): 266 | curr_loss = results_Kfold[_k]['test_loss'] 267 | if curr_loss < best_loss: 268 | model = results_Kfold[_k]['model'] 269 | best_loss = curr_loss 270 | best_model_details = results_Kfold[_k]["best_model_updates"] 271 | 272 | print(f'Best model selected: {best_model_details}') 273 | # Checking the structure of the prodW and Sc 274 | prod_W = dp.t2np(product_weights_MLP(model)) 275 | # print(f'Structure Check: prodW={prod_W}, S={(np.array(S)!=0).astype(int)}') 276 | return [model, scaler, feature_means] 277 | 278 | 279 | ###################################################################### 280 | # Functions to run inference over the learned NGM 281 | ###################################################################### 282 | 283 | def inference( 284 | model_NGM, 285 | node_feature_dict, 286 | unknown_val='u', 287 | lr=0.001, 288 | max_itr=1000, 289 | VERBOSE=True, 290 | reg_loss_th=1e-6 291 | ): 292 | """Algorithm to run the feature inference among the nodes of the 293 | NGM learned over the conditional independence graph. 294 | 295 | We only optimize for the regression of the known values as that 296 | is the only ground truth information we have and the prediction 297 | should be able to recover the observed datapoints. 298 | Regression: Xp = f(Xi) 299 | Input Xi = {Xi[k] (fixed), Xi[u] (learned)} 300 | Reg loss for inference = ||Xp[k] - Xi[k]||^2_2 301 | 302 | Run gradient descent over the input, which modifies the unobserved 303 | features to minimize the inference regression loss. 304 | 305 | Args: 306 | model_NGM (list): [ 307 | model (torch.nn.object): A MLP model for NGM's `neural' view, 308 | scaler (sklearn object): Learned normalizer for the input data, 309 | feature_means (pd.Series): [feature:mean val] 310 | ] 311 | node_feature_dict (dict): {'name':value}. 312 | unknown_val (str): The marker for the unknown value. 313 | lr (float): Learning rate for the optimizer. 314 | max_itr (int): For the convergence. 315 | VERBOSE (bool): enable/disable print statements. 316 | reg_loss_th (float): The threshold for reg loss convergence. 317 | 318 | Returns: 319 | Xpred (pd.DataFrame): Predictions for the unobserved features. 
320 | {'feature name': pred-value} 321 | """ 322 | # Get the NGM params 323 | model, scaler, feature_means = model_NGM 324 | # Get the feature names and input dimension 325 | D = len(feature_means) 326 | feature_names = feature_means.index 327 | # Freeze the model weights 328 | for p in model.parameters(): 329 | p.requires_grad = False 330 | # Initializin the input vector Xi 331 | _Xi = feature_means.copy() 332 | # TODO: Try min and max init as well 333 | # Assign the known (observed) values to the Xi 334 | for _n, v in node_feature_dict.items(): 335 | if v!=unknown_val: 336 | _Xi[_n] = v 337 | # Normalize the values of Xi using the scaler 338 | _Xi = scaler.transform(dp.series2df(_Xi))[0] 339 | # Convert to dataseries to maintain the column name associations 340 | _Xi = pd.Series( 341 | {n:v for n, v in zip(feature_names, _Xi)}, 342 | index=feature_names 343 | ) 344 | # Creating the feature list with unobserved (unkonwn) tensors as learnable. 345 | # and observed (known) tensors as fixed 346 | feature_tensors = [] # List of feature tensors 347 | # Setting the optimization parameters 348 | optimizer_parameters = [] 349 | for i, _n in enumerate(feature_names): 350 | _xi = torch.as_tensor(_Xi[_n]) 351 | # set the value to learnable or not 352 | _xi.requires_grad = node_feature_dict[_n]==unknown_val 353 | feature_tensors.append(_xi) 354 | if node_feature_dict[_n]==unknown_val: 355 | optimizer_parameters.append(_xi) 356 | # Init a mask for the known & unknown values 357 | mask_known = torch.zeros(1, D) 358 | mask_unknown = torch.zeros(1, D) 359 | for i, _n in enumerate(feature_names): 360 | if node_feature_dict[_n]==unknown_val: 361 | mask_unknown[0][i] = 1 362 | else: 363 | mask_known[0][i] = 1 364 | # Define the optimizer 365 | optimizer = torch.optim.Adam( 366 | optimizer_parameters, 367 | lr=lr, 368 | betas=(0.9, 0.999), 369 | eps=1e-08, 370 | # weight_decay=0 371 | ) 372 | # Minimizing for the regression loss for the known values. 
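# Note: only the tensors flagged as unknown were registered with the optimizer
# above, so the gradient steps can move just those entries; the observed entries
# stay fixed and serve as the targets that the MLP's predictions must reproduce.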
373 | itr = 0 374 | curr_reg_loss = np.inf 375 | PRINT = int(max_itr/10) + 1 # will print only 10 times 376 | mse = nn.MSELoss() # regression loss 377 | best_reg_loss = np.inf 378 | while curr_reg_loss > reg_loss_th and itr 0 else 1 385 | return scale_wt_G 386 | 387 | scale_wt_G1 = get_scaling_wt(G1) 388 | scale_wt_G2 = get_scaling_wt(G2) 389 | 390 | plt.figure(figsize=(24, 24)) 391 | plt.subplot(221) 392 | # plt.figure(1, figsize=(fig_size, fig_size)) 393 | plot_graph_compare(G1_int, pos, title=t1+': Edges present in both graphs', scale_wt=scale_wt_G1, intensity=3) 394 | plt.subplot(222)#, figsize=(fig_size, fig_size)) 395 | plot_graph_compare(G2_int, pos, title=t2+': Edges present in both graphs', scale_wt=scale_wt_G2) 396 | plt.subplot(223)#, figsize=(fig_size, fig_size)) 397 | plot_graph_compare(G1_unique, title=t1+': Unique edges', scale_wt=scale_wt_G1, intensity=3) 398 | plt.subplot(224)#, figsize=(fig_size, fig_size)) 399 | # G2_unique.remove_nodes_from(['no_mmorb', 'attend']) 400 | plot_graph_compare(G2_unique, title=t2+': Unique edges', scale_wt=scale_wt_G2)#, get_image_bytes=True) 401 | 402 | plt.savefig('compare_graphs', bbox_inches='tight') 403 | # Saving the figure in-memory 404 | buf = io.BytesIO() 405 | plt.savefig(buf) 406 | # getting the image in bytes 407 | buf.seek(0) 408 | image_bytes = buf.getvalue() # Image.open(buf, mode='r') 409 | buf.close() 410 | # closing the plt 411 | plt.close() 412 | return image_bytes 413 | -------------------------------------------------------------------------------- /ngm/utils/ggm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Contains functions for using NGMs to model 3 | Gaussian Grapical models. 4 | """ 5 | import matplotlib.pyplot as plt 6 | import networkx as nx 7 | import numpy as np 8 | import pandas as pd 9 | import io, sys 10 | from scipy.stats import multivariate_normal 11 | 12 | # Local imports 13 | import ngm.utils.data_processing as dp 14 | 15 | 16 | def get_data( 17 | num_nodes, 18 | sparsity, 19 | num_samples, 20 | batch_size=1, 21 | typeG='CHAIN', 22 | w_min=0.5, 23 | w_max=1.0, 24 | eig_offset=0.1, 25 | ): 26 | """Prepare true adj matrices as theta and then sample from 27 | Gaussian to get the corresponding samples. 28 | 29 | Args: 30 | num_nodes (int): The number of nodes in graph 31 | sparsity ([float, float]): The [min, max] probability of edges 32 | num_samples (int): The number of samples to simulate 33 | batch_size (int, optional): The number of batches 34 | typeG (str): RANDOM/GRID/CHAIN 35 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 36 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 37 | 38 | Returns: 39 | Xb (BxMxD): The sample data 40 | trueTheta (BxDxD): The true precision matrices 41 | """ 42 | Xb, trueTheta = [], [] 43 | for b in range(batch_size): 44 | # I - Getting the true edge connections 45 | edge_connections = generateGraph( 46 | num_nodes, 47 | sparsity, 48 | typeG=typeG 49 | ) 50 | # II - Gettings samples from fitting a Gaussian distribution 51 | # sample the entry of the matrix 52 | 53 | X, true_theta = simulateGaussianSamples( 54 | num_nodes, 55 | edge_connections, 56 | num_samples, 57 | u=eig_offset, 58 | w_min=w_min, 59 | w_max=w_max 60 | ) 61 | # collect the batch data 62 | Xb.append(X) 63 | trueTheta.append(true_theta) 64 | return np.array(Xb), np.array(trueTheta) 65 | 66 | 67 | def generateGraph(num_nodes, sparsity, typeG='RANDOM', seed=None): 68 | """Generate a random erdos-renyi graph with a given 69 | sparsity. 
70 | 71 | Args: 72 | num_nodes (int): The number of nodes in the graph 73 | sparsity ([float, float]): The [min, max] probability of edges 74 | seed (int, optional): set the numpy random seed 75 | typeG (str): RANDOM/GRID/CHAIN 76 | 77 | Returns: 78 | edge_connections (2D np array (float)): Adj matrix 79 | """ 80 | if typeG == 'RANDOM': 81 | min_s, max_s = sparsity 82 | s = np.random.uniform(min_s, max_s, 1)[0] 83 | G = nx.generators.random_graphs.gnp_random_graph( 84 | num_nodes, 85 | s, 86 | seed=seed, 87 | directed=False 88 | ) 89 | elif typeG == 'CHAIN': 90 | G = nx.generators.path_graph(num_nodes) 91 | else: 92 | print(f'Type of graph {typeG} not found.') 93 | sys.exit(0) 94 | edge_connections = nx.adjacency_matrix(G).todense() 95 | return edge_connections 96 | 97 | 98 | def simulateGaussianSamples( 99 | num_nodes, 100 | edge_connections, 101 | num_samples, 102 | seed=None, 103 | u=0.1, 104 | w_min=0.5, 105 | w_max=1.0, 106 | ): 107 | """Simulating num_samples from a Gaussian distribution. The 108 | precision matrix of the Gaussian is determined using the 109 | edge_connections. Randomly assign +/-ve signs to entries. 110 | 111 | Args: 112 | num_nodes (int): The number of nodes in the DAG 113 | edge_connections (2D np array (float)): Adj matrix 114 | num_sample (int): The number of samples 115 | seed (int, optional): set the numpy random seed 116 | u (float): Min eigenvalue offset for the precision matrix 117 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 118 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 119 | 120 | Returns: 121 | X (2D np array (float)): num_samples x num_nodes 122 | precision_mat (2D np array (float)): num_nodes x num_nodes 123 | """ 124 | # zero mean of Gaussian distribution 125 | mean_value = 0 126 | mean_normal = np.ones(num_nodes) * mean_value 127 | # Setting the random seed 128 | if seed: np.random.seed(seed) 129 | # uniform entry matrix [w_min, w_max] 130 | U = np.matrix(np.random.random((num_nodes, num_nodes)) 131 | * (w_max - w_min) + w_min) 132 | theta = np.multiply(edge_connections, U) 133 | # making it symmetric 134 | theta = (theta + theta.T)/2 + np.eye(num_nodes) 135 | # Randomly assign +/-ve signs 136 | gs = nx.Graph() 137 | gs.add_weighted_edges_from( 138 | (u,v,np.random.choice([+1, -1], 1)[0]) 139 | for u,v in nx.complete_graph(num_nodes).edges() 140 | ) 141 | signs = nx.adjacency_matrix(gs).todense() 142 | theta = np.multiply(theta, signs) # update theta with the signs 143 | smallest_eigval = np.min(np.linalg.eigvals(theta)) 144 | # Just in case : to avoid numerical error in case an 145 | # epsilon complex component present 146 | smallest_eigval = smallest_eigval.real 147 | # making the min eigenvalue as u 148 | precision_mat = theta + np.eye(num_nodes)*(u - smallest_eigval) 149 | # print(f'Smallest eval: {np.min(np.linalg.eigvals(precision_mat))}') 150 | # getting the covariance matrix (avoid the use of pinv) 151 | cov = np.linalg.inv(precision_mat) 152 | # get the samples 153 | if seed: np.random.seed(seed) 154 | # Sampling data from multivariate normal distribution 155 | data = np.random.multivariate_normal( 156 | mean=mean_normal, 157 | cov=cov, 158 | size=num_samples 159 | ) 160 | return data, precision_mat # MxD, DxD 161 | 162 | 163 | def get_partial_correlations(precision): 164 | """Get the partial correlation matrix from the 165 | precision matrix. 
It applies the following 166 | 167 | Formula: rho_ij = -p_ij/sqrt(p_ii * p_jj) 168 | 169 | Args: 170 | precision (2D np.array): The precision matrix 171 | 172 | Returns: 173 | rho (2D np.array): The partial correlations 174 | """ 175 | precision = np.array(precision) 176 | D = precision.shape[0] 177 | rho = np.zeros((D, D)) 178 | for i in range(D): # rows 179 | for j in range(D): # columns 180 | if i==j: # diagonal elements 181 | rho[i][j] = 1 182 | elif j < i: # symmetric 183 | rho[i][j] = rho[j][i] 184 | else: # i > j 185 | num = -1*precision[i][j] 186 | den = np.sqrt(precision[i][i]*precision[j][j]) 187 | rho[i][j] = num/den 188 | return rho 189 | 190 | 191 | # Plot the graph 192 | def graph_from_partial_correlations( 193 | rho, 194 | names, # node names 195 | sparsity=1, 196 | title='', 197 | fig_size=12, 198 | PLOT=True, 199 | save_file=None, 200 | roundOFF=5 201 | ): 202 | G = nx.Graph() 203 | G.add_nodes_from(names) 204 | D = rho.shape[-1] 205 | 206 | # determining the threshold to maintain the sparsity level of the graph 207 | def upper_tri_indexing(A): 208 | m = A.shape[0] 209 | r,c = np.triu_indices(m,1) 210 | return A[r,c] 211 | 212 | rho_upper = upper_tri_indexing(np.abs(rho)) 213 | num_non_zeros = int(sparsity*len(rho_upper)) 214 | rho_upper.sort() 215 | th = rho_upper[-num_non_zeros] 216 | print(f'Sparsity {sparsity} using threshold {th}') 217 | th_pos, th_neg = th, -1*th 218 | 219 | graph_edge_list = [] 220 | for i in range(D): 221 | for j in range(i+1, D): 222 | if rho[i,j] > th_pos: 223 | G.add_edge(names[i], names[j], color='green', weight=round(rho[i,j], roundOFF), label='+') 224 | _edge = '('+names[i]+', '+names[j]+', '+str(round(rho[i,j], roundOFF))+', green)' 225 | graph_edge_list.append(_edge) 226 | elif rho[i,j] < th_neg: 227 | G.add_edge(names[i], names[j], color='red', weight=round(rho[i,j], roundOFF), label='-') 228 | _edge = '('+names[i]+', '+names[j]+', '+str(round(rho[i,j], roundOFF))+', red)' 229 | graph_edge_list.append(_edge) 230 | 231 | # if PLOT: print(f'graph edges {graph_edge_list, len(graph_edge_list)}') 232 | 233 | edge_colors = [G.edges[e]['color'] for e in G.edges] 234 | edge_width = np.array([abs(G.edges[e]['weight']) for e in G.edges]) 235 | # Scaling the intensity of the edge_weights for viewing purposes 236 | if len(edge_width) > 0: 237 | edge_width = edge_width/np.max(np.abs(edge_width)) 238 | image_bytes = None 239 | if PLOT: 240 | fig = plt.figure(1, figsize=(fig_size,fig_size)) 241 | plt.title(title) 242 | n_edges = len(G.edges()) 243 | pos = nx.spring_layout(G, scale=0.2, k=1/np.sqrt(n_edges+10)) 244 | # pos = nx.nx_agraph.graphviz_layout(G, prog='fdp') #'fdp', 'sfdp', 'neato' 245 | nx.draw_networkx_nodes(G, pos, node_color='grey', node_size=100) 246 | nx.draw_networkx_edges(G, pos, edge_color=edge_colors, width=edge_width) 247 | y_off = 0.008 248 | nx.draw_networkx_labels(G, pos = {k:([v[0], v[1]+y_off]) for k,v in pos.items()}) 249 | plt.title(f'{title}', fontsize=20) 250 | plt.margins(0.15) 251 | plt.tight_layout() 252 | # saving the file 253 | if save_file: 254 | plt.savefig(save_file, bbox_inches='tight') 255 | # Saving the figure in-memory 256 | buf = io.BytesIO() 257 | plt.savefig(buf) 258 | # getting the image in bytes 259 | buf.seek(0) 260 | image_bytes = buf.getvalue() # Image.open(buf, mode='r') 261 | buf.close() 262 | # closing the plt 263 | plt.close(fig) 264 | return G, image_bytes, graph_edge_list 265 | 266 | 267 | def viz_graph_from_precision(theta, column_names, sparsity=0.1, title=''): 268 | rho = 
get_partial_correlations(theta) 269 | Gr, _, _ = graph_from_partial_correlations( 270 | rho, 271 | column_names, 272 | sparsity=sparsity 273 | ) 274 | print(f'Num nodes: {len(Gr.nodes)}') 275 | Gv = dp.get_interactive_graph(Gr, title, node_PREFIX=None) 276 | return Gr, Gv 277 | 278 | 279 | ###################################################################### 280 | # Functions to analyse the marginal and conditional distributions 281 | ###################################################################### 282 | 283 | def get_distribution_function(target, source, model_GGM, Xi, count=100): 284 | """Plot the function target=GGM(source) or Xp=f(Xi). 285 | Vary the range of the source and collect the values of the 286 | target variable. We keep the rest of the targets & sources 287 | constant given in Xi (input to the GGM). 288 | 289 | Args: 290 | target (str/int/float): The feature of interest 291 | source (str/int/float): The feature having a direct connection 292 | with the target in the neural view of NGM. 293 | model_GGM (list): [ 294 | mean (pd.Series) = {feature: mean value} 295 | cov (2D np.array) = Covariance matrix between features 296 | scaler (list of pd.Series): [data_min_, data_max_] 297 | ] 298 | Xi (pd.DataFrame): Initial values of the input to the model. 299 | All the values except the source nodes remain constant 300 | while varying the input over the range of source feature. 301 | count (int): The number of points to evaluate f(x) in the range. 302 | 303 | Returns: 304 | x_vals (np.array): range of source values 305 | fx_vals (np.array): predicted f(source) values for the target 306 | """ 307 | mean, cov, scaler = model_GGM 308 | data_min_, data_max_ = scaler 309 | column_names = Xi.columns 310 | print(f'target={target}, source={source}') 311 | # Get the min and max range of the source 312 | source_idx = Xi.columns.get_loc(source) 313 | source_min = data_min_[source_idx] 314 | source_max = data_max_[source_idx] 315 | # Get the min and max range of the target 316 | target_idx = Xi.columns.get_loc(target) 317 | target_min = data_min_[target_idx] 318 | target_max = data_max_[target_idx] 319 | # print(f'Source {source} at index {source_idx}: range ({source_min}, {source_max})') 320 | # Get the range of the source and target values 321 | x_vals = np.linspace(source_min, source_max, count) 322 | y_vals = np.linspace(target_min, target_max, count) 323 | # Collect the fx_vals 324 | fx_vals = [] 325 | # For each x_val, find the expected value of y from the pdf 326 | for _x in x_vals: # expected_value calculation 327 | # Set the source value 328 | Xi[source] = _x 329 | # Replicate the Xi entries to have count rows 330 | Xi_batch = pd.DataFrame(np.repeat(Xi.values, count, axis=0), columns=column_names) 331 | # Get the range of possible target values 332 | Xi_batch[target] = y_vals 333 | # Get the probabilitites using the probability density function 334 | py = multivariate_normal.pdf(Xi_batch, mean=mean, cov=cov) 335 | # Normalize the probabilities to make it proportional to conditional 336 | # distribution p(target, source| X{remaining}) = p(S, T, {Xr})/p({Xr}) 337 | py = py/np.sum(py) 338 | _y = np.dot(py, y_vals) # Direct expectation calculation 339 | # Choose the y based on sample count 340 | # _y = np.random.choice(y_vals, count, p=py) 341 | fx_vals.append(_y) 342 | return x_vals, fx_vals 343 | 344 | 345 | def analyse_feature(target_feature, model_GGM, G, Xi=[]): 346 | """Analyse the feature of interest with regards to the 347 | underlying multivariate Gaussian distribution 
defining 348 | the conditional independence graph G. 349 | 350 | Args: 351 | target_feature (str/int/float): The feature of interest, should 352 | be present as one of the nodes in graph G 353 | model_GGM (list): [ 354 | mean (pd.Series) = {feature: mean value} 355 | cov (2D np.array) = Covariance matrix between features 356 | scaler (list of pd.Series): [data_min_, data_max_] 357 | ] 358 | G (nx.Graph): Conditional independence graph. 359 | Xi (pd.DataFrame): Initial input sample. 360 | 361 | Returns: 362 | None (Plots the dependency functions) 363 | """ 364 | mean, cov, scaler = model_GGM 365 | model_features = mean.index 366 | # Preliminary check for the presence of target feature 367 | if target_feature not in model_features: 368 | print(f'Error: Input feature {target_feature} not in model features') 369 | sys.exit(0) 370 | # Drop the nodes not in the model from the graph 371 | common_features = set(G.nodes()).intersection(model_features) 372 | features_dropped = G.nodes() - common_features 373 | print(f'Features dropped from graph: {features_dropped}') 374 | G = G.subgraph(list(common_features)) 375 | # 1. Get the neighbors (the dependent vars in CI graph) of the target 376 | # feature from Graph G. 377 | target_nbrs = G[target_feature] 378 | # 2. Set the initial values of the nodes. 379 | if len(Xi)==0: 380 | Xi = mean 381 | Xi = dp.series2df(Xi) 382 | # Arrange the columns based on the model_feature names for compatibility 383 | Xi = Xi[model_features] 384 | # 3. Getting the plots by varying each nbr node and getting the regression 385 | # values for the target node. 386 | plot_dict = {target_feature:{}} 387 | for nbr in target_nbrs.keys(): 388 | x, fx = get_distribution_function( 389 | target_feature, 390 | nbr, 391 | model_GGM, 392 | Xi 393 | ) 394 | title = f'GGM: {target_feature} (y-axis) vs {nbr} (x-axis)' 395 | plot_dict[target_feature][nbr] = [x, fx, title] 396 | dp.function_plots_for_target(plot_dict) 397 | return None -------------------------------------------------------------------------------- /ngm/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn import metrics 3 | from pprint import pprint 4 | 5 | def get_auc(y, scores): 6 | y = np.array(y).astype(int) 7 | fpr, tpr, thresholds = metrics.roc_curve(y, scores) 8 | roc_auc = metrics.auc(fpr, tpr) 9 | aupr = metrics.average_precision_score(y, scores) 10 | return roc_auc, aupr 11 | 12 | def reportMetrics(trueG, G, beta=1): 13 | """Compute various metrics 14 | Args: 15 | trueG (2D numpy arr[floats]): ground truth precision matrix 16 | G (2D numpy arr[floats]): predicted precsion mat 17 | beta (int, optional): beta for the Fbeta score 18 | Returns: 19 | Dict: {fdr (float): (false positive) / prediction positive = FP/P 20 | tpr (float): (true positive) / condition positive = TP/T 21 | fpr (float): (false positive) / condition negative = FP/F 22 | shd (int): undirected extra + undirected missing = E+M 23 | nnz (int): number of non-zeros for trueG and predG 24 | ps (float): probability of success, sign match 25 | Fbeta (float): F-score with beta 26 | aupr (float): area under the precision-recall curve 27 | auc (float): area under the ROC curve} 28 | """ 29 | trueG = trueG.real 30 | G =G.real 31 | # trueG and G are numpy arrays 32 | # convert all non-zeros in G to 1 33 | d = G.shape[-1] 34 | 35 | # changing to 1/0 for TP and FP calculations 36 | G_binary = np.where(G!=0, 1, 0) 37 | trueG_binary = np.where(trueG!=0, 1, 0) 38 | # extract the 
upper diagonal matrix 39 | indices_triu = np.triu_indices(d, 1) 40 | trueEdges = trueG_binary[indices_triu] #np.triu(G_true_binary, 1) 41 | predEdges = G_binary[indices_triu] #np.triu(G_binary, 1) 42 | # Getting AUROC value 43 | predEdges_auc = G[indices_triu] #np.triu(G_true_binary, 1) 44 | auc, aupr = get_auc(trueEdges, np.absolute(predEdges_auc)) 45 | # Now, we have the edge array for comparison 46 | # true pos = pred is 1 and true is 1 47 | TP = np.sum(trueEdges * predEdges) # true_pos 48 | # False pos = pred is 1 and true is 0 49 | mismatches = np.logical_xor(trueEdges, predEdges) 50 | FP = np.sum(mismatches * predEdges) 51 | # Find all mismatches with Xor and then just select the ones with pred as 1 52 | # P = Number of pred edges : nnzPred 53 | P = np.sum(predEdges) 54 | nnzPred = P 55 | # T = Number of True edges : nnzTrue 56 | T = np.sum(trueEdges) 57 | nnzTrue = T 58 | # F = Number of non-edges in true graph 59 | F = len(trueEdges) - T 60 | # SHD = total number of mismatches 61 | SHD = np.sum(mismatches) 62 | # FDR = False discovery rate 63 | FDR = FP/P 64 | # TPR = True positive rate 65 | TPR = TP/T 66 | # FPR = False positive rate 67 | FPR = FP/F 68 | # False negative = pred is 0 and true is 1 69 | FN = np.sum(mismatches * trueEdges) 70 | # F beta score 71 | num = (1+beta**2)*TP 72 | den = ((1+beta**2)*TP + beta**2 * FN + FP) 73 | Fbeta = num/den 74 | # precision 75 | precision = TP/(TP+FP) 76 | # recall 77 | recall = TP/(TP+FN) 78 | return {'FDR': FDR, 'TPR': TPR, 'FPR': FPR, 'SHD': SHD, 'nnzTrue': nnzTrue, 79 | 'nnzPred': nnzPred, 'precision': precision, 'recall': recall, 80 | 'Fbeta': Fbeta, 'aupr': aupr, 'auc': auc} 81 | 82 | def summarize_compare_theta(compare_dict_list, method_name='Method Name'): 83 | avg_results = {} 84 | for key in compare_dict_list[0].keys(): 85 | avg_results[key] = [] 86 | 87 | total_runs = len(compare_dict_list) 88 | for cd in compare_dict_list: 89 | for key in cd.keys(): 90 | avg_results[key].append(cd[key]) 91 | # getting the mean and std dev 92 | for key in avg_results.keys(): 93 | avk = avg_results[key] 94 | avg_results[key] = (np.mean(avk), np.std(avk)) 95 | print(f'Avg results for {method_name}\n') 96 | pprint(avg_results) 97 | print(f'\nTotal runs {total_runs}\n\n') 98 | return avg_results -------------------------------------------------------------------------------- /ngm/utils/neural_view.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utils for neural graphical models 3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | class DNN(torch.nn.Module): 9 | """The DNN architecture to map the input to input. 10 | """ 11 | def __init__(self, I, H, O, USE_CUDA=False): 12 | """Initializing the MLP for the regression 13 | network. 
14 | 15 | Args: 16 | I (int): The input dimension 17 | H (int): The hidden layer dimension 18 | O (int): The output layer dimension 19 | USE_CUDA (bool): Flag to enable GPU 20 | """ 21 | super(DNN, self).__init__() # init the nn.module 22 | self.dtype = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor 23 | self.I, self.H, self.O = I, H, O 24 | self.MLP = self.getMLP() 25 | 26 | def getMLP(self): 27 | l1 = nn.Linear(self.I, self.H).type(self.dtype) 28 | l2 = nn.Linear(self.H, self.H).type(self.dtype) 29 | # l3 = nn.Linear(self.H, self.H).type(self.dtype) 30 | # l4 = nn.Linear(self.H, self.H).type(self.dtype) 31 | l5 = nn.Linear(self.H, self.O).type(self.dtype) 32 | return nn.Sequential( 33 | l1, nn.ReLU(), #nn.Tanh(), #, 34 | l2, nn.ReLU(), #nn.Tanh(), #nn.ReLU(), #nn.Tanh(), 35 | # l3, nn.ReLU(), 36 | # l4, nn.ReLU(), 37 | l5#, nn.ReLU()#, nn.Sigmoid() 38 | ).type(self.dtype) 39 | 40 | 41 | def get_optimizers(model, lr=0.002, use_optimizer='adam'): 42 | if use_optimizer == 'adam': 43 | optimizer = torch.optim.Adam( 44 | model.parameters(), 45 | lr=lr, 46 | betas=(0.9, 0.999), 47 | eps=1e-08, 48 | # weight_decay=0 49 | ) 50 | else: 51 | print('Optimizer not found!') 52 | return optimizer 53 | -------------------------------------------------------------------------------- /ngm/utils/uGLAD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/utils/uGLAD/__init__.py -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Harshs27/neural-graphical-models/227f6e27ca6a02200ec895235b2251e9a4191773/ngm/utils/uGLAD/glad/__init__.py -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/glad.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from ngm.utils.uGLAD.glad.torch_sqrtm import MatrixSquareRoot 3 | 4 | torch_sqrtm = MatrixSquareRoot.apply 5 | 6 | def get_optimizers(model_glad, lr_glad=0.002, use_optimizer='adam'): 7 | if use_optimizer == 'adam': 8 | optimizer_glad = torch.optim.Adam( 9 | model_glad.parameters(), 10 | lr=lr_glad, 11 | betas=(0.9, 0.999), 12 | eps=1e-08, 13 | # weight_decay=0 14 | ) 15 | else: 16 | print('Optimizer not found!') 17 | return optimizer_glad 18 | 19 | 20 | def batch_matrix_sqrt(A): 21 | # A should be PSD 22 | # if shape of A is 2D, i.e. a single matrix 23 | if len(A.shape)==2: 24 | return torch_sqrtm(A) 25 | else: 26 | n = A.shape[0] 27 | sqrtm_torch = torch.zeros(A.shape).type_as(A) 28 | for i in range(n): 29 | sqrtm_torch[i] = torch_sqrtm(A[i]) 30 | return sqrtm_torch 31 | 32 | 33 | def get_frobenius_norm(A, single=False): 34 | if single: 35 | return torch.sum(A**2) 36 | return torch.mean(torch.sum(A**2, (1,2))) 37 | 38 | 39 | def glad(Sb, model, lambda_init=1, L=15, INIT_DIAG=0, USE_CUDA = False): 40 | """Unrolling the Alternating Minimization algorithm which takes in the 41 | sample covariance (batch mode), runs the iterations of the AM updates and 42 | returns the precision matrix. The hyperparameters are modeled as small 43 | neural networks which are to be learned from the backprop signal of the 44 | loss function. 
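Each unrolled cell computes b = (1/lambda_k) S - theta_k and the closed-form AM update theta_{k+1} = 0.5 * (-b + (b^T b + (4/lambda_k) I)^{1/2}), then applies the learned entrywise soft-thresholding (eta_forward) and updates lambda_k from the change introduced by the thresholding step (lambda_forward); see the loop below.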
45 | 46 | Args: 47 | Sb (3D torch tensor (float)): Covariance (batch x dim x dim) 48 | model (class object): The GLAD neural network parameters 49 | (theta_init, rho, lambda) 50 | lambda_init (float): The initial lambda value 51 | L (int): The number of unrolled iterations 52 | INIT_DIAG (int): if 0 - Initial theta as (S + theta_init_offset * I)^-1 53 | if 1 - Initial theta as (diag(S)+theta_init_offset*I)^-1 54 | USE_CUDA (bool): `True` if GPUs present else `False` 55 | 56 | Returns: 57 | theta_pred (3D torch tensor (float)): The output precision matrix 58 | (batch x dim x dim) 59 | loss (torch scalar (float)): The graphical lasso objective function 60 | """ 61 | D = Sb.shape[-1] # dimension of matrix 62 | # if batch is 1, then reshaping Sb 63 | if len(Sb.shape)==2: 64 | Sb = Sb.reshape(1, Sb.shape[0], Sb.shape[1]) 65 | # Initializing the theta 66 | if INIT_DIAG == 1: 67 | #print('extract batchwise diagonals, add offset and take inverse') 68 | batch_diags = 1/(torch.diagonal(Sb, offset=0, dim1=-2, dim2=-1) 69 | + model.theta_init_offset) 70 | theta_init = torch.diag_embed(batch_diags) 71 | else: 72 | #print('(S+theta_offset*I)^-1 is used') 73 | theta_init = torch.inverse(Sb+model.theta_init_offset * 74 | torch.eye(D).expand_as(Sb).type_as(Sb)) 75 | 76 | theta_pred = theta_init#[ridx] 77 | identity_mat = torch.eye(Sb.shape[-1]).expand_as(Sb) 78 | # diagonal mask 79 | # mask = torch.eye(Sb.shape[-1], Sb.shape[-1]).byte() 80 | # dim = Sb.shape[-1] 81 | # mask1 = torch.ones(dim, dim) - torch.eye(dim, dim) 82 | if USE_CUDA == True: 83 | identity_mat = identity_mat.cuda() 84 | # mask = mask.cuda() 85 | # mask1 = mask1.cuda() 86 | 87 | zero = torch.Tensor([0]) 88 | dtype = torch.FloatTensor 89 | if USE_CUDA == True: 90 | zero = zero.cuda() 91 | dtype = torch.cuda.FloatTensor 92 | 93 | lambda_k = model.lambda_forward(zero + lambda_init, zero, k=0) 94 | for k in range(L): 95 | # GLAD CELL 96 | b = 1.0/lambda_k * Sb - theta_pred 97 | b2_4ac = torch.matmul(b.transpose(-1, -2), b) + 4.0/lambda_k * identity_mat 98 | sqrt_term = batch_matrix_sqrt(b2_4ac) 99 | theta_k1 = 1.0/2*(-1*b+sqrt_term) 100 | 101 | theta_pred = model.eta_forward(theta_k1, Sb, k, theta_pred) 102 | # update the lambda 103 | lambda_k = model.lambda_forward(torch.Tensor( 104 | [get_frobenius_norm(theta_pred-theta_k1)] 105 | ).type(dtype), lambda_k, k) 106 | return theta_pred -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/glad_params.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class glad_params(torch.nn.Module): 5 | """The AM hyperparameters are parameterized in the glad_params. 6 | rho, lambda and theta_init_offset are learnable. 
7 | """ 8 | def __init__(self, theta_init_offset, nF, H, USE_CUDA=False): 9 | """Initializing the GLAD model 10 | 11 | Args: 12 | theta_init_offset (float): The initial eigenvalue offset, set to a high value > 0.1 13 | nF (int): The number of input features for the entrywise thresholding 14 | H (int): The hidden layer size to be used for the NNs 15 | USE_CUDA (bool): Use GPU if True else CPU 16 | """ 17 | super(glad_params, self).__init__() 18 | self.dtype = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor 19 | self.theta_init_offset = nn.Parameter( 20 | torch.Tensor( 21 | [theta_init_offset] 22 | ).type(self.dtype) 23 | ) 24 | self.nF = nF # number of input features 25 | self.H = H # hidden layer size 26 | self.rho_l1 = self.rhoNN() 27 | self.lambda_f = self.lambdaNN() 28 | self.zero = torch.Tensor([0]).type(self.dtype) 29 | 30 | def rhoNN(self):# per iteration NN 31 | l1 = nn.Linear(self.nF, self.H).type(self.dtype) 32 | lH1 = nn.Linear(self.H, self.H).type(self.dtype) 33 | l2 = nn.Linear(self.H, 1).type(self.dtype) 34 | return nn.Sequential(l1, nn.Tanh(), 35 | lH1, nn.Tanh(), 36 | l2, nn.Sigmoid()).type(self.dtype) 37 | 38 | def lambdaNN(self): 39 | l1 = nn.Linear(2, self.H).type(self.dtype) 40 | l2 = nn.Linear(self.H, 1).type(self.dtype) 41 | return nn.Sequential(l1, nn.Tanh(), 42 | l2, nn.Sigmoid()).type(self.dtype) 43 | 44 | def eta_forward(self, X, S, k, F3=[]): 45 | batch_size, shape1, shape2 = X.shape 46 | Xr = X.reshape(batch_size, -1, 1) 47 | Sr = S.reshape(batch_size, -1, 1) 48 | feature_vector = torch.cat((Xr, Sr), -1) 49 | if len(F3)>0: 50 | F3r = F3.reshape(batch_size, -1, 1) 51 | feature_vector = torch.cat((feature_vector, F3r), -1) 52 | # elementwise thresholding 53 | rho_val = self.rho_l1(feature_vector).reshape(X.shape) 54 | return torch.sign(X)*torch.max(self.zero, torch.abs(X)-rho_val) 55 | 56 | def lambda_forward(self, normF, prev_lambda, k=0): 57 | feature_vector = torch.Tensor([normF, prev_lambda]).type(self.dtype) 58 | return self.lambda_f(feature_vector) 59 | 60 | 61 | -------------------------------------------------------------------------------- /ngm/utils/uGLAD/glad/torch_sqrtm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.autograd import Function 4 | import numpy as np 5 | import scipy.linalg 6 | 7 | class MatrixSquareRoot(Function): 8 | """Square root of a positive definite matrix. 9 | NOTE: matrix square root is not differentiable for matrices with 10 | zero eigenvalues. 
11 | """ 12 | @staticmethod 13 | def forward(ctx, input): 14 | itr_TH = 10 # number of iterations threshold 15 | dim = input.shape[0] 16 | norm = torch.norm(input)#.double()) 17 | #Y = input.double()/norm 18 | Y = input/norm 19 | I = torch.eye(dim,dim,device=input.device)#.double() 20 | Z = torch.eye(dim,dim,device=input.device)#.double() 21 | #print('Check: ', Y.type(), I.type(), Z.type()) 22 | for i in range(itr_TH): 23 | T = 0.5*(3.0*I - Z.mm(Y)) 24 | Y = Y.mm(T) 25 | Z = T.mm(Z) 26 | sqrtm = Y*torch.sqrt(norm) 27 | # ctx.mark_dirty(Y,I,Z) 28 | ctx.save_for_backward(sqrtm) 29 | return sqrtm 30 | 31 | @staticmethod 32 | def backward(ctx, grad_output): 33 | itr_TH = 10 # number of iterations threshold 34 | grad_input = None 35 | sqrtm, = ctx.saved_tensors 36 | dim = sqrtm.shape[0] 37 | norm = torch.norm(sqrtm) 38 | A = sqrtm/norm 39 | I = torch.eye(dim, dim, device=sqrtm.device)#.double() 40 | #Q = grad_output.double()/norm 41 | Q = grad_output/norm 42 | for i in range(itr_TH): 43 | Q = 0.5*(Q.mm(3.0*I-A.mm(A))-A.t().mm(A.t().mm(Q)-Q.mm(A))) 44 | A = 0.5*A.mm(3.0*I-A.mm(A)) 45 | grad_input = 0.5*Q 46 | return grad_input 47 | sqrtm = MatrixSquareRoot.apply 48 | 49 | 50 | def original_main(): 51 | from torch.autograd import gradcheck 52 | k = torch.randn(20, 10).double() 53 | # Create a positive definite matrix 54 | pd_mat = k.t().matmul(k) 55 | pd_mat = Variable(pd_mat, requires_grad=True) 56 | test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 57 | print(test) 58 | 59 | def single_main(): 60 | from torch.autograd import gradcheck 61 | n = 1 62 | A = torch.randn( 20, 10).double() 63 | # Create a positive definite matrix 64 | pd_mat = A.t().matmul(A) 65 | pd_mat = Variable(pd_mat, requires_grad=True) 66 | test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 67 | print(test) 68 | 69 | #sqrtm_scipy = np.zeros_like(A) 70 | print('err: ', pd_mat) 71 | sqrtm_scipy = scipy.linalg.sqrtm(pd_mat.detach().numpy().astype(np.float_)) 72 | # for i in range(n): 73 | # sqrtm_scipy[i] = sqrtm(pd_mat[i].detach().numpy()) 74 | sqrtm_torch = sqrtm(pd_mat) 75 | print('sqrtm torch: ', sqrtm_torch) 76 | print('scipy', sqrtm_scipy) 77 | print('Difference: ', np.linalg.norm(sqrtm_scipy - sqrtm_torch.detach().numpy())) 78 | 79 | def main():# batch 80 | from torch.autograd import gradcheck 81 | n = 2 82 | A = torch.randn(n, 4, 5).double() 83 | A.requires_grad = True 84 | # Create a positive definite matrix 85 | #pd_mat = A.t().matmul(A) 86 | pd_mat = torch.matmul(A.transpose(-1, -2), A) 87 | pd_mat = Variable(pd_mat, requires_grad=True) 88 | pd_mat.type = torch.FloatTensor 89 | print('err: ', pd_mat.shape, pd_mat.type) 90 | #test = gradcheck(MatrixSquareRoot.apply, (pd_mat,)) 91 | #print(test) 92 | 93 | sqrtm_scipy = np.zeros_like(pd_mat.detach().numpy()) 94 | #sqrtm_scipy = scipy.linalg.sqrtm(pd_mat.detach().numpy().astype(np.float_)) 95 | for i in range(n): 96 | sqrtm_scipy[i] = scipy.linalg.sqrtm(pd_mat[i].detach().numpy().astype(np.float)) 97 | # batch implementation 98 | sqrtm_torch = torch.zeros(pd_mat.shape) 99 | for i in range(n): 100 | print('custom implementation', pd_mat[i].type()) 101 | sqrtm_torch[i] = sqrtm(pd_mat[i].type(torch.FloatTensor)) 102 | #sqrtm_torch = sqrtm(pd_mat) 103 | print('sqrtm torch: ', sqrtm_torch) 104 | print('scipy', sqrtm_scipy) 105 | print('Difference: ', np.linalg.norm(sqrtm_scipy - sqrtm_torch.detach().numpy())) 106 | 107 | if __name__ == '__main__': 108 | main() 109 | 110 | -------------------------------------------------------------------------------- 
/ngm/utils/uGLAD/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | The main file to train/test the uGLAD algorithm. 3 | Contains code to generate data, run training and the 4 | loss function. 5 | """ 6 | import copy 7 | from tabnanny import check 8 | import numpy as np 9 | import pandas as pd 10 | from sklearn import covariance 11 | from sklearn.model_selection import KFold 12 | import sys 13 | from time import time 14 | import torch 15 | 16 | # Helper functions for uGLAD 17 | from ngm.utils.uGLAD.glad.glad_params import glad_params 18 | from ngm.utils.uGLAD.glad import glad 19 | from ngm.utils.uGLAD.utils.metrics import reportMetrics 20 | 21 | import ngm.utils.uGLAD.utils.prepare_data as prepare_data 22 | 23 | ############### Wrapper class to match sklearn package ############# 24 | class uGLAD_GL(object): 25 | def __init__(self): 26 | """Wrapper class to match the sklearn GraphicalLassoCV 27 | output signature. Initializing the uGLAD model. 28 | """ 29 | super(uGLAD_GL, self).__init__() 30 | self.covariance_ = None 31 | self.precision_ = None 32 | self.model_glad = None 33 | 34 | def fit( 35 | self, 36 | X, 37 | true_theta=None, 38 | eval_offset=0.1, 39 | centered=False, 40 | epochs=250, 41 | lr=0.002, 42 | INIT_DIAG=0, 43 | L=15, 44 | verbose=True, 45 | k_fold=3, 46 | mode='direct', 47 | cov=False 48 | ): 49 | """Takes in the samples X and returns 50 | a uGLAD model which stores the corresponding 51 | covariance and precision matrices. 52 | 53 | Args: 54 | X (2D np array): num_samples x dimension 55 | true_theta (2D np array): dim x dim of the 56 | true precision matrix 57 | eval_offset (float): eigenval adjustment in 58 | case the cov is ill-conditioned 59 | centered (bool): Whether samples are mean 60 | adjusted or not. 
True/False 61 | epochs (int): Training epochs 62 | lr (float): Learning rate of glad for the adam optimizer 63 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 64 | L (int): Num of unrolled iterations of GLAD 65 | verbose (bool): Print training output 66 | k_fold (int): num batches in missing mode 67 | num splits for k-fold in CV mode, 68 | k=0 will run the direct mode 69 | mode (str): direct/cv/missing 70 | cov (bool): If True, X = covariance matrix (DxD) 71 | 72 | Returns: 73 | compare_theta (dict): Dictionary of comparison metrics 74 | between the predicted and true precision matrix 75 | """ 76 | print(f'Running uGLAD') 77 | start = time() 78 | if not cov: 79 | print(f'Processing the input table for basic compatibility check') 80 | X = prepare_data.process_table( 81 | pd.DataFrame(X), 82 | NORM='min_max', 83 | VERBOSE=verbose 84 | ) 85 | X = np.array(X) 86 | # Running the uGLAD model 87 | M, D = X.shape 88 | # Reshaping due to GLAD algorithm requirements 89 | Xb = X.reshape(1, M, D) 90 | true_theta_b = None 91 | if true_theta is not None: 92 | true_theta_b = true_theta.reshape(1, D, D) 93 | if mode=='missing': 94 | print(f'Handling missing data') 95 | pred_theta, compare_theta, model_glad = run_uGLAD_missing( 96 | Xb, 97 | trueTheta=true_theta_b, 98 | eval_offset=eval_offset, 99 | EPOCHS=epochs, 100 | lr=lr, 101 | INIT_DIAG=INIT_DIAG, 102 | L=L, 103 | VERBOSE=verbose, 104 | K_batch=k_fold 105 | ) 106 | elif mode=='cv' and k_fold>=0: 107 | print(f'CV mode: {k_fold}-fold') 108 | pred_theta, compare_theta, model_glad = run_uGLAD_CV( 109 | Xb, 110 | trueTheta=true_theta_b, 111 | eval_offset=eval_offset, 112 | EPOCHS=epochs, 113 | lr=lr, 114 | INIT_DIAG=INIT_DIAG, 115 | L=L, 116 | VERBOSE=verbose, 117 | k_fold=k_fold 118 | ) 119 | elif mode=='direct': 120 | print(f'Direct Mode') 121 | pred_theta, compare_theta, model_glad = run_uGLAD_direct( 122 | Xb, 123 | trueTheta=true_theta_b, 124 | eval_offset=eval_offset, 125 | EPOCHS=epochs, 126 | lr=lr, 127 | INIT_DIAG=INIT_DIAG, 128 | L=L, 129 | VERBOSE=verbose, 130 | cov=cov 131 | ) 132 | else: 133 | print(f'ERROR Please enter K-fold value in valid range [0, ), currently entered {k_fold}; Check mode {mode}') 134 | print(f'Note that cov={cov} only valid for mode=direct') 135 | sys.exit(0) 136 | # np.dot((X-mu)T, (X-mu)) / X.shape[0] 137 | self.covariance_ = covariance.empirical_covariance( 138 | X, 139 | assume_centered=centered 140 | ) 141 | self.precision_ = pred_theta[0].detach().numpy() 142 | self.model_glad = model_glad 143 | print(f'Total runtime: {time()-start} secs\n') 144 | return compare_theta 145 | 146 | 147 | class uGLAD_multitask(object): 148 | def __init__(self): 149 | """Initializing the uGLAD model in multi-task 150 | mode. It saves the covariance and predicted 151 | precision matrices for the input batch of data 152 | """ 153 | super(uGLAD_multitask, self).__init__() 154 | self.covariance_ = [] 155 | self.precision_ = [] 156 | self.model_glad = None 157 | 158 | def fit( 159 | self, 160 | Xb, 161 | true_theta_b=None, 162 | eval_offset=0.1, 163 | centered=False, 164 | epochs=250, 165 | lr=0.002, 166 | INIT_DIAG=0, 167 | L=15, 168 | verbose=True, 169 | ): 170 | """Takes in the samples X and returns 171 | a uGLAD model which stores the corresponding 172 | covariance and precision matrices. 
173 | 174 | Args: 175 | Xb (list of 2D np.array): batch * [num_samples' x dimension] 176 | NOTE: num_samples can be different for different data 177 | true_theta (3D np.array): batch x dim x dim of the 178 | true precision matrix 179 | eval_offset (float): eigenval adjustment in 180 | case the cov is ill-conditioned 181 | centered (bool): Whether samples are mean 182 | adjusted or not. True/False 183 | epochs (int): Training epochs 184 | lr (float): Learning rate of glad for the adam optimizer 185 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 186 | L (int): Num of unrolled iterations of GLAD 187 | verbose (bool): Print training output 188 | 189 | Returns: 190 | compare_theta (list[dict]): Dictionary of comparison metrics 191 | between the predicted and true precision matrices 192 | """ 193 | print(f'Running uGLAD in multi-task mode') 194 | start = time() 195 | print(f'Processing the input table for basic compatibility check') 196 | processed_Xb = [] 197 | for X in Xb: 198 | X = prepare_data.process_table( 199 | pd.DataFrame(X), 200 | NORM='min_max', 201 | VERBOSE=verbose 202 | ) 203 | processed_Xb.append(np.array(X)) 204 | Xb = processed_Xb 205 | # Running the uGLAD model 206 | pred_theta, compare_theta, model_glad = run_uGLAD_multitask( 207 | Xb, 208 | trueTheta=true_theta_b, 209 | eval_offset=eval_offset, 210 | EPOCHS=epochs, 211 | lr=lr, 212 | INIT_DIAG=INIT_DIAG, 213 | L=L, 214 | VERBOSE=verbose, 215 | ) 216 | 217 | # np.dot((X-mu)T, (X-mu)) / X.shape[0] 218 | self.covariance_ = [] 219 | for b in range(len(Xb)): 220 | self.covariance_.append( 221 | covariance.empirical_covariance( 222 | Xb[b], 223 | assume_centered=centered 224 | ) 225 | ) 226 | self.covariance_ = np.array(self.covariance_) 227 | self.precision_ = pred_theta.detach().numpy() 228 | self.model_glad = model_glad 229 | print(f'Total runtime: {time()-start} secs\n') 230 | return compare_theta 231 | ##################################################################### 232 | 233 | 234 | #################### Functions to prepare model ###################### 235 | def init_uGLAD(lr, theta_init_offset=1.0, nF=3, H=3): 236 | """Initialize the GLAD model parameters and the optimizer 237 | to be used. 238 | 239 | Args: 240 | lr (float): Learning rate of glad for the adam optimizer 241 | theta_init_offset (float): Initialization diagonal offset 242 | for the pred theta (adjust eigenvalue) 243 | nF (int): #input features for the entrywise thresholding 244 | H (int): The hidden layer size to be used for the NNs 245 | 246 | Returns: 247 | model: class object 248 | optimizer: class object 249 | """ 250 | model = glad_params( 251 | theta_init_offset=theta_init_offset, 252 | nF=nF, 253 | H=H 254 | ) 255 | optimizer = glad.get_optimizers(model, lr_glad=lr) 256 | return model, optimizer 257 | 258 | 259 | def forward_uGLAD(Sb, model_glad, L=15, INIT_DIAG=0, loss_Sb=None): 260 | """Run the input through the unsupervised GLAD algorithm. 261 | It executes the following steps in batch mode 262 | 1. Run the GLAD model to get predicted precision matrix 263 | 2. Calculate the glasso-loss 264 | 265 | Args: 266 | Sb (torch.Tensor BxDxD): The input covariance matrix 267 | model_glad (dict): Contains the learnable params 268 | L (int): Num of unrolled iterations of GLAD 269 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 270 | loss_Sb (torch.Tensor BxDxD): The input covariance matrix 271 | against which loss is calculated. 
If None, then use 272 | the input covariance matrix Sb 273 | 274 | Returns: 275 | predTheta (torch.Tensor BxDxD): The predicted theta 276 | loss (torch.scalar): The glasso loss 277 | """ 278 | # 1. Running the GLAD model 279 | predTheta = glad.glad(Sb, model_glad, L=L, INIT_DIAG=INIT_DIAG) 280 | # 2. Calculate the glasso-loss 281 | if loss_Sb is None: 282 | loss = loss_uGLAD(predTheta, Sb) 283 | else: 284 | loss = loss_uGLAD(predTheta, loss_Sb) 285 | return predTheta, loss 286 | 287 | 288 | def loss_uGLAD(theta, S): 289 | """The objective function of the graphical lasso which is 290 | the loss function for the unsupervised learning of glad 291 | loss-glasso = 1/M(-log|theta| + <S, theta>) 292 | 293 | NOTE: We fix the batch size B=1 for `uGLAD` 294 | 295 | Args: 296 | theta (tensor 3D): precision matrix BxDxD 297 | S (tensor 3D): covariance matrix BxDxD (dim=D) 298 | 299 | Returns: 300 | loss (tensor 1D): the loss value of the obj function 301 | """ 302 | B, D, _ = S.shape 303 | t1 = -1*torch.logdet(theta) 304 | # Batch Matrix multiplication: torch.bmm 305 | t21 = torch.einsum("bij, bjk -> bik", S, theta) 306 | # getting the trace (batch mode) 307 | t2 = torch.einsum('jii->j', t21) 308 | # print(t1, torch.det(theta), t2) 309 | # regularization term 310 | # tr = 1e-02 * torch.sum(torch.abs(theta)) 311 | glasso_loss = torch.sum(t1+t2)/B # sum over the batch 312 | return glasso_loss 313 | 314 | 315 | def run_uGLAD_direct( 316 | Xb, 317 | trueTheta=None, 318 | eval_offset=0.1, 319 | EPOCHS=250, 320 | lr=0.002, 321 | INIT_DIAG=0, 322 | L=15, 323 | VERBOSE=True, 324 | cov=False 325 | ): 326 | """Running the uGLAD algorithm in direct mode 327 | 328 | Args: 329 | Xb (np.array 1xMxD): The input sample matrix 330 | trueTheta (np.array 1xDxD): The corresponding 331 | true graphs for reporting metrics or None 332 | eval_offset (float): eigenvalue offset for 333 | covariance matrix adjustment 334 | lr (float): Learning rate of glad for the adam optimizer 335 | INIT_DIAG (int): 0/1 for initialization strategy of GLAD 336 | L (int): Num of unrolled iterations of GLAD 337 | EPOCHS (int): The number of training epochs 338 | VERBOSE (bool): if True, prints to sys.out 339 | cov (bool): if True, Xb = cov matrix (1xDxD) 340 | 341 | Returns: 342 | predTheta (torch.Tensor 1xDxD): Predicted graphs 343 | compare_theta (dict): returns comparison metrics if 344 | true precision matrix is provided 345 | model_glad (class object): Returns the learned glad model 346 | """ 347 | # Calculating the batch covariance 348 | if cov: 349 | Sb = [] 350 | for X in Xb: 351 | Sb.append(prepare_data.adjustCov( 352 | X, 353 | offset=eval_offset, 354 | max_con=np.inf # Usually no need to adjust the 355 | # covariance matrix if calculated entrywise 356 | )) 357 | Sb = np.array(Sb) 358 | else: 359 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) # BxDxD 360 | # Converting the data to torch 361 | Xb = prepare_data.convertToTorch(Xb, req_grad=False) 362 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 363 | if trueTheta is not None: 364 | trueTheta = prepare_data.convertToTorch( 365 | trueTheta, 366 | req_grad=False 367 | ) 368 | B, _, _ = Xb.shape 369 | # NOTE: We fix the batch size B=1 for `uGLAD` 370 | # model and optimizer for uGLAD 371 | model_glad, optimizer_glad = init_uGLAD( 372 | lr=lr, 373 | theta_init_offset=1.0, 374 | nF=3, 375 | H=3 376 | ) 377 | PRINT_EVERY = int(EPOCHS/10) 378 | # print max 10 times per training 379 | # Optimizing for the glasso loss 380 | for e in range(EPOCHS): 381 | # reset the grads to zero 382 |
optimizer_glad.zero_grad() 383 | # calculate the loss 384 | predTheta, loss = forward_uGLAD( 385 | Sb, 386 | model_glad, 387 | L=L, 388 | INIT_DIAG=INIT_DIAG 389 | ) 390 | # calculate the backward gradients 391 | loss.backward() 392 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{loss.detach().numpy()}') 393 | # updating the optimizer params with the grads 394 | optimizer_glad.step() 395 | # reporting the metrics if true thetas provided 396 | compare_theta = None 397 | if trueTheta is not None: 398 | for b in range(B): 399 | compare_theta = reportMetrics( 400 | trueTheta[b].detach().numpy(), 401 | predTheta[b].detach().numpy() 402 | ) 403 | print(f'Compare - {compare_theta}') 404 | return predTheta, compare_theta, model_glad 405 | 406 | 407 | def run_uGLAD_CV( 408 | Xb, 409 | trueTheta=None, 410 | eval_offset=0.1, 411 | EPOCHS=250, 412 | lr=0.002, 413 | INIT_DIAG=0, 414 | L=15, 415 | VERBOSE=True, 416 | k_fold=5 417 | ): 418 | """Running the uGLAD algorithm and select the best 419 | model using 5-fold CV. 420 | 421 | Args: 422 | Xb (np.array 1xMxD): The input sample matrix 423 | trueTheta (np.array 1xDxD): The corresponding 424 | true graphs for reporting metrics or None 425 | eval_offset (float): eigenvalue offset for 426 | covariance matrix adjustment 427 | EPOCHS (int): The number of training epochs 428 | lr (float): Learning rate of glad for the adam optimizer 429 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 430 | L (int): Num of unrolled iterations of GLAD 431 | VERBOSE (bool): if True, prints to sys.out 432 | k_fold (int): #splits for k-fold CV 433 | 434 | Returns: 435 | predTheta (torch.Tensor 1xDxD): Predicted graphs 436 | compare_theta (dict): returns comparison metrics if 437 | true precision matrix is provided 438 | model_glad (class object): Returns the learned glad model 439 | """ 440 | # Batch size is fixed to 1 441 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 442 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 443 | # Splitting into k-fold for cross-validation 444 | kf = KFold(n_splits=k_fold) 445 | # For each fold, collect the best model and the glasso-loss value 446 | results_Kfold = {} 447 | for _k, (train, test) in enumerate(kf.split(Xb[0])): 448 | if VERBOSE: print(f'Fold num {_k}') 449 | Xb_train = np.expand_dims(Xb[0][train], axis=0) # 1 x Mtrain x D 450 | Xb_test = np.expand_dims(Xb[0][test], axis=0) # 1 x Mtest x D 451 | # Calculating the batch covariance 452 | Sb_train = prepare_data.getCovariance(Xb_train, offset=eval_offset) # BxDxD 453 | Sb_test = prepare_data.getCovariance(Xb_test, offset=eval_offset) # BxDxD 454 | # Converting the data to torch 455 | Sb_train = prepare_data.convertToTorch(Sb_train, req_grad=False) 456 | Sb_test = prepare_data.convertToTorch(Sb_test, req_grad=False) 457 | if trueTheta is not None: 458 | trueTheta = prepare_data.convertToTorch( 459 | trueTheta, 460 | req_grad=False 461 | ) 462 | B, M, D = Xb_train.shape 463 | # NOTE: We fix the batch size B=1 for `uGLAD' 464 | # model and optimizer for uGLAD 465 | model_glad, optimizer_glad = init_uGLAD( 466 | lr=lr, 467 | theta_init_offset=1.0, 468 | nF=3, 469 | H=3 470 | ) 471 | # Optimizing for the glasso loss 472 | best_test_loss = np.inf 473 | PRINT_EVERY = int(EPOCHS/10) 474 | # print max 10 times per training 475 | for e in range(EPOCHS): 476 | # reset the grads to zero 477 | optimizer_glad.zero_grad() 478 | # calculate the loss for test and precision matrix for train 479 | predTheta, loss_train = forward_uGLAD( 480 | Sb_train, 481 | 
model_glad, 482 | L=L, 483 | INIT_DIAG=INIT_DIAG 484 | ) 485 | with torch.no_grad(): 486 | _, loss_test = forward_uGLAD( 487 | Sb_test, 488 | model_glad, 489 | L=L, 490 | INIT_DIAG=INIT_DIAG 491 | ) 492 | # calculate the backward gradients 493 | loss_train.backward() 494 | # updating the optimizer params with the grads 495 | optimizer_glad.step() 496 | # Printing output 497 | _loss = loss_test.detach().numpy() 498 | if not e%PRINT_EVERY and VERBOSE: print(f'Fold {_k}: epoch:{e}/{EPOCHS} test-loss:{_loss}') 499 | # Updating the best model for this fold 500 | if _loss < best_test_loss: # and e%10==9: 501 | if VERBOSE and not e%PRINT_EVERY: 502 | print(f'Fold {_k}: epoch:{e}/{EPOCHS}: Updating the best model with test-loss {_loss}') 503 | best_model_kfold = copy.deepcopy(model_glad) 504 | best_test_loss = _loss 505 | # updating with the best model and loss for the current fold 506 | results_Kfold[_k] = {} 507 | results_Kfold[_k]['test_loss'] = best_test_loss 508 | results_Kfold[_k]['model'] = best_model_kfold 509 | if VERBOSE: print('\n') 510 | 511 | # Strategy I: Select the best model from the results Kfold dictionary 512 | # with the best score on the test fold. 513 | # print(f'Using Strategy I to select the best model') 514 | best_loss = np.inf 515 | for _k in results_Kfold.keys(): 516 | curr_loss = results_Kfold[_k]['test_loss'] 517 | if curr_loss < best_loss: 518 | model_glad = results_Kfold[_k]['model'] 519 | best_loss = curr_loss 520 | 521 | # Run the best model on the complete data to retrieve the 522 | # final predTheta (precision matrix) 523 | with torch.no_grad(): 524 | predTheta, total_loss = forward_uGLAD( 525 | Sb, 526 | model_glad, 527 | L=L, 528 | INIT_DIAG=INIT_DIAG) 529 | 530 | # reporting the metrics if true theta is provided 531 | compare_theta = None 532 | if trueTheta is not None: 533 | for b in range(B): 534 | compare_theta = reportMetrics( 535 | trueTheta[b].detach().numpy(), 536 | predTheta[b].detach().numpy() 537 | ) 538 | print(f'Comparison - {compare_theta}') 539 | return predTheta, compare_theta, model_glad 540 | 541 | 542 | def run_uGLAD_missing( 543 | Xb, 544 | trueTheta=None, 545 | eval_offset=0.1, 546 | EPOCHS=250, 547 | lr=0.002, 548 | INIT_DIAG=0, 549 | L=15, 550 | VERBOSE=True, 551 | K_batch=3 552 | ): 553 | """Running the uGLAD algorithm in missing data mode. We do a 554 | row-subsample of the input data and then train using multi-task 555 | learning approach to obtain the final precision matrix. 
556 | 557 | Args: 558 | Xb (np.array 1xMxD): The input sample matrix with 559 | missing entries as np.NaNs 560 | trueTheta (np.array 1xDxD): The corresponding 561 | true graphs for reporting metrics or None 562 | eval_offset (float): eigenvalue offset for 563 | covariance matrix adjustment 564 | EPOCHS (int): The number of training epochs 565 | lr (float): Learning rate of glad for the adam optimizer 566 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 567 | L (int): Num of unrolled iterations of GLAD 568 | VERBOSE (bool): if True, prints to sys.out 569 | K_batch (int): number of row-sumsampled batch for 570 | multi-task learning (For less conflict in deciding the 571 | sign of final precision matrix, choose K as a odd value) 572 | 573 | Returns: 574 | predTheta (torch.Tensor 1xDxD): Predicted graphs 575 | compare_theta (dict): returns comparison metrics if 576 | true precision matrix is provided 577 | model_glad (class object): Returns the learned glad model 578 | """ 579 | # Batch size is fixed to 1 580 | if K_batch == 0: K_batch = 3 # setting the default 581 | # Step I: Do statistical mean imputation 582 | Xb = mean_imputation(Xb) 583 | 584 | # Step II: Getting the batches and preparing data for uGLAD 585 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 586 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 587 | # Splitting into k-fold for getting row-subsampled batches 588 | kf = KFold(n_splits=K_batch) 589 | print(f'Creating K={K_batch} row-subsampled batches') 590 | # Collect all the subsample in batch form: K x M' x D 591 | X_K = np.array([Xb[0][Idx] for Idx, _ in kf.split(Xb[0])]) 592 | # Calculating the batch covariance 593 | S_K = prepare_data.getCovariance(X_K, offset=eval_offset) # BxDxD 594 | # Converting the data to torch 595 | S_K = prepare_data.convertToTorch(S_K, req_grad=False) 596 | # Initialize the model and prepare theta if provided 597 | if trueTheta is not None: 598 | trueTheta = prepare_data.convertToTorch( 599 | trueTheta, 600 | req_grad=False 601 | ) 602 | # model and optimizer for uGLAD 603 | model_glad, optimizer_glad = init_uGLAD( 604 | lr=lr, 605 | theta_init_offset=1.0, 606 | nF=3, 607 | H=3 608 | ) 609 | # STEP III: Optimizing for the glasso loss 610 | PRINT_EVERY = int(EPOCHS/10) 611 | # print max 10 times per training 612 | for e in range(EPOCHS): 613 | # reset the grads to zero 614 | optimizer_glad.zero_grad() 615 | # calculate the loss and precision matrix 616 | predTheta, loss = forward_uGLAD( 617 | S_K, 618 | model_glad, 619 | L=L, 620 | INIT_DIAG=INIT_DIAG, 621 | loss_Sb=Sb 622 | ) 623 | # calculate the backward gradients 624 | loss.backward() 625 | # updating the optimizer params with the grads 626 | optimizer_glad.step() 627 | # Printing output 628 | _loss = loss.detach().numpy() 629 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{_loss}') 630 | 631 | # STEP IV: Getting the final precision matrix 632 | print(f'Getting the final precision matrix using the consensus strategy') 633 | predTheta = get_final_precision_from_batch(predTheta, type='min') 634 | 635 | # reporting the metrics if true theta is provided 636 | compare_theta = None 637 | if trueTheta is not None: 638 | compare_theta = reportMetrics( 639 | trueTheta[0].detach().numpy(), 640 | predTheta[0].detach().numpy() 641 | ) 642 | print(f'Comparison - {compare_theta}') 643 | 644 | return predTheta, compare_theta, model_glad 645 | 646 | def mean_imputation(Xb): 647 | """Replace nans of the input data by column 648 | means 649 | 650 | Args: 651 | Xb 
(torch.Tensor 1xMxD): The input sample matrix with 652 | missing entries as np.NaNs 653 | 654 | Returns: 655 | Xb (torch.Tensor 1xMxD): Mean imputed matrix 656 | """ 657 | Xb = Xb[0] 658 | # Mean of columns (ignoring NaNs) 659 | col_mean = np.nanmean(Xb, axis=0) 660 | #Find indices that you need to replace 661 | inds = np.where(np.isnan(Xb)) 662 | # Place column means in the indices. Align the arrays using take 663 | Xb[inds] = np.take(col_mean, inds[1]) 664 | # Check if any column is full of NaNs, raise sys.exit() 665 | if np.isnan(np.sum(Xb)): 666 | print(f'ERROR: One or more columns have all NaNs') 667 | sys.exit(0) 668 | # Reshaping Xb with an extra dimension for compatability with glad 669 | Xb = np.expand_dims(Xb, axis=0) 670 | return Xb 671 | 672 | def get_final_precision_from_batch(predTheta, type='min'): 673 | """The predTheta contains a batch of K precision 674 | matrices. This function calculates the final 675 | precision matrix by following the consensus 676 | strategy 677 | 678 | \Theta^{f}_{i,j} = max-count(sign(\Theta^K_{i,j})) 679 | * min/mean{|\Theta^K_{i,j}|} 680 | (`min` is the recommended setting) 681 | 682 | Args: 683 | predTheta (torch.Tensor KxDxD): Predicted graphs 684 | with batch_size = K 685 | type (str): min/mean to get the entry values 686 | 687 | Returns: 688 | predTheta (torch.Tensor 1xDxD): Final precision matrix 689 | """ 690 | K, _, D = predTheta.shape 691 | # get the value term 692 | if type=='min': 693 | value_term = torch.min(torch.abs(predTheta), 0)[0] 694 | elif type=='mean': 695 | value_term = torch.mean(torch.abs(predTheta), 0)[0] 696 | else: 697 | print(f'Enter valid type min/mean, currently {type}') 698 | sys.exit(0) 699 | # get the sign term 700 | max_count_sign = torch.sum(torch.sign(predTheta), 0) 701 | # If sign is 0, then assign +1 702 | max_count_sign[max_count_sign>=0] = 1 703 | max_count_sign[max_count_sign<0] = -1 704 | # Get the final precision matrix 705 | predTheta = max_count_sign * value_term 706 | return predTheta.reshape(1, D, D) 707 | 708 | 709 | def run_uGLAD_multitask( 710 | Xb, 711 | trueTheta=None, 712 | eval_offset=0.1, 713 | EPOCHS=250, 714 | lr=0.002, 715 | INIT_DIAG=0, 716 | L=15, 717 | VERBOSE=True, 718 | ): 719 | """Running the uGLAD algorithm in multitask mode. 
We 720 | train using multi-task learning approach to obtain 721 | the final precision matrices for the batch of input data 722 | 723 | Args: 724 | Xb (list of 2D np.array): The input sample matrix K * [M' x D] 725 | NOTE: num_samples can be different for different data 726 | trueTheta (np.array KxDxD): The corresponding 727 | true graphs for reporting metrics or None 728 | eval_offset (float): eigenvalue offset for 729 | covariance matrix adjustment 730 | EPOCHS (int): The number of training epochs 731 | lr (float): Learning rate of glad for the adam optimizer 732 | INIT_DIAG (int): 0/1 for initilization strategy of GLAD 733 | L (int): Num of unrolled iterations of GLAD 734 | VERBOSE (bool): if True, prints to sys.out 735 | 736 | Returns: 737 | predTheta (torch.Tensor BxDxD): Predicted graphs 738 | compare_theta (dict): returns comparison metrics if 739 | true precision matrix is provided 740 | model_glad (class object): Returns the learned glad model 741 | """ 742 | K = len(Xb) 743 | # Getting the batches and preparing data for uGLAD 744 | Sb = prepare_data.getCovariance(Xb, offset=eval_offset) 745 | Sb = prepare_data.convertToTorch(Sb, req_grad=False) 746 | # Initialize the model and prepare theta if provided 747 | if trueTheta is not None: 748 | trueTheta = prepare_data.convertToTorch( 749 | trueTheta, 750 | req_grad=False 751 | ) 752 | # model and optimizer for uGLAD 753 | model_glad, optimizer_glad = init_uGLAD( 754 | lr=lr, 755 | theta_init_offset=1.0, 756 | nF=3, 757 | H=3 758 | ) 759 | # Optimizing for the glasso loss 760 | PRINT_EVERY = int(EPOCHS/10) 761 | # print max 10 times per training 762 | for e in range(EPOCHS): 763 | # reset the grads to zero 764 | optimizer_glad.zero_grad() 765 | # calculate the loss and precision matrix 766 | predTheta, loss = forward_uGLAD( 767 | Sb, 768 | model_glad, 769 | L=L, 770 | INIT_DIAG=INIT_DIAG 771 | ) 772 | # calculate the backward gradients 773 | loss.backward() 774 | # updating the optimizer params with the grads 775 | optimizer_glad.step() 776 | # Printing output 777 | _loss = loss.detach().numpy() 778 | if not e%PRINT_EVERY and VERBOSE: print(f'epoch:{e}/{EPOCHS} loss:{_loss}') 779 | 780 | # reporting the metrics if true theta is provided 781 | compare_theta = [] 782 | if trueTheta is not None: 783 | for b in range(K): 784 | rM = reportMetrics( 785 | trueTheta[b].detach().numpy(), 786 | predTheta[b].detach().numpy() 787 | ) 788 | print(f'Metrics for graph {b}: {rM}\n') 789 | compare_theta.append(rM) 790 | return predTheta, compare_theta, model_glad 791 | ###################################################################### 792 | 793 | # DO NOT USE 794 | def post_threshold(theta, s=80.0): 795 | """Apply post-hoc thresholding to zero out the 796 | entries based on the input sparsity percentile. 797 | Usually we take conservative value of sparsity 798 | percentage, so that we do not miss important 799 | edges. 
800 | 801 | Args: 802 | theta (2d np array): The DxD precision matrix 803 | s (float): Percentile sparsity desired 804 | 805 | Returns: 806 | theta (2d np array): The DxD precision matrix 807 | """ 808 | # getting the threshold for s percentile 809 | cutoff = np.percentile(np.abs(theta), s) 810 | theta[np.abs(theta)=i: 164 | if dtype[fi]=='c' and dtype[fj]=='c': 165 | cov[i, j] = cramers_v(df[fi], df[fj]) 166 | elif dtype[fi]=='c' and dtype[fj]=='r': 167 | cov[i, j] = correlation_ratio(df[fi], df[fj]) 168 | elif dtype[fi]=='r' and dtype[fj]=='c': 169 | cov[i, j] = correlation_ratio(df[fj], df[fi]) 170 | elif dtype[fi]=='r' and dtype[fj]=='r': 171 | cov[i, j] = pearsonr(df[fi], df[fj])[0] 172 | cov[j, i] = cov[i, j] # cov is symmetric 173 | # Convert to pd.Dataframe 174 | cov = pd.DataFrame(cov, index=features, columns=features) 175 | return cov 176 | 177 | 178 | def convertToTorch(data, req_grad=False, use_cuda=False): 179 | """Convert data from numpy to torch variable, if the req_grad 180 | flag is on then the gradient calculation is turned on. 181 | """ 182 | if not torch.is_tensor(data): 183 | dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor 184 | data = torch.from_numpy(data.astype(np.float, copy=False)).type(dtype) 185 | data.requires_grad = req_grad 186 | return data 187 | 188 | 189 | def eigVal_conditionNum(A): 190 | """Calculates the eigenvalues and the condition 191 | number of the input matrix A 192 | 193 | condition number = max(|eig|)/min(|eig|) 194 | """ 195 | eig = [v.real for v in np.linalg.eigvals(A)] 196 | condition_number = max(np.abs(eig)) / min(np.abs(eig)) 197 | return eig, condition_number 198 | 199 | 200 | def check_symmetric(a, rtol=1e-05, atol=1e-08): 201 | return np.allclose(a, a.T, rtol=rtol, atol=atol) 202 | 203 | def adjustCov(S, offset=0.1, min_eig=1e-6, max_con=1e5): 204 | # calculate the eigenvalue of the covariance S 205 | eig, con = eigVal_conditionNum(S) 206 | if min(eig)<=min_eig and con>max_con: 207 | # adjust the eigenvalue 208 | print(f'Adjust the eval: min {min(eig)}, con {con}') 209 | S += np.eye(S.shape[-1]) * (offset-min(eig)) 210 | eig, con = eigVal_conditionNum(S) 211 | print(f'new eval: min {min(eig)}, con {con}') 212 | return S 213 | 214 | def getCovariance(Xb, offset=0.1): 215 | """Calculate the batch covariance matrix 216 | 217 | Args: 218 | Xb (3D np array): The input sample matrices (B x M x D) 219 | offset (float): The eigenvalue offset in case of bad 220 | condition number 221 | Returns: 222 | Sb (3D np array): Covariance matrices (B x D x D) 223 | """ 224 | Sb = [] 225 | for X in Xb: 226 | S = covariance.empirical_covariance(X, assume_centered=False) 227 | Sb.append(adjustCov(S, offset)) 228 | return np.array(Sb) 229 | 230 | 231 | def generateRandomGraph(num_nodes, sparsity, seed=None): 232 | """Generate a random erdos-renyi graph with a given 233 | sparsity. 
234 | 235 | Args: 236 | num_nodes (int): The number of nodes in the graph 237 | sparsity ([float, float]): The [min, max] probability of edges 238 | seed (int, optional): set the numpy random seed 239 | 240 | Returns: 241 | edge_connections (2D np array (float)): Adj matrix 242 | """ 243 | # if seed: np.random.seed(seed) 244 | min_s, max_s = sparsity 245 | s = np.random.uniform(min_s, max_s, 1)[0] 246 | G = nx.generators.random_graphs.gnp_random_graph( 247 | num_nodes, 248 | s, 249 | seed=seed, 250 | directed=False 251 | ) 252 | edge_connections = nx.adjacency_matrix(G).todense() 253 | return edge_connections 254 | 255 | 256 | def simulateGaussianSamples( 257 | num_nodes, 258 | edge_connections, 259 | num_samples, 260 | seed=None, 261 | u=0.1, 262 | w_min=0.5, 263 | w_max=1.0, 264 | ): 265 | """Simulating num_samples from a Gaussian distribution. The 266 | precision matrix of the Gaussian is determined using the 267 | edge_connections 268 | 269 | Args: 270 | num_nodes (int): The number of nodes in the DAG 271 | edge_connections (2D np array (float)): Adj matrix 272 | num_sample (int): The number of samples 273 | seed (int, optional): set the numpy random seed 274 | u (float): Min eigenvalue offset for the precision matrix 275 | w_min (float): Precision matrix entries ~Unif[w_min, w_max] 276 | w_max (float): Precision matrix entries ~Unif[w_min, w_max] 277 | 278 | Returns: 279 | X (2D np array (float)): num_samples x num_nodes 280 | precision_mat (2D np array (float)): num_nodes x num_nodes 281 | """ 282 | # zero mean of Gaussian distribution 283 | mean_value = 0 284 | mean_normal = np.ones(num_nodes) * mean_value 285 | # Setting the random seed 286 | if seed: np.random.seed(seed) 287 | # uniform entry matrix [w_min, w_max] 288 | U = np.matrix(np.random.random((num_nodes, num_nodes)) 289 | * (w_max - w_min) + w_min) 290 | theta = np.multiply(edge_connections, U) 291 | # making it symmetric 292 | theta = (theta + theta.T)/2 + np.eye(num_nodes) 293 | smallest_eigval = np.min(np.linalg.eigvals(theta)) 294 | # Just in case : to avoid numerical error in case an 295 | # epsilon complex component present 296 | smallest_eigval = smallest_eigval.real 297 | # making the min eigenvalue as u 298 | precision_mat = theta + np.eye(num_nodes)*(u - smallest_eigval) 299 | # print(f'Smallest eval: {np.min(np.linalg.eigvals(precision_mat))}') 300 | # getting the covariance matrix (avoid the use of pinv) 301 | cov = np.linalg.inv(precision_mat) 302 | # get the samples 303 | if seed: np.random.seed(seed) 304 | # Sampling data from multivariate normal distribution 305 | data = np.random.multivariate_normal( 306 | mean=mean_normal, 307 | cov=cov, 308 | size=num_samples 309 | ) 310 | return data, precision_mat # MxD, DxD 311 | 312 | ############## Functions to check the input ######## 313 | 314 | # Processing the input data to be compatiable for the sparse graph recovery models 315 | def process_table( 316 | table, 317 | NORM='no', 318 | MIN_VARIANCE=0.0, 319 | msg='', 320 | COND_NUM=np.inf, 321 | eigval_th=1e-3, 322 | VERBOSE=True 323 | ): 324 | """Processing the input data to be compatiable for the 325 | sparse graph recovery models. Checks for the following 326 | issues in the input tabular data (real values only). 327 | Note: The order is important. Repeat the function 328 | twice: process_table(process_table(table)) to ensure 329 | the below conditions are satisfied. 330 | 1. Remove all the rows with zero entries 331 | 2. Fill Nans with column mean 332 | 3. 
Remove columns containing only a single entry 333 | 4. Remove columns with duplicate values 334 | 5. Remove columns with low variance after centering 335 | The above steps are taken in order to ensure that the 336 | input matrix is well-conditioned. 337 | Args: 338 | table (pd.DataFrame): The input table with headers 339 | NORM (str): min_max/mean/no 340 | MIN_VARIANCE (float): Drop the columns below this 341 | variance threshold 342 | COND_NUM (int): The max condition number allowed 343 | eigval_th (float): Min eigval threshold. Making sure 344 | that the min eigval is above this threshold by 345 | droppping highly correlated columns 346 | Returns: 347 | table (pd.DataFrame): The processed table with headers 348 | """ 349 | start = time() 350 | if VERBOSE: 351 | print(f'{msg}: Processing the input table for basic compatibility check') 352 | print(f'{msg}: The input table has sample {table.shape[0]} and features {table.shape[1]}') 353 | 354 | total_samples = table.shape[0] 355 | 356 | # typecast the table to floats 357 | table = table._convert(numeric=True) 358 | 359 | # 1. Removing all the rows with zero entries as the samples are missing 360 | table = table.loc[~(table==0).all(axis=1)] 361 | if VERBOSE: print(f'{msg}: Total zero samples dropped {total_samples - table.shape[0]}') 362 | 363 | # 2. Fill nan's with mean of columns 364 | table = table.fillna(table.mean()) 365 | 366 | # 3. Remove columns containing only a single value 367 | single_value_columns = [] 368 | for col in table.columns: 369 | if len(table[col].unique()) == 1: 370 | single_value_columns.append(col) 371 | table.drop(single_value_columns, inplace=True, axis=1) 372 | if VERBOSE: print(f'{msg}: Single value columns dropped: total {len(single_value_columns)}, columns {single_value_columns}') 373 | 374 | # Normalization of the input table 375 | table = normalize_table(table, NORM) 376 | 377 | # Analysing the input table's covariance matrix condition number 378 | analyse_condition_number(table, 'Input', VERBOSE) 379 | 380 | # 4. Remove columns with duplicate values 381 | all_columns = table.columns 382 | table = table.T.drop_duplicates().T 383 | duplicate_columns = list(set(all_columns) - set(table.columns)) 384 | if VERBOSE: print(f'{msg}: Duplicates dropped: total {len(duplicate_columns)}, columns {duplicate_columns}') 385 | 386 | # 5. Columns having similar variance have a slight chance that they might be almost duplicates 387 | # which can affect the condition number of the covariance matrix. 388 | # Also columns with low variance are less informative 389 | table_var = table.var().sort_values(ascending=True) 390 | # print(f'{msg}: Variance of the columns {table_var.to_string()}') 391 | # Dropping the columns with variance < MIN_VARIANCE 392 | low_variance_columns = list(table_var[table_var COND_NUM: # ill-conditioned matrix 403 | if VERBOSE: 404 | print(f'{msg}: {itr} Condition number is high {con}. \ 405 | Dropping the highly correlated features in the cov-table') 406 | # Find the number of eig vals < eigval_th for the cov_table matrix. 407 | # Rough indicator of the lower bound num of features that are highly correlated. 
408 | eig = np.array(sorted(eig)) 409 | lb_ill_cond_features = len(eig[eig {eigval_th} and current cond num {con}') 413 | if con > COND_NUM: 414 | lb_ill_cond_features = 1 415 | else: 416 | break 417 | highly_correlated_features = get_highly_correlated_features(cov_table) 418 | # Extracting the minimum num of features making the cov_table ill-conditioned 419 | highly_correlated_features = highly_correlated_features[ 420 | :min(lb_ill_cond_features, len(highly_correlated_features)) 421 | ] 422 | # The corresponding column names 423 | highly_correlated_columns = table.columns[highly_correlated_features] 424 | if VERBOSE: print(f'{msg} {itr}: Highly Correlated features dropped {highly_correlated_columns}, \ 425 | {len(highly_correlated_columns)}') 426 | # Dropping the columns 427 | table.drop(highly_correlated_columns, inplace=True, axis=1) 428 | # Analysing the processed table's covariance matrix condition number 429 | cov_table, eig, con = analyse_condition_number( 430 | table, 431 | f'{msg} {itr}: Corr features dropped', 432 | VERBOSE, 433 | ) 434 | # Increasing the iteration number 435 | itr += 1 436 | if VERBOSE: 437 | print(f'{msg}: The processed table has sample {table.shape[0]} and features {table.shape[1]}') 438 | print(f'{msg}: Total time to process the table {np.round(time()-start, 3)} secs') 439 | return table 440 | 441 | 442 | def get_highly_correlated_features(input_cov): 443 | """Taking the covariance of the input covariance matrix 444 | to find the highly correlated features that makes the 445 | input cov matrix ill-conditioned. 446 | Args: 447 | input_cov (2D np.array): DxD matrix 448 | Returns: 449 | features_to_drop (np.array): List of indices to drop 450 | """ 451 | cov2 = covariance.empirical_covariance(input_cov) 452 | # mask the diagonal 453 | np.fill_diagonal(cov2, 0) 454 | # Get the threshold for top 10% 455 | cov_upper = upper_tri_indexing(np.abs(cov2)) 456 | sorted_cov_upper = [i for i in sorted(enumerate(cov_upper), key=lambda x:x[1], reverse=True)] 457 | th = sorted_cov_upper[int(0.1*len(sorted_cov_upper))][1] 458 | # Getting the feature correlation dictionary 459 | high_indices = np.transpose(np.nonzero(np.abs(cov2) >= th)) 460 | high_indices_dict = {} 461 | for i in high_indices: # the upper triangular part 462 | if i[0] in high_indices_dict: 463 | high_indices_dict[i[0]].append(i[1]) 464 | else: 465 | high_indices_dict[i[0]] = [i[1]] 466 | # sort the features based on the number of other correlated features. 
467 | top_correlated_features = [[f, len(v)] for (f, v) in high_indices_dict.items()] 468 | top_correlated_features.sort(key=lambda x: x[1], reverse=True) 469 | top_correlated_features = np.array(top_correlated_features) 470 | features_to_drop = top_correlated_features[:, 0] 471 | return features_to_drop 472 | 473 | 474 | def upper_tri_indexing(A): 475 | m = A.shape[0] 476 | r,c = np.triu_indices(m,1) 477 | return A[r,c] 478 | 479 | 480 | def analyse_condition_number(table, MESSAGE='', VERBOSE=True): 481 | S = covariance.empirical_covariance(table, assume_centered=False) 482 | eig, con = eig_val_condition_num(S) 483 | if VERBOSE: print(f'{MESSAGE} covariance matrix: The condition number {con} and min eig {min(eig)} max eig {max(eig)}') 484 | return S, eig, con 485 | 486 | 487 | def eig_val_condition_num(A): 488 | """Calculates the eigenvalues and the condition 489 | number of the input matrix A 490 | 491 | condition number = max(|eig|)/min(|eig|) 492 | """ 493 | eig = [v.real for v in np.linalg.eigvals(A)] 494 | condition_number = max(np.abs(eig)) / min(np.abs(eig)) 495 | return eig, condition_number 496 | 497 | 498 | def normalize_table(df, typeN): 499 | if typeN == 'min_max': 500 | return (df-df.min())/(df.max()-df.min()) 501 | elif typeN == 'mean': 502 | return (df-df.mean())/df.std() 503 | else: 504 | print(f'No Norm applied : Type entered {typeN}') 505 | return df -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | # Update conda environment. 2 | conda update -n base conda; 3 | conda update --all; 4 | 5 | # Create conda environment. 6 | conda create -n ngm python=3.8 -y; 7 | conda activate ngm; 8 | conda install -c conda-forge notebook -y; 9 | python -m ipykernel install --user --name ngm; 10 | 11 | # install pytorch (1.9.0 version) 12 | conda install numpy -y; 13 | # conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch -y; 14 | conda install pytorch torchvision cudatoolkit=10.2 -c pytorch -y; 15 | 16 | # Install packages from conda-forge. 17 | conda install -c conda-forge matplotlib -y; 18 | 19 | # Install packages from anaconda. 20 | # conda install -c anaconda pandas networkx scipy -y; 21 | # Alternate to anaconda channel 22 | conda install -c conda-forge pandas networkx scipy -y; 23 | 24 | # Install pygraphviz (Optional) 25 | conda install --channel conda-forge graphviz pygraphviz -y; 26 | 27 | # Install pip packages 28 | pip3 install -U scikit-learn; 29 | 30 | # Install packages from pip. (Optional) 31 | pip install pyvis; 32 | pip install --upgrade scipy networkx; 33 | 34 | # Create environment.yml. 35 | conda env export > environment.yml; 36 | --------------------------------------------------------------------------------
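To close the loop on the files above, here is a minimal end-to-end sketch: simulate a sparse Gaussian graphical model with the helpers from prepare_data.py, then recover its precision matrix with the uGLAD_GL wrapper from main.py in direct mode. The function names and signatures are taken from the sources above; the graph size, sparsity range and sample count are arbitrary example values, and the snippet assumes the conda environment from setup.sh is active with the ngm package on the Python path.

import numpy as np

from ngm.utils.uGLAD.main import uGLAD_GL
from ngm.utils.uGLAD.utils.prepare_data import generateRandomGraph, simulateGaussianSamples

# Ground truth: a random sparse graph and Gaussian samples whose precision matrix respects it
adj = generateRandomGraph(num_nodes=10, sparsity=[0.1, 0.2], seed=7)
X, true_theta = simulateGaussianSamples(10, adj, num_samples=500, seed=7)
true_theta = np.asarray(true_theta)  # ensure a plain ndarray in case the simulator returns an np.matrix

# Fit uGLAD in direct mode; comparison metrics are returned because true_theta is provided
model = uGLAD_GL()
metrics = model.fit(X, true_theta=true_theta, mode='direct', verbose=False)

print(metrics)                 # recovery metrics vs. the true precision matrix
print(model.precision_.shape)  # recovered D x D precision matrix
print(np.round(model.precision_, 2))

The same wrapper exposes mode='cv' and mode='missing' for k-fold model selection and for inputs containing NaNs, with the k_fold argument controlling the number of splits or row-subsampled batches.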