├── .gitignore
├── LICENSE
├── Models
    ├── cosmo_IllustrisTNG_LH_onlypos_0_lr_1.62e-07_weightdecay_1.00e-07_layers_2_rlink_1.50e-02_channels_64_epochs_300
    └── cosmo_SIMBA_LH_onlypos_0_lr_1.09e-06_weightdecay_1.00e-07_layers_4_rlink_1.48e-02_channels_64_epochs_300
├── PS_files
    ├── Pk_galaxies_IllustrisTNG_LH_10_kmax=20.0.npy
    ├── Pk_galaxies_IllustrisTNG_LH_14_kmax=20.0.npy
    ├── Pk_galaxies_IllustrisTNG_LH_18_kmax=20.0.npy
    ├── Pk_galaxies_IllustrisTNG_LH_24_kmax=20.0.npy
    ├── Pk_galaxies_IllustrisTNG_LH_33_kmax=20.0.npy
    ├── Pk_galaxies_SIMBA_LH_10_kmax=20.0.npy
    ├── Pk_galaxies_SIMBA_LH_14_kmax=20.0.npy
    ├── Pk_galaxies_SIMBA_LH_18_kmax=20.0.npy
    ├── Pk_galaxies_SIMBA_LH_24_kmax=20.0.npy
    ├── Pk_galaxies_SIMBA_LH_33_kmax=20.0.npy
    └── k_values.txt
├── README.md
├── Source
    ├── constants.py
    ├── load_data.py
    ├── metalayer.py
    ├── plotting.py
    └── training.py
├── crosstest.py
├── hyperparameters.py
├── hyperparams_optimization.py
├── main.py
├── ps_test.py
├── visualize_graph_10.png
└── visualize_graphs.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | batchfile.slurm
3 | crosstest_batchfile.slurm
4 | tunning_batchfile.slurm
5 | __pycache__/
6 | plotresults.py
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Pablo Villanueva Domingo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Models/cosmo_IllustrisTNG_LH_onlypos_0_lr_1.62e-07_weightdecay_1.00e-07_layers_2_rlink_1.50e-02_channels_64_epochs_300:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/Models/cosmo_IllustrisTNG_LH_onlypos_0_lr_1.62e-07_weightdecay_1.00e-07_layers_2_rlink_1.50e-02_channels_64_epochs_300


--------------------------------------------------------------------------------
/Models/cosmo_SIMBA_LH_onlypos_0_lr_1.09e-06_weightdecay_1.00e-07_layers_4_rlink_1.48e-02_channels_64_epochs_300:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/Models/cosmo_SIMBA_LH_onlypos_0_lr_1.09e-06_weightdecay_1.00e-07_layers_4_rlink_1.48e-02_channels_64_epochs_300


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_IllustrisTNG_LH_10_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_IllustrisTNG_LH_10_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_IllustrisTNG_LH_14_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_IllustrisTNG_LH_14_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_IllustrisTNG_LH_18_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_IllustrisTNG_LH_18_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_IllustrisTNG_LH_24_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_IllustrisTNG_LH_24_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_IllustrisTNG_LH_33_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_IllustrisTNG_LH_33_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_SIMBA_LH_10_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_SIMBA_LH_10_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_SIMBA_LH_14_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_SIMBA_LH_14_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_SIMBA_LH_18_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_SIMBA_LH_18_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_SIMBA_LH_24_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_SIMBA_LH_24_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/Pk_galaxies_SIMBA_LH_33_kmax=20.0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/PS_files/Pk_galaxies_SIMBA_LH_33_kmax=20.0.npy


--------------------------------------------------------------------------------
/PS_files/k_values.txt:
--------------------------------------------------------------------------------
 1 | 3.559856489878782115e-01
 2 | 6.031648866026231293e-01
 3 | 8.579160745827347778e-01
 4 | 1.113464310595710716e+00
 5 | 1.374039914321364408e+00
 6 | 1.619154704570298309e+00
 7 | 1.870686673430271840e+00
 8 | 2.123290040706380832e+00
 9 | 2.378144976376236119e+00
10 | 2.630489925835516996e+00
11 | 2.878653162679494670e+00
12 | 3.131144366275339319e+00
13 | 3.388159578333816313e+00
14 | 3.638741848210948149e+00
15 | 3.888315910732085268e+00
16 | 4.139052155837465996e+00
17 | 4.390305777018236988e+00
18 | 4.642172339120373081e+00
19 | 4.894678666149941293e+00
20 | 5.147269052361176378e+00
21 | 5.398003206656323272e+00
22 | 5.647798235368590269e+00
23 | 5.900534302989873581e+00
24 | 6.151460297009074374e+00
25 | 6.402630176204441526e+00
26 | 6.655964674748322096e+00
27 | 6.908782583105771202e+00
28 | 7.159060853508338873e+00
29 | 7.409414762591408632e+00
30 | 7.659227149364629561e+00
31 | 7.911559474606142395e+00
32 | 8.163750941790897997e+00
33 | 8.415757210439384295e+00
34 | 8.667638839289207198e+00
35 | 8.918801124864609520e+00
36 | 9.170480108345369530e+00
37 | 9.420566033467249412e+00
38 | 9.671642335271435797e+00
39 | 9.923426157758477473e+00
40 | 1.017396902749181820e+01
41 | 1.042533700753922155e+01
42 | 1.067702948358784276e+01
43 | 1.092969337256462303e+01
44 | 1.118211872033766419e+01
45 | 1.143346516925797296e+01
46 | 1.168350508557724687e+01
47 | 1.193379937775929100e+01
48 | 1.218553055762879112e+01
49 | 1.243711650801278878e+01
50 | 1.268867268647359303e+01
51 | 1.294075027177769144e+01
52 | 1.319267287020158363e+01
53 | 1.344470571782862756e+01
54 | 1.369480150283709641e+01
55 | 1.394591277066507828e+01
56 | 1.419764645764527167e+01
57 | 1.444766774168416212e+01
58 | 1.469906835825429070e+01
59 | 1.495059118111036511e+01
60 | 1.520261612590052991e+01
61 | 1.545415944503929495e+01
62 | 1.570456072545780124e+01
63 | 1.595705800128773966e+01
64 | 1.620829297830731619e+01
65 | 1.645966591028190962e+01
66 | 1.671125708846596325e+01
67 | 1.696160259490052624e+01
68 | 1.721370276108077135e+01
69 | 1.746543424102250341e+01
70 | 1.771598473288305442e+01
71 | 1.796653767411944003e+01
72 | 1.821765658501893270e+01
73 | 1.847053788120423334e+01
74 | 1.872257696809484173e+01
75 | 1.897394097105746269e+01
76 | 1.922542454726159988e+01
77 | 1.947607703355479103e+01
78 | 1.972678839839982601e+01
79 | 1.997802698857814363e+01
80 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CosmoGraphNet
 2 | 
 3 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.6485804.svg)](https://doi.org/10.5281/zenodo.6485804)
 4 | [![arXiv](https://img.shields.io/badge/arXiv-2204.13713-B31B1B.svg)](http://arxiv.org/abs/2204.13713)
 5 | 
 6 | Graph Neural Networks to predict the cosmological parameters and the galaxy power spectrum from galaxy catalogues.
 7 | 
 8 | A graph is created from a galaxy catalogue with information about the 3D position and intrinsic galactic properties. A Graph Neural Network is then applied to infer the cosmological parameters or the galaxy power spectrum. Galaxy catalogues are extracted from the [CAMELS](https://camels.readthedocs.io/en/latest/index.html) hydrodynamic simulations, which are specially suited for Machine Learning purposes. Neural network architectures are defined using the package [PyTorch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/).
 9 | 
10 | See the paper [arXiv:2204.13713](https://arxiv.org/abs/2204.13713) for more details.
11 | 
12 | <img src="visualize_graph_10.png" width="500">
13 | 
14 | 
15 | ## Description of the codes
16 | 
17 | Here is a brief overview of the codes included:
18 | 
19 | - `main.py`: main driver to train and test the network.
20 | 
21 | - `hyperparameters.py`: script with the definition of the hyperparameters employed by the networks.
22 | 
23 | - `crosstest.py`: tests a pre-trained model.
24 | 
25 | - `hyperparams_optimization.py`: optimize the hyperparameters using `optuna`.
26 | 
27 | - `ps_test.py`: tests the power spectrum neural networks in point distributions with different clustering properties.
28 | 
29 | - `visualize_graphs.py`: display graphs from galaxy catalogues in 2D or 3D.
30 | 
31 | 
32 | The folder `Source` contains scripts with auxiliary routines:
33 | 
34 | * `constants.py`: basic constants and initialization.
35 | 
36 | * `load_data.py`: contains routines to load data from simulation files.
37 | 
38 | * `plotting.py`: includes functions for displaying the results from the neural nets.
39 | 
40 | * `metalayer.py`: includes the definition of the Graph Neural Networks architecture.
41 | 
42 | * `training.py`: includes routines for training and testing the net.
43 | 
44 | 
45 | ## Requisites
46 | 
47 | The libraries required for training the models and computing some statistics are:
48 | * `numpy`
49 | * `pytorch`
50 | * `pytorch-geometric`
51 | * `matplotlib`
52 | * `scipy`
53 | * `sklearn`
54 | * [`optuna`](https://optuna.readthedocs.io/en/stable/index.html) (only for optimization in `hyperparams_optimization.py`)
55 | * [`Pylians`](https://pylians3.readthedocs.io/en/master/) (only for computing power spectra in `ps_test.py`)
56 | 
57 | 
58 | ## Usage
59 | 
60 | The codes implemented here are designed to train Graph Neural Networks for two tasks. The desired task is chosen in `hyperparameters.py` with the `outmode` flag:
61 | 1. Infer cosmological parameters from galaxy catalogues. Set `outmode = "cosmo"`.
62 | 2. Predict the power spectrum from galaxy catalogues. Set `outmode = "ps"`.
63 | 
64 | Here is some advice on how to use the scripts described above:
65 | 1. To perform a search of the optimal hyperparameters, run `hyperparams_optimization.py`.
66 | 2. To train a model with a given set of parameters defined in `hyperparameters.py`, run `main.py`. The hyperparameters currently present in `hyperparameters.py` correspond to the optimal values for each suite when all galactic features are employed (see the paper). Modify them according to the task.
67 | 3. Once a model is trained to perform cosmological parameter inference, run `crosstest.py` to test it on the training simulation suite and cross-test it on the other one included in CAMELS (IllustrisTNG and SIMBA). It needs a pretrained model.
68 | 4. If a model has been trained to predict the power spectrum from CAMELS galaxy catalogues, evaluate its extrapolation performance on different point distributions by running `ps_test.py`. It needs a pretrained model.
69 | 
70 | 
71 | ## Citation
72 | 
73 | If you use the code, please link this repository, and cite [arXiv:2204.13713](https://arxiv.org/abs/2204.13713) and the DOI [10.5281/zenodo.6485804](https://doi.org/10.5281/zenodo.6485804).
74 | 
75 | 
76 | ## Contact
77 | 
78 | Feel free to contact me at <pablo.villanueva.domingo@gmail.com> for comments, questions and suggestions.
79 | 


--------------------------------------------------------------------------------
/Source/constants.py:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------------------------
 2 | # List of constants and some common functions
 3 | # Author: Pablo Villanueva Domingo
 4 | # Last update: 4/22
 5 | #----------------------------------------------------------------------
 6 | 
 7 | import numpy as np
 8 | import torch
 9 | import os
10 | import random
11 | 
# Seed the Python, NumPy and PyTorch random number generators with the same fixed value
random.seed(12345)
np.random.seed(12345)
torch.manual_seed(12345)

# Pick the device: the GPU when CUDA is available, the CPU otherwise
if not torch.cuda.is_available():
    print('CUDA Not Available')
    device = torch.device('cpu')
else:
    print("CUDA Available")
    device = torch.device('cuda')

#--- PARAMETERS AND CONSTANTS ---#

# Reduced Hubble constant
hred = 0.7

# Root path for simulations
simpathroot = "/projects/QUIJOTE/CAMELS/Sims/"

# Box size in comoving kpc/h
boxsize = 25000.0

# Fraction of the dataset used for validation
valid_size = 0.15

# Fraction of the dataset used for testing
test_size = 0.15

# Batch size
batch_size = 25

# Number of k bins in the power spectrum
ps_size = 79
44 | 
45 | #--- FUNCTIONS ---#
46 | 
# Choose color depending on the CAMELS simulation suite
def colorsuite(suite):
    """Return the plotting color assigned to a CAMELS simulation suite.

    "IllustrisTNG" maps to "purple" and "SIMBA" to "dodgerblue"; any other
    value yields None.
    """
    for name, color in (("IllustrisTNG", "purple"), ("SIMBA", "dodgerblue")):
        if suite == name:
            return color
    return None
51 | 


--------------------------------------------------------------------------------
/Source/load_data.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------
  2 | # Routine for loading the CAMELS galaxy catalogues
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------
  6 | 
  7 | import h5py
  8 | from torch_geometric.data import Data, DataLoader
  9 | from Source.constants import *
 10 | from Source.plotting import *
 11 | import scipy.spatial as SS
 12 | 
# Threshold applied in sim_graph: only subhalos whose number of star particles
# is strictly greater than this value are kept as galaxies
Nstar_th = 20   # Minimum number of stellar particles required to consider a galaxy
 14 | 
 15 | # Normalize CAMELS parameters
 16 | def normalize_params(params):
 17 | 
 18 |     minimum = np.array([0.1, 0.6, 0.25, 0.25, 0.5, 0.5])
 19 |     maximum = np.array([0.5, 1.0, 4.00, 4.00, 2.0, 2.0])
 20 |     params = (params - minimum)/(maximum - minimum)
 21 |     return params
 22 | 
 23 | # Normalize power spectrum
 24 | def normalize_ps(ps):
 25 |     mean, std = ps.mean(axis=0), ps.std(axis=0)
 26 |     normps = (ps - mean)/std
 27 |     return normps
 28 | 
 29 | # Compute KDTree and get edges and edge features
 30 | def get_edges(pos, r_link, use_loops):
 31 | 
 32 |     # 1. Get edges
 33 | 
 34 |     # Create the KDTree and look for pairs within a distance r_link
 35 |     # Boxsize normalize to 1
 36 |     kd_tree = SS.KDTree(pos, leafsize=16, boxsize=1.0001)
 37 |     edge_index = kd_tree.query_pairs(r=r_link, output_type="ndarray")
 38 | 
 39 |     # Add reverse pairs
 40 |     reversepairs = np.zeros((edge_index.shape[0],2))
 41 |     for i, pair in enumerate(edge_index):
 42 |         reversepairs[i] = np.array([pair[1], pair[0]])
 43 |     edge_index = np.append(edge_index, reversepairs, 0)
 44 | 
 45 |     edge_index = edge_index.astype(int)
 46 | 
 47 |     # Write in pytorch-geometric format
 48 |     edge_index = edge_index.T
 49 |     num_pairs = edge_index.shape[1]
 50 | 
 51 |     # 2. Get edge attributes
 52 | 
 53 |     row, col = edge_index
 54 |     diff = pos[row]-pos[col]
 55 | 
 56 |     # Take into account periodic boundary conditions, correcting the distances
 57 |     for i, pos_i in enumerate(diff):
 58 |         for j, coord in enumerate(pos_i):
 59 |             if coord > r_link:
 60 |                 diff[i,j] -= 1.  # Boxsize normalize to 1
 61 |             elif -coord > r_link:
 62 |                 diff[i,j] += 1.  # Boxsize normalize to 1
 63 | 
 64 |     # Get translational and rotational invariant features
 65 |     
 66 |     # Distance
 67 |     dist = np.linalg.norm(diff, axis=1)
 68 |     
 69 |     # Centroid of galaxy catalogue
 70 |     centroid = np.mean(pos,axis=0)
 71 | 
 72 |     #Vectors of node and neighbor
 73 |     row = (pos[row] - centroid)
 74 |     col = (pos[col] - centroid)
 75 | 
 76 |    # Take into account periodic boundary conditions: row and col
 77 |     for i, pos_i in enumerate(row):
 78 |         for j, coord in enumerate(pos_i):
 79 |             if coord > 0.5:
 80 |                 row[i,j] -= 1.  # Boxsize normalize to 1
 81 |                 
 82 |             elif -coord > 0.5:
 83 |                 row[i,j] += 1.  # Boxsize normalize to 1                                                
 84 | 
 85 |     for i, pos_i in enumerate(col):
 86 |         for j, coord in enumerate(pos_i):
 87 |             if coord > 0.5:
 88 |                 col[i,j] -= 1.  # Boxsize normalize to 1
 89 |                 
 90 |             elif -coord > 0.5:
 91 |                 col[i,j] += 1.  # Boxsize normalize to 1
 92 |                 
 93 |     # Normalizing
 94 |     unitrow = row/np.linalg.norm(row, axis = 1).reshape(-1, 1)
 95 |     unitcol = col/np.linalg.norm(col, axis = 1).reshape(-1, 1)
 96 |     unitdiff = diff/dist.reshape(-1,1)
 97 |     
 98 |     # Dot products between unit vectors
 99 |     cos1 = np.array([np.dot(unitrow[i,:].T,unitcol[i,:]) for i in range(num_pairs)])
100 |     cos2 = np.array([np.dot(unitrow[i,:].T,unitdiff[i,:]) for i in range(num_pairs)])
101 | 
102 |     # Normalize distance by linking radius
103 |     dist /= r_link
104 | 
105 |     # Concatenate to get all edge attributes
106 |     edge_attr = np.concatenate([dist.reshape(-1,1), cos1.reshape(-1,1), cos2.reshape(-1,1)], axis=1)
107 | 
108 |     # Add loops
109 |     if use_loops:
110 |         loops = np.zeros((2,pos.shape[0]),dtype=int)
111 |         atrloops = np.zeros((pos.shape[0],3))
112 |         for i, posit in enumerate(pos):
113 |             loops[0,i], loops[1,i] = i, i
114 |             atrloops[i,0], atrloops[i,1], atrloops[i,2] = 0., 1., 0.
115 |         edge_index = np.append(edge_index, loops, 1)
116 |         edge_attr = np.append(edge_attr, atrloops, 0)
117 |     edge_index = edge_index.astype(int)
118 | 
119 |     return edge_index, edge_attr
120 | 
# Routine to create a cosmic graph from a galaxy catalogue
# simnumber: number of simulation
# param_file: file with the value of the cosmological + astrophysical parameters
# hparams: hyperparameters class
def sim_graph(simnumber, param_file, hparams):
    """Create the graph of a single simulation from its galaxy catalogue.

    Parameters
    ----------
    simnumber : index of the simulation, used both in the catalogue path and
        to select the row of the target file
    param_file : path of the target file: a text table of parameters when
        hparams.outmode is "cosmo", a .npy array of power spectra when it is "ps"
    hparams : object providing simsuite, simset, snap, r_link, only_positions,
        outmode and pred_params

    Returns
    -------
    torch_geometric Data object with node features x, target y, global
    feature u, edge_index and edge_attr.

    Raises
    ------
    ValueError
        If hparams.outmode is neither "cosmo" nor "ps".
    """

    # Get some hyperparameters
    simsuite, simset, r_link = hparams.simsuite, hparams.simset, hparams.r_link
    only_positions, outmode, pred_params = hparams.only_positions, hparams.outmode, hparams.pred_params

    # Fail early with a clear message instead of leaving the target undefined
    if outmode not in ("cosmo", "ps"):
        raise ValueError("Unknown outmode '" + str(outmode) + "', expected 'cosmo' or 'ps'")

    # Name of the galaxy catalogue
    simpath = simpathroot + simsuite + "/"+simset+"_"
    catalogue = simpath + str(simnumber)+"/fof_subhalo_tab_0"+hparams.snap+".hdf5"

    # Read the catalogue; the context manager closes the file even if a read fails
    with h5py.File(catalogue, 'r') as f:
        pos   = f['/Subhalo/SubhaloPos'][:]/boxsize
        Mstar = f['/Subhalo/SubhaloMassType'][:,4] #Msun/h
        Rstar = f["Subhalo/SubhaloHalfmassRadType"][:,4]
        Metal = f["Subhalo/SubhaloStarMetallicity"][:]
        Vmax = f["Subhalo/SubhaloVmax"][:]
        Nstar = f['/Subhalo/SubhaloLenType'][:,4]       #number of stars

    # Some simulations are slightly outside the box, correct it
    pos[np.where(pos<0.0)]+=1.0
    pos[np.where(pos>1.0)]-=1.0

    # Select only galaxies with more than Nstar_th star particles
    indexes = np.where(Nstar>Nstar_th)[0]
    pos     = pos[indexes]
    Mstar   = Mstar[indexes]
    Rstar   = Rstar[indexes]
    Metal   = Metal[indexes]
    Vmax    = Vmax[indexes]

    # Get the output to be predicted by the GNN, either the cosmo parameters or the power spectrum
    if outmode=="cosmo":
        # Read the value of the cosmological & astrophysical parameters
        # (first and last columns of the table are dropped)
        paramsfile = np.loadtxt(param_file, dtype=str)
        params = np.array(paramsfile[simnumber,1:-1],dtype=np.float32)
        params = normalize_params(params)
        params = params[:pred_params]   # Consider only the first parameters, up to pred_params
        y = np.reshape(params, (1,params.shape[0]))

    # Read the power spectra
    else:
        ps = np.load(param_file)
        ps = ps[simnumber]
        ps = np.log10(ps)
        y = np.reshape(ps, (1,ps_size))

    # Number of galaxies as global feature
    u = np.log10(pos.shape[0]).reshape(1,1)

    # Logarithm of the galactic properties, shifted by one to avoid log10(0)
    Mstar = np.log10(1.+ Mstar)
    Rstar = np.log10(1.+ Rstar)
    Metal = np.log10(1.+ Metal)
    Vmax = np.log10(1. + Vmax)

    # Node features
    tab = np.column_stack((Mstar, Rstar, Metal, Vmax))
    x = torch.tensor(tab, dtype=torch.float32)

    # Self loops are only added when node features are employed.
    # NOTE(review): x keeps the galactic features even when only_positions is
    # set; the previous code zeroed `tab` only after x had been built, which
    # had no effect. Presumably the network ignores x in that mode - confirm.
    use_loops = not only_positions

    # Get edges and edge features
    edge_index, edge_attr = get_edges(pos, r_link, use_loops)

    # Construct the graph
    graph = Data(x=x,
                 y=torch.tensor(y, dtype=torch.float32),
                 u=torch.tensor(u, dtype=torch.float32),
                 edge_index=torch.tensor(edge_index, dtype=torch.long),
                 edge_attr=torch.tensor(edge_attr, dtype=torch.float32))

    return graph
205 | 
206 | 
# Split training and validation sets
def split_datasets(dataset):
    """Shuffle `dataset` in place and split it into train, validation and test loaders.

    After shuffling, the first `valid_size` fraction of the list goes to
    validation, the following `test_size` fraction to testing, and the rest
    to training. All three loaders use `batch_size` and shuffle their samples.

    Returns the tuple (train_loader, valid_loader, test_loader).
    """

    random.shuffle(dataset)

    total = len(dataset)
    valid_end = int(np.floor(valid_size * total))
    test_end = valid_end + int(np.floor(test_size * total))

    subsets = (dataset[test_end:], dataset[:valid_end], dataset[valid_end:test_end])
    train_loader, valid_loader, test_loader = [
        DataLoader(subset, batch_size=batch_size, shuffle=True) for subset in subsets
    ]

    return train_loader, valid_loader, test_loader
225 | 
226 | ######################################################################################
227 | 
# Main routine to load data and create the dataset
def create_dataset(hparams):
    """Build the list of graphs, one per simulation, for the task set in hparams.outmode.

    In "cosmo" mode only the suite in hparams.simsuite is loaded. In "ps" mode
    both suites are loaded, for the initial snapshot and for snapshots 18 and 10.
    hparams.simsuite and hparams.snap are overwritten in that process and are
    not restored afterwards.

    Returns the list of graphs produced by sim_graph.
    """

    dataset = []

    def ps_path():
        # Power spectrum file for the suite and snapshot currently set in hparams
        return "PS_files/Pk_galaxies_"+hparams.simsuite+"_LH_"+hparams.snap+"_kmax=20.0.npy"

    def add_simulations():
        # One graph per simulation, read with the currently selected target file
        dataset.extend(sim_graph(idx, param_file, hparams) for idx in range(hparams.n_sims))

    predicting_ps = hparams.outmode == "ps"

    # Target file depending on the task: inferring cosmo parameters or predicting power spectrum
    if hparams.outmode == "cosmo":
        param_file = "/projects/QUIJOTE/CAMELS/Sims/CosmoAstroSeed_params_"+hparams.simsuite+".txt"
    elif predicting_ps:
        param_file = ps_path()

    add_simulations()

    if predicting_ps:
        # Add the other suite for predicting the power spectrum
        hparams.simsuite = hparams.flip_suite()
        param_file = ps_path()
        add_simulations()

        # Add other snapshots from other redshifts
        # Snapshot redshift
        # 004: z=3, 010: z=2, 014: z=1.5, 018: z=1, 024: z=0.5, 033: z=0
        for snap in (18, 10):
            hparams.snap = str(snap)

            # Suite left selected by the previous step
            param_file = ps_path()
            add_simulations()

            # And the other suite at the same snapshot
            hparams.simsuite = hparams.flip_suite()
            param_file = ps_path()
            add_simulations()

    gals = np.array([graph.x.shape[0] for graph in dataset])
    print("Total of galaxies", gals.sum(0), "Mean of", gals.mean(0),"per simulation, Std of", gals.std(0))

    return dataset
273 | 


--------------------------------------------------------------------------------
/Source/metalayer.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------
  2 | # Graph Neural Network architecture
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------
  6 | 
  7 | import torch
  8 | import torch.nn.functional as F
  9 | from torch_cluster import knn_graph, radius_graph
 10 | from torch.nn import Sequential, Linear, ReLU, ModuleList
 11 | from torch_geometric.nn import MessagePassing, MetaLayer, LayerNorm
 12 | from torch_scatter import scatter_mean, scatter_sum, scatter_max, scatter_min, scatter_add
 13 | from torch_geometric.nn import global_mean_pool, global_max_pool, global_add_pool
 14 | from Source.constants import *
 15 | 
 16 | # Model for updating edge attritbutes
 17 | class EdgeModel(torch.nn.Module):
 18 |     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
 19 |         super().__init__()
 20 | 
 21 |         self.residuals = residuals
 22 |         self.norm = norm
 23 | 
 24 |         layers = [Linear(node_in*2 + edge_in, hid_channels),
 25 |                   ReLU(),
 26 |                   Linear(hid_channels, edge_out)]
 27 |         if self.norm:  layers.append(LayerNorm(edge_out))
 28 | 
 29 |         self.edge_mlp = Sequential(*layers)
 30 | 
 31 | 
 32 |     def forward(self, src, dest, edge_attr, u, batch):
 33 |         # src, dest: [E, F_x], where E is the number of edges.
 34 |         # edge_attr: [E, F_e]
 35 |         # u: [B, F_u], where B is the number of graphs.
 36 |         # batch: [E] with max entry B - 1.
 37 | 
 38 |         out = torch.cat([src, dest, edge_attr], dim=1)
 39 |         #out = torch.cat([src, dest, edge_attr, u[batch]], 1)
 40 |         out = self.edge_mlp(out)
 41 |         if self.residuals:
 42 |             out = out + edge_attr
 43 |         return out
 44 | 
 45 | # Model for updating node attritbutes
 46 | class NodeModel(torch.nn.Module):
 47 |     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, residuals=True, norm=False):
 48 |         super().__init__()
 49 | 
 50 |         self.residuals = residuals
 51 |         self.norm = norm
 52 | 
 53 |         layers = [Linear(node_in + 3*edge_out + 1, hid_channels),
 54 |                   ReLU(),
 55 |                   Linear(hid_channels, node_out)]
 56 |         if self.norm:  layers.append(LayerNorm(node_out))
 57 | 
 58 |         self.node_mlp = Sequential(*layers)
 59 | 
 60 |     def forward(self, x, edge_index, edge_attr, u, batch):
 61 |         # x: [N, F_x], where N is the number of nodes.
 62 |         # edge_index: [2, E] with max entry N - 1.
 63 |         # edge_attr: [E, F_e]
 64 |         # u: [B, F_u]
 65 |         # batch: [N] with max entry B - 1.
 66 | 
 67 |         row, col = edge_index
 68 |         out = edge_attr
 69 | 
 70 |         # Multipooling layer
 71 |         out1 = scatter_add(out, col, dim=0, dim_size=x.size(0))
 72 |         out2 = scatter_max(out, col, dim=0, dim_size=x.size(0))[0]
 73 |         out3 = scatter_mean(out, col, dim=0, dim_size=x.size(0))
 74 |         out = torch.cat([x, out1, out2, out3, u[batch]], dim=1)
 75 | 
 76 |         out = self.node_mlp(out)
 77 |         if self.residuals:
 78 |             out = out + x
 79 |         return out
 80 | 
 81 | # First edge model for updating edge attritbutes when no initial node features are provided
 82 | class EdgeModelIn(torch.nn.Module):
 83 |     def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
 84 |         super().__init__()
 85 | 
 86 |         self.norm = norm
 87 | 
 88 |         layers = [Linear(edge_in, hid_channels),
 89 |                   ReLU(),
 90 |                   Linear(hid_channels, edge_out)]
 91 |         if self.norm:  layers.append(LayerNorm(edge_out))
 92 | 
 93 |         self.edge_mlp = Sequential(*layers)
 94 | 
 95 | 
 96 |     def forward(self, src, dest, edge_attr, u, batch):
 97 | 
 98 |         out = self.edge_mlp(edge_attr)
 99 | 
100 |         return out
101 | 
# First node model for updating node attributes when no initial node features are provided
class NodeModelIn(torch.nn.Module):
    """Two-layer MLP that creates node features from pooled edge attributes.

    Edge attributes are aggregated at the node given by the second row of
    edge_index with three poolings (sum, max, mean) and concatenated with the
    global feature of the node's graph. The input width assumes a single
    global feature. The input node features `x` are only used to get the
    number of nodes. With `norm` a LayerNorm is appended to the MLP.
    `node_in` and `edge_in` are accepted but not used.
    """
    def __init__(self, node_in, node_out, edge_in, edge_out, hid_channels, norm=False):
        super().__init__()

        self.norm = norm

        mlp_in = 3*edge_out + 1
        blocks = [Linear(mlp_in, hid_channels), ReLU(), Linear(hid_channels, node_out)]
        if norm:
            blocks.append(LayerNorm(node_out))

        self.node_mlp = Sequential(*blocks)

    def forward(self, x, edge_index, edge_attr, u, batch):

        _, col = edge_index
        num_nodes = x.size(0)

        # Multipooling layer
        pooled_sum = scatter_add(edge_attr, col, dim=0, dim_size=num_nodes)
        pooled_max = scatter_max(edge_attr, col, dim=0, dim_size=num_nodes)[0]
        pooled_mean = scatter_mean(edge_attr, col, dim=0, dim_size=num_nodes)
        features = torch.cat([pooled_sum, pooled_max, pooled_mean, u[batch]], dim=1)

        return self.node_mlp(features)
130 | 
131 | # Graph Neural Network architecture, based on the Graph Network (arXiv:1806.01261)
132 | # Employing the MetaLayer implementation in Pytorch-Geometric
class GNN(torch.nn.Module):
    """Stack of MetaLayer graph blocks followed by global pooling and an MLP head.

    The first block is an encoder: with ``only_positions`` it uses
    NodeModelIn/EdgeModelIn (node features are created from the aggregated edge
    attributes), otherwise NodeModel/EdgeModel without residuals. It is followed by
    ``n_layers - 1`` hidden blocks, all ``hidden_channels`` wide.
    """

    def __init__(self, node_features, n_layers, hidden_channels, linkradius, dim_out, only_positions, residuals=True):
        super().__init__()

        self.n_layers = n_layers
        self.link_r = linkradius
        self.dim_out = dim_out
        self.only_positions = only_positions

        # Every block outputs hidden_channels-wide node and edge features
        node_out = edge_out = hid_channels = hidden_channels

        # Encoder input sizes: node_features node columns (0 if only_positions is used)
        # and 3 edge features: |p_i-p_j|, p_i*p_j, p_i*(p_i-p_j)
        node_in, edge_in = node_features, 3

        # Encoder graph block
        if self.only_positions:
            encoder = MetaLayer(node_model=NodeModelIn(node_in, node_out, edge_in, edge_out, hid_channels),
                                edge_model=EdgeModelIn(node_in, node_out, edge_in, edge_out, hid_channels))
        else:
            encoder = MetaLayer(node_model=NodeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=False),
                                edge_model=EdgeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=False))

        blocks = [encoder]

        # From here on the blocks consume what the previous block produced
        node_in, edge_in = node_out, edge_out

        # Hidden graph blocks
        for _ in range(n_layers-1):
            blocks.append(
                MetaLayer(node_model=NodeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=residuals),
                          edge_model=EdgeModel(node_in, node_out, edge_in, edge_out, hid_channels, residuals=residuals))
            )

        self.layers = ModuleList(blocks)

        # Final aggregation layer: input is three pooled copies of the node features plus one global column
        self.outlayer = Sequential(
            Linear(3*node_out+1, hid_channels),
            ReLU(),
            Linear(hid_channels, hid_channels),
            ReLU(),
            Linear(hid_channels, hid_channels),
            ReLU(),
            Linear(hid_channels, self.dim_out),
        )

    def forward(self, data):
        """Run the graph blocks on ``data`` and return the output of the final MLP."""
        h, edge_index, edge_attr, u = data.x, data.edge_index, data.edge_attr, data.u

        # Message passing layers; the third return value of each block is discarded
        for layer in self.layers:
            h, edge_attr, _ = layer(h, edge_index, edge_attr, u, data.batch)

        # Multipooling layer: sum, mean and max of the node features of each graph
        pooled = [
            global_add_pool(h, data.batch),
            global_mean_pool(h, data.batch),
            global_max_pool(h, data.batch),
        ]

        # Final MLP on the pooled features concatenated with the global feature
        return self.outlayer(torch.cat([*pooled, u], dim=1))
205 | 


--------------------------------------------------------------------------------
/Source/plotting.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------------------------
  2 | # Script for plotting some statistics
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------------------------
  6 | 
  7 | import matplotlib.pyplot as plt
  8 | from Source.constants import *
  9 | from sklearn.metrics import r2_score
 10 | from matplotlib.offsetbox import AnchoredText
 11 | from matplotlib.ticker import MultipleLocator
 12 | from matplotlib.lines import Line2D
 13 | import matplotlib as mpl
 14 | mpl.rcParams.update({'font.size': 12})
 15 | 
 16 | # Plot loss trends
 17 | def plot_losses(train_losses, valid_losses, test_loss, err_min, hparams):
 18 | 
 19 |     epochs = hparams.n_epochs
 20 |     plt.plot(range(epochs), np.exp(train_losses), "r-",label="Training")
 21 |     plt.plot(range(epochs), np.exp(valid_losses), "b:",label="Validation")
 22 |     plt.legend()
 23 |     plt.yscale("log")
 24 |     plt.title(f"Test loss: {test_loss:.2e}, Minimum relative error: {err_min:.2e}")
 25 |     plt.savefig("Plots/loss_"+hparams.name_model()+".png", bbox_inches='tight', dpi=300)
 26 |     plt.close()
 27 | 
 28 | # Remove normalization of cosmo parameters
 29 | def denormalize(trues, outputs, errors, minpar, maxpar):
 30 | 
 31 |     trues = minpar + trues*(maxpar - minpar)
 32 |     outputs = minpar + outputs*(maxpar - minpar)
 33 |     errors = errors*(maxpar - minpar)
 34 |     return trues, outputs, errors
 35 | 
 36 | # Scatter plot of true vs predicted cosmological parameter
 37 | def plot_out_true_scatter(hparams, cosmoparam, testsuite = False):
 38 | 
 39 |     figscat, axscat = plt.subplots(figsize=(6,5))
 40 |     suite, simset = hparams.simsuite, hparams.simset
 41 |     col = colorsuite(suite)
 42 | 
 43 |     # Load true values and predicted means and standard deviations
 44 |     outputs = np.load("Outputs/outputs_"+hparams.name_model()+".npy")
 45 |     trues = np.load("Outputs/trues_"+hparams.name_model()+".npy")
 46 |     errors = np.load("Outputs/errors_"+hparams.name_model()+".npy")
 47 | 
 48 |     # There is a (0,0) initial point, fix it
 49 |     outputs = outputs[1:]
 50 |     trues = trues[1:]
 51 |     errors = errors[1:]
 52 | 
 53 |     # Choose cosmo param and denormalize
 54 |     if cosmoparam=="Om":
 55 |         minpar, maxpar = 0.1, 0.5
 56 |         outputs, trues, errors = outputs[:,0], trues[:,0], errors[:,0]
 57 |     elif cosmoparam=="Sig":
 58 |         minpar, maxpar = 0.6, 1.0
 59 |         outputs, trues, errors = outputs[:,1], trues[:,1], errors[:,1]
 60 |     trues, outputs, errors = denormalize(trues, outputs, errors, minpar, maxpar)
 61 | 
 62 |     # Compute the number of points lying within 1 or 2 sigma regions from their uncertainties
 63 |     cond_success_1sig, cond_success_2sig = np.abs(outputs-trues)<=np.abs(errors), np.abs(outputs-trues)<=2.*np.abs(errors)
 64 |     tot_points = outputs.shape[0]
 65 |     successes1sig, successes2sig = outputs[cond_success_1sig].shape[0], outputs[cond_success_2sig].shape[0]
 66 | 
 67 |     # Compute the linear correlation coefficient
 68 |     r2 = r2_score(trues,outputs)
 69 |     err_rel = np.mean(np.abs((trues - outputs)/(trues)), axis=0)
 70 |     chi2s = (outputs-trues)**2./errors**2.
 71 |     chi2 = chi2s[chi2s<1.e4].mean()    # Remove some outliers which make explode the chi2
 72 |     print("R^2={:.2f}, Relative error={:.2e}, Chi2={:.2f}".format(r2, err_rel, chi2))
 73 |     print("A fraction of succeses of", successes1sig/tot_points, "at 1 sigma,", successes2sig/tot_points, "at 2 sigmas")
 74 | 
 75 |     # Sort by true value
 76 |     indsort = trues.argsort()
 77 |     outputs, trues, errors = outputs[indsort], trues[indsort], errors[indsort]
 78 | 
 79 |     # Compute mean and std region within several bins
 80 |     truebins, binsize = np.linspace(trues[0], trues[-1], num=10, retstep=True)
 81 |     means, stds = [], []
 82 |     for i, bin in enumerate(truebins[:-1]):
 83 |         cond = (trues>=bin) & (trues<bin+binsize)
 84 |         outbin = outputs[cond]
 85 |         if len(outbin)==0:
 86 |             outmean, outstd = np.nan, np.nan    # Avoid error message from some bins without points
 87 |         else:
 88 |             outmean, outstd = outbin.mean(), outbin.std()
 89 |         means.append(outmean); stds.append(outstd)
 90 |     means, stds = np.array(means), np.array(stds)
 91 |     print("Std in bins:",stds[~np.isnan(stds)].mean(),"Mean predicted uncertainty:", np.abs(errors.mean()))
 92 | 
 93 |     # Plot predictions vs true values
 94 |     #truemin, truemax = trues.min(), trues.max()
 95 |     truemin, truemax = minpar-0.05, maxpar+0.05
 96 |     #axscat.plot([truemin, truemax], [0., 0.], "r-")
 97 |     #axscat.errorbar(trues, outputs-trues, yerr=errors, color=col, marker="o", ls="none", markersize=0.5, elinewidth=0.5, zorder=10)
 98 |     axscat.plot([truemin, truemax], [truemin, truemax], "r-")
 99 |     axscat.errorbar(trues, outputs, yerr=errors, color=col, marker="o", ls="none", markersize=0.5, elinewidth=0.5, zorder=10)
100 | 
101 |     # Legend
102 |     if cosmoparam=="Om":
103 |         par = "\t"+r"$\Omega_m$"
104 |     elif cosmoparam=="Sig":
105 |         par = "\t"+r"$\sigma_8$"
106 |     leg = par+"\n"+"$R^2$={:.2f}".format(r2)+"\n"+"$\epsilon$={:.1f} %".format(100.*err_rel)+"\n"+"$\chi^2$={:.2f}".format(chi2)
107 |     #leg = par+"\n"+"$R^2$={:.2f}".format(r2)+"\n"+"$\epsilon$={:.2e}".format(err_rel)
108 |     at = AnchoredText(leg, frameon=True, loc="upper left")
109 |     at.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
110 |     axscat.add_artist(at)
111 | 
112 |     # Labels etc
113 |     axscat.set_xlim([truemin, truemax])
114 |     axscat.set_ylim([truemin, truemax])
115 |     if testsuite:
116 |         axscat.set_ylim([truemin, truemax+0.1])
117 |     axscat.set_ylabel(r"Prediction")
118 |     axscat.set_xlabel(r"Truth")
119 |     axscat.grid()
120 | 
121 |     # Title, indicating which are the training and testing suites
122 |     if hparams.only_positions:
123 |         usefeatures = "Only positions"
124 |     else:
125 |         usefeatures = r"Positions + $V_{\rm max}, M_*, R_*, Z_*$"
126 |     props = dict(boxstyle='round', facecolor='white')#, alpha=0.5)
127 |     axscat.text((minpar + maxpar)/2-0.02, minpar, usefeatures, color="k", bbox=props )
128 | 
129 | 
130 |     if testsuite:
131 |         titlefig = "Training in "+hparams.flip_suite()+", testing in "+suite
132 |         #titlefig = "Cross test in "+suite+usefeatures
133 |         namefig = "out_true_testsuite_"+cosmoparam+"_"+hparams.name_model()
134 |     else:
135 |         titlefig = "Training in "+suite+", testing in "+suite
136 |         #titlefig = "Train in "+suite+usefeatures
137 |         namefig = "out_true_"+cosmoparam+"_"+hparams.name_model()
138 |     axscat.set_title(titlefig)
139 | 
140 |     figscat.savefig("Plots/"+namefig+".png", bbox_inches='tight', dpi=300)
141 |     plt.close(figscat)
142 | 
143 | # Plot predicted and target power spectra
def plot_ps(hparams):
    """Plot a few predicted and target power spectra plus their relative error.

    Loads ``Outputs/outputsPS_<model>.npy`` and ``Outputs/truesPS_<model>.npy``
    (stored as log10 values), prints R^2 and the mean relative error over all
    samples, draws up to five randomly chosen spectra and saves the figure as
    ``Plots/ps_<model>.png``.

    Args:
        hparams: hyperparameters object; only ``name_model()`` is used.
    """
    # gridspec is not imported explicitly at the top of this module; importing it
    # here avoids relying on the star-import from Source.constants to provide it
    from matplotlib import gridspec

    figscat = plt.figure(figsize=(6,6))
    gs = gridspec.GridSpec(2,1,height_ratios=[6,2])
    gs.update(hspace=0.0)
    axscat=plt.subplot(gs[0])
    axerr=plt.subplot(gs[1])

    # Load true values and predictions
    outputs = np.load("Outputs/outputsPS_"+hparams.name_model()+".npy")
    trues = np.load("Outputs/truesPS_"+hparams.name_model()+".npy")

    # The saved arrays start with an all-zero row, drop it
    outputs = outputs[1:]
    trues = trues[1:]
    trues, outputs = 10.**trues, 10.**outputs

    # Compute the R^2 score and the mean relative error over all samples
    r2 = r2_score(trues,outputs)
    err_rel = np.mean(np.abs((trues - outputs)/(trues)))
    print("R^2={:.2f}, Relative error={:.2e}".format(r2, err_rel))

    # Take up to 5 randomly chosen elements to plot (fewer if fewer are available)
    npoints = min(5, trues.shape[0])
    indexes = np.random.choice(trues.shape[0], npoints, replace=False)
    outputs = outputs[indexes]
    trues = trues[indexes]
    colors = ["r","b","g","orange","purple"]

    kvec = np.loadtxt("PS_files/k_values.txt")

    for i in range(npoints):
        axscat.plot(kvec,trues[i], color=colors[i], linestyle="-")
        axscat.plot(kvec,outputs[i], color=colors[i], linestyle=":")

    # Labels etc
    axscat.set_yscale("log")
    axscat.set_xscale("log")
    axscat.set_ylabel(r"$P(k)$")
    axscat.grid(which="both", alpha=0.3)

    axscat.set_xlim(kvec[0],kvec[-1])

    customlegend = [
        Line2D([0], [0], color="k", linestyle="-", lw=4, label=r"Truth"),
        Line2D([0], [0], color="k", linestyle=":", lw=4, label=r"GNN"),
    ]
    axscat.legend(handles=customlegend, loc = "upper right")

    # Relative error per k, averaged over the plotted samples only
    errk = np.mean(np.abs((trues - outputs)/(trues)), axis=0)*100.
    axerr.plot(kvec,errk, color="k", linestyle="-")

    axerr.set_xlim(kvec[0],kvec[-1])
    axerr.set_xscale("log")
    axerr.set_ylabel(r"$\epsilon$ (%)")
    axerr.set_xlabel(r"$k$ [hMpc$^{-1}$]")

    axerr.yaxis.set_minor_locator(MultipleLocator(2.5))
    axerr.grid(which="both", alpha=0.3)

    # Raw strings, so that the LaTeX backslash is not read as an escape
    axscat.set_title(r"$R^2$={:.2f}".format(r2)+r"$, \epsilon$={:.2f} %".format(100.*err_rel))

    # The original referenced an undefined name here; "ps_" matches the file
    # name that hyperparams_optimization.py looks for
    namefig = "ps_"+hparams.name_model()
    figscat.savefig("Plots/"+namefig+".png", bbox_inches='tight', dpi=300)
    plt.close(figscat)
210 | 
211 | # Plot relative error of the power spectrum as a function of k
def plot_relerr(hparams):
    """Plot the mean relative error of the predicted power spectrum versus k.

    Reads the saved log10 power spectra for ``hparams.name_model()``, prints the
    R^2 score and the overall mean relative error, and saves the curve (in %)
    as ``Plots/rel_err_<model>.png``.
    """
    fig, ax = plt.subplots(figsize=(6,5))

    model_tag = hparams.name_model()
    # Drop the leading all-zero row of the saved arrays
    outputs = np.load("Outputs/outputsPS_"+model_tag+".npy")[1:]
    trues = np.load("Outputs/truesPS_"+hparams.name_model()+".npy")[1:]
    # Undo the log10 applied to the spectra
    trues, outputs = 10.**trues, 10.**outputs

    rel_dev = np.abs((trues - outputs)/(trues))
    r2 = r2_score(trues,outputs)
    err_rel = np.mean(rel_dev)
    print("R2=",r2,"Errrel=",err_rel)

    # Average over samples, one value per k, in percent
    errk = np.mean(rel_dev, axis=0)*100.
    kvec = np.loadtxt("PS_files/k_values.txt")

    ax.plot(kvec, errk)
    ax.set_xlabel("$k$ [hMpc$^{-1}$]")
    ax.set_ylabel("Relative error (%)")
    ax.set_xscale("log")
    ax.set_xlim(kvec[0],kvec[-1])

    fig.savefig("Plots/rel_err_"+hparams.name_model()+".png", bbox_inches='tight', dpi=300)
    plt.close(fig)
237 | 


--------------------------------------------------------------------------------
/Source/training.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------------------------
  2 | # Routines for training and testing the GNNs
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------------------------
  6 | 
  7 | from Source.constants import *
  8 | 
  9 | 
 10 | # Training step
 11 | def train(loader, model, hparams, optimizer, scheduler):
 12 |     model.train()
 13 | 
 14 |     loss_tot = 0
 15 |     for data in loader:  # Iterate in batches over the training dataset.
 16 | 
 17 |         data.to(device)
 18 |         optimizer.zero_grad()  # Clear gradients.
 19 |         out = model(data)  # Perform a single forward pass.
 20 | 
 21 |         # If cosmo parameters are predicted, perform likelihood-free inference to predict also the standard deviation
 22 |         if hparams.outmode == "cosmo":
 23 |             y_out, err_out = out[:,:hparams.pred_params], out[:,hparams.pred_params:2*hparams.pred_params]     # Take mean and standard deviation of the output
 24 | 
 25 |             # Compute loss as sum of two terms for likelihood-free inference
 26 |             loss_mse = torch.mean(torch.sum((y_out - data.y)**2., axis=1) , axis=0)
 27 |             loss_lfi = torch.mean(torch.sum(((y_out - data.y)**2. - err_out**2.)**2., axis=1) , axis=0)
 28 |             loss = torch.log(loss_mse) + torch.log(loss_lfi)
 29 | 
 30 |         # Else, perform standard regression
 31 |         elif hparams.outmode=="ps":
 32 |             loss_mse = torch.mean(torch.sum((out - data.y)**2., axis=1) , axis=0)
 33 |             loss = torch.log(loss_mse)
 34 | 
 35 |         loss.backward()  # Derive gradients.
 36 |         optimizer.step()  # Update parameters based on gradients.
 37 |         scheduler.step()
 38 |         loss_tot += loss.item()
 39 | 
 40 |     return loss_tot/len(loader)
 41 | 
 42 | # Testing/validation step
 43 | def test(loader, model, hparams):
 44 |     model.eval()
 45 | 
 46 |     outsPS = np.zeros((1,ps_size))
 47 |     truesPS = np.zeros((1,ps_size))
 48 | 
 49 |     trueparams = np.zeros((1,hparams.pred_params))
 50 |     outparams = np.zeros((1,hparams.pred_params))
 51 |     outerrparams = np.zeros((1,hparams.pred_params))
 52 | 
 53 |     errs = []
 54 |     loss_tot = 0
 55 |     for data in loader:  # Iterate in batches over the training/test dataset.
 56 |         with torch.no_grad():
 57 | 
 58 |             data.to(device)
 59 |             out = model(data)  # Perform a single forward pass.
 60 | 
 61 |             # If cosmo parameters are predicted, perform likelihood-free inference to predict also the standard deviation
 62 |             if hparams.outmode == "cosmo":
 63 |                 y_out, err_out = out[:,:hparams.pred_params], out[:,hparams.pred_params:2*hparams.pred_params]     # Take mean and standard deviation of the output
 64 |                 # Compute loss as sum of two terms for likelihood-free inference
 65 |                 loss_mse = torch.mean(torch.sum((y_out - data.y)**2., axis=1) , axis=0)
 66 |                 loss_lfi = torch.mean(torch.sum(((y_out - data.y)**2. - err_out**2.)**2., axis=1) , axis=0)
 67 |                 loss = torch.log(loss_mse) + torch.log(loss_lfi)
 68 | 
 69 |             elif hparams.outmode=="ps":
 70 |                 loss_mse = torch.mean(torch.sum((out - data.y)**2., axis=1) , axis=0)
 71 |                 loss = torch.log(loss_mse)
 72 |                 y_out = out
 73 | 
 74 |             err = (y_out - data.y)#/data.y
 75 |             errs.append( np.abs(err.detach().cpu().numpy()).mean() )
 76 |             loss_tot += loss.item()
 77 | 
 78 |             if hparams.outmode == "cosmo":
 79 |                 # Append true values and predictions
 80 |                 trueparams = np.append(trueparams, data.y.detach().cpu().numpy(), 0)
 81 |                 outparams = np.append(outparams, y_out.detach().cpu().numpy(), 0)
 82 |                 outerrparams  = np.append(outerrparams, err_out.detach().cpu().numpy(), 0)
 83 | 
 84 |             elif hparams.outmode=="ps":
 85 |                 # Append true values and predictions
 86 |                 outsPS = np.append(outsPS, out.detach().cpu().numpy(), 0)
 87 |                 truesPS = np.append(truesPS, data.y.detach().cpu().numpy(), 0)
 88 | 
 89 | 
 90 |     # Save true values and predictions
 91 |     if hparams.outmode == "cosmo":
 92 |         np.save("Outputs/trues_"+hparams.name_model()+".npy",trueparams)
 93 |         np.save("Outputs/outputs_"+hparams.name_model()+".npy",outparams)
 94 |         np.save("Outputs/errors_"+hparams.name_model()+".npy",outerrparams)
 95 | 
 96 |     elif hparams.outmode=="ps":
 97 |         np.save("Outputs/outputsPS_"+hparams.name_model()+".npy",outsPS)
 98 |         np.save("Outputs/truesPS_"+hparams.name_model()+".npy",truesPS)
 99 | 
100 |     return loss_tot/len(loader), np.array(errs).mean(axis=0)
101 | 
102 | # Training procedure
def training_routine(model, train_loader, test_loader, hparams, verbose=True):
    """Train ``model`` for ``hparams.n_epochs`` epochs, saving it whenever validation improves.

    Uses Adam with a cyclic learning rate between ``hparams.learning_rate`` and 1.e-3.
    The state dict is written to ``Models/<model name>`` each time the validation
    loss is not larger than the best one so far (starting from 1000).

    Returns:
        (train_losses, valid_losses): per-epoch lists of the losses.
    """
    # Optimizer and cyclic learning rate scheduler
    optimizer = torch.optim.Adam(model.parameters(), lr=hparams.learning_rate, weight_decay=hparams.weight_decay)
    scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=hparams.learning_rate, max_lr=1.e-3, cycle_momentum=False)

    train_losses = []
    valid_losses = []
    # err_min tracks the error at the best epoch; it is not part of the return value
    valid_loss_min = 1000.
    err_min = 1000.

    last_epoch = hparams.n_epochs
    for epoch in range(1, last_epoch+1):
        train_loss = train(train_loader, model, hparams, optimizer, scheduler)
        test_loss, err = test(test_loader, model, hparams)
        train_losses.append(train_loss)
        valid_losses.append(test_loss)

        # Save model if it has improved
        improved = test_loss <= valid_loss_min
        if improved:
            if verbose:
                print("Validation loss decreased ({:.2e} --> {:.2e}).  Saving model ...".format(valid_loss_min,test_loss))
            torch.save(model.state_dict(), "Models/"+hparams.name_model())
            valid_loss_min, err_min = test_loss, err

        if verbose:
            print(f'Epoch: {epoch:03d}, Train Loss: {train_loss:.2e}, Validation Loss: {test_loss:.2e}, Error: {err:.2e}')

    return train_losses, valid_losses
128 | 


--------------------------------------------------------------------------------
/crosstest.py:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------
 2 | # Test a model already trained
 3 | # Author: Pablo Villanueva Domingo
 4 | # Last update: 3/21
 5 | #------------------------------------------------
 6 | 
 7 | from main import *
 8 | from hyperparameters import hparams
 9 | 
10 | #--- MAIN ---#
11 | 
# Start of the wall-clock timer used in the final report
time_ini = time.time()

# Create the output folders that are still missing
for path in ("Plots", "Models", "Outputs"):
    if os.path.exists(path):
        continue
    os.mkdir(path)

# First pass: test a pretrained model (no training)
hparams.training = False
main(hparams)

# Second pass: test the same pretrained model in the other CAMELS suite
hparams.simsuite = hparams.flip_suite()
main(hparams, testsuite=True)

elapsed = datetime.timedelta(seconds=time.time()-time_ini)
print("Finished. Time elapsed:",elapsed)
29 | 


--------------------------------------------------------------------------------
/hyperparameters.py:
--------------------------------------------------------------------------------
 1 | #----------------------------------------------------
 2 | # Hyperparameters definition
 3 | # Author: Pablo Villanueva Domingo
 4 | # Last update: 4/22
 5 | #----------------------------------------------------
 6 | 
 7 | # Hyperparameters class
 8 | class hyperparameters():
 9 |     def __init__(self, outmode, only_positions, learning_rate, weight_decay, n_layers, hidden_channels, r_link, n_epochs, simsuite, simset="LH", n_sims=1000, training=True, pred_params=2):
10 | 
11 |         # Choose the output to be predicted, either the cosmological parameters ("cosmo") or the power spectrum ("ps")
12 |         self.outmode = outmode
13 |         # 1 for using only positions as features, 0 for using additional galactic features
14 |         # 1 only for outmode = "cosmo"
15 |         self.only_positions = only_positions
16 |         if self.outmode == "ps":
17 |             self.only_positions = 1
18 |         # Learning rate
19 |         self.learning_rate = learning_rate
20 |         # Weight decay
21 |         self.weight_decay = weight_decay
22 |         # Number of graph layers
23 |         self.n_layers = n_layers
24 |         # Hidden channels
25 |         self.hidden_channels = hidden_channels
26 |         # Linking radius
27 |         self.r_link = r_link
28 |         # Number of epochs
29 |         self.n_epochs = n_epochs
30 |         # Simulation suite, choose between "IllustrisTNG" and "SIMBA"
31 |         self.simsuite = simsuite
32 |         # Simulation set, choose between "CV" and "LH"
33 |         self.simset = simset
34 |         # Number of simulations considered, maximum 27 for CV and 1000 for LH
35 |         self.n_sims = n_sims
36 |         # If training, set to True, otherwise loads a pretrained model and tests it
37 |         self.training = training
38 |         # Number of cosmo/astro params to be predicted, starting from Omega_m, sigma_8, etc.
39 |         # Only for outmode = "cosmo"
40 |         self.pred_params = pred_params
41 |         # Snapshot of the simulation, indicating redshift 4: z=3, 10: z=2, 14: z=1.5, 18: z=1, 24: z=0.5, 33: z=0
42 |         self.snap = "33"
43 | 
44 |     # Name of the model and hyperparameters
45 |     def name_model(self):
46 |         return self.outmode+"_"+self.simsuite+"_"+self.simset+"_onlypos_"+str(self.only_positions)+"_lr_{:.2e}_weightdecay_{:.2e}_layers_{:d}_rlink_{:.2e}_channels_{:d}_epochs_{:d}".format(self.learning_rate, self.weight_decay, self.n_layers, self.r_link, self.hidden_channels, self.n_epochs)
47 | 
48 |     # Return the other CAMELS simulation suite
49 |     def flip_suite(self):
50 |         if self.simsuite=="IllustrisTNG":
51 |             new_simsuite = "SIMBA"
52 |         elif self.simsuite=="SIMBA":
53 |             new_simsuite = "IllustrisTNG"
54 |         return new_simsuite
55 | 
56 | 
57 | #--- HYPERPARAMETER CHOICES ---#
58 | 
# The two configurations below are switched with a comment/string toggle:
# as written, the first marker line is a comment, so the IllustrisTNG block is
# live code and the SIMBA block is swallowed by the triple-quoted string that
# follows it. Removing the leading "#" of the first marker line swaps the roles:
# the IllustrisTNG block becomes a string and the SIMBA block becomes live code.
#"""
# IllustrisTNG best model
hparams = hyperparameters(outmode = "cosmo",                        # Choose the output to be predicted, either the cosmological parameters ("cosmo") or the power spectrum ("ps")
                          only_positions = 0,                       # 1 for using only positions as features, 0 for using additional galactic features
                          learning_rate = 1.619e-07,                # Learning rate
                          weight_decay = 1.e-07,                    # Weight decay
                          n_layers = 2,                             # Number of hidden graph layers
                          r_link = 0.015,                           # Linking radius
                          hidden_channels = 64,                     # Hidden channels
                          n_epochs = 300,                           # Number of epochs
                          simsuite = "IllustrisTNG",                # Simulation suite, choose between "IllustrisTNG" and "SIMBA"
                          pred_params = 1                           # Number of cosmo/astro params to be predicted, starting from Omega_m, sigma_8, etc. (Only for outmode = "cosmo")
                          )
"""
# SIMBA best model
hparams = hyperparameters(outmode = "cosmo",                        # Choose the output to be predicted, either the cosmological parameters ("cosmo") or the power spectrum ("ps")
                          only_positions = 0,                       # 1 for using only positions as features, 0 for using additional galactic features
                          learning_rate = 1.087e-06,                # Learning rate
                          weight_decay = 1.e-07,                    # Weight decay
                          n_layers = 4,                             # Number of hidden graph layers
                          r_link = 0.0148,                          # Linking radius
                          hidden_channels = 64,                     # Hidden channels
                          n_epochs = 300,                           # Number of epochs
                          simsuite = "SIMBA",                       # Simulation suite, choose between "IllustrisTNG" and "SIMBA"
                          pred_params = 1                           # Number of cosmo/astro params to be predicted, starting from Omega_m, sigma_8, etc. (Only for outmode = "cosmo")
                          )
#"""
86 | 


--------------------------------------------------------------------------------
/hyperparams_optimization.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------------------------
  2 | # Script for optimizing the hyperparameters of the network using optuna
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------------------------
  6 | 
  7 | import optuna
  8 | from main import *
  9 | from optuna.visualization import plot_optimization_history, plot_contour, plot_param_importances    # it needs plotly and kaleido
 10 | from hyperparameters import hparams
 11 | 
 12 | # Simulation type
 13 | simsuite = "IllustrisTNG"
 14 | simset = "LH"
 15 | n_sims = 1000
 16 | # Number of epochs
 17 | n_epochs = 300
 18 | 
 19 | # Objective function to minimize
 20 | def objective(trial):
 21 | 
 22 |     # Hyperparameters to optimize
 23 |     learning_rate = trial.suggest_float("learning_rate", 1e-8, 1e-4, log=True)
 24 |     #weight_decay = trial.suggest_float("weight_decay", 1e-8, 1e-6, log=True)
 25 |     weight_decay = 1.e-7
 26 |     n_layers = trial.suggest_int("n_layers", 1, 5)
 27 |     hidden_channels = trial.suggest_categorical("hidden_channels", [64, 128, 256])
 28 |     r_link = trial.suggest_float("r_link", 5.e-3, 5.e-2, log=True)
 29 | 
 30 |     # Some verbose
 31 |     print('\nTrial number: {}'.format(trial.number))
 32 |     print('learning_rate: {}'.format(learning_rate))
 33 |     #print('weight_decay: {}'.format(weight_decay))
 34 |     print('n_layers:  {}'.format(n_layers))
 35 |     print('hidden_channels:  {}'.format(hidden_channels))
 36 |     print('r_link:  {}'.format(r_link))
 37 | 
 38 |     # Hyperparameters to be optimized
 39 |     hparams.learning_rate = learning_rate
 40 |     hparams.weight_decay = weight_decay
 41 |     hparams.n_layers = n_layers
 42 |     hparams.hidden_channels = hidden_channels
 43 |     hparams.r_link = r_link
 44 | 
 45 |     # Default params
 46 |     hparams.n_epochs = n_epochs
 47 |     hparams.simsuite = simsuite
 48 |     hparams.simset = simset
 49 |     hparams.n_sims = n_sims
 50 | 
 51 |     # Run main routine
 52 |     min_test_loss = main(hparams, verbose = False)
 53 | 
 54 |     if torch.cuda.is_available():
 55 |         torch.cuda.empty_cache()
 56 | 
 57 |     return min_test_loss
 58 | 
 59 | 
 60 | #--- MAIN ---#
 61 | 
 62 | if __name__ == "__main__":
 63 | 
 64 |     time_ini = time.time()
 65 | 
 66 |     for path in ["Plots", "Models", "Outputs"]:
 67 |         if not os.path.exists(path):
 68 |             os.mkdir(path)
 69 | 
 70 |     # Optuna parameters
 71 |     storage = "sqlite:///"+os.getcwd()+"/optuna_"+simsuite+"_"+simset
 72 |     study_name = "gnn"
 73 |     n_trials   = 30
 74 | 
 75 |     # Define sampler and start optimization
 76 |     sampler = optuna.samplers.TPESampler(n_startup_trials=10)
 77 |     study = optuna.create_study(study_name=study_name, sampler=sampler, storage=storage, load_if_exists=True)
 78 |     study.optimize(objective, n_trials, gc_after_trial=True)
 79 | 
 80 |     # Print info for best trial
 81 |     print("Best trial:")
 82 |     trial = study.best_trial
 83 |     print("  Value: ", trial.value)
 84 |     print("  Params: ")
 85 |     for key, value in trial.params.items():
 86 |         print("    {}: {}".format(key, value))
 87 | 
 88 |     hparams.learning_rate = trial.params["learning_rate"]
 89 |     hparams.n_layers = trial.params["n_layers"]
 90 |     hparams.hidden_channels = trial.params["hidden_channels"]
 91 |     hparams.r_link = trial.params["r_link"]
 92 | 
 93 |     # Save best model and plots
 94 |     if not os.path.exists("Best"):
 95 |         os.mkdir("Best")
 96 |     # Change nominal suite to read correct files (actually in ps mode both suites are employed)
 97 |     if hparams.outmode=="ps":
 98 |         hparams.simsuite = hparams.flip_suite()
 99 |     files = []
100 |     files.append( "Plots/out_true_Om_"+hparams.name_model()+".png" )
101 |     files.append( "Plots/out_true_Sig_"+hparams.name_model()+".png" )
102 |     files.append( "Plots/loss_"+hparams.name_model()+".png" )
103 |     files.append( "Plots/ps_"+hparams.name_model()+".png" )
104 |     files.append( "Plots/rel_err_"+hparams.name_model()+".png" )
105 |     files.append( "Models/"+hparams.name_model() )
106 |     for file in files:
107 |         if os.path.exists(file):
108 |             os.system("cp "+file+" Best/.")
109 | 
110 |     # Visualization of optimization results
111 |     fig = plot_optimization_history(study)
112 |     fig.write_image("Plots/optuna_optimization_history_"+simsuite+".png")
113 | 
114 |     fig = plot_contour(study)#, params=["learning_rate", "weight_decay", "r_link"])#, "use_model"])
115 |     fig.write_image("Plots/optuna_contour_"+simsuite+".png")
116 | 
117 |     fig = plot_param_importances(study)
118 |     fig.write_image("Plots/plot_param_importances_"+simsuite+".png")
119 | 
120 |     print("Finished. Time elapsed:",datetime.timedelta(seconds=time.time()-time_ini))
121 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------
  2 | # Main routine for training and testing GNN models
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------
  6 | 
  7 | import time, datetime, psutil
  8 | from Source.metalayer import *
  9 | from Source.training import *
 10 | from Source.plotting import *
 11 | from Source.load_data import *
 12 | 
 13 | 
 14 | # Main routine to train the neural net
 15 | # If testsuite==True, it takes a model already pretrained in the other suite and tests it in the selected one
 16 | def main(hparams, verbose = True, testsuite = False):
 17 | 
 18 |     # Load data and create dataset
 19 |     dataset = create_dataset(hparams)
 20 |     node_features = dataset[0].x.shape[1]
 21 | 
 22 |     # Split dataset among training, validation and testing datasets
 23 |     train_loader, valid_loader, test_loader = split_datasets(dataset)
 24 | 
 25 |     # Size of the output of the GNN
 26 |     if hparams.outmode=="cosmo":
 27 |         dim_out=2*hparams.pred_params
 28 |     elif hparams.outmode=="ps":
 29 |         dim_out=ps_size
 30 | 
 31 |     # Initialize model
 32 |     model = GNN(node_features=node_features,
 33 |                 n_layers=hparams.n_layers,
 34 |                 hidden_channels=hparams.hidden_channels,
 35 |                 linkradius=hparams.r_link,
 36 |                 dim_out=dim_out,
 37 |                 only_positions=hparams.only_positions)
 38 |     model.to(device)
 39 |     if verbose: print("Model: " + hparams.name_model()+"\n")
 40 | 
 41 |     # Print the memory (in GB) being used now:
 42 |     process = psutil.Process()
 43 |     print("Memory being used (GB):",process.memory_info().rss/1.e9)
 44 | 
 45 |     # Train the net
 46 |     if hparams.training:
 47 |         if verbose: print("Training!\n")
 48 |         train_losses, valid_losses = training_routine(model, train_loader, valid_loader, hparams, verbose)
 49 | 
 50 |     # Test the net
 51 |     if verbose: print("\nTesting!\n")
 52 | 
 53 |     # If test in other suite, change the suite for loading the model
 54 |     if testsuite==True:
 55 |         hparams.simsuite = hparams.flip_suite()   # change for loading the model
 56 | 
 57 |     # Load the trained model
 58 |     state_dict = torch.load("Models/"+hparams.name_model(), map_location=device)
 59 |     model.load_state_dict(state_dict)
 60 | 
 61 |     if testsuite==True: hparams.simsuite = hparams.flip_suite()   # change after loading the model
 62 | 
 63 |     # Test the model
 64 |     test_loss, rel_err = test(test_loader, model, hparams)
 65 |     if verbose: print("Test Loss: {:.2e}, Relative error: {:.2e}".format(test_loss, rel_err))
 66 | 
 67 |     # Plot loss trends
 68 |     if hparams.training:
 69 |         plot_losses(train_losses, valid_losses, test_loss, rel_err, hparams)
 70 | 
 71 |     # Plot true vs predicted cosmo parameters
 72 |     if hparams.outmode=="cosmo":
 73 |         plot_out_true_scatter(hparams, "Om", testsuite)
 74 |         if hparams.pred_params==2:
 75 |             plot_out_true_scatter(hparams, "Sig", testsuite)
 76 | 
 77 |     # Plot power spectrum and relative error
 78 |     elif hparams.outmode=="ps":
 79 |         plot_ps(hparams)
 80 | 
 81 |     return test_loss
 82 | 
 83 | 
 84 | #--- MAIN ---#
 85 | 
 86 | if __name__ == "__main__":
 87 | 
 88 |     time_ini = time.time()
 89 | 
 90 |     for path in ["Plots", "Models", "Outputs"]:
 91 |         if not os.path.exists(path):
 92 |             os.mkdir(path)
 93 | 
 94 |     # Load default parameters
 95 |     from hyperparameters import hparams
 96 | 
 97 |     main(hparams)
 98 | 
 99 |     print("Finished. Time elapsed:",datetime.timedelta(seconds=time.time()-time_ini))
100 | 


--------------------------------------------------------------------------------
/ps_test.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------
  2 | # Compute the power spectrum of different point distributions with the GNN trained in CAMELS
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------
  6 | 
  7 | import time, datetime
  8 | from Source.metalayer import *
  9 | from Source.training import *
 10 | from Source.plotting import *
 11 | from Source.load_data import *
 12 | from visualize_graphs import visualize_graph
 13 | #import powerbox as pbox
 14 | import MAS_library as MASL
 15 | import Pk_library as PKL
 16 | 
 17 | # Power spectrum parameters
 18 | BoxLen = 25.0
 19 | grid    = 512
 20 | MAS     = 'CIC'
 21 | kmax    = 20.0 #h/Mpc
 22 | axis    = 0
 23 | threads = 28
 24 | vol = (boxsize/1.e3)**3.    # (Mpc/h)^3
 25 | 
 26 | 
 27 | #--- POINT DISTRIBUTIONS ---#
 28 | 
 29 | # Poisson point process
 30 | def poisson_process(num_points):
 31 | 
 32 |     #pos = torch.rand((num_points,3))
 33 |     pos = np.random.uniform(0., 1., (num_points,3))
 34 | 
 35 |     return pos
 36 | 
 37 | # Neyman-Scott process with a gaussian kernel (Thomas point process)
 38 | # Based on https://hpaulkeeler.com/simulating-a-thomas-cluster-point-process/
 39 | def neynmanscott_process(num_parents, num_daughters, sigma):
 40 | 
 41 |     # Generate parents
 42 |     x_par = poisson_process(num_parents)
 43 | 
 44 |     # Simulate Poisson point process for the daughters
 45 |     numbPointsDaughter = np.random.poisson(num_daughters, x_par.shape[0])
 46 |     numbPoints = sum(numbPointsDaughter)
 47 | 
 48 |     # Generate the relative locations as independent normal variables
 49 |     x_daug = np.random.normal(0, sigma, size=(numbPoints,3)) # (relative) x coordinaets
 50 | 
 51 |     # Replicate parent points (ie centres of disks/clusters) and center daughters around them
 52 |     xx = np.repeat(x_par, numbPointsDaughter, axis=0)
 53 |     xx = xx + x_daug
 54 | 
 55 |     # Retain only those points inside simulation window
 56 |     booleInside=((xx[:,0]>=0)&(xx[:,0]<=1)&(xx[:,1]>=0)&(xx[:,1]<=1)&(xx[:,2]>=0)&(xx[:,2]<=1))
 57 |     xx = xx[booleInside]
 58 | 
 59 |     return xx
 60 | 
 61 | # Soneira-Peebles point process (Soneira & Peebles 1977, 1978)
 62 | def soneira_peebles_model(lamb, eta, n_levels, R0):
 63 | 
 64 |     # Radius for first level
 65 |     Rparent = R0
 66 | 
 67 |     # Generate parents
 68 |     #num_parents = max(1,np.random.poisson(eta))
 69 |     num_parents = eta
 70 |     xparents = poisson_process(num_parents)
 71 | 
 72 |     xtot = []
 73 |     xtot.extend(xparents)
 74 | 
 75 |     # Iterate over each level
 76 |     for n in range(2,n_levels+1):
 77 |         Rparent = Rparent/lamb
 78 |         pointsx = []
 79 | 
 80 |         for ipar in range(len(xparents)):
 81 | 
 82 |             num_points = np.random.poisson(eta)
 83 |             #num_points = eta
 84 |             x_daug = xparents[ipar] + np.random.normal(0, Rparent, size=(num_points,3))
 85 |             pointsx.extend(x_daug)
 86 | 
 87 |         xparents = pointsx
 88 |         xtot.extend(pointsx)
 89 | 
 90 |     xx = np.array(xtot)
 91 | 
 92 |     # Retain only those points inside simulation window
 93 |     booleInside=((xx[:,0]>=0)&(xx[:,0]<=1)&(xx[:,1]>=0)&(xx[:,1]<=1)&(xx[:,2]>=0)&(xx[:,2]<=1))
 94 |     xx = xx[booleInside]
 95 | 
 96 |     return xx
 97 | 
 98 | #--- OTHER ROUTINES ---#
 99 | 
# Routine to compute the power spectrum using Pylians
def compute_ps(pos):
    """Measure the power spectrum monopole of a set of points.

    Args:
        pos: torch tensor with the positions, which are multiplied by BoxLen
            before being assigned to the grid.

    Returns:
        Tuple (k, Pk0) restricted to the modes with k<kmax.
    """

    coords = pos.cpu().detach().numpy()*BoxLen

    # Density field on the grid, turned into an overdensity field
    density = np.zeros((grid,grid,grid), dtype=np.float32)
    MASL.MA(coords, density, BoxLen, MAS, verbose=False)
    density /= np.mean(density, dtype=np.float64)
    density -= 1.0

    # Power spectrum of the overdensity field
    spectrum = PKL.Pk(density, BoxLen, axis, MAS, threads, verbose=False)
    k_modes  = spectrum.k3D
    monopole = spectrum.Pk[:,0]

    # Keep only the modes below the maximum wavenumber
    keep = k_modes<kmax
    return k_modes[keep], monopole[keep]
120 | 
# Plot true vs predicted power spectra for some randomly chosen samples
def plot_ps_test(hparams, trues, outputs, ktrue, anals, simtype=None):
    """Compare measured and predicted power spectra and save the figure.

    Args:
        hparams: hyperparameters object; only name_model() is used, for the
            name of the figure.
        trues: array with one measured power spectrum per row.
        outputs: array with one predicted power spectrum per row.
        ktrue: wavenumbers of the measured spectra.
        anals: not used, kept for compatibility with existing callers.
        simtype: label of the point process, used in the name of the figure.
            If None, the module-level variable `simtype` is used, which is
            what this routine relied on before having this argument.

    Raises:
        ValueError: if simtype is not given and no global `simtype` exists.
    """

    if simtype is None:
        # Legacy behaviour: the script loop defines `simtype` at module level
        simtype = globals().get("simtype")
        if simtype is None:
            raise ValueError("simtype must be given to name the output figure")

    figscat, axscat = plt.subplots(figsize=(6,5))

    # Coefficient of determination and mean relative error over all samples
    r2 = r2_score(trues,outputs)
    err_rel = np.mean(np.abs((trues - outputs)/(trues)))
    print("R2",r2,"Rel Error",err_rel)

    # Take up to 5 randomly chosen samples to plot
    npoints = min(5, trues.shape[0])
    indexes = np.random.choice(trues.shape[0], npoints, replace=False)
    outputs = outputs[indexes]
    trues = trues[indexes]
    colors = ["r","b","g","orange","purple","cyan","m"]

    # Wavenumbers of the predicted spectra
    kvec = np.loadtxt("PS_files/k_values.txt")

    for true, output, color in zip(trues, outputs, colors):
        axscat.plot(ktrue, true, color=color, linestyle="-")
        axscat.plot(kvec, output, color=color, linestyle=":")

    axscat.set_yscale("log")
    axscat.set_xscale("log")
    axscat.set_ylabel(r"$P(k)$")
    axscat.set_xlabel(r"$k$ [hMpc$^{-1}$]")
    axscat.grid()
    axscat.set_xlim(kvec[0],kvec[-1])

    customlegend = [
        Line2D([0], [0], color="k", linestyle="-", lw=4, label=r"Truth"),
        Line2D([0], [0], color="k", linestyle=":", lw=4, label=r"GNN"),
    ]
    axscat.legend(handles=customlegend, loc = "upper right")

    # Raw strings, so that the backslash of \epsilon is not an invalid escape sequence
    axscat.set_title(r"$R^2$={:.2f}".format(r2)+r"$, \epsilon$={:.2f} %".format(100.*err_rel))

    namefig = "test_ps_"+simtype+"_"+hparams.name_model()
    figscat.savefig("Plots/"+namefig+".png", bbox_inches='tight', dpi=300)
    plt.close(figscat)
164 | 
# Generate synthetic galaxy catalogue as point process
def generate_sim(num_points, hparams, simtype):
    """Build a graph from a synthetic point distribution.

    Args:
        num_points: requested number of points. Only the Poisson process
            returns exactly this number; the clustered processes derive their
            own parameters from it and discard points outside the unit cube.
        hparams: hyperparameters object; only r_link is used.
        simtype: "Poisson", "NeymanScott" or "SoneiraPeebles".

    Returns:
        A Data graph with the positions in `pos`, null node features `x`, the
        edges given by get_edges, the log10 of the number of points in `u`,
        and a constant vector vol/num_points of size ps_size in `y`.

    Raises:
        ValueError: if simtype is not one of the supported point processes.
    """

    if simtype=="Poisson":
        pos = poisson_process(num_points)
        print("Points",num_points)

    elif simtype=="NeymanScott":
        sigma = np.random.uniform(0.01,0.1)
        pos = neynmanscott_process(num_parents=int(np.sqrt(num_points)), num_daughters=int(np.sqrt(num_points)), sigma=sigma)
        print("Points",num_points,"Sigma",sigma)

    elif simtype=="SoneiraPeebles":
        n_levels = np.random.randint(4, 6)
        eta = int(num_points**(1/n_levels))
        lamb = np.random.uniform(1.5,3.)
        pos = soneira_peebles_model(lamb=lamb, eta=eta, n_levels=n_levels, R0=.3)
        print("Points",num_points,"eta",eta,"levels",n_levels,"lambda",lamb)

    else:
        # Without this branch, an unknown label would fail below with an unbound `pos`
        raise ValueError("Unknown simtype '{}': use 'Poisson', 'NeymanScott' or 'SoneiraPeebles'".format(simtype))

    pos = torch.tensor(pos, dtype=torch.float32)

    edge_index, edge_attr = get_edges(pos, hparams.r_link, use_loops=False)

    x = torch.zeros_like(pos[:,:1], dtype=torch.float32)    # Not used, only for consistency

    # Constant target built from the requested number of points
    # NOTE(review): presumably the shot noise level of a Poisson process, confirm
    y = torch.ones((1,ps_size))*vol/num_points

    # Global feature: log10 of the number of points actually kept
    u = torch.tensor(np.log10(pos.shape[0]), dtype=torch.float32).reshape(1,1)

    # Get the graph
    graph = Data(x=x,
                 y=y,
                 edge_index=torch.tensor(edge_index, dtype=torch.long),
                 edge_attr=torch.tensor(edge_attr, dtype=torch.float32),
                 u=u,
                 pos=pos)

    return graph
203 | 
# Plot sample distribution
def plot_pointprocess(simtype):
    """Draw 10 sample graphs of the point process `simtype`.

    The hyperparameters are read from the module-level `hparams`, which is not
    an argument of this routine.
    """

    n_samples = 10
    gals_per_sample = np.random.randint(500,900,size=n_samples)

    for idx, n_gals in enumerate(gals_per_sample):
        data = generate_sim(n_gals, hparams, simtype)
        # visualize_graph takes the coordinates from the node features
        data.x = data.pos
        edge_index = radius_graph(data.pos, r=hparams.r_link, loop=False)
        label = simtype+"_"+str(idx)
        visualize_graph(data, label, 0.1, "3d", edge_index)
215 | 
# Routine to predict the power spectrum using the pretrained GNN
def test_ps(hparams, simtype):
    """Compare the GNN power spectrum with the measured one on synthetic data.

    50 point distributions of type `simtype` are generated; for each one the
    pretrained model predicts log10 of the power spectrum, which is compared
    with the spectrum measured by compute_ps. The comparison is plotted with
    plot_ps_test.

    Args:
        hparams: hyperparameters object. outmode and only_positions are
            overwritten ("ps" and 1); simsuite is flipped only while loading
            the weights.
        simtype: label of the point process, see generate_sim.
    """

    hparams.outmode = "ps"
    hparams.only_positions = 1
    dim_out=ps_size

    # Initialize model
    model = GNN(node_features=0,
                n_layers=hparams.n_layers,
                hidden_channels=hparams.hidden_channels,
                linkradius=hparams.r_link,
                dim_out=dim_out,
                only_positions=hparams.only_positions)
    model.to(device)

    # Load the trained model, stored under the name of the flipped suite
    hparams.simsuite = hparams.flip_suite()
    try:
        state_dict = torch.load("Models/"+hparams.name_model(), map_location=device)
    finally:
        # Restore the nominal suite even if loading fails, since hparams is shared with the caller
        hparams.simsuite = hparams.flip_suite()
    model.load_state_dict(state_dict)

    # Inference only: evaluation mode, and no gradients are needed
    model.eval()

    num_sims = 50
    num_gals = np.random.randint(700,1200,size=num_sims)

    trues = []
    outs = []
    anals = []

    with torch.no_grad():
        for i in range(num_sims):

            data = generate_sim(num_gals[i], hparams, simtype)
            loader = DataLoader([data], batch_size=1, shuffle=True)

            for batch in loader:

                # Get model prediction (the net outputs log10 of the power spectrum)
                batch = batch.to(device)
                out = (10.**model(batch)).cpu().detach().numpy()
                outs.append(out)

                # Get target power spectrum
                ktrue, ps_true = compute_ps(batch.pos)
                trues.append( ps_true )

                anals.append( batch.y.cpu().detach().numpy() )

                print(i,"Err rel={:.3e}".format( np.mean(np.abs((ps_true - out)/(ps_true)))) )

    trues = np.array(trues)
    outs = np.array(outs).reshape((num_sims,ps_size))
    anals = np.array(anals).reshape((num_sims,ps_size))

    plot_ps_test(hparams, trues, outs, ktrue, anals)
274 | 
275 | 
276 | 
277 | #--- MAIN ---#
278 | 
if __name__ == "__main__":

    # Reference time to report the total duration at the end
    time_ini = time.time()

    # Create the output folders which do not exist yet
    for path in ("Plots", "Models", "Outputs"):
        if os.path.exists(path):
            continue
        os.mkdir(path)

    # Load default parameters
    from hyperparameters import hparams

    # NOTE: the loop variable has to remain a module-level name called `simtype`,
    # since it is read as a global when naming the figure in plot_ps_test
    point_processes = ("Poisson", "NeymanScott", "SoneiraPeebles")
    for simtype in point_processes:

        test_ps(hparams, simtype)

        #plot_pointprocess(simtype)

    elapsed = datetime.timedelta(seconds=time.time()-time_ini)
    print("Finished. Time elapsed:",elapsed)
297 | 


--------------------------------------------------------------------------------
/visualize_graph_10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PabloVD/CosmoGraphNet/36041d7e3b23ae9e288cd7f178e6c2888b54042b/visualize_graph_10.png


--------------------------------------------------------------------------------
/visualize_graphs.py:
--------------------------------------------------------------------------------
  1 | #----------------------------------------------------------------------
  2 | # Script to visualize galaxy catalogues as graphs
  3 | # Author: Pablo Villanueva Domingo
  4 | # Last update: 4/22
  5 | #----------------------------------------------------------------------
  6 | 
  7 | import time, datetime
  8 | from Source.plotting import *
  9 | from Source.load_data import *
 10 | from torch_geometric.utils import degree
 11 | from torch_cluster import radius_graph
 12 | 
 13 | fontsize = 8
 14 | 
 15 | # Visualization routine for plotting graphs
 16 | def visualize_graph(data, ind, sizes=0.1, projection="3d", edge_index=None):
 17 | 
 18 |     fig = plt.figure(figsize=(4, 4))
 19 | 
 20 |     if projection=="3d":
 21 |         ax = fig.add_subplot(projection ="3d")
 22 |         pos = data.x[:,:3]
 23 |     elif projection=="2d":
 24 |         ax = fig.add_subplot()
 25 |         pos = data.x[:,:2]
 26 | 
 27 |     pos *= boxsize/1.e3   # show in Mpc
 28 | 
 29 |     # Draw lines for each edge
 30 |     if edge_index is not None:
 31 |         for (src, dst) in edge_index.t().tolist():
 32 | 
 33 |             src = pos[src].tolist()
 34 |             dst = pos[dst].tolist()
 35 | 
 36 |             if projection=="3d":
 37 |                 ax.plot([src[0], dst[0]], [src[1], dst[1]], zs=[src[2], dst[2]], linewidth=0.1, color='black')
 38 |             elif projection=="2d":
 39 |                 ax.plot([src[0], dst[0]], [src[1], dst[1]], linewidth=0.1, color='black')
 40 | 
 41 |     # Plot nodes
 42 |     if projection=="3d":
 43 |         ax.scatter(pos[:, 0], pos[:, 1], pos[:, 2], s=sizes, zorder=1000, alpha=0.5)
 44 |     elif projection=="2d":
 45 |         ax.scatter(pos[:, 0], pos[:, 1], s=sizes, zorder=1000, alpha=0.5)
 46 | 
 47 |     ax.xaxis.set_tick_params(labelsize=fontsize)
 48 |     ax.yaxis.set_tick_params(labelsize=fontsize)
 49 |     ax.zaxis.set_tick_params(labelsize=fontsize)
 50 | 
 51 |     fig.savefig("Plots/visualize_graph_"+str(ind), bbox_inches='tight', dpi=300)
 52 |     plt.close(fig)
 53 | 
 54 | # Plot the degree distribution of the graph (see e.g. http://networksciencebook.com/)
 55 | def plot_degree_distribution(degrees):
 56 | 
 57 |     listbins = np.linspace(0,80,num=12)
 58 |     deg_dist = []
 59 | 
 60 |     for array in degrees:
 61 |         hist, bins = np.histogram(array, bins=listbins)
 62 |         deg_dist.append(hist)
 63 | 
 64 |     dist_mean = np.mean(deg_dist,axis=0)
 65 |     dist_std = np.std(deg_dist,axis=0)
 66 | 
 67 |     fig_deg, ax_deg = plt.subplots(figsize=(6, 4))
 68 | 
 69 |     ax_deg.set_yscale("log")
 70 |     #ax_deg.set_xscale("log")
 71 |     ax_deg.plot(bins[:-1], dist_mean, color=colorsuite(simsuite))
 72 |     ax_deg.fill_between(bins[:-1], dist_mean+dist_std, dist_mean-dist_std, color=colorsuite(simsuite), alpha=0.3)
 73 |     ax_deg.set_xlim([bins[0],bins[-2]])
 74 |     ax_deg.set_xlabel(r"$k$")
 75 |     ax_deg.set_ylabel(r"$p_k$")
 76 | 
 77 |     fig_deg.savefig("Plots/degree_distribution.pdf", bbox_inches='tight', dpi=300)
 78 | 
 79 | # Main routine to display graphs from several simulations
 80 | def display_graphs(simsuite, n_sims, r_link, simset="LH", showgraph=True, get_degree=False):
 81 | 
 82 |     if get_degree:
 83 |         degrees = []
 84 | 
 85 |     # Load data and create graph
 86 |     for simnumber in range(n_sims):
 87 |         simpath = simpathroot + simsuite + "/"+simset+"_"
 88 |         catalogue = simpath + str(simnumber)+"/fof_subhalo_tab_033.hdf5"
 89 | 
 90 |         # Read the catalogue
 91 |         f     = h5py.File(catalogue, 'r')
 92 |         pos   = f['/Subhalo/SubhaloPos'][:]/boxsize
 93 |         Nstar = f['/Subhalo/SubhaloLenType'][:,4]       #number of stars
 94 |         Mstar = f['/Subhalo/SubhaloMassType'][:,4] #Msun/h
 95 |         indexes = np.where(Nstar>Nstar_th)[0]
 96 |         pos     = pos[indexes]
 97 |         Mstar   = Mstar[indexes]
 98 | 
 99 |         tab = np.column_stack((pos, Mstar))
100 | 
101 |         #edge_index, edge_attr = get_edges(pos, r_link, use_loops=False)
102 |         edge_index = radius_graph(torch.tensor(pos,dtype=torch.float32), r=r_link, loop=False)
103 | 
104 |         data = Data(x=tab, edge_index=torch.tensor(edge_index, dtype=torch.long))
105 | 
106 |         if showgraph:
107 |             #visualize_graph(data, simnumber, "2d", edge_index)
108 |             visualize_graph(data, simnumber, projection="3d", edge_index=data.edge_index)
109 | 
110 |         if get_degree:
111 |             degrees.append( degree(edge_index[0], data.num_nodes).numpy() )
112 | 
113 |     if get_degree:
114 |         plot_degree_distribution(degrees)
115 | 
116 | 
117 | 
118 | 
119 | #--- MAIN ---#
120 | 
if __name__=="__main__":

    # Reference time to report the total duration at the end
    time_ini = time.time()

    # Create the folder for the figures if it does not exist yet
    for path in ("Plots",):
        if os.path.exists(path):
            continue
        os.mkdir(path)

    # Simulation suite, choose between "IllustrisTNG" and "SIMBA"
    # (keep this module-level name: plot_degree_distribution reads it as a global)
    simsuite = "IllustrisTNG"
    # Number of simulations considered, maximum 27 for CV and 1000 for LH
    n_sims = 20
    # Linking radius
    r_link = 0.05

    display_graphs(simsuite, n_sims, r_link)

    elapsed = datetime.timedelta(seconds=time.time()-time_ini)
    print("Finished. Time elapsed:",elapsed)
139 | 


--------------------------------------------------------------------------------