├── extra ├── __init__.py ├── single_day_series.gif ├── plot_time_series.py └── ionex_writer.py ├── spaceweather ├── __init__.py └── indicesdownloader.py ├── requirements.txt ├── models ├── custom_layers.py ├── dev │ └── gps_solutions.py └── models.py ├── LICENSE ├── batch_run.py ├── README.md ├── .gitignore ├── config.csv ├── downloader.py ├── generator.py ├── train.py ├── lstm_utils.py ├── plotresults.py ├── evaluate.py └── ionex_samples.py /extra/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spaceweather/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /extra/single_day_series.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauriciodev/tec_forecast/HEAD/extra/single_day_series.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imageio 2 | tensorflow-gpu 3 | numpy 4 | matplotlib 5 | argparse 6 | sklearn 7 | pandas 8 | scikit-learn 9 | pydot 10 | graphviz 11 | h5py 12 | keras==2.15.0 13 | tensorflow==2.15.1 14 | -------------------------------------------------------------------------------- /models/custom_layers.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from tensorflow.keras.layers import * 4 | from tensorflow.keras.utils import plot_model 5 | from tensorflow.keras.regularizers import l1,l2 6 | 7 | weight_decay=1E-4 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mauricio Carvalho Mathias de Paulo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /extra/plot_time_series.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import imageio 5 | 6 | 7 | 8 | def saveGif(matrixList,gifFileName,clearFrames=True): 9 | filenames=[] 10 | for i,m in enumerate(matrixList): 11 | # plot the line chart 12 | #plt.plot(y[:i]) 13 | plt.imshow(np.squeeze(m), extent=[-180,180,-90,90]) #minx maxx miny maxy 14 | 15 | # create file name and append it to a list 16 | filename = f'{gifFileName}_{i}.png' 17 | filenames.append(filename) 18 | plt.title(f"Day {int(np.floor(i/24))+1} hour {i%24:02d}") 19 | # save frame 20 | plt.savefig(filename, bbox_inches='tight') 21 | plt.close()# build gif 22 | with imageio.get_writer(gifFileName, mode='I') as writer: 23 | for filename in filenames: 24 | image = imageio.imread(filename) 25 | writer.append_data(image) 26 | # Remove files 27 | if clearFrames: 28 | for filename in set(filenames): 29 | os.remove(filename) 30 | 31 | if __name__=="__main__": 32 | matrixList=None 33 | 34 | for d in range(1,2): 35 | f=f"ionex/codg00{d}0.20i.npy" 36 | ionex=np.load(f) 37 | if matrixList is None: 38 | matrixList=ionex[:24] 39 | else: 40 | matrixList=np.concatenate((matrixList,ionex[:24])) 41 | saveGif(matrixList,'mygif.gif') 42 | 43 | -------------------------------------------------------------------------------- /batch_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pandas as pd 3 | import subprocess, os 4 | from lstm_utils import getModelFileName,getModelFolder 5 | config_csv_file="config.csv" 6 | df=pd.read_csv(config_csv_file) 7 | 8 | for index,row in df.iterrows(): 9 | if not (row['batch_train'] or row['batch_test']): 10 | pass 11 | else: 12 | exp=row['experiment_name'] 13 | modelFile=getModelFileName(exp) 14 | modelFolder=getModelFolder(exp) 15 | 16 | logFile=os.path.join(modelFolder,'batch_log.txt') 17 | print(f"Beginning experiment {exp}") 18 | print(f" - Log file: {logFile}") 19 | with open(logFile,'w',buffering=1) as stdout: 20 | if row['batch_train']: 21 | if os.path.exists(modelFile): os.unlink(modelFile) 22 | print(" - Training.") 23 | subprocess.run(f"python train.py -e {exp}",stdout=stdout,stderr=stdout,shell=True) 24 | if row['batch_test']: 25 | print(" - Testing.") 26 | subprocess.run(f"python evaluate.py -e {exp}",stdout=stdout,stderr=stdout,shell=True) 27 | 28 | if os.path.exists(modelFile): 29 | print("Model found. Setting as trained.") 30 | df.loc[df['experiment_name']==exp, 'tested']=True 31 | df.loc[df['experiment_name']==exp, 'batch_test']=False 32 | df.loc[df['experiment_name']==exp, 'batch_train']=False 33 | df.loc[df['experiment_name']==exp, 'compare']=True 34 | df.to_csv(config_csv_file, index=False) 35 | else: 36 | print(f"Failed to find {modelFile}. Training failed.") 37 | 38 | print("Plotting experiment comparison.") 39 | subprocess.run(f"python plotresults.py ",shell=True) 40 | 41 | print("Done.") 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepTEC 2 | A deep learning laboratory for Total Electron Content prediction experiments, using TensorFlow 2.\ 3 | Models can be found in ./models/. Some reusable layers are in ./models/custom_layers.py.\ 4 | The c111 and c333 models were inspired by Boulch (2018). 5 | 6 | BOULCH, A.; CHERRIER, N.; CASTAINGS, T. Ionospheric activity prediction using convolutional recurrent neural networks. arXiv:1810.13273 [cs], 6 nov. 2018. 7 | https://github.com/aboulch/tec_prediction/ 8 | 9 | The models in ./models/dev/gps_solutions.py were described in the article: \ 10 | de Paulo MCM, Marques HA, Feitosa RQ, Ferreira MP (2023) New encoder–decoder convolutional LSTM neural network architectures for next-day global ionosphere maps forecast. GPS Solut 27(2):95. https://doi.org/10.1007/s10291-023-01442-4 11 | 12 | ## Download IONEX data 13 | python3 downloader.py 14 | Or download from: https://drive.google.com/file/d/1Sm_PiVUIabaew_3Y7sT0NWBqu7xsdHvi/view?usp=share_link 15 | 16 | ## Create a numpy representation of the downloaded data 17 | python3 ionex_samples.py 18 | 19 | ## Experiment configuration 20 | 21 | The configuration file "config.csv" is used to set up the hyperparameters of each experiment, such as the chosen model, input window, prediction window, and train and test datasets, among others.
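Each experiment's row can also be loaded programmatically through the "load_config" helper in "lstm_utils.py". A minimal sketch, run from the repository root and using one of the experiment names already present in "config.csv":

    from lstm_utils import load_config
    cfg = load_config("EF-ConvLSTM_3x3")  # returns a SimpleNamespace built from that CSV row
    print(cfg.model, cfg.lag_window, cfg.prediction_window, cfg.train_npy_dataset)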
22 | 23 | ## Batch processing 24 | 25 | The columns "batch_train" and "batch_test" in "config.csv" can be used to perform batch training and testing. Set them to True on the line that describes the experiment and run 26 | 27 | python3 batch_run.py 28 | 29 | The results will be created in the "output" folder, under a subfolder with the experiment's name. 30 | 31 | ## Training the network 32 | python3 train.py -e experiment_name 33 | 34 | Replace experiment_name with one of the experiment names listed in "config.csv" (batch_run.py passes it automatically). The "parameters.py" file is created during training. If you retrain the network, please remove it. 35 | 36 | ## Evaluating the trained network (test) 37 | python3 evaluate.py -e experiment_name 38 | 39 | ## Plot results 40 | python3 plotresults.py 41 | 42 | ## Google Colab 43 | https://github.com/mauriciodev/tec_forecast/blob/main/examples/tec_forecast.ipynb 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | ionex/ 132 | -------------------------------------------------------------------------------- /config.csv: -------------------------------------------------------------------------------- 1 | comment,experiment_name,batch_train,batch_test,tested,compare,batch_size,num_epochs,filters,model,prediction,lag_window,prediction_window,train_time_sampling,test_time_sampling,resample_rate,train_npy_dataset,test_npy_dataset,random_seed,loss,optimizer,best_of 2 | None,Repeat_19-20,True,True,True,True,4,30,8,usePrevious,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 3 | None,Repeat_14-15,True,True,True,True,4,30,8,usePrevious,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 4 | This is not a model,c1pg,False,False,True,True,1,1,0,c1pg,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,1 5 | This is not a model,c1pg14-15,False,False,True,True,1,1,0,c1pg,seq2seq,36,12,2,2,12,c1pg2015.npy,codg2015.npy,1,mae,adam,1 6 | Very low memory consumption,ANN_NtoN_12_2,False,False,True,True,32,200,150,ANN,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 7 | Very low memory consumption,ANN_NtoN_18_2,False,False,True,True,32,200,150,ANN,seq2seq,18,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 8 | Very low memory consumption,ANN_NtoN_24_2,False,False,True,True,32,200,150,ANN,seq2seq,24,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 9 | Very low memory consumption,ANN_NtoN_30_2,False,False,True,True,32,200,150,ANN,seq2seq,30,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 10 | Very low memory consumption,ANN_NtoN_36_2,False,False,True,True,32,200,150,ANN,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 11 | 
deeptec1-19-20,iConvLSTM,True,True,True,True,16,200,48,iConvLSTM,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 12 | ,EF-ConvLSTM_1x1,False,False,True,True,4,200,24,c111,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 13 | deeptec1-19-20,EF-ConvLSTM_3x3,True,True,True,True,8,200,24,c333,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 14 | ,REF-ConvLSTMv1_1x1,False,False,True,True,4,200,24,c111_res,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 15 | deeptec1-19-20,REF-ConvLSTMv1_3x3,True,True,True,True,4,200,24,c333_res,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 16 | ,EF-ConvLSTMv2_1x1,False,False,True,True,4,200,24,c111_res_v2,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 17 | deeptec1-19-20,EF-ConvLSTMv2_3x3,True,True,True,True,8,200,24,c333_res_v2,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 18 | deeptec1-19-20,EF-ConvLSTMv3,True,True,True,True,8,200,24,c333_res_v3,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 19 | cbcg,ConvLSTM_Bi,False,False,True,True,8,200,24,c333bi,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 20 | cbcg,ConvLSTM_N-1,False,False,True,True,8,200,24,c333_nto1,seq2one,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 21 | cbcg,EF_ConvLSTM_60,False,False,True,True,4,200,24,c333,seq2seq,60,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 22 | cbcg,EF_ConvLSTM_48,False,False,True,True,4,200,24,c333,seq2seq,48,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 23 | cbcg,EF_ConvLSTM_36,False,False,True,True,4,200,24,c333,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 24 | cbcg,EF_ConvLSTM_24,False,False,True,True,4,200,24,c333,seq2seq,24,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 25 | cbcg,EF_ConvLSTM_12,False,False,True,True,4,200,24,c333,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 26 | deeptec1,IconvLSTM_14-15,True,True,True,True,16,200,48,iConvLSTM,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 27 | ,EF-ConvLSTM_1x1_14-15,False,False,True,True,4,200,24,c111,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 28 | deeptec1-14-15,EF-ConvLSTM_3x3_14-15,True,True,True,True,4,200,24,c333,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 29 | ,EF-ConvLSTMv2_1x1_14-15,False,False,True,True,4,200,24,c111_res_v2,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 30 | deeptec1-14-15 ,EF-ConvLSTMv2_3x3_14-15_12,True,True,True,True,8,200,24,c333_res_v2,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 31 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15_24,True,True,True,True,8,200,24,c333_res_v2,seq2seq,24,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 32 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15,True,True,True,True,8,200,24,c333_res_v2,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 33 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15_48,True,True,True,True,8,200,24,c333_res_v2,seq2seq,48,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 34 | Out of memory,EF-ConvLSTMv2_3x3_14-15_60,False,False,False,True,8,200,24,c333_res_v2,seq2seq,60,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 35 | deeptec1,EF-ConvLSTMv3_14-15,True,True,True,True,8,200,24,c333_res_v3,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 36 | ,REF-ConvLSTMv1_1x1_14-15,False,False,True,True,4,200,24,c111_res,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 37 | 
deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_12,True,True,True,True,4,200,24,c333_res,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 38 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_24,True,True,True,True,4,200,24,c333_res,seq2seq,24,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 39 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15,True,True,True,True,4,200,24,c333_res,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 40 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_48,True,True,True,True,4,200,24,c333_res,seq2seq,48,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 41 | -------------------------------------------------------------------------------- /extra/ionex_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | 8 | fname="../output/EF-ConvLSTMv2_3x3_14-15/predicted_0.h5" 9 | outFolder='edconvlstm_nd' 10 | nstepsin = 36 11 | 12 | t0=pd.to_datetime('2015-01-01') 13 | timedelta=pd.Timedelta('2H') 14 | 15 | 16 | 17 | lat1 = 87.5 18 | lon1 = -180.0 19 | lat2 = -87.5 20 | lon2 = 180.0 21 | h = 450 22 | scale = 0.1 23 | convertFrom72x72 = True 24 | 25 | if fname.endswith('.h5'): #recompose the h5 into a time series 26 | f = h5py.File(fname, 'r') 27 | total_seq=[] 28 | for i in range(len(f.keys())): 29 | batch_id = str(i) 30 | batch = f[batch_id] 31 | batch_size = batch.shape[0] 32 | if i==0: print(f"Batch shape: {batch.shape}") 33 | total_seq.append(batch) 34 | pred_seq = np.concatenate(total_seq, axis=0) 35 | #m=m.squeeze() 36 | else: 37 | pred_seq = np.load(fname) 38 | avg = 24.82940426949007 39 | std = 19.74997754805293 40 | pred_seq = pred_seq * std +avg 41 | if 'SimVP' in fname: 42 | pred_seq = np.moveaxis(pred_seq, 2, 4) 43 | 44 | nstepsout = pred_seq.shape[1] 45 | 46 | def fill_spaces(s, size=80): 47 | if len(s) ", outfile) 17 | urllib.request.urlretrieve(url,outfile) 18 | return outfile 19 | 20 | 21 | 22 | #Copied from https://cddis.nasa.gov/Data_and_Derived_Products/CDDIS_Archive_Access.html 23 | class cddisDownloader(): 24 | def __init__(self): 25 | pass 26 | 27 | def listfolder(self,url): 28 | #Adds '*?list' to the end of URL if not included already 29 | if not url.endswith("*?list"): 30 | url = url + "*?list" 31 | 32 | #Makes request of URL, stores response in variable r 33 | r = requests.get(url, verify=False) 34 | res=[] 35 | for line in r.text.splitlines(): 36 | f=line.split(' ')[0] 37 | if f[0]!="#": 38 | res.append(f) 39 | 40 | return res 41 | 42 | #Prints the results of the directory listing 43 | #print(r.text) 44 | 45 | 46 | def _download(self,url,rootdir): 47 | if not os.path.exists(rootdir): 48 | os.makedirs(rootdir) 49 | 50 | # Assigns the local file name to the last part of the URL 51 | filename = url.split('/')[-1] 52 | 53 | # Makes request of URL, stores response in variable r 54 | r = requests.get(url) 55 | if r.status_code==404: 56 | logging.warning("File not found: "+url) 57 | 58 | # Opens a local file of same name as remote file for writing to 59 | with open(os.path.join(rootdir,filename), 'wb') as fd: 60 | for chunk in r.iter_content(chunk_size=1000): 61 | fd.write(chunk) 62 | 63 | # Closes local file 64 | fd.close() 65 | 66 | def _download2(self,url,rootdir): 67 | if not os.path.exists(rootdir): 68 | os.makedirs(rootdir) 69 | filename = url.split('/')[-1] 70 | os.chdir(rootdir) 71 | cmd=f"curl -c [file] -n -L -O \"{url}\"" 72 | if not os.path.exists(filename): 73 | subprocess.run(cmd, shell=True) 74 | 75 | def 
download(self,jday, year, rootdir, prefix="codg"): 76 | fileurl = "https://cddis.nasa.gov/archive/gnss/products/ionex/{year}/{jday:03d}/{prefix}{jday:03d}0.{lastDigits:02d}i.Z".format(year=year,jday=jday,lastDigits=year % 100, prefix=prefix) 77 | filename = fileurl.split('/')[-1] 78 | output=os.path.join(rootdir,filename) 79 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 80 | print(year, jday, fileurl) 81 | try: 82 | self._download2(fileurl,rootdir) 83 | except: 84 | print(f"Failed to download {fileurl}.") 85 | 86 | class magnDownloader(): 87 | def __init__(self): 88 | top_level_url = "wilkilen.fcaglp.unlp.edu.ar" 89 | netrcData = netrc.netrc() 90 | authTokens = netrcData.authenticators(top_level_url) 91 | password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() 92 | top_level_url="http://"+top_level_url 93 | password_mgr.add_password(None, top_level_url, authTokens[0], authTokens[2]) 94 | handler = urllib.request.HTTPBasicAuthHandler(password_mgr) 95 | # create "opener" (OpenerDirector instance) 96 | self.opener = urllib.request.build_opener(handler) 97 | # use the opener to fetch a URL 98 | a_url="http://wilkilen.fcaglp.unlp.edu.ar/" 99 | self.opener.open(a_url) 100 | # Install the opener. 101 | # Now all calls to urllib.request.urlopen use our opener. 102 | urllib.request.install_opener(self.opener) 103 | 104 | def download(self,jday, year, rootdir, prefix="magn"): 105 | fileurl = "http://wilkilen.fcaglp.unlp.edu.ar/ion/magn/{year}/{jday:03d}/{prefix}{jday:03d}0.{lastDigits:02d}i.Z".format(year=year,jday=jday,lastDigits=year % 100, prefix=prefix) 106 | filename = fileurl.split('/')[-1] 107 | output=os.path.join(rootdir,filename) 108 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 109 | print(year, jday, fileurl) 110 | try: 111 | urllib.request.urlretrieve(fileurl, output) 112 | except: 113 | print(f"Failed to download {fileurl}.") 114 | def main(): 115 | rootdir="ionex" 116 | years=range(2012,2022)#[2021,2020,2019,2018] 117 | cddis=cddisDownloader() 118 | magn=magnDownloader() 119 | 120 | for year in years: 121 | leap= 0 if (2000+year)%4 else 1 122 | for jday in range(1,366+leap): 123 | cddis.download(jday,year,rootdir) 124 | years=[2019,2020,2015]#[2021,2020,2019,2018] 125 | for year in years: 126 | leap= 0 if (2000+year)%4 else 1 127 | for jday in range(1,366+leap): 128 | cddis.download(jday,year,rootdir,prefix="c1pg") 129 | #cddis.download(jday,year,rootdir,prefix="corg") 130 | #magn.download(jday,year,rootdir) 131 | os.chdir(rootdir) 132 | os.system("uncompress *.Z") 133 | 134 | 135 | if __name__=="__main__": 136 | main() 137 | -------------------------------------------------------------------------------- /spaceweather/indicesdownloader.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | print("Remember that this script requires .netrc on your home folder") 3 | import logging 4 | import pandas as pd 5 | #curl -d "activity=retrieve&res=hour&spacecraft=omni2&start_date=20050101&end_date=20050301&vars=8&vars=38&vars=49&vars=50&scale=Linear&table=0" https://omniweb.gsfc.nasa.gov/cgi/nx1.cgi > test_curl.txt 6 | from datetime import date 7 | import urllib 8 | import urllib.request as request 9 | import re 10 | import shutil 11 | from contextlib import closing 12 | 13 | import matplotlib.pyplot as plt 14 | from matplotlib.dates import DateFormatter, AutoDateLocator 15 | 16 | 17 | 
""" 18 | Use getIndexes(year,folder) to download the dataframe (3 hours interval). 19 | Pass year='nowcast' to download 'nowcast' data. 20 | Use getInterpolatedIndexes(year,folder) to download 1 hour linearly interpolated data. 21 | """ 22 | class indicesDownloader(): 23 | def __init__(self): 24 | self.postdamurl="ftp://ftp.gfz-potsdam.de/pub/home/obs/Kp_ap_Ap_SN_F107/" 25 | #self.f107mmURL="ftp://ftp.seismo.nrcan.gc.ca/spaceweather/solar_flux/daily_flux_values/fluxtable.txt" 26 | 27 | def getInterpolatedIndexes(self,year,rootdir): 28 | df=self.getIndexes(year,rootdir) 29 | return self.interpolate(df) 30 | 31 | def getIndexes(self,year,rootdir): 32 | #use year='nowcast' to download nowcast 33 | filename=f"Kp_ap_Ap_SN_F107_{year}.txt" 34 | fileurl=urllib.parse.urljoin(self.postdamurl,filename) 35 | output=os.path.join(rootdir,filename) 36 | os.makedirs(rootdir,exist_ok=True) 37 | if year=='nowcast': os.unlink(output) #nowcast always downloads 38 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 39 | print(fileurl) 40 | self._download(fileurl,rootdir) 41 | 42 | cols=[4,3,3,6,8,5,3,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,6,4,9,9,2] 43 | #Be careful when changing the header: Ap column is already used. 44 | header="YYYY MM DD days days_m Bsr dB Kp1 Kp2 Kp3 Kp4 Kp5 Kp6 Kp7 Kp8 ap1 ap2 ap3 ap4 ap5 ap6 ap7 ap8 Apm SN F107obs F107adj D" 45 | header=re.split(' +', header) 46 | df=pd.read_fwf(output,widths=cols,comment='#',header=None, names=header) 47 | 48 | #Kp/f10.7 pivot processing 49 | KpCols=header[7:15] 50 | kpdf=pd.melt(df,id_vars=header[:3]+["F107obs","F107adj"],value_vars=KpCols,var_name="KpStep",value_name="Kp") 51 | kpdf['hour']=(kpdf['KpStep'].str.get(2).astype(int)-1)*3+1 52 | kpdf[["date"]]=pd.to_datetime(dict(year=kpdf.YYYY, month=kpdf.MM, day=kpdf.DD,hour=kpdf.hour)) 53 | kpdf=kpdf[['date','Kp',"F107obs","F107adj"]].sort_values(["date"]) 54 | #datetime col 55 | kpdf=kpdf.set_index('date') 56 | #Ap pivot processing 57 | ApCols=header[15:23] 58 | apdf=pd.melt(df,id_vars=header[:3],value_vars=ApCols,var_name="ApStep",value_name="Ap") 59 | apdf['hour']=(apdf['ApStep'].str.get(2).astype(int)-1)*3+1 60 | apdf[["date"]]=pd.to_datetime(dict(year=apdf.YYYY, month=apdf.MM, day=apdf.DD,hour=apdf.hour)) 61 | apdf=apdf[['Ap','date']].sort_values(["date"]) 62 | apdf=apdf.set_index('date') 63 | kpdf=kpdf.join(apdf) 64 | return kpdf 65 | 66 | def interpolate(self,df): 67 | df=df.resample('1H').interpolate() 68 | #I'm not sure, but I had to extrapolate the first and last hours repeating what would be a step function. 
69 | row_1=df.head(1) 70 | row_1.index=row_1.index+pd.DateOffset(hours=-1) 71 | row_last=df.tail(1) 72 | row_last.index=row_last.index+pd.DateOffset(hours=1) 73 | df = pd.concat([row_1,df,row_last], ignore_index=False) 74 | return df 75 | 76 | def _download(self,url,rootdir): 77 | if not os.path.exists(rootdir): 78 | os.makedirs(rootdir) 79 | 80 | # Assigns the local file name to the last part of the URL 81 | filename = url.split('/')[-1] 82 | 83 | fullFilePath=os.path.join(rootdir,filename) 84 | with closing(request.urlopen(url)) as r: 85 | with open(fullFilePath, 'wb') as f: 86 | shutil.copyfileobj(r, f) 87 | 88 | #if r.status_code==404: 89 | # logging.warning("File not found: "+url) 90 | 91 | 92 | def plotSeries(df,title,measure="Ap"): 93 | fig, ax = plt.subplots(figsize=(10, 6)) 94 | ax.set_title(title) 95 | ax.grid(True) 96 | # Same as above 97 | ax.set_xlabel('Date') 98 | ax.xaxis.set_major_locator(AutoDateLocator()) 99 | ax.xaxis.set_major_formatter(DateFormatter('%b %d %Y')) 100 | # Plotting on the first y-axis 101 | ax.set_ylabel(measure) 102 | ax.plot(df.index, df[measure], color='tab:orange', label=measure) 103 | name=title.replace(" ","_") 104 | plt.savefig(f"{name}.png",bbox_inches='tight') 105 | 106 | def main(): 107 | downloader=indicesDownloader() 108 | year='2019' #2019,2020 or 'nowcast' 109 | apdf=downloader.getIndexes(year,"./postdam2/") 110 | 111 | plotSeries(apdf,f'Ap time series of year {year}') 112 | plotSeries(apdf,f'F10.7cm time series of year {year}',"F107adj") 113 | idf=downloader.interpolate(apdf) 114 | 115 | apdf=apdf[(apdf.index>=pd.Timestamp("2019-06-13")) & (apdf.index<=pd.Timestamp("2019-06-15"))] 116 | idf=idf[(idf.index>=pd.Timestamp("2019-06-13")) & (idf.index<=pd.Timestamp("2019-06-15"))] 117 | 118 | fig, ax = plt.subplots(figsize=(10, 6)) 119 | axb = ax.twinx() 120 | ax.set_title('Interpolating from 3 hours to 1 hour intervals') 121 | ax.grid(True) 122 | 123 | # Same as above 124 | ax.set_xlabel('Date (Month-day hour)') 125 | ax.xaxis.set_major_locator(AutoDateLocator()) 126 | ax.xaxis.set_major_formatter(DateFormatter('%d %b : %H:%M')) 127 | 128 | # Plotting on the first y-axis 129 | ax.set_ylabel('Ap (geomagnetic index)') 130 | ax.plot(apdf.index, apdf["Ap"], color='tab:gray', label='Downloaded Ap' ) #'tab:orange' 131 | ax.plot(idf.index, idf["Ap"], color='tab:gray', label='Interpolated Ap',marker='o', markersize=4, linestyle='None') 132 | 133 | # Plotting on the second y-axis 134 | axb.set_ylabel('F10.7cm (solar flux)') 135 | axb.plot(apdf.index, apdf["F107adj"], color='k', label='Downloaded F10.7', linestyle= 'dashed') 136 | axb.plot(idf.index, idf["F107adj"], color='k', label='Interpolated F10.7',marker='o', markersize=4, linestyle='None') 137 | 138 | 139 | 140 | # Handling of getting lines and labels from all axes for a single legend 141 | lines, labels = ax.get_legend_handles_labels() 142 | lines2, labels2 = axb.get_legend_handles_labels() 143 | axb.legend(lines + lines2, labels + labels2, loc='upper left') 144 | 145 | 146 | plt.savefig("forecast.png",bbox_inches='tight') 147 | 148 | if __name__=="__main__": 149 | main() 150 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow.keras as keras 3 | import pandas as pd 4 | from sklearn.model_selection import train_test_split 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | class MergedGenerators(keras.utils.Sequence): 9 | def 
__init__(self, generators=[]): 10 | self.generators = generators 11 | self.len_gen=[len(x) for x in self.generators] 12 | self.gen_ids=np.concatenate([ x*[i] for i,x in enumerate(self.len_gen)]) #calculates which generator contain each sample id 13 | self.delta_id=np.roll(np.cumsum(self.len_gen),1) #how much we should reduce from a global index to get the generator index 14 | self.delta_id[0]=0 15 | 16 | def __len__(self): 17 | return sum(self.len_gen) 18 | 19 | def __getitem__(self, index): 20 | """Getting items from the generators and packing them""" 21 | gen_id=self.gen_ids[index] 22 | gen_index=index-self.delta_id[gen_id] 23 | return self.generators[gen_id][gen_index] 24 | def count(self): 25 | return sum([x.count() for x in self.generators]) 26 | 27 | 28 | 29 | class DataGenerator(keras.utils.Sequence): 30 | 'Generates data for Keras' 31 | def __init__(self, x, batch_size, nstepsin=4, nstepsout=1, shuffle=True,training=True, removeRotation=False, sample_rate=12, val_split=0, validation=False, random_state=23): 32 | 'Initialization' 33 | super().__init__() 34 | self.list_IDs=range(0,len(x)-(nstepsout-1+nstepsin),sample_rate) #store the index to allow shuffling 35 | if val_split>0: 36 | datasplit=train_test_split(self.list_IDs,random_state=random_state, test_size=val_split) 37 | if validation==False: #training generator 38 | self.list_IDs=datasplit[0] 39 | else: 40 | self.list_IDs=datasplit[1] 41 | 42 | self.nstepsin=nstepsin 43 | self.nstepsout=nstepsout 44 | self.batch_size=batch_size 45 | self.dim=x[0].shape 46 | self.shuffle=shuffle 47 | self.training=training 48 | self.removeRotation=removeRotation 49 | self.x=self.preprocess(x) 50 | self.on_epoch_end() 51 | 52 | def preprocess(self,x): 53 | if self.removeRotation: 54 | shift=3 #int(72/24) #number of columns rolled per hour 55 | series=[] 56 | for i in range(0,len(x)): 57 | series.append(x[i,:,:-1,:]) 58 | series[i]=np.roll(series[i],shift,axis=1) 59 | return np.array(series) 60 | else: 61 | return x 62 | def count(self): 63 | 'Returns the number of samples' 64 | return len(self.list_IDs) 65 | 66 | def __len__(self): 67 | 'Denotes the number of batches per epoch' 68 | return int(np.ceil(len(self.list_IDs) / self.batch_size)) 69 | 70 | def __getitem__(self, index): 71 | 'Generate one batch of data' 72 | # Generate indexes of the batch 73 | indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size] 74 | 75 | # Find list of IDs 76 | list_IDs_temp = [self.list_IDs[k] for k in indexes] 77 | 78 | # Generate data 79 | X, y = self.__data_generation(list_IDs_temp) 80 | 81 | if self.training: 82 | return X, y 83 | else: 84 | return X 85 | 86 | def on_epoch_end(self): 87 | 'Updates indexes after each epoch' 88 | self.indexes = np.arange(len(self.list_IDs)) 89 | if self.shuffle == True: 90 | np.random.shuffle(self.indexes) 91 | 92 | def __data_generation(self, list_IDs_temp): 93 | 'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels) 94 | # Initialization 95 | X = [] 96 | Y = [] 97 | # Generate data 98 | for ID in list_IDs_temp: 99 | x,y=self.split_sequence(ID) 100 | X.append(x) 101 | Y.append(y[...,[0]]) #this change was made to force only the tec as output 102 | #Y.append(y) 103 | return np.array(X), np.array(Y) 104 | 105 | def split_sequence(self, i): 106 | # find the end of this pattern 107 | end_ix = i + self.nstepsin 108 | # check if we are beyond the sequence 109 | if end_ix + self.nstepsout> len(self.x): 110 | return None,None 111 | # gather input and output parts of the pattern 112 | #seq_x, 
seq_y = self.x[i:end_ix], self.x[i+1:end_ix+self.nstepsout] #this was used for residual prediction 113 | seq_x, seq_y = self.x[i:end_ix], self.x[end_ix:end_ix+self.nstepsout] 114 | #if self.nstepsout==1: seq_y=seq_y[0] #this is because the network is not going to expect a vector 115 | #seq_x=self.pad(seq_x) #this was a test to use circular padding. 116 | return seq_x,seq_y 117 | 118 | def asArray(self): 119 | return self.__data_generation(self.list_IDs) 120 | def pad(self,mseq): #circular padding 121 | mseq=np.pad(mseq,pad_width=((0,0),(0,0),(4,4),(0,0)),mode='wrap') 122 | mseq=np.pad(mseq,pad_width=((0,0),(4,4),(0,0),(0,0)),mode='edge') 123 | return mseq 124 | 125 | 126 | class DataGenerator1d(keras.utils.Sequence): 127 | def __init__(self): 128 | super().__init__() 129 | def __data_generation(self, list_IDs_temp): 130 | 'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels) 131 | # Initialization 132 | X = [] 133 | Y = [] 134 | # Generate data 135 | for ID in list_IDs_temp: 136 | x,y=self.split_sequence(ID) 137 | X.append(x) 138 | Y.append(y[...,[0]]) #this change was made to force only the tec as output 139 | #Y.append(y) 140 | return np.array(X), np.array(Y) 141 | 142 | if __name__=="__main__": 143 | data=np.array(np.sin(np.arange(0,100,0.1))) 144 | data=np.expand_dims(data,-1) 145 | nstepsin=36 146 | nstepsout=24 147 | 148 | 149 | 150 | """gen=DataGenerator1d(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 151 | x,y=gen[0] 152 | plt.plot(range(0,nstepsin),x[0]) 153 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 154 | plt.show() 155 | plt.close() 156 | 157 | data=np.array(np.sin(np.arange(100,150,0.1))) 158 | data=np.expand_dims(data,-1) 159 | gen2=DataGenerator1d(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2)""" 160 | from itertools import chain 161 | gen1=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 162 | gen2=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 163 | chained=chain(gen1,gen2) 164 | 165 | mgen=MergedGenerators([gen1,gen2]) 166 | 167 | x,y=mgen[0] 168 | 169 | gen=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 170 | x,y=gen[0] 171 | print(x[0]) 172 | print(y[0]) 173 | plt.plot(range(0,nstepsin),x[0]) 174 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 175 | plt.show() 176 | plt.close() 177 | gen=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 178 | x,y=gen[0] 179 | plt.plot(range(0,nstepsin),x[0]) 180 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 181 | plt.show() 182 | plt.close() 183 | 184 | 185 | #print(gen.split_sequence(0)) 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | import sys,os 2 | 3 | from tensorflow.keras.models import Model, Sequential 4 | from tensorflow.keras.backend import expand_dims, repeat_elements 5 | import tensorflow as tf 6 | 7 | from models.custom_layers import * 8 | if os.path.exists('models/dev'): 9 | for f in os.listdir('models/dev'): #importing dev folder if it exists 10 | if f.endswith('.py'): 11 | s=f"from models.dev.{f[:-3]} import *" 12 | exec(s) 13 | 14 | 15 | """ANN 16 | The dense layers work only on the temporal dimension.""" 17 | def ANN(inputShape,filters=50,nstepsout=1, layers=3, activation="linear"): 18 | #inspired by 
https://www.tensorflow.org/tutorials/structured_data/time_series#multi-step_models 19 | in_im = Input(shape=inputShape) 20 | x=in_im 21 | x = Permute((2,3,1,4), name="MoveTimeToLastDim")(x) #moves time to last dimension 22 | newShape=x.shape 23 | x= Reshape((*newShape[1:-2],-1))(x) 24 | for i in range(layers): 25 | if i==layers-1: 26 | filters=nstepsout 27 | #activation="LeakyReLU" #None 28 | x = BatchNormalization()(x) 29 | x = Dropout(0.2)(x) 30 | x = Dense(filters, activation=activation)(x) 31 | x=expand_dims(x, axis=1) 32 | x = Permute((4,2,3,1), name="TimeToFirstDim")(x) #moves time back to first dim 33 | model = Model(in_im, x) 34 | return model 35 | 36 | """Convolutional LSTM N to 1 implementation with 1x1 kernels. 37 | Inspired by https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html""" 38 | def c111_nto1(inputShape,filters=16,nstepsout=1, kernel=(1, 1), scale=1.,offset=0., dropout=0): 39 | in_im = Input(shape=inputShape) 40 | x=in_im 41 | #encoder 42 | #x = Conv3D(1, 1, padding='same',activation="relu")(in_im) 43 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x) 44 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x) 45 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=False, dropout=dropout)(x) 46 | x=Conv2D(1, (1, 1), activation='linear', padding='same')(x) #changed to 47 | x=expand_dims(x, axis=1) 48 | encmodel = Model(in_im, x) 49 | return encmodel 50 | 51 | """ Convolutional LSTM N to 1 implementation with 3x3 kernels. 52 | Ispired by ConvLSTM dilated 121 model (Boulch, 2018) 53 | Changes: 54 | - tanh activation instead of relu. Data was normalized with negative numbers. ReLu doesn't reach negatives. 55 | """ 56 | def c333_nto1(inputShape,filters=16,nstepsout=12, dropout=0): 57 | kernel=(3, 3) 58 | model =c111_nto1(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 59 | return model 60 | 61 | """BiConvLSTM 1x1. 62 | Bidirectional Convolutional LSTM.""" 63 | def c111bi(inputShape,filters=16,nstepsout=16, kernel=(1, 1), dropout=0): 64 | in_im = Input(shape=inputShape) 65 | x=in_im 66 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 67 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 68 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 69 | x=TimeDistributed(Conv2D(1, (1, 1), activation='linear', padding='same'))(x) #changed to 70 | encmodel = Model(in_im, x) 71 | return encmodel 72 | 73 | """BiConvLSTM 3x3. 74 | Bidirectional Convolutional LSTM.""" 75 | def c333bi(inputShape,filters=16,nstepsout=12, dropout=0): 76 | kernel=(3, 3) 77 | model =c111bi(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 78 | return model 79 | 80 | 81 | """ED-ConvLSTM. 82 | Encoder-Decoder Convolutional LSTM 1x1. 
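The three encoder ConvLSTM2D layers return their final (h, c) states; the last encoder output is repeated nstepsout times and fed to a mirrored decoder stack whose layers are initialized with those states, and a final time-distributed 1x1 convolution maps the result back to a single output channel.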
83 | """ 84 | def c111(inputShape,filters=16,nstepsout=12, kernel=(1, 1), dropout=0): 85 | #Inspired on https://github.com/Azure/DeepLearningForTimeSeriesForecasting/blob/master/3_RNN_encoder_decoder.ipynb 86 | in_im = Input(shape=inputShape) 87 | x=in_im 88 | #encoder 89 | x,h1,c1=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True,return_state=True, dropout=dropout)(x) 90 | x,h2,c2=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True,return_state=True, dropout=dropout)(x) 91 | x,h3,c3=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=False,return_state=True, dropout=dropout)(x) 92 | x=Lambda(lambda x: repeat_elements(expand_dims(x, axis=1), nstepsout, 1))(x) 93 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h1,c1]) 94 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h2,c2]) 95 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h3,c3]) 96 | x=TimeDistributed(Conv2D(1, (1, 1), activation='linear', padding='same'))(x) #changed to 97 | encmodel = Model(in_im, x) 98 | return encmodel 99 | 100 | """ED-ConvLSTM. 101 | Encoder-Decoder Convolutional LSTM 3x3.""" 102 | def c333(inputShape,filters=16,nstepsout=12, dropout=0.2): 103 | kernel=(3, 3) 104 | model =c111(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 105 | return model 106 | 107 | """iConvLSTM 108 | from https://onlinelibrary.wiley.com/doi/abs/10.1029/2021SW002854""" 109 | def iConvLSTM(inputShape,filters=16,nstepsout=12): 110 | in_im = Input(shape=inputShape) 111 | x=in_im 112 | #encoder 113 | x=TimeDistributed(Conv2D(filters*1, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 114 | x1=ConvLSTM2D(filters=filters*1, kernel_size=(5,5), padding='same',return_sequences=True, activation='LeakyReLU')(x) 115 | x=TimeDistributed(Conv2D(filters*2, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x1) #changed to 116 | x2=ConvLSTM2D(filters=filters*2, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 117 | x=TimeDistributed(Conv2D(filters*4, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x2) #changed to 118 | x3=ConvLSTM2D(filters=filters*4, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 119 | 120 | x=ConvLSTM2D(filters=filters*4, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x3) 121 | x=TimeDistributed(Conv2DTranspose(filters*4, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 122 | x=Concatenate()([x,x2]) 123 | x=ConvLSTM2D(filters=filters*2, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 124 | x=TimeDistributed(Conv2DTranspose(filters*2, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 125 | x=Concatenate()([x,x1]) 126 | x=ConvLSTM2D(filters=filters*1, kernel_size=(5,5), padding='same',return_sequences=True, activation='LeakyReLU')(x) 127 | x=TimeDistributed(Conv2DTranspose(filters*1, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 128 | x=TimeDistributed(Conv2DTranspose(1, (1, 1), activation='LeakyReLU', padding='same',strides=(1,1)))(x) #changed to 129 | encmodel = Model(in_im, x) 130 | return encmodel 131 | 132 | 
"""Repeat previous day baseline.""" 133 | def usePrevious(inputShape,filters=0,nstepsin=12,nstepsout=24): 134 | #inspired by https://www.tensorflow.org/tutorials/structured_data/time_series#baselines 135 | in_im = Input(shape=inputShape) 136 | nstepsin=inputShape[1] 137 | if nstepsout!=nstepsin: 138 | x=Lambda(lambda x: x[:,-nstepsout:,...])(in_im) 139 | else: 140 | x=in_im 141 | m = Model(in_im, x) 142 | return m 143 | 144 | 145 | if __name__=="__main__": 146 | shape=(24,72,72,1) 147 | model=ANN(shape,8,12) 148 | model.summary() 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/ 2 | #https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html 3 | #https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb 4 | 5 | import os,sys, shutil 6 | from datetime import datetime 7 | from itertools import chain 8 | 9 | import numpy as np 10 | from numpy import array 11 | import pandas as pd 12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 13 | import tensorflow as tf 14 | from tensorflow.keras.layers import LSTM, ConvLSTM2D, Dense,BatchNormalization, Input 15 | from tensorflow.keras.callbacks import EarlyStopping 16 | from tensorflow.keras.utils import plot_model 17 | 18 | from models.models import * 19 | from generator import DataGenerator, MergedGenerators 20 | from lstm_utils import * #This is ours. 21 | 22 | 23 | #These are not necessary on colab 24 | tf.config.experimental.set_memory_growth(tf.config.get_visible_devices()[1], True) 25 | 26 | #loading experiment configuration 27 | args=get_args() 28 | config=load_config(args.experiment) 29 | print(config) 30 | 31 | np.random.seed(1)#config.random_seed) 32 | randomSeeds=(np.random.random(config.best_of)*100).astype(int) 33 | 34 | #we have different datasets for the models that use space weather 35 | print(f"Loading data from {config.train_npy_dataset}") 36 | 37 | ionex_npy=config.train_npy_dataset.split(',') 38 | ionexList=[] 39 | for npy_file in ionex_npy: 40 | ionex=np.load(npy_file) 41 | if config.train_time_sampling>1: 42 | ionex=ionex[::config.train_time_sampling,...] #2h step 43 | 44 | #Resizing to 72x72 45 | ionex=np.concatenate((ionex[:,:,:-1,:],ionex[:,-1:,:-1,:]),axis=1) 46 | ionex=getDataSubset(ionex,config.experiment_name) 47 | 48 | ionexList.append(ionex) 49 | 50 | #scaling 51 | ionexStack=np.concatenate(ionexList) 52 | parameters = { "mean" : ionexStack.mean(axis=(0,1,2)) , "max": ionexStack.max(axis=(0,1,2)), "min": ionexStack.min(axis=(0,1,2)), "input_t_steps": config.lag_window} 53 | ionexList=[scaleForward(ionex,parameters) for ionex in ionexList] 54 | del ionexStack 55 | #We need to save these scaling parameters for prediction 56 | print("Saving scaling information on parameters.py. 
If you change the input data, please remove the file and retrain.") 57 | with open(getModelFilePath(config.experiment_name, f"params.py"),'w') as f:f.write(repr(parameters)) 58 | 59 | exp_val_rmse=[] 60 | 61 | for experimentNumber, randomSeed in enumerate(randomSeeds): #this represents how many times we are going to train the network 62 | print(f"Starting experiment {experimentNumber}") 63 | try: 64 | model= eval(config.model) 65 | except: 66 | print(f"Error trying to load the model chosen in {args.config}") 67 | sys.exit() 68 | 69 | 70 | ## HYPER PARAMETERS ## 71 | batch_size=config.batch_size 72 | input_t_steps=config.lag_window 73 | #output_t_steps=config.prediction_window#12#24 74 | if config.prediction=='seq2one': 75 | output_t_steps=1 76 | else: 77 | output_t_steps=config.prediction_window 78 | 79 | print("Input shape: ",ionex.shape) 80 | 81 | #The data generators apply the sliding window for the time frames 82 | training_generators=[] 83 | validation_generators=[] 84 | for ionex in ionexList: 85 | training_generators.append(DataGenerator(ionex, batch_size=batch_size, nstepsin=config.lag_window, nstepsout=output_t_steps,sample_rate=config.resample_rate, validation=False, val_split=0.2, random_state=23)) 86 | validation_generators.append(DataGenerator(ionex, batch_size=batch_size, nstepsin=config.lag_window, nstepsout=output_t_steps,sample_rate=config.resample_rate, validation=True, val_split=0.2, random_state=23)) 87 | training_generator=MergedGenerators(training_generators) 88 | validation_generator=MergedGenerators(validation_generators) 89 | 90 | 91 | #print(f"Checking intersections: {list(set(validation_generator.list_IDs) & set(training_generator.list_IDs))}") 92 | 93 | print(f"Training maps: {training_generator[0][0].shape}") 94 | print(f"Validation maps: {validation_generator[0][0].shape}") 95 | 96 | print(f"Training set: {training_generator.count()}") 97 | print(f"Validation set: {validation_generator.count()}") 98 | 99 | batch_shape_x=training_generator[0][0][0].shape 100 | batch_shape_y=training_generator[0][1][0].shape 101 | print(f"batch_shape_x={batch_shape_x}") 102 | print(f"batch_shape_y={batch_shape_y}") 103 | 104 | model=model(batch_shape_x,nstepsout=output_t_steps, filters=config.filters) #initializing model 105 | 106 | 107 | print(model.summary()) 108 | plot_model(model, to_file=getModelFilePath(config.experiment_name, "model.png"), show_shapes=True, show_layer_names=True) 109 | 110 | model.compile(optimizer=config.optimizer, jit_compile=True, loss=config.loss,metrics=['mean_absolute_error', 'mean_squared_error']) 111 | 112 | print("Model fitting") 113 | earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,patience=10, restore_best_weights=True, verbose = 1, mode="min") #0.0001 / 10 114 | #checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(getModelFileName(config.experiment_name), monitor='val_loss', verbose=1, save_best_only=True) 115 | logdir='./logs/'+config.experiment_name 116 | tb_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='epoch') 117 | from tensorboard.plugins.hparams import api as hp 118 | 119 | 120 | #model.adapt(training_generator) 121 | 122 | start = datetime.now() 123 | print(f'Started training: {start}') 124 | history=model.fit(training_generator,validation_data=validation_generator, epochs=config.num_epochs, verbose=2, callbacks = [earlystopping,tb_callback]) 125 | 126 | end = datetime.now() 127 | print(f'Ended training: {end}') 128 | totaltime= end - start 129 | 130 | 
currentModelFname=getModelFileName(config.experiment_name,experimentNumber) 131 | model.save(currentModelFname) 132 | 133 | 134 | val_results = model.evaluate(validation_generator,batch_size=config.batch_size,verbose=2) 135 | train_results = model.evaluate(training_generator,batch_size=config.batch_size,verbose=2) 136 | print(val_results) 137 | 138 | print(f"Time spent training: {totaltime}") 139 | memoryUsed=getNvidiaSmiMem() 140 | print(f"Memory used: {memoryUsed}") 141 | 142 | s=getScaleFromParameters(parameters)[0] 143 | val_rmse=s*np.sqrt(val_results[2]) 144 | if experimentNumber==0: 145 | best_val_rmse=val_rmse 146 | bestExp=0 147 | 148 | if val_rmse<= best_val_rmse: 149 | print("Saving training results because this is currently the best model.") 150 | best_val_rmse=val_rmse 151 | bestExp=experimentNumber 152 | #saving macro results 153 | #resultsFile="output/results.py" 154 | resultsFile=getModelFilePath(config.experiment_name,"results.py") 155 | if not os.path.exists(resultsFile): 156 | results={} 157 | else: 158 | with open(resultsFile, 'r') as f: results = eval(f.read()) 159 | 160 | expId=f"{config.experiment_name}_{experimentNumber}" 161 | 162 | if not expId in results.keys(): 163 | modelResults={} 164 | else: 165 | modelResults=results[expId] 166 | 167 | modelResults["time"]= totaltime.total_seconds() 168 | modelResults["memory"]= memoryUsed 169 | modelResults["epochs"]= len(history.history['loss']) 170 | modelResults["train_rmse"]= s*np.sqrt(train_results[2]) 171 | modelResults["train_mae"]= s*train_results[1] 172 | modelResults["val_rmse"]= val_rmse 173 | modelResults["val_mae"]= s*val_results[1] 174 | 175 | results[expId]=modelResults 176 | with open(resultsFile,'w') as f:f.write(repr(results)) 177 | 178 | np.savez(getModelFilePath(config.experiment_name,f'history{experimentNumber}'),h=history.history) 179 | #history=np.load('my_history.npy',allow_pickle='TRUE').item() #this would read 180 | 181 | scale=getScaleFromParameters(parameters) 182 | plotHistory(history,getModelFilePath(config.experiment_name,f'history{experimentNumber}.png'),scale=scale) 183 | 184 | #finding the best experiment 185 | print(f"Best experiment: {bestExp}. 
Restoring best model.") 186 | shutil.copy(getModelFileName(config.experiment_name,bestExp),getModelFileName(config.experiment_name)) 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | with tf.summary.create_file_writer(logdir,name=config.experiment_name).as_default(): 198 | hparams = { 199 | 'model': config.model, 200 | 'parameters': sum([v.shape.num_elements() for v in model.trainable_variables]), 201 | 'batch_size': batch_size, 202 | 'nstepsin': config.lag_window, 203 | 'nstepsout': output_t_steps, 204 | 'time_training': totaltime.total_seconds(), 205 | } 206 | hp.hparams(hparams) # record the values used in this trial 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /lstm_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import re 4 | import json 5 | from types import SimpleNamespace 6 | import argparse 7 | 8 | import pandas as pd 9 | import imageio 10 | from numpy import array 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | def get_args(): 15 | argparser = argparse.ArgumentParser(description=__doc__) 16 | argparser.add_argument( 17 | '-e', '--experiment', 18 | metavar='E', 19 | default='default', 20 | help='Experiment name as in config.csv') 21 | argparser.add_argument( 22 | '-c', '--config', 23 | metavar='C', 24 | default='config.csv', 25 | help='config.csv location.') 26 | args = argparser.parse_args() 27 | return args 28 | 29 | def load_config(experiment="default",config_csv_file="config.csv"): 30 | df=pd.read_csv(config_csv_file) 31 | row=df[df['experiment_name']==experiment] 32 | res=SimpleNamespace(**row.to_dict(orient='records')[0]) 33 | return res 34 | 35 | def update_config(experiment="default", column='tested', value='True', config_csv_file="config.csv"): 36 | df=pd.read_csv(config_csv_file) 37 | row=df[df['experiment_name']==experiment] 38 | res=SimpleNamespace(**row.to_dict(orient='records')[0]) 39 | return res 40 | 41 | # split a sequence into samples 42 | def split_sequence(sequence, n_steps, n_stepsout=1): 43 | X, y = list(), list() 44 | for i in range(len(sequence)): 45 | # find the end of this pattern 46 | end_ix = i + n_steps 47 | # check if we are beyond the sequence 48 | if end_ix > len(sequence)-n_stepsout: 49 | break 50 | # gather input and output parts of the pattern 51 | seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:end_ix+n_stepsout] 52 | X.append(seq_x) 53 | y.append(seq_y) 54 | return array(X), array(y) 55 | 56 | def getModelFolder(name, filename=''): 57 | root_path=os.getcwd() 58 | outfolder=os.path.join(root_path,'output',name) 59 | if not os.path.exists(outfolder): 60 | os.makedirs(outfolder) 61 | return os.path.join(outfolder,filename) 62 | 63 | def getModelFilePath(name, filename): 64 | return os.path.join(getModelFolder(name),filename) 65 | 66 | def getModelFileName(name,expNumber=-1): 67 | outfolder=getModelFolder(name) 68 | if expNumber>=0: 69 | fname=f"model_{expNumber}.h5" 70 | else: 71 | fname=f'model.h5' 72 | return os.path.join(outfolder,fname) 73 | 74 | def plotMap(m): 75 | m=np.moveaxis(m,-1,0)[0] 76 | plt.imshow(m, extent=[-180,180,-90,90]) #minx maxx miny maxy 77 | plt.show() 78 | 79 | 80 | def plotHistory(history,filename,scale=1): 81 | mse = np.sqrt(np.array(history.history['mean_squared_error']))*scale[0] 82 | val_mse = np.sqrt(np.array(history.history['val_mean_squared_error']))*scale[0] 83 | 84 | loss = 
np.array(history.history['mean_absolute_error'])*scale[0] 85 | val_loss = np.array(history.history['val_mean_absolute_error'])*scale[0] 86 | 87 | plt.figure(figsize=(8, 8)) 88 | plt.subplot(2, 1, 1) 89 | plt.plot(mse, label='Training RMSE') 90 | plt.plot(val_mse, label='Validation RMSE') 91 | plt.legend(loc='lower right') 92 | plt.ylabel('Root Mean Square Error') 93 | ymax=max(max(mse),max(val_mse)) 94 | plt.ylim([min(plt.ylim()),ymax]) 95 | plt.title('Training and Validation RMSE') 96 | 97 | plt.subplot(2, 1, 2) 98 | plt.plot(loss, label='Training MAE') 99 | plt.plot(val_loss, label='Validation MAE') 100 | plt.legend(loc='upper right') 101 | plt.ylabel('Mean Absolute Error') 102 | ymax=max(max(loss),max(val_loss)) 103 | plt.ylim([0,ymax]) 104 | plt.title('Training and Validation Loss') 105 | plt.xlabel('epoch') 106 | plt.savefig(filename, bbox_inches='tight') 107 | plt.close()# build gif 108 | 109 | def getScaleFromParameters(parameters): 110 | return (parameters["max"]-parameters["min"])/2 111 | 112 | def scaleBack(m,parameters): 113 | s=getScaleFromParameters(parameters) 114 | if isinstance(m, np.ndarray): #check if it's an array 115 | if m.shape[-1] != s.shape[-1]: 116 | s=s[...,:m.shape[-1]] 117 | m=(m+1)*s 118 | return m 119 | 120 | def scaleForward(m,parameters): 121 | s=getScaleFromParameters(parameters) 122 | m=(m/s)-1 123 | return m 124 | 125 | def plotTwins(ma, mb, title, outputFile,shareColorBar=False,ylabel1='',ylabel2=''): 126 | fig, axs = plt.subplots(2) 127 | fig.suptitle(title) 128 | 129 | if shareColorBar: 130 | vmin=ma.min() 131 | vmax=ma.max() 132 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 133 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 134 | else: 135 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90]) 136 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90]) 137 | axs[0].set_ylabel(ylabel1) 138 | axs[1].set_ylabel(ylabel2) 139 | fig.colorbar(a,ax=axs[0]) 140 | fig.colorbar(b,ax=axs[1]) 141 | plt.savefig(outputFile, bbox_inches='tight') 142 | plt.close() 143 | 144 | def plotTwinsAndError(ma, mb, merror, title, outputFile,shareColorBar=False,ylabel1='',ylabel2='',ylabel3="Difference"): 145 | fig, axs = plt.subplots(3) 146 | fig.suptitle(title) 147 | 148 | if shareColorBar: 149 | vmin=ma.min() 150 | vmax=ma.max() 151 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 152 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 153 | else: 154 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90]) 155 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90]) 156 | axs[0].set_ylabel(ylabel1) 157 | axs[1].set_ylabel(ylabel2) 158 | fig.colorbar(a,ax=axs[0]) 159 | fig.colorbar(b,ax=axs[1]) 160 | c=axs[2].imshow(np.squeeze(merror),extent=[-180,180,-90,90]) 161 | fig.colorbar(c,ax=axs[2]) 162 | axs[2].set_ylabel(ylabel3) 163 | plt.savefig(outputFile, bbox_inches='tight') 164 | plt.close() 165 | 166 | def getDataSubset(ionex,modelName): 167 | if "_Ap" in modelName: 168 | bands="0,1" #use tec and ap 169 | elif "_F107AP" in modelName: 170 | bands="0,1,2" 171 | elif "_F107" in modelName: 172 | bands="0,2" #use tec and F107 173 | else: 174 | bands="0" 175 | bands=eval("["+bands+"]") 176 | ionex=ionex[:,:,:,bands] #use tec and ap 177 | if "_1d" in modelName: 178 | ionex=ionex[:,35:36,35:36,bands] #use tec and ap 179 | return ionex 180 | 181 | def ulm_plot(mid, upper, lower): 182 | plt.figure(figsize=(8, 8)) 183 | plt.subplot(2, 1, 1) 184 | 
plt.plot(mid, label='Training MAE') 185 | plt.fill_between(range(mid.size), upper, lower,color='green', alpha=0.2 ) 186 | plt.plot(upper) 187 | plt.plot(lower) 188 | plt.legend(loc='lower right') 189 | plt.ylabel('Mean Absolute Error') 190 | plt.title('Training and Validation MAE') 191 | 192 | def getPixelSeries(m, i, j): 193 | return m[...,i,j,0].flatten() 194 | 195 | 196 | def saveGif(matrixList,gifFileName,clearFrames=True): 197 | filenames=[] 198 | for i,m in enumerate(matrixList): 199 | # plot the line chart 200 | #plt.plot(y[:i]) 201 | plt.imshow(np.squeeze(m), extent=[-180,180,-90,90]) #minx maxx miny maxy 202 | 203 | # create file name and append it to a list 204 | filename = f'{gifFileName}_{i}.png' 205 | filenames.append(filename) 206 | plt.title(f"Day {int(np.floor(i/24))+1} hour {i%24:02d}") 207 | # save frame 208 | plt.savefig(filename, bbox_inches='tight') 209 | plt.close()# build gif 210 | with imageio.get_writer(gifFileName, mode='I') as writer: 211 | for filename in filenames: 212 | image = imageio.imread(filename) 213 | writer.append_data(image) 214 | # Remove files 215 | if clearFrames: 216 | for filename in set(filenames): 217 | os.remove(filename) 218 | 219 | def main(): 220 | 221 | l=range(1,100) 222 | print(l) 223 | x,y=split_sequence(l,4,2) 224 | print(x.shape) 225 | print(y.shape) 226 | lu=np.array(l)*2 227 | ld=np.array(l)/2 228 | lm=np.array(l) 229 | ulm_plot(lm,lu,ld) 230 | 231 | def r2(y_true, y_pred): 232 | """ 233 | R^2 (coefficient of determination) regression score function. 234 | Best possible score is 1.0, lower values are worse. 235 | Args: 236 | y_true ([np.array]): test samples 237 | y_pred ([np.array]): predicted samples 238 | Returns: 239 | [float]: R2 240 | """ 241 | SS_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1) 242 | SS_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true, axis=-1)), axis=-1) 243 | return (1 - SS_res/(SS_tot + tf.keras.backend.epsilon())) 244 | 245 | def getNvidiaSmiMem(): 246 | nvidiasmi=subprocess.run("nvidia-smi", shell=True, capture_output=True) 247 | found=re.search('C.+python.(.+?)\|', nvidiasmi.stdout.decode("utf-8") ) 248 | if found: 249 | memory=found.group(1).strip() 250 | else: 251 | memory=0 252 | return memory 253 | 254 | if __name__=="__main__": 255 | main() 256 | -------------------------------------------------------------------------------- /plotresults.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from numpy import array 4 | import matplotlib.pyplot as plt 5 | from osgeo import gdal_array 6 | from extra.plot_time_series import saveGif 7 | from lstm_utils import * 8 | #loading config that tells which experiments should be on the charts 9 | 10 | args=get_args() 11 | baseconfig='config.csv' 12 | df=pd.read_csv(baseconfig) 13 | 14 | if baseconfig!=args.config: 15 | dfFilter=pd.read_csv(args.config) 16 | df=df[df.experiment_name.isin(dfFilter['experiment_name'].tolist())] 17 | df=df.merge(dfFilter, on='experiment_name',how='left') 18 | else: 19 | if not 'label' in df.columns: df['label']=np.nan 20 | df['label'] = df['label'].fillna(df['experiment_name']) #uses experiment_name if label is empty 21 | labels=dict(zip(df.experiment_name, df.label)) 22 | 23 | 24 | #Find which tests were trained with multiple runs and separate results file. 
Add their best to the unified results.py file and create statistics 25 | 26 | #opening the results file 27 | resultsFile="output/results.py" 28 | if os.path.exists(resultsFile): 29 | with open(resultsFile, 'r') as f: results = eval(f.read()) 30 | else: 31 | results={} 32 | 33 | for experiment_name in df[(df['compare']==True)]['experiment_name'].values: 34 | rows=[] 35 | individualResultsFile=getModelFilePath(experiment_name, 'results.py') 36 | if os.path.exists(individualResultsFile): 37 | print(individualResultsFile) 38 | #opening the results file 39 | with open(individualResultsFile, 'r') as f: indResults = eval(f.read()) 40 | best_of=len(indResults) 41 | best_mae=-1 42 | rmse_l=[] 43 | mae_l=[] 44 | r2_l=[] 45 | for experimentNumber in range(best_of): 46 | #expId=f"{config.experiment_name}_{experimentNumber}" 47 | expId=f"{experiment_name}_{experimentNumber}" 48 | if expId in indResults: 49 | k=indResults[expId] 50 | rmse=np.sqrt((k.get('rmse_per_hour',np.array([9999]))**2).mean()) 51 | mae=k.get('mae_per_hour',np.array([9999])).mean() 52 | rmse_l.append(rmse) 53 | mae_l.append(mae) 54 | r2_l.append(k.get('r2',9999)) 55 | if mae < best_mae or best_mae==-1: 56 | best_mae=mae 57 | if not best_mae==9999: 58 | results[experiment_name]=k.copy() #copying to the main results list 59 | mae_l=np.array(mae_l) 60 | r2_l=np.array(r2_l) 61 | rmse_l=np.array(rmse_l) 62 | #rows.append([best_mae, mae_l.mean(), mae_l.std(), rmse_l.min(), rmse_l.mean(), rmse_l.std(), r2_l.max(), r2_l.mean(), r2_l.std() ]) 63 | if experiment_name in results: #this means that we found evaluation data 64 | results[experiment_name].update({'mae_mean': mae_l.mean(), 'mae_std':mae_l.std(), 'rmse_mean': rmse_l.mean(), 'rmse_std': rmse_l.std(), 'r2_mean': r2_l.mean(), 'r2_avg':r2_l.std()}) 65 | #header=['mae_best', 'mae_mean', 'mae_std', 'rmse_best','rmse_mean', 'rmse_std', 'r2_best', 'r2_mean', 'r2_avg'] 66 | #outdf=pd.DataFrame.from_records(rows, columns=header) 67 | #outdf.to_csv("output/results_stats.csv",float_format='{:,.2f}'.format) 68 | 69 | #filtering to plot only the experiments that were set as "compare" == True on config.csv 70 | filtereddf=df[(df['compare']==True) & (df['experiment_name'].isin(results.keys()))] 71 | comparedExperiments=filtereddf['experiment_name'].values 72 | 73 | #data={"Network":[], "parameters":[], "MAE":[], "RMSE":[], "r2":[], "rmse (1st)":[], "max error(first)":[], "rmse (last)":[], "max error (last)":[] , "time (min)":[], "memory":[], "epochs":[], "train_mae":[], "train_rmse":[], "val_mae":[], "val_rmse":[]} 74 | # "r2 (1st)":[], "r2 (last)":[], 75 | 76 | header=["Network", "parameters", "MAE", "RMSE", "r2", "rmse (1st)", "max error(first)", "rmse (last)", "max error (last)" , "time (min)", "memory", "epochs", "train_mae", "train_rmse", "val_mae", "val_rmse", 'mae_mean', 'mae_std', 'rmse_mean', 'rmse_std', 'r2_mean', 'r2_std'] 77 | rows=[] 78 | for experiment_name in comparedExperiments: 79 | k=results[experiment_name] 80 | row=[ 81 | labels[experiment_name], 82 | k['parameters'], 83 | k['mae_per_hour'].mean(), 84 | np.sqrt((k['rmse_per_hour']**2).mean()), 85 | k['r2'], 86 | k.get('rmse_per_hour',[[np.nan]])[0][0], 87 | k['max_1st'][0], 88 | k.get('rmse_per_hour',[[np.nan]])[-1][0], 89 | k.get('max_per_hour',[[np.nan]])[-1][0], 90 | k.get('time',np.nan)/60., 91 | k.get('memory',np.nan), 92 | k.get('epochs',np.nan), 93 | k.get("train_mae",np.nan), 94 | k.get("train_rmse",np.nan), 95 | k.get("val_mae",np.nan), 96 | k.get("val_rmse",np.nan), 97 | k.get("mae_mean",np.nan), 98 | 
k.get("mae_std",np.nan), 99 | k.get("rmse_mean",np.nan), 100 | k.get("rmse_std",np.nan), 101 | k.get("r2_mean",np.nan), 102 | k.get("r2_avg",np.nan), 103 | ] 104 | rows.append(row) 105 | 106 | #line=f"{key}, {k['parameters']}, {mae}, {rmse}, {k['r2_1st'][0]}, {k['rmse_1st'][0]}, {k['max_1st'][0]}, {k['r2_per_hour'][-1][0]}, {k['rmse_per_hour'][-1][0]}, {k['max_per_hour'][-1][0]}" 107 | #print(line) 108 | #f.write(line+'\n') 109 | 110 | 111 | 112 | 113 | #plt.ylim([1., 2.5]) #TECU 114 | for modelName in comparedExperiments: 115 | plt.plot(results[modelName]["rmse_per_hour"], label = labels[modelName], marker='.') 116 | plt.xlabel('Frames of prediction') 117 | plt.ylabel('RMSE (TEC units)') 118 | plt.title('Prediction RMSE per frame') 119 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 120 | plt.savefig("output/rmse.pdf", bbox_inches='tight') 121 | plt.close() 122 | 123 | #plt.ylim([0.8, 1.25]) #TECU 124 | for modelName in comparedExperiments: 125 | plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.') 126 | plt.xlabel('Frames of prediction') 127 | plt.ylabel('MAE (TEC units)') 128 | plt.title('Prediction MAE per frame') 129 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 130 | plt.savefig("output/mae.pdf", bbox_inches='tight') 131 | plt.close() 132 | 133 | for modelName in comparedExperiments: 134 | plt.plot(results[modelName]["max_per_hour"], label = labels[modelName], marker='.') 135 | plt.xlabel('Frames of prediction') 136 | plt.ylabel('Max error (TEC units)') 137 | plt.title('Prediction max error per frame') 138 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 139 | plt.savefig("output/max.pdf", bbox_inches='tight') 140 | plt.close() 141 | 142 | for modelName in comparedExperiments: 143 | plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.') 144 | plt.xlabel('Frames of prediction') 145 | plt.ylabel('MAE (TEC units)') 146 | plt.title('Prediction MAE per frame') 147 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 148 | plt.savefig("output/errors.pdf", bbox_inches='tight') 149 | plt.close() 150 | 151 | 152 | for modelName in comparedExperiments: 153 | line=plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.')[0] 154 | plt.fill_between(range(results[modelName]["mae_per_hour"].size), results[modelName]["max_per_hour"].flatten(), results[modelName]["mae_per_hour"].flatten(),color=line.get_color(), alpha=0.2 ) 155 | plt.xlabel('Frames of prediction') 156 | plt.ylabel('MAE (TEC units)') 157 | plt.title('Prediction MAE per frame') 158 | plt.legend() 159 | plt.savefig("output/errors.pdf", bbox_inches='tight') 160 | plt.close() 161 | 162 | df=pd.DataFrame.from_records(rows, columns=header) 163 | 164 | #df=pd.DataFrame(data) 165 | df=df.sort_values(by='RMSE') 166 | 167 | pd.set_option('display.max_columns', None) # or 1000 168 | pd.set_option('display.max_rows', None) # or 1000 169 | pd.set_option('display.max_colwidth', None) # or 199 170 | pd.set_option('display.float_format','{:,.3f}'.format) 171 | print(df) 172 | df.to_csv("output/results.csv",float_format='{:,.3f}'.format) 173 | #Network, parameters, r2 (1st), rmse (1st), r2 (last), rmse (last) 174 | 175 | df=df.set_index('Network') 176 | df=df.sort_index() 177 | df['rmse_mean']=df['rmse_mean'].fillna(df['RMSE']) 178 | df['mae_mean']=df['mae_mean'].fillna(df['MAE']) 179 | plt.figure() 180 | width = 0.35 # the width of the bars 181 | plt.ylim(0, df.rmse_mean.max()*1.1) 182 | error_kw=dict(lw=1, capsize=5, capthick=1) 183 
| ind = np.arange(len(df)) 184 | if df.rmse_std.isnull().values.all(): 185 | plt.bar(ind-width/2., df.mae_mean, width, label='MAE',error_kw=error_kw) 186 | plt.bar(ind+width/2., df.rmse_mean, width, label='RMSE',error_kw=error_kw) 187 | else: 188 | plt.bar(ind-width/2., df.mae_mean, width, yerr=df.mae_std, label='MAE',error_kw=error_kw) 189 | plt.bar(ind+width/2., df.rmse_mean, width, yerr=df.rmse_std, label='RMSE',error_kw=error_kw) 190 | 191 | for x,y1,y2 in zip(ind,df.mae_mean,df.rmse_mean): 192 | label = "{:.2f}".format(y1) 193 | plt.annotate(label, (x-width/2,y1), textcoords="offset points", xytext=(0,10), ha='center') 194 | label = "{:.2f}".format(y2) 195 | plt.annotate(label, (x+width/2,y2), textcoords="offset points", xytext=(0,10), ha='center') 196 | plt.ylabel('TEC units') 197 | plt.legend(loc='lower left') 198 | plt.xticks(ind, df.index, rotation = 15, ha='right') 199 | 200 | #Please uncomment this line if you want the marker line 201 | #plt.plot(ind-width/2, df.mae_mean, color='k', marker='.') 202 | plt.tight_layout() 203 | plt.savefig('output/bar_plot.pdf') 204 | plt.close() 205 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | #Disabling warnings 3 | import logging 4 | #logging.getLogger('tensorflow').setLevel(logging.ERROR) 5 | #os.environ["KMP_AFFINITY"] = "noverbose" 6 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 7 | import tensorflow as tf 8 | #tf.autograph.set_verbosity(3) 9 | from models.models import * 10 | 11 | 12 | from lstm_utils import * 13 | import numpy as np 14 | import pandas as pd 15 | import sklearn 16 | import matplotlib.pyplot as plt 17 | from osgeo import gdal_array 18 | from generator import DataGenerator 19 | import sys 20 | #tf.config.experimental.set_memory_growth(tf.config.get_visible_devices()[1], True) 21 | #Model name is used to load the weights and recover prediction parameters. 22 | #loading experiment configuration 23 | 24 | args=get_args() 25 | config=load_config(args.experiment) 26 | 27 | 28 | try: 29 | model= eval(config.model) 30 | except: 31 | print(f"Error trying to load the model chosen in {args.config}") 32 | sys.exit() 33 | 34 | 35 | scalerParamsFile=getModelFilePath(config.experiment_name,f"params.py") 36 | print(f"Loading scaling parameters from {scalerParamsFile}") 37 | with open(scalerParamsFile, 'r') as f: parameters = eval(f.read()) 38 | 39 | 40 | 41 | #if the scaler has more than 1 band, we are using Space Weather indices 42 | 43 | print(f"Loading data from {config.test_npy_dataset}") 44 | ionex=np.load(config.test_npy_dataset) 45 | if config.test_time_sampling>1: 46 | ionex=ionex[::config.test_time_sampling,...] 
#2h step 47 | #Resizing to 72x72 48 | ionex=np.concatenate((ionex[:,:,:-1,:],ionex[:,-1:,:-1,:]),axis=1) 49 | ionex=getDataSubset(ionex,config.experiment_name) 50 | 51 | #reducing the number of test samples to use only one week 52 | #initialdate=15 53 | #frames=28*24 54 | #ionex=ionex[initialdate:initialdate+frames] 55 | 56 | 57 | 58 | #scaling 59 | ionex=scaleForward(ionex,parameters) 60 | 61 | if config.prediction=='seq2one': 62 | output_t_steps=1 63 | else: 64 | output_t_steps=config.prediction_window 65 | 66 | test_generator = DataGenerator(ionex, batch_size=config.batch_size, nstepsin=config.lag_window, nstepsout=config.prediction_window,shuffle=False,sample_rate=config.resample_rate) 67 | 68 | #datax,datay=test_generator.asArray() 69 | #print(f"Test data shape: {datax.shape}") 70 | #del ionex 71 | 72 | day=0 73 | 74 | os.makedirs(f"output/{config.experiment_name}", exist_ok=True) 75 | 76 | randomSeeds=(np.random.random(config.best_of)*100).astype(int) 77 | 78 | bestModelNumber=0 79 | bestMAE=9999 80 | for experimentNumber, randomSeed in enumerate(randomSeeds): #this represents how many times we are going to test the network 81 | print(f"Starting experiment {experimentNumber}") 82 | 83 | fileName=getModelFileName(config.experiment_name, experimentNumber) 84 | if not os.path.exists(fileName): 85 | print("Model not found. Please check the models folder and set the name variable on predict.py.") 86 | sys.exit() 87 | 88 | print(f"Loading model {fileName}") 89 | model = tf.keras.models.load_model(fileName) 90 | #print(model.get_weights()) 91 | 92 | #model.evaluate(test_generator,batch_size=config.batch_size,verbose=2) 93 | 94 | 95 | rmse_per_hour=np.zeros((config.prediction_window,1)) 96 | mae_per_hour=np.zeros((config.prediction_window,1)) 97 | max_per_hour=np.zeros((config.prediction_window,1)) 98 | hist, edges = None,None 99 | ymean=0 100 | sstotal=0 101 | 102 | 103 | for i in range(len(test_generator)): #computing the mean for r2 104 | #aggregating batch data 105 | datax, datay=test_generator[i] 106 | datay=scaleBack(datay,parameters) 107 | ymean+=np.sum(datay) 108 | 109 | n=test_generator.count()*datay.shape[2]*datay.shape[3] 110 | ymean/=n*datay.shape[1] 111 | 112 | 113 | for i in range(len(test_generator)): #there is a bug with seq2one and space indexes 114 | #aggregating batch data 115 | datax, datay=test_generator[i] 116 | if config.prediction=='seq2one' and config.prediction_window>1: #perform predictions seq2one 117 | currX=datax.copy() 118 | ynew=None 119 | for t in range(config.prediction_window): 120 | newFrame=model.predict(currX,verbose=0) 121 | if ynew is None: 122 | ynew=newFrame 123 | else: 124 | ynew=np.concatenate([ynew,newFrame],axis=1) 125 | currX=np.concatenate([currX[:,1:], newFrame],axis=1) 126 | 127 | else: #perform predictions seq2seq 128 | ynew=model.predict(datax,verbose=0) 129 | ynew=scaleBack(ynew,parameters) 130 | datay=scaleBack(datay,parameters) 131 | error=ynew-datay 132 | #scale back 133 | rmse_per_hour+=np.sum(error**2, axis=(0,2,3)) 134 | mae_per_hour+=np.sum(np.abs(error), axis=(0,2,3)) 135 | max_per_hour=np.maximum(np.max(np.abs(error), axis=(0,2,3)), max_per_hour) 136 | if hist is None: 137 | hist, edges =np.histogram(datay-ymean,range=[-40,40], bins=40) 138 | else: 139 | htemp,etemp =np.histogram(datay-ymean,bins=edges) 140 | hist+=htemp 141 | sstotal+=np.sum((datay-ymean)**2) 142 | 143 | 144 | r2=1-rmse_per_hour.sum()/sstotal 145 | rmse_per_hour=np.sqrt(rmse_per_hour/n) 146 | mae_per_hour=mae_per_hour/n 147 | 148 | 
datax=scaleBack(datax,parameters) 149 | 150 | 151 | #saving macro results 152 | #resultsFile="output/results.py" 153 | resultsFile=getModelFilePath(config.experiment_name,"results.py") 154 | if not os.path.exists(resultsFile): 155 | results={} 156 | else: 157 | with open(resultsFile, 'r') as f: results = eval(f.read()) 158 | 159 | #get existing dict (from training) 160 | expId=f"{config.experiment_name}_{experimentNumber}" 161 | 162 | if not expId in results.keys(): 163 | modelResults={} 164 | else: 165 | modelResults=results[expId] 166 | 167 | modelResults["parameters"]= model.count_params() 168 | modelResults["r2"]= r2 169 | modelResults["mae_1st"]= mae_per_hour[0] 170 | modelResults["rmse_1st"]= rmse_per_hour[0] 171 | modelResults["max_1st"]= max_per_hour[0] 172 | modelResults["mae_per_hour"]= mae_per_hour 173 | modelResults["rmse_per_hour"]= rmse_per_hour 174 | modelResults["max_per_hour"]= max_per_hour 175 | modelResults["histogram"]= hist 176 | modelResults["edges"]= edges 177 | 178 | results[expId]=modelResults 179 | with open(resultsFile,'w') as f:f.write(repr(results)) 180 | 181 | mae=mae_per_hour.mean() 182 | print(f"MAE: {mae}") 183 | if mae=day) & (weatherdf.index=day) & (weatherdf.index12: 267 | m=m[::2] 268 | codg_12_20.append(m) 269 | codg_12_20=np.concatenate(codg_12_20, axis=0) 270 | np.save('codg_12_20.npy',codg_12_20) 271 | print(codg_12_20.shape) 272 | 273 | for year in range(2015,2020+1): 274 | if not os.path.exists(f"codg{year}_12h.npy"): 275 | m=np.load(f"codg{year}.npy") 276 | if m.shape[0]/365>12: 277 | m=m[::2] 278 | np.save(f"codg{year}_12h.npy",m) 279 | 280 | #for year in range(2019,2021): 281 | #fname=f"timeseries{year%100}.npy" 282 | #print(f"Test data saved in {fname}") 283 | #if not os.path.exists(fname): 284 | #reader.concatenateYear(year,"timeseries.npy",useSpaceWeather=False) 285 | #reader.concatenateYear(year,fname,useSpaceWeather=True) 286 | 287 | year=2019 #Training data 288 | print("Training data saved in timeseries19.npy") 289 | if not os.path.exists("timeseries19_ind.npy"): 290 | reader.concatenateYear(year,"timeseries19.npy",useSpaceWeather=False) 291 | reader.concatenateYear(year,"timeseries19_ind.npy",useSpaceWeather=True) 292 | year=2020 #Test data 293 | print("Test data saved in timeseries.npy") 294 | if not os.path.exists("timeseries_ind.npy"): 295 | reader.concatenateYear(year,"timeseries.npy",useSpaceWeather=False) 296 | reader.concatenateYear(year,"timeseries_ind.npy",useSpaceWeather=True) 297 | 298 | #if not os.path.exists("timeseries14_ind.npy"): 299 | #reader.concatenateYear(2014,"timeseries14_ind.npy",useSpaceWeather=True, hour_step=2) 300 | if not os.path.exists("timeseries15_ind.npy"): 301 | reader.concatenateYear(2015,"timeseries15_ind.npy",useSpaceWeather=True, hour_step=2) 302 | 303 | if not os.path.exists("c1pg.npy"): 304 | reader.concatenateYear(2019,"c1pg.npy",useSpaceWeather=False,prefix='c1pg') 305 | 306 | if not os.path.exists("c1pg2015.npy"): 307 | reader.concatenateYear(2015,"c1pg2015.npy",useSpaceWeather=False,prefix='c1pg') 308 | 309 | if not os.path.exists("c1pg20.npy"): 310 | reader.concatenateYear(2020,"c1pg20.npy",useSpaceWeather=False,prefix='c1pg') 311 | 312 | if not os.path.exists("corg.npy"): 313 | reader.concatenateYear(2019,"corg.npy",useSpaceWeather=False,prefix='corg') 314 | 315 | if not os.path.exists("magn19.npy"): 316 | reader.concatenateYear(2019,"magn19.npy",useSpaceWeather=False,prefix='magn') 317 | 318 | 319 | #print("Tiff conversion test") 320 | 
#reader.ionex2tiff("./ionex/codg0010.18i","./output/teste.tif") 321 | #reader=ionexreader() 322 | m,trans,daterange=reader.read2DIonex("./ionex/codg0010.18i") 323 | reader.write2DIonex(m,trans,daterange,"./output/teste.18i") 324 | 325 | --------------------------------------------------------------------------------
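
A minimal, self-contained sketch (not part of the repository) of how the data-preparation helpers in lstm_utils.py fit together: split_sequence() windows a series into (input, output) pairs, and scaleForward()/scaleBack() map TEC values towards [-1, 1] and back. The parameters dict is assumed here to hold per-band "min"/"max" arrays, mirroring the params.py file that evaluate.py loads; the values below are toy numbers.

import numpy as np
from lstm_utils import split_sequence, scaleForward, scaleBack

series = np.arange(10, dtype=float)          # toy 1-D series standing in for hourly TEC frames
X, y = split_sequence(series, n_steps=4, n_stepsout=2)
print(X.shape, y.shape)                      # (5, 4) and (5, 2): 4 input steps, 2 output steps

parameters = {"min": np.array([0.0]), "max": np.array([80.0])}   # assumed single-band scaler
scaled = scaleForward(series, parameters)
restored = scaleBack(scaled, parameters)
print(np.allclose(series, restored))         # True: the transform round-trips exactly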
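
The per-frame error statistics in evaluate.py are accumulated in a streaming fashion: squared and absolute errors are summed batch by batch and only normalised at the end, so the full test set never has to sit in memory, and R² is derived from the same accumulated sums. The sketch below (also not part of the repository) reproduces that bookkeeping; random arrays stand in for the DataGenerator batches and for model.predict(), the grid size is a toy choice, and the accumulators are kept as flat vectors for simplicity.

import numpy as np

prediction_window = 6
rmse_per_hour = np.zeros(prediction_window)   # running sum of squared errors per lead time
mae_per_hour = np.zeros(prediction_window)    # running sum of absolute errors per lead time
sstotal = 0.0                                 # running total sum of squares for R^2
n = 0                                         # grid cells seen per lead time
ymean = 10.0                                  # evaluate.py estimates this in a first pass over the generator

rng = np.random.default_rng(0)
for _ in range(4):                            # stand-in for looping over the DataGenerator batches
    datay = rng.normal(10.0, 2.0, size=(8, prediction_window, 72, 72))   # "truth" batch
    ynew = datay + rng.normal(0.0, 1.0, size=datay.shape)                # stand-in for the model prediction
    error = ynew - datay
    rmse_per_hour += np.sum(error**2, axis=(0, 2, 3))
    mae_per_hour += np.sum(np.abs(error), axis=(0, 2, 3))
    sstotal += np.sum((datay - ymean)**2)
    n += datay.shape[0] * datay.shape[2] * datay.shape[3]

r2 = 1.0 - rmse_per_hour.sum() / sstotal      # same formula as evaluate.py
rmse_per_hour = np.sqrt(rmse_per_hour / n)
mae_per_hour = mae_per_hour / n
print(f"R2={r2:.3f}", rmse_per_hour.round(2), mae_per_hour.round(2))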