├── extra ├── __init__.py ├── single_day_series.gif ├── plot_time_series.py └── ionex_writer.py ├── spaceweather ├── __init__.py └── indicesdownloader.py ├── requirements.txt ├── models ├── custom_layers.py ├── dev │ └── gps_solutions.py └── models.py ├── LICENSE ├── batch_run.py ├── README.md ├── .gitignore ├── config.csv ├── downloader.py ├── generator.py ├── train.py ├── lstm_utils.py ├── plotresults.py ├── evaluate.py └── ionex_samples.py /extra/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spaceweather/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /extra/single_day_series.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mauriciodev/tec_forecast/HEAD/extra/single_day_series.gif -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | imageio 2 | tensorflow-gpu 3 | numpy 4 | matplotlib 5 | argparse 6 | sklearn 7 | pandas 8 | scikit-learn 9 | pydot 10 | graphviz 11 | h5py 12 | keras==2.15.0 13 | tensorflow==2.15.1 14 | -------------------------------------------------------------------------------- /models/custom_layers.py: -------------------------------------------------------------------------------- 1 | 2 | import tensorflow as tf 3 | from tensorflow.keras.layers import * 4 | from tensorflow.keras.utils import plot_model 5 | from tensorflow.keras.regularizers import l1,l2 6 | 7 | weight_decay=1E-4 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Mauricio Carvalho Mathias de Paulo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /extra/plot_time_series.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | import imageio 5 | 6 | 7 | 8 | def saveGif(matrixList,gifFileName,clearFrames=True): 9 | filenames=[] 10 | for i,m in enumerate(matrixList): 11 | # plot the line chart 12 | #plt.plot(y[:i]) 13 | plt.imshow(np.squeeze(m), extent=[-180,180,-90,90]) #minx maxx miny maxy 14 | 15 | # create file name and append it to a list 16 | filename = f'{gifFileName}_{i}.png' 17 | filenames.append(filename) 18 | plt.title(f"Day {int(np.floor(i/24))+1} hour {i%24:02d}") 19 | # save frame 20 | plt.savefig(filename, bbox_inches='tight') 21 | plt.close()# build gif 22 | with imageio.get_writer(gifFileName, mode='I') as writer: 23 | for filename in filenames: 24 | image = imageio.imread(filename) 25 | writer.append_data(image) 26 | # Remove files 27 | if clearFrames: 28 | for filename in set(filenames): 29 | os.remove(filename) 30 | 31 | if __name__=="__main__": 32 | matrixList=None 33 | 34 | for d in range(1,2): 35 | f=f"ionex/codg00{d}0.20i.npy" 36 | ionex=np.load(f) 37 | if matrixList is None: 38 | matrixList=ionex[:24] 39 | else: 40 | matrixList=np.concatenate((matrixList,ionex[:24])) 41 | saveGif(matrixList,'mygif.gif') 42 | 43 | -------------------------------------------------------------------------------- /batch_run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import pandas as pd 3 | import subprocess, os 4 | from lstm_utils import getModelFileName,getModelFolder 5 | config_csv_file="config.csv" 6 | df=pd.read_csv(config_csv_file) 7 | 8 | for index,row in df.iterrows(): 9 | if not (row['batch_train'] or row['batch_test']): 10 | pass 11 | else: 12 | exp=row['experiment_name'] 13 | modelFile=getModelFileName(exp) 14 | modelFolder=getModelFolder(exp) 15 | 16 | logFile=os.path.join(modelFolder,'batch_log.txt') 17 | print(f"Beginning experiment {exp}") 18 | print(f" - Log file: {logFile}") 19 | with open(logFile,'w',buffering=1) as stdout: 20 | if row['batch_train']: 21 | if os.path.exists(modelFile): os.unlink(modelFile) 22 | print(" - Training.") 23 | subprocess.run(f"python train.py -e {exp}",stdout=stdout,stderr=stdout,shell=True) 24 | if row['batch_test']: 25 | print(" - Testing.") 26 | subprocess.run(f"python evaluate.py -e {exp}",stdout=stdout,stderr=stdout,shell=True) 27 | 28 | if os.path.exists(modelFile): 29 | print("Model found. Setting as trained.") 30 | df.loc[df['experiment_name']==exp, 'tested']=True 31 | df.loc[df['experiment_name']==exp, 'batch_test']=False 32 | df.loc[df['experiment_name']==exp, 'batch_train']=False 33 | df.loc[df['experiment_name']==exp, 'compare']=True 34 | df.to_csv(config_csv_file, index=False) 35 | else: 36 | print(f"Failed to find {modelFile}. Training failed.") 37 | 38 | print("Plotting experiment comparison.") 39 | subprocess.run(f"python plotresults.py ",shell=True) 40 | 41 | print("Done.") 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepTEC 2 | A deep learning laboratory for Total Electron Content prediction experiments, using TensorFlow 2.\ 3 | Models can be found in ./models/. Some reusable layers are in ./models/custom_layers.py.\ 4 | The c111 and c333 models were inspired by Boulch (2018). 5 | 6 | BOULCH, A.; CHERRIER, N.; CASTAINGS, T. Ionospheric activity prediction using convolutional recurrent neural networks. arXiv:1810.13273 [cs], 6 nov. 2018. 7 | https://github.com/aboulch/tec_prediction/ 8 | 9 | The models in ./models/dev/gps_solutions.py were described in the article: \ 10 | de Paulo MCM, Marques HA, Feitosa RQ, Ferreira MP (2023) New encoder–decoder convolutional LSTM neural network architectures for next-day global ionosphere maps forecast. GPS Solut 27(2):95. https://doi.org/10.1007/s10291-023-01442-4 11 | 12 | ## Download IONEX data 13 | python3 downloader.py 14 | Or download from: https://drive.google.com/file/d/1Sm_PiVUIabaew_3Y7sT0NWBqu7xsdHvi/view?usp=share_link 15 | 16 | ## Create a numpy representation of the downloaded data 17 | python3 ionex_samples.py 18 | 19 | ## Experiment configuration 20 | 21 | The configuration file "config.csv" is used to set up the hyperparameters of each experiment, such as the chosen model, input window, prediction window, and train and test datasets, among others.
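Each experiment's row can also be loaded programmatically through the "load_config" helper in "lstm_utils.py". A minimal sketch, run from the repository root and using one of the experiment names already present in "config.csv":

    from lstm_utils import load_config
    cfg = load_config("EF-ConvLSTM_3x3")  # returns a SimpleNamespace built from that CSV row
    print(cfg.model, cfg.lag_window, cfg.prediction_window, cfg.train_npy_dataset)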
22 | 23 | ## Batch processing 24 | 25 | The columns "batch_train" and "batch_test" in "config.csv" can be used to perform batch training and testing. Set them to True on the line that describes the experiment and run 26 | 27 | python3 batch_run.py 28 | 29 | The results will be created in the "output" folder, under a subfolder with the experiment's name. 30 | 31 | ## Training the network 32 | python3 train.py -e experiment_name 33 | 34 | Replace experiment_name with one of the experiment names listed in "config.csv" (batch_run.py passes it automatically). The "parameters.py" file is created during training. If you retrain the network, please remove it. 35 | 36 | ## Evaluating the trained network (test) 37 | python3 evaluate.py -e experiment_name 38 | 39 | ## Plot results 40 | python3 plotresults.py 41 | 42 | ## Google Colab 43 | https://github.com/mauriciodev/tec_forecast/blob/main/examples/tec_forecast.ipynb 44 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | ionex/ 132 | -------------------------------------------------------------------------------- /config.csv: -------------------------------------------------------------------------------- 1 | comment,experiment_name,batch_train,batch_test,tested,compare,batch_size,num_epochs,filters,model,prediction,lag_window,prediction_window,train_time_sampling,test_time_sampling,resample_rate,train_npy_dataset,test_npy_dataset,random_seed,loss,optimizer,best_of 2 | None,Repeat_19-20,True,True,True,True,4,30,8,usePrevious,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 3 | None,Repeat_14-15,True,True,True,True,4,30,8,usePrevious,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 4 | This is not a model,c1pg,False,False,True,True,1,1,0,c1pg,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,1 5 | This is not a model,c1pg14-15,False,False,True,True,1,1,0,c1pg,seq2seq,36,12,2,2,12,c1pg2015.npy,codg2015.npy,1,mae,adam,1 6 | Very low memory consumption,ANN_NtoN_12_2,False,False,True,True,32,200,150,ANN,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 7 | Very low memory consumption,ANN_NtoN_18_2,False,False,True,True,32,200,150,ANN,seq2seq,18,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 8 | Very low memory consumption,ANN_NtoN_24_2,False,False,True,True,32,200,150,ANN,seq2seq,24,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 9 | Very low memory consumption,ANN_NtoN_30_2,False,False,True,True,32,200,150,ANN,seq2seq,30,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 10 | Very low memory consumption,ANN_NtoN_36_2,False,False,True,True,32,200,150,ANN,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 11 | 
deeptec1-19-20,iConvLSTM,True,True,True,True,16,200,48,iConvLSTM,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 12 | ,EF-ConvLSTM_1x1,False,False,True,True,4,200,24,c111,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 13 | deeptec1-19-20,EF-ConvLSTM_3x3,True,True,True,True,8,200,24,c333,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 14 | ,REF-ConvLSTMv1_1x1,False,False,True,True,4,200,24,c111_res,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 15 | deeptec1-19-20,REF-ConvLSTMv1_3x3,True,True,True,True,4,200,24,c333_res,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 16 | ,EF-ConvLSTMv2_1x1,False,False,True,True,4,200,24,c111_res_v2,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 17 | deeptec1-19-20,EF-ConvLSTMv2_3x3,True,True,True,True,8,200,24,c333_res_v2,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 18 | deeptec1-19-20,EF-ConvLSTMv3,True,True,True,True,8,200,24,c333_res_v3,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 19 | cbcg,ConvLSTM_Bi,False,False,True,True,8,200,24,c333bi,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 20 | cbcg,ConvLSTM_N-1,False,False,True,True,8,200,24,c333_nto1,seq2one,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 21 | cbcg,EF_ConvLSTM_60,False,False,True,True,4,200,24,c333,seq2seq,60,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 22 | cbcg,EF_ConvLSTM_48,False,False,True,True,4,200,24,c333,seq2seq,48,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 23 | cbcg,EF_ConvLSTM_36,False,False,True,True,4,200,24,c333,seq2seq,36,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 24 | cbcg,EF_ConvLSTM_24,False,False,True,True,4,200,24,c333,seq2seq,24,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 25 | cbcg,EF_ConvLSTM_12,False,False,True,True,4,200,24,c333,seq2seq,12,12,2,2,12,codg2019.npy,codg2020.npy,1,mae,adam,5 26 | deeptec1,IconvLSTM_14-15,True,True,True,True,16,200,48,iConvLSTM,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 27 | ,EF-ConvLSTM_1x1_14-15,False,False,True,True,4,200,24,c111,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 28 | deeptec1-14-15,EF-ConvLSTM_3x3_14-15,True,True,True,True,4,200,24,c333,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 29 | ,EF-ConvLSTMv2_1x1_14-15,False,False,True,True,4,200,24,c111_res_v2,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 30 | deeptec1-14-15 ,EF-ConvLSTMv2_3x3_14-15_12,True,True,True,True,8,200,24,c333_res_v2,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 31 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15_24,True,True,True,True,8,200,24,c333_res_v2,seq2seq,24,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 32 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15,True,True,True,True,8,200,24,c333_res_v2,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 33 | deeptec1-14-15,EF-ConvLSTMv2_3x3_14-15_48,True,True,True,True,8,200,24,c333_res_v2,seq2seq,48,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 34 | Out of memory,EF-ConvLSTMv2_3x3_14-15_60,False,False,False,True,8,200,24,c333_res_v2,seq2seq,60,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 35 | deeptec1,EF-ConvLSTMv3_14-15,True,True,True,True,8,200,24,c333_res_v3,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 36 | ,REF-ConvLSTMv1_1x1_14-15,False,False,True,True,4,200,24,c111_res,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 37 | 
deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_12,True,True,True,True,4,200,24,c333_res,seq2seq,12,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 38 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_24,True,True,True,True,4,200,24,c333_res,seq2seq,24,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 39 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15,True,True,True,True,4,200,24,c333_res,seq2seq,36,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 40 | deeptec1-14-15,REF-ConvLSTMv1_3x3_14-15_48,True,True,True,True,4,200,24,c333_res,seq2seq,48,12,1,2,12,codg2014.npy,codg2015.npy,1,mae,adam,5 41 | -------------------------------------------------------------------------------- /extra/ionex_writer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import h5py 3 | import numpy as np 4 | import pandas as pd 5 | from tqdm import tqdm 6 | 7 | 8 | fname="../output/EF-ConvLSTMv2_3x3_14-15/predicted_0.h5" 9 | outFolder='edconvlstm_nd' 10 | nstepsin = 36 11 | 12 | t0=pd.to_datetime('2015-01-01') 13 | timedelta=pd.Timedelta('2H') 14 | 15 | 16 | 17 | lat1 = 87.5 18 | lon1 = -180.0 19 | lat2 = -87.5 20 | lon2 = 180.0 21 | h = 450 22 | scale = 0.1 23 | convertFrom72x72 = True 24 | 25 | if fname.endswith('.h5'): #recompose the h5 into a time series 26 | f = h5py.File(fname, 'r') 27 | total_seq=[] 28 | for i in range(len(f.keys())): 29 | batch_id = str(i) 30 | batch = f[batch_id] 31 | batch_size = batch.shape[0] 32 | if i==0: print(f"Batch shape: {batch.shape}") 33 | total_seq.append(batch) 34 | pred_seq = np.concatenate(total_seq, axis=0) 35 | #m=m.squeeze() 36 | else: 37 | pred_seq = np.load(fname) 38 | avg = 24.82940426949007 39 | std = 19.74997754805293 40 | pred_seq = pred_seq * std +avg 41 | if 'SimVP' in fname: 42 | pred_seq = np.moveaxis(pred_seq, 2, 4) 43 | 44 | nstepsout = pred_seq.shape[1] 45 | 46 | def fill_spaces(s, size=80): 47 | if len(s) ", outfile) 17 | urllib.request.urlretrieve(url,outfile) 18 | return outfile 19 | 20 | 21 | 22 | #Copied from https://cddis.nasa.gov/Data_and_Derived_Products/CDDIS_Archive_Access.html 23 | class cddisDownloader(): 24 | def __init__(self): 25 | pass 26 | 27 | def listfolder(self,url): 28 | #Adds '*?list' to the end of URL if not included already 29 | if not url.endswith("*?list"): 30 | url = url + "*?list" 31 | 32 | #Makes request of URL, stores response in variable r 33 | r = requests.get(url, verify=False) 34 | res=[] 35 | for line in r.text.splitlines(): 36 | f=line.split(' ')[0] 37 | if f[0]!="#": 38 | res.append(f) 39 | 40 | return res 41 | 42 | #Prints the results of the directory listing 43 | #print(r.text) 44 | 45 | 46 | def _download(self,url,rootdir): 47 | if not os.path.exists(rootdir): 48 | os.makedirs(rootdir) 49 | 50 | # Assigns the local file name to the last part of the URL 51 | filename = url.split('/')[-1] 52 | 53 | # Makes request of URL, stores response in variable r 54 | r = requests.get(url) 55 | if r.status_code==404: 56 | logging.warning("File not found: "+url) 57 | 58 | # Opens a local file of same name as remote file for writing to 59 | with open(os.path.join(rootdir,filename), 'wb') as fd: 60 | for chunk in r.iter_content(chunk_size=1000): 61 | fd.write(chunk) 62 | 63 | # Closes local file 64 | fd.close() 65 | 66 | def _download2(self,url,rootdir): 67 | if not os.path.exists(rootdir): 68 | os.makedirs(rootdir) 69 | filename = url.split('/')[-1] 70 | os.chdir(rootdir) 71 | cmd=f"curl -c [file] -n -L -O \"{url}\"" 72 | if not os.path.exists(filename): 73 | subprocess.run(cmd, shell=True) 74 | 75 | def 
download(self,jday, year, rootdir, prefix="codg"): 76 | fileurl = "https://cddis.nasa.gov/archive/gnss/products/ionex/{year}/{jday:03d}/{prefix}{jday:03d}0.{lastDigits:02d}i.Z".format(year=year,jday=jday,lastDigits=year % 100, prefix=prefix) 77 | filename = fileurl.split('/')[-1] 78 | output=os.path.join(rootdir,filename) 79 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 80 | print(year, jday, fileurl) 81 | try: 82 | self._download2(fileurl,rootdir) 83 | except: 84 | print(f"Failed to download {fileurl}.") 85 | 86 | class magnDownloader(): 87 | def __init__(self): 88 | top_level_url = "wilkilen.fcaglp.unlp.edu.ar" 89 | netrcData = netrc.netrc() 90 | authTokens = netrcData.authenticators(top_level_url) 91 | password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() 92 | top_level_url="http://"+top_level_url 93 | password_mgr.add_password(None, top_level_url, authTokens[0], authTokens[2]) 94 | handler = urllib.request.HTTPBasicAuthHandler(password_mgr) 95 | # create "opener" (OpenerDirector instance) 96 | self.opener = urllib.request.build_opener(handler) 97 | # use the opener to fetch a URL 98 | a_url="http://wilkilen.fcaglp.unlp.edu.ar/" 99 | self.opener.open(a_url) 100 | # Install the opener. 101 | # Now all calls to urllib.request.urlopen use our opener. 102 | urllib.request.install_opener(self.opener) 103 | 104 | def download(self,jday, year, rootdir, prefix="magn"): 105 | fileurl = "http://wilkilen.fcaglp.unlp.edu.ar/ion/magn/{year}/{jday:03d}/{prefix}{jday:03d}0.{lastDigits:02d}i.Z".format(year=year,jday=jday,lastDigits=year % 100, prefix=prefix) 106 | filename = fileurl.split('/')[-1] 107 | output=os.path.join(rootdir,filename) 108 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 109 | print(year, jday, fileurl) 110 | try: 111 | urllib.request.urlretrieve(fileurl, output) 112 | except: 113 | print(f"Failed to download {fileurl}.") 114 | def main(): 115 | rootdir="ionex" 116 | years=range(2012,2022)#[2021,2020,2019,2018] 117 | cddis=cddisDownloader() 118 | magn=magnDownloader() 119 | 120 | for year in years: 121 | leap= 0 if (2000+year)%4 else 1 122 | for jday in range(1,366+leap): 123 | cddis.download(jday,year,rootdir) 124 | years=[2019,2020,2015]#[2021,2020,2019,2018] 125 | for year in years: 126 | leap= 0 if (2000+year)%4 else 1 127 | for jday in range(1,366+leap): 128 | cddis.download(jday,year,rootdir,prefix="c1pg") 129 | #cddis.download(jday,year,rootdir,prefix="corg") 130 | #magn.download(jday,year,rootdir) 131 | os.chdir(rootdir) 132 | os.system("uncompress *.Z") 133 | 134 | 135 | if __name__=="__main__": 136 | main() 137 | -------------------------------------------------------------------------------- /spaceweather/indicesdownloader.py: -------------------------------------------------------------------------------- 1 | import sys, os 2 | print("Remember that this script requires .netrc on your home folder") 3 | import logging 4 | import pandas as pd 5 | #curl -d "activity=retrieve&res=hour&spacecraft=omni2&start_date=20050101&end_date=20050301&vars=8&vars=38&vars=49&vars=50&scale=Linear&table=0" https://omniweb.gsfc.nasa.gov/cgi/nx1.cgi > test_curl.txt 6 | from datetime import date 7 | import urllib 8 | import urllib.request as request 9 | import re 10 | import shutil 11 | from contextlib import closing 12 | 13 | import matplotlib.pyplot as plt 14 | from matplotlib.dates import DateFormatter, AutoDateLocator 15 | 16 | 17 | 
""" 18 | Use getIndexes(year,folder) to download the dataframe (3 hours interval). 19 | Pass year='nowcast' to download 'nowcast' data. 20 | Use getInterpolatedIndexes(year,folder) to download 1 hour linearly interpolated data. 21 | """ 22 | class indicesDownloader(): 23 | def __init__(self): 24 | self.postdamurl="ftp://ftp.gfz-potsdam.de/pub/home/obs/Kp_ap_Ap_SN_F107/" 25 | #self.f107mmURL="ftp://ftp.seismo.nrcan.gc.ca/spaceweather/solar_flux/daily_flux_values/fluxtable.txt" 26 | 27 | def getInterpolatedIndexes(self,year,rootdir): 28 | df=self.getIndexes(year,rootdir) 29 | return self.interpolate(df) 30 | 31 | def getIndexes(self,year,rootdir): 32 | #use year='nowcast' to download nowcast 33 | filename=f"Kp_ap_Ap_SN_F107_{year}.txt" 34 | fileurl=urllib.parse.urljoin(self.postdamurl,filename) 35 | output=os.path.join(rootdir,filename) 36 | os.makedirs(rootdir,exist_ok=True) 37 | if year=='nowcast': os.unlink(output) #nowcast always downloads 38 | if not os.path.exists(output) and not os.path.exists(output[:-2]): ##ignoring downloaded data even if it's uncompressed 39 | print(fileurl) 40 | self._download(fileurl,rootdir) 41 | 42 | cols=[4,3,3,6,8,5,3,7,7,7,7,7,7,7,7,5,5,5,5,5,5,5,5,6,4,9,9,2] 43 | #Be careful when changing the header: Ap column is already used. 44 | header="YYYY MM DD days days_m Bsr dB Kp1 Kp2 Kp3 Kp4 Kp5 Kp6 Kp7 Kp8 ap1 ap2 ap3 ap4 ap5 ap6 ap7 ap8 Apm SN F107obs F107adj D" 45 | header=re.split(' +', header) 46 | df=pd.read_fwf(output,widths=cols,comment='#',header=None, names=header) 47 | 48 | #Kp/f10.7 pivot processing 49 | KpCols=header[7:15] 50 | kpdf=pd.melt(df,id_vars=header[:3]+["F107obs","F107adj"],value_vars=KpCols,var_name="KpStep",value_name="Kp") 51 | kpdf['hour']=(kpdf['KpStep'].str.get(2).astype(int)-1)*3+1 52 | kpdf[["date"]]=pd.to_datetime(dict(year=kpdf.YYYY, month=kpdf.MM, day=kpdf.DD,hour=kpdf.hour)) 53 | kpdf=kpdf[['date','Kp',"F107obs","F107adj"]].sort_values(["date"]) 54 | #datetime col 55 | kpdf=kpdf.set_index('date') 56 | #Ap pivot processing 57 | ApCols=header[15:23] 58 | apdf=pd.melt(df,id_vars=header[:3],value_vars=ApCols,var_name="ApStep",value_name="Ap") 59 | apdf['hour']=(apdf['ApStep'].str.get(2).astype(int)-1)*3+1 60 | apdf[["date"]]=pd.to_datetime(dict(year=apdf.YYYY, month=apdf.MM, day=apdf.DD,hour=apdf.hour)) 61 | apdf=apdf[['Ap','date']].sort_values(["date"]) 62 | apdf=apdf.set_index('date') 63 | kpdf=kpdf.join(apdf) 64 | return kpdf 65 | 66 | def interpolate(self,df): 67 | df=df.resample('1H').interpolate() 68 | #I'm not sure, but I had to extrapolate the first and last hours repeating what would be a step function. 
69 | row_1=df.head(1) 70 | row_1.index=row_1.index+pd.DateOffset(hours=-1) 71 | row_last=df.tail(1) 72 | row_last.index=row_last.index+pd.DateOffset(hours=1) 73 | df = pd.concat([row_1,df,row_last], ignore_index=False) 74 | return df 75 | 76 | def _download(self,url,rootdir): 77 | if not os.path.exists(rootdir): 78 | os.makedirs(rootdir) 79 | 80 | # Assigns the local file name to the last part of the URL 81 | filename = url.split('/')[-1] 82 | 83 | fullFilePath=os.path.join(rootdir,filename) 84 | with closing(request.urlopen(url)) as r: 85 | with open(fullFilePath, 'wb') as f: 86 | shutil.copyfileobj(r, f) 87 | 88 | #if r.status_code==404: 89 | # logging.warning("File not found: "+url) 90 | 91 | 92 | def plotSeries(df,title,measure="Ap"): 93 | fig, ax = plt.subplots(figsize=(10, 6)) 94 | ax.set_title(title) 95 | ax.grid(True) 96 | # Same as above 97 | ax.set_xlabel('Date') 98 | ax.xaxis.set_major_locator(AutoDateLocator()) 99 | ax.xaxis.set_major_formatter(DateFormatter('%b %d %Y')) 100 | # Plotting on the first y-axis 101 | ax.set_ylabel(measure) 102 | ax.plot(df.index, df[measure], color='tab:orange', label=measure) 103 | name=title.replace(" ","_") 104 | plt.savefig(f"{name}.png",bbox_inches='tight') 105 | 106 | def main(): 107 | downloader=indicesDownloader() 108 | year='2019' #2019,2020 or 'nowcast' 109 | apdf=downloader.getIndexes(year,"./postdam2/") 110 | 111 | plotSeries(apdf,f'Ap time series of year {year}') 112 | plotSeries(apdf,f'F10.7cm time series of year {year}',"F107adj") 113 | idf=downloader.interpolate(apdf) 114 | 115 | apdf=apdf[(apdf.index>=pd.Timestamp("2019-06-13")) & (apdf.index<=pd.Timestamp("2019-06-15"))] 116 | idf=idf[(idf.index>=pd.Timestamp("2019-06-13")) & (idf.index<=pd.Timestamp("2019-06-15"))] 117 | 118 | fig, ax = plt.subplots(figsize=(10, 6)) 119 | axb = ax.twinx() 120 | ax.set_title('Interpolating from 3 hours to 1 hour intervals') 121 | ax.grid(True) 122 | 123 | # Same as above 124 | ax.set_xlabel('Date (Month-day hour)') 125 | ax.xaxis.set_major_locator(AutoDateLocator()) 126 | ax.xaxis.set_major_formatter(DateFormatter('%d %b : %H:%M')) 127 | 128 | # Plotting on the first y-axis 129 | ax.set_ylabel('Ap (geomagnetic index)') 130 | ax.plot(apdf.index, apdf["Ap"], color='tab:gray', label='Downloaded Ap' ) #'tab:orange' 131 | ax.plot(idf.index, idf["Ap"], color='tab:gray', label='Interpolated Ap',marker='o', markersize=4, linestyle='None') 132 | 133 | # Plotting on the second y-axis 134 | axb.set_ylabel('F10.7cm (solar flux)') 135 | axb.plot(apdf.index, apdf["F107adj"], color='k', label='Downloaded F10.7', linestyle= 'dashed') 136 | axb.plot(idf.index, idf["F107adj"], color='k', label='Interpolated F10.7',marker='o', markersize=4, linestyle='None') 137 | 138 | 139 | 140 | # Handling of getting lines and labels from all axes for a single legend 141 | lines, labels = ax.get_legend_handles_labels() 142 | lines2, labels2 = axb.get_legend_handles_labels() 143 | axb.legend(lines + lines2, labels + labels2, loc='upper left') 144 | 145 | 146 | plt.savefig("forecast.png",bbox_inches='tight') 147 | 148 | if __name__=="__main__": 149 | main() 150 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow.keras as keras 3 | import pandas as pd 4 | from sklearn.model_selection import train_test_split 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | class MergedGenerators(keras.utils.Sequence): 9 | def 
__init__(self, generators=[]): 10 | self.generators = generators 11 | self.len_gen=[len(x) for x in self.generators] 12 | self.gen_ids=np.concatenate([ x*[i] for i,x in enumerate(self.len_gen)]) #calculates which generator contain each sample id 13 | self.delta_id=np.roll(np.cumsum(self.len_gen),1) #how much we should reduce from a global index to get the generator index 14 | self.delta_id[0]=0 15 | 16 | def __len__(self): 17 | return sum(self.len_gen) 18 | 19 | def __getitem__(self, index): 20 | """Getting items from the generators and packing them""" 21 | gen_id=self.gen_ids[index] 22 | gen_index=index-self.delta_id[gen_id] 23 | return self.generators[gen_id][gen_index] 24 | def count(self): 25 | return sum([x.count() for x in self.generators]) 26 | 27 | 28 | 29 | class DataGenerator(keras.utils.Sequence): 30 | 'Generates data for Keras' 31 | def __init__(self, x, batch_size, nstepsin=4, nstepsout=1, shuffle=True,training=True, removeRotation=False, sample_rate=12, val_split=0, validation=False, random_state=23): 32 | 'Initialization' 33 | super().__init__() 34 | self.list_IDs=range(0,len(x)-(nstepsout-1+nstepsin),sample_rate) #store the index to allow shuffling 35 | if val_split>0: 36 | datasplit=train_test_split(self.list_IDs,random_state=random_state, test_size=val_split) 37 | if validation==False: #training generator 38 | self.list_IDs=datasplit[0] 39 | else: 40 | self.list_IDs=datasplit[1] 41 | 42 | self.nstepsin=nstepsin 43 | self.nstepsout=nstepsout 44 | self.batch_size=batch_size 45 | self.dim=x[0].shape 46 | self.shuffle=shuffle 47 | self.training=training 48 | self.removeRotation=removeRotation 49 | self.x=self.preprocess(x) 50 | self.on_epoch_end() 51 | 52 | def preprocess(self,x): 53 | if self.removeRotation: 54 | shift=3 #int(72/24) #number of columns rolled per hour 55 | series=[] 56 | for i in range(0,len(x)): 57 | series.append(x[i,:,:-1,:]) 58 | series[i]=np.roll(series[i],shift,axis=1) 59 | return np.array(series) 60 | else: 61 | return x 62 | def count(self): 63 | 'Returns the number of samples' 64 | return len(self.list_IDs) 65 | 66 | def __len__(self): 67 | 'Denotes the number of batches per epoch' 68 | return int(np.ceil(len(self.list_IDs) / self.batch_size)) 69 | 70 | def __getitem__(self, index): 71 | 'Generate one batch of data' 72 | # Generate indexes of the batch 73 | indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size] 74 | 75 | # Find list of IDs 76 | list_IDs_temp = [self.list_IDs[k] for k in indexes] 77 | 78 | # Generate data 79 | X, y = self.__data_generation(list_IDs_temp) 80 | 81 | if self.training: 82 | return X, y 83 | else: 84 | return X 85 | 86 | def on_epoch_end(self): 87 | 'Updates indexes after each epoch' 88 | self.indexes = np.arange(len(self.list_IDs)) 89 | if self.shuffle == True: 90 | np.random.shuffle(self.indexes) 91 | 92 | def __data_generation(self, list_IDs_temp): 93 | 'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels) 94 | # Initialization 95 | X = [] 96 | Y = [] 97 | # Generate data 98 | for ID in list_IDs_temp: 99 | x,y=self.split_sequence(ID) 100 | X.append(x) 101 | Y.append(y[...,[0]]) #this change was made to force only the tec as output 102 | #Y.append(y) 103 | return np.array(X), np.array(Y) 104 | 105 | def split_sequence(self, i): 106 | # find the end of this pattern 107 | end_ix = i + self.nstepsin 108 | # check if we are beyond the sequence 109 | if end_ix + self.nstepsout> len(self.x): 110 | return None,None 111 | # gather input and output parts of the pattern 112 | #seq_x, 
seq_y = self.x[i:end_ix], self.x[i+1:end_ix+self.nstepsout] #this was used for residual prediction 113 | seq_x, seq_y = self.x[i:end_ix], self.x[end_ix:end_ix+self.nstepsout] 114 | #if self.nstepsout==1: seq_y=seq_y[0] #this is because the network is not going to expect a vector 115 | #seq_x=self.pad(seq_x) #this was a test to use circular padding. 116 | return seq_x,seq_y 117 | 118 | def asArray(self): 119 | return self.__data_generation(self.list_IDs) 120 | def pad(self,mseq): #circular padding 121 | mseq=np.pad(mseq,pad_width=((0,0),(0,0),(4,4),(0,0)),mode='wrap') 122 | mseq=np.pad(mseq,pad_width=((0,0),(4,4),(0,0),(0,0)),mode='edge') 123 | return mseq 124 | 125 | 126 | class DataGenerator1d(keras.utils.Sequence): 127 | def __init__(self): 128 | super().__init__() 129 | def __data_generation(self, list_IDs_temp): 130 | 'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels) 131 | # Initialization 132 | X = [] 133 | Y = [] 134 | # Generate data 135 | for ID in list_IDs_temp: 136 | x,y=self.split_sequence(ID) 137 | X.append(x) 138 | Y.append(y[...,[0]]) #this change was made to force only the tec as output 139 | #Y.append(y) 140 | return np.array(X), np.array(Y) 141 | 142 | if __name__=="__main__": 143 | data=np.array(np.sin(np.arange(0,100,0.1))) 144 | data=np.expand_dims(data,-1) 145 | nstepsin=36 146 | nstepsout=24 147 | 148 | 149 | 150 | """gen=DataGenerator1d(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 151 | x,y=gen[0] 152 | plt.plot(range(0,nstepsin),x[0]) 153 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 154 | plt.show() 155 | plt.close() 156 | 157 | data=np.array(np.sin(np.arange(100,150,0.1))) 158 | data=np.expand_dims(data,-1) 159 | gen2=DataGenerator1d(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2)""" 160 | from itertools import chain 161 | gen1=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 162 | gen2=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 163 | chained=chain(gen1,gen2) 164 | 165 | mgen=MergedGenerators([gen1,gen2]) 166 | 167 | x,y=mgen[0] 168 | 169 | gen=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 170 | x,y=gen[0] 171 | print(x[0]) 172 | print(y[0]) 173 | plt.plot(range(0,nstepsin),x[0]) 174 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 175 | plt.show() 176 | plt.close() 177 | gen=DataGenerator(data,10,nstepsin=nstepsin, nstepsout=nstepsout, val_split=0.2) 178 | x,y=gen[0] 179 | plt.plot(range(0,nstepsin),x[0]) 180 | plt.plot(range(nstepsin,nstepsin+nstepsout),y[0]) 181 | plt.show() 182 | plt.close() 183 | 184 | 185 | #print(gen.split_sequence(0)) 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /models/models.py: -------------------------------------------------------------------------------- 1 | import sys,os 2 | 3 | from tensorflow.keras.models import Model, Sequential 4 | from tensorflow.keras.backend import expand_dims, repeat_elements 5 | import tensorflow as tf 6 | 7 | from models.custom_layers import * 8 | if os.path.exists('models/dev'): 9 | for f in os.listdir('models/dev'): #importing dev folder if it exists 10 | if f.endswith('.py'): 11 | s=f"from models.dev.{f[:-3]} import *" 12 | exec(s) 13 | 14 | 15 | """ANN 16 | The dense layers work only on the temporal dimension.""" 17 | def ANN(inputShape,filters=50,nstepsout=1, layers=3, activation="linear"): 18 | #inspired by 
https://www.tensorflow.org/tutorials/structured_data/time_series#multi-step_models 19 | in_im = Input(shape=inputShape) 20 | x=in_im 21 | x = Permute((2,3,1,4), name="MoveTimeToLastDim")(x) #moves time to last dimension 22 | newShape=x.shape 23 | x= Reshape((*newShape[1:-2],-1))(x) 24 | for i in range(layers): 25 | if i==layers-1: 26 | filters=nstepsout 27 | #activation="LeakyReLU" #None 28 | x = BatchNormalization()(x) 29 | x = Dropout(0.2)(x) 30 | x = Dense(filters, activation=activation)(x) 31 | x=expand_dims(x, axis=1) 32 | x = Permute((4,2,3,1), name="TimeToFirstDim")(x) #moves time back to first dim 33 | model = Model(in_im, x) 34 | return model 35 | 36 | """Convolutional LSTM N to 1 implementation with 1x1 kernels. 37 | Inspired by https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html""" 38 | def c111_nto1(inputShape,filters=16,nstepsout=1, kernel=(1, 1), scale=1.,offset=0., dropout=0): 39 | in_im = Input(shape=inputShape) 40 | x=in_im 41 | #encoder 42 | #x = Conv3D(1, 1, padding='same',activation="relu")(in_im) 43 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x) 44 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x) 45 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=False, dropout=dropout)(x) 46 | x=Conv2D(1, (1, 1), activation='linear', padding='same')(x) #changed to 47 | x=expand_dims(x, axis=1) 48 | encmodel = Model(in_im, x) 49 | return encmodel 50 | 51 | """ Convolutional LSTM N to 1 implementation with 3x3 kernels. 52 | Ispired by ConvLSTM dilated 121 model (Boulch, 2018) 53 | Changes: 54 | - tanh activation instead of relu. Data was normalized with negative numbers. ReLu doesn't reach negatives. 55 | """ 56 | def c333_nto1(inputShape,filters=16,nstepsout=12, dropout=0): 57 | kernel=(3, 3) 58 | model =c111_nto1(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 59 | return model 60 | 61 | """BiConvLSTM 1x1. 62 | Bidirectional Convolutional LSTM.""" 63 | def c111bi(inputShape,filters=16,nstepsout=16, kernel=(1, 1), dropout=0): 64 | in_im = Input(shape=inputShape) 65 | x=in_im 66 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 67 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 68 | x=Bidirectional(ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout))(x) 69 | x=TimeDistributed(Conv2D(1, (1, 1), activation='linear', padding='same'))(x) #changed to 70 | encmodel = Model(in_im, x) 71 | return encmodel 72 | 73 | """BiConvLSTM 3x3. 74 | Bidirectional Convolutional LSTM.""" 75 | def c333bi(inputShape,filters=16,nstepsout=12, dropout=0): 76 | kernel=(3, 3) 77 | model =c111bi(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 78 | return model 79 | 80 | 81 | """ED-ConvLSTM. 82 | Encoder-Decoder Convolutional LSTM 1x1. 
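The three encoder ConvLSTM2D layers return their final (h, c) states; the last encoder output is repeated nstepsout times and fed to a mirrored decoder stack whose layers are initialized with those states, and a final time-distributed 1x1 convolution maps the result back to a single output channel.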
83 | """ 84 | def c111(inputShape,filters=16,nstepsout=12, kernel=(1, 1), dropout=0): 85 | #Inspired on https://github.com/Azure/DeepLearningForTimeSeriesForecasting/blob/master/3_RNN_encoder_decoder.ipynb 86 | in_im = Input(shape=inputShape) 87 | x=in_im 88 | #encoder 89 | x,h1,c1=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True,return_state=True, dropout=dropout)(x) 90 | x,h2,c2=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True,return_state=True, dropout=dropout)(x) 91 | x,h3,c3=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=False,return_state=True, dropout=dropout)(x) 92 | x=Lambda(lambda x: repeat_elements(expand_dims(x, axis=1), nstepsout, 1))(x) 93 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h1,c1]) 94 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h2,c2]) 95 | x=ConvLSTM2D(filters=filters, kernel_size=kernel,padding='same',return_sequences=True, dropout=dropout)(x, initial_state=[h3,c3]) 96 | x=TimeDistributed(Conv2D(1, (1, 1), activation='linear', padding='same'))(x) #changed to 97 | encmodel = Model(in_im, x) 98 | return encmodel 99 | 100 | """ED-ConvLSTM. 101 | Encoder-Decoder Convolutional LSTM 3x3.""" 102 | def c333(inputShape,filters=16,nstepsout=12, dropout=0.2): 103 | kernel=(3, 3) 104 | model =c111(inputShape,filters=filters,nstepsout=nstepsout, kernel=kernel, dropout=dropout) 105 | return model 106 | 107 | """iConvLSTM 108 | from https://onlinelibrary.wiley.com/doi/abs/10.1029/2021SW002854""" 109 | def iConvLSTM(inputShape,filters=16,nstepsout=12): 110 | in_im = Input(shape=inputShape) 111 | x=in_im 112 | #encoder 113 | x=TimeDistributed(Conv2D(filters*1, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 114 | x1=ConvLSTM2D(filters=filters*1, kernel_size=(5,5), padding='same',return_sequences=True, activation='LeakyReLU')(x) 115 | x=TimeDistributed(Conv2D(filters*2, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x1) #changed to 116 | x2=ConvLSTM2D(filters=filters*2, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 117 | x=TimeDistributed(Conv2D(filters*4, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x2) #changed to 118 | x3=ConvLSTM2D(filters=filters*4, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 119 | 120 | x=ConvLSTM2D(filters=filters*4, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x3) 121 | x=TimeDistributed(Conv2DTranspose(filters*4, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 122 | x=Concatenate()([x,x2]) 123 | x=ConvLSTM2D(filters=filters*2, kernel_size=(3,3), padding='same',return_sequences=True, activation='LeakyReLU')(x) 124 | x=TimeDistributed(Conv2DTranspose(filters*2, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 125 | x=Concatenate()([x,x1]) 126 | x=ConvLSTM2D(filters=filters*1, kernel_size=(5,5), padding='same',return_sequences=True, activation='LeakyReLU')(x) 127 | x=TimeDistributed(Conv2DTranspose(filters*1, (2, 2), activation='LeakyReLU', padding='same',strides=(2,2)))(x) #changed to 128 | x=TimeDistributed(Conv2DTranspose(1, (1, 1), activation='LeakyReLU', padding='same',strides=(1,1)))(x) #changed to 129 | encmodel = Model(in_im, x) 130 | return encmodel 131 | 132 | 
"""Repeat previous day baseline.""" 133 | def usePrevious(inputShape,filters=0,nstepsin=12,nstepsout=24): 134 | #inspired by https://www.tensorflow.org/tutorials/structured_data/time_series#baselines 135 | in_im = Input(shape=inputShape) 136 | nstepsin=inputShape[1] 137 | if nstepsout!=nstepsin: 138 | x=Lambda(lambda x: x[:,-nstepsout:,...])(in_im) 139 | else: 140 | x=in_im 141 | m = Model(in_im, x) 142 | return m 143 | 144 | 145 | if __name__=="__main__": 146 | shape=(24,72,72,1) 147 | model=ANN(shape,8,12) 148 | model.summary() 149 | 150 | 151 | 152 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #https://machinelearningmastery.com/how-to-develop-lstm-models-for-multi-step-time-series-forecasting-of-household-power-consumption/ 2 | #https://blog.keras.io/a-ten-minute-introduction-to-sequence-to-sequence-learning-in-keras.html 3 | #https://levelup.gitconnected.com/building-seq2seq-lstm-with-luong-attention-in-keras-for-time-series-forecasting-1ee00958decb 4 | 5 | import os,sys, shutil 6 | from datetime import datetime 7 | from itertools import chain 8 | 9 | import numpy as np 10 | from numpy import array 11 | import pandas as pd 12 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 13 | import tensorflow as tf 14 | from tensorflow.keras.layers import LSTM, ConvLSTM2D, Dense,BatchNormalization, Input 15 | from tensorflow.keras.callbacks import EarlyStopping 16 | from tensorflow.keras.utils import plot_model 17 | 18 | from models.models import * 19 | from generator import DataGenerator, MergedGenerators 20 | from lstm_utils import * #This is ours. 21 | 22 | 23 | #These are not necessary on colab 24 | tf.config.experimental.set_memory_growth(tf.config.get_visible_devices()[1], True) 25 | 26 | #loading experiment configuration 27 | args=get_args() 28 | config=load_config(args.experiment) 29 | print(config) 30 | 31 | np.random.seed(1)#config.random_seed) 32 | randomSeeds=(np.random.random(config.best_of)*100).astype(int) 33 | 34 | #we have different datasets for the models that use space weather 35 | print(f"Loading data from {config.train_npy_dataset}") 36 | 37 | ionex_npy=config.train_npy_dataset.split(',') 38 | ionexList=[] 39 | for npy_file in ionex_npy: 40 | ionex=np.load(npy_file) 41 | if config.train_time_sampling>1: 42 | ionex=ionex[::config.train_time_sampling,...] #2h step 43 | 44 | #Resizing to 72x72 45 | ionex=np.concatenate((ionex[:,:,:-1,:],ionex[:,-1:,:-1,:]),axis=1) 46 | ionex=getDataSubset(ionex,config.experiment_name) 47 | 48 | ionexList.append(ionex) 49 | 50 | #scaling 51 | ionexStack=np.concatenate(ionexList) 52 | parameters = { "mean" : ionexStack.mean(axis=(0,1,2)) , "max": ionexStack.max(axis=(0,1,2)), "min": ionexStack.min(axis=(0,1,2)), "input_t_steps": config.lag_window} 53 | ionexList=[scaleForward(ionex,parameters) for ionex in ionexList] 54 | del ionexStack 55 | #We need to save these scaling parameters for prediction 56 | print("Saving scaling information on parameters.py. 
If you change the input data, please remove the file and retrain.") 57 | with open(getModelFilePath(config.experiment_name, f"params.py"),'w') as f:f.write(repr(parameters)) 58 | 59 | exp_val_rmse=[] 60 | 61 | for experimentNumber, randomSeed in enumerate(randomSeeds): #this represents how many times we are going to train the network 62 | print(f"Starting experiment {experimentNumber}") 63 | try: 64 | model= eval(config.model) 65 | except: 66 | print(f"Error trying to load the model chosen in {args.config}") 67 | sys.exit() 68 | 69 | 70 | ## HYPER PARAMETERS ## 71 | batch_size=config.batch_size 72 | input_t_steps=config.lag_window 73 | #output_t_steps=config.prediction_window#12#24 74 | if config.prediction=='seq2one': 75 | output_t_steps=1 76 | else: 77 | output_t_steps=config.prediction_window 78 | 79 | print("Input shape: ",ionex.shape) 80 | 81 | #The data generators apply the sliding window for the time frames 82 | training_generators=[] 83 | validation_generators=[] 84 | for ionex in ionexList: 85 | training_generators.append(DataGenerator(ionex, batch_size=batch_size, nstepsin=config.lag_window, nstepsout=output_t_steps,sample_rate=config.resample_rate, validation=False, val_split=0.2, random_state=23)) 86 | validation_generators.append(DataGenerator(ionex, batch_size=batch_size, nstepsin=config.lag_window, nstepsout=output_t_steps,sample_rate=config.resample_rate, validation=True, val_split=0.2, random_state=23)) 87 | training_generator=MergedGenerators(training_generators) 88 | validation_generator=MergedGenerators(validation_generators) 89 | 90 | 91 | #print(f"Checking intersections: {list(set(validation_generator.list_IDs) & set(training_generator.list_IDs))}") 92 | 93 | print(f"Training maps: {training_generator[0][0].shape}") 94 | print(f"Validation maps: {validation_generator[0][0].shape}") 95 | 96 | print(f"Training set: {training_generator.count()}") 97 | print(f"Validation set: {validation_generator.count()}") 98 | 99 | batch_shape_x=training_generator[0][0][0].shape 100 | batch_shape_y=training_generator[0][1][0].shape 101 | print(f"batch_shape_x={batch_shape_x}") 102 | print(f"batch_shape_y={batch_shape_y}") 103 | 104 | model=model(batch_shape_x,nstepsout=output_t_steps, filters=config.filters) #initializing model 105 | 106 | 107 | print(model.summary()) 108 | plot_model(model, to_file=getModelFilePath(config.experiment_name, "model.png"), show_shapes=True, show_layer_names=True) 109 | 110 | model.compile(optimizer=config.optimizer, jit_compile=True, loss=config.loss,metrics=['mean_absolute_error', 'mean_squared_error']) 111 | 112 | print("Model fitting") 113 | earlystopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.0001,patience=10, restore_best_weights=True, verbose = 1, mode="min") #0.0001 / 10 114 | #checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(getModelFileName(config.experiment_name), monitor='val_loss', verbose=1, save_best_only=True) 115 | logdir='./logs/'+config.experiment_name 116 | tb_callback = tf.keras.callbacks.TensorBoard(logdir, update_freq='epoch') 117 | from tensorboard.plugins.hparams import api as hp 118 | 119 | 120 | #model.adapt(training_generator) 121 | 122 | start = datetime.now() 123 | print(f'Started training: {start}') 124 | history=model.fit(training_generator,validation_data=validation_generator, epochs=config.num_epochs, verbose=2, callbacks = [earlystopping,tb_callback]) 125 | 126 | end = datetime.now() 127 | print(f'Ended training: {end}') 128 | totaltime= end - start 129 | 130 | 
currentModelFname=getModelFileName(config.experiment_name,experimentNumber) 131 | model.save(currentModelFname) 132 | 133 | 134 | val_results = model.evaluate(validation_generator,batch_size=config.batch_size,verbose=2) 135 | train_results = model.evaluate(training_generator,batch_size=config.batch_size,verbose=2) 136 | print(val_results) 137 | 138 | print(f"Time spent training: {totaltime}") 139 | memoryUsed=getNvidiaSmiMem() 140 | print(f"Memory used: {memoryUsed}") 141 | 142 | s=getScaleFromParameters(parameters)[0] 143 | val_rmse=s*np.sqrt(val_results[2]) 144 | if experimentNumber==0: 145 | best_val_rmse=val_rmse 146 | bestExp=0 147 | 148 | if val_rmse<= best_val_rmse: 149 | print("Saving training results because this is currently the best model.") 150 | best_val_rmse=val_rmse 151 | bestExp=experimentNumber 152 | #saving macro results 153 | #resultsFile="output/results.py" 154 | resultsFile=getModelFilePath(config.experiment_name,"results.py") 155 | if not os.path.exists(resultsFile): 156 | results={} 157 | else: 158 | with open(resultsFile, 'r') as f: results = eval(f.read()) 159 | 160 | expId=f"{config.experiment_name}_{experimentNumber}" 161 | 162 | if not expId in results.keys(): 163 | modelResults={} 164 | else: 165 | modelResults=results[expId] 166 | 167 | modelResults["time"]= totaltime.total_seconds() 168 | modelResults["memory"]= memoryUsed 169 | modelResults["epochs"]= len(history.history['loss']) 170 | modelResults["train_rmse"]= s*np.sqrt(train_results[2]) 171 | modelResults["train_mae"]= s*train_results[1] 172 | modelResults["val_rmse"]= val_rmse 173 | modelResults["val_mae"]= s*val_results[1] 174 | 175 | results[expId]=modelResults 176 | with open(resultsFile,'w') as f:f.write(repr(results)) 177 | 178 | np.savez(getModelFilePath(config.experiment_name,f'history{experimentNumber}'),h=history.history) 179 | #history=np.load('my_history.npy',allow_pickle='TRUE').item() #this would read 180 | 181 | scale=getScaleFromParameters(parameters) 182 | plotHistory(history,getModelFilePath(config.experiment_name,f'history{experimentNumber}.png'),scale=scale) 183 | 184 | #finding the best experiment 185 | print(f"Best experiment: {bestExp}. 
Restoring best model.") 186 | shutil.copy(getModelFileName(config.experiment_name,bestExp),getModelFileName(config.experiment_name)) 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | with tf.summary.create_file_writer(logdir,name=config.experiment_name).as_default(): 198 | hparams = { 199 | 'model': config.model, 200 | 'parameters': sum([v.shape.num_elements() for v in model.trainable_variables]), 201 | 'batch_size': batch_size, 202 | 'nstepsin': config.lag_window, 203 | 'nstepsout': output_t_steps, 204 | 'time_training': totaltime.total_seconds(), 205 | } 206 | hp.hparams(hparams) # record the values used in this trial 207 | 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /lstm_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import re 4 | import json 5 | from types import SimpleNamespace 6 | import argparse 7 | 8 | import pandas as pd 9 | import imageio 10 | from numpy import array 11 | import numpy as np 12 | import matplotlib.pyplot as plt 13 | 14 | def get_args(): 15 | argparser = argparse.ArgumentParser(description=__doc__) 16 | argparser.add_argument( 17 | '-e', '--experiment', 18 | metavar='E', 19 | default='default', 20 | help='Experiment name as in config.csv') 21 | argparser.add_argument( 22 | '-c', '--config', 23 | metavar='C', 24 | default='config.csv', 25 | help='config.csv location.') 26 | args = argparser.parse_args() 27 | return args 28 | 29 | def load_config(experiment="default",config_csv_file="config.csv"): 30 | df=pd.read_csv(config_csv_file) 31 | row=df[df['experiment_name']==experiment] 32 | res=SimpleNamespace(**row.to_dict(orient='records')[0]) 33 | return res 34 | 35 | def update_config(experiment="default", column='tested', value='True', config_csv_file="config.csv"): 36 | df=pd.read_csv(config_csv_file) 37 | row=df[df['experiment_name']==experiment] 38 | res=SimpleNamespace(**row.to_dict(orient='records')[0]) 39 | return res 40 | 41 | # split a sequence into samples 42 | def split_sequence(sequence, n_steps, n_stepsout=1): 43 | X, y = list(), list() 44 | for i in range(len(sequence)): 45 | # find the end of this pattern 46 | end_ix = i + n_steps 47 | # check if we are beyond the sequence 48 | if end_ix > len(sequence)-n_stepsout: 49 | break 50 | # gather input and output parts of the pattern 51 | seq_x, seq_y = sequence[i:end_ix], sequence[end_ix:end_ix+n_stepsout] 52 | X.append(seq_x) 53 | y.append(seq_y) 54 | return array(X), array(y) 55 | 56 | def getModelFolder(name, filename=''): 57 | root_path=os.getcwd() 58 | outfolder=os.path.join(root_path,'output',name) 59 | if not os.path.exists(outfolder): 60 | os.makedirs(outfolder) 61 | return os.path.join(outfolder,filename) 62 | 63 | def getModelFilePath(name, filename): 64 | return os.path.join(getModelFolder(name),filename) 65 | 66 | def getModelFileName(name,expNumber=-1): 67 | outfolder=getModelFolder(name) 68 | if expNumber>=0: 69 | fname=f"model_{expNumber}.h5" 70 | else: 71 | fname=f'model.h5' 72 | return os.path.join(outfolder,fname) 73 | 74 | def plotMap(m): 75 | m=np.moveaxis(m,-1,0)[0] 76 | plt.imshow(m, extent=[-180,180,-90,90]) #minx maxx miny maxy 77 | plt.show() 78 | 79 | 80 | def plotHistory(history,filename,scale=1): 81 | mse = np.sqrt(np.array(history.history['mean_squared_error']))*scale[0] 82 | val_mse = np.sqrt(np.array(history.history['val_mean_squared_error']))*scale[0] 83 | 84 | loss = 
np.array(history.history['mean_absolute_error'])*scale[0] 85 | val_loss = np.array(history.history['val_mean_absolute_error'])*scale[0] 86 | 87 | plt.figure(figsize=(8, 8)) 88 | plt.subplot(2, 1, 1) 89 | plt.plot(mse, label='Training RMSE') 90 | plt.plot(val_mse, label='Validation RMSE') 91 | plt.legend(loc='lower right') 92 | plt.ylabel('Root Mean Square Error') 93 | ymax=max(max(mse),max(val_mse)) 94 | plt.ylim([min(plt.ylim()),ymax]) 95 | plt.title('Training and Validation RMSE') 96 | 97 | plt.subplot(2, 1, 2) 98 | plt.plot(loss, label='Training MAE') 99 | plt.plot(val_loss, label='Validation MAE') 100 | plt.legend(loc='upper right') 101 | plt.ylabel('Mean Absolute Error') 102 | ymax=max(max(loss),max(val_loss)) 103 | plt.ylim([0,ymax]) 104 | plt.title('Training and Validation Loss') 105 | plt.xlabel('epoch') 106 | plt.savefig(filename, bbox_inches='tight') 107 | plt.close()# build gif 108 | 109 | def getScaleFromParameters(parameters): 110 | return (parameters["max"]-parameters["min"])/2 111 | 112 | def scaleBack(m,parameters): 113 | s=getScaleFromParameters(parameters) 114 | if isinstance(m, np.ndarray): #check if it's an array 115 | if m.shape[-1] != s.shape[-1]: 116 | s=s[...,:m.shape[-1]] 117 | m=(m+1)*s 118 | return m 119 | 120 | def scaleForward(m,parameters): 121 | s=getScaleFromParameters(parameters) 122 | m=(m/s)-1 123 | return m 124 | 125 | def plotTwins(ma, mb, title, outputFile,shareColorBar=False,ylabel1='',ylabel2=''): 126 | fig, axs = plt.subplots(2) 127 | fig.suptitle(title) 128 | 129 | if shareColorBar: 130 | vmin=ma.min() 131 | vmax=ma.max() 132 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 133 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 134 | else: 135 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90]) 136 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90]) 137 | axs[0].set_ylabel(ylabel1) 138 | axs[1].set_ylabel(ylabel2) 139 | fig.colorbar(a,ax=axs[0]) 140 | fig.colorbar(b,ax=axs[1]) 141 | plt.savefig(outputFile, bbox_inches='tight') 142 | plt.close() 143 | 144 | def plotTwinsAndError(ma, mb, merror, title, outputFile,shareColorBar=False,ylabel1='',ylabel2='',ylabel3="Difference"): 145 | fig, axs = plt.subplots(3) 146 | fig.suptitle(title) 147 | 148 | if shareColorBar: 149 | vmin=ma.min() 150 | vmax=ma.max() 151 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 152 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90],vmin=vmin,vmax=vmax) 153 | else: 154 | a=axs[0].imshow(np.squeeze(ma),extent=[-180,180,-90,90]) 155 | b=axs[1].imshow(np.squeeze(mb),extent=[-180,180,-90,90]) 156 | axs[0].set_ylabel(ylabel1) 157 | axs[1].set_ylabel(ylabel2) 158 | fig.colorbar(a,ax=axs[0]) 159 | fig.colorbar(b,ax=axs[1]) 160 | c=axs[2].imshow(np.squeeze(merror),extent=[-180,180,-90,90]) 161 | fig.colorbar(c,ax=axs[2]) 162 | axs[2].set_ylabel(ylabel3) 163 | plt.savefig(outputFile, bbox_inches='tight') 164 | plt.close() 165 | 166 | def getDataSubset(ionex,modelName): 167 | if "_Ap" in modelName: 168 | bands="0,1" #use tec and ap 169 | elif "_F107AP" in modelName: 170 | bands="0,1,2" 171 | elif "_F107" in modelName: 172 | bands="0,2" #use tec and F107 173 | else: 174 | bands="0" 175 | bands=eval("["+bands+"]") 176 | ionex=ionex[:,:,:,bands] #use tec and ap 177 | if "_1d" in modelName: 178 | ionex=ionex[:,35:36,35:36,bands] #use tec and ap 179 | return ionex 180 | 181 | def ulm_plot(mid, upper, lower): 182 | plt.figure(figsize=(8, 8)) 183 | plt.subplot(2, 1, 1) 184 | 
plt.plot(mid, label='Training MAE') 185 | plt.fill_between(range(mid.size), upper, lower,color='green', alpha=0.2 ) 186 | plt.plot(upper) 187 | plt.plot(lower) 188 | plt.legend(loc='lower right') 189 | plt.ylabel('Mean Absolute Error') 190 | plt.title('Training and Validation MAE') 191 | 192 | def getPixelSeries(m, i, j): 193 | return m[...,i,j,0].flatten() 194 | 195 | 196 | def saveGif(matrixList,gifFileName,clearFrames=True): 197 | filenames=[] 198 | for i,m in enumerate(matrixList): 199 | # plot the line chart 200 | #plt.plot(y[:i]) 201 | plt.imshow(np.squeeze(m), extent=[-180,180,-90,90]) #minx maxx miny maxy 202 | 203 | # create file name and append it to a list 204 | filename = f'{gifFileName}_{i}.png' 205 | filenames.append(filename) 206 | plt.title(f"Day {int(np.floor(i/24))+1} hour {i%24:02d}") 207 | # save frame 208 | plt.savefig(filename, bbox_inches='tight') 209 | plt.close()# build gif 210 | with imageio.get_writer(gifFileName, mode='I') as writer: 211 | for filename in filenames: 212 | image = imageio.imread(filename) 213 | writer.append_data(image) 214 | # Remove files 215 | if clearFrames: 216 | for filename in set(filenames): 217 | os.remove(filename) 218 | 219 | def main(): 220 | 221 | l=range(1,100) 222 | print(l) 223 | x,y=split_sequence(l,4,2) 224 | print(x.shape) 225 | print(y.shape) 226 | lu=np.array(l)*2 227 | ld=np.array(l)/2 228 | lm=np.array(l) 229 | ulm_plot(lm,lu,ld) 230 | 231 | def r2(y_true, y_pred): 232 | """ 233 | R^2 (coefficient of determination) regression score function. 234 | Best possible score is 1.0, lower values are worse. 235 | Args: 236 | y_true ([np.array]): test samples 237 | y_pred ([np.array]): predicted samples 238 | Returns: 239 | [float]: R2 240 | """ 241 | SS_res = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1) 242 | SS_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true, axis=-1)), axis=-1) 243 | return (1 - SS_res/(SS_tot + tf.keras.backend.epsilon())) 244 | 245 | def getNvidiaSmiMem(): 246 | nvidiasmi=subprocess.run("nvidia-smi", shell=True, capture_output=True) 247 | found=re.search('C.+python.(.+?)\|', nvidiasmi.stdout.decode("utf-8") ) 248 | if found: 249 | memory=found.group(1).strip() 250 | else: 251 | memory=0 252 | return memory 253 | 254 | if __name__=="__main__": 255 | main() 256 | -------------------------------------------------------------------------------- /plotresults.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from numpy import array 4 | import matplotlib.pyplot as plt 5 | from osgeo import gdal_array 6 | from extra.plot_time_series import saveGif 7 | from lstm_utils import * 8 | #loading config that tells which experiments should be on the charts 9 | 10 | args=get_args() 11 | baseconfig='config.csv' 12 | df=pd.read_csv(baseconfig) 13 | 14 | if baseconfig!=args.config: 15 | dfFilter=pd.read_csv(args.config) 16 | df=df[df.experiment_name.isin(dfFilter['experiment_name'].tolist())] 17 | df=df.merge(dfFilter, on='experiment_name',how='left') 18 | else: 19 | if not 'label' in df.columns: df['label']=np.nan 20 | df['label'] = df['label'].fillna(df['experiment_name']) #uses experiment_name if label is empty 21 | labels=dict(zip(df.experiment_name, df.label)) 22 | 23 | 24 | #Find which tests were trained with multiple runs and separate results file. 
Add their best to the unified results.py file and create statistics 25 | 26 | #opening the results file 27 | resultsFile="output/results.py" 28 | if os.path.exists(resultsFile): 29 | with open(resultsFile, 'r') as f: results = eval(f.read()) 30 | else: 31 | results={} 32 | 33 | for experiment_name in df[(df['compare']==True)]['experiment_name'].values: 34 | rows=[] 35 | individualResultsFile=getModelFilePath(experiment_name, 'results.py') 36 | if os.path.exists(individualResultsFile): 37 | print(individualResultsFile) 38 | #opening the results file 39 | with open(individualResultsFile, 'r') as f: indResults = eval(f.read()) 40 | best_of=len(indResults) 41 | best_mae=-1 42 | rmse_l=[] 43 | mae_l=[] 44 | r2_l=[] 45 | for experimentNumber in range(best_of): 46 | #expId=f"{config.experiment_name}_{experimentNumber}" 47 | expId=f"{experiment_name}_{experimentNumber}" 48 | if expId in indResults: 49 | k=indResults[expId] 50 | rmse=np.sqrt((k.get('rmse_per_hour',np.array([9999]))**2).mean()) 51 | mae=k.get('mae_per_hour',np.array([9999])).mean() 52 | rmse_l.append(rmse) 53 | mae_l.append(mae) 54 | r2_l.append(k.get('r2',9999)) 55 | if mae < best_mae or best_mae==-1: 56 | best_mae=mae 57 | if not best_mae==9999: 58 | results[experiment_name]=k.copy() #copying to the main results list 59 | mae_l=np.array(mae_l) 60 | r2_l=np.array(r2_l) 61 | rmse_l=np.array(rmse_l) 62 | #rows.append([best_mae, mae_l.mean(), mae_l.std(), rmse_l.min(), rmse_l.mean(), rmse_l.std(), r2_l.max(), r2_l.mean(), r2_l.std() ]) 63 | if experiment_name in results: #this means that we found evaluation data 64 | results[experiment_name].update({'mae_mean': mae_l.mean(), 'mae_std':mae_l.std(), 'rmse_mean': rmse_l.mean(), 'rmse_std': rmse_l.std(), 'r2_mean': r2_l.mean(), 'r2_avg':r2_l.std()}) 65 | #header=['mae_best', 'mae_mean', 'mae_std', 'rmse_best','rmse_mean', 'rmse_std', 'r2_best', 'r2_mean', 'r2_avg'] 66 | #outdf=pd.DataFrame.from_records(rows, columns=header) 67 | #outdf.to_csv("output/results_stats.csv",float_format='{:,.2f}'.format) 68 | 69 | #filtering to plot only the experiments that were set as "compare" == True on config.csv 70 | filtereddf=df[(df['compare']==True) & (df['experiment_name'].isin(results.keys()))] 71 | comparedExperiments=filtereddf['experiment_name'].values 72 | 73 | #data={"Network":[], "parameters":[], "MAE":[], "RMSE":[], "r2":[], "rmse (1st)":[], "max error(first)":[], "rmse (last)":[], "max error (last)":[] , "time (min)":[], "memory":[], "epochs":[], "train_mae":[], "train_rmse":[], "val_mae":[], "val_rmse":[]} 74 | # "r2 (1st)":[], "r2 (last)":[], 75 | 76 | header=["Network", "parameters", "MAE", "RMSE", "r2", "rmse (1st)", "max error(first)", "rmse (last)", "max error (last)" , "time (min)", "memory", "epochs", "train_mae", "train_rmse", "val_mae", "val_rmse", 'mae_mean', 'mae_std', 'rmse_mean', 'rmse_std', 'r2_mean', 'r2_std'] 77 | rows=[] 78 | for experiment_name in comparedExperiments: 79 | k=results[experiment_name] 80 | row=[ 81 | labels[experiment_name], 82 | k['parameters'], 83 | k['mae_per_hour'].mean(), 84 | np.sqrt((k['rmse_per_hour']**2).mean()), 85 | k['r2'], 86 | k.get('rmse_per_hour',[[np.nan]])[0][0], 87 | k['max_1st'][0], 88 | k.get('rmse_per_hour',[[np.nan]])[-1][0], 89 | k.get('max_per_hour',[[np.nan]])[-1][0], 90 | k.get('time',np.nan)/60., 91 | k.get('memory',np.nan), 92 | k.get('epochs',np.nan), 93 | k.get("train_mae",np.nan), 94 | k.get("train_rmse",np.nan), 95 | k.get("val_mae",np.nan), 96 | k.get("val_rmse",np.nan), 97 | k.get("mae_mean",np.nan), 98 | 
k.get("mae_std",np.nan), 99 | k.get("rmse_mean",np.nan), 100 | k.get("rmse_std",np.nan), 101 | k.get("r2_mean",np.nan), 102 | k.get("r2_avg",np.nan), 103 | ] 104 | rows.append(row) 105 | 106 | #line=f"{key}, {k['parameters']}, {mae}, {rmse}, {k['r2_1st'][0]}, {k['rmse_1st'][0]}, {k['max_1st'][0]}, {k['r2_per_hour'][-1][0]}, {k['rmse_per_hour'][-1][0]}, {k['max_per_hour'][-1][0]}" 107 | #print(line) 108 | #f.write(line+'\n') 109 | 110 | 111 | 112 | 113 | #plt.ylim([1., 2.5]) #TECU 114 | for modelName in comparedExperiments: 115 | plt.plot(results[modelName]["rmse_per_hour"], label = labels[modelName], marker='.') 116 | plt.xlabel('Frames of prediction') 117 | plt.ylabel('RMSE (TEC units)') 118 | plt.title('Prediction RMSE per frame') 119 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 120 | plt.savefig("output/rmse.pdf", bbox_inches='tight') 121 | plt.close() 122 | 123 | #plt.ylim([0.8, 1.25]) #TECU 124 | for modelName in comparedExperiments: 125 | plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.') 126 | plt.xlabel('Frames of prediction') 127 | plt.ylabel('MAE (TEC units)') 128 | plt.title('Prediction MAE per frame') 129 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 130 | plt.savefig("output/mae.pdf", bbox_inches='tight') 131 | plt.close() 132 | 133 | for modelName in comparedExperiments: 134 | plt.plot(results[modelName]["max_per_hour"], label = labels[modelName], marker='.') 135 | plt.xlabel('Frames of prediction') 136 | plt.ylabel('Max error (TEC units)') 137 | plt.title('Prediction max error per frame') 138 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 139 | plt.savefig("output/max.pdf", bbox_inches='tight') 140 | plt.close() 141 | 142 | for modelName in comparedExperiments: 143 | plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.') 144 | plt.xlabel('Frames of prediction') 145 | plt.ylabel('MAE (TEC units)') 146 | plt.title('Prediction MAE per frame') 147 | plt.legend(loc='center left', bbox_to_anchor=(1, 0.5)) 148 | plt.savefig("output/errors.pdf", bbox_inches='tight') 149 | plt.close() 150 | 151 | 152 | for modelName in comparedExperiments: 153 | line=plt.plot(results[modelName]["mae_per_hour"], label = labels[modelName], marker='.')[0] 154 | plt.fill_between(range(results[modelName]["mae_per_hour"].size), results[modelName]["max_per_hour"].flatten(), results[modelName]["mae_per_hour"].flatten(),color=line.get_color(), alpha=0.2 ) 155 | plt.xlabel('Frames of prediction') 156 | plt.ylabel('MAE (TEC units)') 157 | plt.title('Prediction MAE per frame') 158 | plt.legend() 159 | plt.savefig("output/errors.pdf", bbox_inches='tight') 160 | plt.close() 161 | 162 | df=pd.DataFrame.from_records(rows, columns=header) 163 | 164 | #df=pd.DataFrame(data) 165 | df=df.sort_values(by='RMSE') 166 | 167 | pd.set_option('display.max_columns', None) # or 1000 168 | pd.set_option('display.max_rows', None) # or 1000 169 | pd.set_option('display.max_colwidth', None) # or 199 170 | pd.set_option('display.float_format','{:,.3f}'.format) 171 | print(df) 172 | df.to_csv("output/results.csv",float_format='{:,.3f}'.format) 173 | #Network, parameters, r2 (1st), rmse (1st), r2 (last), rmse (last) 174 | 175 | df=df.set_index('Network') 176 | df=df.sort_index() 177 | df['rmse_mean']=df['rmse_mean'].fillna(df['RMSE']) 178 | df['mae_mean']=df['mae_mean'].fillna(df['MAE']) 179 | plt.figure() 180 | width = 0.35 # the width of the bars 181 | plt.ylim(0, df.rmse_mean.max()*1.1) 182 | error_kw=dict(lw=1, capsize=5, capthick=1) 183 
| ind = np.arange(len(df)) 184 | if df.rmse_std.isnull().values.all(): 185 | plt.bar(ind-width/2., df.mae_mean, width, label='MAE',error_kw=error_kw) 186 | plt.bar(ind+width/2., df.rmse_mean, width, label='RMSE',error_kw=error_kw) 187 | else: 188 | plt.bar(ind-width/2., df.mae_mean, width, yerr=df.mae_std, label='MAE',error_kw=error_kw) 189 | plt.bar(ind+width/2., df.rmse_mean, width, yerr=df.rmse_std, label='RMSE',error_kw=error_kw) 190 | 191 | for x,y1,y2 in zip(ind,df.mae_mean,df.rmse_mean): 192 | label = "{:.2f}".format(y1) 193 | plt.annotate(label, (x-width/2,y1), textcoords="offset points", xytext=(0,10), ha='center') 194 | label = "{:.2f}".format(y2) 195 | plt.annotate(label, (x+width/2,y2), textcoords="offset points", xytext=(0,10), ha='center') 196 | plt.ylabel('TEC units') 197 | plt.legend(loc='lower left') 198 | plt.xticks(ind, df.index, rotation = 15, ha='right') 199 | 200 | #Please uncomment this line if you want the marker line 201 | #plt.plot(ind-width/2, df.mae_mean, color='k', marker='.') 202 | plt.tight_layout() 203 | plt.savefig('output/bar_plot.pdf') 204 | plt.close() 205 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | #Disabling warnings 3 | import logging 4 | #logging.getLogger('tensorflow').setLevel(logging.ERROR) 5 | #os.environ["KMP_AFFINITY"] = "noverbose" 6 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 7 | import tensorflow as tf 8 | #tf.autograph.set_verbosity(3) 9 | from models.models import * 10 | 11 | 12 | from lstm_utils import * 13 | import numpy as np 14 | import pandas as pd 15 | import sklearn 16 | import matplotlib.pyplot as plt 17 | from osgeo import gdal_array 18 | from generator import DataGenerator 19 | import sys 20 | #tf.config.experimental.set_memory_growth(tf.config.get_visible_devices()[1], True) 21 | #Model name is used to load the weights and recover prediction parameters. 22 | #loading experiment configuration 23 | 24 | args=get_args() 25 | config=load_config(args.experiment) 26 | 27 | 28 | try: 29 | model= eval(config.model) 30 | except: 31 | print(f"Error trying to load the model chosen in {args.config}") 32 | sys.exit() 33 | 34 | 35 | scalerParamsFile=getModelFilePath(config.experiment_name,f"params.py") 36 | print(f"Loading scaling parameters from {scalerParamsFile}") 37 | with open(scalerParamsFile, 'r') as f: parameters = eval(f.read()) 38 | 39 | 40 | 41 | #if the scaler has more than 1 band, we are using Space Weather indices 42 | 43 | print(f"Loading data from {config.test_npy_dataset}") 44 | ionex=np.load(config.test_npy_dataset) 45 | if config.test_time_sampling>1: 46 | ionex=ionex[::config.test_time_sampling,...] 
#2h step 47 | #Resizing to 72x72 48 | ionex=np.concatenate((ionex[:,:,:-1,:],ionex[:,-1:,:-1,:]),axis=1) 49 | ionex=getDataSubset(ionex,config.experiment_name) 50 | 51 | #reducing the number of test samples to use only one week 52 | #initialdate=15 53 | #frames=28*24 54 | #ionex=ionex[initialdate:initialdate+frames] 55 | 56 | 57 | 58 | #scaling 59 | ionex=scaleForward(ionex,parameters) 60 | 61 | if config.prediction=='seq2one': 62 | output_t_steps=1 63 | else: 64 | output_t_steps=config.prediction_window 65 | 66 | test_generator = DataGenerator(ionex, batch_size=config.batch_size, nstepsin=config.lag_window, nstepsout=config.prediction_window,shuffle=False,sample_rate=config.resample_rate) 67 | 68 | #datax,datay=test_generator.asArray() 69 | #print(f"Test data shape: {datax.shape}") 70 | #del ionex 71 | 72 | day=0 73 | 74 | os.makedirs(f"output/{config.experiment_name}", exist_ok=True) 75 | 76 | randomSeeds=(np.random.random(config.best_of)*100).astype(int) 77 | 78 | bestModelNumber=0 79 | bestMAE=9999 80 | for experimentNumber, randomSeed in enumerate(randomSeeds): #this represents how many times we are going to test the network 81 | print(f"Starting experiment {experimentNumber}") 82 | 83 | fileName=getModelFileName(config.experiment_name, experimentNumber) 84 | if not os.path.exists(fileName): 85 | print("Model not found. Please check the models folder and set the name variable on predict.py.") 86 | sys.exit() 87 | 88 | print(f"Loading model {fileName}") 89 | model = tf.keras.models.load_model(fileName) 90 | #print(model.get_weights()) 91 | 92 | #model.evaluate(test_generator,batch_size=config.batch_size,verbose=2) 93 | 94 | 95 | rmse_per_hour=np.zeros((config.prediction_window,1)) 96 | mae_per_hour=np.zeros((config.prediction_window,1)) 97 | max_per_hour=np.zeros((config.prediction_window,1)) 98 | hist, edges = None,None 99 | ymean=0 100 | sstotal=0 101 | 102 | 103 | for i in range(len(test_generator)): #computing the mean for r2 104 | #aggregating batch data 105 | datax, datay=test_generator[i] 106 | datay=scaleBack(datay,parameters) 107 | ymean+=np.sum(datay) 108 | 109 | n=test_generator.count()*datay.shape[2]*datay.shape[3] 110 | ymean/=n*datay.shape[1] 111 | 112 | 113 | for i in range(len(test_generator)): #there is a bug with seq2one and space indexes 114 | #aggregating batch data 115 | datax, datay=test_generator[i] 116 | if config.prediction=='seq2one' and config.prediction_window>1: #perform predictions seq2one 117 | currX=datax.copy() 118 | ynew=None 119 | for t in range(config.prediction_window): 120 | newFrame=model.predict(currX,verbose=0) 121 | if ynew is None: 122 | ynew=newFrame 123 | else: 124 | ynew=np.concatenate([ynew,newFrame],axis=1) 125 | currX=np.concatenate([currX[:,1:], newFrame],axis=1) 126 | 127 | else: #perform predictions seq2seq 128 | ynew=model.predict(datax,verbose=0) 129 | ynew=scaleBack(ynew,parameters) 130 | datay=scaleBack(datay,parameters) 131 | error=ynew-datay 132 | #scale back 133 | rmse_per_hour+=np.sum(error**2, axis=(0,2,3)) 134 | mae_per_hour+=np.sum(np.abs(error), axis=(0,2,3)) 135 | max_per_hour=np.maximum(np.max(np.abs(error), axis=(0,2,3)), max_per_hour) 136 | if hist is None: 137 | hist, edges =np.histogram(datay-ymean,range=[-40,40], bins=40) 138 | else: 139 | htemp,etemp =np.histogram(datay-ymean,bins=edges) 140 | hist+=htemp 141 | sstotal+=np.sum((datay-ymean)**2) 142 | 143 | 144 | r2=1-rmse_per_hour.sum()/sstotal 145 | rmse_per_hour=np.sqrt(rmse_per_hour/n) 146 | mae_per_hour=mae_per_hour/n 147 | 148 | 
datax=scaleBack(datax,parameters) 149 | 150 | 151 | #saving macro results 152 | #resultsFile="output/results.py" 153 | resultsFile=getModelFilePath(config.experiment_name,"results.py") 154 | if not os.path.exists(resultsFile): 155 | results={} 156 | else: 157 | with open(resultsFile, 'r') as f: results = eval(f.read()) 158 | 159 | #get existing dict (from training) 160 | expId=f"{config.experiment_name}_{experimentNumber}" 161 | 162 | if not expId in results.keys(): 163 | modelResults={} 164 | else: 165 | modelResults=results[expId] 166 | 167 | modelResults["parameters"]= model.count_params() 168 | modelResults["r2"]= r2 169 | modelResults["mae_1st"]= mae_per_hour[0] 170 | modelResults["rmse_1st"]= rmse_per_hour[0] 171 | modelResults["max_1st"]= max_per_hour[0] 172 | modelResults["mae_per_hour"]= mae_per_hour 173 | modelResults["rmse_per_hour"]= rmse_per_hour 174 | modelResults["max_per_hour"]= max_per_hour 175 | modelResults["histogram"]= hist 176 | modelResults["edges"]= edges 177 | 178 | results[expId]=modelResults 179 | with open(resultsFile,'w') as f:f.write(repr(results)) 180 | 181 | mae=mae_per_hour.mean() 182 | print(f"MAE: {mae}") 183 | if mae=day) & (weatherdf.index=day) & (weatherdf.index12: 267 | m=m[::2] 268 | codg_12_20.append(m) 269 | codg_12_20=np.concatenate(codg_12_20, axis=0) 270 | np.save('codg_12_20.npy',codg_12_20) 271 | print(codg_12_20.shape) 272 | 273 | for year in range(2015,2020+1): 274 | if not os.path.exists(f"codg{year}_12h.npy"): 275 | m=np.load(f"codg{year}.npy") 276 | if m.shape[0]/365>12: 277 | m=m[::2] 278 | np.save(f"codg{year}_12h.npy",m) 279 | 280 | #for year in range(2019,2021): 281 | #fname=f"timeseries{year%100}.npy" 282 | #print(f"Test data saved in {fname}") 283 | #if not os.path.exists(fname): 284 | #reader.concatenateYear(year,"timeseries.npy",useSpaceWeather=False) 285 | #reader.concatenateYear(year,fname,useSpaceWeather=True) 286 | 287 | year=2019 #Training data 288 | print("Training data saved in timeseries19.npy") 289 | if not os.path.exists("timeseries19_ind.npy"): 290 | reader.concatenateYear(year,"timeseries19.npy",useSpaceWeather=False) 291 | reader.concatenateYear(year,"timeseries19_ind.npy",useSpaceWeather=True) 292 | year=2020 #Test data 293 | print("Test data saved in timeseries.npy") 294 | if not os.path.exists("timeseries_ind.npy"): 295 | reader.concatenateYear(year,"timeseries.npy",useSpaceWeather=False) 296 | reader.concatenateYear(year,"timeseries_ind.npy",useSpaceWeather=True) 297 | 298 | #if not os.path.exists("timeseries14_ind.npy"): 299 | #reader.concatenateYear(2014,"timeseries14_ind.npy",useSpaceWeather=True, hour_step=2) 300 | if not os.path.exists("timeseries15_ind.npy"): 301 | reader.concatenateYear(2015,"timeseries15_ind.npy",useSpaceWeather=True, hour_step=2) 302 | 303 | if not os.path.exists("c1pg.npy"): 304 | reader.concatenateYear(2019,"c1pg.npy",useSpaceWeather=False,prefix='c1pg') 305 | 306 | if not os.path.exists("c1pg2015.npy"): 307 | reader.concatenateYear(2015,"c1pg2015.npy",useSpaceWeather=False,prefix='c1pg') 308 | 309 | if not os.path.exists("c1pg20.npy"): 310 | reader.concatenateYear(2020,"c1pg20.npy",useSpaceWeather=False,prefix='c1pg') 311 | 312 | if not os.path.exists("corg.npy"): 313 | reader.concatenateYear(2019,"corg.npy",useSpaceWeather=False,prefix='corg') 314 | 315 | if not os.path.exists("magn19.npy"): 316 | reader.concatenateYear(2019,"magn19.npy",useSpaceWeather=False,prefix='magn') 317 | 318 | 319 | #print("Tiff conversion test") 320 | 
#reader.ionex2tiff("./ionex/codg0010.18i","./output/teste.tif") 321 | #reader=ionexreader() 322 | m,trans,daterange=reader.read2DIonex("./ionex/codg0010.18i") 323 | reader.write2DIonex(m,trans,daterange,"./output/teste.18i") 324 | 325 | --------------------------------------------------------------------------------
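
A minimal, self-contained sketch (not part of the repository) of how the data-preparation helpers in lstm_utils.py fit together: split_sequence() windows a series into (input, output) pairs, and scaleForward()/scaleBack() map TEC values towards [-1, 1] and back. The parameters dict is assumed here to hold per-band "min"/"max" arrays, mirroring the params.py file that evaluate.py loads; the values below are toy numbers.

import numpy as np
from lstm_utils import split_sequence, scaleForward, scaleBack

series = np.arange(10, dtype=float)          # toy 1-D series standing in for hourly TEC frames
X, y = split_sequence(series, n_steps=4, n_stepsout=2)
print(X.shape, y.shape)                      # (5, 4) and (5, 2): 4 input steps, 2 output steps

parameters = {"min": np.array([0.0]), "max": np.array([80.0])}   # assumed single-band scaler
scaled = scaleForward(series, parameters)
restored = scaleBack(scaled, parameters)
print(np.allclose(series, restored))         # True: the transform round-trips exactly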
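
The per-frame error statistics in evaluate.py are accumulated in a streaming fashion: squared and absolute errors are summed batch by batch and only normalised at the end, so the full test set never has to sit in memory, and R² is derived from the same accumulated sums. The sketch below (also not part of the repository) reproduces that bookkeeping; random arrays stand in for the DataGenerator batches and for model.predict(), the grid size is a toy choice, and the accumulators are kept as flat vectors for simplicity.

import numpy as np

prediction_window = 6
rmse_per_hour = np.zeros(prediction_window)   # running sum of squared errors per lead time
mae_per_hour = np.zeros(prediction_window)    # running sum of absolute errors per lead time
sstotal = 0.0                                 # running total sum of squares for R^2
n = 0                                         # grid cells seen per lead time
ymean = 10.0                                  # evaluate.py estimates this in a first pass over the generator

rng = np.random.default_rng(0)
for _ in range(4):                            # stand-in for looping over the DataGenerator batches
    datay = rng.normal(10.0, 2.0, size=(8, prediction_window, 72, 72))   # "truth" batch
    ynew = datay + rng.normal(0.0, 1.0, size=datay.shape)                # stand-in for the model prediction
    error = ynew - datay
    rmse_per_hour += np.sum(error**2, axis=(0, 2, 3))
    mae_per_hour += np.sum(np.abs(error), axis=(0, 2, 3))
    sstotal += np.sum((datay - ymean)**2)
    n += datay.shape[0] * datay.shape[2] * datay.shape[3]

r2 = 1.0 - rmse_per_hour.sum() / sstotal      # same formula as evaluate.py
rmse_per_hour = np.sqrt(rmse_per_hour / n)
mae_per_hour = mae_per_hour / n
print(f"R2={r2:.3f}", rmse_per_hour.round(2), mae_per_hour.round(2))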