├── LICENSE ├── README.md ├── examples ├── __init__.py ├── __pycache__ │ ├── metrics.cpython-310.pyc │ └── utils.cpython-310.pyc ├── metrics.py ├── model │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── model.cpython-310.pyc │ └── model.py ├── train.py └── utils.py ├── megaflow ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-310.pyc │ ├── __init__.cpython-39.pyc │ ├── version.cpython-310.pyc │ └── version.cpython-39.pyc ├── common │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ └── utils.cpython-310.pyc │ ├── metrics.py │ └── utils.py ├── dataset │ ├── MegaFlow2D.py │ ├── __init__.py │ └── __pycache__ │ │ ├── MegaFlow2D.cpython-310.pyc │ │ ├── MegaFlow2D.cpython-39.pyc │ │ ├── __init__.cpython-310.pyc │ │ └── __init__.cpython-39.pyc └── version.py ├── pyproject.toml ├── setup.py ├── test.py └── xu2023.pdf /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 The Design Research Collective 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MegaFlow2D 2 | 3 | 4 | ## Overview 5 | MegaFlow2D is a dataset of parametric CFD simulation results packaged for machine learning / super-resolution purposes. 6 | 7 | The package contains: 8 | 1. A standard structure for converting simulation results into graph form. 9 | 2. Common utility functions for visualizing, retrieving and processing simulation results. (Everything that requires the [FEniCS](https://fenicsproject.org/) or [dolfin](https://github.com/FEniCS/dolfinx) package can only be run on Linux or WSL.) 10 | 11 | ## Installation 12 | The MegaFlow2D package can be installed via pip: 13 | ```bash 14 | pip install MegaFlow2D 15 | ``` 16 | 17 | Running `pip install` automatically configures the package dependencies; however, to build the graph models, [torch-geometric](https://pytorch-geometric.readthedocs.io/en/latest/) needs to be installed manually. 18 | 19 | ## Dataset structure 20 | The entire dataset is stored inside a single HDF5 file. During processing, multiple intermediate HDF5 files are created (one per worker core) to avoid data corruption from concurrent writes to a single file; these are merged into one file once processing finishes. Reading, however, can be done concurrently as long as all operations are restricted to read-only (`'r'`) mode. The data are stored in a hierarchical structure: each group is indexed by geometry type, time step and mesh resolution, and the arrays themselves are stored as `h5py.Dataset` objects under each group. The dataset structure is shown below: 21 | ```bash 22 | ├── MegaFlow2D 23 | │ ├── <geometry>_<index> 24 | │ │ ├── <time step> 25 | │ │ │ ├── <las / has> 26 | │ │ │ │ ├── dataset 27 | 28 | ```
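For quick inspection outside the provided dataset class, a processed file can also be opened directly with `h5py`. The snippet below is a minimal sketch: the file location (`<root>/processed/data.h5`), the sample group `circle_1` and the time step `0` are illustrative placeholders rather than guaranteed names.
```py
import h5py

# Read one low-resolution ('las') sample directly from the merged HDF5 file.
# Keeping the file in read-only ('r') mode is what makes concurrent reads safe.
with h5py.File('/path/to/your/directory/processed/data.h5', 'r') as f:
    group = f['circle_1']['0']['las']      # <geometry>_<index> -> <time step> -> <las / has>
    x = group['x'][:]                      # node features (ux, uy, p)
    pos = group['pos'][:]                  # node coordinates
    edge_index = group['edge_index'][:]    # mesh connectivity
    print(x.shape, pos.shape, edge_index.shape)
```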
29 | In theory, looking up a sample can be done in near-O(1) time thanks to the B-tree index structure of HDF5, which allows fast data retrieval in the training data-loading process. However, retrieval might be slowed down by the automatic decompression of the stored arrays. This may be improved by reprocessing the dataset with a different compression setting in `utils.py`. Please keep in mind that reprocessing the dataset can take several hours depending on the number of cores used. 30 | 31 | ## Using the MegaFlow package 32 | 33 | The MegaFlow package provides a simple interface for initializing and loading the dataset. 34 | 35 | ```py 36 | from megaflow.dataset.MegaFlow2D import MegaFlow2D 37 | 38 | if __name__ == '__main__': 39 | dataset = MegaFlow2D(root='/path/to/your/directory', download=True, transform='normalize', pre_transform=None, split_scheme='mixed', split_ratio=[0.5, 0.5]) 40 | # if the dataset is not processed, the process function will be called automatically. 41 | # to facilitate multi-process parallelism, be sure to execute the process function in '__main__'. 42 | 43 | # get one sample 44 | sample_low, sample_high = dataset.get(0) 45 | print('Number of nodes: {}, number of edges: {}'.format(sample_low.num_nodes, sample_low.num_edges)) 46 | ``` 47 | 48 | ## Using the example scripts 49 | We provide an example script for training a super-resolution model on the MegaFlow2D dataset. The script can be found in the `examples` directory and can be run as follows (one example configuration): 50 | ```bash 51 | python examples/train.py --dir /path/to/your/directory --dataset MegaFlow2D --transform normalize --model FlowMLError --epochs 100 --batch_size 32 52 | ``` 53 | 54 | ## Citing MegaFlow2D 55 | If you use MegaFlow2D in your research, please cite: 56 | ```bibtex 57 | @inproceedings{10.1145/3576914.3587552, 58 | author = {Xu, Wenzhuo and Grande Gutierrez, Noelia and McComb, Christopher}, 59 | title = {MegaFlow2D: A Parametric Dataset for Machine Learning Super-Resolution in Computational Fluid Dynamics Simulations}, 60 | year = {2023}, 61 | isbn = {9798400700491}, 62 | publisher = {Association for Computing Machinery}, 63 | address = {New York, NY, USA}, 64 | url = {https://doi.org/10.1145/3576914.3587552}, 65 | doi = {10.1145/3576914.3587552}, 66 | abstract = {This paper introduces MegaFlow2D, a dataset of over 2 million snapshots of parameterized 2D fluid dynamics simulations of 3000 different external flow and internal flow configurations. It’s worth noting that, simulation results on both low and high mesh resolutions are provided to facilitate the training of machine learning (ML) models for super-resolution purposes. This is the first large-scale multi-fidelity fluid dynamics dataset ever provided. We build the entire data generation and simulation workflow on open-source and efficient interfaces that can be utilized for a variety of data samples according to the user’s specific needs. 
Finally, we provide a use case to demonstrate the potential value of the MegaFlow2D dataset in applications related to error correction.}, 67 | booktitle = {Proceedings of Cyber-Physical Systems and Internet of Things Week 2023}, 68 | pages = {100–104}, 69 | numpages = {5}, 70 | keywords = {datasets, neural networks, computational fluid dynamics, discretization error}, 71 | location = {San Antonio, TX, USA}, 72 | series = {CPS-IoT Week '23} 73 | } 74 | ``` 75 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/__init__.py -------------------------------------------------------------------------------- /examples/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /examples/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /examples/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def max_divergence(y_pred, y_true): 7 | """ 8 | Computes the maximum divergence between the predicted and true distributions 9 | Input: 10 | y_pred: tensor, predicted distribution 11 | y_true: tensor, true distribution 12 | Output: 13 | max_div: float, maximum divergence between the predicted and true distributions 14 | """ 15 | max_div = 1 - torch.max(torch.abs(y_pred - y_true)) / torch.max(y_true) 16 | return max_div 17 | 18 | 19 | def norm_divergence(y_pred, y_true): 20 | """ 21 | Computes the norm divergence between the predicted and true distributions 22 | Input: 23 | y_pred: tensor, predicted distribution 24 | y_true: tensor, true distribution 25 | Output: 26 | norm_div: float, norm divergence between the predicted and true distributions 27 | """ 28 | norm_div = 1 - (torch.norm(y_pred) - torch.norm(y_true)) / torch.norm(y_true) 29 | return norm_div -------------------------------------------------------------------------------- /examples/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/model/__init__.py -------------------------------------------------------------------------------- /examples/model/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/model/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /examples/model/__pycache__/model.cpython-310.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/examples/model/__pycache__/model.cpython-310.pyc -------------------------------------------------------------------------------- /examples/model/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch_geometric.nn as nn 3 | from torch_geometric.nn.unpool import knn_interpolate 4 | 5 | class FlowMLConvolution(torch.nn.Module): 6 | def __init__(self, in_channels, out_channels, num_layers, num_filters): 7 | super(FlowMLConvolution, self).__init__() 8 | # self.conv1 = nn.Sequential('x, edge_index', [(nn.GraphConv(in_channels, num_filters[0]), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1), torch.nn.BatchNorm1d(num_filters[0])]) 9 | self.conv1 = nn.Sequential('x, edge_index', [(nn.GraphConv(in_channels, num_filters[0]), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1)]) 10 | self.convs = torch.nn.ModuleList() 11 | for i in range(num_layers - 1): 12 | # self.convs.append(nn.Sequential('x, edge_index', [(nn.GraphConv(num_filters[i], num_filters[i+1]), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1), torch.nn.BatchNorm1d(num_filters[i+1])])) 13 | self.convs.append(nn.Sequential('x, edge_index', [(nn.GraphConv(num_filters[i], num_filters[i+1]), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1)])) 14 | self.conv2 = nn.GraphConv(num_filters[i+1], out_channels) 15 | 16 | def forward(self, data): 17 | x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch 18 | x = self.conv1(x, edge_index) 19 | for conv in self.convs: 20 | x = conv(x, edge_index) 21 | x = self.conv2(x, edge_index) 22 | # x = nn.global_mean_pool(x, batch) 23 | return x 24 | 25 | 26 | class EdgeConv(nn.MessagePassing): 27 | def __init__(self, in_channels, out_channels): 28 | super(EdgeConv, self).__init__(aggr='add') 29 | self.mlp = torch.nn.Sequential(nn.Linear(2*in_channels, 32), torch.nn.LeakyReLU(0.1), torch.nn.BatchNorm1d(32), torch.nn.Linear(32, 32), torch.nn.LeakyReLU(0.1), torch.nn.BatchNorm1d(32), torch.nn.Linear(32, out_channels)) 30 | 31 | def forward(self, x, edge_index): 32 | return self.propagate(edge_index, x=x) 33 | 34 | def message(self, x_i, x_j): 35 | tmp = torch.cat([x_i, x_j-x_i], dim=1) 36 | return self.mlp(tmp) 37 | 38 | 39 | class DynamicEdgeConv(EdgeConv): 40 | def __init__(self, in_channels, out_channels, k=6): 41 | super().__init__(in_channels, out_channels) 42 | self.k = k 43 | 44 | def forward(self, x, batch=None): 45 | edge_index = nn.knn_graph(x, self.k, batch, loop=False, flow=self.flow) 46 | return super().forward(x, edge_index) 47 | 48 | 49 | class FlowMLError(torch.nn.Module): 50 | def __init__(self, in_channels, out_channels): 51 | super(FlowMLError, self).__init__() 52 | # self.convs = FlowMLConvolution(in_channels+1, out_channels, 3, [64, 64, 64]) 53 | 54 | self.edge_conv1 = DynamicEdgeConv(2, 64) 55 | self.edge_convs = torch.nn.ModuleList() 56 | for i in range(2): 57 | self.edge_convs.append(DynamicEdgeConv(64, 64)) 58 | self.edge_conv2 = DynamicEdgeConv(64, 128) 59 | self.edge_conv3 = DynamicEdgeConv(192, 1) 60 | 61 | self.conv4 = nn.Sequential('x, edge_index', [(nn.GCNConv(in_channels+1, 64), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1)]) 62 | self.convs2 = torch.nn.ModuleList() 63 | for i in range(3): 64 | # self.convs.append(nn.Sequential('x, edge_index', [(nn.GraphConv(num_filters[i], num_filters[i+1]), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1), torch.nn.BatchNorm1d(num_filters[i+1])])) 65 | 
self.convs2.append(nn.Sequential('x, edge_index', [(nn.GCNConv(64, 64), 'x, edge_index -> x'), torch.nn.LeakyReLU(0.1)])) 66 | self.conv5 = nn.GCNConv(64, out_channels) 67 | 68 | 69 | def forward(self, data): 70 | u, coord, edge_index, batch = data.x, data.pos, data.edge_index, data.batch 71 | x = self.edge_conv1(coord, batch) 72 | append = x 73 | for conv in self.edge_convs: 74 | x = conv(x, batch) 75 | torch.cat((append, x), dim=1) 76 | x = self.edge_conv2(x, batch) 77 | x = self.edge_conv3(torch.cat([append, x], dim=1), batch) 78 | 79 | u = self.conv4(torch.cat([u, x], dim=1), edge_index) 80 | for conv in self.convs2: 81 | u = conv(u, edge_index) 82 | u = self.conv5(u, edge_index) 83 | 84 | return u 85 | -------------------------------------------------------------------------------- /examples/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from torch_geometric.loader import DataLoader 5 | from utils import * 6 | 7 | from torch.utils.tensorboard import SummaryWriter 8 | 9 | 10 | def main(): 11 | # read command line arguments 12 | args = parse_args() 13 | model_name = args.model 14 | dataset_name = args.dataset 15 | dataset_dir = args.dir 16 | dataset_split = args.split_scheme 17 | dataset_transform = args.transform 18 | model_layers = args.layers 19 | model_num_filters = args.num_filters 20 | model_loss = args.loss 21 | model_metric = args.metric 22 | train_epochs = args.epochs 23 | train_batch_size = args.batch_size 24 | train_lr = args.lr 25 | train_load_model = args.load_model 26 | 27 | # setup device 28 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 29 | print('Using device:', device) 30 | 31 | # setup model according to command line arguments 32 | model = initialize_model(in_channel=3, out_channel=3, type=model_name, layers=model_layers, num_filters=model_num_filters) 33 | if train_load_model: 34 | checkpoint_load(model, train_load_model) 35 | 36 | model = model.to(device) 37 | print(model) 38 | 39 | # setup dataset 40 | dataset = initialize_dataset(dataset=dataset_name, split_scheme=dataset_split, dir=dataset_dir, transform=dataset_transform, split_ratio=[1, 1], pre_transform=None) 41 | # dataset.process() # test dataset processing parallel 42 | print(dataset) 43 | 44 | # split dataset into train, val and test sets 45 | train_dataset = dataset[:int(len(dataset) * 0.8)] 46 | val_dataset = dataset[int(len(dataset) * 0.8):int(len(dataset) * 0.9)] 47 | test_dataset = dataset[int(len(dataset) * 0.9):] 48 | 49 | # setup dataloader 50 | train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=8) 51 | val_dataloader = DataLoader(val_dataset, batch_size=train_batch_size, shuffle=False, num_workers=8) 52 | test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=8) 53 | 54 | # setup loss function 55 | loss_fn = initialize_loss(loss_type=model_loss) 56 | 57 | # setup metric function 58 | metric_fn = initialize_metric(metric_type=model_metric) 59 | 60 | # setup optimizer 61 | optimizer = torch.optim.Adam(model.parameters(), lr=train_lr) 62 | 63 | # setup tensorboard 64 | logdir = '../train/logs/{}'.format(get_cur_time()) 65 | savedir = '../train/checkpoints/{}'.format(get_cur_time()) 66 | os.makedirs(logdir, exist_ok=True) 67 | os.makedirs(savedir, exist_ok=True) 68 | writer_logs = SummaryWriter(logdir) 69 | 70 | # training loop 71 | for epoch in range(train_epochs): 72 | model.train() 73 | avg_loss 
= 0 74 | avg_accuracy = 0 75 | for batch in train_dataloader: 76 | batch = batch.to(device) 77 | optimizer.zero_grad() 78 | pred = model(batch) 79 | loss = loss_fn(batch.y, pred) 80 | avg_loss += loss.item() 81 | avg_accuracy += metric_fn(batch.y, pred) 82 | loss.backward() 83 | optimizer.step() 84 | # print('Epoch: {:03d}, Batch: {:03d}, Loss: {:.4f}, Accuracy metric: {:4f}'.format(epoch, batch.batch[-1], loss.item(), metric_fn(batch.y, pred))) 85 | 86 | avg_loss /= len(train_dataloader) 87 | avg_accuracy /= len(train_dataloader) 88 | print('Epoch: {:03d}, Loss: {:.4f}, Accuracy metric: {:4f}'.format(epoch, avg_loss, avg_accuracy)) 89 | 90 | writer_logs.add_scalar('Loss/train', avg_loss, epoch) 91 | writer_logs.add_scalar('Max_div/train', avg_accuracy, epoch) 92 | # evaluate model with validation set every 25 epochs and save checkpoint 93 | if epoch % 25 == 0: 94 | evaluate_model(model, val_dataloader, writer_logs, epoch, loss_fn, metric_fn, device, mode='val') 95 | checkpoint_save(model, savedir, epoch) 96 | 97 | # evaluate model with test set 98 | evaluate_model(model, test_dataloader, writer_logs, epoch, loss_fn, metric_fn, device, mode='test') 99 | 100 | # close tensorboard and save final model 101 | writer_logs.close() 102 | checkpoint_save(model, savedir, epoch) 103 | 104 | if __name__ == '__main__': 105 | main() -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from argparse import ArgumentParser 3 | from datetime import datetime 4 | import torch 5 | from model.model import * 6 | from megaflow.dataset.MegaFlow2D import * 7 | from metrics import * 8 | 9 | 10 | def get_cur_time(): 11 | return datetime.strftime(datetime.now(), '%Y-%m-%d_%H-%M') 12 | 13 | 14 | def checkpoint_save(model, name, epoch): 15 | f = os.path.join(name, 'checkpoint-{:06d}.pth'.format(epoch)) 16 | torch.save(model.state_dict(), f) 17 | print('Saved checkpoint:', f) 18 | 19 | 20 | def checkpoint_load(model, name): 21 | print('Restoring checkpoint: {}'.format(name)) 22 | model.load_state_dict(torch.load(name, map_location='cpu')) 23 | epoch = int(os.path.splitext(os.path.basename(name))[0].split('-')[1]) 24 | return epoch 25 | 26 | 27 | def initialize_model(in_channel, out_channel, type, layers, num_filters): 28 | # initialize model based on type, layers, and num_filters provided 29 | if type == 'FlowMLConvolution': 30 | model = FlowMLConvolution(in_channel, out_channel, layers, num_filters) 31 | elif type == 'FlowMLError': 32 | model = FlowMLError(in_channel, out_channel) 33 | else: 34 | raise ValueError('Unknown model type: {}'.format(type)) 35 | return model 36 | 37 | 38 | def initialize_dataset(dataset, split_scheme, dir, transform, pre_transform, split_ratio): 39 | # initialize dataset based on dataset and mode 40 | if dataset == 'MegaFlow2D': 41 | dataset = MegaFlow2D(root=dir, download=False, split_scheme=split_scheme, transform=transform, pre_transform=pre_transform, split_ratio=split_ratio) 42 | print('Dataset initialized') 43 | else: 44 | raise ValueError('Unknown dataset: {}'.format(dataset)) 45 | return dataset 46 | 47 | 48 | def initialize_loss(loss_type): 49 | """ 50 | Initialize loss function based on type provided 51 | Input: 52 | loss_type: string, type of loss function 53 | Output: 54 | loss_fn: loss function 55 | """ 56 | if loss_type == 'MSELoss': 57 | loss_fn = torch.nn.MSELoss() 58 | elif loss_type == 'L1Loss': 59 | loss_fn = torch.nn.L1Loss() 
60 | else: 61 | raise ValueError('Unknown loss type: {}'.format(loss_type)) 62 | return loss_fn 63 | 64 | 65 | def initialize_metric(metric_type): 66 | """ 67 | Initialize metric function based on type provided 68 | Input: 69 | metric_type: string, type of metric function 70 | Output: 71 | metric_fn: metric function 72 | """ 73 | if metric_type == 'max_divergence': 74 | metric_fn = max_divergence 75 | elif metric_type == 'norm_divergence': 76 | metric_fn = norm_divergence 77 | else: 78 | raise ValueError('Unknown metric type: {}'.format(metric_type)) 79 | return metric_fn 80 | 81 | 82 | def evaluate_model(model, dataloader, logger, iteration, loss_fn, eval_metric, device, mode, checkpoint=None): 83 | # load checkpoint if provided 84 | if checkpoint is not None: 85 | checkpoint_load(model, checkpoint) 86 | 87 | model.eval() 88 | with torch.no_grad(): 89 | avg_loss = 0 90 | avg_metric = 0 91 | 92 | for batch in dataloader: 93 | batch = batch.to(device) 94 | pred = model(batch) 95 | loss = loss_fn(batch.y, pred) 96 | metric = eval_metric(batch.y, pred) 97 | avg_loss += loss.item() 98 | avg_metric += metric 99 | 100 | avg_loss /= len(dataloader) 101 | avg_metric /= len(dataloader) 102 | 103 | if mode == 'val': 104 | logger.add_scalar('Loss/val', avg_loss, iteration) 105 | logger.add_scalar('Max_div/val', avg_metric, iteration) 106 | print('-' * 72) 107 | print('Val loss: {:.4f}, Val metric: {:.4f}'.format(avg_loss, avg_metric)) 108 | 109 | if mode == 'test': 110 | logger.add_scalar('test_loss', avg_loss, iteration) 111 | logger.add_scalar('test_metric', avg_metric, iteration) 112 | print('-' * 72) 113 | print('Test loss: {:.4f}, Test metric: {:.4f}'.format(avg_loss, avg_metric)) 114 | 115 | model.train() 116 | return avg_loss, avg_metric 117 | 118 | 119 | def parse_args(): 120 | parser = ArgumentParser() 121 | parser.add_argument('--dataset', type=str, default='MegaFlow2D', help='dataset name') 122 | parser.add_argument('--split_scheme', type=str, default='mixed', help='dataset mode') 123 | parser.add_argument('--transform', type=str, default='None', help='dataset transform') 124 | parser.add_argument('--dir', type=str, default='C:/research/data', help='dataset directory') 125 | parser.add_argument('--model', type=str, default='FlowMLConvolution', help='model type') 126 | parser.add_argument('--layers', type=int, default=3, help='number of layers') 127 | parser.add_argument('--num_filters', type=int, nargs='+', default=[8, 16, 8], help='number of filters') 128 | parser.add_argument('--loss', type=str, default='MSELoss', help='loss function') 129 | parser.add_argument('--metric', type=str, default='max_divergence', help='metric function') 130 | parser.add_argument('--epochs', type=int, default=500, help='number of epochs') 131 | parser.add_argument('--batch_size', type=int, default=64, help='batch size') 132 | parser.add_argument('--lr', type=float, default=1e-4, help='learning rate') 133 | parser.add_argument('--load_model', type=str, default=None, help='load model from checkpoint') 134 | 135 | args = parser.parse_args() 136 | return args 137 | -------------------------------------------------------------------------------- /megaflow/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MegaFLow2D dataset 3 | """ 4 | __author__ = "Wenzhuo Xu, Christopher McComb, and Noelia Grande Gutierrez" 5 | __version__ = "0.6.0" 6 | __credits__ = "Carnegie Mellon University" 
-------------------------------------------------------------------------------- /megaflow/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /megaflow/__pycache__/version.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/__pycache__/version.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/__pycache__/version.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/__pycache__/version.cpython-39.pyc -------------------------------------------------------------------------------- /megaflow/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/common/__init__.py -------------------------------------------------------------------------------- /megaflow/common/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/common/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/common/__pycache__/utils.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/common/__pycache__/utils.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/common/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | def max_divergence(y_pred, y_true): 7 | """ 8 | Computes the maximum divergence between the predicted and true distributions 9 | Input: 10 | y_pred: tensor, predicted distribution 11 | y_true: tensor, true distribution 12 | Output: 13 | max_div: float, maximum divergence between the predicted and true distributions 14 | """ 15 | max_div = 1 - torch.max(torch.abs(y_pred - y_true)) / torch.max(y_true) 16 | return max_div 17 | 18 | 19 | def norm_divergence(y_pred, y_true): 20 | """ 21 | Computes the norm divergence between the predicted and true distributions 22 | Input: 23 | y_pred: tensor, predicted distribution 24 | y_true: tensor, true distribution 25 | Output: 26 | norm_div: float, norm divergence between the predicted and true distributions 27 | """ 28 | norm_div = 1 - (torch.norm(y_pred) - torch.norm(y_true)) / torch.norm(y_true) 29 | 
return norm_div -------------------------------------------------------------------------------- /megaflow/common/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from datetime import datetime 4 | import torch 5 | from torch_geometric.data import Data 6 | # from model import * 7 | # from dataset import * 8 | import h5py 9 | import numpy as np 10 | from tqdm import tqdm 11 | from queue import Empty 12 | 13 | 14 | def get_cur_time(): 15 | return datetime.strftime(datetime.now(), '%Y-%m-%d_%H-%M') 16 | 17 | 18 | def process_file_list(data_list): 19 | raw_data_dir = data_list[0] 20 | save_dir = data_list[1] 21 | # has_save_dir = data_list[2] 22 | las_data_list = data_list[2] 23 | has_data_list = data_list[3] 24 | # has_original_data_list = data_list[5] 25 | index = data_list[4] 26 | shared_progress_list = data_list[5] 27 | processed_file_count = 0 28 | with h5py.File(os.path.join(save_dir, 'data_{}.h5'.format(index)), 'a') as f: 29 | # with h5py.File(os.path.join(has_save_dir, 'data_{}.h5'.format(index)), 'a') as has_h5_file: 30 | for las_data_name, has_data_name in zip(las_data_list, has_data_list): 31 | # process data name into format geometry_index_timestep 32 | str1, str2, str3, str4 = las_data_name.split('_') 33 | str4 = str4.split('.')[0] 34 | mesh_name = str1 + '_' + str2 35 | 36 | # check if the mesh type is in the h5 file, if not, create a group for it 37 | try: 38 | grp = f[mesh_name] 39 | grp_time = grp[str4] 40 | grp_las = grp_time['las'] 41 | grp_has = grp_time['has'] 42 | except KeyError: 43 | grp = f.require_group(mesh_name) 44 | grp_time = grp.require_group(str4) 45 | grp_las = grp_time.require_group('las') 46 | grp_has = grp_time.require_group('has') 47 | 48 | # process las graph 49 | las_data = np.load(os.path.join(raw_data_dir, 'las', las_data_name)) 50 | has_data = np.load(os.path.join(raw_data_dir, 'has', has_data_name)) 51 | 52 | str1, str2, str3, str4 = las_data_name.split('_') 53 | str4 = str4.split('.')[0] 54 | mesh_name = str1 + '_' + str2 + '.npz' 55 | mesh_data = np.load(os.path.join(raw_data_dir, 'mesh', 'las', mesh_name)) 56 | # node_data = np.zeros(3) 57 | # val_data = np.zeros(3) 58 | 59 | node_data_x = las_data['ux'] 60 | node_data_y = las_data['uy'] 61 | node_data_p = las_data['p'] 62 | 63 | val_data_x = has_data['ux'] 64 | val_data_y = has_data['uy'] 65 | val_data_p = has_data['p'] 66 | 67 | node_data_list = np.column_stack((node_data_x, node_data_y, node_data_p)) 68 | val_data_list = np.column_stack((val_data_x, val_data_y, val_data_p)) 69 | # for j in range(len(mesh_data['x'])): 70 | # node_data[0] = las_data['ux'][j] 71 | # node_data[1] = las_data['uy'][j] 72 | # node_data[2] = las_data['p'][j] 73 | 74 | # val_data[0] = has_data['ux'][j] 75 | # val_data[1] = has_data['uy'][j] 76 | # val_data[2] = has_data['p'][j] 77 | 78 | # if j == 0: 79 | # node_data_list = np.array([node_data]) 80 | # val_data_list = np.array([val_data]) 81 | # else: 82 | # node_data_list = np.append(node_data_list, np.array([node_data]), axis=0) 83 | # val_data_list = np.append(val_data_list, np.array([val_data]), axis=0) 84 | 85 | node_data_list = torch.tensor(node_data_list, dtype=torch.float) 86 | val_data_list = torch.tensor(val_data_list, dtype=torch.float) 87 | 88 | # process edge 89 | edge_index = np.array(mesh_data['edges']) 90 | edge_index = torch.tensor(edge_index, dtype=torch.long) 91 | edge_attr = np.array(mesh_data['edge_properties']) 92 | edge_attr = torch.tensor(edge_attr, dtype=torch.float) 93 | 
94 | # node_pos = np.zeros(2) 95 | node_pos_x = mesh_data['x'] 96 | node_pos_y = mesh_data['y'] 97 | node_pos_list = np.column_stack((node_pos_x, node_pos_y)) 98 | # for j in range(len(mesh_data['x'])): 99 | # node_pos[0] = mesh_data['x'][j] 100 | # node_pos[1] = mesh_data['y'][j] 101 | 102 | # if j == 0: 103 | # node_pos_list = np.array([node_pos]) 104 | # else: 105 | # node_pos_list = np.append(node_pos_list, np.array([node_pos]), axis=0) 106 | 107 | node_pos_list = torch.tensor(node_pos_list, dtype=torch.float) 108 | 109 | # create a python dictionary to store the data 110 | # data_las = {'x': node_data_list, 'y': val_data_list, 'edge_index': edge_index.t().contiguous(), 'edge_attr': edge_attr, 'pos': node_pos_list} 111 | data_las = Data(x=node_data_list, y=val_data_list, edge_index=edge_index.t().contiguous(), edge_attr=edge_attr, pos=node_pos_list) 112 | # print("las data process done") 113 | # process has graph 114 | has_data_original = np.load(os.path.join(raw_data_dir, 'has_original', has_data_name)) 115 | mesh_data = np.load(os.path.join(raw_data_dir, 'mesh', 'has', mesh_name)) 116 | # node_data = np.zeros(3) 117 | node_data_x = has_data_original['ux'] 118 | node_data_y = has_data_original['uy'] 119 | node_data_p = has_data_original['p'] 120 | 121 | node_data_list = np.column_stack((node_data_x, node_data_y, node_data_p)) 122 | # for j in range(len(mesh_data['x'])): 123 | # node_data[0] = has_data_original['ux'][j] 124 | # node_data[1] = has_data_original['uy'][j] 125 | # node_data[2] = has_data_original['p'][j] 126 | 127 | # if j == 0: 128 | # node_data_list = np.array([node_data]) 129 | # else: 130 | # node_data_list = np.append(node_data_list, np.array([node_data]), axis=0) 131 | 132 | node_data_list = torch.tensor(node_data_list, dtype=torch.float) 133 | edge_index = np.array(mesh_data['edges']) 134 | edge_index = torch.tensor(edge_index, dtype=torch.long) 135 | edge_attr = np.array(mesh_data['edge_properties']) 136 | edge_attr = torch.tensor(edge_attr, dtype=torch.float) 137 | 138 | node_pos_x = mesh_data['x'] 139 | node_pos_y = mesh_data['y'] 140 | node_pos_list = np.column_stack((node_pos_x, node_pos_y)) 141 | # node_pos = np.zeros(2) 142 | # for j in range(len(mesh_data['x'])): 143 | # node_pos[0] = mesh_data['x'][j] 144 | # node_pos[1] = mesh_data['y'][j] 145 | 146 | # if j == 0: 147 | # node_pos_list = np.array([node_pos]) 148 | # else: 149 | # node_pos_list = np.append(node_pos_list, np.array([node_pos]), axis=0) 150 | 151 | node_pos_list = torch.tensor(node_pos_list, dtype=torch.float) 152 | 153 | # create a python dictionary to store the data 154 | # data_has = {'x': node_data_list, 'edge_index': edge_index.t().contiguous(), 'edge_attr': edge_attr, 'pos': node_pos_list} 155 | data_has = Data(x=node_data_list, edge_index=edge_index.t().contiguous(), edge_attr=edge_attr, pos=node_pos_list) 156 | # print("has data process done") 157 | 158 | # write las, has data to dset, with key being time step, and data being the data object 159 | for key, value in data_las: 160 | grp_las.create_dataset(key, data=value.numpy(), compression="gzip", compression_opts=9, chunks=True) 161 | for key, value in data_has: 162 | grp_has.create_dataset(key, data=value.numpy(), compression="gzip", compression_opts=9, chunks=True) 163 | # has_h5_file.flush() 164 | # print("data save done") 165 | # with progress.get_lock(): 166 | # progress.value += 1 167 | processed_file_count += 1 168 | # update progress every 10 files 169 | if processed_file_count % 10 == 0 and processed_file_count > 0: 170 | 
shared_progress_list[index] = processed_file_count 171 | 172 | # update progress 173 | shared_progress_list[index] = processed_file_count 174 | 175 | # print("process done") 176 | return 0 177 | 178 | 179 | def update_progress(shared_progress_list, total_data): 180 | with tqdm(total=total_data) as pbar: 181 | while np.sum(np.array(shared_progress_list)) < total_data - 1: 182 | current_len = np.sum(np.array(shared_progress_list)) 183 | pbar.update(current_len - pbar.n) 184 | time.sleep(1) 185 | 186 | 187 | def copy_group(src_group, dst_group): 188 | for key in src_group.keys(): 189 | src_item = src_group[key] 190 | if isinstance(src_item, h5py.Group): 191 | # Create a subgroup in the destination group if it doesn't exist 192 | if key not in dst_group: 193 | dst_group.create_group(key) 194 | dst_subgroup = dst_group[key] 195 | copy_group(src_item, dst_subgroup) 196 | else: 197 | src_group.copy(key, dst_group) 198 | 199 | 200 | def merge_hdf5_files(input_files, output_file): 201 | with h5py.File(output_file, 'a') as output_h5: 202 | for input_file in input_files: 203 | with h5py.File(input_file, 'r') as input_h5: 204 | copy_group(input_h5, output_h5) 205 | # remove the input file once it's copied 206 | os.remove(input_file) 207 | -------------------------------------------------------------------------------- /megaflow/dataset/MegaFlow2D.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Union 3 | from collections.abc import Sequence 4 | import multiprocessing as mp 5 | from threading import Thread 6 | import h5py 7 | from zipfile import ZipFile 8 | 9 | import torch 10 | from torch import Tensor 11 | from torch_geometric.data import Data, Dataset, download_url, extract_zip 12 | import numpy as np 13 | from megaflow.common.utils import process_file_list, update_progress, copy_group, merge_hdf5_files 14 | 15 | IndexType = Union[slice, Tensor, np.ndarray, Sequence] 16 | 17 | class MegaFlow2D(Dataset): 18 | """ 19 | The MegaFlow2D dataset is a collection of 2D flow simulations of different geometries. 20 | Current supported geometries include: circle, ellipse, nozzle. 21 | 22 | Input: 23 | root: root directory of the dataset 24 | transform: transform to be applied on the data 25 | pre_transform: transform to be applied on the data during preprocessing, e.g. splitting into individual graphs 26 | or dividing in temporal sequence 27 | split_scheme: 'full', 'circle', 'ellipse', 'mixed' 28 | split_ratio: defult set as [0.5, 0.5] for circle and ellipse respectively 29 | """ 30 | def __init__(self, root, download, transform, pre_transform, split_scheme='mixed', split_ratio=None): 31 | self._indices = None 32 | self.root = root 33 | # self.split = split 34 | self.transforms = transform 35 | self.pre_transform = pre_transform 36 | if download: 37 | self.download() 38 | # give a warning that the package does not check the integrity of the downloaded data 39 | Warning('The package does not check the integrity of the downloaded data. The downloading operation is always executed if the flag download is True. 
Please disable the download flag if you have already downloaded the data') 40 | self.data_list = self.get_data_list 41 | # self.processed_las_data_dir = os.path.join(self.root, 'processed', 'las') 42 | # self.processed_has_data_dir = os.path.join(self.root, 'processed', 'has') 43 | 44 | if not self.is_processed: 45 | self.process() 46 | # input_file = [os.path.join(self.processed_dir, 'data_{}.h5'.format(i)) for i in range(24)] 47 | # merge_hdf5_files(input_files=input_file, output_file=os.path.join(self.processed_dir, 'data.h5')) 48 | 49 | self.circle_data_list = [name for name in self.data_list if name.split('_')[0] == 'circle'] 50 | self.ellipse_data_list = [name for name in self.data_list if name.split('_')[0] == 'ellipse'] 51 | 52 | # self.circle_low_res_data_list = [name for name in self.data_list if name.split('_')[0] == 'las'] 53 | # self.high_res_data_list = [name for name in self.data_list if name.split('_')[0] == 'has'] 54 | 55 | # self.las_data_list = os.listdir(os.path.join(self.raw_dir, 'las')) 56 | # self.has_data_list = os.listdir(os.path.join(self.raw_dir, 'has')) 57 | # self.mesh_data_list = os.listdir(os.path.join(self.raw_dir, 'mesh')) 58 | self.split_scheme = split_scheme 59 | if self.split_scheme == 'full': 60 | self.data_list = self.data_list 61 | elif self.split_scheme == 'circle': 62 | self.data_list = self.circle_data_list 63 | elif self.split_scheme == 'ellipse': 64 | self.data_list = self.ellipse_data_list 65 | elif self.split_scheme == 'mixed': 66 | # split the dataset according to the split_ratio 67 | if split_ratio is None: 68 | split_ratio = [0.5, 0.5] 69 | self.data_list = self.circle_data_list[:int(len(self.circle_data_list) * split_ratio[0])] + \ 70 | self.ellipse_data_list[:int(len(self.ellipse_data_list) * split_ratio[1])] 71 | 72 | @property 73 | def raw_file_names(self): 74 | return os.listdir(os.path.join(self.raw_dir, 'las')) 75 | 76 | @property 77 | def processed_file_names(self): 78 | if os.path.exists(self.processed_dir): 79 | return os.listdir(self.processed_dir) 80 | else: 81 | return [] 82 | 83 | @property 84 | def is_processed(self): 85 | if os.path.exists(self.processed_dir): 86 | if len(self.processed_file_names) == 0: 87 | return False 88 | else: 89 | return True 90 | else: 91 | return False 92 | 93 | @property 94 | def get_data_list(self): 95 | # process raw file names into geometry_index_timestep format, save the list in data_list 96 | raw_file_names = self.raw_file_names 97 | _data_list = [] 98 | for file_name in raw_file_names: 99 | str1, str2, str3, str4 = file_name.split('_') 100 | str4 = str4.split('.')[0] 101 | _data_list.append(str1 + '_' + str2 + '_' + str4) 102 | 103 | return _data_list 104 | 105 | def len(self): 106 | if not self.is_processed: 107 | return 0 108 | else: 109 | return len(self.data_list) 110 | 111 | def _extract_zip(self, path, folder): 112 | zips = ["data.zip.00{}".format(i) for i in range(1, 6)] 113 | 114 | with open(os.path.join(path, "data.zip"), "ab") as f: 115 | for zipName in zips: 116 | with open(os.path.join(path, zipName), "rb") as z: 117 | f.write(z.read()) 118 | 119 | z.close() 120 | os.remove(os.path.join(path, zipName)) 121 | 122 | with ZipFile(os.path.join(path, "data.zip"), "r") as zipObj: 123 | zipObj.extractall(folder) 124 | os.remove(os.path.join(path, "data.zip")) 125 | 126 | def download(self): 127 | for i in range(1, 6): 128 | url = 'https://huggingface.co/datasets/cmudrc/MegaFlow2D/resolve/main/data.zip.00{}'.format(i) 129 | path = download_url(url, self.root) 130 | 
self._extract_zip(self.root, self.root) 131 | 132 | def process(self): 133 | # Read mesh solution into graph structure 134 | os.makedirs(self.processed_dir, exist_ok=True) 135 | # os.makedirs(self.processed_has_data_dir, exist_ok=True) 136 | las_data_list = os.listdir(os.path.join(self.raw_dir, 'las')) 137 | has_data_list = os.listdir(os.path.join(self.raw_dir, 'has')) 138 | # has_original_data_list = os.listdir(os.path.join(self.raw_dir, 'has_original')) 139 | data_len = len(las_data_list) 140 | # mesh_data_list = os.listdir(os.path.join(self.raw_dir, 'mesh')) 141 | # split the list according to the number of processors and process the data in parallel 142 | num_proc = mp.cpu_count() 143 | las_data_list = np.array_split(las_data_list, num_proc) 144 | has_data_list = np.array_split(has_data_list, num_proc) 145 | # has_original_data_list = np.array_split(has_original_data_list, num_proc) 146 | 147 | # organize the data list for each process and combine into pool.map input 148 | data_list = [] 149 | # progress = mp.Value('i', 0) 150 | manager = mp.Manager() 151 | shared_progress_list = manager.list([0] * num_proc) 152 | for i in range(num_proc): 153 | data_list.append([self.raw_dir, self.processed_dir, las_data_list[i], has_data_list[i], i, shared_progress_list]) 154 | 155 | # start the progress bar 156 | progress_thread = Thread(target=update_progress, args=(shared_progress_list, data_len)) 157 | progress_thread.start() 158 | 159 | # start the processes 160 | with mp.Pool(num_proc) as pool: 161 | results = [pool.apply_async(process_file_list, args=([data_list[i]])) for i in range(num_proc)] 162 | 163 | for result in results: 164 | result.get() 165 | 166 | # stop the progress bar 167 | progress_thread.join() 168 | 169 | # merge the data 170 | input_file = [os.path.join(self.processed_dir, 'data_{}.h5'.format(i)) for i in range(num_proc)] 171 | # input_file_has = [os.path.join(self.processed_has_data_dir, 'data_{}.h5'.format(i)) for i in range(num_proc)] 172 | output_file = os.path.join(self.processed_dir, 'data.h5') 173 | # output_file_has = os.path.join(self.processed_has_data_dir, 'data.h5') 174 | merge_hdf5_files(input_files=input_file, output_file=output_file) 175 | # self.merge_hdf5_files(input_file_has, output_file_has) 176 | # redo data list 177 | 178 | def transform(self, data): 179 | (data_l, data_h) = data 180 | if self.transforms == 'error_estimation': 181 | data_l.y = data_l.y - data_l.x 182 | if self.transforms == 'normalize': 183 | # normalize the data layer-wise via gaussian distribution 184 | data_l.x = (data_l.x - data_l.x.mean(dim=0)) / (data_l.x.std(dim=0) + 1e-8) 185 | data_h.x = (data_h.y - data_l.x.mean(dim=0)) / (data_l.x.std(dim=0) + 1e-8) 186 | return (data_l, data_h) 187 | 188 | def get(self, idx): 189 | data_name = self.data_list[idx] 190 | str1, str2, str3 = data_name.split('_') 191 | mesh_name = str1 + '_' + str2 192 | with h5py.File(os.path.join(self.processed_dir, 'data.h5'), 'r') as f: 193 | grp = f[mesh_name] 194 | grp_time = grp[str3] 195 | grp_las = grp_time['las'] 196 | grp_has = grp_time['has'] 197 | las_data_dict = {key: torch.tensor(grp_las[key][:]) for key in grp_las.keys()} 198 | has_data_dict = {key: torch.tensor(grp_has[key][:]) for key in grp_has.keys()} 199 | data_l = Data.from_dict(las_data_dict) 200 | data_h = Data.from_dict(has_data_dict) 201 | # if self.transforms is not None: 202 | # data_l = self.transform(data_l) 203 | 204 | return data_l, data_h 205 | 206 | def __getitem__( 207 | self, 208 | idx: Union[int, np.integer, IndexType], 209 | ) 
-> Union['Dataset', Data]: 210 | r"""In case :obj:`idx` is of type integer, will return the data object 211 | at index :obj:`idx` (and transforms it in case :obj:`transform` is 212 | present). 213 | In case :obj:`idx` is a slicing object, *e.g.*, :obj:`[2:5]`, a list, a 214 | tuple, or a :obj:`torch.Tensor` or :obj:`np.ndarray` of type long or 215 | bool, will return a subset of the dataset at the specified indices.""" 216 | if (isinstance(idx, (int, np.integer)) 217 | or (isinstance(idx, Tensor) and idx.dim() == 0) 218 | or (isinstance(idx, np.ndarray) and np.isscalar(idx))): 219 | 220 | data_l, data_h = self.get(self.indices()[idx]) 221 | data_l = data_l if self.transform is None else self.transform(data_l) 222 | return (data_l, data_h) 223 | 224 | else: 225 | return self.index_select(idx) 226 | 227 | def get_eval(self, idx): 228 | data_name = self.data_list[idx] 229 | str1, str2, str3 = data_name.split('_') 230 | mesh_name = str1 + '_' + str2 231 | with h5py.File(os.path.join(self.processed_dir, 'data.h5'), 'r') as f: 232 | grp = f[mesh_name] 233 | grp_time = grp[str3] 234 | grp_las = grp_time['las'] 235 | grp_has = grp_time['has'] 236 | las_data_dict = {key: torch.tensor(grp_las[key][:]) for key in grp_las.keys()} 237 | has_data_dict = {key: torch.tensor(grp_has[key][:]) for key in grp_has.keys()} 238 | data_l = Data.from_dict(las_data_dict) 239 | data_h = Data.from_dict(has_data_dict) 240 | # if self.transforms is not None: 241 | # data_l = self.transform(data_l) 242 | 243 | return data_name, (data_l, data_h) 244 | 245 | 246 | class MegaFlow2DSubset(MegaFlow2D): 247 | """ 248 | This subset splits the entire dataset into 40 subsets, which is initialized via indices. 249 | """ 250 | def __init__(self, root, indices, transform=None): 251 | raise NotImplementedError 252 | -------------------------------------------------------------------------------- /megaflow/dataset/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/dataset/__init__.py -------------------------------------------------------------------------------- /megaflow/dataset/__pycache__/MegaFlow2D.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/dataset/__pycache__/MegaFlow2D.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/dataset/__pycache__/MegaFlow2D.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/dataset/__pycache__/MegaFlow2D.cpython-39.pyc -------------------------------------------------------------------------------- /megaflow/dataset/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/dataset/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /megaflow/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/megaflow/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /megaflow/version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from threading import Thread 4 | 5 | __version__ = '0.6.0' 6 | 7 | try: 8 | os.environ['OUTDATED_IGNORE'] = '1' 9 | from outdated import check_outdated # noqa 10 | except ImportError: 11 | check_outdated = None 12 | 13 | def check(): 14 | try: 15 | is_outdated, latest = check_outdated('MegaFlow2D', __version__) 16 | if is_outdated: 17 | logging.warning( 18 | f'The MegaFlow2D package is out of date. Your version is ' 19 | f'{__version__}, while the latest version is {latest}.') 20 | except Exception: 21 | pass 22 | 23 | if check_outdated is not None: 24 | thread = Thread(target=check) 25 | thread.start() -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import os 3 | import sys 4 | 5 | here = os.path.abspath(os.path.dirname(__file__)) 6 | sys.path.insert(0, os.path.join(here, 'megaflow')) 7 | from version import __version__ 8 | 9 | print(f'Version {__version__}') 10 | 11 | with open("README.md", "r", encoding="utf-8") as fh: 12 | long_description = fh.read() 13 | 14 | setuptools.setup( 15 | name="MegaFlow2D", 16 | version=__version__, 17 | author="Wenzhuo Xu, Noelia Grande Gutierrez and Christopher McComb", 18 | author_email="wxu2@andrew.cmu.edu", 19 | url="https://github.com/cmudrc/MegaFlow2D", 20 | description="MegaFlow2D: A Large-Scale Dataset for 2D Flow Simulation", 21 | long_description=long_description, 22 | long_description_content_type="text/markdown", 23 | install_requires = [ 24 | 'h5py>=2.10.0', 25 | 'numpy>=1.19.1', 26 | 'ogb>=1.2.6', 27 | 'outdated>=0.2.0', 28 | 'pandas>=1.1.0', 29 | 'pillow>=7.2.0', 30 | 'pytz>=2020.4', 31 | 'scikit-learn>=0.20.0', 32 | 'scipy>=1.5.4', 33 | 'torch>=1.7.0', 34 | 'torchvision>=0.8.2', 35 | 'tqdm>=4.53.0', 36 | ], 37 | license='MIT', 38 | packages=setuptools.find_packages(exclude=['examples', 'examples.model']), 39 | classifiers=[ 40 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 41 | 'Intended Audience :: Science/Research', 42 | "Programming Language :: Python :: 3", 43 | "License :: OSI Approved :: MIT License", 44 | ], 45 | python_requires='>=3.6', 46 | ) 47 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from megaflow.dataset.MegaFlow2D import MegaFlow2D 2 | 3 | if __name__ == '__main__': 4 | # Create a dataset object 5 | dataset = MegaFlow2D(root='D:/Work/data', download=True, transform='normalize', pre_transform=None) -------------------------------------------------------------------------------- /xu2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmudrc/MegaFlow2D/abfb4282689261a3e19059d1bba552608d34ab3d/xu2023.pdf 
--------------------------------------------------------------------------------