├── .gitignore ├── LICENSE.txt ├── README.md ├── profile ├── adaptdl │ ├── cifar_ddp.py │ ├── dcgan_exmp.py │ ├── extract_data.py │ ├── pollux_cifar.py │ ├── pollux_cifar_multigpu.py │ ├── pollux_dcgan.py │ ├── pollux_mnist.py │ ├── pollux_pointnet.py │ ├── pollux_pointnet_seg.py │ ├── result │ │ ├── EfficientNetB0 │ │ │ ├── false.csv │ │ │ └── true.csv │ │ └── plot.ipynb │ └── run.sh ├── co_collect.py ├── main_co.py ├── main_co3.py ├── main_single.py ├── requirements.txt ├── single_collect.py └── smi.py ├── simulation ├── analyzer │ ├── analyzer.py │ └── single_data.csv ├── cluster.py ├── data │ ├── Venus │ │ ├── cluster_full_log.csv │ │ ├── cluster_throughput.csv │ │ └── vc_config.csv │ ├── colocate_info.csv │ ├── dict2csv.py │ ├── log_process.py │ ├── prepare_data.sh │ ├── trace_parser.py │ └── vc_dict_generator.py ├── estimator │ ├── __init__.py │ ├── ebm │ │ └── Venus_Sept_ebm_weekly_updated.csv │ ├── estimator.py │ ├── estimator_lucid.ipynb │ ├── lgb │ │ └── Venus_Sept_lgb.csv │ └── utils.py ├── job.py ├── plot │ └── result_plot.ipynb ├── policy │ ├── __init__.py │ ├── fifo.py │ ├── lucid.py │ ├── placer │ │ ├── __init__.py │ │ ├── consolidate.py │ │ ├── consolidateFirst.py │ │ ├── consolidateWithShare.py │ │ └── random.py │ ├── policy.py │ ├── qssf.py │ ├── sjf.py │ ├── srtf.py │ └── tiresias.py ├── predictor │ ├── Venus_throughput_pred.csv │ └── predictor.ipynb ├── profiler │ ├── __init__.py │ ├── lgf.py │ └── profiler.py ├── requirements.txt ├── run.sh ├── simulator.py ├── updater.py └── utils.py └── workloads ├── bert └── profile_bert.py ├── cifar ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── densenet.py │ ├── dpn.py │ ├── efficientnet.py │ ├── googlenet.py │ ├── lenet.py │ ├── mobilenet.py │ ├── mobilenetv2.py │ ├── pnasnet.py │ ├── preact_resnet.py │ ├── resnet.py │ ├── resnext.py │ ├── senet.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ └── vgg.py ├── profile_cifar.py └── run.sh ├── dcgan ├── download.py └── profile_dcgan.py ├── deepspeech2 ├── data │ ├── __init__.py │ ├── an4.py │ ├── cmu-arctic-manifests.tar.gz │ ├── common_voice.py │ ├── data_loader.py │ ├── librispeech.py │ ├── merge_manifests.py │ ├── sparse_image_warp.py │ ├── spec_augment.py │ ├── ted.py │ ├── utils.py │ └── voxforge.py ├── decoder.py ├── labels.json ├── models.py └── profile_deepspeech.py ├── imagenet ├── profile_imagenet.py ├── profile_imagenet_ddp.py └── requirements.txt ├── lstm ├── data.py ├── models.py └── profile_lstm.py ├── ncf ├── config.py ├── data_utils.py ├── evaluate.py ├── models.py └── profile_ncf.py ├── pointnet ├── dataset.py ├── num_seg_classes.txt ├── pointnet.py └── profile_pointnet.py ├── rl ├── profile_rl_lunarlander.py └── profile_rl_walker.py ├── settings.py └── translation ├── dataset.py ├── multi-bleu.perl ├── nonbreaking_prefix.de ├── nonbreaking_prefix.en ├── pollux_transformer.py ├── preprocess.py ├── profile_transformer.py ├── tokenizer.perl └── transformer ├── Beam.py ├── Constants.py ├── Layers.py ├── Models.py ├── Modules.py ├── Optim.py ├── SubLayers.py ├── Translator.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python egg metadata, regenerated from source files by setuptools. 2 | *.egg-info 3 | .eggs/ 4 | 5 | # PyPI distribution artifacts. 6 | build/ 7 | dist/ 8 | 9 | # Byte-compiled 10 | __pycache__/ 11 | .cache/ 12 | 13 | # Compiled python modules.
14 | *.pyc 15 | 16 | # PyCharm/vscode 17 | .idea 18 | .vscode 19 | 20 | # jupyter checkpoints 21 | **/.ipynb_checkpoints 22 | 23 | # Other 24 | *.DS_Store -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | 11 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | 4. 
In the event that redistribution and/or use for commercial purpose in source or binary forms, with or without modification is required, please contact the contributor(s) of the work -------------------------------------------------------------------------------- /profile/adaptdl/extract_data.py: -------------------------------------------------------------------------------- 1 | from tensorboard.backend.event_processing import event_accumulator 2 | import argparse 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | 7 | def main(): 8 | # load log data 9 | parser = argparse.ArgumentParser(description='Export tensorboard data') 10 | parser.add_argument('--in-path', type=str, default='./result/VGG/True/200', help='Tensorboard event files or a single tensorboard file location') 11 | parser.add_argument('--ex-path', type=str, default='./result/VGG/true.csv', help='location to save the exported data') 12 | 13 | args = parser.parse_args() 14 | event_data = event_accumulator.EventAccumulator(args.in_path) # a python interface for loading Event data 15 | event_data.Reload() # synchronously loads all of the data written so far 16 | # print(event_data.Tags()) # print all tags 17 | keys = event_data.scalars.Keys() # get all tags,save in a list 18 | # print(keys) 19 | df = pd.DataFrame(columns=keys[7:]) # my first column is training loss per iteration, so I abandon it 20 | for key in tqdm(keys): 21 | # print(key) 22 | if key == 'Loss/Train' or key == 'Accuracy/Train' or key == 'Loss/Valid' or key == 'Accuracy/Valid': 23 | df[key] = pd.DataFrame(event_data.Scalars(key)).value 24 | 25 | df.to_csv(args.ex_path) 26 | 27 | print("Tensorboard data exported successfully") 28 | 29 | 30 | if __name__ == '__main__': 31 | main() -------------------------------------------------------------------------------- /profile/adaptdl/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | worker_num=2 3 | model=$2 4 | batch_size=$3 5 | 6 | last_rank=`expr $worker_num - 1` 7 | 8 | # nodes=$(scontrol show hostnames $SLURM_JOB_NODELIST) # Getting the node names 9 | nodes='127.0.0.1' 10 | nodes_array=( $nodes ) 11 | node1=${nodes_array[0]} 12 | 13 | #export ADAPTDL_CHECKPOINT_PATH=cifar-checkpoint 14 | # export ADAPTDL_SHARE_PATH=data 15 | # export ADAPTDL_JOB_ID=$SLURM_JOB_ID 16 | export ADAPTDL_MASTER_ADDR=$node1 17 | export ADAPTDL_MASTER_PORT=47020 18 | export ADAPTDL_NUM_REPLICAS=$worker_num 19 | 20 | 21 | ADAPTDL_REPLICA_RANK=0 python3 -u pollux_mnist.py & 22 | ADAPTDL_REPLICA_RANK=1 python3 -u pollux_mnist.py 23 | 24 | # # batch_size=128 25 | # for (( i=0; i < $worker_num; i++ )) 26 | # do 27 | # # node=${nodes_array[$i]} 28 | # node=${nodes_array[0]} 29 | # if [[ $i -lt `expr $worker_num-1` ]] 30 | # then 31 | # ADAPTDL_REPLICA_RANK=$i python3 -u pollux_cifar.py & 32 | # else 33 | # ADAPTDL_REPLICA_RANK=$i python3 -u pollux_cifar.py 34 | # fi 35 | # done -------------------------------------------------------------------------------- /profile/co_collect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import time 4 | from multiprocessing import Process, Manager, Value 5 | 6 | from smi import smi_getter 7 | 8 | 9 | def collect(fun1, m_list1, dataset1, bs_list1, fun2, m_list2, dataset2, bs_list2, gpu_id): 10 | metric_list = [] 11 | if dataset1 == 'LunarLander' or dataset1 == 'BipedalWalker' or dataset1 == 'Multi30k': 12 | mp_list1 = [0] 13 | else: 14 | mp_list1 = [0, 1] 15 | 16 | if 
dataset2 == 'LunarLander' or dataset2 == 'BipedalWalker' or dataset2 == 'Multi30k': 17 | mp_list2 = [0] 18 | else: 19 | mp_list2 = [0, 1] 20 | for model_name1 in m_list1: 21 | for model_name2 in m_list2: 22 | for batch_size1 in bs_list1: 23 | for batch_size2 in bs_list2: 24 | if model_name1 == 'resnet50': 25 | batch_size1 = 32 26 | if model_name2 == 'resnet50': 27 | batch_size2 = 32 28 | for mp1 in mp_list1: 29 | for mp2 in mp_list2: 30 | # Check whether there are duplicated pairs 31 | df_check = pd.DataFrame(metric_list, columns=['model1', 'dataset1', 'gpu_num1', 'batchsize1', 'amp1', 'speed1', 'model2', 'dataset2', 'gpu_num2', 'batchsize2', 'amp2', 'speed2', 'gpu_util', 'gmem_util', 'gmem']) 32 | # print(df_check[['model1', 'batchsize1', 'amp1', 'model2', 'batchsize2', 'amp2']]) 33 | 34 | info = df_check.query(" model1 == @model_name2 and model2 == @model_name1 and batchsize1 == @batch_size2 and batchsize2 == @batch_size1 and dataset1 == @dataset2 and dataset2 == @dataset1 and amp1 == @mp2 and amp2 == @mp1") 35 | if not info.empty: 36 | continue 37 | info2 = df_check.query(" model1 == @model_name1 and model2 == @model_name2 and batchsize1 == @batch_size1 and batchsize2 == @batch_size2 and dataset1 == @dataset1 and dataset2 == @dataset2 and amp1 == @mp1 and amp2 == @mp2") 38 | if not info2.empty: 39 | continue 40 | 41 | # collect co-locate jobs gpu info 42 | print('co-locate:') 43 | with Manager() as manager: 44 | smi_list = manager.list() 45 | speed_list1 = manager.list() 46 | speed_list2 = manager.list() 47 | signal1 = Value('i', 0) 48 | signal2 = Value('i', 0) 49 | 50 | p1 = Process(target=fun1, args=(model_name1, batch_size1, mp1, gpu_id, speed_list1, signal1, )) 51 | p2 = Process(target=fun2, args=(model_name2, batch_size2, mp2, gpu_id, speed_list2, signal2, )) 52 | p3 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 53 | 54 | p1.start() 55 | p2.start() 56 | while True: 57 | if signal1.value == 1 and signal2.value == 1: 58 | p3.start() 59 | break 60 | 61 | p1.join() 62 | p2.join() 63 | p3.terminate() 64 | 65 | speed_list1 = list(speed_list1) 66 | speed_list2 = list(speed_list2) 67 | smi_df = pd.DataFrame(list(smi_list)) 68 | smi_df.drop(index=smi_df.index[0], inplace=True)  # drop the warm-up sample in place 69 | smi_df.drop(index=smi_df.index[-1], inplace=True)  # drop the tail sample 70 | 71 | d1 = {'model1': model_name1, 'dataset1': dataset1, 'gpu_num1': len(gpu_id), 'batchsize1': batch_size1, 'amp1': mp1} 72 | d1['speed1'] = round(speed_list1[0], 3) 73 | d2 = {'model2': model_name2, 'dataset2': dataset2, 'gpu_num2': len(gpu_id), 'batchsize2': batch_size2, 'amp2': mp2} 74 | d2['speed2'] = round(speed_list2[0], 3) 75 | metric_dict = {} 76 | metric_dict.update(d1) 77 | metric_dict.update(d2) 78 | # Process gpu info 79 | smi_df['gpuUtil'] = pd.to_numeric(smi_df['gpuUtil']) 80 | metric_dict['gpu_util'] = round(pd.to_numeric(smi_df['gpuUtil']).mean(), 3) 81 | metric_dict['gmem_util'] = round(pd.to_numeric(smi_df['gpuMemUtil']).mean(), 3) 82 | smi_df['gpuMem'] = smi_df['gpuMem'].apply(lambda x: x[:-4]).astype('int64') 83 | metric_dict['gmem'] = round(smi_df['gpuMem'].max(), 3) 84 | 85 | # print(metric_dict) 86 | metric_list.append(metric_dict) 87 | time.sleep(2) 88 | 89 | return metric_list -------------------------------------------------------------------------------- /profile/main_co3.py: -------------------------------------------------------------------------------- 1 | #NOTE: CUDA_LAUNCH_BLOCKING=1 python main_co.py will slow down the speed 2 | from __future__ import print_function 3 | 4 | import torch.backends.cudnn as cudnn 5 | import torch.nn.functional
as F 6 | import torch.optim as optim 7 | import torch 8 | import numpy as np 9 | import os 10 | import pandas as pd 11 | import time 12 | import sys 13 | sys.path.append('./workloads/') 14 | 15 | from multiprocessing import Process, Manager, Value 16 | from workloads.lstm.profile_lstm import benchmark_lstm 17 | from workloads.imagenet.profile_imagenet import benchmark_imagenet 18 | from workloads.cifar.profile_cifar import benchmark_cifar 19 | from workloads.pointnet.profile_pointnet import benchmark_pointnet 20 | from workloads.dcgan.profile_dcgan import benchmark_dcgan 21 | from workloads.rl.profile_rl_lunarlander import benchmark_rl 22 | from workloads.rl.profile_rl_walker import benchmark_rl2 23 | from workloads.bert.profile_bert import benchmark_bert 24 | from workloads.ncf.profile_ncf import benchmark_ncf 25 | from smi import smi_getter 26 | from co_collect import collect 27 | 28 | # model_list_imagenet = ['resnet18', 'resnet50', 'mobilenet_v3_small', 'efficientnet_b0', 'shufflenet_v2_x0_5', 'vgg11', 'alexnet'] 29 | # model_list_cifar = ['AlexNet', 'EfficientNetB0', 'MobileNetV2', 'ResNet18', 'ResNet50', 'ShuffleNetV2', 'VGG'] 30 | metric_list = [] 31 | model_name1 = 'ResNet18' 32 | model_name2 = 'ResNet18' 33 | model_name3 = 'ResNet18' 34 | batch_size1 = 32 35 | batch_size2 = 32 36 | batch_size3 = 32 37 | 38 | gpu_id = [0] 39 | 40 | start_record = time.time() 41 | with Manager() as manager: 42 | smi_list = manager.list() 43 | speed_list1 = manager.list() 44 | speed_list2 = manager.list() 45 | speed_list3 = manager.list() 46 | signal1 = Value('i', 0) 47 | signal2 = Value('i', 0) 48 | signal3 = Value('i', 0) 49 | 50 | p1 = Process(target=benchmark_cifar, args=(model_name1, batch_size1, 0, gpu_id, speed_list1, signal1, )) 51 | p2 = Process(target=benchmark_cifar, args=(model_name2, batch_size2, 0, gpu_id, speed_list2, signal2, )) 52 | p3 = Process(target=benchmark_cifar, args=(model_name3, batch_size3, 0, gpu_id, speed_list3, signal3, )) 53 | p4 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 54 | 55 | p1.start() 56 | p2.start() 57 | p3.start() 58 | 59 | while True: 60 | if signal1.value == 1 and signal2.value == 1 and signal3.value == 1: 61 | # if signal1.value == 1: 62 | p4.start() 63 | break 64 | 65 | p1.join() 66 | p2.join() 67 | p3.join() 68 | p4.terminate() 69 | 70 | smi_df = pd.DataFrame(list(smi_list)) 71 | 72 | print(f'1: {list(speed_list1)}, 2: {list(speed_list2)}, 3:{list(speed_list3)}') 73 | # print(f'1: {list(speed_list1)}') 74 | print(smi_df) 75 | 76 | # mlist_imagenet = ['mobilenet_v3_small'] 77 | # print('imagenet + imagenet') 78 | # metric_list1 = collect(benchmark_imagenet, mlist_imagenet, 'ImageNet', bs_list, benchmark_imagenet, mlist_imagenet, 'ImageNet', bs_list, gpu_id) 79 | # df = pd.DataFrame(metric_list1) 80 | # df.to_csv('./1.csv') 81 | 82 | # smi_list = [] 83 | # smi_getter(sys.argv[1:], smi_list, gpu_id) 84 | 85 | 86 | end_record = time.time() 87 | print(f'time usage: {end_record - start_record}') -------------------------------------------------------------------------------- /profile/main_single.py: -------------------------------------------------------------------------------- 1 | #NOTE: CUDA_LAUNCH_BLOCKING=1 python main.py 2 | from __future__ import print_function 3 | import os 4 | import pandas as pd 5 | import time 6 | 7 | import sys 8 | sys.path.append('./workloads/') 9 | 10 | from workloads.lstm.profile_lstm import benchmark_lstm 11 | from workloads.imagenet.profile_imagenet import benchmark_imagenet 12 | from 
workloads.cifar.profile_cifar import benchmark_cifar 13 | from workloads.pointnet.profile_pointnet import benchmark_pointnet 14 | from workloads.dcgan.profile_dcgan import benchmark_dcgan 15 | from workloads.rl.profile_rl_lunarlander import benchmark_rl 16 | from workloads.rl.profile_rl_walker import benchmark_rl2 17 | from workloads.bert.profile_bert import benchmark_bert 18 | from workloads.ncf.profile_ncf import benchmark_ncf 19 | from workloads.translation.profile_transformer import benchmark_transformer 20 | 21 | from single_collect import s_collect 22 | 23 | model_list_imagenet = ['resnet50', 'mobilenet_v3_small'] 24 | # model_list_cifar = ['ResNet18', 'MobileNetV2', 'EfficientNetB0', 'VGG'] 25 | model_list_cifar = ['ResNet18'] 26 | bs_list = [64] 27 | gpu_id = [0] 28 | metric_list = [] 29 | mp_list = [0, 1] 30 | 31 | os.makedirs('result/', exist_ok=True) 32 | 33 | # # Single: imagenet metric 34 | # print('Classification: imagenet') 35 | # dataset = 'imagenet' 36 | # for model_name in model_list_imagenet: 37 | # for batch_size in bs_list: 38 | # for mp in mp_list: 39 | # # collect single job gpu info 40 | # metric_dict = s_collect(benchmark_imagenet, dataset, model_name, batch_size, mp, gpu_id) 41 | # metric_list.append(metric_dict) 42 | # time.sleep(2) 43 | 44 | # Single: cifar10 metric 45 | print('Classification: cifar') 46 | dataset = 'cifar10' 47 | for model_name in model_list_cifar: 48 | for batch_size in bs_list: 49 | for mp in mp_list: 50 | # collect single job gpu info 51 | metric_dict = s_collect(benchmark_cifar, dataset, model_name, batch_size, mp, gpu_id) 52 | metric_list.append(metric_dict) 53 | time.sleep(2) 54 | 55 | # # Single: pointnet 56 | # print('3D: pointnet') 57 | # for batch_size in bs_list: 58 | # for mp in mp_list: 59 | # metric_dict = s_collect(benchmark_pointnet, dataset='shapenet', model_name='pointnet', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 60 | # metric_list.append(metric_dict) 61 | # time.sleep(2) 62 | 63 | # # Single: dcgan 64 | # print('CV: dcgan') 65 | # for batch_size in bs_list: 66 | # for mp in mp_list: 67 | # metric_dict = s_collect(benchmark_dcgan, dataset='LSUN', model_name='dcgan', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 68 | # metric_list.append(metric_dict) 69 | # time.sleep(2) 70 | 71 | # # Single: rl-lunalander 72 | # print('RL: LunarLander-v2') 73 | # for batch_size in bs_list: 74 | # metric_dict = s_collect(benchmark_rl, dataset='LunarLander-v2', model_name='PPO', batch_size=batch_size, mp=0, gpu_id=gpu_id) 75 | # metric_list.append(metric_dict) 76 | # time.sleep(2) 77 | 78 | # # Single: rl-Bipedal Walker 79 | # print('RL: Bipedal Walker') 80 | # for batch_size in bs_list: 81 | # metric_dict = s_collect(benchmark_rl2, dataset='BipedalWalker-v3', model_name='TD3', batch_size=batch_size, mp=0, gpu_id=gpu_id) 82 | # metric_list.append(metric_dict) 83 | # time.sleep(2) 84 | 85 | # # Single: ncf 86 | # print('Recommendation: ncf') 87 | # for batch_size in [64, 128]: 88 | # for mp in mp_list: 89 | # metric_dict = s_collect(benchmark_ncf, dataset='MovieLens', model_name='NeuMF-pre', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 90 | # metric_list.append(metric_dict) 91 | # time.sleep(2) 92 | 93 | # # Single: lstm 94 | # print('Language Modeling: lstm') 95 | # for batch_size in [64, 128]: 96 | # for mp in mp_list: 97 | # metric_dict = s_collect(benchmark_lstm, dataset='Wikitext2', model_name='LSTM', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 98 | # metric_list.append(metric_dict) 99 | # time.sleep(2) 100 | 101 | # # Single: bert 102 
| # print('Question Answering: bert') 103 | # for batch_size in [32]: 104 | # for mp in mp_list: 105 | # metric_dict = s_collect(benchmark_bert, dataset='SQUAD', model_name='bert', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 106 | # metric_list.append(metric_dict) 107 | # time.sleep(2) 108 | 109 | # # Single: transformer 110 | # print('Translation: transformer') 111 | # for batch_size in [32, 64]: 112 | # metric_dict = s_collect(benchmark_transformer, dataset='multi30k', model_name='transformer', batch_size=batch_size, mp=0, gpu_id=gpu_id) 113 | # metric_list.append(metric_dict) 114 | # time.sleep(2) 115 | 116 | # print(pd.DataFrame(metric_list)) 117 | df = pd.DataFrame(metric_list) 118 | # df.replace( 119 | # ['imagenet', 'cifar10', 'shapenet', 'LSUN', 'LunarLander-v2', 'BipedalWalker-v3', 'MovieLens', 'Wikitext2', 'SQUAD', 'multi30k'], 120 | # ['ImageNet', 'CIFAR-10', 'ShapeNet', 'LSUN', 'LunarLander', 'BipedalWalker', 'MovieLens', 'Wikitext2', 'SQuAD', 'Multi30k'], inplace=True) 121 | 122 | # df.replace( 123 | # ['resnet50', 'mobilenet_v3_small', 'EfficientNetB0', 'pointnet', 'dcgan', 'NeuMF-pre', 'bert', 'transformer'], 124 | # ['ResNet50', 'MobileNetV3', 'EfficientNet', 'PointNet', 'DCGAN', 'NeuMF', 'BERT', 'Transformer'], inplace=True) 125 | 126 | df.to_csv('./result/single_cifar.csv') 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /profile/requirements.txt: -------------------------------------------------------------------------------- 1 | Box2D 2 | Box2D-kengz 3 | swig 4 | gym 5 | transformers 6 | stable_baselines3 7 | scipy 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /profile/single_collect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import time 4 | from multiprocessing import Process, Manager, Value 5 | 6 | from smi import smi_getter 7 | 8 | 9 | def s_collect(fun, dataset, model_name, batch_size, mp, gpu_id): 10 | with Manager() as manager: 11 | smi_list = manager.list() 12 | speed_list = manager.list() 13 | warm_signal = Value('i', 0) 14 | 15 | p1 = Process(target=fun, args=(model_name, batch_size, mp, gpu_id, speed_list, warm_signal, )) 16 | p2 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 17 | 18 | t_begin = time.time() 19 | p1.start() 20 | while True: 21 | if warm_signal.value == 1: 22 | p2.start() 23 | break 24 | 25 | p1.join() 26 | p2.terminate() 27 | t_pass = time.time() - t_begin 28 | 29 | speed_list = list(speed_list) 30 | smi_df = pd.DataFrame(list(smi_list)) 31 | smi_df.drop(index=smi_df.index[0], inplace=True)  # drop the warm-up sample in place 32 | smi_df.drop(index=smi_df.index[-1], inplace=True)  # drop the tail sample 33 | # print(smi_df) 34 | 35 | metric_dict = {'model': model_name, 'dataset': dataset, 'gpu_num': len(gpu_id), 'batchsize': batch_size, 'amp': mp} 36 | metric_dict['speed'] = round(speed_list[0], 3) 37 | 38 | # Process gpu info 39 | metric_dict['gpu_util'] = round(pd.to_numeric(smi_df['gpuUtil']).mean(), 3) 40 | metric_dict['gmem_util'] = round(pd.to_numeric(smi_df['gpuMemUtil']).mean(), 3) 41 | smi_df['gpuMem'] = smi_df['gpuMem'].apply(lambda x: x[:-4]).astype('int64') 42 | metric_dict['gmem'] = round(smi_df['gpuMem'].mean(), 3) 43 | metric_dict['time'] = t_pass 44 | 45 | return metric_dict 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /profile/smi.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 |
import subprocess 4 | import sys 5 | import time 6 | import traceback 7 | 8 | from xml.dom import minidom 9 | 10 | 11 | def smi_getter(argv, smi_list, gpu_id): 12 | metrics_output_dir = "./" 13 | if len(gpu_id) == 1: 14 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]}".split() 15 | elif len(gpu_id) == 2: 16 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]},{gpu_id[1]}".split() 17 | elif len(gpu_id) == 4: 18 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]},{gpu_id[1]},{gpu_id[2]},{gpu_id[3]}".split() 19 | while True: 20 | try: 21 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 22 | smi_output = p.stdout.read() 23 | except Exception: 24 | traceback.print_exc() 25 | gen_empty_gpu_metric(metrics_output_dir) 26 | break 27 | output = parse_nvidia_smi_result(smi_output, metrics_output_dir, gpu_id) 28 | smi_list.extend(output) 29 | # TODO: change to sleep time configurable via arguments 30 | time.sleep(0.2) 31 | 32 | 33 | def parse_nvidia_smi_result(smi, outputDir, gpu_id): 34 | try: 35 | old_umask = os.umask(0) 36 | xmldoc = minidom.parseString(smi) 37 | gpuList = xmldoc.getElementsByTagName("gpu") 38 | gpuInfo = [] 39 | outPut = {} 40 | outPut["Timestamp"] = time.asctime(time.localtime()) 41 | for gpuIndex, gpu in enumerate(gpuList): 42 | outPut["index"] = gpu_id[gpuIndex] 43 | outPut["gpuUtil"] = ( 44 | gpu.getElementsByTagName("utilization")[0] 45 | .getElementsByTagName("gpu_util")[0] 46 | .childNodes[0] 47 | .data.replace("%", "") 48 | .strip() 49 | ) 50 | outPut["gpuMemUtil"] = ( 51 | gpu.getElementsByTagName("utilization")[0] 52 | .getElementsByTagName("memory_util")[0] 53 | .childNodes[0] 54 | .data.replace("%", "") 55 | .strip() 56 | ) 57 | outPut["gpuMem"] = ( 58 | gpu.getElementsByTagName("fb_memory_usage")[0] 59 | .getElementsByTagName("used")[0] 60 | .childNodes[0] 61 | .data 62 | ) 63 | # processes = gpu.getElementsByTagName("processes") 64 | # runningProNumber = len(processes[0].getElementsByTagName("process_info")) 65 | # gpuInfo["activeProcessNum"] = runningProNumber 66 | 67 | # print(outPut) 68 | gpuInfo.append(outPut.copy()) 69 | return gpuInfo 70 | 71 | except Exception as error: 72 | # e_info = sys.exc_info() 73 | print("gpu_metrics_collector error: %s" % error) 74 | finally: 75 | os.umask(old_umask) 76 | 77 | 78 | def gen_empty_gpu_metric(outputDir): 79 | try: 80 | old_umask = os.umask(0) 81 | with open(os.path.join(outputDir, "gpu_metrics"), "a") as outputFile: 82 | outPut = {} 83 | outPut["Timestamp"] = time.asctime(time.localtime()) 84 | outPut["gpuCount"] = 0 85 | outPut["gpuInfos"] = [] 86 | print(outPut) 87 | outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True))) 88 | outputFile.flush() 89 | except Exception: 90 | traceback.print_exc() 91 | finally: 92 | os.umask(old_umask) 93 | 94 | -------------------------------------------------------------------------------- /simulation/analyzer/analyzer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from primo.model import PrimoClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn import preprocessing, metrics 8 | 9 | 10 | def set_seed(seed): 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | 14 | 15 | seed = 123 16 | set_seed(seed) 17 | 18 | result = pd.DataFrame() 19 | single = pd.read_csv("PATH_TO_PROFILED_SINGLE_DATA.csv") 20 | colo = pd.read_csv(f"PATH_TO_PROFILED_COLOCATED_DATA.csv", index_col=0) 21 | 22 | 23 | def query_speed(trail): 24 | m, d, b, a = trail["model"], trail["dataset"], 
trail["batchsize"], trail["amp"] 25 | info1 = colo.query("model1 == @m and batchsize1 == @b and dataset1 == @d and amp1 == @a") 26 | info2 = colo.query("model2 == @m and batchsize2 == @b and dataset2 == @d and amp2 == @a") 27 | 28 | speed1, len1, speed2, len2 = 0, len(info1), 0, len(info2) 29 | if len1 > 0: 30 | speed1 = info1["speed1"].sum() 31 | if len2 > 0: 32 | speed2 = info2["speed2"].sum() 33 | 34 | avg = (speed1 + speed2) / max(len1 + len2, 1) 35 | 36 | return round(avg, 3) 37 | 38 | 39 | """Compare with original manual labeling""" 40 | for i in range(len(single)): 41 | avg_speed = query_speed(single.loc[i]) 42 | single.at[i, "avg_speed"] = avg_speed 43 | if avg_speed < 0.85: 44 | single.at[i, "auto_label"] = 2 45 | elif avg_speed < 0.95: 46 | single.at[i, "auto_label"] = 1 47 | else: 48 | single.at[i, "auto_label"] = 0 49 | 50 | 51 | single = single.drop(columns=["dataset", "batchsize", "speed", "model"]) 52 | train_data, test_data, train_label, test_label = train_test_split( 53 | single.drop(columns="label"), single[["label"]], test_size=0.3, random_state=42 54 | ) 55 | 56 | config = {"prune_factor": 0.0001} 57 | model = PrimoClassifier(model="PrDT", model_config=config, hpo=None) 58 | model.fit(train_data, train_label) 59 | pred = model.predict(test_data) 60 | 61 | acc = metrics.accuracy_score(test_label, pred) 62 | print(f"acc: {acc:.3f}") 63 | -------------------------------------------------------------------------------- /simulation/analyzer/single_data.csv: -------------------------------------------------------------------------------- 1 | model,dataset,batchsize,amp,speed,gpu_util,gmem_util,gmem,label 2 | BERT,SQuAD,32,0,84.872,99.689,62.484,20162.0,2 3 | BERT,SQuAD,32,1,119.911,99.593,74.23,16334.0,2 4 | DCGAN,LSUN,32,0,1986.707,57.778,20.222,2458.0,1 5 | DCGAN,LSUN,32,1,1561.581,34.9,11.8,2730.0,0 6 | DCGAN,LSUN,64,0,3275.832,72.9,31.0,2322.0,2 7 | DCGAN,LSUN,64,1,2936.807,47.5,17.167,2330.0,1 8 | DCGAN,LSUN,128,0,4659.802,94.0,41.0,3096.0,2 9 | DCGAN,LSUN,128,1,6097.188,66.333,32.417,2684.0,2 10 | EfficientNet,CIFAR-10,32,0,814.137,24.5,3.0,2308.0,0 11 | EfficientNet,CIFAR-10,32,1,680.977,20.571,1.286,2176.0,0 12 | EfficientNet,CIFAR-10,64,0,1922.826,35.4,8.0,2766.0,1 13 | EfficientNet,CIFAR-10,64,1,1371.123,25.5,3.167,2290.0,1 14 | EfficientNet,CIFAR-10,128,0,3465.072,44.833,16.167,2580.0,1 15 | EfficientNet,CIFAR-10,128,1,2802.744,33.571,7.286,2358.0,1 16 | LSTM,Wikitext2,64,0,4496.071,97.0,74.875,3884.0,2 17 | LSTM,Wikitext2,64,1,4495.889,73.625,60.0,3750.0,2 18 | LSTM,Wikitext2,128,0,3951.755,76.2,63.75,5616.0,2 19 | LSTM,Wikitext2,128,1,4440.872,63.706,55.824,5320.0,2 20 | MobileNetV2,CIFAR-10,32,0,1108.427,42.8,17.0,2508.0,1 21 | MobileNetV2,CIFAR-10,32,1,895.251,28.833,7.333,2320.0,1 22 | MobileNetV2,CIFAR-10,64,0,2228.976,69.5,39.5,3010.0,1 23 | MobileNetV2,CIFAR-10,64,1,1710.877,41.833,16.167,2528.0,1 24 | MobileNetV2,CIFAR-10,128,0,3516.037,98.833,65.333,4078.0,2 25 | MobileNetV2,CIFAR-10,128,1,2866.76,55.714,28.286,2992.0,1 26 | MobileNetV3,ImageNet,32,0,1304.503,45.409,19.727,2694.0,0 27 | MobileNetV3,ImageNet,32,1,978.255,34.966,7.931,2382.0,0 28 | MobileNetV3,ImageNet,64,0,2537.436,80.522,44.261,3266.0,1 29 | MobileNetV3,ImageNet,64,1,1908.093,52.355,18.065,2706.0,0 30 | MobileNetV3,ImageNet,128,0,3353.137,100.0,60.912,4428.0,2 31 | MobileNetV3,ImageNet,128,1,3482.526,86.121,33.939,3216.0,1 32 | NeuMF,MovieLens,64,0,15393.307,10.483,2.023,2050.0,0 33 | NeuMF,MovieLens,64,1,12173.612,9.836,2.009,2050.0,0 34 | 
NeuMF,MovieLens,128,0,29773.989,10.483,2.011,2050.0,0 35 | NeuMF,MovieLens,128,1,23040.359,9.643,1.983,2050.0,0 36 | PPO,LunarLander,32,0,5.156,12.425,0.0,2051.0,0 37 | PPO,LunarLander,64,0,14.246,11.949,0.0,2051.0,0 38 | PPO,LunarLander,128,0,46.507,14.96,0.0,2051.0,0 39 | PointNet,ShapeNet,32,0,131.491,11.533,9.667,3968.0,0 40 | PointNet,ShapeNet,32,1,136.02,7.0,5.615,3334.0,0 41 | PointNet,ShapeNet,64,0,138.049,7.75,6.571,6346.0,0 42 | PointNet,ShapeNet,64,1,135.349,6.24,5.32,4532.0,0 43 | PointNet,ShapeNet,128,0,140.209,8.982,7.589,10474.0,0 44 | PointNet,ShapeNet,128,1,144.304,5.551,4.878,6976.0,0 45 | ResNet18,CIFAR-10,32,0,1763.905,58.667,25.0,2360.0,0 46 | ResNet18,CIFAR-10,32,1,1649.038,40.667,20.333,2330.0,1 47 | ResNet18,CIFAR-10,64,0,3711.999,80.667,47.0,2660.0,2 48 | ResNet18,CIFAR-10,64,1,4038.576,62.0,34.0,3642.0,2 49 | ResNet18,CIFAR-10,128,0,4903.86,96.25,67.0,3072.0,2 50 | ResNet18,CIFAR-10,128,1,5699.482,78.0,43.667,4036.0,2 51 | ResNet50,ImageNet,32,0,432.563,100.0,80.754,5518.0,2 52 | ResNet50,ImageNet,32,1,670.679,95.864,67.568,3838.0,2 53 | ResNet50,ImageNet,64,0,435.943,100.0,80.899,8570.0,2 54 | ResNet50,ImageNet,64,1,708.822,97.735,72.518,5376.0,2 55 | ResNet50,ImageNet,128,0,465.651,100.0,86.927,13326.0,2 56 | ResNet50,ImageNet,128,1,765.473,99.66,75.566,7992.0,2 57 | TD3,BipedalWalker,32,0,16.292,12.828,0.0,2059.0,0 58 | TD3,BipedalWalker,64,0,59.807,13.303,0.0,2059.0,0 59 | TD3,BipedalWalker,128,0,67.445,14.089,0.0,2059.0,0 60 | Transformer,Multi30k,32,0,464.423,53.645,17.258,11949.258,2 61 | Transformer,Multi30k,64,0,857.9,75.164,31.806,11801.104,2 62 | VGG,CIFAR-10,32,0,2998.72,44.0,23.0,3386.0,0 63 | VGG,CIFAR-10,32,1,3556.88,48.0,19.0,2762.0,0 64 | VGG,CIFAR-10,64,0,7430.143,51.0,30.0,3584.0,1 65 | VGG,CIFAR-10,64,1,5112.546,34.0,16.5,3588.0,1 66 | VGG,CIFAR-10,128,0,7260.35,50.5,34.0,3990.0,2 67 | VGG,CIFAR-10,128,1,7009.529,43.667,22.333,2666.0,2 68 | -------------------------------------------------------------------------------- /simulation/data/Venus/vc_config.csv: -------------------------------------------------------------------------------- 1 | ,num 2 | vcEwI,9 3 | vcWoR,5 4 | vcHvQ,8 5 | vcvGl,20 6 | vc8Gr,6 7 | vcKeu,12 8 | vcKrE,4 9 | vcYVn,11 10 | vchbv,4 11 | vcLTP,8 12 | vchA3,3 13 | vcJsw,32 14 | vcefl,10 15 | vcvlY,2 16 | vcgkz,1 17 | -------------------------------------------------------------------------------- /simulation/data/dict2csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pickle 3 | 4 | # df = pd.read_csv("philly_vc.csv") 5 | # d = dict(zip(df["vc"].values, df["node num"].values)) 6 | 7 | # with open(f"./vc_dict_homo.pkl", "wb") as f: 8 | # pickle.dump(d, f, pickle.HIGHEST_PROTOCOL) 9 | 10 | cluster_list = ["Venus", "Earth", "Saturn", "Uranus", "Philly"] 11 | 12 | for i, v in enumerate(cluster_list): 13 | vc_dict = pd.read_pickle(v + "/vc_dict_homo.pkl") 14 | df = pd.DataFrame.from_dict(vc_dict, orient="index", columns=["num"]) 15 | df.to_csv(v + "/vc_config.csv") 16 | 17 | print(df.to_dict()["num"]) 18 | -------------------------------------------------------------------------------- /simulation/data/log_process.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | from pathlib import Path 4 | 5 | """ 6 | Processing log files for simulation 7 | """ 8 | 9 | 10 | def main(args): 11 | cluster = args.cluster 12 | 13 | if not Path(f"./{cluster}").exists(): 14 | 
Path(f"./{cluster}").mkdir() 15 | 16 | if cluster == "Philly": 17 | logfile = Path(f"../../../analysis/1_compare with Philly trace/philly_trace.csv") 18 | else: 19 | logfile = Path(f"../../../data/{cluster}/cluster_log.csv") 20 | 21 | df = pd.read_csv(logfile, parse_dates=["submit_time", "start_time", "end_time"]) 22 | df = df.sort_values(by="submit_time") 23 | df.reset_index(drop=True, inplace=True) 24 | 25 | if cluster == "Neptune": 26 | df = df[df["vc"] != "vc7Bz"] 27 | df = df[df["vc"] != "vcIoD"] 28 | df = df[df["vc"] != "vcftk"] 29 | df = df[df["vc"] != "vc5LC"] 30 | df = df[df["vc"] != "vcEwI"] 31 | 32 | df.loc[df["vc"] == "vcVvI", "vc"] = "vcUV3" 33 | df.loc[df["vc"] == "vcrsE", "vc"] = "vcBUL" 34 | df.loc[df["vc"] == "vcHyk", "vc"] = "vcBUL" 35 | 36 | df.reset_index(drop=True, inplace=True) 37 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 38 | 39 | elif cluster == "Saturn": 40 | df = df[df["vc"] != "vc7Bz"] 41 | df = df[df["vc"] != "vcHcQ"] 42 | df = df[df["vc"] != "vck1d"] 43 | df = df[df["vc"] != "vcj72"] 44 | df = df[df["vc"] != "vcIya"] 45 | df = df[df["vc"] != "vcygX"] 46 | df = df[df["vc"] != "vcxqr"] 47 | df = df[df["vc"] != "vcsgw"] 48 | 49 | df.reset_index(drop=True, inplace=True) 50 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 51 | 52 | elif cluster == "Uranus": 53 | df = df[df["vc"] != "vc7Bz"] 54 | df = df[df["vc"] != "vczGr"] 55 | df = df[df["vc"] != "vciN1"] 56 | df = df[df["vc"] != "vcV7h"] 57 | df = df[df["vc"] != "vcRAl"] 58 | df = df[df["vc"] != "vcvcM"] 59 | df = df[df["vc"] != "vc1z2"] 60 | 61 | df.loc[df["vc"] == "vcVvI", "vc"] = "vcUV3" 62 | df.loc[df["vc"] == "vcxqr", "vc"] = "vcUV3" 63 | df.loc[df["vc"] == "vcsBT", "vc"] = "vcUV3" 64 | df.loc[df["vc"] == "vcygX", "vc"] = "vcUV3" 65 | df.loc[df["vc"] == "vcHyk", "vc"] = "vcOlr" 66 | df.loc[df["vc"] == "vcRDh", "vc"] = "vc7hD" 67 | df.loc[df["vc"] == "vcFsC", "vc"] = "vc7hD" 68 | 69 | df.reset_index(drop=True, inplace=True) 70 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 71 | 72 | elif cluster == "Earth": 73 | df = df[df["vc"] != "vcp4O"] 74 | df = df[df["vc"] != "vcvcM"] 75 | df = df[df["vc"] != "vcXrB"] 76 | df = df[df["vc"] != "vc7hD"] 77 | df = df[df["vc"] != "vcIya"] 78 | df = df[df["vc"] != "vc8Sj"] 79 | df = df[df["vc"] != "vcLJZ"] 80 | 81 | df.loc[df["vc"] == "vcxS0", "vc"] = "vc3sl" 82 | 83 | df.reset_index(drop=True, inplace=True) 84 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 85 | 86 | elif cluster == "Venus": 87 | df = df[df["vc"] != "vcEhP"] 88 | df = df[df["vc"] != "vcIya"] 89 | df = df[df["vc"] != "vcJLV"] 90 | df = df[df["vc"] != "vcJkd"] 91 | df = df[df["vc"] != "vcsBT"] 92 | 93 | df.loc[df["vc"] == "vcbIW", "vc"] = "vcvGl" 94 | df.loc[df["vc"] == "vc6YE", "vc"] = "vcvGl" 95 | df.loc[df["vc"] == "vcOhe", "vc"] = "vcKeu" 96 | df.loc[df["vc"] == "vccJW", "vc"] = "vcKeu" 97 | df.loc[df["vc"] == "vcP2J", "vc"] = "vchA3" 98 | 99 | df.reset_index(drop=True, inplace=True) 100 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 101 | 102 | elif cluster == "Philly": 103 | df = df[df["vc"] != "795a4c"] 104 | df = df[df["vc"] != "51b7ef"] 105 | df = df[df["vc"] != "925e2b"] 106 | df = df[df["vc"] != "23dbec"] 107 | 108 | df.reset_index(drop=True, inplace=True) 109 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 110 | 111 | else: 112 | raise ValueError("Wrong Cluster Name.") 113 | 114 | 115 | if __name__ == "__main__": 116 | parser = argparse.ArgumentParser(description="Job Log Processor") 117 | parser.add_argument("-c", "--cluster", 
default="Earth", type=str, help="Cluster Name") 118 | args = parser.parse_args() 119 | main(args) 120 | -------------------------------------------------------------------------------- /simulation/data/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | :< 0] 19 | df = df.sort_values(by="submit_time") 20 | 21 | # VC filter 22 | vc_df = pd.read_csv(dir + "/vc_config.csv", index_col=0) 23 | vc_list = vc_df.index.to_list() 24 | df = df[df["vc"].isin(vc_list)] 25 | 26 | df = df[df["submit_time"] >= pd.Timestamp(start)] 27 | df["submit_time"] = df["submit_time"].apply(lambda x: int(datetime.datetime.timestamp(pd.Timestamp(x)))) 28 | 29 | # Normalizing 30 | df["submit_time"] = df["submit_time"] - df.iloc[0]["submit_time"] 31 | 32 | # Slicing val data 33 | begin = (pd.Timestamp(test_date_range[0]) - pd.Timestamp(start)).total_seconds() 34 | end = (pd.Timestamp(test_date_range[1]) - pd.Timestamp(start)).total_seconds() 35 | val_df = df[(df["submit_time"] >= begin) & (df["submit_time"] <= end)] 36 | # Slicing train data 37 | # | (df['submit_time'] > pd.Timestamp(test_date_range[1]))] 38 | train_df = df[(df["submit_time"] < begin)] 39 | 40 | # Filter user, vc not in val data around 9% jobs be filtered 41 | val_users = val_df["user"].unique() 42 | 43 | val_vcs = val_df["vc"].unique() 44 | 45 | train_df = train_df[train_df["user"].isin(val_users)] 46 | train_df = train_df[train_df["vc"].isin(val_vcs)] # no jobs be filtered 47 | 48 | train_df = train_df.sort_values(by="submit_time") 49 | train_df.reset_index(inplace=True, drop=True) 50 | 51 | val_df = val_df.sort_values(by="submit_time") 52 | val_df.reset_index(inplace=True, drop=True) 53 | 54 | return train_df, val_df 55 | 56 | 57 | def logger_init(file): 58 | logger = logging.getLogger() 59 | handler_file = logging.FileHandler(f"{file}.log", "w") 60 | handler_stream = logging.StreamHandler(sys.stdout) 61 | 62 | logger.setLevel(logging.INFO) 63 | handler_file.setLevel(logging.INFO) 64 | handler_stream.setLevel(logging.INFO) 65 | 66 | formatter = logging.Formatter("%(asctime)s | %(processName)s | %(message)s", datefmt="%Y %b %d %H:%M:%S") 67 | handler_file.setFormatter(formatter) 68 | handler_stream.setFormatter(formatter) 69 | 70 | logger.addHandler(handler_file) 71 | logger.addHandler(handler_stream) 72 | 73 | return logger 74 | -------------------------------------------------------------------------------- /simulation/job.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Job(dict): 5 | def __init__(self, series): 6 | super(Job, self).__init__() 7 | self.update(series.to_dict()) 8 | # Priority Define by Estimator, Random Means No History Data Found 9 | self.update({"nodes": [], "priority": -1, "random": 0}) 10 | # Profiler 11 | self.update({"profiled": 0, "profqueue": 0, "toskip": 0}) 12 | # Co-locate 13 | # NOTE: exclusive: {0: colocate, 1: exclusive} 14 | # NOTE: rate: the ratio of colocate and exclusive execution performance 15 | # NOTE: sharescore: 0, 1, 2 16 | self.update({"exclusive": 1, "rate": 1, "sharescore": None, "Tcolocate": 0, "Tdelocate": 0}) 17 | 18 | def set_ckpt_time(self, time): 19 | self.last_ckpt_time = time 20 | 21 | def get_ckpt_time(self): 22 | return self.last_ckpt_time 23 | 24 | 25 | class Trace: 26 | def __init__(self): 27 | self.job_list = [] 28 | 29 | def append_job(self, job): 30 | self.job_list.append(job) 31 | 32 | def job_num(self): 33 | return len(self.job_list) 34 | 35 | def 
profiler_remain_job_num(self): 36 | num = 0 37 | for job in self.job_list: 38 | if job["toskip"] == 0: 39 | num += 1 40 | return num 41 | 42 | def sort_jobs(self, key): 43 | self.job_list.sort(key=lambda x: x.__getitem__(key)) 44 | 45 | def vc_trace(self, vc_name): 46 | vc_trace = Trace() 47 | for job in self.job_list: 48 | if job["vc"] == vc_name: 49 | vc_trace.append_job(job) 50 | vc_trace.sort_jobs("submit_time") 51 | return vc_trace 52 | 53 | def reset_trace(self): 54 | for job in self.job_list: 55 | if job["toskip"] == 0: 56 | job["start_time"] = sys.maxsize 57 | job["end_time"] = sys.maxsize 58 | job["nodes"] = [] 59 | -------------------------------------------------------------------------------- /simulation/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .sjf import ShortestJobFirst 2 | from .srtf import ShortestRemainingTimeFirst 3 | from .fifo import FirstInFirstOut 4 | from .qssf import QuasiShortestServiceFirst 5 | from .lucid import Lucid 6 | from .tiresias import Tiresias 7 | -------------------------------------------------------------------------------- /simulation/policy/fifo.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class FirstInFirstOut(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(FirstInFirstOut, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "fifo" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if self.time == job["end_time"]: 18 | job["remain"] = 0 19 | job["status"] = "end" 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | 24 | """2. Allocate New / Pending Jobs""" 25 | # New Job 26 | for idx in range(prev_index, self.total_job_num): 27 | job = self.trace.job_list[idx] 28 | if job["submit_time"] == self.time: 29 | job["status"] = "pend" 30 | self.que_list.append(job) 31 | prev_index = idx 32 | elif job["submit_time"] > self.time: 33 | break 34 | 35 | # Pend Job 36 | # NOTE: Sort by submit time -- FIFO 37 | self.que_list.sort(key=lambda x: x.__getitem__("submit_time")) 38 | que_ls = self.que_list.copy() # Avoid list.remove() issue 39 | for job in que_ls: 40 | if self.job_placer(job): 41 | job["start_time"] = self.time 42 | job["end_time"] = job["start_time"] + job["duration"] 43 | job["queue"] = self.time - job["submit_time"] 44 | job["status"] = "run" 45 | self.que_list.remove(job) 46 | self.run_list.append(job) 47 | else: 48 | break 49 | 50 | """3. 
Log & Result Recorder""" 51 | if self.time % 10000 == 0: 52 | self.runtime_log() 53 | 54 | # Sample Cluster State Every Minute 55 | if self.time % 60 == 0: 56 | self.seq_recorder() 57 | 58 | self.time += 1 59 | 60 | self.log_recorder(self._name) 61 | -------------------------------------------------------------------------------- /simulation/policy/placer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S-Lab-System-Group/Lucid/63685a3ab7d15d8e940bb47ef98b6d5cca472b13/simulation/policy/placer/__init__.py -------------------------------------------------------------------------------- /simulation/policy/placer/consolidate.py: -------------------------------------------------------------------------------- 1 | class ConsolidatePlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidate" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | Enforce consolidate placement 9 | Node list selection 10 | -- job_gpu_num <= 8 11 | -- job_gpu_num > 8 and job_gpu_num % 8 == 0 12 | -- job_gpu_num > 8 and job_gpu_num % 8 != 0 13 | """ 14 | 15 | def update_avail_nodes(self): 16 | self.avail_nodes = self.vc.avail_node_list() 17 | 18 | def consolidateSelect(self, job_gpu_num): 19 | self.update_avail_nodes() 20 | alloc_nodes = [] 21 | if job_gpu_num <= 8: 22 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 23 | for node in nodes: 24 | if node.free_gpus >= job_gpu_num: 25 | alloc_nodes.append((node, job_gpu_num)) 26 | return True, alloc_nodes 27 | return False, alloc_nodes 28 | else: 29 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 30 | if job_gpu_num % 8 == 0: 31 | node_num = job_gpu_num // 8 32 | for node in nodes: 33 | if node.free_gpus < 8: 34 | return False, alloc_nodes 35 | 36 | if node.free_gpus == 8 and node_num > 0: 37 | alloc_nodes.append((node, 8)) 38 | node_num -= 1 39 | 40 | if node_num == 0: 41 | return True, alloc_nodes 42 | else: 43 | node_num = (job_gpu_num // 8) + 1 44 | for node in nodes: 45 | if node.free_gpus == 8 and node_num > 1: 46 | alloc_nodes.append((node, 8)) 47 | node_num -= 1 48 | continue 49 | 50 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 51 | alloc_nodes.append((node, job_gpu_num % 8)) 52 | node_num -= 1 53 | return True, alloc_nodes 54 | 55 | return False, alloc_nodes 56 | 57 | def place(self, job): 58 | vc_free_gpu_num = self.vc.vc_free_gpus() 59 | job_gpu_num = job["gpu_num"] 60 | 61 | # Total Free GPU Check 62 | if vc_free_gpu_num < job_gpu_num: 63 | return False 64 | 65 | if self.vc._num_gpus_per_node != 8: 66 | raise NotImplementedError 67 | 68 | select_flag, alloc_nodes = self.consolidateSelect(job_gpu_num) 69 | 70 | """ Placement """ 71 | if select_flag: 72 | for (node, req_gpu) in alloc_nodes: 73 | allocate_gpus = node.allocate_gpu(req_gpu, job) 74 | job["nodes"].append({node.node_name: allocate_gpus}) 75 | return True 76 | else: 77 | return False 78 | -------------------------------------------------------------------------------- /simulation/policy/placer/consolidateFirst.py: -------------------------------------------------------------------------------- 1 | class ConsolidateFirstPlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidateFirst" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | consolidate first placement 9 | Try consolidate first, if fail, try random placement 10 | Random placement: place to idlest node first 11 | """ 12 | 
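# Hypothetical walkthrough of the fallback above (toy numbers, for
# illustration only): with two nodes holding 5 and 3 free GPUs,
# consolidateFirstSelect(4) finds a single node with >= 4 free GPUs and
# consolidates -> [(node5, 4)]. For a 6-GPU job no single node fits, so it
# falls back to randomSelect(6), which fills the idlest node first
# -> [(node5, 5), (node3, 1)].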
13 | def update_avail_nodes(self): 14 | self.avail_nodes = self.vc.avail_node_list() 15 | 16 | def randomSelect(self, job_gpu_num): 17 | self.update_avail_nodes() 18 | alloc_nodes = [] 19 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 20 | for node in nodes: 21 | if node.free_gpus < job_gpu_num: 22 | alloc_nodes.append((node, node.free_gpus)) 23 | job_gpu_num -= node.free_gpus 24 | continue 25 | else: 26 | alloc_nodes.append((node, job_gpu_num)) 27 | return True, alloc_nodes 28 | return False, alloc_nodes 29 | 30 | def consolidateFirstSelect(self, job_gpu_num): 31 | alloc_nodes = [] 32 | if job_gpu_num <= 8: 33 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 34 | for node in nodes: 35 | if node.free_gpus >= job_gpu_num: 36 | alloc_nodes.append((node, job_gpu_num)) 37 | return True, alloc_nodes 38 | return self.randomSelect(job_gpu_num) 39 | else: 40 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 41 | if job_gpu_num % 8 == 0: 42 | node_num = job_gpu_num // 8 43 | for node in nodes: 44 | if node.free_gpus < 8: 45 | return self.randomSelect(job_gpu_num) 46 | 47 | if node.free_gpus == 8 and node_num > 0: 48 | alloc_nodes.append((node, 8)) 49 | node_num -= 1 50 | 51 | if node_num == 0: 52 | return True, alloc_nodes 53 | else: 54 | node_num = (job_gpu_num // 8) + 1 55 | for node in nodes: 56 | if node.free_gpus == 8 and node_num > 1: 57 | alloc_nodes.append((node, 8)) 58 | node_num -= 1 59 | continue 60 | 61 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 62 | alloc_nodes.append((node, job_gpu_num % 8)) 63 | node_num -= 1 64 | return True, alloc_nodes 65 | 66 | return self.randomSelect(job_gpu_num) 67 | 68 | def place(self, job): 69 | vc_free_gpu_num = self.vc.vc_free_gpus() 70 | job_gpu_num = job["gpu_num"] 71 | 72 | # Total Free GPU Check 73 | if vc_free_gpu_num < job_gpu_num: 74 | return False 75 | 76 | if self.vc._num_gpus_per_node != 8: 77 | raise NotImplementedError 78 | 79 | select_flag, alloc_nodes = self.consolidateFirstSelect(job_gpu_num) 80 | 81 | """ Placement """ 82 | if select_flag: 83 | for (node, req_gpu) in alloc_nodes: 84 | allocate_gpus = node.allocate_gpu(req_gpu, job) 85 | job["nodes"].append({node.node_name: allocate_gpus}) 86 | return True 87 | else: 88 | return False 89 | -------------------------------------------------------------------------------- /simulation/policy/placer/consolidateWithShare.py: -------------------------------------------------------------------------------- 1 | class ConsolidateWithSharePlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidate_share" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | Enforce consolidate placement 9 | Node list selection 10 | -- job_gpu_num <= 8 11 | -- job_gpu_num > 8 and job_gpu_num % 8 == 0 12 | -- job_gpu_num > 8 and job_gpu_num % 8 != 0 13 | """ 14 | 15 | def update_avail_nodes(self): 16 | self.avail_nodes = self.vc.avail_node_list() 17 | 18 | def consolidateSelect(self, job_gpu_num): 19 | alloc_nodes = [] 20 | self.update_avail_nodes() 21 | if job_gpu_num <= 8: 22 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 23 | for node in nodes: 24 | if node.free_gpus >= job_gpu_num: 25 | alloc_nodes.append((node, job_gpu_num)) 26 | return True, alloc_nodes 27 | return False, alloc_nodes 28 | else: 29 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 30 | if job_gpu_num % 8 == 0: 31 | node_num = job_gpu_num // 8 32 | for node in
nodes: 33 | if node.free_gpus < 8: 34 | return False, alloc_nodes 35 | 36 | if node.free_gpus == 8 and node_num > 0: 37 | alloc_nodes.append((node, 8)) 38 | node_num -= 1 39 | 40 | if node_num == 0: 41 | return True, alloc_nodes 42 | else: 43 | node_num = (job_gpu_num // 8) + 1 44 | for node in nodes: 45 | if node.free_gpus == 8 and node_num > 1: 46 | alloc_nodes.append((node, 8)) 47 | node_num -= 1 48 | continue 49 | 50 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 51 | alloc_nodes.append((node, job_gpu_num % 8)) 52 | node_num -= 1 53 | return True, alloc_nodes 54 | 55 | return False, alloc_nodes 56 | 57 | def place(self, job): 58 | vc_free_gpu_num = self.vc.vc_free_gpus() 59 | job_gpu_num = job["gpu_num"] 60 | 61 | # Total Free GPU Check 62 | if vc_free_gpu_num < job_gpu_num: 63 | return False 64 | 65 | if self.vc._num_gpus_per_node != 8: 66 | raise NotImplementedError 67 | 68 | select_flag, alloc_nodes = self.consolidateSelect(job_gpu_num) 69 | 70 | """ Placement """ 71 | if select_flag: 72 | for (node, req_gpu) in alloc_nodes: 73 | allocate_gpus = node.allocate_gpu(req_gpu, job) 74 | job["nodes"].append({node.node_name: allocate_gpus}) 75 | return True 76 | else: 77 | return False 78 | 79 | def colocateSelect(self, job, target_job): 80 | # nodes = sorted(target_nodes, key=lambda x: len(list(x.values())[0]), reverse=True) 81 | # job_gpu_num = job["gpu_num"] 82 | alloc_nodes = [] 83 | target_nodes = target_job["nodes"] 84 | for node_dict in target_nodes: 85 | alloc_nodes.append((self.vc.get_node(list(node_dict.keys())[0]), list(node_dict.values())[0])) 86 | return True, alloc_nodes 87 | 88 | def colcoate_place(self, job, target_job, gutil, gmem): 89 | assert job["gpu_num"] == target_job["gpu_num"], "Need to implement" 90 | select_flag, alloc_nodes = self.colocateSelect(job, target_job) 91 | 92 | """ Placement """ 93 | if select_flag: 94 | for (node, gpu_list) in alloc_nodes: 95 | assert node.allocate_colocate_gpu(gpu_list, job, gutil, gmem) 96 | job["nodes"].append({node.node_name: gpu_list}) 97 | return True 98 | else: 99 | raise NotImplementedError 100 | -------------------------------------------------------------------------------- /simulation/policy/placer/random.py: -------------------------------------------------------------------------------- 1 | class RandomPlacement: 2 | def __init__(self, vc): 3 | self.vc = vc 4 | self.name = "random" 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """Random placement""" 8 | 9 | def update_avail_nodes(self): 10 | self.avail_nodes = self.vc.avail_node_list() 11 | 12 | def randomSelect(self, job_gpu_num): 13 | self.update_avail_nodes() 14 | alloc_nodes = [] 15 | 16 | for node in self.avail_nodes: 17 | if node.free_gpus < job_gpu_num: 18 | alloc_nodes.append((node, node.free_gpus)) 19 | job_gpu_num -= node.free_gpus 20 | continue 21 | else: 22 | alloc_nodes.append((node, job_gpu_num)) 23 | return True, alloc_nodes 24 | return False, alloc_nodes 25 | 26 | def place(self, job): 27 | vc_free_gpu_num = self.vc.vc_free_gpus() 28 | job_gpu_num = job["gpu_num"] 29 | 30 | # Total Free GPU Check 31 | if vc_free_gpu_num < job_gpu_num: 32 | return False 33 | 34 | select_flag, alloc_nodes = self.randomSelect(job_gpu_num) 35 | 36 | """ Placement """ 37 | if select_flag: 38 | for (node, req_gpu) in alloc_nodes: 39 | allocate_gpus = node.allocate_gpu(req_gpu, job) 40 | job["nodes"].append({node.node_name: allocate_gpus}) 41 | return True 42 | else: 43 | return False 44 | 
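All four placers above share one contract: place(job) either returns False and leaves the job untouched, or returns True after appending {node_name: [gpu_indices]} entries to job["nodes"] via node.allocate_gpu(). A minimal sketch of that contract follows; ToyNode and ToyVC are hypothetical stand-ins for the real classes in simulation/cluster.py (not reproduced in this listing) and implement only the attributes the placers touch.

# Sketch: exercising ConsolidatePlacement against hypothetical stand-in
# classes. Assumes it is run from simulation/ with the policy package's
# own imports resolvable.
from policy.placer.consolidate import ConsolidatePlacement

class ToyNode:
    def __init__(self, name, gpus=8):
        self.node_name = name
        self.free_gpus = gpus
        self._cursor = 0

    def allocate_gpu(self, num, job):
        # Hand out `num` GPU indices and shrink the free pool.
        gpus = list(range(self._cursor, self._cursor + num))
        self._cursor += num
        self.free_gpus -= num
        return gpus

class ToyVC:
    _num_gpus_per_node = 8  # the placers hard-require 8-GPU nodes

    def __init__(self, nodes):
        self.nodes = nodes

    def avail_node_list(self):
        return [n for n in self.nodes if n.free_gpus > 0]

    def vc_free_gpus(self):
        return sum(n.free_gpus for n in self.nodes)

vc = ToyVC([ToyNode("n0"), ToyNode("n1")])
placer = ConsolidatePlacement(vc)
job = {"gpu_num": 12, "nodes": []}

# 12 > 8 and 12 % 8 != 0, so consolidateSelect packs one full 8-GPU node
# plus the 4-GPU remainder on a second node.
assert placer.place(job)
print(job["nodes"])  # [{'n0': [0, 1, ..., 7]}, {'n1': [0, 1, 2, 3]}]

The same stand-ins work for ConsolidateFirstPlacement and RandomPlacement, which differ only in how they build alloc_nodes; ConsolidateWithSharePlacement would additionally need node.allocate_colocate_gpu() and vc.get_node() for its colocation path.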
-------------------------------------------------------------------------------- /simulation/policy/qssf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class QuasiShortestServiceFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts, estimator): 6 | super(QuasiShortestServiceFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self.estimator = estimator 8 | self._name = "qssf" 9 | 10 | def simulate(self): 11 | prev_index = 0 12 | 13 | while self.end_job_num != self.total_job_num: 14 | new_job_num = 0 15 | 16 | """1. Check & Release End Jobs""" 17 | run_ls = self.run_list.copy() # Avoid list.remove() issue 18 | for job in run_ls: 19 | if self.time == job["end_time"]: 20 | job["remain"] = 0 21 | job["status"] = "end" 22 | self.end_job_num += 1 23 | assert self._vc.release_resource(job) == True 24 | self.run_list.remove(job) 25 | if self.estimator.name != "LGBEstimator" and self.estimator.name != "PhillyEstimator": 26 | self.estimator.update_train_data(job) 27 | 28 | """2. Check New Jobs""" 29 | # New Job 30 | for idx in range(prev_index, self.total_job_num): 31 | job = self.trace.job_list[idx] 32 | if job["submit_time"] == self.time: 33 | job["status"] = "pend" 34 | self.que_list.append(job) 35 | prev_index = idx 36 | new_job_num += 1 37 | elif job["submit_time"] > self.time: 38 | break 39 | 40 | """3. Assign Priority If Exist Job Pending""" 41 | # NOTE: Sort by priority given by estimator -- QSSF 42 | # Only assign priority to the pending job, new job will sort by required gpu_num 43 | self.que_list.sort(key=lambda x: x.__getitem__("gpu_num")) 44 | if len(self.que_list) > new_job_num: 45 | for job in self.que_list: 46 | if job["priority"] == -1: 47 | job["priority"] = self.estimator.inference(job) 48 | self.que_list.sort(key=lambda x: x.__getitem__("priority")) 49 | 50 | """4. Allocate Job""" 51 | que_ls = self.que_list.copy() # Avoid list.remove() issue 52 | for job in que_ls: 53 | if self.job_placer(job): 54 | job["start_time"] = self.time 55 | job["end_time"] = job["start_time"] + job["duration"] 56 | job["queue"] = self.time - job["submit_time"] 57 | job["status"] = "run" 58 | self.que_list.remove(job) 59 | self.run_list.append(job) 60 | else: 61 | break 62 | 63 | """5. Log & Result Recorder""" 64 | if self.time % 10000 == 0: 65 | self.runtime_log() 66 | 67 | # Sample Cluster State Every Minute 68 | if self.time % 60 == 0: 69 | self.seq_recorder() 70 | 71 | self.time += 1 72 | 73 | self.log_recorder(self._name) 74 | -------------------------------------------------------------------------------- /simulation/policy/sjf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class ShortestJobFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(ShortestJobFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "sjf" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if self.time == job["end_time"]: 18 | job["remain"] = 0 19 | job["status"] = "end" 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | 24 | """2. 
Allocate New / Pending Jobs""" 25 | # New Job 26 | for idx in range(prev_index, self.total_job_num): 27 | job = self.trace.job_list[idx] 28 | if job["submit_time"] == self.time: 29 | job["status"] = "pend" 30 | self.que_list.append(job) 31 | prev_index = idx 32 | elif job["submit_time"] > self.time: 33 | break 34 | 35 | # Pend Job 36 | # NOTE: Sort by duration -- SJF 37 | self.que_list.sort(key=lambda x: x.__getitem__("duration")) 38 | que_ls = self.que_list.copy() # Avoid list.remove() issue 39 | for job in que_ls: 40 | if self.job_placer(job): 41 | job["start_time"] = self.time 42 | job["end_time"] = job["start_time"] + job["duration"] 43 | job["queue"] = self.time - job["submit_time"] 44 | job["status"] = "run" 45 | self.que_list.remove(job) 46 | self.run_list.append(job) 47 | else: 48 | break 49 | 50 | """3. Log & Result Recorder""" 51 | if self.time % 10000 == 0: 52 | self.runtime_log() 53 | 54 | # Sample Cluster State Every Minute 55 | if self.time % 60 == 0: 56 | self.seq_recorder() 57 | 58 | self.time += 1 59 | 60 | self.log_recorder(self._name) 61 | -------------------------------------------------------------------------------- /simulation/policy/srtf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class ShortestRemainingTimeFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(ShortestRemainingTimeFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "srtf" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if job["remain"] == 0: 18 | job["status"] = "end" 19 | job["end_time"] = self.time 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | else: 24 | job["remain"] -= 1 25 | 26 | """2. Check New Jobs """ 27 | for idx in range(prev_index, self.total_job_num): 28 | job = self.trace.job_list[idx] 29 | if job["submit_time"] == self.time: 30 | job["status"] = "pend" 31 | self.que_list.append(job) 32 | prev_index = idx 33 | elif job["submit_time"] > self.time: 34 | break 35 | 36 | """3. Select Job to Preempt or Run """ 37 | # NOTE: Sort by remain -- SRTF 38 | 39 | current_job = self.que_list + self.run_list 40 | current_job.sort(key=lambda x: x.__getitem__("remain")) 41 | 42 | quota = self._vc.total_gpus 43 | preempt_list = [] 44 | prerun_list = [] 45 | for job in current_job: 46 | if job.__getitem__("gpu_num") <= quota: 47 | quota -= job.__getitem__("gpu_num") 48 | if job["status"] == "pend": 49 | prerun_list.append(job) 50 | elif job["status"] == "run": 51 | preempt_list.append(job) 52 | 53 | """4. Preempt Job """ 54 | for job in preempt_list: 55 | job["ckpt_times"] += 1 56 | job.set_ckpt_time(self.time) 57 | job["status"] = "pend" 58 | job["remain"] += self.ckpt_overhead(job) 59 | assert self._vc.release_resource(job) == True 60 | job["nodes"] = [] 61 | 62 | if job not in self.que_list: 63 | self.que_list.append(job) 64 | if job in self.run_list: 65 | self.run_list.remove(job) 66 | 67 | """5. 
Allocate Job """ 68 | for job in prerun_list: 69 | if self.job_placer(job): 70 | job["status"] = "run" 71 | if job["ckpt_times"] == 0: 72 | job["start_time"] = self.time 73 | job["queue"] = self.time - job["submit_time"] 74 | else: 75 | job["queue"] = job["queue"] + (self.time - job.get_ckpt_time()) 76 | 77 | if job in self.que_list: 78 | self.que_list.remove(job) 79 | if job not in self.run_list: 80 | self.run_list.append(job) 81 | else: 82 | # May place fail because consolidate requirement 83 | if job not in self.que_list: 84 | self.que_list.append(job) 85 | continue 86 | 87 | """6. Log & Result Recorder""" 88 | if self.time % 10000 == 0: 89 | self.runtime_log() 90 | 91 | # Sample Cluster State Every Minute 92 | if self.time % 60 == 0: 93 | self.seq_recorder() 94 | 95 | self.time += 1 96 | 97 | self.log_recorder(self._name) 98 | -------------------------------------------------------------------------------- /simulation/policy/tiresias.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class Tiresias(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(Tiresias, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "tiresias" 8 | 9 | # Refer to https://github.com/SymbioticLab/Tiresias 10 | self._discretize_threshold = 18000 11 | self._low_priority_queue = [] 12 | self._high_priority_queue = [] 13 | 14 | def discretize_queue(self, job_queue): 15 | self._low_priority_queue = [] 16 | self._high_priority_queue = [] 17 | for job in job_queue: 18 | if job["priority"] > self._discretize_threshold: 19 | self._low_priority_queue.append(job) 20 | else: 21 | self._high_priority_queue.append(job) 22 | 23 | # Tiresias: Jobs in the same queue are scheduled in a FIFO order 24 | self._low_priority_queue.sort(key=lambda x: x.__getitem__("submit_time")) 25 | self._high_priority_queue.sort(key=lambda x: x.__getitem__("submit_time")) 26 | 27 | def simulate(self): 28 | prev_index = 0 29 | 30 | while self.end_job_num != self.total_job_num: 31 | 32 | """1. Check & Release End Jobs""" 33 | run_ls = self.run_list.copy() 34 | for job in run_ls: 35 | if job["remain"] == 0: 36 | job["status"] = "end" 37 | job["end_time"] = self.time 38 | self.end_job_num += 1 39 | assert self._vc.release_resource(job) == True 40 | self.run_list.remove(job) 41 | else: 42 | job["remain"] -= 1 43 | job["priority"] += job.__getitem__("gpu_num") 44 | 45 | """2. Check New Jobs """ 46 | for idx in range(prev_index, self.total_job_num): 47 | job = self.trace.job_list[idx] 48 | if job["submit_time"] == self.time: 49 | job["status"] = "pend" 50 | job["priority"] = 0 51 | self.que_list.append(job) 52 | prev_index = idx 53 | elif job["submit_time"] > self.time: 54 | break 55 | 56 | """3. Select Job to Preempt or Run """ 57 | preempt_list = [] 58 | prerun_list = [] 59 | # Refer to Pollux implementation, scheduling interval = 60s by default 60 | if self.time % 60 == 0: 61 | current_job = self.run_list + self.que_list 62 | quota = self._vc.total_gpus 63 | self.discretize_queue(current_job) 64 | current_job = self._high_priority_queue + self._low_priority_queue 65 | 66 | for job in current_job: 67 | if job.__getitem__("gpu_num") <= quota: 68 | quota -= job.__getitem__("gpu_num") 69 | if job["status"] == "pend": 70 | prerun_list.append(job) 71 | elif job["status"] == "run": 72 | preempt_list.append(job) 73 | 74 | """4. 
Preempt Job """ 75 | for job in preempt_list: 76 | job["ckpt_times"] += 1 77 | job.set_ckpt_time(self.time) 78 | job["status"] = "pend" 79 | job["remain"] += self.ckpt_overhead(job) 80 | assert self._vc.release_resource(job) == True 81 | job["nodes"] = [] 82 | 83 | if job not in self.que_list: 84 | self.que_list.append(job) 85 | if job in self.run_list: 86 | self.run_list.remove(job) 87 | 88 | """5. Allocate Job """ 89 | for job in prerun_list: 90 | if self.job_placer(job): 91 | job["status"] = "run" 92 | if job["ckpt_times"] == 0: 93 | job["start_time"] = self.time 94 | job["queue"] = self.time - job["submit_time"] 95 | else: 96 | job["queue"] = job["queue"] + (self.time - job.get_ckpt_time()) 97 | 98 | if job in self.que_list: 99 | self.que_list.remove(job) 100 | if job not in self.run_list: 101 | self.run_list.append(job) 102 | else: 103 | # May place fail because consolidate requirement 104 | if job not in self.que_list: 105 | self.que_list.append(job) 106 | continue 107 | 108 | """6. Log & Result Recorder""" 109 | if self.time % 10000 == 0: 110 | self.runtime_log() 111 | 112 | # Sample Cluster State Every Minute 113 | if self.time % 60 == 0: 114 | self.seq_recorder() 115 | 116 | self.time += 1 117 | 118 | self.log_recorder(self._name) 119 | -------------------------------------------------------------------------------- /simulation/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | from .lgf import LeastGPUFirstProfiler 2 | -------------------------------------------------------------------------------- /simulation/profiler/lgf.py: -------------------------------------------------------------------------------- 1 | from .profiler import Profiler 2 | 3 | 4 | class LeastGPUFirstProfiler(Profiler): 5 | def __init__(self, trace, scale, time_limit, prof_gpu_limit, placement, log_dir, logger, start_ts): 6 | super(LeastGPUFirstProfiler, self).__init__( 7 | trace, scale, time_limit, prof_gpu_limit, placement, log_dir, logger, start_ts 8 | ) 9 | self._name = "lgfprof" 10 | self.cluster_name = log_dir.split("/")[-1].split("_")[0] 11 | self.get_time_series_data(self.cluster_name) 12 | self.enable_scaling = True if self.cluster_name == "Venus" else False 13 | self.node_scaling_time = 0 14 | self.node_scaling_num = 1 15 | 16 | def profile(self): 17 | prev_index = 0 18 | 19 | while self.end_job_num != self.total_job_num: 20 | 21 | """1. Check & Release End Jobs""" 22 | run_ls = self.run_list.copy() # Avoid list.remove() issue 23 | for job in run_ls: 24 | if self.time == job["end_time"]: 25 | if job["toskip"] == 1: 26 | job["remain"] = 0 27 | job["status"] = "end" 28 | self.end_job_num += 1 29 | assert self._vc.release_resource(job) 30 | self.run_list.remove(job) 31 | 32 | """2. 
Allocate New / Pending Jobs""" 33 | # New Job 34 | for idx in range(prev_index, self.total_job_num): 35 | job = self.trace.job_list[idx] 36 | if job["gpu_num"] > self.gpu_limit: 37 | self.end_job_num += 1 38 | prev_index = idx + 1 39 | else: 40 | if job["submit_time"] == self.time: 41 | self.que_list.append(job) 42 | prev_index = idx 43 | elif job["submit_time"] > self.time: 44 | break 45 | 46 | # Pend Job 47 | # NOTE: Sort by Job GPU Num -- LGF 48 | self.que_list.sort(key=lambda x: x.__getitem__("gpu_num")) 49 | # self.que_list.sort(key=lambda x: x.__getitem__("submit_time")) 50 | que_ls = self.que_list.copy() 51 | for job in que_ls: 52 | if self.job_placer(job): 53 | job["profiled"] = 1 54 | job["start_time"] = self.time 55 | job["profqueue"] = self.time - job["submit_time"] 56 | job["queue"] = job["profqueue"] 57 | if job["duration"] <= self.time_limit: 58 | job["end_time"] = job["start_time"] + job["duration"] 59 | job["toskip"] = 1 60 | else: 61 | job["end_time"] = job["start_time"] + self.time_limit 62 | self.que_list.remove(job) 63 | self.run_list.append(job) 64 | else: 65 | break 66 | 67 | """3. Time-aware Scaling (Optional)""" 68 | if self.enable_scaling: 69 | # Scale-Up 70 | if self.time % 10 == 0 and len(self.que_list) > 10 and self._vc.node_num == self._vc.base_node_num: 71 | self._vc.update_vc_node(change_node_num=self.node_scaling_num) 72 | self.node_scaling_time = self.time 73 | self.scaling_recorder(self.node_scaling_num) 74 | 75 | # Scale-Down 76 | if ( 77 | self.time % 100 == 0 78 | and len(self.que_list) < 5 79 | and self._vc.node_num == self._vc.base_node_num + self.node_scaling_num 80 | and len(self._vc.idle_node_list()) >= self.node_scaling_num 81 | and self._vc.check_node_inside_idle_vc(self._vc.temp_node_num_base) 82 | ): 83 | if self.check_future_cluster_throughput() <= self.gpu_limit * 5: 84 | self._vc.update_vc_node(change_node_num=-1 * self.node_scaling_num) 85 | self.node_scaling_time = self.time 86 | self.scaling_recorder(-1 * self.node_scaling_num) 87 | 88 | """4. 
Log & Result Recorder""" 89 | if self.time % 10000 == 0: 90 | self.runtime_log() 91 | 92 | # Sample Cluster State Every Minute 93 | if self.time % 60 == 0: 94 | self.seq_recorder() 95 | 96 | self.time += 1 97 | 98 | self.log_recorder(self._name) 99 | -------------------------------------------------------------------------------- /simulation/requirements.txt: -------------------------------------------------------------------------------- 1 | pyprimo 2 | numpy 3 | panda 4 | scikit_learn 5 | lightgbm 6 | seaborn 7 | matplotlib 8 | xgboost 9 | 10 | 11 | -------------------------------------------------------------------------------- /simulation/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python simulator.py -e='Venus_Sept' -t='./data/Venus' --sweep -------------------------------------------------------------------------------- /simulation/updater.py: -------------------------------------------------------------------------------- 1 | class ColocateUpdater: 2 | def __init__(self, colocate_df): 3 | self.df = colocate_df 4 | 5 | def _query(self, job1, job2): 6 | reverse = 0 7 | m1, m2 = job1["model"], job2["model"] 8 | d1, d2 = job1["dataset"], job2["dataset"] 9 | b1, b2 = job1["batchsize"], job2["batchsize"] 10 | a1, a2 = job1["amp"], job2["amp"] 11 | # g1, g2 = job1["gpu_num"], job2["gpu_num"] # NOTE 12 | 13 | info = self.df.query( 14 | " model1 == @m1 and model2 == @m2 and batchsize1 == @b1 and batchsize2 == @b2 and dataset1 == @d1 and dataset2 == @d2 and amp1 == @a1 and amp2 == @a2" 15 | ) 16 | if len(info) == 0: 17 | info = self.df.query( 18 | " model1 == @m2 and model2 == @m1 and batchsize1 == @b2 and batchsize2 == @b1 and dataset1 == @d2 and dataset2 == @d1 and amp1 == @a2 and amp2 == @a1" 19 | ) 20 | reverse = 1 21 | assert len(info) == 1, f"job1: {job1} | job2: {job2}" 22 | return info, reverse 23 | 24 | def query_info(self, job1, job2): 25 | if self.check_outside_job(job1, job2): 26 | # Little Influence 27 | total_util = min(1, job1["gpu_util"] + job2["gpu_util"]) 28 | total_mem = job1["gmem"] + job2["gmem"] 29 | return 1, 1, total_util, total_mem 30 | else: 31 | info, reverse = self._query(job1, job2) 32 | speed1, speed2 = info["speed1"].values[0], info["speed2"].values[0] 33 | if reverse: 34 | return speed2, speed1, info["gpu_util"].values[0], info["gmem"].values[0] 35 | else: 36 | return speed1, speed2, info["gpu_util"].values[0], info["gmem"].values[0] 37 | 38 | def query_speed(self, job1, job2): 39 | if self.check_outside_job(job1, job2): 40 | # Little Influence 41 | return 1, 1 42 | else: 43 | info, reverse = self._query(job1, job2) 44 | speed1, speed2 = info["speed1"].values[0], info["speed2"].values[0] 45 | if reverse: 46 | return speed2, speed1 47 | else: 48 | return speed1, speed2 49 | 50 | def query_utils(self, job1, job2): 51 | 52 | if self.check_outside_job(job1, job2): 53 | # Approximate as adding 54 | total_util = min(1, job1["gpu_util"] + job2["gpu_util"]) 55 | total_mem = job1["gmem"] + job2["gmem"] 56 | return total_util, total_mem 57 | else: 58 | info, _ = self._query(job1, job2) 59 | return info["gpu_util"].values[0], info["gmem"].values[0] 60 | 61 | # Some Jobs are not recorded inside colocate_df 62 | def check_outside_job(self, job1, job2): 63 | m1, m2 = job1["model"], job2["model"] 64 | models = [m1, m2] 65 | if "NeuMF" in models: 66 | return True 67 | # Large Model are classified as 2 68 | elif "ResNet50" in models or "BERT" in models or "Transformer" in models: 69 | # raise 
NotImplementedError 70 | return True 71 | else: 72 | return False 73 | -------------------------------------------------------------------------------- /workloads/cifar/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import * 2 | from .dpn import * 3 | from .lenet import * 4 | from .senet import * 5 | from .pnasnet import * 6 | from .densenet import * 7 | from .googlenet import * 8 | from .shufflenet import * 9 | from .shufflenetv2 import * 10 | from .resnet import * 11 | from .resnext import * 12 | from .preact_resnet import * 13 | from .mobilenet import * 14 | from .mobilenetv2 import * 15 | from .efficientnet import * 16 | from .alexnet import * -------------------------------------------------------------------------------- /workloads/cifar/models/alexnet.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | ########################## 4 | ### MODEL 5 | ########################## 6 | NUM_CLASSES = 10 7 | 8 | 9 | class AlexNet(nn.Module): 10 | def __init__(self, num_classes=NUM_CLASSES): 11 | super(AlexNet, self).__init__() 12 | self.features = nn.Sequential( 13 | nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1), 14 | nn.ReLU(inplace=True), 15 | nn.MaxPool2d(kernel_size=2), 16 | nn.Conv2d(64, 192, kernel_size=3, padding=1), 17 | nn.ReLU(inplace=True), 18 | nn.MaxPool2d(kernel_size=2), 19 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 24 | nn.ReLU(inplace=True), 25 | nn.MaxPool2d(kernel_size=2), 26 | ) 27 | self.classifier = nn.Sequential( 28 | nn.Dropout(), 29 | nn.Linear(256 * 2 * 2, 4096), 30 | nn.ReLU(inplace=True), 31 | nn.Dropout(), 32 | nn.Linear(4096, 4096), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(4096, num_classes), 35 | ) 36 | 37 | def forward(self, x): 38 | x = self.features(x) 39 | x = x.view(x.size(0), 256 * 2 * 2) 40 | x = self.classifier(x) 41 | return x -------------------------------------------------------------------------------- /workloads/cifar/models/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Bottleneck(nn.Module): 10 | def __init__(self, in_planes, growth_rate): 11 | super(Bottleneck, self).__init__() 12 | self.bn1 = nn.BatchNorm2d(in_planes) 13 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 15 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 16 | 17 | def forward(self, x): 18 | out = self.conv1(F.relu(self.bn1(x))) 19 | out = self.conv2(F.relu(self.bn2(out))) 20 | out = torch.cat([out,x], 1) 21 | return out 22 | 23 | 24 | class Transition(nn.Module): 25 | def __init__(self, in_planes, out_planes): 26 | super(Transition, self).__init__() 27 | self.bn = nn.BatchNorm2d(in_planes) 28 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 29 | 30 | def forward(self, x): 31 | out = self.conv(F.relu(self.bn(x))) 32 | out = F.avg_pool2d(out, 2) 33 | return out 34 | 35 | 36 | class DenseNet(nn.Module): 37 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): 38 | super(DenseNet, self).__init__() 39 | self.growth_rate 
= growth_rate 40 | 41 | num_planes = 2*growth_rate 42 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) 43 | 44 | self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) 45 | num_planes += nblocks[0]*growth_rate 46 | out_planes = int(math.floor(num_planes*reduction)) 47 | self.trans1 = Transition(num_planes, out_planes) 48 | num_planes = out_planes 49 | 50 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 51 | num_planes += nblocks[1]*growth_rate 52 | out_planes = int(math.floor(num_planes*reduction)) 53 | self.trans2 = Transition(num_planes, out_planes) 54 | num_planes = out_planes 55 | 56 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 57 | num_planes += nblocks[2]*growth_rate 58 | out_planes = int(math.floor(num_planes*reduction)) 59 | self.trans3 = Transition(num_planes, out_planes) 60 | num_planes = out_planes 61 | 62 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 63 | num_planes += nblocks[3]*growth_rate 64 | 65 | self.bn = nn.BatchNorm2d(num_planes) 66 | self.linear = nn.Linear(num_planes, num_classes) 67 | 68 | def _make_dense_layers(self, block, in_planes, nblock): 69 | layers = [] 70 | for i in range(nblock): 71 | layers.append(block(in_planes, self.growth_rate)) 72 | in_planes += self.growth_rate 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x): 76 | out = self.conv1(x) 77 | out = self.trans1(self.dense1(out)) 78 | out = self.trans2(self.dense2(out)) 79 | out = self.trans3(self.dense3(out)) 80 | out = self.dense4(out) 81 | out = F.avg_pool2d(F.relu(self.bn(out)), 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | def DenseNet121(): 87 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) 88 | 89 | def DenseNet169(): 90 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) 91 | 92 | def DenseNet201(): 93 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) 94 | 95 | def DenseNet161(): 96 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) 97 | 98 | def densenet_cifar(): 99 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 100 | 101 | def test(): 102 | net = densenet_cifar() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /workloads/cifar/models/dpn.py: -------------------------------------------------------------------------------- 1 | '''Dual Path Networks in PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): 9 | super(Bottleneck, self).__init__() 10 | self.out_planes = out_planes 11 | self.dense_depth = dense_depth 12 | 13 | self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) 16 | self.bn2 = nn.BatchNorm2d(in_planes) 17 | self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) 19 | 20 | self.shortcut = nn.Sequential() 21 | if first_layer: 22 | self.shortcut = nn.Sequential( 23 | nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), 24 | nn.BatchNorm2d(out_planes+dense_depth) 25 | ) 26 | 27 
| def forward(self, x): 28 | out = F.relu(self.bn1(self.conv1(x))) 29 | out = F.relu(self.bn2(self.conv2(out))) 30 | out = self.bn3(self.conv3(out)) 31 | x = self.shortcut(x) 32 | d = self.out_planes 33 | out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class DPN(nn.Module): 39 | def __init__(self, cfg): 40 | super(DPN, self).__init__() 41 | in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] 42 | num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64) 46 | self.last_planes = 64 47 | self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) 48 | self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) 49 | self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) 50 | self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) 51 | self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) 52 | 53 | def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for i,stride in enumerate(strides): 57 | layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) 58 | self.last_planes = out_planes + (i+2) * dense_depth 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | def DPN26(): 74 | cfg = { 75 | 'in_planes': (96,192,384,768), 76 | 'out_planes': (256,512,1024,2048), 77 | 'num_blocks': (2,2,2,2), 78 | 'dense_depth': (16,32,24,128) 79 | } 80 | return DPN(cfg) 81 | 82 | def DPN92(): 83 | cfg = { 84 | 'in_planes': (96,192,384,768), 85 | 'out_planes': (256,512,1024,2048), 86 | 'num_blocks': (3,4,20,3), 87 | 'dense_depth': (16,32,24,128) 88 | } 89 | return DPN(cfg) 90 | 91 | 92 | def test(): 93 | net = DPN92() 94 | x = torch.randn(1,3,32,32) 95 | y = net(x) 96 | print(y) 97 | 98 | # test() 99 | -------------------------------------------------------------------------------- /workloads/cifar/models/efficientnet.py: -------------------------------------------------------------------------------- 1 | '''EfficientNet in PyTorch. 2 | Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". 
3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Block(nn.Module): 10 | '''expand + depthwise + pointwise + squeeze-excitation''' 11 | 12 | def __init__(self, in_planes, out_planes, expansion, stride): 13 | super(Block, self).__init__() 14 | self.stride = stride 15 | 16 | planes = expansion * in_planes 17 | self.conv1 = nn.Conv2d( 18 | in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 21 | stride=stride, padding=1, groups=planes, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | self.conv3 = nn.Conv2d( 24 | planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 25 | self.bn3 = nn.BatchNorm2d(out_planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride == 1 and in_planes != out_planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d(in_planes, out_planes, kernel_size=1, 31 | stride=1, padding=0, bias=False), 32 | nn.BatchNorm2d(out_planes), 33 | ) 34 | 35 | # SE layers 36 | self.fc1 = nn.Conv2d(out_planes, out_planes//16, kernel_size=1) 37 | self.fc2 = nn.Conv2d(out_planes//16, out_planes, kernel_size=1) 38 | 39 | def forward(self, x): 40 | out = F.relu(self.bn1(self.conv1(x))) 41 | out = F.relu(self.bn2(self.conv2(out))) 42 | out = self.bn3(self.conv3(out)) 43 | shortcut = self.shortcut(x) if self.stride == 1 else out 44 | # Squeeze-Excitation 45 | w = F.avg_pool2d(out, out.size(2)) 46 | w = F.relu(self.fc1(w)) 47 | w = self.fc2(w).sigmoid() 48 | out = out * w + shortcut 49 | return out 50 | 51 | 52 | class EfficientNet(nn.Module): 53 | def __init__(self, cfg, num_classes=10): 54 | super(EfficientNet, self).__init__() 55 | self.cfg = cfg 56 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, 57 | stride=1, padding=1, bias=False) 58 | self.bn1 = nn.BatchNorm2d(32) 59 | self.layers = self._make_layers(in_planes=32) 60 | self.linear = nn.Linear(cfg[-1][1], num_classes) 61 | 62 | def _make_layers(self, in_planes): 63 | layers = [] 64 | for expansion, out_planes, num_blocks, stride in self.cfg: 65 | strides = [stride] + [1]*(num_blocks-1) 66 | for stride in strides: 67 | layers.append(Block(in_planes, out_planes, expansion, stride)) 68 | in_planes = out_planes 69 | return nn.Sequential(*layers) 70 | 71 | def forward(self, x): 72 | out = F.relu(self.bn1(self.conv1(x))) 73 | out = self.layers(out) 74 | out = out.view(out.size(0), -1) 75 | out = self.linear(out) 76 | return out 77 | 78 | 79 | def EfficientNetB0(): 80 | # (expansion, out_planes, num_blocks, stride) 81 | cfg = [(1, 16, 1, 2), 82 | (6, 24, 2, 1), 83 | (6, 40, 2, 2), 84 | (6, 80, 3, 2), 85 | (6, 112, 3, 1), 86 | (6, 192, 4, 2), 87 | (6, 320, 1, 2)] 88 | return EfficientNet(cfg) 89 | 90 | 91 | def test(): 92 | net = EfficientNetB0() 93 | x = torch.randn(2, 3, 32, 32) 94 | y = net(x) 95 | print(y.shape) 96 | 97 | # test -------------------------------------------------------------------------------- /workloads/cifar/models/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Inception(nn.Module): 8 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 9 | super(Inception, self).__init__() 10 | # 1x1 conv branch 11 | self.b1 = nn.Sequential( 12 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 13 | nn.BatchNorm2d(n1x1), 14 | nn.ReLU(True), 15 | ) 16 | 
17 | # 1x1 conv -> 3x3 conv branch 18 | self.b2 = nn.Sequential( 19 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 20 | nn.BatchNorm2d(n3x3red), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.BatchNorm2d(n3x3), 24 | nn.ReLU(True), 25 | ) 26 | 27 | # 1x1 conv -> 5x5 conv branch 28 | self.b3 = nn.Sequential( 29 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 30 | nn.BatchNorm2d(n5x5red), 31 | nn.ReLU(True), 32 | nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), 33 | nn.BatchNorm2d(n5x5), 34 | nn.ReLU(True), 35 | nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), 36 | nn.BatchNorm2d(n5x5), 37 | nn.ReLU(True), 38 | ) 39 | 40 | # 3x3 pool -> 1x1 conv branch 41 | self.b4 = nn.Sequential( 42 | nn.MaxPool2d(3, stride=1, padding=1), 43 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 44 | nn.BatchNorm2d(pool_planes), 45 | nn.ReLU(True), 46 | ) 47 | 48 | def forward(self, x): 49 | y1 = self.b1(x) 50 | y2 = self.b2(x) 51 | y3 = self.b3(x) 52 | y4 = self.b4(x) 53 | return torch.cat([y1,y2,y3,y4], 1) 54 | 55 | 56 | class GoogLeNet(nn.Module): 57 | def __init__(self): 58 | super(GoogLeNet, self).__init__() 59 | self.pre_layers = nn.Sequential( 60 | nn.Conv2d(3, 192, kernel_size=3, padding=1), 61 | nn.BatchNorm2d(192), 62 | nn.ReLU(True), 63 | ) 64 | 65 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 66 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 67 | 68 | self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) 69 | 70 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) 71 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 72 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) 73 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 74 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 75 | 76 | self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) 77 | self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) 78 | 79 | self.avgpool = nn.AvgPool2d(8, stride=1) 80 | self.linear = nn.Linear(1024, 10) 81 | 82 | def forward(self, x): 83 | out = self.pre_layers(x) 84 | out = self.a3(out) 85 | out = self.b3(out) 86 | out = self.maxpool(out) 87 | out = self.a4(out) 88 | out = self.b4(out) 89 | out = self.c4(out) 90 | out = self.d4(out) 91 | out = self.e4(out) 92 | out = self.maxpool(out) 93 | out = self.a5(out) 94 | out = self.b5(out) 95 | out = self.avgpool(out) 96 | out = out.view(out.size(0), -1) 97 | out = self.linear(out) 98 | return out 99 | 100 | 101 | def test(): 102 | net = GoogLeNet() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y.size()) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /workloads/cifar/models/lenet.py: -------------------------------------------------------------------------------- 1 | '''LeNet in PyTorch.''' 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 6, 5) 9 | self.conv2 = nn.Conv2d(6, 16, 5) 10 | self.fc1 = nn.Linear(16*5*5, 120) 11 | self.fc2 = nn.Linear(120, 84) 12 | self.fc3 = nn.Linear(84, 10) 13 | 14 | def forward(self, x): 15 | out = F.relu(self.conv1(x)) 16 | out = F.max_pool2d(out, 2) 17 | out = F.relu(self.conv2(out)) 18 | out = F.max_pool2d(out, 2) 19 | out = out.view(out.size(0), -1) 20 | out = F.relu(self.fc1(out)) 21 | out = F.relu(self.fc2(out)) 22 | out = self.fc3(out) 23 | return out 24 | -------------------------------------------------------------------------------- 
/workloads/cifar/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''Depthwise conv + Pointwise conv''' 13 | def __init__(self, in_planes, out_planes, stride=1): 14 | super(Block, self).__init__() 15 | self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) 16 | self.bn1 = nn.BatchNorm2d(in_planes) 17 | self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | out = F.relu(self.bn1(self.conv1(x))) 22 | out = F.relu(self.bn2(self.conv2(out))) 23 | return out 24 | 25 | 26 | class MobileNet(nn.Module): 27 | # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 28 | cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 29 | 30 | def __init__(self, num_classes=10): 31 | super(MobileNet, self).__init__() 32 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(32) 34 | self.layers = self._make_layers(in_planes=32) 35 | self.linear = nn.Linear(1024, num_classes) 36 | 37 | def _make_layers(self, in_planes): 38 | layers = [] 39 | for x in self.cfg: 40 | out_planes = x if isinstance(x, int) else x[0] 41 | stride = 1 if isinstance(x, int) else x[1] 42 | layers.append(Block(in_planes, out_planes, stride)) 43 | in_planes = out_planes 44 | return nn.Sequential(*layers) 45 | 46 | def forward(self, x): 47 | out = F.relu(self.bn1(self.conv1(x))) 48 | out = self.layers(out) 49 | out = F.avg_pool2d(out, 2) 50 | out = out.view(out.size(0), -1) 51 | out = self.linear(out) 52 | return out 53 | 54 | 55 | def test(): 56 | net = MobileNet() 57 | x = torch.randn(1,3,32,32) 58 | y = net(x) 59 | print(y.size()) 60 | 61 | # test() 62 | -------------------------------------------------------------------------------- /workloads/cifar/models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | '''MobileNetV2 in PyTorch. 2 | 3 | See the paper "Inverted Residuals and Linear Bottlenecks: 4 | Mobile Networks for Classification, Detection and Segmentation" for more details. 
5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''expand + depthwise + pointwise''' 13 | def __init__(self, in_planes, out_planes, expansion, stride): 14 | super(Block, self).__init__() 15 | self.stride = stride 16 | 17 | planes = expansion * in_planes 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 23 | self.bn3 = nn.BatchNorm2d(out_planes) 24 | 25 | self.shortcut = nn.Sequential() 26 | if stride == 1 and in_planes != out_planes: 27 | self.shortcut = nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), 29 | nn.BatchNorm2d(out_planes), 30 | ) 31 | 32 | def forward(self, x): 33 | out = F.relu(self.bn1(self.conv1(x))) 34 | out = F.relu(self.bn2(self.conv2(out))) 35 | out = self.bn3(self.conv3(out)) 36 | out = out + self.shortcut(x) if self.stride==1 else out 37 | return out 38 | 39 | 40 | class MobileNetV2(nn.Module): 41 | # (expansion, out_planes, num_blocks, stride) 42 | cfg = [(1, 16, 1, 1), 43 | (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 44 | (6, 32, 3, 2), 45 | (6, 64, 4, 2), 46 | (6, 96, 3, 1), 47 | (6, 160, 3, 2), 48 | (6, 320, 1, 1)] 49 | 50 | def __init__(self, num_classes=10): 51 | super(MobileNetV2, self).__init__() 52 | # NOTE: change conv1 stride 2 -> 1 for CIFAR10 53 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 54 | self.bn1 = nn.BatchNorm2d(32) 55 | self.layers = self._make_layers(in_planes=32) 56 | self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) 57 | self.bn2 = nn.BatchNorm2d(1280) 58 | self.linear = nn.Linear(1280, num_classes) 59 | 60 | def _make_layers(self, in_planes): 61 | layers = [] 62 | for expansion, out_planes, num_blocks, stride in self.cfg: 63 | strides = [stride] + [1]*(num_blocks-1) 64 | for stride in strides: 65 | layers.append(Block(in_planes, out_planes, expansion, stride)) 66 | in_planes = out_planes 67 | return nn.Sequential(*layers) 68 | 69 | def forward(self, x): 70 | out = F.relu(self.bn1(self.conv1(x))) 71 | out = self.layers(out) 72 | out = F.relu(self.bn2(self.conv2(out))) 73 | # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 74 | out = F.avg_pool2d(out, 4) 75 | out = out.view(out.size(0), -1) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | def test(): 81 | net = MobileNetV2() 82 | x = torch.randn(2,3,32,32) 83 | y = net(x) 84 | print(y.size()) 85 | 86 | # test() 87 | -------------------------------------------------------------------------------- /workloads/cifar/models/pnasnet.py: -------------------------------------------------------------------------------- 1 | '''PNASNet in PyTorch. 
2 | 3 | Paper: Progressive Neural Architecture Search 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SepConv(nn.Module): 11 | '''Separable Convolution.''' 12 | def __init__(self, in_planes, out_planes, kernel_size, stride): 13 | super(SepConv, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, out_planes, 15 | kernel_size, stride, 16 | padding=(kernel_size-1)//2, 17 | bias=False, groups=in_planes) 18 | self.bn1 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | return self.bn1(self.conv1(x)) 22 | 23 | 24 | class CellA(nn.Module): 25 | def __init__(self, in_planes, out_planes, stride=1): 26 | super(CellA, self).__init__() 27 | self.stride = stride 28 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 29 | if stride==2: 30 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 31 | self.bn1 = nn.BatchNorm2d(out_planes) 32 | 33 | def forward(self, x): 34 | y1 = self.sep_conv1(x) 35 | y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 36 | if self.stride==2: 37 | y2 = self.bn1(self.conv1(y2)) 38 | return F.relu(y1+y2) 39 | 40 | class CellB(nn.Module): 41 | def __init__(self, in_planes, out_planes, stride=1): 42 | super(CellB, self).__init__() 43 | self.stride = stride 44 | # Left branch 45 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 46 | self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) 47 | # Right branch 48 | self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) 49 | if stride==2: 50 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 51 | self.bn1 = nn.BatchNorm2d(out_planes) 52 | # Reduce channels 53 | self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 54 | self.bn2 = nn.BatchNorm2d(out_planes) 55 | 56 | def forward(self, x): 57 | # Left branch 58 | y1 = self.sep_conv1(x) 59 | y2 = self.sep_conv2(x) 60 | # Right branch 61 | y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 62 | if self.stride==2: 63 | y3 = self.bn1(self.conv1(y3)) 64 | y4 = self.sep_conv3(x) 65 | # Concat & reduce channels 66 | b1 = F.relu(y1+y2) 67 | b2 = F.relu(y3+y4) 68 | y = torch.cat([b1,b2], 1) 69 | return F.relu(self.bn2(self.conv2(y))) 70 | 71 | class PNASNet(nn.Module): 72 | def __init__(self, cell_type, num_cells, num_planes): 73 | super(PNASNet, self).__init__() 74 | self.in_planes = num_planes 75 | self.cell_type = cell_type 76 | 77 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) 78 | self.bn1 = nn.BatchNorm2d(num_planes) 79 | 80 | self.layer1 = self._make_layer(num_planes, num_cells=6) 81 | self.layer2 = self._downsample(num_planes*2) 82 | self.layer3 = self._make_layer(num_planes*2, num_cells=6) 83 | self.layer4 = self._downsample(num_planes*4) 84 | self.layer5 = self._make_layer(num_planes*4, num_cells=6) 85 | 86 | self.linear = nn.Linear(num_planes*4, 10) 87 | 88 | def _make_layer(self, planes, num_cells): 89 | layers = [] 90 | for _ in range(num_cells): 91 | layers.append(self.cell_type(self.in_planes, planes, stride=1)) 92 | self.in_planes = planes 93 | return nn.Sequential(*layers) 94 | 95 | def _downsample(self, planes): 96 | layer = self.cell_type(self.in_planes, planes, stride=2) 97 | self.in_planes = planes 98 | return layer 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | 
out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = self.layer3(out) 105 | out = self.layer4(out) 106 | out = self.layer5(out) 107 | out = F.avg_pool2d(out, 8) 108 | out = self.linear(out.view(out.size(0), -1)) 109 | return out 110 | 111 | 112 | def PNASNetA(): 113 | return PNASNet(CellA, num_cells=6, num_planes=44) 114 | 115 | def PNASNetB(): 116 | return PNASNet(CellB, num_cells=6, num_planes=32) 117 | 118 | 119 | def test(): 120 | net = PNASNetB() 121 | x = torch.randn(1,3,32,32) 122 | y = net(x) 123 | print(y) 124 | 125 | # test() 126 | -------------------------------------------------------------------------------- /workloads/cifar/models/preact_resnet.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class PreActBlock(nn.Module): 13 | '''Pre-activation version of the BasicBlock.''' 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(PreActBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | 23 | if stride != 1 or in_planes != self.expansion*planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 26 | ) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(x)) 30 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 31 | out = self.conv1(out) 32 | out = self.conv2(F.relu(self.bn2(out))) 33 | out += shortcut 34 | return out 35 | 36 | 37 | class PreActBottleneck(nn.Module): 38 | '''Pre-activation version of the original Bottleneck module.''' 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(PreActBottleneck, self).__init__() 43 | self.bn1 = nn.BatchNorm2d(in_planes) 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn2 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn3 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | 50 | if stride != 1 or in_planes != self.expansion*planes: 51 | self.shortcut = nn.Sequential( 52 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 53 | ) 54 | 55 | def forward(self, x): 56 | out = F.relu(self.bn1(x)) 57 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 58 | out = self.conv1(out) 59 | out = self.conv2(F.relu(self.bn2(out))) 60 | out = self.conv3(F.relu(self.bn3(out))) 61 | out += shortcut 62 | return out 63 | 64 | 65 | class PreActResNet(nn.Module): 66 | def __init__(self, block, num_blocks, num_classes=10): 67 | super(PreActResNet, self).__init__() 68 | self.in_planes = 64 69 | 70 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 71 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 72 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 73 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 74 | 
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 75 | self.linear = nn.Linear(512*block.expansion, num_classes) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | strides = [stride] + [1]*(num_blocks-1) 79 | layers = [] 80 | for stride in strides: 81 | layers.append(block(self.in_planes, planes, stride)) 82 | self.in_planes = planes * block.expansion 83 | return nn.Sequential(*layers) 84 | 85 | def forward(self, x): 86 | out = self.conv1(x) 87 | out = self.layer1(out) 88 | out = self.layer2(out) 89 | out = self.layer3(out) 90 | out = self.layer4(out) 91 | out = F.avg_pool2d(out, 4) 92 | out = out.view(out.size(0), -1) 93 | out = self.linear(out) 94 | return out 95 | 96 | 97 | def PreActResNet18(): 98 | return PreActResNet(PreActBlock, [2,2,2,2]) 99 | 100 | def PreActResNet34(): 101 | return PreActResNet(PreActBlock, [3,4,6,3]) 102 | 103 | def PreActResNet50(): 104 | return PreActResNet(PreActBottleneck, [3,4,6,3]) 105 | 106 | def PreActResNet101(): 107 | return PreActResNet(PreActBottleneck, [3,4,23,3]) 108 | 109 | def PreActResNet152(): 110 | return PreActResNet(PreActBottleneck, [3,8,36,3]) 111 | 112 | 113 | def test(): 114 | net = PreActResNet18() 115 | y = net((torch.randn(1,3,32,32))) 116 | print(y.size()) 117 | 118 | # test() 119 | -------------------------------------------------------------------------------- /workloads/cifar/models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = self.bn2(self.conv2(out)) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Bottleneck(nn.Module): 40 | expansion = 4 41 | 42 | def __init__(self, in_planes, planes, stride=1): 43 | super(Bottleneck, self).__init__() 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 50 | 51 | self.shortcut = nn.Sequential() 52 | if stride != 1 or in_planes != self.expansion*planes: 53 | self.shortcut = nn.Sequential( 54 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 55 | nn.BatchNorm2d(self.expansion*planes) 56 | ) 57 | 58 | def 
forward(self, x): 59 | out = F.relu(self.bn1(self.conv1(x))) 60 | out = F.relu(self.bn2(self.conv2(out))) 61 | out = self.bn3(self.conv3(out)) 62 | out += self.shortcut(x) 63 | out = F.relu(out) 64 | return out 65 | 66 | 67 | class ResNet(nn.Module): 68 | def __init__(self, block, num_blocks, num_classes=10): 69 | super(ResNet, self).__init__() 70 | self.in_planes = 64 71 | 72 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 73 | self.bn1 = nn.BatchNorm2d(64) 74 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 75 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 76 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 77 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 78 | self.linear = nn.Linear(512*block.expansion, num_classes) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out = self.layer1(out) 91 | out = self.layer2(out) 92 | out = self.layer3(out) 93 | out = self.layer4(out) 94 | out = F.avg_pool2d(out, 4) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | return out 98 | 99 | 100 | def ResNet18(): 101 | return ResNet(BasicBlock, [2,2,2,2]) 102 | 103 | def ResNet34(): 104 | return ResNet(BasicBlock, [3,4,6,3]) 105 | 106 | def ResNet50(): 107 | return ResNet(Bottleneck, [3,4,6,3]) 108 | 109 | def ResNet101(): 110 | return ResNet(Bottleneck, [3,4,23,3]) 111 | 112 | def ResNet152(): 113 | return ResNet(Bottleneck, [3,8,36,3]) 114 | 115 | 116 | def test(): 117 | net = ResNet18() 118 | y = net(torch.randn(1,3,32,32)) 119 | print(y.size()) 120 | 121 | # test() 122 | -------------------------------------------------------------------------------- /workloads/cifar/models/resnext.py: -------------------------------------------------------------------------------- 1 | '''ResNeXt in PyTorch. 2 | 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class Block(nn.Module): 11 | '''Grouped convolution block.''' 12 | expansion = 2 13 | 14 | def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): 15 | super(Block, self).__init__() 16 | group_width = cardinality * bottleneck_width 17 | self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(group_width) 19 | self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) 20 | self.bn2 = nn.BatchNorm2d(group_width) 21 | self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*group_width) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*group_width: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*group_width) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class ResNeXt(nn.Module): 41 | def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): 42 | super(ResNeXt, self).__init__() 43 | self.cardinality = cardinality 44 | self.bottleneck_width = bottleneck_width 45 | self.in_planes = 64 46 | 47 | self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(64) 49 | self.layer1 = self._make_layer(num_blocks[0], 1) 50 | self.layer2 = self._make_layer(num_blocks[1], 2) 51 | self.layer3 = self._make_layer(num_blocks[2], 2) 52 | # self.layer4 = self._make_layer(num_blocks[3], 2) 53 | self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) 54 | 55 | def _make_layer(self, num_blocks, stride): 56 | strides = [stride] + [1]*(num_blocks-1) 57 | layers = [] 58 | for stride in strides: 59 | layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) 60 | self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width 61 | # Increase bottleneck_width by 2 after each stage. 62 | self.bottleneck_width *= 2 63 | return nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | out = F.relu(self.bn1(self.conv1(x))) 67 | out = self.layer1(out) 68 | out = self.layer2(out) 69 | out = self.layer3(out) 70 | # out = self.layer4(out) 71 | out = F.avg_pool2d(out, 8) 72 | out = out.view(out.size(0), -1) 73 | out = self.linear(out) 74 | return out 75 | 76 | 77 | def ResNeXt29_2x64d(): 78 | return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64) 79 | 80 | def ResNeXt29_4x64d(): 81 | return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64) 82 | 83 | def ResNeXt29_8x64d(): 84 | return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64) 85 | 86 | def ResNeXt29_32x4d(): 87 | return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4) 88 | 89 | def test_resnext(): 90 | net = ResNeXt29_2x64d() 91 | x = torch.randn(1,3,32,32) 92 | y = net(x) 93 | print(y.size()) 94 | 95 | # test_resnext() 96 | -------------------------------------------------------------------------------- /workloads/cifar/models/senet.py: -------------------------------------------------------------------------------- 1 | '''SENet in PyTorch. 
2 | 3 | SENet is the winner of ImageNet-2017. The paper is not released yet. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class BasicBlock(nn.Module): 11 | def __init__(self, in_planes, planes, stride=1): 12 | super(BasicBlock, self).__init__() 13 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(planes) 17 | 18 | self.shortcut = nn.Sequential() 19 | if stride != 1 or in_planes != planes: 20 | self.shortcut = nn.Sequential( 21 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), 22 | nn.BatchNorm2d(planes) 23 | ) 24 | 25 | # SE layers 26 | self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear 27 | self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) 28 | 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | 33 | # Squeeze 34 | w = F.avg_pool2d(out, out.size(2)) 35 | w = F.relu(self.fc1(w)) 36 | w = F.sigmoid(self.fc2(w)) 37 | # Excitation 38 | out = out * w # New broadcasting feature from v0.2! 39 | 40 | out += self.shortcut(x) 41 | out = F.relu(out) 42 | return out 43 | 44 | 45 | class PreActBlock(nn.Module): 46 | def __init__(self, in_planes, planes, stride=1): 47 | super(PreActBlock, self).__init__() 48 | self.bn1 = nn.BatchNorm2d(in_planes) 49 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 50 | self.bn2 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 52 | 53 | if stride != 1 or in_planes != planes: 54 | self.shortcut = nn.Sequential( 55 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False) 56 | ) 57 | 58 | # SE layers 59 | self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) 60 | self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) 61 | 62 | def forward(self, x): 63 | out = F.relu(self.bn1(x)) 64 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 65 | out = self.conv1(out) 66 | out = self.conv2(F.relu(self.bn2(out))) 67 | 68 | # Squeeze 69 | w = F.avg_pool2d(out, out.size(2)) 70 | w = F.relu(self.fc1(w)) 71 | w = F.sigmoid(self.fc2(w)) 72 | # Excitation 73 | out = out * w 74 | 75 | out += shortcut 76 | return out 77 | 78 | 79 | class SENet(nn.Module): 80 | def __init__(self, block, num_blocks, num_classes=10): 81 | super(SENet, self).__init__() 82 | self.in_planes = 64 83 | 84 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 85 | self.bn1 = nn.BatchNorm2d(64) 86 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 87 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 88 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 89 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 90 | self.linear = nn.Linear(512, num_classes) 91 | 92 | def _make_layer(self, block, planes, num_blocks, stride): 93 | strides = [stride] + [1]*(num_blocks-1) 94 | layers = [] 95 | for stride in strides: 96 | layers.append(block(self.in_planes, planes, stride)) 97 | self.in_planes = planes 98 | return nn.Sequential(*layers) 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = 
105 |         out = self.layer4(out)
106 |         out = F.avg_pool2d(out, 4)
107 |         out = out.view(out.size(0), -1)
108 |         out = self.linear(out)
109 |         return out
110 | 
111 | 
112 | def SENet18():
113 |     return SENet(PreActBlock, [2,2,2,2])
114 | 
115 | 
116 | def test():
117 |     net = SENet18()
118 |     y = net(torch.randn(1,3,32,32))
119 |     print(y.size())
120 | 
121 | # test()
122 | 
--------------------------------------------------------------------------------
/workloads/cifar/models/shufflenet.py:
--------------------------------------------------------------------------------
1 | '''ShuffleNet in PyTorch.
2 | 
3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
4 | '''
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | 
9 | 
10 | class ShuffleBlock(nn.Module):
11 |     def __init__(self, groups):
12 |         super(ShuffleBlock, self).__init__()
13 |         self.groups = groups
14 | 
15 |     def forward(self, x):
16 |         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
17 |         N,C,H,W = x.size()
18 |         g = self.groups
19 |         return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)  # C//g: view() sizes must be ints
20 | 
21 | 
22 | class Bottleneck(nn.Module):
23 |     def __init__(self, in_planes, out_planes, stride, groups):
24 |         super(Bottleneck, self).__init__()
25 |         self.stride = stride
26 | 
27 |         mid_planes = out_planes//4  # integer division keeps Conv2d channel counts ints
28 |         g = 1 if in_planes==24 else groups
29 |         self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
30 |         self.bn1 = nn.BatchNorm2d(mid_planes)
31 |         self.shuffle1 = ShuffleBlock(groups=g)
32 |         self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
33 |         self.bn2 = nn.BatchNorm2d(mid_planes)
34 |         self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
35 |         self.bn3 = nn.BatchNorm2d(out_planes)
36 | 
37 |         self.shortcut = nn.Sequential()
38 |         if stride == 2:
39 |             self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
40 | 
41 |     def forward(self, x):
42 |         out = F.relu(self.bn1(self.conv1(x)))
43 |         out = self.shuffle1(out)
44 |         out = F.relu(self.bn2(self.conv2(out)))
45 |         out = self.bn3(self.conv3(out))
46 |         res = self.shortcut(x)
47 |         out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
48 |         return out
49 | 
50 | 
51 | class ShuffleNet(nn.Module):
52 |     def __init__(self, cfg):
53 |         super(ShuffleNet, self).__init__()
54 |         out_planes = cfg['out_planes']
55 |         num_blocks = cfg['num_blocks']
56 |         groups = cfg['groups']
57 | 
58 |         self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
59 |         self.bn1 = nn.BatchNorm2d(24)
60 |         self.in_planes = 24
61 |         self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
62 |         self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
63 |         self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
64 |         self.linear = nn.Linear(out_planes[2], 10)
65 | 
66 |     def _make_layer(self, out_planes, num_blocks, groups):
67 |         layers = []
68 |         for i in range(num_blocks):
69 |             stride = 2 if i == 0 else 1
70 |             cat_planes = self.in_planes if i == 0 else 0
71 |             layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
72 |             self.in_planes = out_planes
73 |         return nn.Sequential(*layers)
74 | 
75 |     def forward(self, x):
76 |         out = F.relu(self.bn1(self.conv1(x)))
77 |         out = self.layer1(out)
78 |         out = self.layer2(out)
79 |         out = self.layer3(out)
80 |         out = F.avg_pool2d(out, 4)
81
| out = out.view(out.size(0), -1) 82 | out = self.linear(out) 83 | return out 84 | 85 | 86 | def ShuffleNetG2(): 87 | cfg = { 88 | 'out_planes': [200,400,800], 89 | 'num_blocks': [4,8,4], 90 | 'groups': 2 91 | } 92 | return ShuffleNet(cfg) 93 | 94 | def ShuffleNetG3(): 95 | cfg = { 96 | 'out_planes': [240,480,960], 97 | 'num_blocks': [4,8,4], 98 | 'groups': 3 99 | } 100 | return ShuffleNet(cfg) 101 | 102 | 103 | def test(): 104 | net = ShuffleNetG2() 105 | x = torch.randn(1,3,32,32) 106 | y = net(x) 107 | print(y) 108 | 109 | # test() 110 | -------------------------------------------------------------------------------- /workloads/cifar/models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | cfg = { 7 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 8 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 9 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 10 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 11 | } 12 | 13 | 14 | class VGG(nn.Module): 15 | def __init__(self, vgg_name): 16 | super(VGG, self).__init__() 17 | self.features = self._make_layers(cfg[vgg_name]) 18 | self.classifier = nn.Linear(512, 10) 19 | 20 | def forward(self, x): 21 | out = self.features(x) 22 | out = out.view(out.size(0), -1) 23 | out = self.classifier(out) 24 | return out 25 | 26 | def _make_layers(self, cfg): 27 | layers = [] 28 | in_channels = 3 29 | for x in cfg: 30 | if x == 'M': 31 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 32 | else: 33 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 34 | nn.BatchNorm2d(x), 35 | nn.ReLU(inplace=True)] 36 | in_channels = x 37 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 38 | return nn.Sequential(*layers) 39 | 40 | 41 | def test(): 42 | net = VGG('VGG11') 43 | x = torch.randn(2,3,32,32) 44 | y = net(x) 45 | print(y.size()) 46 | 47 | # test() 48 | -------------------------------------------------------------------------------- /workloads/cifar/profile_cifar.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import timeit 4 | import torch.backends.cudnn as cudnn 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | import torch.utils.data.distributed 9 | import numpy as np 10 | import os 11 | import torchvision 12 | import time 13 | from torch.nn import DataParallel 14 | from torchvision import transforms 15 | 16 | from models import * 17 | import workloads.settings as settings 18 | 19 | 20 | # Benchmark settings 21 | parser = argparse.ArgumentParser( 22 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 23 | ) 24 | parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus") 25 | parser.add_argument("--gpu", default=1, type=int, help="GPU id to use. 
Only work when use single gpu.") 26 | parser.add_argument( 27 | "--num-warmup-batches", type=int, default=1, help='number of warm-up batches that don"t count towards benchmark' 28 | ) 29 | parser.add_argument("--num-batches-per-iter", type=int, default=1, help="number of batches per benchmark iteration") 30 | parser.add_argument("--num-iters", type=int, default=1, help="number of benchmark iterations") 31 | parser.add_argument("--amp-fp16", action="store_true", default=False, help="Enables FP16 training with Apex.") 32 | parser.add_argument('--warmup_epoch', type=int, default=10, help='number of warmup epochs') 33 | parser.add_argument('--benchmark_epoch', type=int, default=50, help='number of training benchmark epochs') 34 | parser.add_argument('--data_dir', type=str, default="~/data/", help='Data directory') 35 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 36 | 37 | args = parser.parse_args() 38 | 39 | # args.data_dir = settings.data_dir 40 | # args.total_time = settings.total_time 41 | 42 | def benchmark_cifar(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 43 | t_start = time.time() 44 | if len(gpu_id) == 1: 45 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 46 | else: 47 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 48 | 49 | cudnn.benchmark = True 50 | 51 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 52 | 53 | # Model 54 | # print('==> Building model..') 55 | if model_name == 'VGG': 56 | model = VGG('VGG11') 57 | elif model_name == 'ShuffleNetV2': 58 | model = ShuffleNetV2(net_size=0.5) 59 | else: 60 | model = eval(model_name)() 61 | model = model.to(device) 62 | 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) 65 | 66 | if mixed_precision: 67 | scaler = torch.cuda.amp.GradScaler(enabled=True) 68 | else: 69 | scaler = None 70 | 71 | # specify dataset 72 | ###### dataloader 73 | # print('==> Preparing data..') 74 | transform_train = transforms.Compose([ 75 | transforms.RandomCrop(32, padding=4), 76 | transforms.RandomHorizontalFlip(), 77 | transforms.ToTensor(), 78 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 79 | ]) 80 | 81 | trainset = torchvision.datasets.CIFAR10(root=args.data_dir, train=True, download=True, transform=transform_train) 82 | trainloader = torch.utils.data.DataLoader(trainset, batch_size, shuffle=True, num_workers=2) 83 | # data, target = next(iter(trainloader)) 84 | # data, target = data.cuda(), target.cuda() 85 | 86 | if len(gpu_id) > 1: 87 | model = DataParallel(model) 88 | 89 | # Train 90 | def benchmark_step(): 91 | iter_num = 0 92 | exit_flag = False 93 | model.train() 94 | # Prevent total batch number < warmup+benchmark situation 95 | while True: 96 | for inputs, targets in trainloader: 97 | # Warm-up: previous 10 iters 98 | if iter_num == args.warmup_epoch-1: 99 | warm_signal.value = 1 100 | t_warmend = time.time() 101 | # Reach timeout: exit profiling 102 | if time.time() - t_start >= args.total_time: 103 | t_end = time.time() 104 | t_pass = t_end - t_warmend 105 | exit_flag = True 106 | break 107 | optimizer.zero_grad() 108 | if mixed_precision: 109 | inputs, targets = inputs.to(device), targets.to(device) 110 | with torch.cuda.amp.autocast(): 111 | outputs = model(inputs) 112 | loss = criterion(outputs, targets) 113 | scaler.scale(loss).backward() 114 | scaler.step(optimizer) 115 | scaler.update() 116 | else: 117 | inputs, targets 
= inputs.to(device), targets.to(device)
118 |                     outputs = model(inputs)
119 |                     loss = criterion(outputs, targets)
120 |                     loss.backward()
121 |                     optimizer.step()
122 |                 iter_num += 1
123 |             if exit_flag:
124 |                 break
125 |         return t_pass, iter_num
126 | 
127 |     print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..')
128 |     t_pass, iter_num = benchmark_step()
129 |     img_sec = (iter_num - args.warmup_epoch) * batch_size / t_pass
130 |     print(img_sec)
131 | 
132 |     # Results
133 |     bench_list.append(img_sec)
134 | 
135 | if __name__ == "__main__":
136 |     # Standalone example: benchmark_cifar profiles one model in a single process,
137 |     # using DataParallel across the GPUs listed in gpu_id.
138 |     from multiprocessing import Manager, Value
139 |     model_name = 'EfficientNetB0'
140 |     batch_size = 64
141 |     mixed_precision = 0
142 |     gpu_id = [0,1,2,3]
143 |     bench_list, warm_signal = Manager().list(), Value('i', 0)  # shared state expected by benchmark_cifar
144 |     benchmark_cifar(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal)
--------------------------------------------------------------------------------
/workloads/cifar/run.sh:
--------------------------------------------------------------------------------
1 | max=10
2 | 
3 | for (( i=1; i <= $max; ++i ))
4 | do
5 |     python profile_cifar.py
6 | done
--------------------------------------------------------------------------------
/workloads/dcgan/download.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import print_function, division
4 | import argparse
5 | from os.path import join
6 | 
7 | import subprocess
8 | from urllib.request import Request, urlopen
9 | 
10 | __author__ = 'Fisher Yu'
11 | __email__ = 'fy@cs.princeton.edu'
12 | __license__ = 'MIT'
13 | 
14 | 
15 | def list_categories():
16 |     url = 'http://dl.yf.io/lsun/categories.txt'
17 |     with urlopen(Request(url)) as response:
18 |         return response.read().decode().strip().split('\n')
19 | 
20 | 
21 | def download(out_dir, category, set_name):
22 |     url = 'http://dl.yf.io/lsun/scenes/{category}_' \
23 |           '{set_name}_lmdb.zip'.format(**locals())
24 |     if set_name == 'test':
25 |         out_name = 'test_lmdb.zip'
26 |         url = 'http://dl.yf.io/lsun/scenes/{set_name}_lmdb.zip'.format(**locals())  # .format was missing, leaving a literal '{set_name}' in the test URL
27 |     else:
28 |         out_name = '{category}_{set_name}_lmdb.zip'.format(**locals())
29 |     out_path = join(out_dir, out_name)
30 |     cmd = ['curl', url, '-o', out_path]
31 |     print('Downloading', category, set_name, 'set')
32 |     subprocess.call(cmd)
33 | 
34 | 
35 | def main():
36 |     parser = argparse.ArgumentParser()
37 |     parser.add_argument('-o', '--out_dir', default='')
38 |     parser.add_argument('-c', '--category', default=None)
39 |     args = parser.parse_args()
40 | 
41 |     categories = list_categories()
42 |     if args.category is None:
43 |         print('Downloading', len(categories), 'categories')
44 |         for category in categories:
45 |             download(args.out_dir, category, 'train')
46 |             download(args.out_dir, category, 'val')
47 |         download(args.out_dir, '', 'test')
48 |     else:
49 |         if args.category == 'test':
50 |             download(args.out_dir, '', 'test')
51 |         elif args.category not in categories:
52 |             print('Error:', args.category, "doesn't exist in", 'LSUN release')
53 |         else:
54 |             download(args.out_dir, args.category, 'train')
55 |             download(args.out_dir, args.category, 'val')
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     main()
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .
import data_loader 2 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/an4.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import io 4 | import shutil 5 | import tarfile 6 | import wget 7 | 8 | from utils import create_manifest 9 | 10 | parser = argparse.ArgumentParser(description='Processes and downloads an4.') 11 | parser.add_argument('--target-dir', default='an4_dataset/', help='Path to save dataset') 12 | parser.add_argument('--min-duration', default=1, type=int, 13 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 14 | parser.add_argument('--max-duration', default=15, type=int, 15 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 16 | args = parser.parse_args() 17 | 18 | 19 | def _format_data(root_path, data_tag, name, wav_folder): 20 | data_path = args.target_dir + data_tag + '/' + name + '/' 21 | new_transcript_path = data_path + '/txt/' 22 | new_wav_path = data_path + '/wav/' 23 | 24 | os.makedirs(new_transcript_path) 25 | os.makedirs(new_wav_path) 26 | 27 | wav_path = root_path + 'wav/' 28 | file_ids = root_path + 'etc/an4_%s.fileids' % data_tag 29 | transcripts = root_path + 'etc/an4_%s.transcription' % data_tag 30 | train_path = wav_path + wav_folder 31 | 32 | _convert_audio_to_wav(train_path) 33 | _format_files(file_ids, new_transcript_path, new_wav_path, transcripts, wav_path) 34 | 35 | 36 | def _convert_audio_to_wav(train_path): 37 | with os.popen('find %s -type f -name "*.raw"' % train_path) as pipe: 38 | for line in pipe: 39 | raw_path = line.strip() 40 | new_path = line.replace('.raw', '.wav').strip() 41 | cmd = 'sox -t raw -r %d -b 16 -e signed-integer -B -c 1 \"%s\" \"%s\"' % ( 42 | 16000, raw_path, new_path) 43 | os.system(cmd) 44 | 45 | 46 | def _format_files(file_ids, new_transcript_path, new_wav_path, transcripts, wav_path): 47 | with open(file_ids, 'r') as f: 48 | with open(transcripts, 'r') as t: 49 | paths = f.readlines() 50 | transcripts = t.readlines() 51 | for x in range(len(paths)): 52 | path = wav_path + paths[x].strip() + '.wav' 53 | filename = path.split('/')[-1] 54 | extracted_transcript = _process_transcript(transcripts, x) 55 | current_path = os.path.abspath(path) 56 | new_path = new_wav_path + filename 57 | text_path = new_transcript_path + filename.replace('.wav', '.txt') 58 | with io.FileIO(text_path, "w") as file: 59 | file.write(extracted_transcript.encode('utf-8')) 60 | os.rename(current_path, new_path) 61 | 62 | 63 | def _process_transcript(transcripts, x): 64 | extracted_transcript = transcripts[x].split('(')[0].strip("").split('<')[0].strip().upper() 65 | return extracted_transcript 66 | 67 | 68 | def main(): 69 | root_path = 'an4/' 70 | name = 'an4' 71 | wget.download('http://www.speech.cs.cmu.edu/databases/an4/an4_raw.bigendian.tar.gz') 72 | tar = tarfile.open('an4_raw.bigendian.tar.gz') 73 | tar.extractall() 74 | os.makedirs(args.target_dir) 75 | _format_data(root_path, 'train', name, 'an4_clstk') 76 | _format_data(root_path, 'test', name, 'an4test_clstk') 77 | shutil.rmtree(root_path) 78 | os.remove('an4_raw.bigendian.tar.gz') 79 | train_path = args.target_dir + '/train/' 80 | test_path = args.target_dir + '/test/' 81 | print ('\n', 'Creating manifests...') 82 | create_manifest(train_path, 'an4_train_manifest.csv', args.min_duration, args.max_duration) 83 | create_manifest(test_path, 'an4_val_manifest.csv') 84 | 85 | 86 | 
if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/cmu-arctic-manifests.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S-Lab-System-Group/Lucid/63685a3ab7d15d8e940bb47ef98b6d5cca472b13/workloads/deepspeech2/data/cmu-arctic-manifests.tar.gz -------------------------------------------------------------------------------- /workloads/deepspeech2/data/common_voice.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import csv 6 | from multiprocessing.pool import ThreadPool 7 | import subprocess 8 | from utils import create_manifest 9 | 10 | parser = argparse.ArgumentParser(description='Downloads and processes Mozilla Common Voice dataset.') 11 | parser.add_argument("--target-dir", default='CommonVoice_dataset/', type=str, help="Directory to store the dataset.") 12 | parser.add_argument("--tar-path", type=str, help="Path to the Common Voice *.tar file if downloaded (Optional).") 13 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 14 | parser.add_argument('--min-duration', default=1, type=int, 15 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 16 | parser.add_argument('--max-duration', default=15, type=int, 17 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 18 | parser.add_argument('--files-to-process', default="cv-valid-dev.csv,cv-valid-test.csv,cv-valid-train.csv", 19 | type=str, help='list of *.csv file names to process') 20 | args = parser.parse_args() 21 | COMMON_VOICE_URL = "https://common-voice-data-download.s3.amazonaws.com/cv_corpus_v1.tar.gz" 22 | 23 | 24 | def convert_to_wav(csv_file, target_dir): 25 | """ Read *.csv file description, convert mp3 to wav, process text. 26 | Save results to target_dir. 
27 | 28 | Args: 29 | csv_file: str, path to *.csv file with data description, usually start from 'cv-' 30 | target_dir: str, path to dir to save results; wav/ and txt/ dirs will be created 31 | """ 32 | wav_dir = os.path.join(target_dir, 'wav/') 33 | txt_dir = os.path.join(target_dir, 'txt/') 34 | os.makedirs(wav_dir, exist_ok=True) 35 | os.makedirs(txt_dir, exist_ok=True) 36 | path_to_data = os.path.dirname(csv_file) 37 | 38 | def process(x): 39 | file_path, text = x 40 | file_name = os.path.splitext(os.path.basename(file_path))[0] 41 | text = text.strip().upper() 42 | with open(os.path.join(txt_dir, file_name + '.txt'), 'w') as f: 43 | f.write(text) 44 | cmd = "sox {} -r {} -b 16 -c 1 {}".format( 45 | os.path.join(path_to_data, file_path), 46 | args.sample_rate, 47 | os.path.join(wav_dir, file_name + '.wav')) 48 | subprocess.call([cmd], shell=True) 49 | 50 | print('Converting mp3 to wav for {}.'.format(csv_file)) 51 | with open(csv_file) as csvfile: 52 | reader = csv.DictReader(csvfile) 53 | data = [(row['filename'], row['text']) for row in reader] 54 | with ThreadPool(10) as pool: 55 | pool.map(process, data) 56 | 57 | 58 | def main(): 59 | target_dir = args.target_dir 60 | os.makedirs(target_dir, exist_ok=True) 61 | 62 | target_unpacked_dir = os.path.join(target_dir, "CV_unpacked") 63 | os.makedirs(target_unpacked_dir, exist_ok=True) 64 | 65 | if args.tar_path and os.path.exists(args.tar_path): 66 | print('Find existing file {}'.format(args.tar_path)) 67 | target_file = args.tar_path 68 | else: 69 | print("Could not find downloaded Common Voice archive, Downloading corpus...") 70 | filename = wget.download(COMMON_VOICE_URL, target_dir) 71 | target_file = os.path.join(target_dir, os.path.basename(filename)) 72 | 73 | print("Unpacking corpus to {} ...".format(target_unpacked_dir)) 74 | tar = tarfile.open(target_file) 75 | tar.extractall(target_unpacked_dir) 76 | tar.close() 77 | 78 | for csv_file in args.files_to_process.split(','): 79 | convert_to_wav(os.path.join(target_unpacked_dir, 'cv_corpus_v1/', csv_file), 80 | os.path.join(target_dir, os.path.splitext(csv_file)[0])) 81 | 82 | print('Creating manifests...') 83 | for csv_file in args.files_to_process.split(','): 84 | create_manifest(os.path.join(target_dir, os.path.splitext(csv_file)[0]), 85 | os.path.splitext(csv_file)[0] + '_manifest.csv', 86 | args.min_duration, 87 | args.max_duration) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/librispeech.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import subprocess 6 | from utils import create_manifest 7 | from tqdm import tqdm 8 | import shutil 9 | 10 | parser = argparse.ArgumentParser(description='Processes and downloads LibriSpeech dataset.') 11 | parser.add_argument("--target-dir", default='LibriSpeech_dataset/', type=str, help="Directory to store the dataset.") 12 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 13 | parser.add_argument('--files-to-use', default="train-clean-100.tar.gz," 14 | "train-clean-360.tar.gz,train-other-500.tar.gz," 15 | "dev-clean.tar.gz,dev-other.tar.gz," 16 | "test-clean.tar.gz,test-other.tar.gz", type=str, 17 | help='list of file names to download') 18 | parser.add_argument('--min-duration', default=1, type=int, 19 | help='Prunes training samples shorter than the min duration (given in 
seconds, default 1)') 20 | parser.add_argument('--max-duration', default=15, type=int, 21 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 22 | args = parser.parse_args() 23 | 24 | LIBRI_SPEECH_URLS = { 25 | "train": ["http://www.openslr.org/resources/12/train-clean-100.tar.gz", 26 | "http://www.openslr.org/resources/12/train-clean-360.tar.gz", 27 | "http://www.openslr.org/resources/12/train-other-500.tar.gz"], 28 | 29 | "val": ["http://www.openslr.org/resources/12/dev-clean.tar.gz", 30 | "http://www.openslr.org/resources/12/dev-other.tar.gz"], 31 | 32 | "test_clean": ["http://www.openslr.org/resources/12/test-clean.tar.gz"], 33 | "test_other": ["http://www.openslr.org/resources/12/test-other.tar.gz"] 34 | } 35 | 36 | 37 | def _preprocess_transcript(phrase): 38 | return phrase.strip().upper() 39 | 40 | 41 | def _process_file(wav_dir, txt_dir, base_filename, root_dir): 42 | full_recording_path = os.path.join(root_dir, base_filename) 43 | assert os.path.exists(full_recording_path) and os.path.exists(root_dir) 44 | wav_recording_path = os.path.join(wav_dir, base_filename.replace(".flac", ".wav")) 45 | subprocess.call(["sox {} -r {} -b 16 -c 1 {}".format(full_recording_path, str(args.sample_rate), 46 | wav_recording_path)], shell=True) 47 | # process transcript 48 | txt_transcript_path = os.path.join(txt_dir, base_filename.replace(".flac", ".txt")) 49 | transcript_file = os.path.join(root_dir, "-".join(base_filename.split('-')[:-1]) + ".trans.txt") 50 | assert os.path.exists(transcript_file), "Transcript file {} does not exist.".format(transcript_file) 51 | transcriptions = open(transcript_file).read().strip().split("\n") 52 | transcriptions = {t.split()[0].split("-")[-1]: " ".join(t.split()[1:]) for t in transcriptions} 53 | with open(txt_transcript_path, "w") as f: 54 | key = base_filename.replace(".flac", "").split("-")[-1] 55 | assert key in transcriptions, "{} is not in the transcriptions".format(key) 56 | f.write(_preprocess_transcript(transcriptions[key])) 57 | f.flush() 58 | 59 | 60 | def main(): 61 | target_dl_dir = args.target_dir 62 | if not os.path.exists(target_dl_dir): 63 | os.makedirs(target_dl_dir) 64 | files_to_dl = args.files_to_use.strip().split(',') 65 | for split_type, lst_libri_urls in LIBRI_SPEECH_URLS.items(): 66 | split_dir = os.path.join(target_dl_dir, split_type) 67 | if not os.path.exists(split_dir): 68 | os.makedirs(split_dir) 69 | split_wav_dir = os.path.join(split_dir, "wav") 70 | if not os.path.exists(split_wav_dir): 71 | os.makedirs(split_wav_dir) 72 | split_txt_dir = os.path.join(split_dir, "txt") 73 | if not os.path.exists(split_txt_dir): 74 | os.makedirs(split_txt_dir) 75 | extracted_dir = os.path.join(split_dir, "LibriSpeech") 76 | if os.path.exists(extracted_dir): 77 | shutil.rmtree(extracted_dir) 78 | for url in lst_libri_urls: 79 | # check if we want to dl this file 80 | dl_flag = False 81 | for f in files_to_dl: 82 | if url.find(f) != -1: 83 | dl_flag = True 84 | if not dl_flag: 85 | print("Skipping url: {}".format(url)) 86 | continue 87 | filename = url.split("/")[-1] 88 | target_filename = os.path.join(split_dir, filename) 89 | if not os.path.exists(target_filename): 90 | wget.download(url, split_dir) 91 | print("Unpacking {}...".format(filename)) 92 | tar = tarfile.open(target_filename) 93 | tar.extractall(split_dir) 94 | tar.close() 95 | os.remove(target_filename) 96 | print("Converting flac files to wav and extracting transcripts...") 97 | assert os.path.exists(extracted_dir), "Archive {} was not 
properly uncompressed.".format(filename)
98 |             for root, subdirs, files in tqdm(os.walk(extracted_dir)):
99 |                 for f in files:
100 |                     if f.find(".flac") != -1:
101 |                         _process_file(wav_dir=split_wav_dir, txt_dir=split_txt_dir,
102 |                                       base_filename=f, root_dir=root)
103 | 
104 |             print("Finished {}".format(url))
105 |             shutil.rmtree(extracted_dir)
106 |         if split_type == 'train':  # Prune to min/max duration
107 |             create_manifest(split_dir, 'libri_' + split_type + '_manifest.csv', args.min_duration, args.max_duration)
108 |         else:
109 |             create_manifest(split_dir, 'libri_' + split_type + '_manifest.csv')
110 | 
111 | 
112 | if __name__ == "__main__":
113 |     main()
114 | 
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/merge_manifests.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import argparse
4 | import io
5 | import os
6 | 
7 | from tqdm import tqdm
8 | from utils import order_and_prune_files
9 | 
10 | parser = argparse.ArgumentParser(description='Merges all manifest CSV files in specified folder.')
11 | parser.add_argument('--merge-dir', default='manifests/', help='Path to all manifest files you want to merge')
12 | parser.add_argument('--min-duration', default=1, type=int,
13 |                     help='Prunes any samples shorter than the min duration (given in seconds, default 1)')
14 | parser.add_argument('--max-duration', default=15, type=int,
15 |                     help='Prunes any samples longer than the max duration (given in seconds, default 15)')
16 | parser.add_argument('--output-path', default='merged_manifest.csv', help='Output path to merged manifest')
17 | 
18 | args = parser.parse_args()
19 | 
20 | file_paths = []
21 | for file in os.listdir(args.merge_dir):
22 |     if file.endswith(".csv"):
23 |         with open(os.path.join(args.merge_dir, file), 'r') as fh:
24 |             file_paths += fh.readlines()
25 | file_paths = [file_path.split(',')[0] for file_path in file_paths]
26 | file_paths = order_and_prune_files(file_paths, args.min_duration, args.max_duration)
27 | with io.FileIO(args.output_path, "w") as file:
28 |     for wav_path in tqdm(file_paths, total=len(file_paths)):
29 |         transcript_path = wav_path.replace('/wav/', '/txt/').replace('.wav', '.txt')
30 |         sample = os.path.abspath(wav_path) + ',' + os.path.abspath(transcript_path) + '\n'
31 |         file.write(sample.encode('utf-8'))
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/spec_augment.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 RnD at Spoon Radio
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """SpecAugment implementation (adapted here for PyTorch tensors).
16 | Related paper : https://arxiv.org/pdf/1904.08779.pdf
17 | Table 1 of the paper summarizes the parameters used for each open dataset (W: time-warp parameter, F: frequency-mask width, m_F: number of frequency masks, T: time-mask width, p: time-mask upper-bound ratio, m_T: number of time masks).
18 | -----------------------------------------
19 | Policy | W  | F  | m_F |  T  |  p  | m_T
20 | -----------------------------------------
21 | None   |  0 |  0 |  -  |  0  |  -  |  -
22 | -----------------------------------------
23 | LB     | 80 | 27 |  1  | 100 | 1.0 | 1
24 | -----------------------------------------
25 | LD     | 80 | 27 |  2  | 100 | 1.0 | 2
26 | -----------------------------------------
27 | SM     | 40 | 15 |  2  |  70 | 0.2 | 2
28 | -----------------------------------------
29 | SS     | 40 | 27 |  2  |  70 | 0.2 | 2
30 | -----------------------------------------
31 | LB : LibriSpeech basic
32 | LD : LibriSpeech double
33 | SM : Switchboard mild
34 | SS : Switchboard strong
35 | """
36 | 
37 | import librosa
38 | import librosa.display
39 | import numpy as np
40 | import random
41 | import matplotlib
42 | #matplotlib.use('TkAgg')
43 | import matplotlib.pyplot as plt
44 | from .sparse_image_warp import sparse_image_warp
45 | import torch
46 | 
47 | 
48 | def time_warp(spec, W=5):
49 |     num_rows = spec.shape[1]
50 |     spec_len = spec.shape[2]
51 | 
52 |     y = num_rows // 2
53 |     horizontal_line_at_ctr = spec[0][y]
54 |     # assert len(horizontal_line_at_ctr) == spec_len
55 | 
56 |     point_to_warp = horizontal_line_at_ctr[random.randrange(W, spec_len-W)]
57 |     # assert isinstance(point_to_warp, torch.Tensor)
58 | 
59 |     # Warp distance drawn uniformly from [-W, W)
60 |     dist_to_warp = random.randrange(-W, W)
61 |     src_pts = torch.tensor([[[y, point_to_warp]]])
62 |     dest_pts = torch.tensor([[[y, point_to_warp + dist_to_warp]]])
63 |     warped_spectro, dense_flows = sparse_image_warp(spec, src_pts, dest_pts)
64 | 
65 |     return warped_spectro.squeeze(3)
66 | 
67 | 
68 | def spec_augment(mel_spectrogram, time_warping_para=40, frequency_masking_para=27,
69 |                  time_masking_para=70, frequency_mask_num=1, time_mask_num=1):
70 |     """Spec augmentation calculation function.
71 |     'SpecAugment' has three steps for audio data augmentation.
72 |     The first step is time warping using the imported sparse_image_warp function.
73 |     The second step is frequency masking, and the last step is time masking.
74 |     # Arguments:
75 |       mel_spectrogram(torch.Tensor): the mel spectrogram you want to warp and mask.
76 |       time_warping_para(float): Augmentation parameter, "time warp parameter W".
77 |         If none, default = 40.
78 |       frequency_masking_para(float): Augmentation parameter, "frequency mask parameter F"
79 |         If none, default = 27.
80 |       time_masking_para(float): Augmentation parameter, "time mask parameter T"
81 |         If none, default = 70.
82 |       frequency_mask_num(float): number of frequency masking lines, "m_F".
83 |         If none, default = 1.
84 |       time_mask_num(float): number of time masking lines, "m_T".
85 |         If none, default = 1.
86 |     # Returns
87 |       mel_spectrogram(torch.Tensor): warped and masked mel spectrogram.
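    # Example (an illustrative sketch added to this docstring, not part of the
    # original docs; an 80-bin, 400-frame log-mel tensor is assumed):
    #     mel = torch.rand(80, 400)   # (freq channels v, time steps tau)
    #     augmented = spec_augment(mel, frequency_mask_num=2, time_mask_num=2)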
88 | """ 89 | mel_spectrogram = mel_spectrogram.unsqueeze(0) 90 | 91 | v = mel_spectrogram.shape[1] 92 | tau = mel_spectrogram.shape[2] 93 | 94 | # Step 1 : Time warping 95 | warped_mel_spectrogram = time_warp(mel_spectrogram) 96 | 97 | # Step 2 : Frequency masking 98 | for i in range(frequency_mask_num): 99 | f = np.random.uniform(low=0.0, high=frequency_masking_para) 100 | f = int(f) 101 | if v - f < 0: 102 | continue 103 | f0 = random.randint(0, v-f) 104 | warped_mel_spectrogram[:, f0:f0+f, :] = 0 105 | 106 | # Step 3 : Time masking 107 | for i in range(time_mask_num): 108 | t = np.random.uniform(low=0.0, high=time_masking_para) 109 | t = int(t) 110 | if tau - t < 0: 111 | continue 112 | t0 = random.randint(0, tau-t) 113 | warped_mel_spectrogram[:, :, t0:t0+t] = 0 114 | 115 | return warped_mel_spectrogram.squeeze() 116 | 117 | 118 | def visualization_spectrogram(mel_spectrogram, title): 119 | """visualizing result of SpecAugment 120 | # Arguments: 121 | mel_spectrogram(ndarray): mel_spectrogram to visualize. 122 | title(String): plot figure's title 123 | """ 124 | # Show mel-spectrogram using librosa's specshow. 125 | plt.figure(figsize=(10, 4)) 126 | librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max), y_axis='mel', fmax=8000, x_axis='time') 127 | # plt.colorbar(format='%+2.0f dB') 128 | plt.title(title) 129 | plt.tight_layout() 130 | plt.show() 131 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/ted.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import subprocess 6 | import unicodedata 7 | import io 8 | from utils import create_manifest 9 | from tqdm import tqdm 10 | 11 | parser = argparse.ArgumentParser(description='Processes and downloads TED-LIUMv2 dataset.') 12 | parser.add_argument("--target-dir", default='TEDLIUM_dataset/', type=str, help="Directory to store the dataset.") 13 | parser.add_argument("--tar-path", type=str, help="Path to the TEDLIUM_release tar if downloaded (Optional).") 14 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 15 | parser.add_argument('--min-duration', default=1, type=int, 16 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 17 | parser.add_argument('--max-duration', default=15, type=int, 18 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 19 | args = parser.parse_args() 20 | 21 | TED_LIUM_V2_DL_URL = "http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz" 22 | 23 | 24 | def get_utterances_from_stm(stm_file): 25 | """ 26 | Return list of entries containing phrase and its start/end timings 27 | :param stm_file: 28 | :return: 29 | """ 30 | res = [] 31 | with io.open(stm_file, "r", encoding='utf-8') as f: 32 | for stm_line in f: 33 | tokens = stm_line.split() 34 | start_time = float(tokens[3]) 35 | end_time = float(tokens[4]) 36 | filename = tokens[0] 37 | transcript = unicodedata.normalize("NFKD", 38 | " ".join(t for t in tokens[6:]).strip()). 
\ 39 | encode("utf-8", "ignore").decode("utf-8", "ignore") 40 | if transcript != "ignore_time_segment_in_scoring": 41 | res.append({ 42 | "start_time": start_time, "end_time": end_time, 43 | "filename": filename, "transcript": transcript 44 | }) 45 | return res 46 | 47 | 48 | def cut_utterance(src_sph_file, target_wav_file, start_time, end_time, sample_rate=16000): 49 | subprocess.call(["sox {} -r {} -b 16 -c 1 {} trim {} ={}".format(src_sph_file, str(sample_rate), 50 | target_wav_file, start_time, end_time)], 51 | shell=True) 52 | 53 | 54 | def _preprocess_transcript(phrase): 55 | return phrase.strip().upper() 56 | 57 | 58 | def filter_short_utterances(utterance_info, min_len_sec=1.0): 59 | return utterance_info["end_time"] - utterance_info["start_time"] > min_len_sec 60 | 61 | 62 | def prepare_dir(ted_dir): 63 | converted_dir = os.path.join(ted_dir, "converted") 64 | # directories to store converted wav files and their transcriptions 65 | wav_dir = os.path.join(converted_dir, "wav") 66 | if not os.path.exists(wav_dir): 67 | os.makedirs(wav_dir) 68 | txt_dir = os.path.join(converted_dir, "txt") 69 | if not os.path.exists(txt_dir): 70 | os.makedirs(txt_dir) 71 | counter = 0 72 | entries = os.listdir(os.path.join(ted_dir, "sph")) 73 | for sph_file in tqdm(entries, total=len(entries)): 74 | speaker_name = sph_file.split('.sph')[0] 75 | 76 | sph_file_full = os.path.join(ted_dir, "sph", sph_file) 77 | stm_file_full = os.path.join(ted_dir, "stm", "{}.stm".format(speaker_name)) 78 | 79 | assert os.path.exists(sph_file_full) and os.path.exists(stm_file_full) 80 | all_utterances = get_utterances_from_stm(stm_file_full) 81 | 82 | all_utterances = filter(filter_short_utterances, all_utterances) 83 | for utterance_id, utterance in enumerate(all_utterances): 84 | target_wav_file = os.path.join(wav_dir, "{}_{}.wav".format(utterance["filename"], str(utterance_id))) 85 | target_txt_file = os.path.join(txt_dir, "{}_{}.txt".format(utterance["filename"], str(utterance_id))) 86 | cut_utterance(sph_file_full, target_wav_file, utterance["start_time"], utterance["end_time"], 87 | sample_rate=args.sample_rate) 88 | with io.FileIO(target_txt_file, "w") as f: 89 | f.write(_preprocess_transcript(utterance["transcript"]).encode('utf-8')) 90 | counter += 1 91 | 92 | 93 | def main(): 94 | target_dl_dir = args.target_dir 95 | if not os.path.exists(target_dl_dir): 96 | os.makedirs(target_dl_dir) 97 | 98 | target_unpacked_dir = os.path.join(target_dl_dir, "TEDLIUM_release2") 99 | if args.tar_path and os.path.exists(args.tar_path): 100 | target_file = args.tar_path 101 | else: 102 | print("Could not find downloaded TEDLIUM archive, Downloading corpus...") 103 | wget.download(TED_LIUM_V2_DL_URL, target_dl_dir) 104 | target_file = os.path.join(target_dl_dir, "TEDLIUM_release2.tar.gz") 105 | 106 | if not os.path.exists(target_unpacked_dir): 107 | print("Unpacking corpus...") 108 | tar = tarfile.open(target_file) 109 | tar.extractall(target_dl_dir) 110 | tar.close() 111 | else: 112 | print("Found TEDLIUM directory, skipping unpacking of tar files") 113 | 114 | train_ted_dir = os.path.join(target_unpacked_dir, "train") 115 | val_ted_dir = os.path.join(target_unpacked_dir, "dev") 116 | test_ted_dir = os.path.join(target_unpacked_dir, "test") 117 | 118 | prepare_dir(train_ted_dir) 119 | prepare_dir(val_ted_dir) 120 | prepare_dir(test_ted_dir) 121 | print('Creating manifests...') 122 | 123 | create_manifest(train_ted_dir, 'ted_train_manifest.csv', args.min_duration, args.max_duration) 124 | create_manifest(val_ted_dir, 
'ted_val_manifest.csv') 125 | create_manifest(test_ted_dir, 'ted_test_manifest.csv') 126 | 127 | 128 | if __name__ == "__main__": 129 | main() 130 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import fnmatch 4 | import io 5 | import os 6 | from tqdm import tqdm 7 | import subprocess 8 | import torch.distributed as dist 9 | 10 | 11 | def create_manifest(data_path, output_path, min_duration=None, max_duration=None): 12 | file_paths = [os.path.join(dirpath, f) 13 | for dirpath, dirnames, files in os.walk(data_path) 14 | for f in fnmatch.filter(files, '*.wav')] 15 | file_paths = order_and_prune_files(file_paths, min_duration, max_duration) 16 | with io.FileIO(output_path, "w") as file: 17 | for wav_path in tqdm(file_paths, total=len(file_paths)): 18 | transcript_path = wav_path.replace('/wav/', '/txt/').replace('.wav', '.txt') 19 | sample = os.path.abspath(wav_path) + ',' + os.path.abspath(transcript_path) + '\n' 20 | file.write(sample.encode('utf-8')) 21 | print('\n') 22 | 23 | 24 | def order_and_prune_files(file_paths, min_duration, max_duration): 25 | print("Sorting manifests...") 26 | duration_file_paths = [(path, float(subprocess.check_output( 27 | ['soxi -D \"%s\"' % path.strip()], shell=True))) for path in file_paths] 28 | if min_duration and max_duration: 29 | print("Pruning manifests between %d and %d seconds" % (min_duration, max_duration)) 30 | duration_file_paths = [(path, duration) for path, duration in duration_file_paths if 31 | min_duration <= duration <= max_duration] 32 | 33 | def func(element): 34 | return element[1] 35 | 36 | duration_file_paths.sort(key=func) 37 | return [x[0] for x in duration_file_paths] # Remove durations 38 | 39 | def reduce_tensor(tensor, world_size): 40 | rt = tensor.clone() 41 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 42 | rt /= world_size 43 | return rt 44 | 45 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/voxforge.py: -------------------------------------------------------------------------------- 1 | import os 2 | from six.moves import urllib 3 | import argparse 4 | import re 5 | import tempfile 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import io 10 | from tqdm import tqdm 11 | 12 | from utils import create_manifest 13 | 14 | VOXFORGE_URL_16kHz = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit/' 15 | 16 | parser = argparse.ArgumentParser(description='Processes and downloads VoxForge dataset.') 17 | parser.add_argument("--target-dir", default='voxforge_dataset/', type=str, help="Directory to store the dataset.") 18 | parser.add_argument('--sample-rate', default=16000, 19 | type=int, help='Sample rate') 20 | parser.add_argument('--min-duration', default=1, type=int, 21 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 22 | parser.add_argument('--max-duration', default=15, type=int, 23 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 24 | args = parser.parse_args() 25 | 26 | 27 | def _get_recordings_dir(sample_dir, recording_name): 28 | wav_dir = os.path.join(sample_dir, recording_name, "wav") 29 | if os.path.exists(wav_dir): 30 | return "wav", wav_dir 31 | flac_dir = os.path.join(sample_dir, recording_name, "flac") 32 | if 
os.path.exists(flac_dir): 33 | return "flac", flac_dir 34 | raise Exception("wav or flac directory was not found for recording name: {}".format(recording_name)) 35 | 36 | 37 | def prepare_sample(recording_name, url, target_folder): 38 | """ 39 | Downloads and extracts a sample from VoxForge and puts the wav and txt files into :target_folder. 40 | """ 41 | wav_dir = os.path.join(target_folder, "wav") 42 | if not os.path.exists(wav_dir): 43 | os.makedirs(wav_dir) 44 | txt_dir = os.path.join(target_folder, "txt") 45 | if not os.path.exists(txt_dir): 46 | os.makedirs(txt_dir) 47 | # check if sample is processed 48 | filename_set = set(['_'.join(wav_file.split('_')[:-1]) for wav_file in os.listdir(wav_dir)]) 49 | if recording_name in filename_set: 50 | return 51 | 52 | request = urllib.request.Request(url) 53 | response = urllib.request.urlopen(request) 54 | content = response.read() 55 | response.close() 56 | with tempfile.NamedTemporaryFile(suffix=".tgz", mode='wb') as target_tgz: 57 | target_tgz.write(content) 58 | target_tgz.flush() 59 | dirpath = tempfile.mkdtemp() 60 | 61 | tar = tarfile.open(target_tgz.name) 62 | tar.extractall(dirpath) 63 | tar.close() 64 | 65 | recordings_type, recordings_dir = _get_recordings_dir(dirpath, recording_name) 66 | tgz_prompt_file = os.path.join(dirpath, recording_name, "etc", "PROMPTS") 67 | 68 | if os.path.exists(recordings_dir) and os.path.exists(tgz_prompt_file): 69 | transcriptions = open(tgz_prompt_file).read().strip().split("\n") 70 | transcriptions = {t.split()[0]: " ".join(t.split()[1:]) for t in transcriptions} 71 | for wav_file in os.listdir(recordings_dir): 72 | recording_id = wav_file.split('.{}'.format(recordings_type))[0] 73 | transcription_key = recording_name + "/mfc/" + recording_id 74 | if transcription_key not in transcriptions: 75 | continue 76 | utterance = transcriptions[transcription_key] 77 | 78 | target_wav_file = os.path.join(wav_dir, "{}_{}.wav".format(recording_name, recording_id)) 79 | target_txt_file = os.path.join(txt_dir, "{}_{}.txt".format(recording_name, recording_id)) 80 | with io.FileIO(target_txt_file, "w") as file: 81 | file.write(utterance.encode('utf-8')) 82 | original_wav_file = os.path.join(recordings_dir, wav_file) 83 | subprocess.call(["sox {} -r {} -b 16 -c 1 {}".format(original_wav_file, str(args.sample_rate), 84 | target_wav_file)], shell=True) 85 | 86 | shutil.rmtree(dirpath) 87 | 88 | 89 | if __name__ == '__main__': 90 | target_dir = args.target_dir 91 | sample_rate = args.sample_rate 92 | 93 | if not os.path.isdir(target_dir): 94 | os.makedirs(target_dir) 95 | request = urllib.request.Request(VOXFORGE_URL_16kHz) 96 | response = urllib.request.urlopen(request) 97 | content = response.read() 98 | all_files = re.findall("href\=\"(.*\.tgz)\"", content.decode("utf-8")) 99 | for f in tqdm(all_files, total=len(all_files)): 100 | prepare_sample(f.replace(".tgz", ""), VOXFORGE_URL_16kHz + f, target_dir) 101 | print('Creating manifests...') 102 | create_manifest(target_dir, 'voxforge_train_manifest.csv', args.min_duration, args.max_duration) 103 | -------------------------------------------------------------------------------- /workloads/deepspeech2/labels.json: -------------------------------------------------------------------------------- 1 | [ 2 | "_", 3 | "'", 4 | "A", 5 | "B", 6 | "C", 7 | "D", 8 | "E", 9 | "F", 10 | "G", 11 | "H", 12 | "I", 13 | "J", 14 | "K", 15 | "L", 16 | "M", 17 | "N", 18 | "O", 19 | "P", 20 | "Q", 21 | "R", 22 | "S", 23 | "T", 24 | "U", 25 | "V", 26 | "W", 27 | "X", 28 | "Y", 29 | "Z", 30 
| " " 31 | ] -------------------------------------------------------------------------------- /workloads/imagenet/profile_imagenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import timeit 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | import torch.utils.data.distributed 9 | import numpy as np 10 | import time 11 | import os 12 | import torchvision 13 | import workloads.settings as settings 14 | 15 | from torch.nn import DataParallel 16 | from models import * 17 | 18 | 19 | # Benchmark settings 20 | parser = argparse.ArgumentParser( 21 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 22 | ) 23 | parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus") 24 | parser.add_argument("--gpu", default=1, type=int, help="GPU id to use. Only work when use single gpu.") 25 | 26 | 27 | parser.add_argument('--warmup_epoch', type=int, default=10, help='number of warmup epochs') 28 | parser.add_argument("--num-batches-per-iter", type=int, default=30, help="number of batches per benchmark iteration") 29 | parser.add_argument("--num-iters", type=int, default=30, help="number of benchmark iterations") 30 | parser.add_argument("--amp-fp16", action="store_true", default=False, help="Enables FP16 training with Apex.") 31 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 32 | 33 | 34 | args = parser.parse_args() 35 | # args.total_time = settings.total_time 36 | 37 | # Training 38 | def benchmark_imagenet(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 39 | t_start = time.time() 40 | if len(gpu_id) == 1: 41 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 42 | else: 43 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 44 | 45 | cudnn.benchmark = True 46 | 47 | model = getattr(torchvision.models, model_name)() 48 | model = model.cuda() 49 | 50 | data = torch.randn(batch_size, 3, 224, 224) 51 | target = torch.LongTensor(batch_size).random_() % 1000 52 | data, target = data.cuda(), target.cuda() 53 | 54 | optimizer = optim.SGD(model.parameters(), lr=0.01) 55 | 56 | if mixed_precision: 57 | scaler = torch.cuda.amp.GradScaler(enabled=True) 58 | else: 59 | scaler = None 60 | 61 | if len(gpu_id) > 1: 62 | model = DataParallel(model) 63 | 64 | def benchmark_step(): 65 | iter_num = 0 66 | while True: 67 | optimizer.zero_grad() 68 | if iter_num == args.warmup_epoch-1: 69 | warm_signal.value = 1 70 | t_warmend = time.time() 71 | # Reach timeout: exit profiling 72 | if time.time() - t_start >= args.total_time: 73 | t_end = time.time() 74 | t_pass = t_end - t_warmend 75 | break 76 | if mixed_precision: 77 | with torch.cuda.amp.autocast(): 78 | output = model(data) 79 | loss = F.cross_entropy(output, target) 80 | scaler.scale(loss).backward() 81 | scaler.step(optimizer) 82 | scaler.update() 83 | else: 84 | output = model(data) 85 | loss = F.cross_entropy(output, target) 86 | loss.backward() 87 | optimizer.step() 88 | iter_num += 1 89 | return t_pass, iter_num 90 | 91 | # Benchmark 92 | print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..') 93 | t_pass, iter_num = benchmark_step() 94 | img_sec = len(gpu_id) * (iter_num - args.warmup_epoch) * batch_size / t_pass 95 | 96 | bench_list.append(img_sec) 97 | 98 | 99 | 100 | 101 | 
--------------------------------------------------------------------------------
/workloads/imagenet/profile_imagenet_ddp.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import timeit
4 | # from cvxpy import mixed_norm  # unused import, commented out: it only added a heavyweight cvxpy dependency
5 | import torch.backends.cudnn as cudnn
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import torch.optim as optim
9 | import torch.utils.data.distributed
10 | import torch.distributed as dist
11 | import sys
12 | import numpy as np
13 | import os
14 | import pandas as pd
15 | import torchvision
16 | import time
17 | import torch.multiprocessing as mp
18 | sys.path.append('/home/mzhang/work/ASPLOS23/collect_metric/')
19 | 
20 | from torch.nn import DataParallel
21 | from multiprocessing import Process, Manager, Value
22 | from torch.nn.parallel import DistributedDataParallel as DDP
23 | from torchvision import transforms
24 | from models import *
25 | 
26 | # Benchmark settings
27 | parser = argparse.ArgumentParser(
28 |     description="PyTorch DDP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter
29 | )
30 | parser.add_argument('--warmup_iter', type=int, default=10, help='number of warmup iterations')
31 | parser.add_argument('--benchmark_epoch', type=int, default=50, help='number of training benchmark epochs')
32 | parser.add_argument('--data_dir', type=str, default="~/data/", help='Data directory')
33 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code')
34 | parser.add_argument('--master_addr', type=str, default='127.0.0.1', help='Master address for the DDP rendezvous')
35 | parser.add_argument('--master_port', type=str, default='47020', help='Master port for the DDP rendezvous')
36 | 
37 | 
38 | args = parser.parse_args()
39 | 
40 | 
41 | # ------ Setting up the distributed environment -------
42 | def setup(rank, world_size):
43 |     os.environ['MASTER_ADDR'] = args.master_addr
44 |     os.environ['MASTER_PORT'] = args.master_port
45 |     # initialize the process group
46 |     dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)
47 |     # init_process_group synchronizes the participating processes so that they
48 |     # can communicate with each other across multiple GPUs.
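    # (Added note: MASTER_ADDR/MASTER_PORT form the rendezvous point shared by all
    # ranks. When several of these benchmarks run on one machine, give each job its
    # own port, e.g. `python profile_imagenet_ddp.py --master_port 47021`.)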
49 | 50 | 51 | def cleanup(): 52 | dist.destroy_process_group() 53 | 54 | 55 | def benchmark_imagenet_ddp(rank, model_name, batch_size, mixed_precision, gpu_id, t_start): 56 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 57 | print(f"Running Distributed ResNet on rank {rank}.") 58 | setup(rank, len(gpu_id)) 59 | torch.manual_seed(0) 60 | torch.cuda.set_device(rank) 61 | 62 | # Model 63 | # print('==> Building model..') 64 | model = getattr(torchvision.models, model_name)() 65 | model.to(rank) 66 | model = DDP(model, device_ids=[rank]) 67 | 68 | criterion = nn.CrossEntropyLoss().to(rank) 69 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) 70 | 71 | if mixed_precision: 72 | scaler = torch.cuda.amp.GradScaler(enabled=True) 73 | else: 74 | scaler = None 75 | 76 | # Dataset 77 | data = torch.randn(batch_size, 3, 224, 224) 78 | target = torch.LongTensor(batch_size).random_() % 1000 79 | data, target = data.to(rank), target.to(rank) 80 | 81 | # data, target = next(iter(trainloader)) 82 | # data, target = data.cuda(), target.cuda() 83 | 84 | # Train 85 | print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..') 86 | iter_num = 0 87 | model.train() 88 | # Prevent total batch number < warmup+benchmark situation 89 | while True: 90 | # Warm-up: previous 10 iters 91 | if iter_num == args.warmup_iter-1: 92 | t_warmend = time.time() 93 | # Reach timeout: exit benchmark 94 | if time.time() - t_start >= args.total_time: 95 | t_end = time.time() 96 | t_pass = t_end - t_warmend 97 | break 98 | optimizer.zero_grad() 99 | if mixed_precision: 100 | with torch.cuda.amp.autocast(): 101 | output = model(data) 102 | loss = criterion(output, target) 103 | scaler.scale(loss).backward() 104 | scaler.step(optimizer) 105 | scaler.update() 106 | else: 107 | output = model(data) 108 | loss = criterion(output, target) 109 | loss.backward() 110 | optimizer.step() 111 | iter_num += 1 112 | 113 | img_sec = len(gpu_id) * (iter_num - args.warmup_iter) * batch_size / t_pass 114 | if rank == 0: 115 | print(f'master port: {args.master_port}, speed: {img_sec}') 116 | 117 | cleanup() 118 | 119 | if __name__ == '__main__': 120 | model_name = 'resnet50' 121 | batch_size = 64 122 | mixed_precision = 0 123 | gpu_id = [0,1,2,3] 124 | # world_size = 4 125 | t_start = time.time() 126 | mp.spawn(benchmark_imagenet_ddp, args=(model_name, batch_size, mixed_precision, gpu_id, t_start, ), nprocs=len(gpu_id), join=True) 127 | -------------------------------------------------------------------------------- /workloads/imagenet/requirements.txt: -------------------------------------------------------------------------------- 1 | asposestorage==1.0.2 2 | numpy==1.21.5 3 | pandas==1.4.1 4 | torch==1.11.0+cu113 5 | torchvision==0.12.0 6 | -------------------------------------------------------------------------------- /workloads/lstm/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from io import open 3 | import torch 4 | 5 | class Dictionary(object): 6 | def __init__(self): 7 | self.word2idx = {} 8 | self.idx2word = [] 9 | 10 | def add_word(self, word): 11 | if word not in self.word2idx: 12 | self.idx2word.append(word) 13 | self.word2idx[word] = len(self.idx2word) - 1 14 | return self.word2idx[word] 15 | 16 | def __len__(self): 17 | return len(self.idx2word) 18 | 19 | 20 | class Corpus(object): 21 | def __init__(self, path): 22 | self.dictionary = Dictionary() 23 | self.train = 
self.tokenize(os.path.join(path, 'train.txt')) 24 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 25 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 26 | 27 | def tokenize(self, path): 28 | """Tokenizes a text file.""" 29 | assert os.path.exists(path) 30 | # Add words to the dictionary 31 | with open(path, 'r', encoding="utf8") as f: 32 | tokens = 0 33 | for line in f: 34 | words = line.split() + [''] 35 | tokens += len(words) 36 | for word in words: 37 | self.dictionary.add_word(word) 38 | 39 | # Tokenize file content 40 | with open(path, 'r', encoding="utf8") as f: 41 | ids = torch.LongTensor(tokens) 42 | token = 0 43 | for line in f: 44 | words = line.split() + [''] 45 | for word in words: 46 | ids[token] = self.dictionary.word2idx[word] 47 | token += 1 48 | 49 | return ids 50 | -------------------------------------------------------------------------------- /workloads/ncf/config.py: -------------------------------------------------------------------------------- 1 | import workloads.settings as settings 2 | 3 | # dataset name 4 | dataset = 'ml-1m' 5 | assert dataset in ['ml-1m', 'pinterest-20'] 6 | 7 | # model name 8 | model = 'NeuMF-end' 9 | #model = 'MLP' 10 | #model = 'GMF' 11 | #model = 'NeuMF-pre' 12 | assert model in ['MLP', 'GMF', 'NeuMF-end', 'NeuMF-pre'] 13 | 14 | # paths 15 | # main_path = '/home/mzhang/data/ml-1m/' 16 | main_path = settings.data_dir + 'ml-1m/' 17 | 18 | train_rating = main_path + '{}.train.rating'.format(dataset) 19 | test_rating = main_path + '{}.test.rating'.format(dataset) 20 | test_negative = main_path + '{}.test.negative'.format(dataset) 21 | 22 | model_path = '/mnt/ncf/models/' 23 | GMF_model_path = model_path + 'GMF.pth' 24 | MLP_model_path = model_path + 'MLP.pth' 25 | NeuMF_model_path = model_path + 'NeuMF.pth' 26 | -------------------------------------------------------------------------------- /workloads/ncf/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scipy.sparse as sp 4 | 5 | import torch.utils.data as data 6 | 7 | import ncf.config as config 8 | 9 | 10 | def load_all(test_num=100): 11 | """ We load all the three file here to save time in each epoch. """ 12 | train_data = pd.read_csv( 13 | config.train_rating, 14 | sep='\t', header=None, names=['user', 'item'], 15 | usecols=[0, 1], dtype={0: np.int32, 1: np.int32}) 16 | 17 | user_num = train_data['user'].max() + 1 18 | item_num = train_data['item'].max() + 1 19 | 20 | train_data = train_data.values.tolist() 21 | 22 | # load ratings as a dok matrix 23 | train_mat = sp.dok_matrix((user_num, item_num), dtype=np.float32) 24 | for x in train_data: 25 | train_mat[x[0], x[1]] = 1.0 26 | 27 | test_data = [] 28 | with open(config.test_negative, 'r') as fd: 29 | line = fd.readline() 30 | while line != None and line != '': 31 | arr = line.split('\t') 32 | u = eval(arr[0])[0] 33 | test_data.append([u, eval(arr[0])[1]]) 34 | for i in arr[1:]: 35 | test_data.append([u, int(i)]) 36 | line = fd.readline() 37 | return train_data, test_data, user_num, item_num, train_mat 38 | 39 | 40 | class NCFData(data.Dataset): 41 | def __init__(self, features, 42 | num_item, train_mat=None, num_ng=0, is_training=None): 43 | super(NCFData, self).__init__() 44 | """ Note that the labels are only useful when training, we thus 45 | add them in the ng_sample() function. 
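        Example (an illustrative sketch, not part of the original comment;
        it uses the values returned by load_all() above):
            train_dataset = NCFData(train_data, item_num, train_mat,
                                    num_ng=4, is_training=True)
            train_dataset.ng_sample()  # draw fresh negative samples before each epoch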
46 | """ 47 | self.features_ps = features 48 | self.num_item = num_item 49 | self.train_mat = train_mat 50 | self.num_ng = num_ng 51 | self.is_training = is_training 52 | self.labels = [0 for _ in range(len(features))] 53 | 54 | def ng_sample(self): 55 | assert self.is_training, 'no need to sampling when testing' 56 | 57 | self.features_ng = [] 58 | for x in self.features_ps: 59 | u = x[0] 60 | for t in range(self.num_ng): 61 | j = np.random.randint(self.num_item) 62 | while (u, j) in self.train_mat: 63 | j = np.random.randint(self.num_item) 64 | self.features_ng.append([u, j]) 65 | 66 | labels_ps = [1 for _ in range(len(self.features_ps))] 67 | labels_ng = [0 for _ in range(len(self.features_ng))] 68 | 69 | self.features_fill = self.features_ps + self.features_ng 70 | self.labels_fill = labels_ps + labels_ng 71 | 72 | def __len__(self): 73 | return (self.num_ng + 1) * len(self.labels) 74 | 75 | def __getitem__(self, idx): 76 | features = self.features_fill if self.is_training \ 77 | else self.features_ps 78 | labels = self.labels_fill if self.is_training \ 79 | else self.labels 80 | 81 | user = features[idx][0] 82 | item = features[idx][1] 83 | label = labels[idx] 84 | return user, item ,label -------------------------------------------------------------------------------- /workloads/ncf/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def hit(gt_item, pred_items): 6 | if gt_item in pred_items: 7 | return 1 8 | return 0 9 | 10 | 11 | def ndcg(gt_item, pred_items): 12 | if gt_item in pred_items: 13 | index = pred_items.index(gt_item) 14 | return np.reciprocal(np.log2(index+2)) 15 | return 0 16 | 17 | 18 | def metrics(model, test_loader, top_k): 19 | HR, NDCG = [], [] 20 | 21 | for user, item, label in test_loader: 22 | user = user.cuda() 23 | item = item.cuda() 24 | 25 | predictions = model(user, item) 26 | _, indices = torch.topk(predictions, top_k) 27 | recommends = torch.take( 28 | item, indices).cpu().numpy().tolist() 29 | 30 | gt_item = item[0].item() 31 | HR.append(hit(gt_item, recommends)) 32 | NDCG.append(ndcg(gt_item, recommends)) 33 | 34 | return np.mean(HR), np.mean(NDCG) 35 | -------------------------------------------------------------------------------- /workloads/ncf/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class NCF(nn.Module): 7 | def __init__(self, user_num, item_num, factor_num, num_layers, 8 | dropout, model, GMF_model=None, MLP_model=None): 9 | super(NCF, self).__init__() 10 | """ 11 | user_num: number of users; 12 | item_num: number of items; 13 | factor_num: number of predictive factors; 14 | num_layers: the number of layers in MLP model; 15 | dropout: dropout rate between fully connected layers; 16 | model: 'MLP', 'GMF', 'NeuMF-end', and 'NeuMF-pre'; 17 | GMF_model: pre-trained GMF weights; 18 | MLP_model: pre-trained MLP weights. 
19 | """ 20 | self.dropout = dropout 21 | self.model = model 22 | 23 | self.embed_user_GMF = nn.Embedding(user_num, factor_num) 24 | self.embed_item_GMF = nn.Embedding(item_num, factor_num) 25 | self.embed_user_MLP = nn.Embedding( 26 | user_num, factor_num * (2 ** (num_layers - 1))) 27 | self.embed_item_MLP = nn.Embedding( 28 | item_num, factor_num * (2 ** (num_layers - 1))) 29 | 30 | MLP_modules = [] 31 | for i in range(num_layers): 32 | input_size = factor_num * (2 ** (num_layers - i)) 33 | MLP_modules.append(nn.Dropout(p=self.dropout)) 34 | MLP_modules.append(nn.Linear(input_size, input_size//2)) 35 | MLP_modules.append(nn.ReLU()) 36 | self.MLP_layers = nn.Sequential(*MLP_modules) 37 | 38 | if self.model in ['MLP', 'GMF']: 39 | predict_size = factor_num 40 | else: 41 | predict_size = factor_num * 2 42 | self.predict_layer = nn.Linear(predict_size, 1) 43 | 44 | self._init_weight_(GMF_model, MLP_model) 45 | 46 | def _init_weight_(self, GMF_model, MLP_model): 47 | """ We leave the weights initialization here. """ 48 | if not self.model == 'NeuMF-pre': 49 | nn.init.normal_(self.embed_user_GMF.weight, std=0.01) 50 | nn.init.normal_(self.embed_user_MLP.weight, std=0.01) 51 | nn.init.normal_(self.embed_item_GMF.weight, std=0.01) 52 | nn.init.normal_(self.embed_item_MLP.weight, std=0.01) 53 | 54 | for m in self.MLP_layers: 55 | if isinstance(m, nn.Linear): 56 | nn.init.xavier_uniform_(m.weight) 57 | nn.init.kaiming_uniform_(self.predict_layer.weight, 58 | a=1, nonlinearity='sigmoid') 59 | 60 | for m in self.modules(): 61 | if isinstance(m, nn.Linear) and m.bias is not None: 62 | m.bias.data.zero_() 63 | else: 64 | # embedding layers 65 | self.embed_user_GMF.weight.data.copy_( 66 | GMF_model.embed_user_GMF.weight) 67 | self.embed_item_GMF.weight.data.copy_( 68 | GMF_model.embed_item_GMF.weight) 69 | self.embed_user_MLP.weight.data.copy_( 70 | MLP_model.embed_user_MLP.weight) 71 | self.embed_item_MLP.weight.data.copy_( 72 | MLP_model.embed_item_MLP.weight) 73 | 74 | # mlp layers 75 | for (m1, m2) in zip( 76 | self.MLP_layers, MLP_model.MLP_layers): 77 | if isinstance(m1, nn.Linear) and isinstance(m2, nn.Linear): 78 | m1.weight.data.copy_(m2.weight) 79 | m1.bias.data.copy_(m2.bias) 80 | 81 | # predict layers 82 | predict_weight = torch.cat([ 83 | GMF_model.predict_layer.weight, 84 | MLP_model.predict_layer.weight], dim=1) 85 | precit_bias = GMF_model.predict_layer.bias + \ 86 | MLP_model.predict_layer.bias 87 | 88 | self.predict_layer.weight.data.copy_(0.5 * predict_weight) 89 | self.predict_layer.bias.data.copy_(0.5 * precit_bias) 90 | 91 | def forward(self, user, item): 92 | if not self.model == 'MLP': 93 | embed_user_GMF = self.embed_user_GMF(user) 94 | embed_item_GMF = self.embed_item_GMF(item) 95 | output_GMF = embed_user_GMF * embed_item_GMF 96 | if not self.model == 'GMF': 97 | embed_user_MLP = self.embed_user_MLP(user) 98 | embed_item_MLP = self.embed_item_MLP(item) 99 | interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1) 100 | output_MLP = self.MLP_layers(interaction) 101 | 102 | if self.model == 'GMF': 103 | concat = output_GMF 104 | elif self.model == 'MLP': 105 | concat = output_MLP 106 | else: 107 | concat = torch.cat((output_GMF, output_MLP), -1) 108 | 109 | prediction = self.predict_layer(concat) 110 | return prediction.view(-1) 111 | -------------------------------------------------------------------------------- /workloads/pointnet/num_seg_classes.txt: -------------------------------------------------------------------------------- 1 | Airplane 4 2 | Bag 2 3 | Cap 2 4 | 
-------------------------------------------------------------------------------- /workloads/pointnet/num_seg_classes.txt: -------------------------------------------------------------------------------- 1 | Airplane 4 2 | Bag 2 3 | Cap 2 4 | Car 4 5 | Chair 4 6 | Earphone 3 7 | Guitar 3 8 | Knife 2 9 | Lamp 4 10 | Laptop 2 11 | Motorbike 6 12 | Mug 2 13 | Pistol 3 14 | Rocket 3 15 | Skateboard 3 16 | Table 3 -------------------------------------------------------------------------------- /workloads/rl/profile_rl_lunarlander.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | import time 4 | import argparse 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import workloads.settings as settings 8 | 9 | from stable_baselines3 import PPO, TD3 10 | from stable_baselines3.common.env_util import make_vec_env 11 | 12 | 13 | parser = argparse.ArgumentParser( 14 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 15 | ) 16 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 17 | 18 | args = parser.parse_args() 19 | 20 | args.total_time = settings.total_time 21 | 22 | warmup_epoch = 200 23 | benchmark_epoch = 1000 24 | 25 | 26 | def benchmark_rl(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 27 | t_start = time.time() 28 | 29 | if len(gpu_id) == 1: 30 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 31 | else: 32 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 33 | 34 | cudnn.benchmark = True 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | 37 | # Environments & Model 38 | env = make_vec_env("LunarLander-v2", n_envs=1) 39 | if model_name == 'PPO': 40 | model = PPO("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 41 | elif model_name == 'TD3': 42 | model = TD3("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 43 | else: 44 | raise ValueError(f'Unsupported model for this benchmark: {model_name}') 45 | 46 | # Warm-up 47 | model.learn(total_timesteps=warmup_epoch) 48 | warm_signal.value = 1 49 | t_warmend = time.time() 50 | 51 | # Benchmark 52 | print(f'==> Training {model_name} model with batch size {batch_size}, mixed precision: {mixed_precision}..') 53 | iter_num = 0 54 | while True: 55 | if time.time() - t_start >= args.total_time: 56 | t_pass = time.time() - t_warmend 57 | break 58 | model.learn(total_timesteps=1) 59 | iter_num += 1 60 | 61 | # Steps/sec; the name img_sec is kept for consistency with the other profiling scripts 62 | img_sec = iter_num * batch_size / t_pass 63 | 64 | # Results 65 | bench_list.append(img_sec) 66 | 
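A hypothetical driver for the profiler above (not part of this repo): benchmark_rl expects a shared results list and a warm-up flag, for example from the standard multiprocessing module.

import multiprocessing as mp

if __name__ == '__main__':
    manager = mp.Manager()
    bench_list = manager.list()     # collects the measured steps/sec
    warm_signal = mp.Value('i', 0)  # flips to 1 once warm-up completes
    p = mp.Process(target=benchmark_rl, args=('PPO', 64, False, [0], bench_list, warm_signal))
    p.start()
    p.join()
    print(f'throughput: {bench_list[0]:.1f} steps/s')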
-------------------------------------------------------------------------------- /workloads/rl/profile_rl_walker.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | import time 4 | import torch 5 | import argparse 6 | import torch.backends.cudnn as cudnn 7 | import workloads.settings as settings 8 | 9 | from stable_baselines3 import PPO, TD3 10 | from stable_baselines3.common.env_util import make_vec_env 11 | 12 | 13 | parser = argparse.ArgumentParser( 14 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 15 | ) 16 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 17 | 18 | args = parser.parse_args() 19 | 20 | args.total_time = settings.total_time 21 | 22 | warmup_epoch = 200 23 | benchmark_epoch = 1000 24 | 25 | 26 | def benchmark_rl2(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 27 | t_start = time.time() 28 | 29 | if len(gpu_id) == 1: 30 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 31 | else: 32 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 33 | 34 | cudnn.benchmark = True 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | 37 | # Environments & Model 38 | env = make_vec_env("BipedalWalker-v3", n_envs=1) 39 | if model_name == 'PPO': 40 | model = PPO("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 41 | elif model_name == 'TD3': 42 | model = TD3("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 43 | else: 44 | raise ValueError(f'Unsupported model for this benchmark: {model_name}') 45 | 46 | # Warm-up 47 | model.learn(total_timesteps=warmup_epoch) 48 | warm_signal.value = 1 49 | t_warmend = time.time() 50 | 51 | # Benchmark 52 | print(f'==> Training {model_name} model with batch size {batch_size}, mixed precision: {mixed_precision}..') 53 | iter_num = 0 54 | while True: 55 | if time.time() - t_start >= args.total_time: 56 | t_pass = time.time() - t_warmend 57 | break 58 | model.learn(total_timesteps=1) 59 | iter_num += 1 60 | 61 | # Steps/sec; the name img_sec is kept for consistency with the other profiling scripts 62 | img_sec = iter_num * batch_size / t_pass 63 | 64 | # Results 65 | bench_list.append(img_sec) 66 | -------------------------------------------------------------------------------- /workloads/settings.py: -------------------------------------------------------------------------------- 1 | # Data path 2 | data_dir = "/home/xxx/data/" 3 | total_time = 30 4 | -------------------------------------------------------------------------------- /workloads/translation/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.utils.data 4 | 5 | from translation.transformer import Constants 6 | 7 | def paired_collate_fn(insts): 8 | src_insts, tgt_insts = list(zip(*insts)) 9 | src_insts = collate_fn(src_insts) 10 | tgt_insts = collate_fn(tgt_insts) 11 | return (*src_insts, *tgt_insts) 12 | 13 | def collate_fn(insts): 14 | ''' Pad the instance to the max seq length in batch ''' 15 | 16 | max_len = max(len(inst) for inst in insts) 17 | 18 | batch_seq = np.array([ 19 | inst + [Constants.PAD] * (max_len - len(inst)) 20 | for inst in insts]) 21 | 22 | batch_pos = np.array([ 23 | [pos_i+1 if w_i != Constants.PAD else 0 24 | for pos_i, w_i in enumerate(inst)] for inst in batch_seq]) 25 | 26 | batch_seq = torch.LongTensor(batch_seq) 27 | batch_pos = torch.LongTensor(batch_pos) 28 | 29 | return batch_seq, batch_pos 30 | 31 | class TranslationDataset(torch.utils.data.Dataset): 32 | def __init__( 33 | self, src_word2idx, tgt_word2idx, 34 | src_insts=None, tgt_insts=None): 35 | 36 | assert src_insts 37 | assert not tgt_insts or (len(src_insts) == len(tgt_insts)) 38 | 39 | src_idx2word = {idx:word for word, idx in src_word2idx.items()} 40 | self._src_word2idx = src_word2idx 41 | self._src_idx2word = src_idx2word 42 | self._src_insts = src_insts 43 | 44 | tgt_idx2word = {idx:word for word, idx in tgt_word2idx.items()} 45 | self._tgt_word2idx = tgt_word2idx 46 | self._tgt_idx2word = tgt_idx2word 47 | self._tgt_insts = tgt_insts 48 | 49 | @property 50 | def n_insts(self): 51 | ''' Property for dataset size ''' 52 | return len(self._src_insts) 53 | 54 | @property 55 | def src_vocab_size(self): 56 | ''' Property for vocab size ''' 57 | return len(self._src_word2idx) 58 | 59 | @property 60 | def tgt_vocab_size(self): 61 | ''' Property for vocab size ''' 62 | return len(self._tgt_word2idx) 63 | 64 | @property 65 | def src_word2idx(self): 66 | ''' Property for word dictionary ''' 67 | return self._src_word2idx 68 | 69 | @property 70 | def tgt_word2idx(self): 71 | ''' Property for word dictionary ''' 72 | return self._tgt_word2idx 73 | 74 | @property 75 | def src_idx2word(self): 76 | ''' Property for index dictionary ''' 77 | return self._src_idx2word 78 | 79 | @property 80 | def tgt_idx2word(self): 81 | ''' Property for index dictionary ''' 82 | return self._tgt_idx2word 83 | 84 | def __len__(self): 85 | return self.n_insts 86 | 87 | def __getitem__(self, idx): 88 | if self._tgt_insts: 89 | return self._src_insts[idx], self._tgt_insts[idx] 90 | return self._src_insts[idx] 91 | 
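A minimal usage sketch (hypothetical, not part of this file): the paired collate function above plugs straight into a DataLoader and yields padded sequence/position batches.

from torch.utils.data import DataLoader

# `dataset` is assumed to be a TranslationDataset built with both src_insts and tgt_insts.
loader = DataLoader(dataset, batch_size=64, collate_fn=paired_collate_fn)
src_seq, src_pos, tgt_seq, tgt_pos = next(iter(loader))  # each: LongTensor of shape (batch, max_len)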
-------------------------------------------------------------------------------- /workloads/translation/multi-bleu.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | # $Id$ 7 | use warnings; 8 | use strict; 9 | 10 | my $lowercase = 0; 11 | if ($ARGV[0] eq "-lc") { 12 | $lowercase = 1; 13 | shift; 14 | } 15 | 16 | my $stem = $ARGV[0]; 17 | if (!defined $stem) { 18 | print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n"; 19 | print STDERR "Reads the references from reference or reference0, reference1, ...\n"; 20 | exit(1); 21 | } 22 | 23 | $stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0"; 24 | 25 | my @REF; 26 | my $ref=0; 27 | while(-e "$stem$ref") { 28 | &add_to_ref("$stem$ref",\@REF); 29 | $ref++; 30 | } 31 | &add_to_ref($stem,\@REF) if -e $stem; 32 | die("ERROR: could not find reference file $stem") unless scalar @REF; 33 | 34 | # add additional references explicitly specified on the command line 35 | shift; 36 | foreach my $stem (@ARGV) { 37 | &add_to_ref($stem,\@REF) if -e $stem; 38 | } 39 | 40 | 41 | 42 | sub add_to_ref { 43 | my ($file,$REF) = @_; 44 | my $s=0; 45 | if ($file =~ /.gz$/) { 46 | open(REF,"gzip -dc $file|") or die "Can't read $file"; 47 | } else { 48 | open(REF,$file) or die "Can't read $file"; 49 | } 50 | while(<REF>) { 51 | chomp; 52 | push @{$$REF[$s++]}, $_; 53 | } 54 | close(REF); 55 | } 56 | 57 | my(@CORRECT,@TOTAL,$length_translation,$length_reference); 58 | my $s=0; 59 | while(<STDIN>) { 60 | chomp; 61 | $_ = lc if $lowercase; 62 | my @WORD = split; 63 | my %REF_NGRAM = (); 64 | my $length_translation_this_sentence = scalar(@WORD); 65 | my ($closest_diff,$closest_length) = (9999,9999); 66 | foreach my $reference (@{$REF[$s]}) { 67 | # print "$s $_ <=> $reference\n"; 68 | $reference = lc($reference) if $lowercase; 69 | my @WORD = split(' ',$reference); 70 | my $length = scalar(@WORD); 71 | my $diff = abs($length_translation_this_sentence-$length); 72 | if ($diff < $closest_diff) { 73 | $closest_diff = $diff; 74 | $closest_length = $length; 75 | # print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n"; 76 | } elsif ($diff == $closest_diff) { 77 | $closest_length = $length if $length < $closest_length; 78 | # from two references with the same closeness to me 79 | # take the *shorter* into account, not the "first" one. 
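# The block below tallies each reference's n-grams (n = 1..4) and keeps, for
# every n-gram, its maximum count over all references: the clipping counts
# used by BLEU's modified n-gram precision.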
80 | } 81 | for(my $n=1;$n<=4;$n++) { 82 | my %REF_NGRAM_N = (); 83 | for(my $start=0;$start<=$#WORD-($n-1);$start++) { 84 | my $ngram = "$n"; 85 | for(my $w=0;$w<$n;$w++) { 86 | $ngram .= " ".$WORD[$start+$w]; 87 | } 88 | $REF_NGRAM_N{$ngram}++; 89 | } 90 | foreach my $ngram (keys %REF_NGRAM_N) { 91 | if (!defined($REF_NGRAM{$ngram}) || 92 | $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) { 93 | $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram}; 94 | # print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}
\n"; 95 | } 96 | } 97 | } 98 | } 99 | $length_translation += $length_translation_this_sentence; 100 | $length_reference += $closest_length; 101 | for(my $n=1;$n<=4;$n++) { 102 | my %T_NGRAM = (); 103 | for(my $start=0;$start<=$#WORD-($n-1);$start++) { 104 | my $ngram = "$n"; 105 | for(my $w=0;$w<$n;$w++) { 106 | $ngram .= " ".$WORD[$start+$w]; 107 | } 108 | $T_NGRAM{$ngram}++; 109 | } 110 | foreach my $ngram (keys %T_NGRAM) { 111 | $ngram =~ /^(\d+) /; 112 | my $n = $1; 113 | # my $corr = 0; 114 | # print "$i e $ngram $T_NGRAM{$ngram}
\n"; 115 | $TOTAL[$n] += $T_NGRAM{$ngram}; 116 | if (defined($REF_NGRAM{$ngram})) { 117 | if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) { 118 | $CORRECT[$n] += $T_NGRAM{$ngram}; 119 | # $corr = $T_NGRAM{$ngram}; 120 | # print "$i e correct1 $T_NGRAM{$ngram}
\n"; 121 | } 122 | else { 123 | $CORRECT[$n] += $REF_NGRAM{$ngram}; 124 | # $corr = $REF_NGRAM{$ngram}; 125 | # print "$i e correct2 $REF_NGRAM{$ngram}
\n"; 126 | } 127 | } 128 | # $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram}; 129 | # print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n" 130 | } 131 | } 132 | $s++; 133 | } 134 | my $brevity_penalty = 1; 135 | my $bleu = 0; 136 | 137 | my @bleu=(); 138 | 139 | for(my $n=1;$n<=4;$n++) { 140 | if (defined ($TOTAL[$n])){ 141 | $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0; 142 | # print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n"; 143 | }else{ 144 | $bleu[$n]=0; 145 | } 146 | } 147 | 148 | if ($length_reference==0){ 149 | printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n"; 150 | exit(1); 151 | } 152 | 153 | if ($length_translation<$length_reference) { 154 | $brevity_penalty = exp(1-$length_reference/$length_translation); 155 | } 156 | $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) + 157 | my_log( $bleu[2] ) + 158 | my_log( $bleu[3] ) + 159 | my_log( $bleu[4] ) ) / 4) ; 160 | printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n", 161 | 100*$bleu, 162 | 100*$bleu[1], 163 | 100*$bleu[2], 164 | 100*$bleu[3], 165 | 100*$bleu[4], 166 | $brevity_penalty, 167 | $length_translation / $length_reference, 168 | $length_translation, 169 | $length_reference; 170 | 171 | 172 | print STDERR "It is not advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n"; 173 | 174 | sub my_log { 175 | return -9999999999 unless $_[0]; 176 | return log($_[0]); 177 | } 178 | -------------------------------------------------------------------------------- /workloads/translation/nonbreaking_prefix.de: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | #no german words end in single lower-case letters, so we throw those in too. 7 | A 8 | B 9 | C 10 | D 11 | E 12 | F 13 | G 14 | H 15 | I 16 | J 17 | K 18 | L 19 | M 20 | N 21 | O 22 | P 23 | Q 24 | R 25 | S 26 | T 27 | U 28 | V 29 | W 30 | X 31 | Y 32 | Z 33 | a 34 | b 35 | c 36 | d 37 | e 38 | f 39 | g 40 | h 41 | i 42 | j 43 | k 44 | l 45 | m 46 | n 47 | o 48 | p 49 | q 50 | r 51 | s 52 | t 53 | u 54 | v 55 | w 56 | x 57 | y 58 | z 59 | 60 | 61 | #Roman Numerals. A dot after one of these is not a sentence break in German. 
62 | I 63 | II 64 | III 65 | IV 66 | V 67 | VI 68 | VII 69 | VIII 70 | IX 71 | X 72 | XI 73 | XII 74 | XIII 75 | XIV 76 | XV 77 | XVI 78 | XVII 79 | XVIII 80 | XIX 81 | XX 82 | i 83 | ii 84 | iii 85 | iv 86 | v 87 | vi 88 | vii 89 | viii 90 | ix 91 | x 92 | xi 93 | xii 94 | xiii 95 | xiv 96 | xv 97 | xvi 98 | xvii 99 | xviii 100 | xix 101 | xx 102 | 103 | #Titles and Honorifics 104 | Adj 105 | Adm 106 | Adv 107 | Asst 108 | Bart 109 | Bldg 110 | Brig 111 | Bros 112 | Capt 113 | Cmdr 114 | Col 115 | Comdr 116 | Con 117 | Corp 118 | Cpl 119 | DR 120 | Dr 121 | Ens 122 | Gen 123 | Gov 124 | Hon 125 | Hosp 126 | Insp 127 | Lt 128 | MM 129 | MR 130 | MRS 131 | MS 132 | Maj 133 | Messrs 134 | Mlle 135 | Mme 136 | Mr 137 | Mrs 138 | Ms 139 | Msgr 140 | Op 141 | Ord 142 | Pfc 143 | Ph 144 | Prof 145 | Pvt 146 | Rep 147 | Reps 148 | Res 149 | Rev 150 | Rt 151 | Sen 152 | Sens 153 | Sfc 154 | Sgt 155 | Sr 156 | St 157 | Supt 158 | Surg 159 | 160 | #Misc symbols 161 | Mio 162 | Mrd 163 | bzw 164 | v 165 | vs 166 | usw 167 | d.h 168 | z.B 169 | u.a 170 | etc 171 | Mrd 172 | MwSt 173 | ggf 174 | d.J 175 | D.h 176 | m.E 177 | vgl 178 | I.F 179 | z.T 180 | sogen 181 | ff 182 | u.E 183 | g.U 184 | g.g.A 185 | c.-à-d 186 | Buchst 187 | u.s.w 188 | sog 189 | u.ä 190 | Std 191 | evtl 192 | Zt 193 | Chr 194 | u.U 195 | o.ä 196 | Ltd 197 | b.A 198 | z.Zt 199 | spp 200 | sen 201 | SA 202 | k.o 203 | jun 204 | i.H.v 205 | dgl 206 | dergl 207 | Co 208 | zzt 209 | usf 210 | s.p.a 211 | Dkr 212 | Corp 213 | bzgl 214 | BSE 215 | 216 | #Number indicators 217 | # add #NUMERIC_ONLY# after the word if it should ONLY be non-breaking when a 0-9 digit follows it 218 | No 219 | Nos 220 | Art 221 | Nr 222 | pp 223 | ca 224 | Ca 225 | 226 | #Ordinals are done with . in German - "1." = "1st" in English 227 | 1 228 | 2 229 | 3 230 | 4 231 | 5 232 | 6 233 | 7 234 | 8 235 | 9 236 | 10 237 | 11 238 | 12 239 | 13 240 | 14 241 | 15 242 | 16 243 | 17 244 | 18 245 | 19 246 | 20 247 | 21 248 | 22 249 | 23 250 | 24 251 | 25 252 | 26 253 | 27 254 | 28 255 | 29 256 | 30 257 | 31 258 | 32 259 | 33 260 | 34 261 | 35 262 | 36 263 | 37 264 | 38 265 | 39 266 | 40 267 | 41 268 | 42 269 | 43 270 | 44 271 | 45 272 | 46 273 | 47 274 | 48 275 | 49 276 | 50 277 | 51 278 | 52 279 | 53 280 | 54 281 | 55 282 | 56 283 | 57 284 | 58 285 | 59 286 | 60 287 | 61 288 | 62 289 | 63 290 | 64 291 | 65 292 | 66 293 | 67 294 | 68 295 | 69 296 | 70 297 | 71 298 | 72 299 | 73 300 | 74 301 | 75 302 | 76 303 | 77 304 | 78 305 | 79 306 | 80 307 | 81 308 | 82 309 | 83 310 | 84 311 | 85 312 | 86 313 | 87 314 | 88 315 | 89 316 | 90 317 | 91 318 | 92 319 | 93 320 | 94 321 | 95 322 | 96 323 | 97 324 | 98 325 | 99 326 | -------------------------------------------------------------------------------- /workloads/translation/nonbreaking_prefix.en: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | A 7 | B 8 | C 9 | D 10 | E 11 | F 12 | G 13 | H 14 | I 15 | J 16 | K 17 | L 18 | M 19 | N 20 | O 21 | P 22 | Q 23 | R 24 | S 25 | T 26 | U 27 | V 28 | W 29 | X 30 | Y 31 | Z 32 | 33 | #List of titles. 
These are often followed by upper-case names, but do not indicate sentence breaks 34 | Adj 35 | Adm 36 | Adv 37 | Asst 38 | Bart 39 | Bldg 40 | Brig 41 | Bros 42 | Capt 43 | Cmdr 44 | Col 45 | Comdr 46 | Con 47 | Corp 48 | Cpl 49 | DR 50 | Dr 51 | Drs 52 | Ens 53 | Gen 54 | Gov 55 | Hon 56 | Hr 57 | Hosp 58 | Insp 59 | Lt 60 | MM 61 | MR 62 | MRS 63 | MS 64 | Maj 65 | Messrs 66 | Mlle 67 | Mme 68 | Mr 69 | Mrs 70 | Ms 71 | Msgr 72 | Op 73 | Ord 74 | Pfc 75 | Ph 76 | Prof 77 | Pvt 78 | Rep 79 | Reps 80 | Res 81 | Rev 82 | Rt 83 | Sen 84 | Sens 85 | Sfc 86 | Sgt 87 | Sr 88 | St 89 | Supt 90 | Surg 91 | 92 | #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) 93 | v 94 | vs 95 | i.e 96 | rev 97 | e.g 98 | 99 | #Numbers only. These should only induce breaks when followed by a numeric sequence 100 | # add NUMERIC_ONLY after the word for this function 101 | #This case is mostly for the english "No." which can either be a sentence of its own, or 102 | #if followed by a number, a non-breaking prefix 103 | No #NUMERIC_ONLY# 104 | Nos 105 | Art #NUMERIC_ONLY# 106 | Nr 107 | pp #NUMERIC_ONLY# 108 | 109 | #month abbreviations 110 | Jan 111 | Feb 112 | Mar 113 | Apr 114 | #May is a full word 115 | Jun 116 | Jul 117 | Aug 118 | Sep 119 | Oct 120 | Nov 121 | Dec 122 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Beam.py: -------------------------------------------------------------------------------- 1 | """ Manage beam search info structure. 2 | 3 | Heavily borrowed from OpenNMT-py. 4 | For code in OpenNMT-py, please check the following link: 5 | https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py 6 | """ 7 | 8 | import torch 9 | import numpy as np 10 | import transformer.Constants as Constants 11 | 12 | class Beam(): 13 | ''' Beam search ''' 14 | 15 | def __init__(self, size, device=False): 16 | 17 | self.size = size 18 | self._done = False 19 | 20 | # The score for each translation on the beam. 21 | self.scores = torch.zeros((size,), dtype=torch.float, device=device) 22 | self.all_scores = [] 23 | 24 | # The backpointers at each time-step. 25 | self.prev_ks = [] 26 | 27 | # The outputs at each time-step. 28 | self.next_ys = [torch.full((size,), Constants.PAD, dtype=torch.long, device=device)] 29 | self.next_ys[0][0] = Constants.BOS 30 | 31 | def get_current_state(self): 32 | "Get the outputs for the current timestep." 33 | return self.get_tentative_hypothesis() 34 | 35 | def get_current_origin(self): 36 | "Get the backpointers for the current timestep." 37 | return self.prev_ks[-1] 38 | 39 | @property 40 | def done(self): 41 | return self._done 42 | 43 | def advance(self, word_prob): 44 | "Update beam status and check if finished or not." 45 | num_words = word_prob.size(1) 46 | 47 | # Sum the previous scores. 
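# word_prob is (beam_size, vocab_size); broadcasting the running beam scores
# over the vocab axis yields the cumulative log-probability of every
# beam/word continuation. At the first step only beam 0 is populated, so a
# single row is used.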
48 | if len(self.prev_ks) > 0: 49 | beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) 50 | else: 51 | beam_lk = word_prob[0] 52 | 53 | flat_beam_lk = beam_lk.view(-1) 54 | 55 | best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) 56 | 57 | self.all_scores.append(self.scores) 58 | self.scores = best_scores 59 | 60 | # best_scores_id indexes the flattened (beam x word) array, 61 | # so recover which beam and which word each score came from 62 | prev_k = torch.div(best_scores_id, num_words, rounding_mode='floor')  # integer floor division keeps valid indices 63 | self.prev_ks.append(prev_k) 64 | self.next_ys.append(best_scores_id - prev_k * num_words) 65 | 66 | # End condition is when top-of-beam is EOS. 67 | if self.next_ys[-1][0].item() == Constants.EOS: 68 | self._done = True 69 | self.all_scores.append(self.scores) 70 | 71 | return self._done 72 | 73 | def sort_scores(self): 74 | "Sort the scores." 75 | return torch.sort(self.scores, 0, True) 76 | 77 | def get_the_best_score_and_idx(self): 78 | "Get the score of the best in the beam." 79 | scores, ids = self.sort_scores() 80 | return scores[1], ids[1] 81 | 82 | def get_tentative_hypothesis(self): 83 | "Get the decoded sequence for the current timestep." 84 | 85 | if len(self.next_ys) == 1: 86 | dec_seq = self.next_ys[0].unsqueeze(1) 87 | else: 88 | _, keys = self.sort_scores() 89 | hyps = [self.get_hypothesis(k) for k in keys] 90 | hyps = [[Constants.BOS] + h for h in hyps] 91 | dec_seq = torch.LongTensor(hyps) 92 | 93 | return dec_seq 94 | 95 | def get_hypothesis(self, k): 96 | """ Walk back to construct the full hypothesis. """ 97 | hyp = [] 98 | for j in range(len(self.prev_ks) - 1, -1, -1): 99 | hyp.append(self.next_ys[j+1][k]) 100 | k = self.prev_ks[j][k] 101 | 102 | return list(map(lambda x: x.item(), hyp[::-1])) 103 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Constants.py: -------------------------------------------------------------------------------- 1 | 2 | PAD = 0 3 | UNK = 1 4 | BOS = 2 5 | EOS = 3 6 | 7 | PAD_WORD = '<blank>' 8 | UNK_WORD = '<unk>' 9 | BOS_WORD = '<s>' 10 | EOS_WORD = '</s>' 11 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Layers.py: -------------------------------------------------------------------------------- 1 | ''' Define the Layers ''' 2 | import torch.nn as nn 3 | from .SubLayers import MultiHeadAttention, PositionwiseFeedForward 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | 8 | class EncoderLayer(nn.Module): 9 | ''' Compose with two layers ''' 10 | 11 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 12 | super(EncoderLayer, self).__init__() 13 | self.slf_attn = MultiHeadAttention( 14 | n_head, d_model, d_k, d_v, dropout=dropout) 15 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 16 | 17 | def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None): 18 | enc_output, enc_slf_attn = self.slf_attn( 19 | enc_input, enc_input, enc_input, mask=slf_attn_mask) 20 | enc_output *= non_pad_mask 21 | 22 | enc_output = self.pos_ffn(enc_output) 23 | enc_output *= non_pad_mask 24 | 25 | return enc_output, enc_slf_attn 26 | 27 | 28 | class DecoderLayer(nn.Module): 29 | ''' Compose with three layers ''' 30 | 31 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 32 | super(DecoderLayer, self).__init__() 33 | self.slf_attn = MultiHeadAttention(n_head, 
d_model, d_k, d_v, dropout=dropout) 34 | self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 35 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 36 | 37 | def forward(self, dec_input, enc_output, non_pad_mask=None, slf_attn_mask=None, dec_enc_attn_mask=None): 38 | dec_output, dec_slf_attn = self.slf_attn( 39 | dec_input, dec_input, dec_input, mask=slf_attn_mask) 40 | dec_output *= non_pad_mask 41 | 42 | dec_output, dec_enc_attn = self.enc_attn( 43 | dec_output, enc_output, enc_output, mask=dec_enc_attn_mask) 44 | dec_output *= non_pad_mask 45 | 46 | dec_output = self.pos_ffn(dec_output) 47 | dec_output *= non_pad_mask 48 | 49 | return dec_output, dec_slf_attn, dec_enc_attn 50 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | ''' Scaled Dot-Product Attention ''' 9 | 10 | def __init__(self, temperature, attn_dropout=0.1): 11 | super().__init__() 12 | self.temperature = temperature 13 | self.dropout = nn.Dropout(attn_dropout) 14 | self.softmax = nn.Softmax(dim=2) 15 | 16 | def forward(self, q, k, v, mask=None): 17 | 18 | attn = torch.bmm(q, k.transpose(1, 2)) 19 | attn = attn / self.temperature 20 | 21 | if mask is not None: 22 | attn = attn.masked_fill(mask, -np.inf) 23 | 24 | attn = self.softmax(attn) 25 | attn = self.dropout(attn) 26 | output = torch.bmm(attn, v) 27 | 28 | return output, attn 29 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Optim.py: -------------------------------------------------------------------------------- 1 | '''A wrapper class for optimizer ''' 2 | import numpy as np 3 | 4 | class ScheduledOptim(): 5 | '''A simple wrapper class for learning rate scheduling''' 6 | 7 | def __init__(self, optimizer, d_model, n_warmup_steps): 8 | self._optimizer = optimizer 9 | self.n_warmup_steps = n_warmup_steps 10 | self.n_current_steps = 0 11 | self.init_lr = np.power(d_model, -0.5) 12 | 13 | def step_and_update_lr(self): 14 | "Step with the inner optimizer" 15 | self._update_learning_rate() 16 | self._optimizer.step() 17 | 18 | def zero_grad(self): 19 | "Zero out the gradients by the inner optimizer" 20 | self._optimizer.zero_grad() 21 | 22 | def _get_lr_scale(self): 23 | return np.min([ 24 | np.power(self.n_current_steps, -0.5), 25 | np.power(self.n_warmup_steps, -1.5) * self.n_current_steps]) 26 | 27 | def _update_learning_rate(self): 28 | ''' Learning rate scheduling per step ''' 29 | 30 | self.n_current_steps += 1 31 | lr = self.init_lr * self._get_lr_scale() 32 | 33 | for param_group in self._optimizer.param_groups: 34 | param_group['lr'] = lr 35 | 36 | -------------------------------------------------------------------------------- /workloads/translation/transformer/SubLayers.py: -------------------------------------------------------------------------------- 1 | ''' Define the sublayers in encoder/decoder layer ''' 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .Modules import ScaledDotProductAttention 6 | 7 | __author__ = "Yu-Hsiang Huang" 8 | 9 | class MultiHeadAttention(nn.Module): 10 | ''' Multi-Head Attention module ''' 11 | 12 | def __init__(self, n_head, d_model, d_k, d_v, 
dropout=0.1): 13 | super().__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = nn.LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | 32 | self.dropout = nn.Dropout(dropout) 33 | 34 | 35 | def forward(self, q, k, v, mask=None): 36 | 37 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 38 | 39 | sz_b, len_q, _ = q.size() 40 | sz_b, len_k, _ = k.size() 41 | sz_b, len_v, _ = v.size() 42 | 43 | residual = q 44 | 45 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 46 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 47 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 48 | 49 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 50 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 51 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 52 | 53 | mask = mask.repeat(n_head, 1, 1) # (n*b) x .. x .. 54 | output, attn = self.attention(q, k, v, mask=mask) 55 | 56 | output = output.view(n_head, sz_b, len_q, d_v) 57 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 58 | 59 | output = self.dropout(self.fc(output)) 60 | output = self.layer_norm(output + residual) 61 | 62 | return output, attn 63 | 64 | class PositionwiseFeedForward(nn.Module): 65 | ''' A two-feed-forward-layer module ''' 66 | 67 | def __init__(self, d_in, d_hid, dropout=0.1): 68 | super().__init__() 69 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 70 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 71 | self.layer_norm = nn.LayerNorm(d_in) 72 | self.dropout = nn.Dropout(dropout) 73 | 74 | def forward(self, x): 75 | residual = x 76 | output = x.transpose(1, 2) 77 | output = self.w_2(F.relu(self.w_1(output))) 78 | output = output.transpose(1, 2) 79 | output = self.dropout(output) 80 | output = self.layer_norm(output + residual) 81 | return output 82 | -------------------------------------------------------------------------------- /workloads/translation/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | --------------------------------------------------------------------------------
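As a quick sanity check of the attention sublayers above, a hypothetical standalone snippet (not part of the repo; assumes the repository root is on PYTHONPATH so the package imports resolve):

import torch
from workloads.translation.transformer.SubLayers import MultiHeadAttention, PositionwiseFeedForward

b, lq, d_model, n_head, d_k, d_v = 2, 5, 512, 8, 64, 64
x = torch.randn(b, lq, d_model)
# MultiHeadAttention.forward calls mask.repeat(...), so a mask must be supplied;
# an all-False boolean mask means "attend everywhere".
mask = torch.zeros(b, lq, lq, dtype=torch.bool)
attn_layer = MultiHeadAttention(n_head, d_model, d_k, d_v)
out, attn = attn_layer(x, x, x, mask=mask)
assert out.shape == (b, lq, d_model)       # residual + layer norm preserve d_model
assert attn.shape == (n_head * b, lq, lq)  # attention maps, heads folded into batch
ffn = PositionwiseFeedForward(d_model, d_hid=2048)
assert ffn(out).shape == (b, lq, d_model)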