├── .gitignore ├── LICENSE.txt ├── README.md ├── profile ├── adaptdl │ ├── cifar_ddp.py │ ├── dcgan_exmp.py │ ├── extract_data.py │ ├── pollux_cifar.py │ ├── pollux_cifar_multigpu.py │ ├── pollux_dcgan.py │ ├── pollux_mnist.py │ ├── pollux_pointnet.py │ ├── pollux_pointnet_seg.py │ ├── result │ │ ├── EfficientNetB0 │ │ │ ├── false.csv │ │ │ └── true.csv │ │ └── plot.ipynb │ └── run.sh ├── co_collect.py ├── main_co.py ├── main_co3.py ├── main_single.py ├── requirements.txt ├── single_collect.py └── smi.py ├── simulation ├── analyzer │ ├── analyzer.py │ └── single_data.csv ├── cluster.py ├── data │ ├── Venus │ │ ├── cluster_full_log.csv │ │ ├── cluster_throughput.csv │ │ └── vc_config.csv │ ├── colocate_info.csv │ ├── dict2csv.py │ ├── log_process.py │ ├── prepare_data.sh │ ├── trace_parser.py │ └── vc_dict_generator.py ├── estimator │ ├── __init__.py │ ├── ebm │ │ └── Venus_Sept_ebm_weekly_updated.csv │ ├── estimator.py │ ├── estimator_lucid.ipynb │ ├── lgb │ │ └── Venus_Sept_lgb.csv │ └── utils.py ├── job.py ├── plot │ └── result_plot.ipynb ├── policy │ ├── __init__.py │ ├── fifo.py │ ├── lucid.py │ ├── placer │ │ ├── __init__.py │ │ ├── consolidate.py │ │ ├── consolidateFirst.py │ │ ├── consolidateWithShare.py │ │ └── random.py │ ├── policy.py │ ├── qssf.py │ ├── sjf.py │ ├── srtf.py │ └── tiresias.py ├── predictor │ ├── Venus_throughput_pred.csv │ └── predictor.ipynb ├── profiler │ ├── __init__.py │ ├── lgf.py │ └── profiler.py ├── requirements.txt ├── run.sh ├── simulator.py ├── updater.py └── utils.py └── workloads ├── bert └── profile_bert.py ├── cifar ├── models │ ├── __init__.py │ ├── alexnet.py │ ├── densenet.py │ ├── dpn.py │ ├── efficientnet.py │ ├── googlenet.py │ ├── lenet.py │ ├── mobilenet.py │ ├── mobilenetv2.py │ ├── pnasnet.py │ ├── preact_resnet.py │ ├── resnet.py │ ├── resnext.py │ ├── senet.py │ ├── shufflenet.py │ ├── shufflenetv2.py │ └── vgg.py ├── profile_cifar.py └── run.sh ├── dcgan ├── download.py └── profile_dcgan.py ├── deepspeech2 ├── data │ ├── __init__.py │ ├── an4.py │ ├── cmu-arctic-manifests.tar.gz │ ├── common_voice.py │ ├── data_loader.py │ ├── librispeech.py │ ├── merge_manifests.py │ ├── sparse_image_warp.py │ ├── spec_augment.py │ ├── ted.py │ ├── utils.py │ └── voxforge.py ├── decoder.py ├── labels.json ├── models.py └── profile_deepspeech.py ├── imagenet ├── profile_imagenet.py ├── profile_imagenet_ddp.py └── requirements.txt ├── lstm ├── data.py ├── models.py └── profile_lstm.py ├── ncf ├── config.py ├── data_utils.py ├── evaluate.py ├── models.py └── profile_ncf.py ├── pointnet ├── dataset.py ├── num_seg_classes.txt ├── pointnet.py └── profile_pointnet.py ├── rl ├── profile_rl_lunarlander.py └── profile_rl_walker.py ├── settings.py └── translation ├── dataset.py ├── multi-bleu.perl ├── nonbreaking_prefix.de ├── nonbreaking_prefix.en ├── pollux_transformer.py ├── preprocess.py ├── profile_transformer.py ├── tokenizer.perl └── transformer ├── Beam.py ├── Constants.py ├── Layers.py ├── Models.py ├── Modules.py ├── Optim.py ├── SubLayers.py ├── Translator.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python egg metadata, regenerated from source files by setuptools. 2 | *.egg-info 3 | .eggs/ 4 | 5 | # PyPI distribution artifacts. 6 | build/ 7 | dist/ 8 | 9 | # Byte-compiled 10 | __pycache__/ 11 | .cache/ 12 | 13 | # Compiled python modules.
14 | *.pyc 15 | 16 | # PyCharm/vscode 17 | .idea 18 | .vscode 19 | 20 | # jupyter checkpoints 21 | **/.ipynb_checkpoints 22 | 23 | # Other 24 | *.DS_Store -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | S-Lab License 1.0 2 | 3 | Copyright 2022 S-Lab 4 | 5 | Redistribution and use for non-commercial purpose in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 10 | 11 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 | 13 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 | 15 | 4. 
In the event that redistribution and/or use for commercial purpose in source or binary forms, with or without modification is required, please contact the contributor(s) of the work -------------------------------------------------------------------------------- /profile/adaptdl/extract_data.py: -------------------------------------------------------------------------------- 1 | from tensorboard.backend.event_processing import event_accumulator 2 | import argparse 3 | import pandas as pd 4 | from tqdm import tqdm 5 | 6 | 7 | def main(): 8 | # load log data 9 | parser = argparse.ArgumentParser(description='Export tensorboard data') 10 | parser.add_argument('--in-path', type=str, default='./result/VGG/True/200', help='Tensorboard event files or a single tensorboard file location') 11 | parser.add_argument('--ex-path', type=str, default='./result/VGG/true.csv', help='location to save the exported data') 12 | 13 | args = parser.parse_args() 14 | event_data = event_accumulator.EventAccumulator(args.in_path) # a python interface for loading Event data 15 | event_data.Reload() # synchronously loads all of the data written so far 16 | # print(event_data.Tags()) # print all tags 17 | keys = event_data.scalars.Keys() # get all tags,save in a list 18 | # print(keys) 19 | df = pd.DataFrame(columns=keys[7:]) # my first column is training loss per iteration, so I abandon it 20 | for key in tqdm(keys): 21 | # print(key) 22 | if key == 'Loss/Train' or key == 'Accuracy/Train' or key == 'Loss/Valid' or key == 'Accuracy/Valid': 23 | df[key] = pd.DataFrame(event_data.Scalars(key)).value 24 | 25 | df.to_csv(args.ex_path) 26 | 27 | print("Tensorboard data exported successfully") 28 | 29 | 30 | if __name__ == '__main__': 31 | main() -------------------------------------------------------------------------------- /profile/adaptdl/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | worker_num=2 3 | model=$2 4 | batch_size=$3 5 | 6 | last_rank=`expr $worker_num - 1` 7 | 8 | # nodes=$(scontrol show hostnames $SLURM_JOB_NODELIST) # Getting the node names 9 | nodes='127.0.0.1' 10 | nodes_array=( $nodes ) 11 | node1=${nodes_array[0]} 12 | 13 | #export ADAPTDL_CHECKPOINT_PATH=cifar-checkpoint 14 | # export ADAPTDL_SHARE_PATH=data 15 | # export ADAPTDL_JOB_ID=$SLURM_JOB_ID 16 | export ADAPTDL_MASTER_ADDR=$node1 17 | export ADAPTDL_MASTER_PORT=47020 18 | export ADAPTDL_NUM_REPLICAS=$worker_num 19 | 20 | 21 | ADAPTDL_REPLICA_RANK=0 python3 -u pollux_mnist.py & 22 | ADAPTDL_REPLICA_RANK=1 python3 -u pollux_mnist.py 23 | 24 | # # batch_size=128 25 | # for (( i=0; i < $worker_num; i++ )) 26 | # do 27 | # # node=${nodes_array[$i]} 28 | # node=${nodes_array[0]} 29 | # if [[ $i -lt `expr $worker_num-1` ]] 30 | # then 31 | # ADAPTDL_REPLICA_RANK=$i python3 -u pollux_cifar.py & 32 | # else 33 | # ADAPTDL_REPLICA_RANK=$i python3 -u pollux_cifar.py 34 | # fi 35 | # done -------------------------------------------------------------------------------- /profile/co_collect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import time 4 | from multiprocessing import Process, Manager, Value 5 | 6 | from smi import smi_getter 7 | 8 | 9 | def collect(fun1, m_list1, dataset1, bs_list1, fun2, m_list2, dataset2, bs_list2, gpu_id): 10 | metric_list = [] 11 | if dataset1 == 'LunarLander' or dataset1 == 'BipedalWalker' or dataset1 == 'Multi30k': 12 | mp_list1 = [0] 13 | else: 14 | mp_list1 = [0, 1] 15 | 16 | if 
dataset2 == 'LunarLander' or dataset2 == 'BipedalWalker' or dataset2 == 'Multi30k': 17 | mp_list2 = [0] 18 | else: 19 | mp_list2 = [0, 1] 20 | for model_name1 in m_list1: 21 | for model_name2 in m_list2: 22 | for batch_size1 in bs_list1: 23 | for batch_size2 in bs_list2: 24 | if model_name1 == 'resnet50': 25 | batch_size1 = 32 26 | if model_name2 == 'resnet50': 27 | batch_size2 = 32 28 | for mp1 in mp_list1: 29 | for mp2 in mp_list2: 30 | # Check whether there are duplicated pairs 31 | df_check = pd.DataFrame(metric_list, columns=['model1', 'dataset1', 'gpu_num1', 'batchsize1', 'amp1', 'speed1', 'model2', 'dataset2', 'gpu_num2', 'batchsize2', 'amp2', 'speed2', 'gpu_util', 'gmem_util', 'gmem']) 32 | # print(df_check[['model1', 'batchsize1', 'amp1', 'model2', 'batchsize2', 'amp2']]) 33 | 34 | info = df_check.query(" model1 == @model_name2 and model2 == @model_name1 and batchsize1 == @batch_size2 and batchsize2 == @batch_size1 and dataset1 == @dataset2 and dataset2 == @dataset1 and amp1 == @mp2 and amp2 == @mp1") 35 | if not info.empty: 36 | continue 37 | info2 = df_check.query(" model1 == @model_name1 and model2 == @model_name2 and batchsize1 == @batch_size1 and batchsize2 == @batch_size2 and dataset1 == @dataset1 and dataset2 == @dataset2 and amp1 == @mp1 and amp2 == @mp2") 38 | if not info2.empty: 39 | continue 40 | 41 | # collect co-locate jobs gpu info 42 | print('co-locate:') 43 | with Manager() as manager: 44 | smi_list = manager.list() 45 | speed_list1 = manager.list() 46 | speed_list2 = manager.list() 47 | signal1 = Value('i', 0) 48 | signal2 = Value('i', 0) 49 | 50 | p1 = Process(target=fun1, args=(model_name1, batch_size1, mp1, gpu_id, speed_list1, signal1, )) 51 | p2 = Process(target=fun2, args=(model_name2, batch_size2, mp2, gpu_id, speed_list2, signal2, )) 52 | p3 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 53 | 54 | p1.start() 55 | p2.start() 56 | while True: 57 | if signal1.value == 1 and signal2.value == 1: 58 | p3.start() 59 | break 60 | 61 | p1.join() 62 | p2.join() 63 | p3.terminate() 64 | 65 | speed_list1 = list(speed_list1) 66 | speed_list2 = list(speed_list2) 67 | smi_df = pd.DataFrame(list(smi_list)) 68 | smi_df.drop(index=smi_df.index[0], inplace=True)  # drop the warm-up sample in place 69 | smi_df.drop(index=smi_df.index[-1], inplace=True)  # drop the tail sample 70 | 71 | d1 = {'model1': model_name1, 'dataset1': dataset1, 'gpu_num1': len(gpu_id), 'batchsize1': batch_size1, 'amp1': mp1} 72 | d1['speed1'] = round(speed_list1[0], 3) 73 | d2 = {'model2': model_name2, 'dataset2': dataset2, 'gpu_num2': len(gpu_id), 'batchsize2': batch_size2, 'amp2': mp2} 74 | d2['speed2'] = round(speed_list2[0], 3) 75 | metric_dict = {} 76 | metric_dict.update(d1) 77 | metric_dict.update(d2) 78 | # Process gpu info 79 | smi_df['gpuUtil'] = pd.to_numeric(smi_df['gpuUtil']) 80 | metric_dict['gpu_util'] = round(pd.to_numeric(smi_df['gpuUtil']).mean(), 3) 81 | metric_dict['gmem_util'] = round(pd.to_numeric(smi_df['gpuMemUtil']).mean(), 3) 82 | smi_df['gpuMem'] = smi_df['gpuMem'].apply(lambda x: x[:-4]).astype('int64') 83 | metric_dict['gmem'] = round(smi_df['gpuMem'].max(), 3) 84 | 85 | # print(metric_dict) 86 | metric_list.append(metric_dict) 87 | time.sleep(2) 88 | 89 | return metric_list -------------------------------------------------------------------------------- /profile/main_co3.py: -------------------------------------------------------------------------------- 1 | #NOTE: CUDA_LAUNCH_BLOCKING=1 python main_co.py will slow down the speed 2 | from __future__ import print_function 3 | 4 | import torch.backends.cudnn as cudnn 5 | import torch.nn.functional
as F 6 | import torch.optim as optim 7 | import torch 8 | import numpy as np 9 | import os 10 | import pandas as pd 11 | import time 12 | import sys 13 | sys.path.append('./workloads/') 14 | 15 | from multiprocessing import Process, Manager, Value 16 | from workloads.lstm.profile_lstm import benchmark_lstm 17 | from workloads.imagenet.profile_imagenet import benchmark_imagenet 18 | from workloads.cifar.profile_cifar import benchmark_cifar 19 | from workloads.pointnet.profile_pointnet import benchmark_pointnet 20 | from workloads.dcgan.profile_dcgan import benchmark_dcgan 21 | from workloads.rl.profile_rl_lunarlander import benchmark_rl 22 | from workloads.rl.profile_rl_walker import benchmark_rl2 23 | from workloads.bert.profile_bert import benchmark_bert 24 | from workloads.ncf.profile_ncf import benchmark_ncf 25 | from smi import smi_getter 26 | from co_collect import collect 27 | 28 | # model_list_imagenet = ['resnet18', 'resnet50', 'mobilenet_v3_small', 'efficientnet_b0', 'shufflenet_v2_x0_5', 'vgg11', 'alexnet'] 29 | # model_list_cifar = ['AlexNet', 'EfficientNetB0', 'MobileNetV2', 'ResNet18', 'ResNet50', 'ShuffleNetV2', 'VGG'] 30 | metric_list = [] 31 | model_name1 = 'ResNet18' 32 | model_name2 = 'ResNet18' 33 | model_name3 = 'ResNet18' 34 | batch_size1 = 32 35 | batch_size2 = 32 36 | batch_size3 = 32 37 | 38 | gpu_id = [0] 39 | 40 | start_record = time.time() 41 | with Manager() as manager: 42 | smi_list = manager.list() 43 | speed_list1 = manager.list() 44 | speed_list2 = manager.list() 45 | speed_list3 = manager.list() 46 | signal1 = Value('i', 0) 47 | signal2 = Value('i', 0) 48 | signal3 = Value('i', 0) 49 | 50 | p1 = Process(target=benchmark_cifar, args=(model_name1, batch_size1, 0, gpu_id, speed_list1, signal1, )) 51 | p2 = Process(target=benchmark_cifar, args=(model_name2, batch_size2, 0, gpu_id, speed_list2, signal2, )) 52 | p3 = Process(target=benchmark_cifar, args=(model_name3, batch_size3, 0, gpu_id, speed_list3, signal3, )) 53 | p4 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 54 | 55 | p1.start() 56 | p2.start() 57 | p3.start() 58 | 59 | while True: 60 | if signal1.value == 1 and signal2.value == 1 and signal3.value == 1: 61 | # if signal1.value == 1: 62 | p4.start() 63 | break 64 | 65 | p1.join() 66 | p2.join() 67 | p3.join() 68 | p4.terminate() 69 | 70 | smi_df = pd.DataFrame(list(smi_list)) 71 | 72 | print(f'1: {list(speed_list1)}, 2: {list(speed_list2)}, 3:{list(speed_list3)}') 73 | # print(f'1: {list(speed_list1)}') 74 | print(smi_df) 75 | 76 | # mlist_imagenet = ['mobilenet_v3_small'] 77 | # print('imagenet + imagenet') 78 | # metric_list1 = collect(benchmark_imagenet, mlist_imagenet, 'ImageNet', bs_list, benchmark_imagenet, mlist_imagenet, 'ImageNet', bs_list, gpu_id) 79 | # df = pd.DataFrame(metric_list1) 80 | # df.to_csv('./1.csv') 81 | 82 | # smi_list = [] 83 | # smi_getter(sys.argv[1:], smi_list, gpu_id) 84 | 85 | 86 | end_record = time.time() 87 | print(f'time usage: {end_record - start_record}') -------------------------------------------------------------------------------- /profile/main_single.py: -------------------------------------------------------------------------------- 1 | #NOTE: CUDA_LAUNCH_BLOCKING=1 python main.py 2 | from __future__ import print_function 3 | import os 4 | import pandas as pd 5 | import time 6 | 7 | import sys 8 | sys.path.append('./workloads/') 9 | 10 | from workloads.lstm.profile_lstm import benchmark_lstm 11 | from workloads.imagenet.profile_imagenet import benchmark_imagenet 12 | from 
workloads.cifar.profile_cifar import benchmark_cifar 13 | from workloads.pointnet.profile_pointnet import benchmark_pointnet 14 | from workloads.dcgan.profile_dcgan import benchmark_dcgan 15 | from workloads.rl.profile_rl_lunarlander import benchmark_rl 16 | from workloads.rl.profile_rl_walker import benchmark_rl2 17 | from workloads.bert.profile_bert import benchmark_bert 18 | from workloads.ncf.profile_ncf import benchmark_ncf 19 | from workloads.translation.profile_transformer import benchmark_transformer 20 | 21 | from single_collect import s_collect 22 | 23 | model_list_imagenet = ['resnet50', 'mobilenet_v3_small'] 24 | # model_list_cifar = ['ResNet18', 'MobileNetV2', 'EfficientNetB0', 'VGG'] 25 | model_list_cifar = ['ResNet18'] 26 | bs_list = [64] 27 | gpu_id = [0] 28 | metric_list = [] 29 | mp_list = [0, 1] 30 | 31 | os.makedirs('result/', exist_ok=True) 32 | 33 | # # Single: imagenet metric 34 | # print('Classification: imagenet') 35 | # dataset = 'imagenet' 36 | # for model_name in model_list_imagenet: 37 | # for batch_size in bs_list: 38 | # for mp in mp_list: 39 | # # collect single job gpu info 40 | # metric_dict = s_collect(benchmark_imagenet, dataset, model_name, batch_size, mp, gpu_id) 41 | # metric_list.append(metric_dict) 42 | # time.sleep(2) 43 | 44 | # Single: cifar10 metric 45 | print('Classification: cifar') 46 | dataset = 'cifar10' 47 | for model_name in model_list_cifar: 48 | for batch_size in bs_list: 49 | for mp in mp_list: 50 | # collect single job gpu info 51 | metric_dict = s_collect(benchmark_cifar, dataset, model_name, batch_size, mp, gpu_id) 52 | metric_list.append(metric_dict) 53 | time.sleep(2) 54 | 55 | # # Single: pointnet 56 | # print('3D: pointnet') 57 | # for batch_size in bs_list: 58 | # for mp in mp_list: 59 | # metric_dict = s_collect(benchmark_pointnet, dataset='shapenet', model_name='pointnet', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 60 | # metric_list.append(metric_dict) 61 | # time.sleep(2) 62 | 63 | # # Single: dcgan 64 | # print('CV: dcgan') 65 | # for batch_size in bs_list: 66 | # for mp in mp_list: 67 | # metric_dict = s_collect(benchmark_dcgan, dataset='LSUN', model_name='dcgan', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 68 | # metric_list.append(metric_dict) 69 | # time.sleep(2) 70 | 71 | # # Single: rl-lunalander 72 | # print('RL: LunarLander-v2') 73 | # for batch_size in bs_list: 74 | # metric_dict = s_collect(benchmark_rl, dataset='LunarLander-v2', model_name='PPO', batch_size=batch_size, mp=0, gpu_id=gpu_id) 75 | # metric_list.append(metric_dict) 76 | # time.sleep(2) 77 | 78 | # # Single: rl-Bipedal Walker 79 | # print('RL: Bipedal Walker') 80 | # for batch_size in bs_list: 81 | # metric_dict = s_collect(benchmark_rl2, dataset='BipedalWalker-v3', model_name='TD3', batch_size=batch_size, mp=0, gpu_id=gpu_id) 82 | # metric_list.append(metric_dict) 83 | # time.sleep(2) 84 | 85 | # # Single: ncf 86 | # print('Recommendation: ncf') 87 | # for batch_size in [64, 128]: 88 | # for mp in mp_list: 89 | # metric_dict = s_collect(benchmark_ncf, dataset='MovieLens', model_name='NeuMF-pre', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 90 | # metric_list.append(metric_dict) 91 | # time.sleep(2) 92 | 93 | # # Single: lstm 94 | # print('Language Modeling: lstm') 95 | # for batch_size in [64, 128]: 96 | # for mp in mp_list: 97 | # metric_dict = s_collect(benchmark_lstm, dataset='Wikitext2', model_name='LSTM', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 98 | # metric_list.append(metric_dict) 99 | # time.sleep(2) 100 | 101 | # # Single: bert 102 
| # print('Question Answering: bert') 103 | # for batch_size in [32]: 104 | # for mp in mp_list: 105 | # metric_dict = s_collect(benchmark_bert, dataset='SQUAD', model_name='bert', batch_size=batch_size, mp=mp, gpu_id=gpu_id) 106 | # metric_list.append(metric_dict) 107 | # time.sleep(2) 108 | 109 | # # Single: transformer 110 | # print('Translation: transformer') 111 | # for batch_size in [32, 64]: 112 | # metric_dict = s_collect(benchmark_transformer, dataset='multi30k', model_name='transformer', batch_size=batch_size, mp=0, gpu_id=gpu_id) 113 | # metric_list.append(metric_dict) 114 | # time.sleep(2) 115 | 116 | # print(pd.DataFrame(metric_list)) 117 | df = pd.DataFrame(metric_list) 118 | # df.replace( 119 | # ['imagenet', 'cifar10', 'shapenet', 'LSUN', 'LunarLander-v2', 'BipedalWalker-v3', 'MovieLens', 'Wikitext2', 'SQUAD', 'multi30k'], 120 | # ['ImageNet', 'CIFAR-10', 'ShapeNet', 'LSUN', 'LunarLander', 'BipedalWalker', 'MovieLens', 'Wikitext2', 'SQuAD', 'Multi30k'], inplace=True) 121 | 122 | # df.replace( 123 | # ['resnet50', 'mobilenet_v3_small', 'EfficientNetB0', 'pointnet', 'dcgan', 'NeuMF-pre', 'bert', 'transformer'], 124 | # ['ResNet50', 'MobileNetV3', 'EfficientNet', 'PointNet', 'DCGAN', 'NeuMF', 'BERT', 'Transformer'], inplace=True) 125 | 126 | df.to_csv('./result/single_cifar.csv') 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /profile/requirements.txt: -------------------------------------------------------------------------------- 1 | Box2D 2 | Box2D-kengz 3 | swig 4 | gym 5 | transformers 6 | stable_baselines3 7 | scipy 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /profile/single_collect.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pandas as pd 3 | import time 4 | from multiprocessing import Process, Manager, Value 5 | 6 | from smi import smi_getter 7 | 8 | 9 | def s_collect(fun, dataset, model_name, batch_size, mp, gpu_id): 10 | with Manager() as manager: 11 | smi_list = manager.list() 12 | speed_list = manager.list() 13 | warm_signal = Value('i', 0) 14 | 15 | p1 = Process(target=fun, args=(model_name, batch_size, mp, gpu_id, speed_list, warm_signal, )) 16 | p2 = Process(target=smi_getter, args=(sys.argv[1:], smi_list, gpu_id, )) 17 | 18 | t_begin = time.time() 19 | p1.start() 20 | while True: 21 | if warm_signal.value == 1: 22 | p2.start() 23 | break 24 | 25 | p1.join() 26 | p2.terminate() 27 | t_pass = time.time() - t_begin 28 | 29 | speed_list = list(speed_list) 30 | smi_df = pd.DataFrame(list(smi_list)) 31 | smi_df.drop(index=smi_df.index[0], inplace=True)  # drop the warm-up sample in place 32 | smi_df.drop(index=smi_df.index[-1], inplace=True)  # drop the tail sample 33 | # print(smi_df) 34 | 35 | metric_dict = {'model': model_name, 'dataset': dataset, 'gpu_num': len(gpu_id), 'batchsize': batch_size, 'amp': mp} 36 | metric_dict['speed'] = round(speed_list[0], 3) 37 | 38 | # Process gpu info 39 | metric_dict['gpu_util'] = round(pd.to_numeric(smi_df['gpuUtil']).mean(), 3) 40 | metric_dict['gmem_util'] = round(pd.to_numeric(smi_df['gpuMemUtil']).mean(), 3) 41 | smi_df['gpuMem'] = smi_df['gpuMem'].apply(lambda x: x[:-4]).astype('int64') 42 | metric_dict['gmem'] = round(smi_df['gpuMem'].mean(), 3) 43 | metric_dict['time'] = t_pass 44 | 45 | return metric_dict 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /profile/smi.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 |
import subprocess 4 | import sys 5 | import time 6 | import traceback 7 | 8 | from xml.dom import minidom 9 | 10 | 11 | def smi_getter(argv, smi_list, gpu_id): 12 | metrics_output_dir = "./" 13 | if len(gpu_id) == 1: 14 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]}".split() 15 | elif len(gpu_id) == 2: 16 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]},{gpu_id[1]}".split() 17 | elif len(gpu_id) == 4: 18 | cmd = f"nvidia-smi -q -x -i {gpu_id[0]},{gpu_id[1]},{gpu_id[2]},{gpu_id[3]}".split() 19 | while True: 20 | try: 21 | p = subprocess.Popen(cmd, stdout=subprocess.PIPE) 22 | smi_output = p.stdout.read() 23 | except Exception: 24 | traceback.print_exc() 25 | gen_empty_gpu_metric(metrics_output_dir) 26 | break 27 | output = parse_nvidia_smi_result(smi_output, metrics_output_dir, gpu_id) 28 | smi_list.extend(output) 29 | # TODO: change to sleep time configurable via arguments 30 | time.sleep(0.2) 31 | 32 | 33 | def parse_nvidia_smi_result(smi, outputDir, gpu_id): 34 | try: 35 | old_umask = os.umask(0) 36 | xmldoc = minidom.parseString(smi) 37 | gpuList = xmldoc.getElementsByTagName("gpu") 38 | gpuInfo = [] 39 | outPut = {} 40 | outPut["Timestamp"] = time.asctime(time.localtime()) 41 | for gpuIndex, gpu in enumerate(gpuList): 42 | outPut["index"] = gpu_id[gpuIndex] 43 | outPut["gpuUtil"] = ( 44 | gpu.getElementsByTagName("utilization")[0] 45 | .getElementsByTagName("gpu_util")[0] 46 | .childNodes[0] 47 | .data.replace("%", "") 48 | .strip() 49 | ) 50 | outPut["gpuMemUtil"] = ( 51 | gpu.getElementsByTagName("utilization")[0] 52 | .getElementsByTagName("memory_util")[0] 53 | .childNodes[0] 54 | .data.replace("%", "") 55 | .strip() 56 | ) 57 | outPut["gpuMem"] = ( 58 | gpu.getElementsByTagName("fb_memory_usage")[0] 59 | .getElementsByTagName("used")[0] 60 | .childNodes[0] 61 | .data 62 | ) 63 | # processes = gpu.getElementsByTagName("processes") 64 | # runningProNumber = len(processes[0].getElementsByTagName("process_info")) 65 | # gpuInfo["activeProcessNum"] = runningProNumber 66 | 67 | # print(outPut) 68 | gpuInfo.append(outPut.copy()) 69 | return gpuInfo 70 | 71 | except Exception as error: 72 | # e_info = sys.exc_info() 73 | print("gpu_metrics_collector error: %s" % error) 74 | finally: 75 | os.umask(old_umask) 76 | 77 | 78 | def gen_empty_gpu_metric(outputDir): 79 | try: 80 | old_umask = os.umask(0) 81 | with open(os.path.join(outputDir, "gpu_metrics"), "a") as outputFile: 82 | outPut = {} 83 | outPut["Timestamp"] = time.asctime(time.localtime()) 84 | outPut["gpuCount"] = 0 85 | outPut["gpuInfos"] = [] 86 | print(outPut) 87 | outputFile.write("{}\n".format(json.dumps(outPut, sort_keys=True))) 88 | outputFile.flush() 89 | except Exception: 90 | traceback.print_exc() 91 | finally: 92 | os.umask(old_umask) 93 | 94 | -------------------------------------------------------------------------------- /simulation/analyzer/analyzer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from primo.model import PrimoClassifier 6 | from sklearn.model_selection import train_test_split 7 | from sklearn import preprocessing, metrics 8 | 9 | 10 | def set_seed(seed): 11 | random.seed(seed) 12 | np.random.seed(seed) 13 | 14 | 15 | seed = 123 16 | set_seed(seed) 17 | 18 | result = pd.DataFrame() 19 | single = pd.read_csv("PATH_TO_PROFILED_SINGLE_DATA.csv") 20 | colo = pd.read_csv(f"PATH_TO_PROFILED_COLOCATED_DATA.csv", index_col=0) 21 | 22 | 23 | def query_speed(trail): 24 | m, d, b, a = trail["model"], trail["dataset"], 
trail["batchsize"], trail["amp"] 25 | info1 = colo.query("model1 == @m and batchsize1 == @b and dataset1 == @d and amp1 == @a") 26 | info2 = colo.query("model2 == @m and batchsize2 == @b and dataset2 == @d and amp2 == @a") 27 | 28 | speed1, len1, speed2, len2 = 0, len(info1), 0, len(info2) 29 | if len1 > 0: 30 | speed1 = info1["speed1"].sum() 31 | if len2 > 0: 32 | speed2 = info2["speed2"].sum() 33 | 34 | avg = (speed1 + speed2) / max(len1 + len2, 1) 35 | 36 | return round(avg, 3) 37 | 38 | 39 | """Compare with original manual labeling""" 40 | for i in range(len(single)): 41 | avg_speed = query_speed(single.loc[i]) 42 | single.at[i, "avg_speed"] = avg_speed 43 | if avg_speed < 0.85: 44 | single.at[i, "auto_label"] = 2 45 | elif avg_speed < 0.95: 46 | single.at[i, "auto_label"] = 1 47 | else: 48 | single.at[i, "auto_label"] = 0 49 | 50 | 51 | single = single.drop(columns=["dataset", "batchsize", "speed", "model"]) 52 | train_data, test_data, train_label, test_label = train_test_split( 53 | single.drop(columns="label"), single[["label"]], test_size=0.3, random_state=42 54 | ) 55 | 56 | config = {"prune_factor": 0.0001} 57 | model = PrimoClassifier(model="PrDT", model_config=config, hpo=None) 58 | model.fit(train_data, train_label) 59 | pred = model.predict(test_data) 60 | 61 | acc = metrics.accuracy_score(test_label, pred) 62 | print(f"acc: {acc:.3f}") 63 | -------------------------------------------------------------------------------- /simulation/analyzer/single_data.csv: -------------------------------------------------------------------------------- 1 | model,dataset,batchsize,amp,speed,gpu_util,gmem_util,gmem,label 2 | BERT,SQuAD,32,0,84.872,99.689,62.484,20162.0,2 3 | BERT,SQuAD,32,1,119.911,99.593,74.23,16334.0,2 4 | DCGAN,LSUN,32,0,1986.707,57.778,20.222,2458.0,1 5 | DCGAN,LSUN,32,1,1561.581,34.9,11.8,2730.0,0 6 | DCGAN,LSUN,64,0,3275.832,72.9,31.0,2322.0,2 7 | DCGAN,LSUN,64,1,2936.807,47.5,17.167,2330.0,1 8 | DCGAN,LSUN,128,0,4659.802,94.0,41.0,3096.0,2 9 | DCGAN,LSUN,128,1,6097.188,66.333,32.417,2684.0,2 10 | EfficientNet,CIFAR-10,32,0,814.137,24.5,3.0,2308.0,0 11 | EfficientNet,CIFAR-10,32,1,680.977,20.571,1.286,2176.0,0 12 | EfficientNet,CIFAR-10,64,0,1922.826,35.4,8.0,2766.0,1 13 | EfficientNet,CIFAR-10,64,1,1371.123,25.5,3.167,2290.0,1 14 | EfficientNet,CIFAR-10,128,0,3465.072,44.833,16.167,2580.0,1 15 | EfficientNet,CIFAR-10,128,1,2802.744,33.571,7.286,2358.0,1 16 | LSTM,Wikitext2,64,0,4496.071,97.0,74.875,3884.0,2 17 | LSTM,Wikitext2,64,1,4495.889,73.625,60.0,3750.0,2 18 | LSTM,Wikitext2,128,0,3951.755,76.2,63.75,5616.0,2 19 | LSTM,Wikitext2,128,1,4440.872,63.706,55.824,5320.0,2 20 | MobileNetV2,CIFAR-10,32,0,1108.427,42.8,17.0,2508.0,1 21 | MobileNetV2,CIFAR-10,32,1,895.251,28.833,7.333,2320.0,1 22 | MobileNetV2,CIFAR-10,64,0,2228.976,69.5,39.5,3010.0,1 23 | MobileNetV2,CIFAR-10,64,1,1710.877,41.833,16.167,2528.0,1 24 | MobileNetV2,CIFAR-10,128,0,3516.037,98.833,65.333,4078.0,2 25 | MobileNetV2,CIFAR-10,128,1,2866.76,55.714,28.286,2992.0,1 26 | MobileNetV3,ImageNet,32,0,1304.503,45.409,19.727,2694.0,0 27 | MobileNetV3,ImageNet,32,1,978.255,34.966,7.931,2382.0,0 28 | MobileNetV3,ImageNet,64,0,2537.436,80.522,44.261,3266.0,1 29 | MobileNetV3,ImageNet,64,1,1908.093,52.355,18.065,2706.0,0 30 | MobileNetV3,ImageNet,128,0,3353.137,100.0,60.912,4428.0,2 31 | MobileNetV3,ImageNet,128,1,3482.526,86.121,33.939,3216.0,1 32 | NeuMF,MovieLens,64,0,15393.307,10.483,2.023,2050.0,0 33 | NeuMF,MovieLens,64,1,12173.612,9.836,2.009,2050.0,0 34 | 
NeuMF,MovieLens,128,0,29773.989,10.483,2.011,2050.0,0 35 | NeuMF,MovieLens,128,1,23040.359,9.643,1.983,2050.0,0 36 | PPO,LunarLander,32,0,5.156,12.425,0.0,2051.0,0 37 | PPO,LunarLander,64,0,14.246,11.949,0.0,2051.0,0 38 | PPO,LunarLander,128,0,46.507,14.96,0.0,2051.0,0 39 | PointNet,ShapeNet,32,0,131.491,11.533,9.667,3968.0,0 40 | PointNet,ShapeNet,32,1,136.02,7.0,5.615,3334.0,0 41 | PointNet,ShapeNet,64,0,138.049,7.75,6.571,6346.0,0 42 | PointNet,ShapeNet,64,1,135.349,6.24,5.32,4532.0,0 43 | PointNet,ShapeNet,128,0,140.209,8.982,7.589,10474.0,0 44 | PointNet,ShapeNet,128,1,144.304,5.551,4.878,6976.0,0 45 | ResNet18,CIFAR-10,32,0,1763.905,58.667,25.0,2360.0,0 46 | ResNet18,CIFAR-10,32,1,1649.038,40.667,20.333,2330.0,1 47 | ResNet18,CIFAR-10,64,0,3711.999,80.667,47.0,2660.0,2 48 | ResNet18,CIFAR-10,64,1,4038.576,62.0,34.0,3642.0,2 49 | ResNet18,CIFAR-10,128,0,4903.86,96.25,67.0,3072.0,2 50 | ResNet18,CIFAR-10,128,1,5699.482,78.0,43.667,4036.0,2 51 | ResNet50,ImageNet,32,0,432.563,100.0,80.754,5518.0,2 52 | ResNet50,ImageNet,32,1,670.679,95.864,67.568,3838.0,2 53 | ResNet50,ImageNet,64,0,435.943,100.0,80.899,8570.0,2 54 | ResNet50,ImageNet,64,1,708.822,97.735,72.518,5376.0,2 55 | ResNet50,ImageNet,128,0,465.651,100.0,86.927,13326.0,2 56 | ResNet50,ImageNet,128,1,765.473,99.66,75.566,7992.0,2 57 | TD3,BipedalWalker,32,0,16.292,12.828,0.0,2059.0,0 58 | TD3,BipedalWalker,64,0,59.807,13.303,0.0,2059.0,0 59 | TD3,BipedalWalker,128,0,67.445,14.089,0.0,2059.0,0 60 | Transformer,Multi30k,32,0,464.423,53.645,17.258,11949.258,2 61 | Transformer,Multi30k,64,0,857.9,75.164,31.806,11801.104,2 62 | VGG,CIFAR-10,32,0,2998.72,44.0,23.0,3386.0,0 63 | VGG,CIFAR-10,32,1,3556.88,48.0,19.0,2762.0,0 64 | VGG,CIFAR-10,64,0,7430.143,51.0,30.0,3584.0,1 65 | VGG,CIFAR-10,64,1,5112.546,34.0,16.5,3588.0,1 66 | VGG,CIFAR-10,128,0,7260.35,50.5,34.0,3990.0,2 67 | VGG,CIFAR-10,128,1,7009.529,43.667,22.333,2666.0,2 68 | -------------------------------------------------------------------------------- /simulation/data/Venus/vc_config.csv: -------------------------------------------------------------------------------- 1 | ,num 2 | vcEwI,9 3 | vcWoR,5 4 | vcHvQ,8 5 | vcvGl,20 6 | vc8Gr,6 7 | vcKeu,12 8 | vcKrE,4 9 | vcYVn,11 10 | vchbv,4 11 | vcLTP,8 12 | vchA3,3 13 | vcJsw,32 14 | vcefl,10 15 | vcvlY,2 16 | vcgkz,1 17 | -------------------------------------------------------------------------------- /simulation/data/dict2csv.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pickle 3 | 4 | # df = pd.read_csv("philly_vc.csv") 5 | # d = dict(zip(df["vc"].values, df["node num"].values)) 6 | 7 | # with open(f"./vc_dict_homo.pkl", "wb") as f: 8 | # pickle.dump(d, f, pickle.HIGHEST_PROTOCOL) 9 | 10 | cluster_list = ["Venus", "Earth", "Saturn", "Uranus", "Philly"] 11 | 12 | for i, v in enumerate(cluster_list): 13 | vc_dict = pd.read_pickle(v + "/vc_dict_homo.pkl") 14 | df = pd.DataFrame.from_dict(vc_dict, orient="index", columns=["num"]) 15 | df.to_csv(v + "/vc_config.csv") 16 | 17 | print(df.to_dict()["num"]) 18 | -------------------------------------------------------------------------------- /simulation/data/log_process.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import pandas as pd 3 | from pathlib import Path 4 | 5 | """ 6 | Processing log files for simulation 7 | """ 8 | 9 | 10 | def main(args): 11 | cluster = args.cluster 12 | 13 | if not Path(f"./{cluster}").exists(): 14 | 
Path(f"./{cluster}").mkdir() 15 | 16 | if cluster == "Philly": 17 | logfile = Path(f"../../../analysis/1_compare with Philly trace/philly_trace.csv") 18 | else: 19 | logfile = Path(f"../../../data/{cluster}/cluster_log.csv") 20 | 21 | df = pd.read_csv(logfile, parse_dates=["submit_time", "start_time", "end_time"]) 22 | df = df.sort_values(by="submit_time") 23 | df.reset_index(drop=True, inplace=True) 24 | 25 | if cluster == "Neptune": 26 | df = df[df["vc"] != "vc7Bz"] 27 | df = df[df["vc"] != "vcIoD"] 28 | df = df[df["vc"] != "vcftk"] 29 | df = df[df["vc"] != "vc5LC"] 30 | df = df[df["vc"] != "vcEwI"] 31 | 32 | df.loc[df["vc"] == "vcVvI", "vc"] = "vcUV3" 33 | df.loc[df["vc"] == "vcrsE", "vc"] = "vcBUL" 34 | df.loc[df["vc"] == "vcHyk", "vc"] = "vcBUL" 35 | 36 | df.reset_index(drop=True, inplace=True) 37 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 38 | 39 | elif cluster == "Saturn": 40 | df = df[df["vc"] != "vc7Bz"] 41 | df = df[df["vc"] != "vcHcQ"] 42 | df = df[df["vc"] != "vck1d"] 43 | df = df[df["vc"] != "vcj72"] 44 | df = df[df["vc"] != "vcIya"] 45 | df = df[df["vc"] != "vcygX"] 46 | df = df[df["vc"] != "vcxqr"] 47 | df = df[df["vc"] != "vcsgw"] 48 | 49 | df.reset_index(drop=True, inplace=True) 50 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 51 | 52 | elif cluster == "Uranus": 53 | df = df[df["vc"] != "vc7Bz"] 54 | df = df[df["vc"] != "vczGr"] 55 | df = df[df["vc"] != "vciN1"] 56 | df = df[df["vc"] != "vcV7h"] 57 | df = df[df["vc"] != "vcRAl"] 58 | df = df[df["vc"] != "vcvcM"] 59 | df = df[df["vc"] != "vc1z2"] 60 | 61 | df.loc[df["vc"] == "vcVvI", "vc"] = "vcUV3" 62 | df.loc[df["vc"] == "vcxqr", "vc"] = "vcUV3" 63 | df.loc[df["vc"] == "vcsBT", "vc"] = "vcUV3" 64 | df.loc[df["vc"] == "vcygX", "vc"] = "vcUV3" 65 | df.loc[df["vc"] == "vcHyk", "vc"] = "vcOlr" 66 | df.loc[df["vc"] == "vcRDh", "vc"] = "vc7hD" 67 | df.loc[df["vc"] == "vcFsC", "vc"] = "vc7hD" 68 | 69 | df.reset_index(drop=True, inplace=True) 70 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 71 | 72 | elif cluster == "Earth": 73 | df = df[df["vc"] != "vcp4O"] 74 | df = df[df["vc"] != "vcvcM"] 75 | df = df[df["vc"] != "vcXrB"] 76 | df = df[df["vc"] != "vc7hD"] 77 | df = df[df["vc"] != "vcIya"] 78 | df = df[df["vc"] != "vc8Sj"] 79 | df = df[df["vc"] != "vcLJZ"] 80 | 81 | df.loc[df["vc"] == "vcxS0", "vc"] = "vc3sl" 82 | 83 | df.reset_index(drop=True, inplace=True) 84 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 85 | 86 | elif cluster == "Venus": 87 | df = df[df["vc"] != "vcEhP"] 88 | df = df[df["vc"] != "vcIya"] 89 | df = df[df["vc"] != "vcJLV"] 90 | df = df[df["vc"] != "vcJkd"] 91 | df = df[df["vc"] != "vcsBT"] 92 | 93 | df.loc[df["vc"] == "vcbIW", "vc"] = "vcvGl" 94 | df.loc[df["vc"] == "vc6YE", "vc"] = "vcvGl" 95 | df.loc[df["vc"] == "vcOhe", "vc"] = "vcKeu" 96 | df.loc[df["vc"] == "vccJW", "vc"] = "vcKeu" 97 | df.loc[df["vc"] == "vcP2J", "vc"] = "vchA3" 98 | 99 | df.reset_index(drop=True, inplace=True) 100 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 101 | 102 | elif cluster == "Philly": 103 | df = df[df["vc"] != "795a4c"] 104 | df = df[df["vc"] != "51b7ef"] 105 | df = df[df["vc"] != "925e2b"] 106 | df = df[df["vc"] != "23dbec"] 107 | 108 | df.reset_index(drop=True, inplace=True) 109 | df.to_csv(f"./{cluster}/cluster_log.csv", index=False) 110 | 111 | else: 112 | raise ValueError("Wrong Cluster Name.") 113 | 114 | 115 | if __name__ == "__main__": 116 | parser = argparse.ArgumentParser(description="Job Log Processor") 117 | parser.add_argument("-c", "--cluster", 
default="Earth", type=str, help="Cluster Name") 118 | args = parser.parse_args() 119 | main(args) 120 | -------------------------------------------------------------------------------- /simulation/data/prepare_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | :< 0] 19 | df = df.sort_values(by="submit_time") 20 | 21 | # VC filter 22 | vc_df = pd.read_csv(dir + "/vc_config.csv", index_col=0) 23 | vc_list = vc_df.index.to_list() 24 | df = df[df["vc"].isin(vc_list)] 25 | 26 | df = df[df["submit_time"] >= pd.Timestamp(start)] 27 | df["submit_time"] = df["submit_time"].apply(lambda x: int(datetime.datetime.timestamp(pd.Timestamp(x)))) 28 | 29 | # Normalizing 30 | df["submit_time"] = df["submit_time"] - df.iloc[0]["submit_time"] 31 | 32 | # Slicing val data 33 | begin = (pd.Timestamp(test_date_range[0]) - pd.Timestamp(start)).total_seconds() 34 | end = (pd.Timestamp(test_date_range[1]) - pd.Timestamp(start)).total_seconds() 35 | val_df = df[(df["submit_time"] >= begin) & (df["submit_time"] <= end)] 36 | # Slicing train data 37 | # | (df['submit_time'] > pd.Timestamp(test_date_range[1]))] 38 | train_df = df[(df["submit_time"] < begin)] 39 | 40 | # Filter user, vc not in val data around 9% jobs be filtered 41 | val_users = val_df["user"].unique() 42 | 43 | val_vcs = val_df["vc"].unique() 44 | 45 | train_df = train_df[train_df["user"].isin(val_users)] 46 | train_df = train_df[train_df["vc"].isin(val_vcs)] # no jobs be filtered 47 | 48 | train_df = train_df.sort_values(by="submit_time") 49 | train_df.reset_index(inplace=True, drop=True) 50 | 51 | val_df = val_df.sort_values(by="submit_time") 52 | val_df.reset_index(inplace=True, drop=True) 53 | 54 | return train_df, val_df 55 | 56 | 57 | def logger_init(file): 58 | logger = logging.getLogger() 59 | handler_file = logging.FileHandler(f"{file}.log", "w") 60 | handler_stream = logging.StreamHandler(sys.stdout) 61 | 62 | logger.setLevel(logging.INFO) 63 | handler_file.setLevel(logging.INFO) 64 | handler_stream.setLevel(logging.INFO) 65 | 66 | formatter = logging.Formatter("%(asctime)s | %(processName)s | %(message)s", datefmt="%Y %b %d %H:%M:%S") 67 | handler_file.setFormatter(formatter) 68 | handler_stream.setFormatter(formatter) 69 | 70 | logger.addHandler(handler_file) 71 | logger.addHandler(handler_stream) 72 | 73 | return logger 74 | -------------------------------------------------------------------------------- /simulation/job.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Job(dict): 5 | def __init__(self, series): 6 | super(Job, self).__init__() 7 | self.update(series.to_dict()) 8 | # Priority Define by Estimator, Random Means No History Data Found 9 | self.update({"nodes": [], "priority": -1, "random": 0}) 10 | # Profiler 11 | self.update({"profiled": 0, "profqueue": 0, "toskip": 0}) 12 | # Co-locate 13 | # NOTE: exclusive: {0: colocate, 1: exclusive} 14 | # NOTE: rate: the ratio of colocate and exclusive execution performance 15 | # NOTE: sharescore: 0, 1, 2 16 | self.update({"exclusive": 1, "rate": 1, "sharescore": None, "Tcolocate": 0, "Tdelocate": 0}) 17 | 18 | def set_ckpt_time(self, time): 19 | self.last_ckpt_time = time 20 | 21 | def get_ckpt_time(self): 22 | return self.last_ckpt_time 23 | 24 | 25 | class Trace: 26 | def __init__(self): 27 | self.job_list = [] 28 | 29 | def append_job(self, job): 30 | self.job_list.append(job) 31 | 32 | def job_num(self): 33 | return len(self.job_list) 34 | 35 | def 
profiler_remain_job_num(self): 36 | num = 0 37 | for job in self.job_list: 38 | if job["toskip"] == 0: 39 | num += 1 40 | return num 41 | 42 | def sort_jobs(self, key): 43 | self.job_list.sort(key=lambda x: x.__getitem__(key)) 44 | 45 | def vc_trace(self, vc_name): 46 | vc_trace = Trace() 47 | for job in self.job_list: 48 | if job["vc"] == vc_name: 49 | vc_trace.append_job(job) 50 | vc_trace.sort_jobs("submit_time") 51 | return vc_trace 52 | 53 | def reset_trace(self): 54 | for job in self.job_list: 55 | if job["toskip"] == 0: 56 | job["start_time"] = sys.maxsize 57 | job["end_time"] = sys.maxsize 58 | job["nodes"] = [] 59 | -------------------------------------------------------------------------------- /simulation/policy/__init__.py: -------------------------------------------------------------------------------- 1 | from .sjf import ShortestJobFirst 2 | from .srtf import ShortestRemainingTimeFirst 3 | from .fifo import FirstInFirstOut 4 | from .qssf import QuasiShortestServiceFirst 5 | from .lucid import Lucid 6 | from .tiresias import Tiresias 7 | -------------------------------------------------------------------------------- /simulation/policy/fifo.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class FirstInFirstOut(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(FirstInFirstOut, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "fifo" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if self.time == job["end_time"]: 18 | job["remain"] = 0 19 | job["status"] = "end" 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | 24 | """2. Allocate New / Pending Jobs""" 25 | # New Job 26 | for idx in range(prev_index, self.total_job_num): 27 | job = self.trace.job_list[idx] 28 | if job["submit_time"] == self.time: 29 | job["status"] = "pend" 30 | self.que_list.append(job) 31 | prev_index = idx 32 | elif job["submit_time"] > self.time: 33 | break 34 | 35 | # Pend Job 36 | # NOTE: Sort by submit time -- FIFO 37 | self.que_list.sort(key=lambda x: x.__getitem__("submit_time")) 38 | que_ls = self.que_list.copy() # Avoid list.remove() issue 39 | for job in que_ls: 40 | if self.job_placer(job): 41 | job["start_time"] = self.time 42 | job["end_time"] = job["start_time"] + job["duration"] 43 | job["queue"] = self.time - job["submit_time"] 44 | job["status"] = "run" 45 | self.que_list.remove(job) 46 | self.run_list.append(job) 47 | else: 48 | break 49 | 50 | """3. 
Log & Result Recorder""" 51 | if self.time % 10000 == 0: 52 | self.runtime_log() 53 | 54 | # Sample Cluster State Every Minute 55 | if self.time % 60 == 0: 56 | self.seq_recorder() 57 | 58 | self.time += 1 59 | 60 | self.log_recorder(self._name) 61 | -------------------------------------------------------------------------------- /simulation/policy/placer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S-Lab-System-Group/Lucid/63685a3ab7d15d8e940bb47ef98b6d5cca472b13/simulation/policy/placer/__init__.py -------------------------------------------------------------------------------- /simulation/policy/placer/consolidate.py: -------------------------------------------------------------------------------- 1 | class ConsolidatePlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidate" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | Enforce consolidate placement 9 | Node list selection 10 | -- job_gpu_num <= 8 11 | -- job_gpu_num > 8 and job_gpu_num % 8 == 0 12 | -- job_gpu_num > 8 and job_gpu_num % 8 != 0 13 | """ 14 | 15 | def update_avail_nodes(self): 16 | self.avail_nodes = self.vc.avail_node_list() 17 | 18 | def consolidateSelect(self, job_gpu_num): 19 | self.update_avail_nodes() 20 | alloc_nodes = [] 21 | if job_gpu_num <= 8: 22 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 23 | for node in nodes: 24 | if node.free_gpus >= job_gpu_num: 25 | alloc_nodes.append((node, job_gpu_num)) 26 | return True, alloc_nodes 27 | return False, alloc_nodes 28 | else: 29 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 30 | if job_gpu_num % 8 == 0: 31 | node_num = job_gpu_num // 8 32 | for node in nodes: 33 | if node.free_gpus < 8: 34 | return False, alloc_nodes 35 | 36 | if node.free_gpus == 8 and node_num > 0: 37 | alloc_nodes.append((node, 8)) 38 | node_num -= 1 39 | 40 | if node_num == 0: 41 | return True, alloc_nodes 42 | else: 43 | node_num = (job_gpu_num // 8) + 1 44 | for node in nodes: 45 | if node.free_gpus == 8 and node_num > 1: 46 | alloc_nodes.append((node, 8)) 47 | node_num -= 1 48 | continue 49 | 50 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 51 | alloc_nodes.append((node, job_gpu_num % 8)) 52 | node_num -= 1 53 | return True, alloc_nodes 54 | 55 | return False, alloc_nodes 56 | 57 | def place(self, job): 58 | vc_free_gpu_num = self.vc.vc_free_gpus() 59 | job_gpu_num = job["gpu_num"] 60 | 61 | # Total Free GPU Check 62 | if vc_free_gpu_num < job_gpu_num: 63 | return False 64 | 65 | if self.vc._num_gpus_per_node != 8: 66 | raise NotImplementedError 67 | 68 | select_flag, alloc_nodes = self.consolidateSelect(job_gpu_num) 69 | 70 | """ Placement """ 71 | if select_flag: 72 | for (node, req_gpu) in alloc_nodes: 73 | allocate_gpus = node.allocate_gpu(req_gpu, job) 74 | job["nodes"].append({node.node_name: allocate_gpus}) 75 | return True 76 | else: 77 | return False 78 | -------------------------------------------------------------------------------- /simulation/policy/placer/consolidateFirst.py: -------------------------------------------------------------------------------- 1 | class ConsolidateFirstPlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidateFirst" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | consolidate first placement 9 | Try consolidate first, if fail, try random placement 10 | Random placement: place to idlest node first 11 | """ 12 | 
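# Hypothetical walkthrough of the fallback above (toy numbers, for
# illustration only): with two nodes holding 5 and 3 free GPUs,
# consolidateFirstSelect(4) finds a single node with >= 4 free GPUs and
# consolidates -> [(node5, 4)]. For a 6-GPU job no single node fits, so it
# falls back to randomSelect(6), which fills the idlest node first
# -> [(node5, 5), (node3, 1)].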
13 | def update_avail_nodes(self): 14 | self.avail_nodes = self.vc.avail_node_list() 15 | 16 | def randomSelect(self, job_gpu_num): 17 | self.update_avail_nodes() 18 | alloc_nodes = [] 19 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 20 | for node in nodes: 21 | if node.free_gpus < job_gpu_num: 22 | alloc_nodes.append((node, node.free_gpus)) 23 | job_gpu_num -= node.free_gpus 24 | continue 25 | else: 26 | alloc_nodes.append((node, job_gpu_num)) 27 | return True, alloc_nodes 28 | return False, alloc_nodes 29 | 30 | def consolidateFirstSelect(self, job_gpu_num): 31 | alloc_nodes = [] 32 | if job_gpu_num <= 8: 33 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 34 | for node in nodes: 35 | if node.free_gpus >= job_gpu_num: 36 | alloc_nodes.append((node, job_gpu_num)) 37 | return True, alloc_nodes 38 | return self.randomSelect(job_gpu_num) 39 | else: 40 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 41 | if job_gpu_num % 8 == 0: 42 | node_num = job_gpu_num // 8 43 | for node in nodes: 44 | if node.free_gpus < 8: 45 | return self.randomSelect(job_gpu_num) 46 | 47 | if node.free_gpus == 8 and node_num > 0: 48 | alloc_nodes.append((node, 8)) 49 | node_num -= 1 50 | 51 | if node_num == 0: 52 | return True, alloc_nodes 53 | else: 54 | node_num = (job_gpu_num // 8) + 1 55 | for node in nodes: 56 | if node.free_gpus == 8 and node_num > 1: 57 | alloc_nodes.append((node, 8)) 58 | node_num -= 1 59 | continue 60 | 61 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 62 | alloc_nodes.append((node, job_gpu_num % 8)) 63 | node_num -= 1 64 | return True, alloc_nodes 65 | 66 | return self.randomSelect(job_gpu_num) 67 | 68 | def place(self, job): 69 | vc_free_gpu_num = self.vc.vc_free_gpus() 70 | job_gpu_num = job["gpu_num"] 71 | 72 | # Total Free GPU Check 73 | if vc_free_gpu_num < job_gpu_num: 74 | return False 75 | 76 | if self.vc._num_gpus_per_node != 8: 77 | raise NotImplementedError 78 | 79 | select_flag, alloc_nodes = self.consolidateFirstSelect(job_gpu_num) 80 | 81 | """ Placement """ 82 | if select_flag: 83 | for (node, req_gpu) in alloc_nodes: 84 | allocate_gpus = node.allocate_gpu(req_gpu, job) 85 | job["nodes"].append({node.node_name: allocate_gpus}) 86 | return True 87 | else: 88 | return False 89 | -------------------------------------------------------------------------------- /simulation/policy/placer/consolidateWithShare.py: -------------------------------------------------------------------------------- 1 | class ConsolidateWithSharePlacement: 2 | def __init__(self, vc): 3 | self.name = "consolidate_share" 4 | self.vc = vc 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """ 8 | Enforce consolidate placement 9 | Node list selection 10 | -- job_gpu_num <= 8 11 | -- job_gpu_num > 8 and job_gpu_num % 8 == 0 12 | -- job_gpu_num > 8 and job_gpu_num % 8 != 0 13 | """ 14 | 15 | def update_avail_nodes(self): 16 | self.avail_nodes = self.vc.avail_node_list() 17 | 18 | def consolidateSelect(self, job_gpu_num): 19 | alloc_nodes = [] 20 | self.update_avail_nodes() 21 | if job_gpu_num <= 8: 22 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=False) 23 | for node in nodes: 24 | if node.free_gpus >= job_gpu_num: 25 | alloc_nodes.append((node, job_gpu_num)) 26 | return True, alloc_nodes 27 | return False, alloc_nodes 28 | else: 29 | nodes = sorted(self.avail_nodes, key=lambda x: x.free_gpus, reverse=True) 30 | if job_gpu_num % 8 == 0: 31 | node_num = job_gpu_num // 8 32 | for node in
nodes: 33 | if node.free_gpus < 8: 34 | return False, alloc_nodes 35 | 36 | if node.free_gpus == 8 and node_num > 0: 37 | alloc_nodes.append((node, 8)) 38 | node_num -= 1 39 | 40 | if node_num == 0: 41 | return True, alloc_nodes 42 | else: 43 | node_num = (job_gpu_num // 8) + 1 44 | for node in nodes: 45 | if node.free_gpus == 8 and node_num > 1: 46 | alloc_nodes.append((node, 8)) 47 | node_num -= 1 48 | continue 49 | 50 | if node.free_gpus >= (job_gpu_num % 8) and node_num == 1: 51 | alloc_nodes.append((node, job_gpu_num % 8)) 52 | node_num -= 1 53 | return True, alloc_nodes 54 | 55 | return False, alloc_nodes 56 | 57 | def place(self, job): 58 | vc_free_gpu_num = self.vc.vc_free_gpus() 59 | job_gpu_num = job["gpu_num"] 60 | 61 | # Total Free GPU Check 62 | if vc_free_gpu_num < job_gpu_num: 63 | return False 64 | 65 | if self.vc._num_gpus_per_node != 8: 66 | raise NotImplementedError 67 | 68 | select_flag, alloc_nodes = self.consolidateSelect(job_gpu_num) 69 | 70 | """ Placement """ 71 | if select_flag: 72 | for (node, req_gpu) in alloc_nodes: 73 | allocate_gpus = node.allocate_gpu(req_gpu, job) 74 | job["nodes"].append({node.node_name: allocate_gpus}) 75 | return True 76 | else: 77 | return False 78 | 79 | def colocateSelect(self, job, target_job): 80 | # nodes = sorted(target_nodes, key=lambda x: len(list(x.values())[0]), reverse=True) 81 | # job_gpu_num = job["gpu_num"] 82 | alloc_nodes = [] 83 | target_nodes = target_job["nodes"] 84 | for node_dict in target_nodes: 85 | alloc_nodes.append((self.vc.get_node(list(node_dict.keys())[0]), list(node_dict.values())[0])) 86 | return True, alloc_nodes 87 | 88 | def colcoate_place(self, job, target_job, gutil, gmem): 89 | assert job["gpu_num"] == target_job["gpu_num"], "Need to implement" 90 | select_flag, alloc_nodes = self.colocateSelect(job, target_job) 91 | 92 | """ Placement """ 93 | if select_flag: 94 | for (node, gpu_list) in alloc_nodes: 95 | assert node.allocate_colocate_gpu(gpu_list, job, gutil, gmem) 96 | job["nodes"].append({node.node_name: gpu_list}) 97 | return True 98 | else: 99 | raise NotImplementedError 100 | -------------------------------------------------------------------------------- /simulation/policy/placer/random.py: -------------------------------------------------------------------------------- 1 | class RandomPlacement: 2 | def __init__(self, vc): 3 | self.vc = vc 4 | self.name = "random" 5 | self.avail_nodes = self.vc.avail_node_list() 6 | 7 | """Random placement""" 8 | 9 | def update_avail_nodes(self): 10 | self.avail_nodes = self.vc.avail_node_list() 11 | 12 | def randomSelect(self, job_gpu_num): 13 | self.update_avail_nodes() 14 | alloc_nodes = [] 15 | 16 | for node in self.avail_nodes: 17 | if node.free_gpus < job_gpu_num: 18 | alloc_nodes.append((node, node.free_gpus)) 19 | job_gpu_num -= node.free_gpus 20 | continue 21 | else: 22 | alloc_nodes.append((node, job_gpu_num)) 23 | return True, alloc_nodes 24 | return False, alloc_nodes 25 | 26 | def place(self, job): 27 | vc_free_gpu_num = self.vc.vc_free_gpus() 28 | job_gpu_num = job["gpu_num"] 29 | 30 | # Total Free GPU Check 31 | if vc_free_gpu_num < job_gpu_num: 32 | return False 33 | 34 | select_flag, alloc_nodes = self.randomSelect(job_gpu_num) 35 | 36 | """ Placement """ 37 | if select_flag: 38 | for (node, req_gpu) in alloc_nodes: 39 | allocate_gpus = node.allocate_gpu(req_gpu, job) 40 | job["nodes"].append({node.node_name: allocate_gpus}) 41 | return True 42 | else: 43 | return False 44 | 
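All four placers above share one contract: place(job) either returns False and leaves the job untouched, or returns True after appending {node_name: [gpu_indices]} entries to job["nodes"] via node.allocate_gpu(). A minimal sketch of that contract follows; ToyNode and ToyVC are hypothetical stand-ins for the real classes in simulation/cluster.py (not reproduced in this listing) and implement only the attributes the placers touch.

# Sketch: exercising ConsolidatePlacement against hypothetical stand-in
# classes. Assumes it is run from simulation/ with the policy package's
# own imports resolvable.
from policy.placer.consolidate import ConsolidatePlacement

class ToyNode:
    def __init__(self, name, gpus=8):
        self.node_name = name
        self.free_gpus = gpus
        self._cursor = 0

    def allocate_gpu(self, num, job):
        # Hand out `num` GPU indices and shrink the free pool.
        gpus = list(range(self._cursor, self._cursor + num))
        self._cursor += num
        self.free_gpus -= num
        return gpus

class ToyVC:
    _num_gpus_per_node = 8  # the placers hard-require 8-GPU nodes

    def __init__(self, nodes):
        self.nodes = nodes

    def avail_node_list(self):
        return [n for n in self.nodes if n.free_gpus > 0]

    def vc_free_gpus(self):
        return sum(n.free_gpus for n in self.nodes)

vc = ToyVC([ToyNode("n0"), ToyNode("n1")])
placer = ConsolidatePlacement(vc)
job = {"gpu_num": 12, "nodes": []}

# 12 > 8 and 12 % 8 != 0, so consolidateSelect packs one full 8-GPU node
# plus the 4-GPU remainder on a second node.
assert placer.place(job)
print(job["nodes"])  # [{'n0': [0, 1, ..., 7]}, {'n1': [0, 1, 2, 3]}]

The same stand-ins work for ConsolidateFirstPlacement and RandomPlacement, which differ only in how they build alloc_nodes; ConsolidateWithSharePlacement would additionally need node.allocate_colocate_gpu() and vc.get_node() for its colocation path.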
-------------------------------------------------------------------------------- /simulation/policy/qssf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class QuasiShortestServiceFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts, estimator): 6 | super(QuasiShortestServiceFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self.estimator = estimator 8 | self._name = "qssf" 9 | 10 | def simulate(self): 11 | prev_index = 0 12 | 13 | while self.end_job_num != self.total_job_num: 14 | new_job_num = 0 15 | 16 | """1. Check & Release End Jobs""" 17 | run_ls = self.run_list.copy() # Avoid list.remove() issue 18 | for job in run_ls: 19 | if self.time == job["end_time"]: 20 | job["remain"] = 0 21 | job["status"] = "end" 22 | self.end_job_num += 1 23 | assert self._vc.release_resource(job) == True 24 | self.run_list.remove(job) 25 | if self.estimator.name != "LGBEstimator" and self.estimator.name != "PhillyEstimator": 26 | self.estimator.update_train_data(job) 27 | 28 | """2. Check New Jobs""" 29 | # New Job 30 | for idx in range(prev_index, self.total_job_num): 31 | job = self.trace.job_list[idx] 32 | if job["submit_time"] == self.time: 33 | job["status"] = "pend" 34 | self.que_list.append(job) 35 | prev_index = idx 36 | new_job_num += 1 37 | elif job["submit_time"] > self.time: 38 | break 39 | 40 | """3. Assign Priority If Exist Job Pending""" 41 | # NOTE: Sort by priority given by estimator -- QSSF 42 | # Only assign priority to the pending job, new job will sort by required gpu_num 43 | self.que_list.sort(key=lambda x: x.__getitem__("gpu_num")) 44 | if len(self.que_list) > new_job_num: 45 | for job in self.que_list: 46 | if job["priority"] == -1: 47 | job["priority"] = self.estimator.inference(job) 48 | self.que_list.sort(key=lambda x: x.__getitem__("priority")) 49 | 50 | """4. Allocate Job""" 51 | que_ls = self.que_list.copy() # Avoid list.remove() issue 52 | for job in que_ls: 53 | if self.job_placer(job): 54 | job["start_time"] = self.time 55 | job["end_time"] = job["start_time"] + job["duration"] 56 | job["queue"] = self.time - job["submit_time"] 57 | job["status"] = "run" 58 | self.que_list.remove(job) 59 | self.run_list.append(job) 60 | else: 61 | break 62 | 63 | """5. Log & Result Recorder""" 64 | if self.time % 10000 == 0: 65 | self.runtime_log() 66 | 67 | # Sample Cluster State Every Minute 68 | if self.time % 60 == 0: 69 | self.seq_recorder() 70 | 71 | self.time += 1 72 | 73 | self.log_recorder(self._name) 74 | -------------------------------------------------------------------------------- /simulation/policy/sjf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class ShortestJobFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(ShortestJobFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "sjf" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if self.time == job["end_time"]: 18 | job["remain"] = 0 19 | job["status"] = "end" 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | 24 | """2. 
Allocate New / Pending Jobs""" 25 | # New Job 26 | for idx in range(prev_index, self.total_job_num): 27 | job = self.trace.job_list[idx] 28 | if job["submit_time"] == self.time: 29 | job["status"] = "pend" 30 | self.que_list.append(job) 31 | prev_index = idx 32 | elif job["submit_time"] > self.time: 33 | break 34 | 35 | # Pend Job 36 | # NOTE: Sort by duration -- SJF 37 | self.que_list.sort(key=lambda x: x.__getitem__("duration")) 38 | que_ls = self.que_list.copy() # Avoid list.remove() issue 39 | for job in que_ls: 40 | if self.job_placer(job): 41 | job["start_time"] = self.time 42 | job["end_time"] = job["start_time"] + job["duration"] 43 | job["queue"] = self.time - job["submit_time"] 44 | job["status"] = "run" 45 | self.que_list.remove(job) 46 | self.run_list.append(job) 47 | else: 48 | break 49 | 50 | """3. Log & Result Recorder""" 51 | if self.time % 10000 == 0: 52 | self.runtime_log() 53 | 54 | # Sample Cluster State Every Minute 55 | if self.time % 60 == 0: 56 | self.seq_recorder() 57 | 58 | self.time += 1 59 | 60 | self.log_recorder(self._name) 61 | -------------------------------------------------------------------------------- /simulation/policy/srtf.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class ShortestRemainingTimeFirst(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(ShortestRemainingTimeFirst, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "srtf" 8 | 9 | def simulate(self): 10 | prev_index = 0 11 | 12 | while self.end_job_num != self.total_job_num: 13 | 14 | """1. Check & Release End Jobs""" 15 | run_ls = self.run_list.copy() # Avoid list.remove() issue 16 | for job in run_ls: 17 | if job["remain"] == 0: 18 | job["status"] = "end" 19 | job["end_time"] = self.time 20 | self.end_job_num += 1 21 | assert self._vc.release_resource(job) == True 22 | self.run_list.remove(job) 23 | else: 24 | job["remain"] -= 1 25 | 26 | """2. Check New Jobs """ 27 | for idx in range(prev_index, self.total_job_num): 28 | job = self.trace.job_list[idx] 29 | if job["submit_time"] == self.time: 30 | job["status"] = "pend" 31 | self.que_list.append(job) 32 | prev_index = idx 33 | elif job["submit_time"] > self.time: 34 | break 35 | 36 | """3. Select Job to Preempt or Run """ 37 | # NOTE: Sort by remain -- SRTF 38 | 39 | current_job = self.que_list + self.run_list 40 | current_job.sort(key=lambda x: x.__getitem__("remain")) 41 | 42 | quota = self._vc.total_gpus 43 | preempt_list = [] 44 | prerun_list = [] 45 | for job in current_job: 46 | if job.__getitem__("gpu_num") <= quota: 47 | quota -= job.__getitem__("gpu_num") 48 | if job["status"] == "pend": 49 | prerun_list.append(job) 50 | elif job["status"] == "run": 51 | preempt_list.append(job) 52 | 53 | """4. Preempt Job """ 54 | for job in preempt_list: 55 | job["ckpt_times"] += 1 56 | job.set_ckpt_time(self.time) 57 | job["status"] = "pend" 58 | job["remain"] += self.ckpt_overhead(job) 59 | assert self._vc.release_resource(job) == True 60 | job["nodes"] = [] 61 | 62 | if job not in self.que_list: 63 | self.que_list.append(job) 64 | if job in self.run_list: 65 | self.run_list.remove(job) 66 | 67 | """5. 
Allocate Job """ 68 | for job in prerun_list: 69 | if self.job_placer(job): 70 | job["status"] = "run" 71 | if job["ckpt_times"] == 0: 72 | job["start_time"] = self.time 73 | job["queue"] = self.time - job["submit_time"] 74 | else: 75 | job["queue"] = job["queue"] + (self.time - job.get_ckpt_time()) 76 | 77 | if job in self.que_list: 78 | self.que_list.remove(job) 79 | if job not in self.run_list: 80 | self.run_list.append(job) 81 | else: 82 | # May place fail because consolidate requirement 83 | if job not in self.que_list: 84 | self.que_list.append(job) 85 | continue 86 | 87 | """6. Log & Result Recorder""" 88 | if self.time % 10000 == 0: 89 | self.runtime_log() 90 | 91 | # Sample Cluster State Every Minute 92 | if self.time % 60 == 0: 93 | self.seq_recorder() 94 | 95 | self.time += 1 96 | 97 | self.log_recorder(self._name) 98 | -------------------------------------------------------------------------------- /simulation/policy/tiresias.py: -------------------------------------------------------------------------------- 1 | from .policy import Policy 2 | 3 | 4 | class Tiresias(Policy): 5 | def __init__(self, trace, vc, placement, log_dir, logger, start_ts): 6 | super(Tiresias, self).__init__(trace, vc, placement, log_dir, logger, start_ts) 7 | self._name = "tiresias" 8 | 9 | # Refer to https://github.com/SymbioticLab/Tiresias 10 | self._discretize_threshold = 18000 11 | self._low_priority_queue = [] 12 | self._high_priority_queue = [] 13 | 14 | def discretize_queue(self, job_queue): 15 | self._low_priority_queue = [] 16 | self._high_priority_queue = [] 17 | for job in job_queue: 18 | if job["priority"] > self._discretize_threshold: 19 | self._low_priority_queue.append(job) 20 | else: 21 | self._high_priority_queue.append(job) 22 | 23 | # Tiresias: Jobs in the same queue are scheduled in a FIFO order 24 | self._low_priority_queue.sort(key=lambda x: x.__getitem__("submit_time")) 25 | self._high_priority_queue.sort(key=lambda x: x.__getitem__("submit_time")) 26 | 27 | def simulate(self): 28 | prev_index = 0 29 | 30 | while self.end_job_num != self.total_job_num: 31 | 32 | """1. Check & Release End Jobs""" 33 | run_ls = self.run_list.copy() 34 | for job in run_ls: 35 | if job["remain"] == 0: 36 | job["status"] = "end" 37 | job["end_time"] = self.time 38 | self.end_job_num += 1 39 | assert self._vc.release_resource(job) == True 40 | self.run_list.remove(job) 41 | else: 42 | job["remain"] -= 1 43 | job["priority"] += job.__getitem__("gpu_num") 44 | 45 | """2. Check New Jobs """ 46 | for idx in range(prev_index, self.total_job_num): 47 | job = self.trace.job_list[idx] 48 | if job["submit_time"] == self.time: 49 | job["status"] = "pend" 50 | job["priority"] = 0 51 | self.que_list.append(job) 52 | prev_index = idx 53 | elif job["submit_time"] > self.time: 54 | break 55 | 56 | """3. Select Job to Preempt or Run """ 57 | preempt_list = [] 58 | prerun_list = [] 59 | # Refer to Pollux implementation, scheduling interval = 60s by default 60 | if self.time % 60 == 0: 61 | current_job = self.run_list + self.que_list 62 | quota = self._vc.total_gpus 63 | self.discretize_queue(current_job) 64 | current_job = self._high_priority_queue + self._low_priority_queue 65 | 66 | for job in current_job: 67 | if job.__getitem__("gpu_num") <= quota: 68 | quota -= job.__getitem__("gpu_num") 69 | if job["status"] == "pend": 70 | prerun_list.append(job) 71 | elif job["status"] == "run": 72 | preempt_list.append(job) 73 | 74 | """4. 
Preempt Job """ 75 | for job in preempt_list: 76 | job["ckpt_times"] += 1 77 | job.set_ckpt_time(self.time) 78 | job["status"] = "pend" 79 | job["remain"] += self.ckpt_overhead(job) 80 | assert self._vc.release_resource(job) == True 81 | job["nodes"] = [] 82 | 83 | if job not in self.que_list: 84 | self.que_list.append(job) 85 | if job in self.run_list: 86 | self.run_list.remove(job) 87 | 88 | """5. Allocate Job """ 89 | for job in prerun_list: 90 | if self.job_placer(job): 91 | job["status"] = "run" 92 | if job["ckpt_times"] == 0: 93 | job["start_time"] = self.time 94 | job["queue"] = self.time - job["submit_time"] 95 | else: 96 | job["queue"] = job["queue"] + (self.time - job.get_ckpt_time()) 97 | 98 | if job in self.que_list: 99 | self.que_list.remove(job) 100 | if job not in self.run_list: 101 | self.run_list.append(job) 102 | else: 103 | # May place fail because consolidate requirement 104 | if job not in self.que_list: 105 | self.que_list.append(job) 106 | continue 107 | 108 | """6. Log & Result Recorder""" 109 | if self.time % 10000 == 0: 110 | self.runtime_log() 111 | 112 | # Sample Cluster State Every Minute 113 | if self.time % 60 == 0: 114 | self.seq_recorder() 115 | 116 | self.time += 1 117 | 118 | self.log_recorder(self._name) 119 | -------------------------------------------------------------------------------- /simulation/profiler/__init__.py: -------------------------------------------------------------------------------- 1 | from .lgf import LeastGPUFirstProfiler 2 | -------------------------------------------------------------------------------- /simulation/profiler/lgf.py: -------------------------------------------------------------------------------- 1 | from .profiler import Profiler 2 | 3 | 4 | class LeastGPUFirstProfiler(Profiler): 5 | def __init__(self, trace, scale, time_limit, prof_gpu_limit, placement, log_dir, logger, start_ts): 6 | super(LeastGPUFirstProfiler, self).__init__( 7 | trace, scale, time_limit, prof_gpu_limit, placement, log_dir, logger, start_ts 8 | ) 9 | self._name = "lgfprof" 10 | self.cluster_name = log_dir.split("/")[-1].split("_")[0] 11 | self.get_time_series_data(self.cluster_name) 12 | self.enable_scaling = True if self.cluster_name == "Venus" else False 13 | self.node_scaling_time = 0 14 | self.node_scaling_num = 1 15 | 16 | def profile(self): 17 | prev_index = 0 18 | 19 | while self.end_job_num != self.total_job_num: 20 | 21 | """1. Check & Release End Jobs""" 22 | run_ls = self.run_list.copy() # Avoid list.remove() issue 23 | for job in run_ls: 24 | if self.time == job["end_time"]: 25 | if job["toskip"] == 1: 26 | job["remain"] = 0 27 | job["status"] = "end" 28 | self.end_job_num += 1 29 | assert self._vc.release_resource(job) 30 | self.run_list.remove(job) 31 | 32 | """2. 
Allocate New / Pending Jobs""" 33 | # New Job 34 | for idx in range(prev_index, self.total_job_num): 35 | job = self.trace.job_list[idx] 36 | if job["gpu_num"] > self.gpu_limit: 37 | self.end_job_num += 1 38 | prev_index = idx + 1 39 | else: 40 | if job["submit_time"] == self.time: 41 | self.que_list.append(job) 42 | prev_index = idx 43 | elif job["submit_time"] > self.time: 44 | break 45 | 46 | # Pend Job 47 | # NOTE: Sort by Job GPU Num -- LGF 48 | self.que_list.sort(key=lambda x: x.__getitem__("gpu_num")) 49 | # self.que_list.sort(key=lambda x: x.__getitem__("submit_time")) 50 | que_ls = self.que_list.copy() 51 | for job in que_ls: 52 | if self.job_placer(job): 53 | job["profiled"] = 1 54 | job["start_time"] = self.time 55 | job["profqueue"] = self.time - job["submit_time"] 56 | job["queue"] = job["profqueue"] 57 | if job["duration"] <= self.time_limit: 58 | job["end_time"] = job["start_time"] + job["duration"] 59 | job["toskip"] = 1 60 | else: 61 | job["end_time"] = job["start_time"] + self.time_limit 62 | self.que_list.remove(job) 63 | self.run_list.append(job) 64 | else: 65 | break 66 | 67 | """3. Time-aware Scaling (Optional)""" 68 | if self.enable_scaling: 69 | # Scale-Up 70 | if self.time % 10 == 0 and len(self.que_list) > 10 and self._vc.node_num == self._vc.base_node_num: 71 | self._vc.update_vc_node(change_node_num=self.node_scaling_num) 72 | self.node_scaling_time = self.time 73 | self.scaling_recorder(self.node_scaling_num) 74 | 75 | # Scale-Down 76 | if ( 77 | self.time % 100 == 0 78 | and len(self.que_list) < 5 79 | and self._vc.node_num == self._vc.base_node_num + self.node_scaling_num 80 | and len(self._vc.idle_node_list()) >= self.node_scaling_num 81 | and self._vc.check_node_inside_idle_vc(self._vc.temp_node_num_base) 82 | ): 83 | if self.check_future_cluster_throughput() <= self.gpu_limit * 5: 84 | self._vc.update_vc_node(change_node_num=-1 * self.node_scaling_num) 85 | self.node_scaling_time = self.time 86 | self.scaling_recorder(-1 * self.node_scaling_num) 87 | 88 | """4. 
Log & Result Recorder""" 89 | if self.time % 10000 == 0: 90 | self.runtime_log() 91 | 92 | # Sample Cluster State Every Minute 93 | if self.time % 60 == 0: 94 | self.seq_recorder() 95 | 96 | self.time += 1 97 | 98 | self.log_recorder(self._name) 99 | -------------------------------------------------------------------------------- /simulation/requirements.txt: -------------------------------------------------------------------------------- 1 | pyprimo 2 | numpy 3 | panda 4 | scikit_learn 5 | lightgbm 6 | seaborn 7 | matplotlib 8 | xgboost 9 | 10 | 11 | -------------------------------------------------------------------------------- /simulation/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python simulator.py -e='Venus_Sept' -t='./data/Venus' --sweep -------------------------------------------------------------------------------- /simulation/updater.py: -------------------------------------------------------------------------------- 1 | class ColocateUpdater: 2 | def __init__(self, colocate_df): 3 | self.df = colocate_df 4 | 5 | def _query(self, job1, job2): 6 | reverse = 0 7 | m1, m2 = job1["model"], job2["model"] 8 | d1, d2 = job1["dataset"], job2["dataset"] 9 | b1, b2 = job1["batchsize"], job2["batchsize"] 10 | a1, a2 = job1["amp"], job2["amp"] 11 | # g1, g2 = job1["gpu_num"], job2["gpu_num"] # NOTE 12 | 13 | info = self.df.query( 14 | " model1 == @m1 and model2 == @m2 and batchsize1 == @b1 and batchsize2 == @b2 and dataset1 == @d1 and dataset2 == @d2 and amp1 == @a1 and amp2 == @a2" 15 | ) 16 | if len(info) == 0: 17 | info = self.df.query( 18 | " model1 == @m2 and model2 == @m1 and batchsize1 == @b2 and batchsize2 == @b1 and dataset1 == @d2 and dataset2 == @d1 and amp1 == @a2 and amp2 == @a1" 19 | ) 20 | reverse = 1 21 | assert len(info) == 1, f"job1: {job1} | job2: {job2}" 22 | return info, reverse 23 | 24 | def query_info(self, job1, job2): 25 | if self.check_outside_job(job1, job2): 26 | # Little Influence 27 | total_util = min(1, job1["gpu_util"] + job2["gpu_util"]) 28 | total_mem = job1["gmem"] + job2["gmem"] 29 | return 1, 1, total_util, total_mem 30 | else: 31 | info, reverse = self._query(job1, job2) 32 | speed1, speed2 = info["speed1"].values[0], info["speed2"].values[0] 33 | if reverse: 34 | return speed2, speed1, info["gpu_util"].values[0], info["gmem"].values[0] 35 | else: 36 | return speed1, speed2, info["gpu_util"].values[0], info["gmem"].values[0] 37 | 38 | def query_speed(self, job1, job2): 39 | if self.check_outside_job(job1, job2): 40 | # Little Influence 41 | return 1, 1 42 | else: 43 | info, reverse = self._query(job1, job2) 44 | speed1, speed2 = info["speed1"].values[0], info["speed2"].values[0] 45 | if reverse: 46 | return speed2, speed1 47 | else: 48 | return speed1, speed2 49 | 50 | def query_utils(self, job1, job2): 51 | 52 | if self.check_outside_job(job1, job2): 53 | # Approximate as adding 54 | total_util = min(1, job1["gpu_util"] + job2["gpu_util"]) 55 | total_mem = job1["gmem"] + job2["gmem"] 56 | return total_util, total_mem 57 | else: 58 | info, _ = self._query(job1, job2) 59 | return info["gpu_util"].values[0], info["gmem"].values[0] 60 | 61 | # Some Jobs are not recorded inside colocate_df 62 | def check_outside_job(self, job1, job2): 63 | m1, m2 = job1["model"], job2["model"] 64 | models = [m1, m2] 65 | if "NeuMF" in models: 66 | return True 67 | # Large Model are classified as 2 68 | elif "ResNet50" in models or "BERT" in models or "Transformer" in models: 69 | # raise 
NotImplementedError 70 | return True 71 | else: 72 | return False 73 | -------------------------------------------------------------------------------- /workloads/cifar/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .vgg import * 2 | from .dpn import * 3 | from .lenet import * 4 | from .senet import * 5 | from .pnasnet import * 6 | from .densenet import * 7 | from .googlenet import * 8 | from .shufflenet import * 9 | from .shufflenetv2 import * 10 | from .resnet import * 11 | from .resnext import * 12 | from .preact_resnet import * 13 | from .mobilenet import * 14 | from .mobilenetv2 import * 15 | from .efficientnet import * 16 | from .alexnet import * -------------------------------------------------------------------------------- /workloads/cifar/models/alexnet.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | ########################## 4 | ### MODEL 5 | ########################## 6 | NUM_CLASSES = 10 7 | 8 | 9 | class AlexNet(nn.Module): 10 | def __init__(self, num_classes=NUM_CLASSES): 11 | super(AlexNet, self).__init__() 12 | self.features = nn.Sequential( 13 | nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1), 14 | nn.ReLU(inplace=True), 15 | nn.MaxPool2d(kernel_size=2), 16 | nn.Conv2d(64, 192, kernel_size=3, padding=1), 17 | nn.ReLU(inplace=True), 18 | nn.MaxPool2d(kernel_size=2), 19 | nn.Conv2d(192, 384, kernel_size=3, padding=1), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(384, 256, kernel_size=3, padding=1), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(256, 256, kernel_size=3, padding=1), 24 | nn.ReLU(inplace=True), 25 | nn.MaxPool2d(kernel_size=2), 26 | ) 27 | self.classifier = nn.Sequential( 28 | nn.Dropout(), 29 | nn.Linear(256 * 2 * 2, 4096), 30 | nn.ReLU(inplace=True), 31 | nn.Dropout(), 32 | nn.Linear(4096, 4096), 33 | nn.ReLU(inplace=True), 34 | nn.Linear(4096, num_classes), 35 | ) 36 | 37 | def forward(self, x): 38 | x = self.features(x) 39 | x = x.view(x.size(0), 256 * 2 * 2) 40 | x = self.classifier(x) 41 | return x -------------------------------------------------------------------------------- /workloads/cifar/models/densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet in PyTorch.''' 2 | import math 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Bottleneck(nn.Module): 10 | def __init__(self, in_planes, growth_rate): 11 | super(Bottleneck, self).__init__() 12 | self.bn1 = nn.BatchNorm2d(in_planes) 13 | self.conv1 = nn.Conv2d(in_planes, 4*growth_rate, kernel_size=1, bias=False) 14 | self.bn2 = nn.BatchNorm2d(4*growth_rate) 15 | self.conv2 = nn.Conv2d(4*growth_rate, growth_rate, kernel_size=3, padding=1, bias=False) 16 | 17 | def forward(self, x): 18 | out = self.conv1(F.relu(self.bn1(x))) 19 | out = self.conv2(F.relu(self.bn2(out))) 20 | out = torch.cat([out,x], 1) 21 | return out 22 | 23 | 24 | class Transition(nn.Module): 25 | def __init__(self, in_planes, out_planes): 26 | super(Transition, self).__init__() 27 | self.bn = nn.BatchNorm2d(in_planes) 28 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False) 29 | 30 | def forward(self, x): 31 | out = self.conv(F.relu(self.bn(x))) 32 | out = F.avg_pool2d(out, 2) 33 | return out 34 | 35 | 36 | class DenseNet(nn.Module): 37 | def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10): 38 | super(DenseNet, self).__init__() 39 | self.growth_rate 
= growth_rate 40 | 41 | num_planes = 2*growth_rate 42 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False) 43 | 44 | self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0]) 45 | num_planes += nblocks[0]*growth_rate 46 | out_planes = int(math.floor(num_planes*reduction)) 47 | self.trans1 = Transition(num_planes, out_planes) 48 | num_planes = out_planes 49 | 50 | self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1]) 51 | num_planes += nblocks[1]*growth_rate 52 | out_planes = int(math.floor(num_planes*reduction)) 53 | self.trans2 = Transition(num_planes, out_planes) 54 | num_planes = out_planes 55 | 56 | self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2]) 57 | num_planes += nblocks[2]*growth_rate 58 | out_planes = int(math.floor(num_planes*reduction)) 59 | self.trans3 = Transition(num_planes, out_planes) 60 | num_planes = out_planes 61 | 62 | self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3]) 63 | num_planes += nblocks[3]*growth_rate 64 | 65 | self.bn = nn.BatchNorm2d(num_planes) 66 | self.linear = nn.Linear(num_planes, num_classes) 67 | 68 | def _make_dense_layers(self, block, in_planes, nblock): 69 | layers = [] 70 | for i in range(nblock): 71 | layers.append(block(in_planes, self.growth_rate)) 72 | in_planes += self.growth_rate 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x): 76 | out = self.conv1(x) 77 | out = self.trans1(self.dense1(out)) 78 | out = self.trans2(self.dense2(out)) 79 | out = self.trans3(self.dense3(out)) 80 | out = self.dense4(out) 81 | out = F.avg_pool2d(F.relu(self.bn(out)), 4) 82 | out = out.view(out.size(0), -1) 83 | out = self.linear(out) 84 | return out 85 | 86 | def DenseNet121(): 87 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=32) 88 | 89 | def DenseNet169(): 90 | return DenseNet(Bottleneck, [6,12,32,32], growth_rate=32) 91 | 92 | def DenseNet201(): 93 | return DenseNet(Bottleneck, [6,12,48,32], growth_rate=32) 94 | 95 | def DenseNet161(): 96 | return DenseNet(Bottleneck, [6,12,36,24], growth_rate=48) 97 | 98 | def densenet_cifar(): 99 | return DenseNet(Bottleneck, [6,12,24,16], growth_rate=12) 100 | 101 | def test(): 102 | net = densenet_cifar() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /workloads/cifar/models/dpn.py: -------------------------------------------------------------------------------- 1 | '''Dual Path Networks in PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Bottleneck(nn.Module): 8 | def __init__(self, last_planes, in_planes, out_planes, dense_depth, stride, first_layer): 9 | super(Bottleneck, self).__init__() 10 | self.out_planes = out_planes 11 | self.dense_depth = dense_depth 12 | 13 | self.conv1 = nn.Conv2d(last_planes, in_planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(in_planes) 15 | self.conv2 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=32, bias=False) 16 | self.bn2 = nn.BatchNorm2d(in_planes) 17 | self.conv3 = nn.Conv2d(in_planes, out_planes+dense_depth, kernel_size=1, bias=False) 18 | self.bn3 = nn.BatchNorm2d(out_planes+dense_depth) 19 | 20 | self.shortcut = nn.Sequential() 21 | if first_layer: 22 | self.shortcut = nn.Sequential( 23 | nn.Conv2d(last_planes, out_planes+dense_depth, kernel_size=1, stride=stride, bias=False), 24 | nn.BatchNorm2d(out_planes+dense_depth) 25 | ) 26 | 27 
| def forward(self, x): 28 | out = F.relu(self.bn1(self.conv1(x))) 29 | out = F.relu(self.bn2(self.conv2(out))) 30 | out = self.bn3(self.conv3(out)) 31 | x = self.shortcut(x) 32 | d = self.out_planes 33 | out = torch.cat([x[:,:d,:,:]+out[:,:d,:,:], x[:,d:,:,:], out[:,d:,:,:]], 1) 34 | out = F.relu(out) 35 | return out 36 | 37 | 38 | class DPN(nn.Module): 39 | def __init__(self, cfg): 40 | super(DPN, self).__init__() 41 | in_planes, out_planes = cfg['in_planes'], cfg['out_planes'] 42 | num_blocks, dense_depth = cfg['num_blocks'], cfg['dense_depth'] 43 | 44 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(64) 46 | self.last_planes = 64 47 | self.layer1 = self._make_layer(in_planes[0], out_planes[0], num_blocks[0], dense_depth[0], stride=1) 48 | self.layer2 = self._make_layer(in_planes[1], out_planes[1], num_blocks[1], dense_depth[1], stride=2) 49 | self.layer3 = self._make_layer(in_planes[2], out_planes[2], num_blocks[2], dense_depth[2], stride=2) 50 | self.layer4 = self._make_layer(in_planes[3], out_planes[3], num_blocks[3], dense_depth[3], stride=2) 51 | self.linear = nn.Linear(out_planes[3]+(num_blocks[3]+1)*dense_depth[3], 10) 52 | 53 | def _make_layer(self, in_planes, out_planes, num_blocks, dense_depth, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for i,stride in enumerate(strides): 57 | layers.append(Bottleneck(self.last_planes, in_planes, out_planes, dense_depth, stride, i==0)) 58 | self.last_planes = out_planes + (i+2) * dense_depth 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | def DPN26(): 74 | cfg = { 75 | 'in_planes': (96,192,384,768), 76 | 'out_planes': (256,512,1024,2048), 77 | 'num_blocks': (2,2,2,2), 78 | 'dense_depth': (16,32,24,128) 79 | } 80 | return DPN(cfg) 81 | 82 | def DPN92(): 83 | cfg = { 84 | 'in_planes': (96,192,384,768), 85 | 'out_planes': (256,512,1024,2048), 86 | 'num_blocks': (3,4,20,3), 87 | 'dense_depth': (16,32,24,128) 88 | } 89 | return DPN(cfg) 90 | 91 | 92 | def test(): 93 | net = DPN92() 94 | x = torch.randn(1,3,32,32) 95 | y = net(x) 96 | print(y) 97 | 98 | # test() 99 | -------------------------------------------------------------------------------- /workloads/cifar/models/efficientnet.py: -------------------------------------------------------------------------------- 1 | '''EfficientNet in PyTorch. 2 | Paper: "EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks". 
3 | ''' 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class Block(nn.Module): 10 | '''expand + depthwise + pointwise + squeeze-excitation''' 11 | 12 | def __init__(self, in_planes, out_planes, expansion, stride): 13 | super(Block, self).__init__() 14 | self.stride = stride 15 | 16 | planes = expansion * in_planes 17 | self.conv1 = nn.Conv2d( 18 | in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, 21 | stride=stride, padding=1, groups=planes, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | self.conv3 = nn.Conv2d( 24 | planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 25 | self.bn3 = nn.BatchNorm2d(out_planes) 26 | 27 | self.shortcut = nn.Sequential() 28 | if stride == 1 and in_planes != out_planes: 29 | self.shortcut = nn.Sequential( 30 | nn.Conv2d(in_planes, out_planes, kernel_size=1, 31 | stride=1, padding=0, bias=False), 32 | nn.BatchNorm2d(out_planes), 33 | ) 34 | 35 | # SE layers 36 | self.fc1 = nn.Conv2d(out_planes, out_planes//16, kernel_size=1) 37 | self.fc2 = nn.Conv2d(out_planes//16, out_planes, kernel_size=1) 38 | 39 | def forward(self, x): 40 | out = F.relu(self.bn1(self.conv1(x))) 41 | out = F.relu(self.bn2(self.conv2(out))) 42 | out = self.bn3(self.conv3(out)) 43 | shortcut = self.shortcut(x) if self.stride == 1 else out 44 | # Squeeze-Excitation 45 | w = F.avg_pool2d(out, out.size(2)) 46 | w = F.relu(self.fc1(w)) 47 | w = self.fc2(w).sigmoid() 48 | out = out * w + shortcut 49 | return out 50 | 51 | 52 | class EfficientNet(nn.Module): 53 | def __init__(self, cfg, num_classes=10): 54 | super(EfficientNet, self).__init__() 55 | self.cfg = cfg 56 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, 57 | stride=1, padding=1, bias=False) 58 | self.bn1 = nn.BatchNorm2d(32) 59 | self.layers = self._make_layers(in_planes=32) 60 | self.linear = nn.Linear(cfg[-1][1], num_classes) 61 | 62 | def _make_layers(self, in_planes): 63 | layers = [] 64 | for expansion, out_planes, num_blocks, stride in self.cfg: 65 | strides = [stride] + [1]*(num_blocks-1) 66 | for stride in strides: 67 | layers.append(Block(in_planes, out_planes, expansion, stride)) 68 | in_planes = out_planes 69 | return nn.Sequential(*layers) 70 | 71 | def forward(self, x): 72 | out = F.relu(self.bn1(self.conv1(x))) 73 | out = self.layers(out) 74 | out = out.view(out.size(0), -1) 75 | out = self.linear(out) 76 | return out 77 | 78 | 79 | def EfficientNetB0(): 80 | # (expansion, out_planes, num_blocks, stride) 81 | cfg = [(1, 16, 1, 2), 82 | (6, 24, 2, 1), 83 | (6, 40, 2, 2), 84 | (6, 80, 3, 2), 85 | (6, 112, 3, 1), 86 | (6, 192, 4, 2), 87 | (6, 320, 1, 2)] 88 | return EfficientNet(cfg) 89 | 90 | 91 | def test(): 92 | net = EfficientNetB0() 93 | x = torch.randn(2, 3, 32, 32) 94 | y = net(x) 95 | print(y.shape) 96 | 97 | # test -------------------------------------------------------------------------------- /workloads/cifar/models/googlenet.py: -------------------------------------------------------------------------------- 1 | '''GoogLeNet with PyTorch.''' 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | class Inception(nn.Module): 8 | def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes): 9 | super(Inception, self).__init__() 10 | # 1x1 conv branch 11 | self.b1 = nn.Sequential( 12 | nn.Conv2d(in_planes, n1x1, kernel_size=1), 13 | nn.BatchNorm2d(n1x1), 14 | nn.ReLU(True), 15 | ) 16 | 
17 | # 1x1 conv -> 3x3 conv branch 18 | self.b2 = nn.Sequential( 19 | nn.Conv2d(in_planes, n3x3red, kernel_size=1), 20 | nn.BatchNorm2d(n3x3red), 21 | nn.ReLU(True), 22 | nn.Conv2d(n3x3red, n3x3, kernel_size=3, padding=1), 23 | nn.BatchNorm2d(n3x3), 24 | nn.ReLU(True), 25 | ) 26 | 27 | # 1x1 conv -> 5x5 conv branch 28 | self.b3 = nn.Sequential( 29 | nn.Conv2d(in_planes, n5x5red, kernel_size=1), 30 | nn.BatchNorm2d(n5x5red), 31 | nn.ReLU(True), 32 | nn.Conv2d(n5x5red, n5x5, kernel_size=3, padding=1), 33 | nn.BatchNorm2d(n5x5), 34 | nn.ReLU(True), 35 | nn.Conv2d(n5x5, n5x5, kernel_size=3, padding=1), 36 | nn.BatchNorm2d(n5x5), 37 | nn.ReLU(True), 38 | ) 39 | 40 | # 3x3 pool -> 1x1 conv branch 41 | self.b4 = nn.Sequential( 42 | nn.MaxPool2d(3, stride=1, padding=1), 43 | nn.Conv2d(in_planes, pool_planes, kernel_size=1), 44 | nn.BatchNorm2d(pool_planes), 45 | nn.ReLU(True), 46 | ) 47 | 48 | def forward(self, x): 49 | y1 = self.b1(x) 50 | y2 = self.b2(x) 51 | y3 = self.b3(x) 52 | y4 = self.b4(x) 53 | return torch.cat([y1,y2,y3,y4], 1) 54 | 55 | 56 | class GoogLeNet(nn.Module): 57 | def __init__(self): 58 | super(GoogLeNet, self).__init__() 59 | self.pre_layers = nn.Sequential( 60 | nn.Conv2d(3, 192, kernel_size=3, padding=1), 61 | nn.BatchNorm2d(192), 62 | nn.ReLU(True), 63 | ) 64 | 65 | self.a3 = Inception(192, 64, 96, 128, 16, 32, 32) 66 | self.b3 = Inception(256, 128, 128, 192, 32, 96, 64) 67 | 68 | self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) 69 | 70 | self.a4 = Inception(480, 192, 96, 208, 16, 48, 64) 71 | self.b4 = Inception(512, 160, 112, 224, 24, 64, 64) 72 | self.c4 = Inception(512, 128, 128, 256, 24, 64, 64) 73 | self.d4 = Inception(512, 112, 144, 288, 32, 64, 64) 74 | self.e4 = Inception(528, 256, 160, 320, 32, 128, 128) 75 | 76 | self.a5 = Inception(832, 256, 160, 320, 32, 128, 128) 77 | self.b5 = Inception(832, 384, 192, 384, 48, 128, 128) 78 | 79 | self.avgpool = nn.AvgPool2d(8, stride=1) 80 | self.linear = nn.Linear(1024, 10) 81 | 82 | def forward(self, x): 83 | out = self.pre_layers(x) 84 | out = self.a3(out) 85 | out = self.b3(out) 86 | out = self.maxpool(out) 87 | out = self.a4(out) 88 | out = self.b4(out) 89 | out = self.c4(out) 90 | out = self.d4(out) 91 | out = self.e4(out) 92 | out = self.maxpool(out) 93 | out = self.a5(out) 94 | out = self.b5(out) 95 | out = self.avgpool(out) 96 | out = out.view(out.size(0), -1) 97 | out = self.linear(out) 98 | return out 99 | 100 | 101 | def test(): 102 | net = GoogLeNet() 103 | x = torch.randn(1,3,32,32) 104 | y = net(x) 105 | print(y.size()) 106 | 107 | # test() 108 | -------------------------------------------------------------------------------- /workloads/cifar/models/lenet.py: -------------------------------------------------------------------------------- 1 | '''LeNet in PyTorch.''' 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LeNet(nn.Module): 6 | def __init__(self): 7 | super(LeNet, self).__init__() 8 | self.conv1 = nn.Conv2d(3, 6, 5) 9 | self.conv2 = nn.Conv2d(6, 16, 5) 10 | self.fc1 = nn.Linear(16*5*5, 120) 11 | self.fc2 = nn.Linear(120, 84) 12 | self.fc3 = nn.Linear(84, 10) 13 | 14 | def forward(self, x): 15 | out = F.relu(self.conv1(x)) 16 | out = F.max_pool2d(out, 2) 17 | out = F.relu(self.conv2(out)) 18 | out = F.max_pool2d(out, 2) 19 | out = out.view(out.size(0), -1) 20 | out = F.relu(self.fc1(out)) 21 | out = F.relu(self.fc2(out)) 22 | out = self.fc3(out) 23 | return out 24 | -------------------------------------------------------------------------------- 
/workloads/cifar/models/mobilenet.py: -------------------------------------------------------------------------------- 1 | '''MobileNet in PyTorch. 2 | 3 | See the paper "MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications" 4 | for more details. 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''Depthwise conv + Pointwise conv''' 13 | def __init__(self, in_planes, out_planes, stride=1): 14 | super(Block, self).__init__() 15 | self.conv1 = nn.Conv2d(in_planes, in_planes, kernel_size=3, stride=stride, padding=1, groups=in_planes, bias=False) 16 | self.bn1 = nn.BatchNorm2d(in_planes) 17 | self.conv2 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 18 | self.bn2 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | out = F.relu(self.bn1(self.conv1(x))) 22 | out = F.relu(self.bn2(self.conv2(out))) 23 | return out 24 | 25 | 26 | class MobileNet(nn.Module): 27 | # (128,2) means conv planes=128, conv stride=2, by default conv stride=1 28 | cfg = [64, (128,2), 128, (256,2), 256, (512,2), 512, 512, 512, 512, 512, (1024,2), 1024] 29 | 30 | def __init__(self, num_classes=10): 31 | super(MobileNet, self).__init__() 32 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 33 | self.bn1 = nn.BatchNorm2d(32) 34 | self.layers = self._make_layers(in_planes=32) 35 | self.linear = nn.Linear(1024, num_classes) 36 | 37 | def _make_layers(self, in_planes): 38 | layers = [] 39 | for x in self.cfg: 40 | out_planes = x if isinstance(x, int) else x[0] 41 | stride = 1 if isinstance(x, int) else x[1] 42 | layers.append(Block(in_planes, out_planes, stride)) 43 | in_planes = out_planes 44 | return nn.Sequential(*layers) 45 | 46 | def forward(self, x): 47 | out = F.relu(self.bn1(self.conv1(x))) 48 | out = self.layers(out) 49 | out = F.avg_pool2d(out, 2) 50 | out = out.view(out.size(0), -1) 51 | out = self.linear(out) 52 | return out 53 | 54 | 55 | def test(): 56 | net = MobileNet() 57 | x = torch.randn(1,3,32,32) 58 | y = net(x) 59 | print(y.size()) 60 | 61 | # test() 62 | -------------------------------------------------------------------------------- /workloads/cifar/models/mobilenetv2.py: -------------------------------------------------------------------------------- 1 | '''MobileNetV2 in PyTorch. 2 | 3 | See the paper "Inverted Residuals and Linear Bottlenecks: 4 | Mobile Networks for Classification, Detection and Segmentation" for more details. 
5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | class Block(nn.Module): 12 | '''expand + depthwise + pointwise''' 13 | def __init__(self, in_planes, out_planes, expansion, stride): 14 | super(Block, self).__init__() 15 | self.stride = stride 16 | 17 | planes = expansion * in_planes 18 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=1, padding=0, bias=False) 19 | self.bn1 = nn.BatchNorm2d(planes) 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, groups=planes, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 23 | self.bn3 = nn.BatchNorm2d(out_planes) 24 | 25 | self.shortcut = nn.Sequential() 26 | if stride == 1 and in_planes != out_planes: 27 | self.shortcut = nn.Sequential( 28 | nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False), 29 | nn.BatchNorm2d(out_planes), 30 | ) 31 | 32 | def forward(self, x): 33 | out = F.relu(self.bn1(self.conv1(x))) 34 | out = F.relu(self.bn2(self.conv2(out))) 35 | out = self.bn3(self.conv3(out)) 36 | out = out + self.shortcut(x) if self.stride==1 else out 37 | return out 38 | 39 | 40 | class MobileNetV2(nn.Module): 41 | # (expansion, out_planes, num_blocks, stride) 42 | cfg = [(1, 16, 1, 1), 43 | (6, 24, 2, 1), # NOTE: change stride 2 -> 1 for CIFAR10 44 | (6, 32, 3, 2), 45 | (6, 64, 4, 2), 46 | (6, 96, 3, 1), 47 | (6, 160, 3, 2), 48 | (6, 320, 1, 1)] 49 | 50 | def __init__(self, num_classes=10): 51 | super(MobileNetV2, self).__init__() 52 | # NOTE: change conv1 stride 2 -> 1 for CIFAR10 53 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False) 54 | self.bn1 = nn.BatchNorm2d(32) 55 | self.layers = self._make_layers(in_planes=32) 56 | self.conv2 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, padding=0, bias=False) 57 | self.bn2 = nn.BatchNorm2d(1280) 58 | self.linear = nn.Linear(1280, num_classes) 59 | 60 | def _make_layers(self, in_planes): 61 | layers = [] 62 | for expansion, out_planes, num_blocks, stride in self.cfg: 63 | strides = [stride] + [1]*(num_blocks-1) 64 | for stride in strides: 65 | layers.append(Block(in_planes, out_planes, expansion, stride)) 66 | in_planes = out_planes 67 | return nn.Sequential(*layers) 68 | 69 | def forward(self, x): 70 | out = F.relu(self.bn1(self.conv1(x))) 71 | out = self.layers(out) 72 | out = F.relu(self.bn2(self.conv2(out))) 73 | # NOTE: change pooling kernel_size 7 -> 4 for CIFAR10 74 | out = F.avg_pool2d(out, 4) 75 | out = out.view(out.size(0), -1) 76 | out = self.linear(out) 77 | return out 78 | 79 | 80 | def test(): 81 | net = MobileNetV2() 82 | x = torch.randn(2,3,32,32) 83 | y = net(x) 84 | print(y.size()) 85 | 86 | # test() 87 | -------------------------------------------------------------------------------- /workloads/cifar/models/pnasnet.py: -------------------------------------------------------------------------------- 1 | '''PNASNet in PyTorch. 
2 | 3 | Paper: Progressive Neural Architecture Search 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class SepConv(nn.Module): 11 | '''Separable Convolution.''' 12 | def __init__(self, in_planes, out_planes, kernel_size, stride): 13 | super(SepConv, self).__init__() 14 | self.conv1 = nn.Conv2d(in_planes, out_planes, 15 | kernel_size, stride, 16 | padding=(kernel_size-1)//2, 17 | bias=False, groups=in_planes) 18 | self.bn1 = nn.BatchNorm2d(out_planes) 19 | 20 | def forward(self, x): 21 | return self.bn1(self.conv1(x)) 22 | 23 | 24 | class CellA(nn.Module): 25 | def __init__(self, in_planes, out_planes, stride=1): 26 | super(CellA, self).__init__() 27 | self.stride = stride 28 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 29 | if stride==2: 30 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 31 | self.bn1 = nn.BatchNorm2d(out_planes) 32 | 33 | def forward(self, x): 34 | y1 = self.sep_conv1(x) 35 | y2 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 36 | if self.stride==2: 37 | y2 = self.bn1(self.conv1(y2)) 38 | return F.relu(y1+y2) 39 | 40 | class CellB(nn.Module): 41 | def __init__(self, in_planes, out_planes, stride=1): 42 | super(CellB, self).__init__() 43 | self.stride = stride 44 | # Left branch 45 | self.sep_conv1 = SepConv(in_planes, out_planes, kernel_size=7, stride=stride) 46 | self.sep_conv2 = SepConv(in_planes, out_planes, kernel_size=3, stride=stride) 47 | # Right branch 48 | self.sep_conv3 = SepConv(in_planes, out_planes, kernel_size=5, stride=stride) 49 | if stride==2: 50 | self.conv1 = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 51 | self.bn1 = nn.BatchNorm2d(out_planes) 52 | # Reduce channels 53 | self.conv2 = nn.Conv2d(2*out_planes, out_planes, kernel_size=1, stride=1, padding=0, bias=False) 54 | self.bn2 = nn.BatchNorm2d(out_planes) 55 | 56 | def forward(self, x): 57 | # Left branch 58 | y1 = self.sep_conv1(x) 59 | y2 = self.sep_conv2(x) 60 | # Right branch 61 | y3 = F.max_pool2d(x, kernel_size=3, stride=self.stride, padding=1) 62 | if self.stride==2: 63 | y3 = self.bn1(self.conv1(y3)) 64 | y4 = self.sep_conv3(x) 65 | # Concat & reduce channels 66 | b1 = F.relu(y1+y2) 67 | b2 = F.relu(y3+y4) 68 | y = torch.cat([b1,b2], 1) 69 | return F.relu(self.bn2(self.conv2(y))) 70 | 71 | class PNASNet(nn.Module): 72 | def __init__(self, cell_type, num_cells, num_planes): 73 | super(PNASNet, self).__init__() 74 | self.in_planes = num_planes 75 | self.cell_type = cell_type 76 | 77 | self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, stride=1, padding=1, bias=False) 78 | self.bn1 = nn.BatchNorm2d(num_planes) 79 | 80 | self.layer1 = self._make_layer(num_planes, num_cells=6) 81 | self.layer2 = self._downsample(num_planes*2) 82 | self.layer3 = self._make_layer(num_planes*2, num_cells=6) 83 | self.layer4 = self._downsample(num_planes*4) 84 | self.layer5 = self._make_layer(num_planes*4, num_cells=6) 85 | 86 | self.linear = nn.Linear(num_planes*4, 10) 87 | 88 | def _make_layer(self, planes, num_cells): 89 | layers = [] 90 | for _ in range(num_cells): 91 | layers.append(self.cell_type(self.in_planes, planes, stride=1)) 92 | self.in_planes = planes 93 | return nn.Sequential(*layers) 94 | 95 | def _downsample(self, planes): 96 | layer = self.cell_type(self.in_planes, planes, stride=2) 97 | self.in_planes = planes 98 | return layer 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | 
out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = self.layer3(out) 105 | out = self.layer4(out) 106 | out = self.layer5(out) 107 | out = F.avg_pool2d(out, 8) 108 | out = self.linear(out.view(out.size(0), -1)) 109 | return out 110 | 111 | 112 | def PNASNetA(): 113 | return PNASNet(CellA, num_cells=6, num_planes=44) 114 | 115 | def PNASNetB(): 116 | return PNASNet(CellB, num_cells=6, num_planes=32) 117 | 118 | 119 | def test(): 120 | net = PNASNetB() 121 | x = torch.randn(1,3,32,32) 122 | y = net(x) 123 | print(y) 124 | 125 | # test() 126 | -------------------------------------------------------------------------------- /workloads/cifar/models/preact_resnet.py: -------------------------------------------------------------------------------- 1 | '''Pre-activation ResNet in PyTorch. 2 | 3 | Reference: 4 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 5 | Identity Mappings in Deep Residual Networks. arXiv:1603.05027 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | class PreActBlock(nn.Module): 13 | '''Pre-activation version of the BasicBlock.''' 14 | expansion = 1 15 | 16 | def __init__(self, in_planes, planes, stride=1): 17 | super(PreActBlock, self).__init__() 18 | self.bn1 = nn.BatchNorm2d(in_planes) 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn2 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | 23 | if stride != 1 or in_planes != self.expansion*planes: 24 | self.shortcut = nn.Sequential( 25 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 26 | ) 27 | 28 | def forward(self, x): 29 | out = F.relu(self.bn1(x)) 30 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 31 | out = self.conv1(out) 32 | out = self.conv2(F.relu(self.bn2(out))) 33 | out += shortcut 34 | return out 35 | 36 | 37 | class PreActBottleneck(nn.Module): 38 | '''Pre-activation version of the original Bottleneck module.''' 39 | expansion = 4 40 | 41 | def __init__(self, in_planes, planes, stride=1): 42 | super(PreActBottleneck, self).__init__() 43 | self.bn1 = nn.BatchNorm2d(in_planes) 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn2 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn3 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | 50 | if stride != 1 or in_planes != self.expansion*planes: 51 | self.shortcut = nn.Sequential( 52 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False) 53 | ) 54 | 55 | def forward(self, x): 56 | out = F.relu(self.bn1(x)) 57 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 58 | out = self.conv1(out) 59 | out = self.conv2(F.relu(self.bn2(out))) 60 | out = self.conv3(F.relu(self.bn3(out))) 61 | out += shortcut 62 | return out 63 | 64 | 65 | class PreActResNet(nn.Module): 66 | def __init__(self, block, num_blocks, num_classes=10): 67 | super(PreActResNet, self).__init__() 68 | self.in_planes = 64 69 | 70 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 71 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 72 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 73 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 74 | 
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 75 | self.linear = nn.Linear(512*block.expansion, num_classes) 76 | 77 | def _make_layer(self, block, planes, num_blocks, stride): 78 | strides = [stride] + [1]*(num_blocks-1) 79 | layers = [] 80 | for stride in strides: 81 | layers.append(block(self.in_planes, planes, stride)) 82 | self.in_planes = planes * block.expansion 83 | return nn.Sequential(*layers) 84 | 85 | def forward(self, x): 86 | out = self.conv1(x) 87 | out = self.layer1(out) 88 | out = self.layer2(out) 89 | out = self.layer3(out) 90 | out = self.layer4(out) 91 | out = F.avg_pool2d(out, 4) 92 | out = out.view(out.size(0), -1) 93 | out = self.linear(out) 94 | return out 95 | 96 | 97 | def PreActResNet18(): 98 | return PreActResNet(PreActBlock, [2,2,2,2]) 99 | 100 | def PreActResNet34(): 101 | return PreActResNet(PreActBlock, [3,4,6,3]) 102 | 103 | def PreActResNet50(): 104 | return PreActResNet(PreActBottleneck, [3,4,6,3]) 105 | 106 | def PreActResNet101(): 107 | return PreActResNet(PreActBottleneck, [3,4,23,3]) 108 | 109 | def PreActResNet152(): 110 | return PreActResNet(PreActBottleneck, [3,8,36,3]) 111 | 112 | 113 | def test(): 114 | net = PreActResNet18() 115 | y = net((torch.randn(1,3,32,32))) 116 | print(y.size()) 117 | 118 | # test() 119 | -------------------------------------------------------------------------------- /workloads/cifar/models/resnet.py: -------------------------------------------------------------------------------- 1 | '''ResNet in PyTorch. 2 | 3 | For Pre-activation ResNet, see 'preact_resnet.py'. 4 | 5 | Reference: 6 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 7 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 8 | ''' 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, in_planes, planes, stride=1): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 22 | self.bn2 = nn.BatchNorm2d(planes) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*planes: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*planes) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = self.bn2(self.conv2(out)) 34 | out += self.shortcut(x) 35 | out = F.relu(out) 36 | return out 37 | 38 | 39 | class Bottleneck(nn.Module): 40 | expansion = 4 41 | 42 | def __init__(self, in_planes, planes, stride=1): 43 | super(Bottleneck, self).__init__() 44 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) 45 | self.bn1 = nn.BatchNorm2d(planes) 46 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 47 | self.bn2 = nn.BatchNorm2d(planes) 48 | self.conv3 = nn.Conv2d(planes, self.expansion*planes, kernel_size=1, bias=False) 49 | self.bn3 = nn.BatchNorm2d(self.expansion*planes) 50 | 51 | self.shortcut = nn.Sequential() 52 | if stride != 1 or in_planes != self.expansion*planes: 53 | self.shortcut = nn.Sequential( 54 | nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False), 55 | nn.BatchNorm2d(self.expansion*planes) 56 | ) 57 | 58 | def 
forward(self, x): 59 | out = F.relu(self.bn1(self.conv1(x))) 60 | out = F.relu(self.bn2(self.conv2(out))) 61 | out = self.bn3(self.conv3(out)) 62 | out += self.shortcut(x) 63 | out = F.relu(out) 64 | return out 65 | 66 | 67 | class ResNet(nn.Module): 68 | def __init__(self, block, num_blocks, num_classes=10): 69 | super(ResNet, self).__init__() 70 | self.in_planes = 64 71 | 72 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 73 | self.bn1 = nn.BatchNorm2d(64) 74 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 75 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 76 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 77 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 78 | self.linear = nn.Linear(512*block.expansion, num_classes) 79 | 80 | def _make_layer(self, block, planes, num_blocks, stride): 81 | strides = [stride] + [1]*(num_blocks-1) 82 | layers = [] 83 | for stride in strides: 84 | layers.append(block(self.in_planes, planes, stride)) 85 | self.in_planes = planes * block.expansion 86 | return nn.Sequential(*layers) 87 | 88 | def forward(self, x): 89 | out = F.relu(self.bn1(self.conv1(x))) 90 | out = self.layer1(out) 91 | out = self.layer2(out) 92 | out = self.layer3(out) 93 | out = self.layer4(out) 94 | out = F.avg_pool2d(out, 4) 95 | out = out.view(out.size(0), -1) 96 | out = self.linear(out) 97 | return out 98 | 99 | 100 | def ResNet18(): 101 | return ResNet(BasicBlock, [2,2,2,2]) 102 | 103 | def ResNet34(): 104 | return ResNet(BasicBlock, [3,4,6,3]) 105 | 106 | def ResNet50(): 107 | return ResNet(Bottleneck, [3,4,6,3]) 108 | 109 | def ResNet101(): 110 | return ResNet(Bottleneck, [3,4,23,3]) 111 | 112 | def ResNet152(): 113 | return ResNet(Bottleneck, [3,8,36,3]) 114 | 115 | 116 | def test(): 117 | net = ResNet18() 118 | y = net(torch.randn(1,3,32,32)) 119 | print(y.size()) 120 | 121 | # test() 122 | -------------------------------------------------------------------------------- /workloads/cifar/models/resnext.py: -------------------------------------------------------------------------------- 1 | '''ResNeXt in PyTorch. 2 | 3 | See the paper "Aggregated Residual Transformations for Deep Neural Networks" for more details. 
4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class Block(nn.Module): 11 | '''Grouped convolution block.''' 12 | expansion = 2 13 | 14 | def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1): 15 | super(Block, self).__init__() 16 | group_width = cardinality * bottleneck_width 17 | self.conv1 = nn.Conv2d(in_planes, group_width, kernel_size=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(group_width) 19 | self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False) 20 | self.bn2 = nn.BatchNorm2d(group_width) 21 | self.conv3 = nn.Conv2d(group_width, self.expansion*group_width, kernel_size=1, bias=False) 22 | self.bn3 = nn.BatchNorm2d(self.expansion*group_width) 23 | 24 | self.shortcut = nn.Sequential() 25 | if stride != 1 or in_planes != self.expansion*group_width: 26 | self.shortcut = nn.Sequential( 27 | nn.Conv2d(in_planes, self.expansion*group_width, kernel_size=1, stride=stride, bias=False), 28 | nn.BatchNorm2d(self.expansion*group_width) 29 | ) 30 | 31 | def forward(self, x): 32 | out = F.relu(self.bn1(self.conv1(x))) 33 | out = F.relu(self.bn2(self.conv2(out))) 34 | out = self.bn3(self.conv3(out)) 35 | out += self.shortcut(x) 36 | out = F.relu(out) 37 | return out 38 | 39 | 40 | class ResNeXt(nn.Module): 41 | def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=10): 42 | super(ResNeXt, self).__init__() 43 | self.cardinality = cardinality 44 | self.bottleneck_width = bottleneck_width 45 | self.in_planes = 64 46 | 47 | self.conv1 = nn.Conv2d(3, 64, kernel_size=1, bias=False) 48 | self.bn1 = nn.BatchNorm2d(64) 49 | self.layer1 = self._make_layer(num_blocks[0], 1) 50 | self.layer2 = self._make_layer(num_blocks[1], 2) 51 | self.layer3 = self._make_layer(num_blocks[2], 2) 52 | # self.layer4 = self._make_layer(num_blocks[3], 2) 53 | self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes) 54 | 55 | def _make_layer(self, num_blocks, stride): 56 | strides = [stride] + [1]*(num_blocks-1) 57 | layers = [] 58 | for stride in strides: 59 | layers.append(Block(self.in_planes, self.cardinality, self.bottleneck_width, stride)) 60 | self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width 61 | # Increase bottleneck_width by 2 after each stage. 62 | self.bottleneck_width *= 2 63 | return nn.Sequential(*layers) 64 | 65 | def forward(self, x): 66 | out = F.relu(self.bn1(self.conv1(x))) 67 | out = self.layer1(out) 68 | out = self.layer2(out) 69 | out = self.layer3(out) 70 | # out = self.layer4(out) 71 | out = F.avg_pool2d(out, 8) 72 | out = out.view(out.size(0), -1) 73 | out = self.linear(out) 74 | return out 75 | 76 | 77 | def ResNeXt29_2x64d(): 78 | return ResNeXt(num_blocks=[3,3,3], cardinality=2, bottleneck_width=64) 79 | 80 | def ResNeXt29_4x64d(): 81 | return ResNeXt(num_blocks=[3,3,3], cardinality=4, bottleneck_width=64) 82 | 83 | def ResNeXt29_8x64d(): 84 | return ResNeXt(num_blocks=[3,3,3], cardinality=8, bottleneck_width=64) 85 | 86 | def ResNeXt29_32x4d(): 87 | return ResNeXt(num_blocks=[3,3,3], cardinality=32, bottleneck_width=4) 88 | 89 | def test_resnext(): 90 | net = ResNeXt29_2x64d() 91 | x = torch.randn(1,3,32,32) 92 | y = net(x) 93 | print(y.size()) 94 | 95 | # test_resnext() 96 | -------------------------------------------------------------------------------- /workloads/cifar/models/senet.py: -------------------------------------------------------------------------------- 1 | '''SENet in PyTorch. 
2 | 3 | SENet is the winner of ImageNet-2017. The paper is not released yet. 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | 10 | class BasicBlock(nn.Module): 11 | def __init__(self, in_planes, planes, stride=1): 12 | super(BasicBlock, self).__init__() 13 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 14 | self.bn1 = nn.BatchNorm2d(planes) 15 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 16 | self.bn2 = nn.BatchNorm2d(planes) 17 | 18 | self.shortcut = nn.Sequential() 19 | if stride != 1 or in_planes != planes: 20 | self.shortcut = nn.Sequential( 21 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False), 22 | nn.BatchNorm2d(planes) 23 | ) 24 | 25 | # SE layers 26 | self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) # Use nn.Conv2d instead of nn.Linear 27 | self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) 28 | 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | 33 | # Squeeze 34 | w = F.avg_pool2d(out, out.size(2)) 35 | w = F.relu(self.fc1(w)) 36 | w = F.sigmoid(self.fc2(w)) 37 | # Excitation 38 | out = out * w # New broadcasting feature from v0.2! 39 | 40 | out += self.shortcut(x) 41 | out = F.relu(out) 42 | return out 43 | 44 | 45 | class PreActBlock(nn.Module): 46 | def __init__(self, in_planes, planes, stride=1): 47 | super(PreActBlock, self).__init__() 48 | self.bn1 = nn.BatchNorm2d(in_planes) 49 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 50 | self.bn2 = nn.BatchNorm2d(planes) 51 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) 52 | 53 | if stride != 1 or in_planes != planes: 54 | self.shortcut = nn.Sequential( 55 | nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False) 56 | ) 57 | 58 | # SE layers 59 | self.fc1 = nn.Conv2d(planes, planes//16, kernel_size=1) 60 | self.fc2 = nn.Conv2d(planes//16, planes, kernel_size=1) 61 | 62 | def forward(self, x): 63 | out = F.relu(self.bn1(x)) 64 | shortcut = self.shortcut(out) if hasattr(self, 'shortcut') else x 65 | out = self.conv1(out) 66 | out = self.conv2(F.relu(self.bn2(out))) 67 | 68 | # Squeeze 69 | w = F.avg_pool2d(out, out.size(2)) 70 | w = F.relu(self.fc1(w)) 71 | w = F.sigmoid(self.fc2(w)) 72 | # Excitation 73 | out = out * w 74 | 75 | out += shortcut 76 | return out 77 | 78 | 79 | class SENet(nn.Module): 80 | def __init__(self, block, num_blocks, num_classes=10): 81 | super(SENet, self).__init__() 82 | self.in_planes = 64 83 | 84 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 85 | self.bn1 = nn.BatchNorm2d(64) 86 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 87 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 88 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 89 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 90 | self.linear = nn.Linear(512, num_classes) 91 | 92 | def _make_layer(self, block, planes, num_blocks, stride): 93 | strides = [stride] + [1]*(num_blocks-1) 94 | layers = [] 95 | for stride in strides: 96 | layers.append(block(self.in_planes, planes, stride)) 97 | self.in_planes = planes 98 | return nn.Sequential(*layers) 99 | 100 | def forward(self, x): 101 | out = F.relu(self.bn1(self.conv1(x))) 102 | out = self.layer1(out) 103 | out = self.layer2(out) 104 | out = 
105 |         out = self.layer4(out)
106 |         out = F.avg_pool2d(out, 4)
107 |         out = out.view(out.size(0), -1)
108 |         out = self.linear(out)
109 |         return out
110 | 
111 | 
112 | def SENet18():
113 |     return SENet(PreActBlock, [2,2,2,2])
114 | 
115 | 
116 | def test():
117 |     net = SENet18()
118 |     y = net(torch.randn(1,3,32,32))
119 |     print(y.size())
120 | 
121 | # test()
122 | 
--------------------------------------------------------------------------------
/workloads/cifar/models/shufflenet.py:
--------------------------------------------------------------------------------
1 | '''ShuffleNet in PyTorch.
2 | 
3 | See the paper "ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices" for more details.
4 | '''
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | 
9 | 
10 | class ShuffleBlock(nn.Module):
11 |     def __init__(self, groups):
12 |         super(ShuffleBlock, self).__init__()
13 |         self.groups = groups
14 | 
15 |     def forward(self, x):
16 |         '''Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]'''
17 |         N,C,H,W = x.size()
18 |         g = self.groups
19 |         return x.view(N,g,C//g,H,W).permute(0,2,1,3,4).contiguous().view(N,C,H,W)  # C//g: view() sizes must be ints
20 | 
21 | 
22 | class Bottleneck(nn.Module):
23 |     def __init__(self, in_planes, out_planes, stride, groups):
24 |         super(Bottleneck, self).__init__()
25 |         self.stride = stride
26 | 
27 |         mid_planes = out_planes//4  # integer division keeps Conv2d channel counts ints
28 |         g = 1 if in_planes==24 else groups
29 |         self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1, groups=g, bias=False)
30 |         self.bn1 = nn.BatchNorm2d(mid_planes)
31 |         self.shuffle1 = ShuffleBlock(groups=g)
32 |         self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3, stride=stride, padding=1, groups=mid_planes, bias=False)
33 |         self.bn2 = nn.BatchNorm2d(mid_planes)
34 |         self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1, groups=groups, bias=False)
35 |         self.bn3 = nn.BatchNorm2d(out_planes)
36 | 
37 |         self.shortcut = nn.Sequential()
38 |         if stride == 2:
39 |             self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
40 | 
41 |     def forward(self, x):
42 |         out = F.relu(self.bn1(self.conv1(x)))
43 |         out = self.shuffle1(out)
44 |         out = F.relu(self.bn2(self.conv2(out)))
45 |         out = self.bn3(self.conv3(out))
46 |         res = self.shortcut(x)
47 |         out = F.relu(torch.cat([out,res], 1)) if self.stride==2 else F.relu(out+res)
48 |         return out
49 | 
50 | 
51 | class ShuffleNet(nn.Module):
52 |     def __init__(self, cfg):
53 |         super(ShuffleNet, self).__init__()
54 |         out_planes = cfg['out_planes']
55 |         num_blocks = cfg['num_blocks']
56 |         groups = cfg['groups']
57 | 
58 |         self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
59 |         self.bn1 = nn.BatchNorm2d(24)
60 |         self.in_planes = 24
61 |         self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
62 |         self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
63 |         self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
64 |         self.linear = nn.Linear(out_planes[2], 10)
65 | 
66 |     def _make_layer(self, out_planes, num_blocks, groups):
67 |         layers = []
68 |         for i in range(num_blocks):
69 |             stride = 2 if i == 0 else 1
70 |             cat_planes = self.in_planes if i == 0 else 0
71 |             layers.append(Bottleneck(self.in_planes, out_planes-cat_planes, stride=stride, groups=groups))
72 |             self.in_planes = out_planes
73 |         return nn.Sequential(*layers)
74 | 
75 |     def forward(self, x):
76 |         out = F.relu(self.bn1(self.conv1(x)))
77 |         out = self.layer1(out)
78 |         out = self.layer2(out)
79 |         out = self.layer3(out)
80 |         out = F.avg_pool2d(out, 4)
81
| out = out.view(out.size(0), -1) 82 | out = self.linear(out) 83 | return out 84 | 85 | 86 | def ShuffleNetG2(): 87 | cfg = { 88 | 'out_planes': [200,400,800], 89 | 'num_blocks': [4,8,4], 90 | 'groups': 2 91 | } 92 | return ShuffleNet(cfg) 93 | 94 | def ShuffleNetG3(): 95 | cfg = { 96 | 'out_planes': [240,480,960], 97 | 'num_blocks': [4,8,4], 98 | 'groups': 3 99 | } 100 | return ShuffleNet(cfg) 101 | 102 | 103 | def test(): 104 | net = ShuffleNetG2() 105 | x = torch.randn(1,3,32,32) 106 | y = net(x) 107 | print(y) 108 | 109 | # test() 110 | -------------------------------------------------------------------------------- /workloads/cifar/models/vgg.py: -------------------------------------------------------------------------------- 1 | '''VGG11/13/16/19 in Pytorch.''' 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | cfg = { 7 | 'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 8 | 'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], 9 | 'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], 10 | 'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], 11 | } 12 | 13 | 14 | class VGG(nn.Module): 15 | def __init__(self, vgg_name): 16 | super(VGG, self).__init__() 17 | self.features = self._make_layers(cfg[vgg_name]) 18 | self.classifier = nn.Linear(512, 10) 19 | 20 | def forward(self, x): 21 | out = self.features(x) 22 | out = out.view(out.size(0), -1) 23 | out = self.classifier(out) 24 | return out 25 | 26 | def _make_layers(self, cfg): 27 | layers = [] 28 | in_channels = 3 29 | for x in cfg: 30 | if x == 'M': 31 | layers += [nn.MaxPool2d(kernel_size=2, stride=2)] 32 | else: 33 | layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1), 34 | nn.BatchNorm2d(x), 35 | nn.ReLU(inplace=True)] 36 | in_channels = x 37 | layers += [nn.AvgPool2d(kernel_size=1, stride=1)] 38 | return nn.Sequential(*layers) 39 | 40 | 41 | def test(): 42 | net = VGG('VGG11') 43 | x = torch.randn(2,3,32,32) 44 | y = net(x) 45 | print(y.size()) 46 | 47 | # test() 48 | -------------------------------------------------------------------------------- /workloads/cifar/profile_cifar.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import timeit 4 | import torch.backends.cudnn as cudnn 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | import torch.utils.data.distributed 9 | import numpy as np 10 | import os 11 | import torchvision 12 | import time 13 | from torch.nn import DataParallel 14 | from torchvision import transforms 15 | 16 | from models import * 17 | import workloads.settings as settings 18 | 19 | 20 | # Benchmark settings 21 | parser = argparse.ArgumentParser( 22 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 23 | ) 24 | parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus") 25 | parser.add_argument("--gpu", default=1, type=int, help="GPU id to use. 
Only work when use single gpu.") 26 | parser.add_argument( 27 | "--num-warmup-batches", type=int, default=1, help='number of warm-up batches that don"t count towards benchmark' 28 | ) 29 | parser.add_argument("--num-batches-per-iter", type=int, default=1, help="number of batches per benchmark iteration") 30 | parser.add_argument("--num-iters", type=int, default=1, help="number of benchmark iterations") 31 | parser.add_argument("--amp-fp16", action="store_true", default=False, help="Enables FP16 training with Apex.") 32 | parser.add_argument('--warmup_epoch', type=int, default=10, help='number of warmup epochs') 33 | parser.add_argument('--benchmark_epoch', type=int, default=50, help='number of training benchmark epochs') 34 | parser.add_argument('--data_dir', type=str, default="~/data/", help='Data directory') 35 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 36 | 37 | args = parser.parse_args() 38 | 39 | # args.data_dir = settings.data_dir 40 | # args.total_time = settings.total_time 41 | 42 | def benchmark_cifar(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 43 | t_start = time.time() 44 | if len(gpu_id) == 1: 45 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 46 | else: 47 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 48 | 49 | cudnn.benchmark = True 50 | 51 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 52 | 53 | # Model 54 | # print('==> Building model..') 55 | if model_name == 'VGG': 56 | model = VGG('VGG11') 57 | elif model_name == 'ShuffleNetV2': 58 | model = ShuffleNetV2(net_size=0.5) 59 | else: 60 | model = eval(model_name)() 61 | model = model.to(device) 62 | 63 | criterion = nn.CrossEntropyLoss() 64 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) 65 | 66 | if mixed_precision: 67 | scaler = torch.cuda.amp.GradScaler(enabled=True) 68 | else: 69 | scaler = None 70 | 71 | # specify dataset 72 | ###### dataloader 73 | # print('==> Preparing data..') 74 | transform_train = transforms.Compose([ 75 | transforms.RandomCrop(32, padding=4), 76 | transforms.RandomHorizontalFlip(), 77 | transforms.ToTensor(), 78 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 79 | ]) 80 | 81 | trainset = torchvision.datasets.CIFAR10(root=args.data_dir, train=True, download=True, transform=transform_train) 82 | trainloader = torch.utils.data.DataLoader(trainset, batch_size, shuffle=True, num_workers=2) 83 | # data, target = next(iter(trainloader)) 84 | # data, target = data.cuda(), target.cuda() 85 | 86 | if len(gpu_id) > 1: 87 | model = DataParallel(model) 88 | 89 | # Train 90 | def benchmark_step(): 91 | iter_num = 0 92 | exit_flag = False 93 | model.train() 94 | # Prevent total batch number < warmup+benchmark situation 95 | while True: 96 | for inputs, targets in trainloader: 97 | # Warm-up: previous 10 iters 98 | if iter_num == args.warmup_epoch-1: 99 | warm_signal.value = 1 100 | t_warmend = time.time() 101 | # Reach timeout: exit profiling 102 | if time.time() - t_start >= args.total_time: 103 | t_end = time.time() 104 | t_pass = t_end - t_warmend 105 | exit_flag = True 106 | break 107 | optimizer.zero_grad() 108 | if mixed_precision: 109 | inputs, targets = inputs.to(device), targets.to(device) 110 | with torch.cuda.amp.autocast(): 111 | outputs = model(inputs) 112 | loss = criterion(outputs, targets) 113 | scaler.scale(loss).backward() 114 | scaler.step(optimizer) 115 | scaler.update() 116 | else: 117 | inputs, targets 
= inputs.to(device), targets.to(device)
118 |                     outputs = model(inputs)
119 |                     loss = criterion(outputs, targets)
120 |                     loss.backward()
121 |                     optimizer.step()
122 |                 iter_num += 1
123 |             if exit_flag:
124 |                 break
125 |         return t_pass, iter_num
126 | 
127 |     print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..')
128 |     t_pass, iter_num = benchmark_step()
129 |     img_sec = (iter_num - args.warmup_epoch) * batch_size / t_pass
130 |     print(img_sec)
131 | 
132 |     # Results
133 |     bench_list.append(img_sec)
134 | 
135 | if __name__ == "__main__":
136 |     # Standalone example: benchmark_cifar profiles one model in a single process,
137 |     # using DataParallel across the GPUs listed in gpu_id.
138 |     from multiprocessing import Manager, Value
139 |     model_name = 'EfficientNetB0'
140 |     batch_size = 64
141 |     mixed_precision = 0
142 |     gpu_id = [0,1,2,3]
143 |     bench_list, warm_signal = Manager().list(), Value('i', 0)  # shared state expected by benchmark_cifar
144 |     benchmark_cifar(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal)
--------------------------------------------------------------------------------
/workloads/cifar/run.sh:
--------------------------------------------------------------------------------
1 | max=10
2 | 
3 | for (( i=1; i <= $max; ++i ))
4 | do
5 |     python profile_cifar.py
6 | done
--------------------------------------------------------------------------------
/workloads/dcgan/download.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from __future__ import print_function, division
4 | import argparse
5 | from os.path import join
6 | 
7 | import subprocess
8 | from urllib.request import Request, urlopen
9 | 
10 | __author__ = 'Fisher Yu'
11 | __email__ = 'fy@cs.princeton.edu'
12 | __license__ = 'MIT'
13 | 
14 | 
15 | def list_categories():
16 |     url = 'http://dl.yf.io/lsun/categories.txt'
17 |     with urlopen(Request(url)) as response:
18 |         return response.read().decode().strip().split('\n')
19 | 
20 | 
21 | def download(out_dir, category, set_name):
22 |     url = 'http://dl.yf.io/lsun/scenes/{category}_' \
23 |           '{set_name}_lmdb.zip'.format(**locals())
24 |     if set_name == 'test':
25 |         out_name = 'test_lmdb.zip'
26 |         url = 'http://dl.yf.io/lsun/scenes/{set_name}_lmdb.zip'.format(**locals())  # .format was missing, leaving a literal '{set_name}' in the test URL
27 |     else:
28 |         out_name = '{category}_{set_name}_lmdb.zip'.format(**locals())
29 |     out_path = join(out_dir, out_name)
30 |     cmd = ['curl', url, '-o', out_path]
31 |     print('Downloading', category, set_name, 'set')
32 |     subprocess.call(cmd)
33 | 
34 | 
35 | def main():
36 |     parser = argparse.ArgumentParser()
37 |     parser.add_argument('-o', '--out_dir', default='')
38 |     parser.add_argument('-c', '--category', default=None)
39 |     args = parser.parse_args()
40 | 
41 |     categories = list_categories()
42 |     if args.category is None:
43 |         print('Downloading', len(categories), 'categories')
44 |         for category in categories:
45 |             download(args.out_dir, category, 'train')
46 |             download(args.out_dir, category, 'val')
47 |         download(args.out_dir, '', 'test')
48 |     else:
49 |         if args.category == 'test':
50 |             download(args.out_dir, '', 'test')
51 |         elif args.category not in categories:
52 |             print('Error:', args.category, "doesn't exist in", 'LSUN release')
53 |         else:
54 |             download(args.out_dir, args.category, 'train')
55 |             download(args.out_dir, args.category, 'val')
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     main()
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .
import data_loader 2 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/an4.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import io 4 | import shutil 5 | import tarfile 6 | import wget 7 | 8 | from utils import create_manifest 9 | 10 | parser = argparse.ArgumentParser(description='Processes and downloads an4.') 11 | parser.add_argument('--target-dir', default='an4_dataset/', help='Path to save dataset') 12 | parser.add_argument('--min-duration', default=1, type=int, 13 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 14 | parser.add_argument('--max-duration', default=15, type=int, 15 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 16 | args = parser.parse_args() 17 | 18 | 19 | def _format_data(root_path, data_tag, name, wav_folder): 20 | data_path = args.target_dir + data_tag + '/' + name + '/' 21 | new_transcript_path = data_path + '/txt/' 22 | new_wav_path = data_path + '/wav/' 23 | 24 | os.makedirs(new_transcript_path) 25 | os.makedirs(new_wav_path) 26 | 27 | wav_path = root_path + 'wav/' 28 | file_ids = root_path + 'etc/an4_%s.fileids' % data_tag 29 | transcripts = root_path + 'etc/an4_%s.transcription' % data_tag 30 | train_path = wav_path + wav_folder 31 | 32 | _convert_audio_to_wav(train_path) 33 | _format_files(file_ids, new_transcript_path, new_wav_path, transcripts, wav_path) 34 | 35 | 36 | def _convert_audio_to_wav(train_path): 37 | with os.popen('find %s -type f -name "*.raw"' % train_path) as pipe: 38 | for line in pipe: 39 | raw_path = line.strip() 40 | new_path = line.replace('.raw', '.wav').strip() 41 | cmd = 'sox -t raw -r %d -b 16 -e signed-integer -B -c 1 \"%s\" \"%s\"' % ( 42 | 16000, raw_path, new_path) 43 | os.system(cmd) 44 | 45 | 46 | def _format_files(file_ids, new_transcript_path, new_wav_path, transcripts, wav_path): 47 | with open(file_ids, 'r') as f: 48 | with open(transcripts, 'r') as t: 49 | paths = f.readlines() 50 | transcripts = t.readlines() 51 | for x in range(len(paths)): 52 | path = wav_path + paths[x].strip() + '.wav' 53 | filename = path.split('/')[-1] 54 | extracted_transcript = _process_transcript(transcripts, x) 55 | current_path = os.path.abspath(path) 56 | new_path = new_wav_path + filename 57 | text_path = new_transcript_path + filename.replace('.wav', '.txt') 58 | with io.FileIO(text_path, "w") as file: 59 | file.write(extracted_transcript.encode('utf-8')) 60 | os.rename(current_path, new_path) 61 | 62 | 63 | def _process_transcript(transcripts, x): 64 | extracted_transcript = transcripts[x].split('(')[0].strip("").split('<')[0].strip().upper() 65 | return extracted_transcript 66 | 67 | 68 | def main(): 69 | root_path = 'an4/' 70 | name = 'an4' 71 | wget.download('http://www.speech.cs.cmu.edu/databases/an4/an4_raw.bigendian.tar.gz') 72 | tar = tarfile.open('an4_raw.bigendian.tar.gz') 73 | tar.extractall() 74 | os.makedirs(args.target_dir) 75 | _format_data(root_path, 'train', name, 'an4_clstk') 76 | _format_data(root_path, 'test', name, 'an4test_clstk') 77 | shutil.rmtree(root_path) 78 | os.remove('an4_raw.bigendian.tar.gz') 79 | train_path = args.target_dir + '/train/' 80 | test_path = args.target_dir + '/test/' 81 | print ('\n', 'Creating manifests...') 82 | create_manifest(train_path, 'an4_train_manifest.csv', args.min_duration, args.max_duration) 83 | create_manifest(test_path, 'an4_val_manifest.csv') 84 | 85 | 86 | 
if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/cmu-arctic-manifests.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/S-Lab-System-Group/Lucid/63685a3ab7d15d8e940bb47ef98b6d5cca472b13/workloads/deepspeech2/data/cmu-arctic-manifests.tar.gz -------------------------------------------------------------------------------- /workloads/deepspeech2/data/common_voice.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import csv 6 | from multiprocessing.pool import ThreadPool 7 | import subprocess 8 | from utils import create_manifest 9 | 10 | parser = argparse.ArgumentParser(description='Downloads and processes Mozilla Common Voice dataset.') 11 | parser.add_argument("--target-dir", default='CommonVoice_dataset/', type=str, help="Directory to store the dataset.") 12 | parser.add_argument("--tar-path", type=str, help="Path to the Common Voice *.tar file if downloaded (Optional).") 13 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 14 | parser.add_argument('--min-duration', default=1, type=int, 15 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 16 | parser.add_argument('--max-duration', default=15, type=int, 17 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 18 | parser.add_argument('--files-to-process', default="cv-valid-dev.csv,cv-valid-test.csv,cv-valid-train.csv", 19 | type=str, help='list of *.csv file names to process') 20 | args = parser.parse_args() 21 | COMMON_VOICE_URL = "https://common-voice-data-download.s3.amazonaws.com/cv_corpus_v1.tar.gz" 22 | 23 | 24 | def convert_to_wav(csv_file, target_dir): 25 | """ Read *.csv file description, convert mp3 to wav, process text. 26 | Save results to target_dir. 
27 | 28 | Args: 29 | csv_file: str, path to *.csv file with data description, usually start from 'cv-' 30 | target_dir: str, path to dir to save results; wav/ and txt/ dirs will be created 31 | """ 32 | wav_dir = os.path.join(target_dir, 'wav/') 33 | txt_dir = os.path.join(target_dir, 'txt/') 34 | os.makedirs(wav_dir, exist_ok=True) 35 | os.makedirs(txt_dir, exist_ok=True) 36 | path_to_data = os.path.dirname(csv_file) 37 | 38 | def process(x): 39 | file_path, text = x 40 | file_name = os.path.splitext(os.path.basename(file_path))[0] 41 | text = text.strip().upper() 42 | with open(os.path.join(txt_dir, file_name + '.txt'), 'w') as f: 43 | f.write(text) 44 | cmd = "sox {} -r {} -b 16 -c 1 {}".format( 45 | os.path.join(path_to_data, file_path), 46 | args.sample_rate, 47 | os.path.join(wav_dir, file_name + '.wav')) 48 | subprocess.call([cmd], shell=True) 49 | 50 | print('Converting mp3 to wav for {}.'.format(csv_file)) 51 | with open(csv_file) as csvfile: 52 | reader = csv.DictReader(csvfile) 53 | data = [(row['filename'], row['text']) for row in reader] 54 | with ThreadPool(10) as pool: 55 | pool.map(process, data) 56 | 57 | 58 | def main(): 59 | target_dir = args.target_dir 60 | os.makedirs(target_dir, exist_ok=True) 61 | 62 | target_unpacked_dir = os.path.join(target_dir, "CV_unpacked") 63 | os.makedirs(target_unpacked_dir, exist_ok=True) 64 | 65 | if args.tar_path and os.path.exists(args.tar_path): 66 | print('Find existing file {}'.format(args.tar_path)) 67 | target_file = args.tar_path 68 | else: 69 | print("Could not find downloaded Common Voice archive, Downloading corpus...") 70 | filename = wget.download(COMMON_VOICE_URL, target_dir) 71 | target_file = os.path.join(target_dir, os.path.basename(filename)) 72 | 73 | print("Unpacking corpus to {} ...".format(target_unpacked_dir)) 74 | tar = tarfile.open(target_file) 75 | tar.extractall(target_unpacked_dir) 76 | tar.close() 77 | 78 | for csv_file in args.files_to_process.split(','): 79 | convert_to_wav(os.path.join(target_unpacked_dir, 'cv_corpus_v1/', csv_file), 80 | os.path.join(target_dir, os.path.splitext(csv_file)[0])) 81 | 82 | print('Creating manifests...') 83 | for csv_file in args.files_to_process.split(','): 84 | create_manifest(os.path.join(target_dir, os.path.splitext(csv_file)[0]), 85 | os.path.splitext(csv_file)[0] + '_manifest.csv', 86 | args.min_duration, 87 | args.max_duration) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/librispeech.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import subprocess 6 | from utils import create_manifest 7 | from tqdm import tqdm 8 | import shutil 9 | 10 | parser = argparse.ArgumentParser(description='Processes and downloads LibriSpeech dataset.') 11 | parser.add_argument("--target-dir", default='LibriSpeech_dataset/', type=str, help="Directory to store the dataset.") 12 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 13 | parser.add_argument('--files-to-use', default="train-clean-100.tar.gz," 14 | "train-clean-360.tar.gz,train-other-500.tar.gz," 15 | "dev-clean.tar.gz,dev-other.tar.gz," 16 | "test-clean.tar.gz,test-other.tar.gz", type=str, 17 | help='list of file names to download') 18 | parser.add_argument('--min-duration', default=1, type=int, 19 | help='Prunes training samples shorter than the min duration (given in 
seconds, default 1)') 20 | parser.add_argument('--max-duration', default=15, type=int, 21 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 22 | args = parser.parse_args() 23 | 24 | LIBRI_SPEECH_URLS = { 25 | "train": ["http://www.openslr.org/resources/12/train-clean-100.tar.gz", 26 | "http://www.openslr.org/resources/12/train-clean-360.tar.gz", 27 | "http://www.openslr.org/resources/12/train-other-500.tar.gz"], 28 | 29 | "val": ["http://www.openslr.org/resources/12/dev-clean.tar.gz", 30 | "http://www.openslr.org/resources/12/dev-other.tar.gz"], 31 | 32 | "test_clean": ["http://www.openslr.org/resources/12/test-clean.tar.gz"], 33 | "test_other": ["http://www.openslr.org/resources/12/test-other.tar.gz"] 34 | } 35 | 36 | 37 | def _preprocess_transcript(phrase): 38 | return phrase.strip().upper() 39 | 40 | 41 | def _process_file(wav_dir, txt_dir, base_filename, root_dir): 42 | full_recording_path = os.path.join(root_dir, base_filename) 43 | assert os.path.exists(full_recording_path) and os.path.exists(root_dir) 44 | wav_recording_path = os.path.join(wav_dir, base_filename.replace(".flac", ".wav")) 45 | subprocess.call(["sox {} -r {} -b 16 -c 1 {}".format(full_recording_path, str(args.sample_rate), 46 | wav_recording_path)], shell=True) 47 | # process transcript 48 | txt_transcript_path = os.path.join(txt_dir, base_filename.replace(".flac", ".txt")) 49 | transcript_file = os.path.join(root_dir, "-".join(base_filename.split('-')[:-1]) + ".trans.txt") 50 | assert os.path.exists(transcript_file), "Transcript file {} does not exist.".format(transcript_file) 51 | transcriptions = open(transcript_file).read().strip().split("\n") 52 | transcriptions = {t.split()[0].split("-")[-1]: " ".join(t.split()[1:]) for t in transcriptions} 53 | with open(txt_transcript_path, "w") as f: 54 | key = base_filename.replace(".flac", "").split("-")[-1] 55 | assert key in transcriptions, "{} is not in the transcriptions".format(key) 56 | f.write(_preprocess_transcript(transcriptions[key])) 57 | f.flush() 58 | 59 | 60 | def main(): 61 | target_dl_dir = args.target_dir 62 | if not os.path.exists(target_dl_dir): 63 | os.makedirs(target_dl_dir) 64 | files_to_dl = args.files_to_use.strip().split(',') 65 | for split_type, lst_libri_urls in LIBRI_SPEECH_URLS.items(): 66 | split_dir = os.path.join(target_dl_dir, split_type) 67 | if not os.path.exists(split_dir): 68 | os.makedirs(split_dir) 69 | split_wav_dir = os.path.join(split_dir, "wav") 70 | if not os.path.exists(split_wav_dir): 71 | os.makedirs(split_wav_dir) 72 | split_txt_dir = os.path.join(split_dir, "txt") 73 | if not os.path.exists(split_txt_dir): 74 | os.makedirs(split_txt_dir) 75 | extracted_dir = os.path.join(split_dir, "LibriSpeech") 76 | if os.path.exists(extracted_dir): 77 | shutil.rmtree(extracted_dir) 78 | for url in lst_libri_urls: 79 | # check if we want to dl this file 80 | dl_flag = False 81 | for f in files_to_dl: 82 | if url.find(f) != -1: 83 | dl_flag = True 84 | if not dl_flag: 85 | print("Skipping url: {}".format(url)) 86 | continue 87 | filename = url.split("/")[-1] 88 | target_filename = os.path.join(split_dir, filename) 89 | if not os.path.exists(target_filename): 90 | wget.download(url, split_dir) 91 | print("Unpacking {}...".format(filename)) 92 | tar = tarfile.open(target_filename) 93 | tar.extractall(split_dir) 94 | tar.close() 95 | os.remove(target_filename) 96 | print("Converting flac files to wav and extracting transcripts...") 97 | assert os.path.exists(extracted_dir), "Archive {} was not 
properly uncompressed.".format(filename)
98 |             for root, subdirs, files in tqdm(os.walk(extracted_dir)):
99 |                 for f in files:
100 |                     if f.find(".flac") != -1:
101 |                         _process_file(wav_dir=split_wav_dir, txt_dir=split_txt_dir,
102 |                                       base_filename=f, root_dir=root)
103 | 
104 |             print("Finished {}".format(url))
105 |             shutil.rmtree(extracted_dir)
106 |         if split_type == 'train':  # Prune to min/max duration
107 |             create_manifest(split_dir, 'libri_' + split_type + '_manifest.csv', args.min_duration, args.max_duration)
108 |         else:
109 |             create_manifest(split_dir, 'libri_' + split_type + '_manifest.csv')
110 | 
111 | 
112 | if __name__ == "__main__":
113 |     main()
114 | 
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/merge_manifests.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import argparse
4 | import io
5 | import os
6 | 
7 | from tqdm import tqdm
8 | from utils import order_and_prune_files
9 | 
10 | parser = argparse.ArgumentParser(description='Merges all manifest CSV files in specified folder.')
11 | parser.add_argument('--merge-dir', default='manifests/', help='Path to all manifest files you want to merge')
12 | parser.add_argument('--min-duration', default=1, type=int,
13 |                     help='Prunes any samples shorter than the min duration (given in seconds, default 1)')
14 | parser.add_argument('--max-duration', default=15, type=int,
15 |                     help='Prunes any samples longer than the max duration (given in seconds, default 15)')
16 | parser.add_argument('--output-path', default='merged_manifest.csv', help='Output path to merged manifest')
17 | 
18 | args = parser.parse_args()
19 | 
20 | file_paths = []
21 | for file in os.listdir(args.merge_dir):
22 |     if file.endswith(".csv"):
23 |         with open(os.path.join(args.merge_dir, file), 'r') as fh:
24 |             file_paths += fh.readlines()
25 | file_paths = [file_path.split(',')[0] for file_path in file_paths]
26 | file_paths = order_and_prune_files(file_paths, args.min_duration, args.max_duration)
27 | with io.FileIO(args.output_path, "w") as file:
28 |     for wav_path in tqdm(file_paths, total=len(file_paths)):
29 |         transcript_path = wav_path.replace('/wav/', '/txt/').replace('.wav', '.txt')
30 |         sample = os.path.abspath(wav_path) + ',' + os.path.abspath(transcript_path) + '\n'
31 |         file.write(sample.encode('utf-8'))
--------------------------------------------------------------------------------
/workloads/deepspeech2/data/spec_augment.py:
--------------------------------------------------------------------------------
1 | # Copyright 2019 RnD at Spoon Radio
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """SpecAugment implementation (adapted here for PyTorch tensors).
16 | Related paper : https://arxiv.org/pdf/1904.08779.pdf
17 | Table 1 of the paper summarizes the parameters used for each open dataset (W: time-warp parameter, F: frequency-mask width, m_F: number of frequency masks, T: time-mask width, p: time-mask upper-bound ratio, m_T: number of time masks).
18 | -----------------------------------------
19 | Policy | W  | F  | m_F |  T  |  p  | m_T
20 | -----------------------------------------
21 | None   |  0 |  0 |  -  |  0  |  -  |  -
22 | -----------------------------------------
23 | LB     | 80 | 27 |  1  | 100 | 1.0 | 1
24 | -----------------------------------------
25 | LD     | 80 | 27 |  2  | 100 | 1.0 | 2
26 | -----------------------------------------
27 | SM     | 40 | 15 |  2  |  70 | 0.2 | 2
28 | -----------------------------------------
29 | SS     | 40 | 27 |  2  |  70 | 0.2 | 2
30 | -----------------------------------------
31 | LB : LibriSpeech basic
32 | LD : LibriSpeech double
33 | SM : Switchboard mild
34 | SS : Switchboard strong
35 | """
36 | 
37 | import librosa
38 | import librosa.display
39 | import numpy as np
40 | import random
41 | import matplotlib
42 | #matplotlib.use('TkAgg')
43 | import matplotlib.pyplot as plt
44 | from .sparse_image_warp import sparse_image_warp
45 | import torch
46 | 
47 | 
48 | def time_warp(spec, W=5):
49 |     num_rows = spec.shape[1]
50 |     spec_len = spec.shape[2]
51 | 
52 |     y = num_rows // 2
53 |     horizontal_line_at_ctr = spec[0][y]
54 |     # assert len(horizontal_line_at_ctr) == spec_len
55 | 
56 |     point_to_warp = horizontal_line_at_ctr[random.randrange(W, spec_len-W)]
57 |     # assert isinstance(point_to_warp, torch.Tensor)
58 | 
59 |     # Warp distance drawn uniformly from [-W, W)
60 |     dist_to_warp = random.randrange(-W, W)
61 |     src_pts = torch.tensor([[[y, point_to_warp]]])
62 |     dest_pts = torch.tensor([[[y, point_to_warp + dist_to_warp]]])
63 |     warped_spectro, dense_flows = sparse_image_warp(spec, src_pts, dest_pts)
64 | 
65 |     return warped_spectro.squeeze(3)
66 | 
67 | 
68 | def spec_augment(mel_spectrogram, time_warping_para=40, frequency_masking_para=27,
69 |                  time_masking_para=70, frequency_mask_num=1, time_mask_num=1):
70 |     """Spec augmentation calculation function.
71 |     'SpecAugment' has three steps for audio data augmentation.
72 |     The first step is time warping using the imported sparse_image_warp function.
73 |     The second step is frequency masking, and the last step is time masking.
74 |     # Arguments:
75 |       mel_spectrogram(torch.Tensor): the mel spectrogram you want to warp and mask.
76 |       time_warping_para(float): Augmentation parameter, "time warp parameter W".
77 |         If none, default = 40.
78 |       frequency_masking_para(float): Augmentation parameter, "frequency mask parameter F"
79 |         If none, default = 27.
80 |       time_masking_para(float): Augmentation parameter, "time mask parameter T"
81 |         If none, default = 70.
82 |       frequency_mask_num(float): number of frequency masking lines, "m_F".
83 |         If none, default = 1.
84 |       time_mask_num(float): number of time masking lines, "m_T".
85 |         If none, default = 1.
86 |     # Returns
87 |       mel_spectrogram(torch.Tensor): warped and masked mel spectrogram.
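    # Example (an illustrative sketch added to this docstring, not part of the
    # original docs; an 80-bin, 400-frame log-mel tensor is assumed):
    #     mel = torch.rand(80, 400)   # (freq channels v, time steps tau)
    #     augmented = spec_augment(mel, frequency_mask_num=2, time_mask_num=2)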
88 | """ 89 | mel_spectrogram = mel_spectrogram.unsqueeze(0) 90 | 91 | v = mel_spectrogram.shape[1] 92 | tau = mel_spectrogram.shape[2] 93 | 94 | # Step 1 : Time warping 95 | warped_mel_spectrogram = time_warp(mel_spectrogram) 96 | 97 | # Step 2 : Frequency masking 98 | for i in range(frequency_mask_num): 99 | f = np.random.uniform(low=0.0, high=frequency_masking_para) 100 | f = int(f) 101 | if v - f < 0: 102 | continue 103 | f0 = random.randint(0, v-f) 104 | warped_mel_spectrogram[:, f0:f0+f, :] = 0 105 | 106 | # Step 3 : Time masking 107 | for i in range(time_mask_num): 108 | t = np.random.uniform(low=0.0, high=time_masking_para) 109 | t = int(t) 110 | if tau - t < 0: 111 | continue 112 | t0 = random.randint(0, tau-t) 113 | warped_mel_spectrogram[:, :, t0:t0+t] = 0 114 | 115 | return warped_mel_spectrogram.squeeze() 116 | 117 | 118 | def visualization_spectrogram(mel_spectrogram, title): 119 | """visualizing result of SpecAugment 120 | # Arguments: 121 | mel_spectrogram(ndarray): mel_spectrogram to visualize. 122 | title(String): plot figure's title 123 | """ 124 | # Show mel-spectrogram using librosa's specshow. 125 | plt.figure(figsize=(10, 4)) 126 | librosa.display.specshow(librosa.power_to_db(mel_spectrogram[0, :, :], ref=np.max), y_axis='mel', fmax=8000, x_axis='time') 127 | # plt.colorbar(format='%+2.0f dB') 128 | plt.title(title) 129 | plt.tight_layout() 130 | plt.show() 131 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/ted.py: -------------------------------------------------------------------------------- 1 | import os 2 | import wget 3 | import tarfile 4 | import argparse 5 | import subprocess 6 | import unicodedata 7 | import io 8 | from utils import create_manifest 9 | from tqdm import tqdm 10 | 11 | parser = argparse.ArgumentParser(description='Processes and downloads TED-LIUMv2 dataset.') 12 | parser.add_argument("--target-dir", default='TEDLIUM_dataset/', type=str, help="Directory to store the dataset.") 13 | parser.add_argument("--tar-path", type=str, help="Path to the TEDLIUM_release tar if downloaded (Optional).") 14 | parser.add_argument('--sample-rate', default=16000, type=int, help='Sample rate') 15 | parser.add_argument('--min-duration', default=1, type=int, 16 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 17 | parser.add_argument('--max-duration', default=15, type=int, 18 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 19 | args = parser.parse_args() 20 | 21 | TED_LIUM_V2_DL_URL = "http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz" 22 | 23 | 24 | def get_utterances_from_stm(stm_file): 25 | """ 26 | Return list of entries containing phrase and its start/end timings 27 | :param stm_file: 28 | :return: 29 | """ 30 | res = [] 31 | with io.open(stm_file, "r", encoding='utf-8') as f: 32 | for stm_line in f: 33 | tokens = stm_line.split() 34 | start_time = float(tokens[3]) 35 | end_time = float(tokens[4]) 36 | filename = tokens[0] 37 | transcript = unicodedata.normalize("NFKD", 38 | " ".join(t for t in tokens[6:]).strip()). 
\ 39 | encode("utf-8", "ignore").decode("utf-8", "ignore") 40 | if transcript != "ignore_time_segment_in_scoring": 41 | res.append({ 42 | "start_time": start_time, "end_time": end_time, 43 | "filename": filename, "transcript": transcript 44 | }) 45 | return res 46 | 47 | 48 | def cut_utterance(src_sph_file, target_wav_file, start_time, end_time, sample_rate=16000): 49 | subprocess.call(["sox {} -r {} -b 16 -c 1 {} trim {} ={}".format(src_sph_file, str(sample_rate), 50 | target_wav_file, start_time, end_time)], 51 | shell=True) 52 | 53 | 54 | def _preprocess_transcript(phrase): 55 | return phrase.strip().upper() 56 | 57 | 58 | def filter_short_utterances(utterance_info, min_len_sec=1.0): 59 | return utterance_info["end_time"] - utterance_info["start_time"] > min_len_sec 60 | 61 | 62 | def prepare_dir(ted_dir): 63 | converted_dir = os.path.join(ted_dir, "converted") 64 | # directories to store converted wav files and their transcriptions 65 | wav_dir = os.path.join(converted_dir, "wav") 66 | if not os.path.exists(wav_dir): 67 | os.makedirs(wav_dir) 68 | txt_dir = os.path.join(converted_dir, "txt") 69 | if not os.path.exists(txt_dir): 70 | os.makedirs(txt_dir) 71 | counter = 0 72 | entries = os.listdir(os.path.join(ted_dir, "sph")) 73 | for sph_file in tqdm(entries, total=len(entries)): 74 | speaker_name = sph_file.split('.sph')[0] 75 | 76 | sph_file_full = os.path.join(ted_dir, "sph", sph_file) 77 | stm_file_full = os.path.join(ted_dir, "stm", "{}.stm".format(speaker_name)) 78 | 79 | assert os.path.exists(sph_file_full) and os.path.exists(stm_file_full) 80 | all_utterances = get_utterances_from_stm(stm_file_full) 81 | 82 | all_utterances = filter(filter_short_utterances, all_utterances) 83 | for utterance_id, utterance in enumerate(all_utterances): 84 | target_wav_file = os.path.join(wav_dir, "{}_{}.wav".format(utterance["filename"], str(utterance_id))) 85 | target_txt_file = os.path.join(txt_dir, "{}_{}.txt".format(utterance["filename"], str(utterance_id))) 86 | cut_utterance(sph_file_full, target_wav_file, utterance["start_time"], utterance["end_time"], 87 | sample_rate=args.sample_rate) 88 | with io.FileIO(target_txt_file, "w") as f: 89 | f.write(_preprocess_transcript(utterance["transcript"]).encode('utf-8')) 90 | counter += 1 91 | 92 | 93 | def main(): 94 | target_dl_dir = args.target_dir 95 | if not os.path.exists(target_dl_dir): 96 | os.makedirs(target_dl_dir) 97 | 98 | target_unpacked_dir = os.path.join(target_dl_dir, "TEDLIUM_release2") 99 | if args.tar_path and os.path.exists(args.tar_path): 100 | target_file = args.tar_path 101 | else: 102 | print("Could not find downloaded TEDLIUM archive, Downloading corpus...") 103 | wget.download(TED_LIUM_V2_DL_URL, target_dl_dir) 104 | target_file = os.path.join(target_dl_dir, "TEDLIUM_release2.tar.gz") 105 | 106 | if not os.path.exists(target_unpacked_dir): 107 | print("Unpacking corpus...") 108 | tar = tarfile.open(target_file) 109 | tar.extractall(target_dl_dir) 110 | tar.close() 111 | else: 112 | print("Found TEDLIUM directory, skipping unpacking of tar files") 113 | 114 | train_ted_dir = os.path.join(target_unpacked_dir, "train") 115 | val_ted_dir = os.path.join(target_unpacked_dir, "dev") 116 | test_ted_dir = os.path.join(target_unpacked_dir, "test") 117 | 118 | prepare_dir(train_ted_dir) 119 | prepare_dir(val_ted_dir) 120 | prepare_dir(test_ted_dir) 121 | print('Creating manifests...') 122 | 123 | create_manifest(train_ted_dir, 'ted_train_manifest.csv', args.min_duration, args.max_duration) 124 | create_manifest(val_ted_dir, 
'ted_val_manifest.csv') 125 | create_manifest(test_ted_dir, 'ted_test_manifest.csv') 126 | 127 | 128 | if __name__ == "__main__": 129 | main() 130 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import fnmatch 4 | import io 5 | import os 6 | from tqdm import tqdm 7 | import subprocess 8 | import torch.distributed as dist 9 | 10 | 11 | def create_manifest(data_path, output_path, min_duration=None, max_duration=None): 12 | file_paths = [os.path.join(dirpath, f) 13 | for dirpath, dirnames, files in os.walk(data_path) 14 | for f in fnmatch.filter(files, '*.wav')] 15 | file_paths = order_and_prune_files(file_paths, min_duration, max_duration) 16 | with io.FileIO(output_path, "w") as file: 17 | for wav_path in tqdm(file_paths, total=len(file_paths)): 18 | transcript_path = wav_path.replace('/wav/', '/txt/').replace('.wav', '.txt') 19 | sample = os.path.abspath(wav_path) + ',' + os.path.abspath(transcript_path) + '\n' 20 | file.write(sample.encode('utf-8')) 21 | print('\n') 22 | 23 | 24 | def order_and_prune_files(file_paths, min_duration, max_duration): 25 | print("Sorting manifests...") 26 | duration_file_paths = [(path, float(subprocess.check_output( 27 | ['soxi -D \"%s\"' % path.strip()], shell=True))) for path in file_paths] 28 | if min_duration and max_duration: 29 | print("Pruning manifests between %d and %d seconds" % (min_duration, max_duration)) 30 | duration_file_paths = [(path, duration) for path, duration in duration_file_paths if 31 | min_duration <= duration <= max_duration] 32 | 33 | def func(element): 34 | return element[1] 35 | 36 | duration_file_paths.sort(key=func) 37 | return [x[0] for x in duration_file_paths] # Remove durations 38 | 39 | def reduce_tensor(tensor, world_size): 40 | rt = tensor.clone() 41 | dist.all_reduce(rt, op=dist.reduce_op.SUM) 42 | rt /= world_size 43 | return rt 44 | 45 | -------------------------------------------------------------------------------- /workloads/deepspeech2/data/voxforge.py: -------------------------------------------------------------------------------- 1 | import os 2 | from six.moves import urllib 3 | import argparse 4 | import re 5 | import tempfile 6 | import shutil 7 | import subprocess 8 | import tarfile 9 | import io 10 | from tqdm import tqdm 11 | 12 | from utils import create_manifest 13 | 14 | VOXFORGE_URL_16kHz = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/Audio/Main/16kHz_16bit/' 15 | 16 | parser = argparse.ArgumentParser(description='Processes and downloads VoxForge dataset.') 17 | parser.add_argument("--target-dir", default='voxforge_dataset/', type=str, help="Directory to store the dataset.") 18 | parser.add_argument('--sample-rate', default=16000, 19 | type=int, help='Sample rate') 20 | parser.add_argument('--min-duration', default=1, type=int, 21 | help='Prunes training samples shorter than the min duration (given in seconds, default 1)') 22 | parser.add_argument('--max-duration', default=15, type=int, 23 | help='Prunes training samples longer than the max duration (given in seconds, default 15)') 24 | args = parser.parse_args() 25 | 26 | 27 | def _get_recordings_dir(sample_dir, recording_name): 28 | wav_dir = os.path.join(sample_dir, recording_name, "wav") 29 | if os.path.exists(wav_dir): 30 | return "wav", wav_dir 31 | flac_dir = os.path.join(sample_dir, recording_name, "flac") 32 | if 
os.path.exists(flac_dir): 33 | return "flac", flac_dir 34 | raise Exception("wav or flac directory was not found for recording name: {}".format(recording_name)) 35 | 36 | 37 | def prepare_sample(recording_name, url, target_folder): 38 | """ 39 | Downloads and extracts a sample from VoxForge and puts the wav and txt files into :target_folder. 40 | """ 41 | wav_dir = os.path.join(target_folder, "wav") 42 | if not os.path.exists(wav_dir): 43 | os.makedirs(wav_dir) 44 | txt_dir = os.path.join(target_folder, "txt") 45 | if not os.path.exists(txt_dir): 46 | os.makedirs(txt_dir) 47 | # check if sample is processed 48 | filename_set = set(['_'.join(wav_file.split('_')[:-1]) for wav_file in os.listdir(wav_dir)]) 49 | if recording_name in filename_set: 50 | return 51 | 52 | request = urllib.request.Request(url) 53 | response = urllib.request.urlopen(request) 54 | content = response.read() 55 | response.close() 56 | with tempfile.NamedTemporaryFile(suffix=".tgz", mode='wb') as target_tgz: 57 | target_tgz.write(content) 58 | target_tgz.flush() 59 | dirpath = tempfile.mkdtemp() 60 | 61 | tar = tarfile.open(target_tgz.name) 62 | tar.extractall(dirpath) 63 | tar.close() 64 | 65 | recordings_type, recordings_dir = _get_recordings_dir(dirpath, recording_name) 66 | tgz_prompt_file = os.path.join(dirpath, recording_name, "etc", "PROMPTS") 67 | 68 | if os.path.exists(recordings_dir) and os.path.exists(tgz_prompt_file): 69 | transcriptions = open(tgz_prompt_file).read().strip().split("\n") 70 | transcriptions = {t.split()[0]: " ".join(t.split()[1:]) for t in transcriptions} 71 | for wav_file in os.listdir(recordings_dir): 72 | recording_id = wav_file.split('.{}'.format(recordings_type))[0] 73 | transcription_key = recording_name + "/mfc/" + recording_id 74 | if transcription_key not in transcriptions: 75 | continue 76 | utterance = transcriptions[transcription_key] 77 | 78 | target_wav_file = os.path.join(wav_dir, "{}_{}.wav".format(recording_name, recording_id)) 79 | target_txt_file = os.path.join(txt_dir, "{}_{}.txt".format(recording_name, recording_id)) 80 | with io.FileIO(target_txt_file, "w") as file: 81 | file.write(utterance.encode('utf-8')) 82 | original_wav_file = os.path.join(recordings_dir, wav_file) 83 | subprocess.call(["sox {} -r {} -b 16 -c 1 {}".format(original_wav_file, str(args.sample_rate), 84 | target_wav_file)], shell=True) 85 | 86 | shutil.rmtree(dirpath) 87 | 88 | 89 | if __name__ == '__main__': 90 | target_dir = args.target_dir 91 | sample_rate = args.sample_rate 92 | 93 | if not os.path.isdir(target_dir): 94 | os.makedirs(target_dir) 95 | request = urllib.request.Request(VOXFORGE_URL_16kHz) 96 | response = urllib.request.urlopen(request) 97 | content = response.read() 98 | all_files = re.findall("href\=\"(.*\.tgz)\"", content.decode("utf-8")) 99 | for f in tqdm(all_files, total=len(all_files)): 100 | prepare_sample(f.replace(".tgz", ""), VOXFORGE_URL_16kHz + f, target_dir) 101 | print('Creating manifests...') 102 | create_manifest(target_dir, 'voxforge_train_manifest.csv', args.min_duration, args.max_duration) 103 | -------------------------------------------------------------------------------- /workloads/deepspeech2/labels.json: -------------------------------------------------------------------------------- 1 | [ 2 | "_", 3 | "'", 4 | "A", 5 | "B", 6 | "C", 7 | "D", 8 | "E", 9 | "F", 10 | "G", 11 | "H", 12 | "I", 13 | "J", 14 | "K", 15 | "L", 16 | "M", 17 | "N", 18 | "O", 19 | "P", 20 | "Q", 21 | "R", 22 | "S", 23 | "T", 24 | "U", 25 | "V", 26 | "W", 27 | "X", 28 | "Y", 29 | "Z", 30 
| " " 31 | ] -------------------------------------------------------------------------------- /workloads/imagenet/profile_imagenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import timeit 4 | import torch 5 | import torch.backends.cudnn as cudnn 6 | import torch.nn.functional as F 7 | import torch.optim as optim 8 | import torch.utils.data.distributed 9 | import numpy as np 10 | import time 11 | import os 12 | import torchvision 13 | import workloads.settings as settings 14 | 15 | from torch.nn import DataParallel 16 | from models import * 17 | 18 | 19 | # Benchmark settings 20 | parser = argparse.ArgumentParser( 21 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 22 | ) 23 | parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus") 24 | parser.add_argument("--gpu", default=1, type=int, help="GPU id to use. Only work when use single gpu.") 25 | 26 | 27 | parser.add_argument('--warmup_epoch', type=int, default=10, help='number of warmup epochs') 28 | parser.add_argument("--num-batches-per-iter", type=int, default=30, help="number of batches per benchmark iteration") 29 | parser.add_argument("--num-iters", type=int, default=30, help="number of benchmark iterations") 30 | parser.add_argument("--amp-fp16", action="store_true", default=False, help="Enables FP16 training with Apex.") 31 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 32 | 33 | 34 | args = parser.parse_args() 35 | # args.total_time = settings.total_time 36 | 37 | # Training 38 | def benchmark_imagenet(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 39 | t_start = time.time() 40 | if len(gpu_id) == 1: 41 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 42 | else: 43 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 44 | 45 | cudnn.benchmark = True 46 | 47 | model = getattr(torchvision.models, model_name)() 48 | model = model.cuda() 49 | 50 | data = torch.randn(batch_size, 3, 224, 224) 51 | target = torch.LongTensor(batch_size).random_() % 1000 52 | data, target = data.cuda(), target.cuda() 53 | 54 | optimizer = optim.SGD(model.parameters(), lr=0.01) 55 | 56 | if mixed_precision: 57 | scaler = torch.cuda.amp.GradScaler(enabled=True) 58 | else: 59 | scaler = None 60 | 61 | if len(gpu_id) > 1: 62 | model = DataParallel(model) 63 | 64 | def benchmark_step(): 65 | iter_num = 0 66 | while True: 67 | optimizer.zero_grad() 68 | if iter_num == args.warmup_epoch-1: 69 | warm_signal.value = 1 70 | t_warmend = time.time() 71 | # Reach timeout: exit profiling 72 | if time.time() - t_start >= args.total_time: 73 | t_end = time.time() 74 | t_pass = t_end - t_warmend 75 | break 76 | if mixed_precision: 77 | with torch.cuda.amp.autocast(): 78 | output = model(data) 79 | loss = F.cross_entropy(output, target) 80 | scaler.scale(loss).backward() 81 | scaler.step(optimizer) 82 | scaler.update() 83 | else: 84 | output = model(data) 85 | loss = F.cross_entropy(output, target) 86 | loss.backward() 87 | optimizer.step() 88 | iter_num += 1 89 | return t_pass, iter_num 90 | 91 | # Benchmark 92 | print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..') 93 | t_pass, iter_num = benchmark_step() 94 | img_sec = len(gpu_id) * (iter_num - args.warmup_epoch) * batch_size / t_pass 95 | 96 | bench_list.append(img_sec) 97 | 98 | 99 | 100 | 101 | 
--------------------------------------------------------------------------------
/workloads/imagenet/profile_imagenet_ddp.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import argparse
3 | import timeit
4 | # from cvxpy import mixed_norm  # unused import, commented out: it only added a heavyweight cvxpy dependency
5 | import torch.backends.cudnn as cudnn
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import torch.optim as optim
9 | import torch.utils.data.distributed
10 | import torch.distributed as dist
11 | import sys
12 | import numpy as np
13 | import os
14 | import pandas as pd
15 | import torchvision
16 | import time
17 | import torch.multiprocessing as mp
18 | sys.path.append('/home/mzhang/work/ASPLOS23/collect_metric/')
19 | 
20 | from torch.nn import DataParallel
21 | from multiprocessing import Process, Manager, Value
22 | from torch.nn.parallel import DistributedDataParallel as DDP
23 | from torchvision import transforms
24 | from models import *
25 | 
26 | # Benchmark settings
27 | parser = argparse.ArgumentParser(
28 |     description="PyTorch DDP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter
29 | )
30 | parser.add_argument('--warmup_iter', type=int, default=10, help='number of warmup iterations')
31 | parser.add_argument('--benchmark_epoch', type=int, default=50, help='number of training benchmark epochs')
32 | parser.add_argument('--data_dir', type=str, default="~/data/", help='Data directory')
33 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code')
34 | parser.add_argument('--master_addr', type=str, default='127.0.0.1', help='Master address for the DDP rendezvous')
35 | parser.add_argument('--master_port', type=str, default='47020', help='Master port for the DDP rendezvous')
36 | 
37 | 
38 | args = parser.parse_args()
39 | 
40 | 
41 | # ------ Setting up the distributed environment -------
42 | def setup(rank, world_size):
43 |     os.environ['MASTER_ADDR'] = args.master_addr
44 |     os.environ['MASTER_PORT'] = args.master_port
45 |     # initialize the process group
46 |     dist.init_process_group(backend="nccl", rank=rank, world_size=world_size)
47 |     # init_process_group synchronizes the participating processes so that they
48 |     # can communicate with each other across multiple GPUs.
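    # (Added note: MASTER_ADDR/MASTER_PORT form the rendezvous point shared by all
    # ranks. When several of these benchmarks run on one machine, give each job its
    # own port, e.g. `python profile_imagenet_ddp.py --master_port 47021`.)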
49 | 50 | 51 | def cleanup(): 52 | dist.destroy_process_group() 53 | 54 | 55 | def benchmark_imagenet_ddp(rank, model_name, batch_size, mixed_precision, gpu_id, t_start): 56 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 57 | print(f"Running Distributed ResNet on rank {rank}.") 58 | setup(rank, len(gpu_id)) 59 | torch.manual_seed(0) 60 | torch.cuda.set_device(rank) 61 | 62 | # Model 63 | # print('==> Building model..') 64 | model = getattr(torchvision.models, model_name)() 65 | model.to(rank) 66 | model = DDP(model, device_ids=[rank]) 67 | 68 | criterion = nn.CrossEntropyLoss().to(rank) 69 | optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4) 70 | 71 | if mixed_precision: 72 | scaler = torch.cuda.amp.GradScaler(enabled=True) 73 | else: 74 | scaler = None 75 | 76 | # Dataset 77 | data = torch.randn(batch_size, 3, 224, 224) 78 | target = torch.LongTensor(batch_size).random_() % 1000 79 | data, target = data.to(rank), target.to(rank) 80 | 81 | # data, target = next(iter(trainloader)) 82 | # data, target = data.cuda(), target.cuda() 83 | 84 | # Train 85 | print(f'==> Training {model_name} model with {batch_size} batchsize, {mixed_precision} mp..') 86 | iter_num = 0 87 | model.train() 88 | # Prevent total batch number < warmup+benchmark situation 89 | while True: 90 | # Warm-up: previous 10 iters 91 | if iter_num == args.warmup_iter-1: 92 | t_warmend = time.time() 93 | # Reach timeout: exit benchmark 94 | if time.time() - t_start >= args.total_time: 95 | t_end = time.time() 96 | t_pass = t_end - t_warmend 97 | break 98 | optimizer.zero_grad() 99 | if mixed_precision: 100 | with torch.cuda.amp.autocast(): 101 | output = model(data) 102 | loss = criterion(output, target) 103 | scaler.scale(loss).backward() 104 | scaler.step(optimizer) 105 | scaler.update() 106 | else: 107 | output = model(data) 108 | loss = criterion(output, target) 109 | loss.backward() 110 | optimizer.step() 111 | iter_num += 1 112 | 113 | img_sec = len(gpu_id) * (iter_num - args.warmup_iter) * batch_size / t_pass 114 | if rank == 0: 115 | print(f'master port: {args.master_port}, speed: {img_sec}') 116 | 117 | cleanup() 118 | 119 | if __name__ == '__main__': 120 | model_name = 'resnet50' 121 | batch_size = 64 122 | mixed_precision = 0 123 | gpu_id = [0,1,2,3] 124 | # world_size = 4 125 | t_start = time.time() 126 | mp.spawn(benchmark_imagenet_ddp, args=(model_name, batch_size, mixed_precision, gpu_id, t_start, ), nprocs=len(gpu_id), join=True) 127 | -------------------------------------------------------------------------------- /workloads/imagenet/requirements.txt: -------------------------------------------------------------------------------- 1 | asposestorage==1.0.2 2 | numpy==1.21.5 3 | pandas==1.4.1 4 | torch==1.11.0+cu113 5 | torchvision==0.12.0 6 | -------------------------------------------------------------------------------- /workloads/lstm/data.py: -------------------------------------------------------------------------------- 1 | import os 2 | from io import open 3 | import torch 4 | 5 | class Dictionary(object): 6 | def __init__(self): 7 | self.word2idx = {} 8 | self.idx2word = [] 9 | 10 | def add_word(self, word): 11 | if word not in self.word2idx: 12 | self.idx2word.append(word) 13 | self.word2idx[word] = len(self.idx2word) - 1 14 | return self.word2idx[word] 15 | 16 | def __len__(self): 17 | return len(self.idx2word) 18 | 19 | 20 | class Corpus(object): 21 | def __init__(self, path): 22 | self.dictionary = Dictionary() 23 | self.train = 
self.tokenize(os.path.join(path, 'train.txt')) 24 | self.valid = self.tokenize(os.path.join(path, 'valid.txt')) 25 | self.test = self.tokenize(os.path.join(path, 'test.txt')) 26 | 27 | def tokenize(self, path): 28 | """Tokenizes a text file.""" 29 | assert os.path.exists(path) 30 | # Add words to the dictionary 31 | with open(path, 'r', encoding="utf8") as f: 32 | tokens = 0 33 | for line in f: 34 | words = line.split() + [''] 35 | tokens += len(words) 36 | for word in words: 37 | self.dictionary.add_word(word) 38 | 39 | # Tokenize file content 40 | with open(path, 'r', encoding="utf8") as f: 41 | ids = torch.LongTensor(tokens) 42 | token = 0 43 | for line in f: 44 | words = line.split() + [''] 45 | for word in words: 46 | ids[token] = self.dictionary.word2idx[word] 47 | token += 1 48 | 49 | return ids 50 | -------------------------------------------------------------------------------- /workloads/ncf/config.py: -------------------------------------------------------------------------------- 1 | import workloads.settings as settings 2 | 3 | # dataset name 4 | dataset = 'ml-1m' 5 | assert dataset in ['ml-1m', 'pinterest-20'] 6 | 7 | # model name 8 | model = 'NeuMF-end' 9 | #model = 'MLP' 10 | #model = 'GMF' 11 | #model = 'NeuMF-pre' 12 | assert model in ['MLP', 'GMF', 'NeuMF-end', 'NeuMF-pre'] 13 | 14 | # paths 15 | # main_path = '/home/mzhang/data/ml-1m/' 16 | main_path = settings.data_dir + 'ml-1m/' 17 | 18 | train_rating = main_path + '{}.train.rating'.format(dataset) 19 | test_rating = main_path + '{}.test.rating'.format(dataset) 20 | test_negative = main_path + '{}.test.negative'.format(dataset) 21 | 22 | model_path = '/mnt/ncf/models/' 23 | GMF_model_path = model_path + 'GMF.pth' 24 | MLP_model_path = model_path + 'MLP.pth' 25 | NeuMF_model_path = model_path + 'NeuMF.pth' 26 | -------------------------------------------------------------------------------- /workloads/ncf/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scipy.sparse as sp 4 | 5 | import torch.utils.data as data 6 | 7 | import ncf.config as config 8 | 9 | 10 | def load_all(test_num=100): 11 | """ We load all the three file here to save time in each epoch. """ 12 | train_data = pd.read_csv( 13 | config.train_rating, 14 | sep='\t', header=None, names=['user', 'item'], 15 | usecols=[0, 1], dtype={0: np.int32, 1: np.int32}) 16 | 17 | user_num = train_data['user'].max() + 1 18 | item_num = train_data['item'].max() + 1 19 | 20 | train_data = train_data.values.tolist() 21 | 22 | # load ratings as a dok matrix 23 | train_mat = sp.dok_matrix((user_num, item_num), dtype=np.float32) 24 | for x in train_data: 25 | train_mat[x[0], x[1]] = 1.0 26 | 27 | test_data = [] 28 | with open(config.test_negative, 'r') as fd: 29 | line = fd.readline() 30 | while line != None and line != '': 31 | arr = line.split('\t') 32 | u = eval(arr[0])[0] 33 | test_data.append([u, eval(arr[0])[1]]) 34 | for i in arr[1:]: 35 | test_data.append([u, int(i)]) 36 | line = fd.readline() 37 | return train_data, test_data, user_num, item_num, train_mat 38 | 39 | 40 | class NCFData(data.Dataset): 41 | def __init__(self, features, 42 | num_item, train_mat=None, num_ng=0, is_training=None): 43 | super(NCFData, self).__init__() 44 | """ Note that the labels are only useful when training, we thus 45 | add them in the ng_sample() function. 
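        Example (an illustrative sketch, not part of the original comment;
        it uses the values returned by load_all() above):
            train_dataset = NCFData(train_data, item_num, train_mat,
                                    num_ng=4, is_training=True)
            train_dataset.ng_sample()  # draw fresh negative samples before each epoch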
46 | """ 47 | self.features_ps = features 48 | self.num_item = num_item 49 | self.train_mat = train_mat 50 | self.num_ng = num_ng 51 | self.is_training = is_training 52 | self.labels = [0 for _ in range(len(features))] 53 | 54 | def ng_sample(self): 55 | assert self.is_training, 'no need to sampling when testing' 56 | 57 | self.features_ng = [] 58 | for x in self.features_ps: 59 | u = x[0] 60 | for t in range(self.num_ng): 61 | j = np.random.randint(self.num_item) 62 | while (u, j) in self.train_mat: 63 | j = np.random.randint(self.num_item) 64 | self.features_ng.append([u, j]) 65 | 66 | labels_ps = [1 for _ in range(len(self.features_ps))] 67 | labels_ng = [0 for _ in range(len(self.features_ng))] 68 | 69 | self.features_fill = self.features_ps + self.features_ng 70 | self.labels_fill = labels_ps + labels_ng 71 | 72 | def __len__(self): 73 | return (self.num_ng + 1) * len(self.labels) 74 | 75 | def __getitem__(self, idx): 76 | features = self.features_fill if self.is_training \ 77 | else self.features_ps 78 | labels = self.labels_fill if self.is_training \ 79 | else self.labels 80 | 81 | user = features[idx][0] 82 | item = features[idx][1] 83 | label = labels[idx] 84 | return user, item ,label -------------------------------------------------------------------------------- /workloads/ncf/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def hit(gt_item, pred_items): 6 | if gt_item in pred_items: 7 | return 1 8 | return 0 9 | 10 | 11 | def ndcg(gt_item, pred_items): 12 | if gt_item in pred_items: 13 | index = pred_items.index(gt_item) 14 | return np.reciprocal(np.log2(index+2)) 15 | return 0 16 | 17 | 18 | def metrics(model, test_loader, top_k): 19 | HR, NDCG = [], [] 20 | 21 | for user, item, label in test_loader: 22 | user = user.cuda() 23 | item = item.cuda() 24 | 25 | predictions = model(user, item) 26 | _, indices = torch.topk(predictions, top_k) 27 | recommends = torch.take( 28 | item, indices).cpu().numpy().tolist() 29 | 30 | gt_item = item[0].item() 31 | HR.append(hit(gt_item, recommends)) 32 | NDCG.append(ndcg(gt_item, recommends)) 33 | 34 | return np.mean(HR), np.mean(NDCG) 35 | -------------------------------------------------------------------------------- /workloads/ncf/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class NCF(nn.Module): 7 | def __init__(self, user_num, item_num, factor_num, num_layers, 8 | dropout, model, GMF_model=None, MLP_model=None): 9 | super(NCF, self).__init__() 10 | """ 11 | user_num: number of users; 12 | item_num: number of items; 13 | factor_num: number of predictive factors; 14 | num_layers: the number of layers in MLP model; 15 | dropout: dropout rate between fully connected layers; 16 | model: 'MLP', 'GMF', 'NeuMF-end', and 'NeuMF-pre'; 17 | GMF_model: pre-trained GMF weights; 18 | MLP_model: pre-trained MLP weights. 
19 | """ 20 | self.dropout = dropout 21 | self.model = model 22 | 23 | self.embed_user_GMF = nn.Embedding(user_num, factor_num) 24 | self.embed_item_GMF = nn.Embedding(item_num, factor_num) 25 | self.embed_user_MLP = nn.Embedding( 26 | user_num, factor_num * (2 ** (num_layers - 1))) 27 | self.embed_item_MLP = nn.Embedding( 28 | item_num, factor_num * (2 ** (num_layers - 1))) 29 | 30 | MLP_modules = [] 31 | for i in range(num_layers): 32 | input_size = factor_num * (2 ** (num_layers - i)) 33 | MLP_modules.append(nn.Dropout(p=self.dropout)) 34 | MLP_modules.append(nn.Linear(input_size, input_size//2)) 35 | MLP_modules.append(nn.ReLU()) 36 | self.MLP_layers = nn.Sequential(*MLP_modules) 37 | 38 | if self.model in ['MLP', 'GMF']: 39 | predict_size = factor_num 40 | else: 41 | predict_size = factor_num * 2 42 | self.predict_layer = nn.Linear(predict_size, 1) 43 | 44 | self._init_weight_(GMF_model, MLP_model) 45 | 46 | def _init_weight_(self, GMF_model, MLP_model): 47 | """ We leave the weights initialization here. """ 48 | if not self.model == 'NeuMF-pre': 49 | nn.init.normal_(self.embed_user_GMF.weight, std=0.01) 50 | nn.init.normal_(self.embed_user_MLP.weight, std=0.01) 51 | nn.init.normal_(self.embed_item_GMF.weight, std=0.01) 52 | nn.init.normal_(self.embed_item_MLP.weight, std=0.01) 53 | 54 | for m in self.MLP_layers: 55 | if isinstance(m, nn.Linear): 56 | nn.init.xavier_uniform_(m.weight) 57 | nn.init.kaiming_uniform_(self.predict_layer.weight, 58 | a=1, nonlinearity='sigmoid') 59 | 60 | for m in self.modules(): 61 | if isinstance(m, nn.Linear) and m.bias is not None: 62 | m.bias.data.zero_() 63 | else: 64 | # embedding layers 65 | self.embed_user_GMF.weight.data.copy_( 66 | GMF_model.embed_user_GMF.weight) 67 | self.embed_item_GMF.weight.data.copy_( 68 | GMF_model.embed_item_GMF.weight) 69 | self.embed_user_MLP.weight.data.copy_( 70 | MLP_model.embed_user_MLP.weight) 71 | self.embed_item_MLP.weight.data.copy_( 72 | MLP_model.embed_item_MLP.weight) 73 | 74 | # mlp layers 75 | for (m1, m2) in zip( 76 | self.MLP_layers, MLP_model.MLP_layers): 77 | if isinstance(m1, nn.Linear) and isinstance(m2, nn.Linear): 78 | m1.weight.data.copy_(m2.weight) 79 | m1.bias.data.copy_(m2.bias) 80 | 81 | # predict layers 82 | predict_weight = torch.cat([ 83 | GMF_model.predict_layer.weight, 84 | MLP_model.predict_layer.weight], dim=1) 85 | precit_bias = GMF_model.predict_layer.bias + \ 86 | MLP_model.predict_layer.bias 87 | 88 | self.predict_layer.weight.data.copy_(0.5 * predict_weight) 89 | self.predict_layer.bias.data.copy_(0.5 * precit_bias) 90 | 91 | def forward(self, user, item): 92 | if not self.model == 'MLP': 93 | embed_user_GMF = self.embed_user_GMF(user) 94 | embed_item_GMF = self.embed_item_GMF(item) 95 | output_GMF = embed_user_GMF * embed_item_GMF 96 | if not self.model == 'GMF': 97 | embed_user_MLP = self.embed_user_MLP(user) 98 | embed_item_MLP = self.embed_item_MLP(item) 99 | interaction = torch.cat((embed_user_MLP, embed_item_MLP), -1) 100 | output_MLP = self.MLP_layers(interaction) 101 | 102 | if self.model == 'GMF': 103 | concat = output_GMF 104 | elif self.model == 'MLP': 105 | concat = output_MLP 106 | else: 107 | concat = torch.cat((output_GMF, output_MLP), -1) 108 | 109 | prediction = self.predict_layer(concat) 110 | return prediction.view(-1) 111 | -------------------------------------------------------------------------------- /workloads/pointnet/num_seg_classes.txt: -------------------------------------------------------------------------------- 1 | Airplane 4 2 | Bag 2 3 | Cap 2 4 | 
-------------------------------------------------------------------------------- /workloads/pointnet/num_seg_classes.txt: -------------------------------------------------------------------------------- 1 | Airplane 4 2 | Bag 2 3 | Cap 2 4 | Car 4 5 | Chair 4 6 | Earphone 3 7 | Guitar 3 8 | Knife 2 9 | Lamp 4 10 | Laptop 2 11 | Motorbike 6 12 | Mug 2 13 | Pistol 3 14 | Rocket 3 15 | Skateboard 3 16 | Table 3 -------------------------------------------------------------------------------- /workloads/rl/profile_rl_lunarlander.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | import time 4 | import argparse 5 | import torch 6 | import torch.backends.cudnn as cudnn 7 | import workloads.settings as settings 8 | 9 | from stable_baselines3 import PPO, TD3 10 | from stable_baselines3.common.env_util import make_vec_env 11 | 12 | 13 | parser = argparse.ArgumentParser( 14 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 15 | ) 16 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 17 | 18 | args = parser.parse_args() 19 | 20 | args.total_time = settings.total_time 21 | 22 | warmup_epoch = 200 23 | benchmark_epoch = 1000 24 | 25 | 26 | def benchmark_rl(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 27 | t_start = time.time() 28 | 29 | if len(gpu_id) == 1: 30 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 31 | else: 32 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 33 | 34 | cudnn.benchmark = True 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | 37 | # Environments & Model 38 | env = make_vec_env("LunarLander-v2", n_envs=1) 39 | if model_name == 'PPO': 40 | model = PPO("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 41 | elif model_name == 'TD3': 42 | model = TD3("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 43 | else: 44 | raise ValueError(f'Unsupported model for this benchmark: {model_name}') 45 | 46 | # Warm-up 47 | model.learn(total_timesteps=warmup_epoch) 48 | warm_signal.value = 1 49 | t_warmend = time.time() 50 | 51 | # Benchmark 52 | print(f'==> Training {model_name} model with batch size {batch_size}, mixed precision: {mixed_precision}..') 53 | iter_num = 0 54 | while True: 55 | if time.time() - t_start >= args.total_time: 56 | t_pass = time.time() - t_warmend 57 | break 58 | model.learn(total_timesteps=1) 59 | iter_num += 1 60 | 61 | # Steps/sec; the name img_sec is kept for consistency with the other profiling scripts 62 | img_sec = iter_num * batch_size / t_pass 63 | 64 | # Results 65 | bench_list.append(img_sec) 66 | 
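A hypothetical driver for the profiler above (not part of this repo): benchmark_rl expects a shared results list and a warm-up flag, for example from the standard multiprocessing module.

import multiprocessing as mp

if __name__ == '__main__':
    manager = mp.Manager()
    bench_list = manager.list()     # collects the measured steps/sec
    warm_signal = mp.Value('i', 0)  # flips to 1 once warm-up completes
    p = mp.Process(target=benchmark_rl, args=('PPO', 64, False, [0], bench_list, warm_signal))
    p.start()
    p.join()
    print(f'throughput: {bench_list[0]:.1f} steps/s')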
-------------------------------------------------------------------------------- /workloads/rl/profile_rl_walker.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import os 3 | import time 4 | import torch 5 | import argparse 6 | import torch.backends.cudnn as cudnn 7 | import workloads.settings as settings 8 | 9 | from stable_baselines3 import PPO, TD3 10 | from stable_baselines3.common.env_util import make_vec_env 11 | 12 | 13 | parser = argparse.ArgumentParser( 14 | description="PyTorch DP Synthetic Benchmark", formatter_class=argparse.ArgumentDefaultsHelpFormatter 15 | ) 16 | parser.add_argument('--total_time', type=int, default=30, help='Total time to run the code') 17 | 18 | args = parser.parse_args() 19 | 20 | args.total_time = settings.total_time 21 | 22 | warmup_epoch = 200 23 | benchmark_epoch = 1000 24 | 25 | 26 | def benchmark_rl2(model_name, batch_size, mixed_precision, gpu_id, bench_list, warm_signal): 27 | t_start = time.time() 28 | 29 | if len(gpu_id) == 1: 30 | os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id[0]}" 31 | else: 32 | os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in gpu_id) 33 | 34 | cudnn.benchmark = True 35 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 36 | 37 | # Environments & Model 38 | env = make_vec_env("BipedalWalker-v3", n_envs=1) 39 | if model_name == 'PPO': 40 | model = PPO("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 41 | elif model_name == 'TD3': 42 | model = TD3("MlpPolicy", env, verbose=0, batch_size=batch_size, device=device) 43 | else: 44 | raise ValueError(f'Unsupported model for this benchmark: {model_name}') 45 | 46 | # Warm-up 47 | model.learn(total_timesteps=warmup_epoch) 48 | warm_signal.value = 1 49 | t_warmend = time.time() 50 | 51 | # Benchmark 52 | print(f'==> Training {model_name} model with batch size {batch_size}, mixed precision: {mixed_precision}..') 53 | iter_num = 0 54 | while True: 55 | if time.time() - t_start >= args.total_time: 56 | t_pass = time.time() - t_warmend 57 | break 58 | model.learn(total_timesteps=1) 59 | iter_num += 1 60 | 61 | # Steps/sec; the name img_sec is kept for consistency with the other profiling scripts 62 | img_sec = iter_num * batch_size / t_pass 63 | 64 | # Results 65 | bench_list.append(img_sec) 66 | -------------------------------------------------------------------------------- /workloads/settings.py: -------------------------------------------------------------------------------- 1 | # Data path 2 | data_dir = "/home/xxx/data/" 3 | total_time = 30 4 | -------------------------------------------------------------------------------- /workloads/translation/dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.utils.data 4 | 5 | from translation.transformer import Constants 6 | 7 | def paired_collate_fn(insts): 8 | src_insts, tgt_insts = list(zip(*insts)) 9 | src_insts = collate_fn(src_insts) 10 | tgt_insts = collate_fn(tgt_insts) 11 | return (*src_insts, *tgt_insts) 12 | 13 | def collate_fn(insts): 14 | ''' Pad the instance to the max seq length in batch ''' 15 | 16 | max_len = max(len(inst) for inst in insts) 17 | 18 | batch_seq = np.array([ 19 | inst + [Constants.PAD] * (max_len - len(inst)) 20 | for inst in insts]) 21 | 22 | batch_pos = np.array([ 23 | [pos_i+1 if w_i != Constants.PAD else 0 24 | for pos_i, w_i in enumerate(inst)] for inst in batch_seq]) 25 | 26 | batch_seq = torch.LongTensor(batch_seq) 27 | batch_pos = torch.LongTensor(batch_pos) 28 | 29 | return batch_seq, batch_pos 30 | 31 | class TranslationDataset(torch.utils.data.Dataset): 32 | def __init__( 33 | self, src_word2idx, tgt_word2idx, 34 | src_insts=None, tgt_insts=None): 35 | 36 | assert src_insts 37 | assert not tgt_insts or (len(src_insts) == len(tgt_insts)) 38 | 39 | src_idx2word = {idx:word for word, idx in src_word2idx.items()} 40 | self._src_word2idx = src_word2idx 41 | self._src_idx2word = src_idx2word 42 | self._src_insts = src_insts 43 | 44 | tgt_idx2word = {idx:word for word, idx in tgt_word2idx.items()} 45 | self._tgt_word2idx = tgt_word2idx 46 | self._tgt_idx2word = tgt_idx2word 47 | self._tgt_insts = tgt_insts 48 | 49 | @property 50 | def n_insts(self): 51 | ''' Property for dataset size ''' 52 | return len(self._src_insts) 53 | 54 | @property 55 | def src_vocab_size(self): 56 | ''' Property for vocab size ''' 57 | return len(self._src_word2idx) 58 | 59 | @property 60 | def tgt_vocab_size(self): 61 | ''' Property for vocab size ''' 62 | return len(self._tgt_word2idx) 63 | 64 | @property 65 | def src_word2idx(self): 66 | ''' Property for word dictionary ''' 67 | return self._src_word2idx 68 | 69 | @property 70 | def tgt_word2idx(self): 71 | ''' Property for word dictionary ''' 72 | return self._tgt_word2idx 73 | 74 | @property 75 | def src_idx2word(self): 76 | ''' Property for index dictionary ''' 77 | return self._src_idx2word 78 | 79 | @property 80 | def tgt_idx2word(self): 81 | ''' Property for index dictionary ''' 82 | return self._tgt_idx2word 83 | 84 | def __len__(self): 85 | return self.n_insts 86 | 87 | def __getitem__(self, idx): 88 | if self._tgt_insts: 89 | return self._src_insts[idx], self._tgt_insts[idx] 90 | return self._src_insts[idx] 91 | 
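A minimal usage sketch (hypothetical, not part of this file): the paired collate function above plugs straight into a DataLoader and yields padded sequence/position batches.

from torch.utils.data import DataLoader

# `dataset` is assumed to be a TranslationDataset built with both src_insts and tgt_insts.
loader = DataLoader(dataset, batch_size=64, collate_fn=paired_collate_fn)
src_seq, src_pos, tgt_seq, tgt_pos = next(iter(loader))  # each: LongTensor of shape (batch, max_len)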
-------------------------------------------------------------------------------- /workloads/translation/multi-bleu.perl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 3 | # This file is part of moses. Its use is licensed under the GNU Lesser General 4 | # Public License version 2.1 or, at your option, any later version. 5 | 6 | # $Id$ 7 | use warnings; 8 | use strict; 9 | 10 | my $lowercase = 0; 11 | if ($ARGV[0] eq "-lc") { 12 | $lowercase = 1; 13 | shift; 14 | } 15 | 16 | my $stem = $ARGV[0]; 17 | if (!defined $stem) { 18 | print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n"; 19 | print STDERR "Reads the references from reference or reference0, reference1, ...\n"; 20 | exit(1); 21 | } 22 | 23 | $stem .= ".ref" if !-e $stem && !-e $stem."0" && -e $stem.".ref0"; 24 | 25 | my @REF; 26 | my $ref=0; 27 | while(-e "$stem$ref") { 28 | &add_to_ref("$stem$ref",\@REF); 29 | $ref++; 30 | } 31 | &add_to_ref($stem,\@REF) if -e $stem; 32 | die("ERROR: could not find reference file $stem") unless scalar @REF; 33 | 34 | # add additional references explicitly specified on the command line 35 | shift; 36 | foreach my $stem (@ARGV) { 37 | &add_to_ref($stem,\@REF) if -e $stem; 38 | } 39 | 40 | 41 | 42 | sub add_to_ref { 43 | my ($file,$REF) = @_; 44 | my $s=0; 45 | if ($file =~ /.gz$/) { 46 | open(REF,"gzip -dc $file|") or die "Can't read $file"; 47 | } else { 48 | open(REF,$file) or die "Can't read $file"; 49 | } 50 | while(<REF>) { 51 | chomp; 52 | push @{$$REF[$s++]}, $_; 53 | } 54 | close(REF); 55 | } 56 | 57 | my(@CORRECT,@TOTAL,$length_translation,$length_reference); 58 | my $s=0; 59 | while(<STDIN>) { 60 | chomp; 61 | $_ = lc if $lowercase; 62 | my @WORD = split; 63 | my %REF_NGRAM = (); 64 | my $length_translation_this_sentence = scalar(@WORD); 65 | my ($closest_diff,$closest_length) = (9999,9999); 66 | foreach my $reference (@{$REF[$s]}) { 67 | # print "$s $_ <=> $reference\n"; 68 | $reference = lc($reference) if $lowercase; 69 | my @WORD = split(' ',$reference); 70 | my $length = scalar(@WORD); 71 | my $diff = abs($length_translation_this_sentence-$length); 72 | if ($diff < $closest_diff) { 73 | $closest_diff = $diff; 74 | $closest_length = $length; 75 | # print STDERR "$s: closest diff ".abs($length_translation_this_sentence-$length)." = abs($length_translation_this_sentence-$length), setting len: $closest_length\n"; 76 | } elsif ($diff == $closest_diff) { 77 | $closest_length = $length if $length < $closest_length; 78 | # from two references with the same closeness to me 79 | # take the *shorter* into account, not the "first" one. 
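# The block below tallies each reference's n-grams (n = 1..4) and keeps, for
# every n-gram, its maximum count over all references: the clipping counts
# used by BLEU's modified n-gram precision.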
80 | } 81 | for(my $n=1;$n<=4;$n++) { 82 | my %REF_NGRAM_N = (); 83 | for(my $start=0;$start<=$#WORD-($n-1);$start++) { 84 | my $ngram = "$n"; 85 | for(my $w=0;$w<$n;$w++) { 86 | $ngram .= " ".$WORD[$start+$w]; 87 | } 88 | $REF_NGRAM_N{$ngram}++; 89 | } 90 | foreach my $ngram (keys %REF_NGRAM_N) { 91 | if (!defined($REF_NGRAM{$ngram}) || 92 | $REF_NGRAM{$ngram} < $REF_NGRAM_N{$ngram}) { 93 | $REF_NGRAM{$ngram} = $REF_NGRAM_N{$ngram}; 94 | # print "$i: REF_NGRAM{$ngram} = $REF_NGRAM{$ngram}
\n"; 95 | } 96 | } 97 | } 98 | } 99 | $length_translation += $length_translation_this_sentence; 100 | $length_reference += $closest_length; 101 | for(my $n=1;$n<=4;$n++) { 102 | my %T_NGRAM = (); 103 | for(my $start=0;$start<=$#WORD-($n-1);$start++) { 104 | my $ngram = "$n"; 105 | for(my $w=0;$w<$n;$w++) { 106 | $ngram .= " ".$WORD[$start+$w]; 107 | } 108 | $T_NGRAM{$ngram}++; 109 | } 110 | foreach my $ngram (keys %T_NGRAM) { 111 | $ngram =~ /^(\d+) /; 112 | my $n = $1; 113 | # my $corr = 0; 114 | # print "$i e $ngram $T_NGRAM{$ngram}
\n"; 115 | $TOTAL[$n] += $T_NGRAM{$ngram}; 116 | if (defined($REF_NGRAM{$ngram})) { 117 | if ($REF_NGRAM{$ngram} >= $T_NGRAM{$ngram}) { 118 | $CORRECT[$n] += $T_NGRAM{$ngram}; 119 | # $corr = $T_NGRAM{$ngram}; 120 | # print "$i e correct1 $T_NGRAM{$ngram}
\n"; 121 | } 122 | else { 123 | $CORRECT[$n] += $REF_NGRAM{$ngram}; 124 | # $corr = $REF_NGRAM{$ngram}; 125 | # print "$i e correct2 $REF_NGRAM{$ngram}
\n"; 126 | } 127 | } 128 | # $REF_NGRAM{$ngram} = 0 if !defined $REF_NGRAM{$ngram}; 129 | # print STDERR "$ngram: {$s, $REF_NGRAM{$ngram}, $T_NGRAM{$ngram}, $corr}\n" 130 | } 131 | } 132 | $s++; 133 | } 134 | my $brevity_penalty = 1; 135 | my $bleu = 0; 136 | 137 | my @bleu=(); 138 | 139 | for(my $n=1;$n<=4;$n++) { 140 | if (defined ($TOTAL[$n])){ 141 | $bleu[$n]=($TOTAL[$n])?$CORRECT[$n]/$TOTAL[$n]:0; 142 | # print STDERR "CORRECT[$n]:$CORRECT[$n] TOTAL[$n]:$TOTAL[$n]\n"; 143 | }else{ 144 | $bleu[$n]=0; 145 | } 146 | } 147 | 148 | if ($length_reference==0){ 149 | printf "BLEU = 0, 0/0/0/0 (BP=0, ratio=0, hyp_len=0, ref_len=0)\n"; 150 | exit(1); 151 | } 152 | 153 | if ($length_translation<$length_reference) { 154 | $brevity_penalty = exp(1-$length_reference/$length_translation); 155 | } 156 | $bleu = $brevity_penalty * exp((my_log( $bleu[1] ) + 157 | my_log( $bleu[2] ) + 158 | my_log( $bleu[3] ) + 159 | my_log( $bleu[4] ) ) / 4) ; 160 | printf "BLEU = %.2f, %.1f/%.1f/%.1f/%.1f (BP=%.3f, ratio=%.3f, hyp_len=%d, ref_len=%d)\n", 161 | 100*$bleu, 162 | 100*$bleu[1], 163 | 100*$bleu[2], 164 | 100*$bleu[3], 165 | 100*$bleu[4], 166 | $brevity_penalty, 167 | $length_translation / $length_reference, 168 | $length_translation, 169 | $length_reference; 170 | 171 | 172 | print STDERR "It is not advisable to publish scores from multi-bleu.perl. The scores depend on your tokenizer, which is unlikely to be reproducible from your paper or consistent across research groups. Instead you should detokenize then use mteval-v14.pl, which has a standard tokenization. Scores from multi-bleu.perl can still be used for internal purposes when you have a consistent tokenizer.\n"; 173 | 174 | sub my_log { 175 | return -9999999999 unless $_[0]; 176 | return log($_[0]); 177 | } 178 | -------------------------------------------------------------------------------- /workloads/translation/nonbreaking_prefix.de: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | #no german words end in single lower-case letters, so we throw those in too. 7 | A 8 | B 9 | C 10 | D 11 | E 12 | F 13 | G 14 | H 15 | I 16 | J 17 | K 18 | L 19 | M 20 | N 21 | O 22 | P 23 | Q 24 | R 25 | S 26 | T 27 | U 28 | V 29 | W 30 | X 31 | Y 32 | Z 33 | a 34 | b 35 | c 36 | d 37 | e 38 | f 39 | g 40 | h 41 | i 42 | j 43 | k 44 | l 45 | m 46 | n 47 | o 48 | p 49 | q 50 | r 51 | s 52 | t 53 | u 54 | v 55 | w 56 | x 57 | y 58 | z 59 | 60 | 61 | #Roman Numerals. A dot after one of these is not a sentence break in German. 
62 | I 63 | II 64 | III 65 | IV 66 | V 67 | VI 68 | VII 69 | VIII 70 | IX 71 | X 72 | XI 73 | XII 74 | XIII 75 | XIV 76 | XV 77 | XVI 78 | XVII 79 | XVIII 80 | XIX 81 | XX 82 | i 83 | ii 84 | iii 85 | iv 86 | v 87 | vi 88 | vii 89 | viii 90 | ix 91 | x 92 | xi 93 | xii 94 | xiii 95 | xiv 96 | xv 97 | xvi 98 | xvii 99 | xviii 100 | xix 101 | xx 102 | 103 | #Titles and Honorifics 104 | Adj 105 | Adm 106 | Adv 107 | Asst 108 | Bart 109 | Bldg 110 | Brig 111 | Bros 112 | Capt 113 | Cmdr 114 | Col 115 | Comdr 116 | Con 117 | Corp 118 | Cpl 119 | DR 120 | Dr 121 | Ens 122 | Gen 123 | Gov 124 | Hon 125 | Hosp 126 | Insp 127 | Lt 128 | MM 129 | MR 130 | MRS 131 | MS 132 | Maj 133 | Messrs 134 | Mlle 135 | Mme 136 | Mr 137 | Mrs 138 | Ms 139 | Msgr 140 | Op 141 | Ord 142 | Pfc 143 | Ph 144 | Prof 145 | Pvt 146 | Rep 147 | Reps 148 | Res 149 | Rev 150 | Rt 151 | Sen 152 | Sens 153 | Sfc 154 | Sgt 155 | Sr 156 | St 157 | Supt 158 | Surg 159 | 160 | #Misc symbols 161 | Mio 162 | Mrd 163 | bzw 164 | v 165 | vs 166 | usw 167 | d.h 168 | z.B 169 | u.a 170 | etc 171 | Mrd 172 | MwSt 173 | ggf 174 | d.J 175 | D.h 176 | m.E 177 | vgl 178 | I.F 179 | z.T 180 | sogen 181 | ff 182 | u.E 183 | g.U 184 | g.g.A 185 | c.-à-d 186 | Buchst 187 | u.s.w 188 | sog 189 | u.ä 190 | Std 191 | evtl 192 | Zt 193 | Chr 194 | u.U 195 | o.ä 196 | Ltd 197 | b.A 198 | z.Zt 199 | spp 200 | sen 201 | SA 202 | k.o 203 | jun 204 | i.H.v 205 | dgl 206 | dergl 207 | Co 208 | zzt 209 | usf 210 | s.p.a 211 | Dkr 212 | Corp 213 | bzgl 214 | BSE 215 | 216 | #Number indicators 217 | # add #NUMERIC_ONLY# after the word if it should ONLY be non-breaking when a 0-9 digit follows it 218 | No 219 | Nos 220 | Art 221 | Nr 222 | pp 223 | ca 224 | Ca 225 | 226 | #Ordinals are done with . in German - "1." = "1st" in English 227 | 1 228 | 2 229 | 3 230 | 4 231 | 5 232 | 6 233 | 7 234 | 8 235 | 9 236 | 10 237 | 11 238 | 12 239 | 13 240 | 14 241 | 15 242 | 16 243 | 17 244 | 18 245 | 19 246 | 20 247 | 21 248 | 22 249 | 23 250 | 24 251 | 25 252 | 26 253 | 27 254 | 28 255 | 29 256 | 30 257 | 31 258 | 32 259 | 33 260 | 34 261 | 35 262 | 36 263 | 37 264 | 38 265 | 39 266 | 40 267 | 41 268 | 42 269 | 43 270 | 44 271 | 45 272 | 46 273 | 47 274 | 48 275 | 49 276 | 50 277 | 51 278 | 52 279 | 53 280 | 54 281 | 55 282 | 56 283 | 57 284 | 58 285 | 59 286 | 60 287 | 61 288 | 62 289 | 63 290 | 64 291 | 65 292 | 66 293 | 67 294 | 68 295 | 69 296 | 70 297 | 71 298 | 72 299 | 73 300 | 74 301 | 75 302 | 76 303 | 77 304 | 78 305 | 79 306 | 80 307 | 81 308 | 82 309 | 83 310 | 84 311 | 85 312 | 86 313 | 87 314 | 88 315 | 89 316 | 90 317 | 91 318 | 92 319 | 93 320 | 94 321 | 95 322 | 96 323 | 97 324 | 98 325 | 99 326 | -------------------------------------------------------------------------------- /workloads/translation/nonbreaking_prefix.en: -------------------------------------------------------------------------------- 1 | #Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker. 2 | #Special cases are included for prefixes that ONLY appear before 0-9 numbers. 3 | 4 | #any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in) 5 | #usually upper case letters are initials in a name 6 | A 7 | B 8 | C 9 | D 10 | E 11 | F 12 | G 13 | H 14 | I 15 | J 16 | K 17 | L 18 | M 19 | N 20 | O 21 | P 22 | Q 23 | R 24 | S 25 | T 26 | U 27 | V 28 | W 29 | X 30 | Y 31 | Z 32 | 33 | #List of titles. 
These are often followed by upper-case names, but do not indicate sentence breaks 34 | Adj 35 | Adm 36 | Adv 37 | Asst 38 | Bart 39 | Bldg 40 | Brig 41 | Bros 42 | Capt 43 | Cmdr 44 | Col 45 | Comdr 46 | Con 47 | Corp 48 | Cpl 49 | DR 50 | Dr 51 | Drs 52 | Ens 53 | Gen 54 | Gov 55 | Hon 56 | Hr 57 | Hosp 58 | Insp 59 | Lt 60 | MM 61 | MR 62 | MRS 63 | MS 64 | Maj 65 | Messrs 66 | Mlle 67 | Mme 68 | Mr 69 | Mrs 70 | Ms 71 | Msgr 72 | Op 73 | Ord 74 | Pfc 75 | Ph 76 | Prof 77 | Pvt 78 | Rep 79 | Reps 80 | Res 81 | Rev 82 | Rt 83 | Sen 84 | Sens 85 | Sfc 86 | Sgt 87 | Sr 88 | St 89 | Supt 90 | Surg 91 | 92 | #misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence) 93 | v 94 | vs 95 | i.e 96 | rev 97 | e.g 98 | 99 | #Numbers only. These should only induce breaks when followed by a numeric sequence 100 | # add NUMERIC_ONLY after the word for this function 101 | #This case is mostly for the english "No." which can either be a sentence of its own, or 102 | #if followed by a number, a non-breaking prefix 103 | No #NUMERIC_ONLY# 104 | Nos 105 | Art #NUMERIC_ONLY# 106 | Nr 107 | pp #NUMERIC_ONLY# 108 | 109 | #month abbreviations 110 | Jan 111 | Feb 112 | Mar 113 | Apr 114 | #May is a full word 115 | Jun 116 | Jul 117 | Aug 118 | Sep 119 | Oct 120 | Nov 121 | Dec 122 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Beam.py: -------------------------------------------------------------------------------- 1 | """ Manage beam search info structure. 2 | 3 | Heavily borrowed from OpenNMT-py. 4 | For code in OpenNMT-py, please check the following link: 5 | https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/Beam.py 6 | """ 7 | 8 | import torch 9 | import numpy as np 10 | import transformer.Constants as Constants 11 | 12 | class Beam(): 13 | ''' Beam search ''' 14 | 15 | def __init__(self, size, device=False): 16 | 17 | self.size = size 18 | self._done = False 19 | 20 | # The score for each translation on the beam. 21 | self.scores = torch.zeros((size,), dtype=torch.float, device=device) 22 | self.all_scores = [] 23 | 24 | # The backpointers at each time-step. 25 | self.prev_ks = [] 26 | 27 | # The outputs at each time-step. 28 | self.next_ys = [torch.full((size,), Constants.PAD, dtype=torch.long, device=device)] 29 | self.next_ys[0][0] = Constants.BOS 30 | 31 | def get_current_state(self): 32 | "Get the outputs for the current timestep." 33 | return self.get_tentative_hypothesis() 34 | 35 | def get_current_origin(self): 36 | "Get the backpointers for the current timestep." 37 | return self.prev_ks[-1] 38 | 39 | @property 40 | def done(self): 41 | return self._done 42 | 43 | def advance(self, word_prob): 44 | "Update beam status and check if finished or not." 45 | num_words = word_prob.size(1) 46 | 47 | # Sum the previous scores. 
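# word_prob is (beam_size, vocab_size); broadcasting the running beam scores
# over the vocab axis yields the cumulative log-probability of every
# beam/word continuation. At the first step only beam 0 is populated, so a
# single row is used.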
48 | if len(self.prev_ks) > 0: 49 | beam_lk = word_prob + self.scores.unsqueeze(1).expand_as(word_prob) 50 | else: 51 | beam_lk = word_prob[0] 52 | 53 | flat_beam_lk = beam_lk.view(-1) 54 | 55 | best_scores, best_scores_id = flat_beam_lk.topk(self.size, 0, True, True) 56 | 57 | self.all_scores.append(self.scores) 58 | self.scores = best_scores 59 | 60 | # best_scores_id indexes the flattened (beam x word) array, 61 | # so recover which beam and which word each score came from 62 | prev_k = torch.div(best_scores_id, num_words, rounding_mode='floor')  # integer floor division keeps valid indices 63 | self.prev_ks.append(prev_k) 64 | self.next_ys.append(best_scores_id - prev_k * num_words) 65 | 66 | # End condition is when top-of-beam is EOS. 67 | if self.next_ys[-1][0].item() == Constants.EOS: 68 | self._done = True 69 | self.all_scores.append(self.scores) 70 | 71 | return self._done 72 | 73 | def sort_scores(self): 74 | "Sort the scores." 75 | return torch.sort(self.scores, 0, True) 76 | 77 | def get_the_best_score_and_idx(self): 78 | "Get the score of the best in the beam." 79 | scores, ids = self.sort_scores() 80 | return scores[1], ids[1] 81 | 82 | def get_tentative_hypothesis(self): 83 | "Get the decoded sequence for the current timestep." 84 | 85 | if len(self.next_ys) == 1: 86 | dec_seq = self.next_ys[0].unsqueeze(1) 87 | else: 88 | _, keys = self.sort_scores() 89 | hyps = [self.get_hypothesis(k) for k in keys] 90 | hyps = [[Constants.BOS] + h for h in hyps] 91 | dec_seq = torch.LongTensor(hyps) 92 | 93 | return dec_seq 94 | 95 | def get_hypothesis(self, k): 96 | """ Walk back to construct the full hypothesis. """ 97 | hyp = [] 98 | for j in range(len(self.prev_ks) - 1, -1, -1): 99 | hyp.append(self.next_ys[j+1][k]) 100 | k = self.prev_ks[j][k] 101 | 102 | return list(map(lambda x: x.item(), hyp[::-1])) 103 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Constants.py: -------------------------------------------------------------------------------- 1 | 2 | PAD = 0 3 | UNK = 1 4 | BOS = 2 5 | EOS = 3 6 | 7 | PAD_WORD = '<blank>' 8 | UNK_WORD = '<unk>' 9 | BOS_WORD = '<s>' 10 | EOS_WORD = '</s>' 11 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Layers.py: -------------------------------------------------------------------------------- 1 | ''' Define the Layers ''' 2 | import torch.nn as nn 3 | from .SubLayers import MultiHeadAttention, PositionwiseFeedForward 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | 8 | class EncoderLayer(nn.Module): 9 | ''' Compose with two layers ''' 10 | 11 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 12 | super(EncoderLayer, self).__init__() 13 | self.slf_attn = MultiHeadAttention( 14 | n_head, d_model, d_k, d_v, dropout=dropout) 15 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 16 | 17 | def forward(self, enc_input, non_pad_mask=None, slf_attn_mask=None): 18 | enc_output, enc_slf_attn = self.slf_attn( 19 | enc_input, enc_input, enc_input, mask=slf_attn_mask) 20 | enc_output *= non_pad_mask 21 | 22 | enc_output = self.pos_ffn(enc_output) 23 | enc_output *= non_pad_mask 24 | 25 | return enc_output, enc_slf_attn 26 | 27 | 28 | class DecoderLayer(nn.Module): 29 | ''' Compose with three layers ''' 30 | 31 | def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1): 32 | super(DecoderLayer, self).__init__() 33 | self.slf_attn = MultiHeadAttention(n_head, 
d_model, d_k, d_v, dropout=dropout) 34 | self.enc_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout) 35 | self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout) 36 | 37 | def forward(self, dec_input, enc_output, non_pad_mask=None, slf_attn_mask=None, dec_enc_attn_mask=None): 38 | dec_output, dec_slf_attn = self.slf_attn( 39 | dec_input, dec_input, dec_input, mask=slf_attn_mask) 40 | dec_output *= non_pad_mask 41 | 42 | dec_output, dec_enc_attn = self.enc_attn( 43 | dec_output, enc_output, enc_output, mask=dec_enc_attn_mask) 44 | dec_output *= non_pad_mask 45 | 46 | dec_output = self.pos_ffn(dec_output) 47 | dec_output *= non_pad_mask 48 | 49 | return dec_output, dec_slf_attn, dec_enc_attn 50 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | __author__ = "Yu-Hsiang Huang" 6 | 7 | class ScaledDotProductAttention(nn.Module): 8 | ''' Scaled Dot-Product Attention ''' 9 | 10 | def __init__(self, temperature, attn_dropout=0.1): 11 | super().__init__() 12 | self.temperature = temperature 13 | self.dropout = nn.Dropout(attn_dropout) 14 | self.softmax = nn.Softmax(dim=2) 15 | 16 | def forward(self, q, k, v, mask=None): 17 | 18 | attn = torch.bmm(q, k.transpose(1, 2)) 19 | attn = attn / self.temperature 20 | 21 | if mask is not None: 22 | attn = attn.masked_fill(mask, -np.inf) 23 | 24 | attn = self.softmax(attn) 25 | attn = self.dropout(attn) 26 | output = torch.bmm(attn, v) 27 | 28 | return output, attn 29 | -------------------------------------------------------------------------------- /workloads/translation/transformer/Optim.py: -------------------------------------------------------------------------------- 1 | '''A wrapper class for optimizer ''' 2 | import numpy as np 3 | 4 | class ScheduledOptim(): 5 | '''A simple wrapper class for learning rate scheduling''' 6 | 7 | def __init__(self, optimizer, d_model, n_warmup_steps): 8 | self._optimizer = optimizer 9 | self.n_warmup_steps = n_warmup_steps 10 | self.n_current_steps = 0 11 | self.init_lr = np.power(d_model, -0.5) 12 | 13 | def step_and_update_lr(self): 14 | "Step with the inner optimizer" 15 | self._update_learning_rate() 16 | self._optimizer.step() 17 | 18 | def zero_grad(self): 19 | "Zero out the gradients by the inner optimizer" 20 | self._optimizer.zero_grad() 21 | 22 | def _get_lr_scale(self): 23 | return np.min([ 24 | np.power(self.n_current_steps, -0.5), 25 | np.power(self.n_warmup_steps, -1.5) * self.n_current_steps]) 26 | 27 | def _update_learning_rate(self): 28 | ''' Learning rate scheduling per step ''' 29 | 30 | self.n_current_steps += 1 31 | lr = self.init_lr * self._get_lr_scale() 32 | 33 | for param_group in self._optimizer.param_groups: 34 | param_group['lr'] = lr 35 | 36 | -------------------------------------------------------------------------------- /workloads/translation/transformer/SubLayers.py: -------------------------------------------------------------------------------- 1 | ''' Define the sublayers in encoder/decoder layer ''' 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from .Modules import ScaledDotProductAttention 6 | 7 | __author__ = "Yu-Hsiang Huang" 8 | 9 | class MultiHeadAttention(nn.Module): 10 | ''' Multi-Head Attention module ''' 11 | 12 | def __init__(self, n_head, d_model, d_k, d_v, 
dropout=0.1): 13 | super().__init__() 14 | 15 | self.n_head = n_head 16 | self.d_k = d_k 17 | self.d_v = d_v 18 | 19 | self.w_qs = nn.Linear(d_model, n_head * d_k) 20 | self.w_ks = nn.Linear(d_model, n_head * d_k) 21 | self.w_vs = nn.Linear(d_model, n_head * d_v) 22 | nn.init.normal_(self.w_qs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 23 | nn.init.normal_(self.w_ks.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_k))) 24 | nn.init.normal_(self.w_vs.weight, mean=0, std=np.sqrt(2.0 / (d_model + d_v))) 25 | 26 | self.attention = ScaledDotProductAttention(temperature=np.power(d_k, 0.5)) 27 | self.layer_norm = nn.LayerNorm(d_model) 28 | 29 | self.fc = nn.Linear(n_head * d_v, d_model) 30 | nn.init.xavier_normal_(self.fc.weight) 31 | 32 | self.dropout = nn.Dropout(dropout) 33 | 34 | 35 | def forward(self, q, k, v, mask=None): 36 | 37 | d_k, d_v, n_head = self.d_k, self.d_v, self.n_head 38 | 39 | sz_b, len_q, _ = q.size() 40 | sz_b, len_k, _ = k.size() 41 | sz_b, len_v, _ = v.size() 42 | 43 | residual = q 44 | 45 | q = self.w_qs(q).view(sz_b, len_q, n_head, d_k) 46 | k = self.w_ks(k).view(sz_b, len_k, n_head, d_k) 47 | v = self.w_vs(v).view(sz_b, len_v, n_head, d_v) 48 | 49 | q = q.permute(2, 0, 1, 3).contiguous().view(-1, len_q, d_k) # (n*b) x lq x dk 50 | k = k.permute(2, 0, 1, 3).contiguous().view(-1, len_k, d_k) # (n*b) x lk x dk 51 | v = v.permute(2, 0, 1, 3).contiguous().view(-1, len_v, d_v) # (n*b) x lv x dv 52 | 53 | mask = mask.repeat(n_head, 1, 1) # (n*b) x .. x .. 54 | output, attn = self.attention(q, k, v, mask=mask) 55 | 56 | output = output.view(n_head, sz_b, len_q, d_v) 57 | output = output.permute(1, 2, 0, 3).contiguous().view(sz_b, len_q, -1) # b x lq x (n*dv) 58 | 59 | output = self.dropout(self.fc(output)) 60 | output = self.layer_norm(output + residual) 61 | 62 | return output, attn 63 | 64 | class PositionwiseFeedForward(nn.Module): 65 | ''' A two-feed-forward-layer module ''' 66 | 67 | def __init__(self, d_in, d_hid, dropout=0.1): 68 | super().__init__() 69 | self.w_1 = nn.Conv1d(d_in, d_hid, 1) # position-wise 70 | self.w_2 = nn.Conv1d(d_hid, d_in, 1) # position-wise 71 | self.layer_norm = nn.LayerNorm(d_in) 72 | self.dropout = nn.Dropout(dropout) 73 | 74 | def forward(self, x): 75 | residual = x 76 | output = x.transpose(1, 2) 77 | output = self.w_2(F.relu(self.w_1(output))) 78 | output = output.transpose(1, 2) 79 | output = self.dropout(output) 80 | output = self.layer_norm(output + residual) 81 | return output 82 | -------------------------------------------------------------------------------- /workloads/translation/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | --------------------------------------------------------------------------------
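As a quick sanity check of the attention sublayers above, a hypothetical standalone snippet (not part of the repo; assumes the repository root is on PYTHONPATH so the package imports resolve):

import torch
from workloads.translation.transformer.SubLayers import MultiHeadAttention, PositionwiseFeedForward

b, lq, d_model, n_head, d_k, d_v = 2, 5, 512, 8, 64, 64
x = torch.randn(b, lq, d_model)
# MultiHeadAttention.forward calls mask.repeat(...), so a mask must be supplied;
# an all-False boolean mask means "attend everywhere".
mask = torch.zeros(b, lq, lq, dtype=torch.bool)
attn_layer = MultiHeadAttention(n_head, d_model, d_k, d_v)
out, attn = attn_layer(x, x, x, mask=mask)
assert out.shape == (b, lq, d_model)       # residual + layer norm preserve d_model
assert attn.shape == (n_head * b, lq, lq)  # attention maps, heads folded into batch
ffn = PositionwiseFeedForward(d_model, d_hid=2048)
assert ffn(out).shape == (b, lq, d_model)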