├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── models
    ├── __init__.py
    └── vision
    │   ├── __init__.py
    │   ├── resnet.py
    │   └── utils.py
├── plan.py
├── plans
    └── V100
    │   ├── bert_base
    │       └── README.md
    │   ├── bert_large
    │       └── README.md
    │   ├── gpt2
    │       └── README.md
    │   ├── gpt2_medium
    │       └── README.md
    │   ├── resnet101
    │       └── README.md
    │   ├── resnet50
    │       └── README.md
    │   ├── roberta_base
    │       └── README.md
    │   └── roberta_large
    │       └── README.md
├── proto
    ├── deepcache.proto
    ├── deepplan.proto
    └── deepplan_pb2.py
├── pytorch.patch
├── requirements.txt
├── scripts
    ├── create_all_plans.sh
    ├── download_azure_trace_dataset.sh
    ├── fig10
    │   ├── graph.py
    │   └── run.sh
    ├── fig12
    │   ├── graph.py
    │   └── run.sh
    ├── fig13
    │   ├── graph.py
    │   └── run.sh
    └── fig14
    │   ├── graph.py
    │   └── run.sh
├── src
    ├── CMakeLists.txt
    ├── benchmark.cpp
    ├── client.cpp
    ├── client
    │   ├── azure.h
    │   ├── client.cpp
    │   ├── client.h
    │   ├── workload.cpp
    │   └── workload.h
    ├── deepplan
    │   ├── engine.cpp
    │   ├── engine.h
    │   ├── model.cpp
    │   └── model.h
    ├── network
    │   ├── message.h
    │   ├── network.cpp
    │   ├── network.h
    │   ├── server_api.cpp
    │   ├── server_api.h
    │   ├── session.cpp
    │   └── session.h
    ├── server.cpp
    ├── server
    │   ├── controller.cpp
    │   ├── controller.h
    │   ├── model_manager.cpp
    │   ├── model_manager.h
    │   ├── server.cpp
    │   ├── server.h
    │   ├── worker.cpp
    │   └── worker.h
    ├── server_api.h
    ├── util.cpp
    └── util.h
└── util.py


/.gitignore:
--------------------------------------------------------------------------------
1 | tags
2 | *.swp
3 | build
4 | model_repo/*
5 | __pycache__
6 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.0)
 2 | #add_compile_options(-std=c++)
 3 | set(CMAKE_CXX_STANDARD 14)
 4 | set(CMAKE_C_STANDARD 11)
 5 | 
 6 | project(DeepPlan)
 7 | 
 8 | find_package(Torch REQUIRED)
 9 | find_package(Boost COMPONENTS system filesystem REQUIRED)
10 | 
11 | include_directories(${Boost_INCLUDE_DIR})
12 | 
13 | # Build protobuf v3.11.4
14 | find_package(Protobuf REQUIRED)
15 | include_directories(${Protobuf_INCLUDE_DIR})
16 | 
17 | include_directories(${CMAKE_CURRENT_BINARY_DIR})
18 | protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS proto/deepplan.proto)
19 | add_library(deepplan_proto ${PROTO_HDRS} ${PROTO_SRCS})
20 | target_link_libraries(deepplan_proto PRIVATE ${Protobuf_LIBRARIES})
21 | 
22 | protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS proto/deepcache.proto)
23 | add_library(deepcache_proto ${PROTO_HDRS} ${PROTO_SRCS})
24 | target_link_libraries(deepcache_proto PRIVATE ${Protobuf_LIBRARIES})
25 | 
26 | include_directories(${CMAKE_CURRENT_BINARY_DIR})
27 | #protobuf_generate_cpp(PROTO_SRCS PROTO_HDRS src/deepplan/proto/deepplan.proto)
28 | #add_library(deepplan_proto ${PROTO_HDRS} ${PROTO_SRCS})
29 | #target_link_libraries(deepplan_proto PRIVATE ${Protobuf_LIBRARIES})
30 | 
31 | add_subdirectory(src)
32 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2022 Computer Systems Laboratory @ Ajou University
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # DeepPlan
  2 | 
  3 | Title: Fast and Efficient Model Serving Using Multi-GPUs with Direct-Host-Access
  4 | 
  5 | ## 1.Experimental Environment
  6 | ### 1.1 Hardware
  7 | * AWS P3.8xlarge instance
  8 | * GPU: NVIDIA V100 (16GB) x 4ea
  9 | * Memory: 244GB DDR4 DRAM
 10 | * CPU: Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz
 11 | * NVLink 2.0
 12 | * PCIe 3.0
 13 | 
 14 | For EuroSys '23 Artifact Evaluation Committee, we can provide the AWS instance we used if you don't have any machine that satisfies the requirements. Let us know through the HotCRP portal.
 15 | 
 16 | ### 1.2 Software requirements
 17 | * Operating system: Ubuntu 18.04
 18 | * CUDA v11.3
 19 | * CuDNN v8.2.1
 20 | * ProtoBuf v3.11.4
 21 | * Boost v1.65
 22 | * TBB (Threading Building-Blocks) [v2017_U7](https://github.com/oneapi-src/oneTBB/tree/2017_U7)
 23 | * PyTorch v1.9
 24 | * Matplotlib v3.3.4 (for generating graphs)
 25 | 
 26 | ## 2. Build software components
 27 | 
 28 | ### 2.1 Dependent packages
 29 | * build-essential
 30 | ```bash
 31 | $ sudo apt update
 32 | $ sudo apt install build-essential
 33 | ```
 34 | 
 35 | * C++ Library on Ubuntu
 36 | ```
 37 | $ sudo apt-get install libtbb-dev libboost1.65-all-dev
 38 | ```
 39 | 
 40 | * CUDA Toolkit v11.3 & CuDNN v8.2.1
 41 | 
 42 | DeepPlan works with the PyTorch DL framework. To run PyTorch,
 43 | we are supposed to install the dependent packages, CUDA and CuDNN.
 44 | 
 45 | To install the CUDA Toolkit, see this link: [Download Installer for Linux Ubuntu 18.04 x86_64](https://developer.nvidia.com/cuda-11.3.0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=18.04&target_type=deb_local)
 46 | 
 47 | To install the CuDNN Library, see this link: [Installation Guide](https://docs.nvidia.com/deeplearning/cudnn/install-guide/index.html) and [CuDNN Archive](https://developer.nvidia.com/rdp/cudnn-archive)
 48 | 
 49 | * ProtoBuf v3.11.4
 50 | 
 51 | DeepPlan uses the ProtoBuf library to serialize or deserialize plans.
 52 | So, ProtoBuf is required to build DeepPlan. To install ProtoBuf, see this
 53 | following link: https://github.com/protocolbuffers/protobuf/blob/main/src/README.md
 54 | 
 55 | ### 2.2 PyTorch
 56 | To use DeepPlan, it is required to modify PyTorch (v1.9) framework.
 57 | To simplify the step reflecting the code changes on the framework, we have provided a patch file for DeepPlan.
 58 | The following command applies the patch to the PyTorch v1.9.0.
 59 | 
 60 | ```bash
 61 | $ cd $HOME
 62 | $ # Let's first clone the DeepPlan repository and set the path
 63 | $ git clone https://github.com/csl-ajou/DeepPlan/
 64 | $ DEEPPLAN_HOME=$HOME/DeepPlan
 65 | $
 66 | $ # Let's download the PyTorch v1.9.0 package and set the path
 67 | $ git clone --recursive https://github.com/pytorch/pytorch -b v1.9.0
 68 | $ PYTORCH_HOME=$HOME/pytorch
 69 | $
 70 | $ cd $PYTORCH_HOME
 71 | $ patch -p1 < $DEEPPLAN_HOME/pytorch.patch
 72 | ```
 73 | 
 74 | After applying the patch file, let's compile the PyTorch.
 75 | 
 76 | ```bash
 77 | $ python3 setup.py install
 78 | ```
 79 | 
 80 | In addition to PyTorch, install pip modules using the command below, from DeepPlan's `Home` directory.
 81 | ```bash
 82 | $ cd $DEEPPLAN_HOME
 83 | $ pip3 install -r requirements.txt
 84 | ```
 85 | 
 86 | ### 2.3 DeepPlan
 87 | 
 88 | After successfully patching and building the PyTorch framework, we are
 89 | ready to build DeepPlan to generate inference execution plans and
 90 | the DL server prototype.
 91 | 
 92 | ```bash
 93 | $ cd $DEEPPLAN_HOME
 94 | $ mkdir build
 95 | $ cd build
 96 | $ cmake -DCMAKE_PREFIX_PATH=$PYTORCH_HOME ..
 97 | $ make
 98 | ```
 99 | 
100 | ## 3. Setup execution plans
101 | 
102 | You need to create a plan for a given model. In this tutorial, our target is ResNet50.
103 | The python module, `plan.py`,  already imports the pre-trained models evaluated in the paper so that you can simply type the name of the model.
104 | ```bash
105 | # Create Plan
106 | $ cd $DEEPPLAN_HOME
107 | $ mkdir -p plan_repo
108 | $ python3 plan.py -m resnet50 -p plan_repo
109 | # The generated plan from this command is saved the plans directory
110 | ```
111 | 
112 | If you want to take a look at generated plans (Table 3 in the paper), you can click the following links.
113 | 
114 | * [Plans](https://github.com/csl-ajou/DeepPlan/tree/main/plans/V100)
115 | 
116 | 
117 | ## 4. Run benchmarks
118 | Once DeepPlan generate the execution plan for a given model, you can run the model inference with the DeepPlan engine through the commands below, from DeepPlan's `Home` directory.
119 | Here, we have an example for ResNet50. In this section, we describe how to run four different execution methods,
120 | Baseline (on-demand), PipeSwitch, DeepPlan (DHA), DeepPlan (PT), and DeepPlan (PT+DHA), explained in our paper.
121 | 
122 | Before running the model inference, you have to set `PLAN_REPO` environment variable which represents where plans are stored.
123 | 
124 | ```bash
125 | # The plan repository should be the same as the path specified in above creating a plan
126 | $ export PLAN_REPO=$DEEPPLAN_HOME/plan_repo
127 | $ cd $DEEPPLAN_HOME
128 | ```
129 | 
130 |  * Baseline (on-demand)
131 | 
132 | ```bash
133 | $ ./build/benchmark -m resnet50 -e demand
134 | ```
135 | You should see output similar to the following:
136 | ```bash
137 | Benchmarking Inference resnet50
138 | Average Latency : 17.7038 ms
139 | ```
140 | 
141 | * PipeSwtich (Bai et al. OSDI 2020)
142 | 
143 | ```bash
144 | $ ./build/benchmark -m resnet50 -e pipeline
145 | ```
146 | 
147 | You should see output similar to the following:
148 | ```bash
149 | Benchmarking Inference resnet50
150 | Average Latency : 11.981 ms
151 | ```
152 | 
153 | * DeepPlan (DHA)
154 | 
155 | ```bash
156 | $ ./build/benchmark -m resnet50 -e deepplan
157 | ```
158 | You should see output similar to the following:
159 | ```bash
160 | Benchmarking Inference renset50
161 | Average Latency : 11.2345 ms
162 | ```
163 | 
164 | * DeepPlan (PT)
165 | 
166 | ```bash
167 | $ ./build/benchmark -m resnet50 -e pipeline -d 0 2 # d option represents the devices to be used for load
168 | ```
169 | You should see output similar to the following:
170 | ```bash
171 | Benchmarking Inference renset50
172 | Average Latency : 9.39064 ms
173 | ```
174 | 
175 | * DeepPlan (DHA+PT)
176 | 
177 | ```bash
178 | $ ./build/benchmark -m resnet50 -e deepplan -d 0 2 # d option represents the devices to be used for load
179 | ```
180 | You should see output similar to the following:
181 | ```bash
182 | Benchmarking Inference renset101
183 | Average Latency : 8.36423 ms
184 | ```
185 | 
186 | ## 5. Reproduce results in the paper
187 | To reproduce the experimental results presented in the paper, we should have the model plans. To simplify creating model plans,
188 | we provide `create_all_plans.sh` shell script that makes all model plans used in the experiments.
189 | 
190 | ```bash
191 | $ cd $DEEPPLAN_HOME/scripts
192 | $ mkdir -p $DEEPPLAN_HOME/plan_repo/V100
193 | $ export PLAN_REPO=$DEEPPLAN_HOME/plan_repo/V100
194 | $ source create_all_plans.sh # the plan repository is created in PLAN_REPO path.
195 | ```
196 | For all shell scripts, we should setup `PLAN_REPO` variable which represents plans repository.
197 | We provided experiments scripts for figure #10, #12, #13, and #14.
198 | Run the script in the `$DEEPPLAN_HOME/scripts/fig#/run.sh` directory and the result will be logged in
199 | the same directory. If the Matplotlib library was installed in your machine,
200 | the graph will be drawn in `fig#.pdf`.
201 | 
202 | ### 5.1 Figure 10: Performance comparison of DeepPlan and previous studies
203 | We evaluate the inference latency with a single batch for On-Demand, PipeSwitch, DeepPlan(DHA),
204 | DeepPlan (PT), and DeepPlan (PT+DHA). The results are normalized to Baseline (on-demand).
205 | 
206 | ```bash
207 | $ cd $DEEPPLAN_HOME/scripts/fig10
208 | $ source run.sh
209 | ```
210 | 
211 | ### 5.2 Figure 12: 99% latency, goodput, and cold-start rate for BERT-Base (Synthetic workloads)
212 | We perform this experiment on a four-GPU server in an AWS instance.
213 | This experiment measures the 99% latency, goodput, and cold-start for BERT-Base
214 | while increasing the number of model instances concurrently running on the GPUs.
215 | 
216 | ```bash
217 | $ cd $DEEPPLAN_HOME/scripts/fig12
218 | $ source run.sh
219 | ```
220 | 
221 | ### 5.3 Figure 13: 99% latency for BERT-Large and GPT2 (Synthetic workloads)
222 | This experiment is similar to above the experiment (Figure 12) except that
223 | the evaluation model is changed from BERT-Base to Bert-Large and GPT2.
224 | ```bash
225 | $ cd $DEEPPLAN_HOME/scripts/fig13
226 | $ source run.sh
227 | ```
228 | 
229 | ### 5.4 Figure 14: Performance of real-world trace (Real-world workloads)
230 | This experiment is also performed on a four-GPU server in an AWS instance.
231 | The above experiments (Figure 12, Figure 13) run with synthetic trace. But
232 | this experiment run with real-world trace derived from Microsoft Azure Functions.
233 | In this experiment, we evaluate three workloads of three hours each (total 9 hours).
234 | 
235 | To run this experiment, you should prepare azure trace dataset.
236 | https://github.com/Azure/AzurePublicDataset/blob/master/AzureFunctionsDataset2019.md
237 | 
238 | The following command download the azure-trace dataset.
239 | ```bash
240 | $ cd $DEEPPLAN_HOME/scripts
241 | $ source download_azure_trace_dataset.sh
242 | 
243 | # To recognize this trace file from client, The `AZURE_TRACE_DIR` variable should be set
244 | $ export AZURE_TRACE_DIR=$DEEPPLAN_HOME/scripts/azure-functions
245 | ```
246 | 
247 | ```bash
248 | $ cd $DEEPPLAN_HOME/scripts/fig14
249 | $ source run.sh
250 | ```
251 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from . import vision
  3 | from transformers import AutoModel
  4 | from enum import Enum
  5 | from typing import Union, List
  6 | 
  7 | class ModelType(Enum):
  8 |     CNN = 0
  9 |     TRANSFORMER = 1
 10 | 
 11 | class ModelConfig:
 12 |     def __init__(
 13 |         self,
 14 |         model_name: str,
 15 |         model_type: Union[str],
 16 |         input_shape: List[int],
 17 |         generate_input_func,
 18 |         max_num: int = -1,
 19 |         num_layers: int = 0,
 20 |     ):
 21 |         self.model_name = model_name
 22 | 
 23 |         if model_type in ("CNN", ModelType.CNN):
 24 |             self.model_type = ModelType.CNN
 25 |         elif model_type in ("Transformer", ModelType.TRANSFORMER):
 26 |             self.model_type = ModelType.TRANSFORMER
 27 | 
 28 |         self.input_shape = input_shape
 29 |         self.generate_input_func = generate_input_func
 30 |         self.max_num = max_num
 31 |         self.num_layers = num_layers
 32 | 
 33 | 
 34 |     def generateInput(self, batch_size):
 35 |         t = self.generate_input_func(batch_size, self.input_shape, self.max_num)
 36 |         return t
 37 | 
 38 |     def generateModel(self):
 39 |         model = None
 40 |         if self.model_type  == ModelType.CNN:
 41 |             model = getattr(vision, self.model_name)()
 42 |         elif self.model_type == ModelType.TRANSFORMER:
 43 |             model = AutoModel.from_pretrained(self.model_name, torchscript=True)
 44 |             model.num_layers = self.num_layers
 45 |         model.is_parallel = False
 46 | 
 47 |         return model
 48 | 
 49 | 
 50 | def generate_CNN_input(batch_size, shape, max_num=0):
 51 |     input_shape = [batch_size] + shape
 52 |     x = torch.randn(input_shape)
 53 |     return x
 54 | 
 55 | def generate_TRS_input(batch_size, shape, max_num):
 56 |     input_shape = [batch_size] + shape
 57 |     x = torch.randint(max_num, input_shape, dtype=torch.long)
 58 |     return x
 59 | 
 60 | def generate_T5_input(batch_size, shape, max_num):
 61 |     x = generate_TRS_input(batch_size, shape, max_num)
 62 |     return {"input_ids": x, "decoder_input_ids": x}
 63 | 
 64 | 
 65 | model_list = {
 66 |         'resnet50': ModelConfig('resnet50', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 67 |         'resnet101': ModelConfig('resnet101', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 68 |         'resnext50': ModelConfig('resnext50_32x4d', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 69 |         'resnext101': ModelConfig('resnext101_32x8d', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 70 |         'wide_resnet50': ModelConfig('wide_resnet50_2', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 71 |         'wide_resnet101': ModelConfig('wide_resnet101_2', ModelType.CNN, [3, 224, 224], generate_CNN_input),
 72 |         'bert_base': ModelConfig('bert-base-uncased', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=30522, num_layers=12),
 73 |         'bert_large': ModelConfig('bert-large-uncased', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=30522, num_layers=24),
 74 |         'gpt2': ModelConfig('gpt2', ModelType.TRANSFORMER, [1024], generate_TRS_input, max_num=50257, num_layers=12),
 75 |         'gpt2_384': ModelConfig('gpt2', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=50257, num_layers=12),
 76 |         'gpt2_medium': ModelConfig('gpt2-medium', ModelType.TRANSFORMER, [1024], generate_TRS_input, max_num=50257, num_layers=24),
 77 |         'bart_base': ModelConfig('facebook/bart-base', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=50265),
 78 |         't5_base': ModelConfig('t5-base', ModelType.TRANSFORMER, [300], generate_T5_input, max_num=32129, num_layers=12),
 79 |         't5_small': ModelConfig('t5-small', ModelType.TRANSFORMER, [300], generate_T5_input, max_num=32129, num_layers=6),
 80 |         't5_large': ModelConfig('t5-large', ModelType.TRANSFORMER, [300], generate_T5_input, max_num=32129, num_layers=24),
 81 |         'roberta_base': ModelConfig('roberta-base', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=50265, num_layers=12),
 82 |         'roberta_large': ModelConfig('roberta-large', ModelType.TRANSFORMER, [384], generate_TRS_input, max_num=50265, num_layers=24),
 83 |         }
 84 | 
 85 | def import_model(model_name):
 86 |     model = None
 87 | 
 88 |     if model_name in model_list:
 89 |         model = model_list[model_name].generateModel()
 90 |     else:
 91 |         raise RuntimeError(f"[Error] Not found '{model_name}' model")
 92 | 
 93 |     return model
 94 | 
 95 | def import_data(model_name, batch_size):
 96 |     data = None
 97 | 
 98 |     if model_name in model_list:
 99 |         data = model_list[model_name].generateInput(batch_size)
100 |     else:
101 |         raise RuntimeError(f"[Error] Not found '{model_name}' model")
102 | 
103 |     return data
104 | 


--------------------------------------------------------------------------------
/models/vision/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import *
2 | 


--------------------------------------------------------------------------------
/models/vision/utils.py:
--------------------------------------------------------------------------------
1 | try:
2 |     from torch.hub import load_state_dict_from_url
3 | except ImportError:
4 |     from torch.utils.model_zoo import load_url as load_state_dict_from_url
5 | 


--------------------------------------------------------------------------------
/plans/V100/bert_base/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Embedding          (89.420 MB) |X (direct-host-access)     |X (direct-host-access)
  4 | |1-Embedding          (1.500 MB) |X (direct-host-access)     |X (direct-host-access)
  5 | |2-Embedding          (0.006 MB) |X (direct-host-access)     |O
  6 | |3-LayerNorm          (0.006 MB) |O                          |O
  7 | |4-Linear             (2.253 MB) |O                          |O
  8 | |5-Linear             (2.253 MB) |O                          |O
  9 | |6-Linear             (2.253 MB) |O                          |O
 10 | |7-Linear             (2.253 MB) |O                          |O
 11 | |8-LayerNorm          (0.006 MB) |O                          |O
 12 | |9-Linear             (9.012 MB) |O                          |O
 13 | |10-Linear            (9.003 MB) |O                          |O
 14 | |11-LayerNorm         (0.006 MB) |O                          |O
 15 | |12-Linear            (2.253 MB) |O                          |O
 16 | |13-Linear            (2.253 MB) |O                          |O
 17 | |14-Linear            (2.253 MB) |O                          |O
 18 | |15-Linear            (2.253 MB) |O                          |O
 19 | |16-LayerNorm         (0.006 MB) |O                          |O
 20 | |17-Linear            (9.012 MB) |O                          |O
 21 | |18-Linear            (9.003 MB) |O                          |O
 22 | |19-LayerNorm         (0.006 MB) |O                          |O
 23 | |20-Linear            (2.253 MB) |O                          |O
 24 | |21-Linear            (2.253 MB) |O                          |O
 25 | |22-Linear            (2.253 MB) |O                          |O
 26 | |23-Linear            (2.253 MB) |O                          |O
 27 | |24-LayerNorm         (0.006 MB) |O                          |O
 28 | |25-Linear            (9.012 MB) |O                          |O
 29 | |26-Linear            (9.003 MB) |O                          |O
 30 | |27-LayerNorm         (0.006 MB) |O                          |O
 31 | |28-Linear            (2.253 MB) |O                          |O
 32 | |29-Linear            (2.253 MB) |O                          |O
 33 | |30-Linear            (2.253 MB) |O                          |O
 34 | |31-Linear            (2.253 MB) |O                          |O
 35 | |32-LayerNorm         (0.006 MB) |O                          |O
 36 | |33-Linear            (9.012 MB) |O                          |O
 37 | |34-Linear            (9.003 MB) |O                          |O
 38 | |35-LayerNorm         (0.006 MB) |O                          |O
 39 | |36-Linear            (2.253 MB) |O                          |O
 40 | |37-Linear            (2.253 MB) |O                          |O
 41 | |38-Linear            (2.253 MB) |O                          |O
 42 | |39-Linear            (2.253 MB) |O                          |O
 43 | |40-LayerNorm         (0.006 MB) |O                          |O
 44 | |41-Linear            (9.012 MB) |O                          |O
 45 | |42-Linear            (9.003 MB) |O                          |O
 46 | |43-LayerNorm         (0.006 MB) |O                          |O
 47 | |44-Linear            (2.253 MB) |O                          |O
 48 | |45-Linear            (2.253 MB) |O                          |O
 49 | |46-Linear            (2.253 MB) |O                          |O
 50 | |47-Linear            (2.253 MB) |O                          |O
 51 | |48-LayerNorm         (0.006 MB) |O                          |O
 52 | |49-Linear            (9.012 MB) |O                          |O
 53 | |50-Linear            (9.003 MB) |O                          |O
 54 | |51-LayerNorm         (0.006 MB) |O                          |O
 55 | |52-Linear            (2.253 MB) |O                          |O
 56 | |53-Linear            (2.253 MB) |O                          |O
 57 | |54-Linear            (2.253 MB) |O                          |O
 58 | |55-Linear            (2.253 MB) |O                          |O
 59 | |56-LayerNorm         (0.006 MB) |O                          |O
 60 | |57-Linear            (9.012 MB) |O                          |O
 61 | |58-Linear            (9.003 MB) |O                          |O
 62 | |59-LayerNorm         (0.006 MB) |O                          |O
 63 | |60-Linear            (2.253 MB) |O                          |O
 64 | |61-Linear            (2.253 MB) |O                          |O
 65 | |62-Linear            (2.253 MB) |O                          |O
 66 | |63-Linear            (2.253 MB) |O                          |O
 67 | |64-LayerNorm         (0.006 MB) |O                          |O
 68 | |65-Linear            (9.012 MB) |O                          |O
 69 | |66-Linear            (9.003 MB) |O                          |O
 70 | |67-LayerNorm         (0.006 MB) |O                          |O
 71 | |68-Linear            (2.253 MB) |O                          |O
 72 | |69-Linear            (2.253 MB) |O                          |O
 73 | |70-Linear            (2.253 MB) |O                          |O
 74 | |71-Linear            (2.253 MB) |O                          |O
 75 | |72-LayerNorm         (0.006 MB) |O                          |O
 76 | |73-Linear            (9.012 MB) |O                          |O
 77 | |74-Linear            (9.003 MB) |O                          |O
 78 | |75-LayerNorm         (0.006 MB) |O                          |O
 79 | |76-Linear            (2.253 MB) |O                          |O
 80 | |77-Linear            (2.253 MB) |O                          |O
 81 | |78-Linear            (2.253 MB) |O                          |O
 82 | |79-Linear            (2.253 MB) |O                          |O
 83 | |80-LayerNorm         (0.006 MB) |O                          |O
 84 | |81-Linear            (9.012 MB) |O                          |O
 85 | |82-Linear            (9.003 MB) |O                          |O
 86 | |83-LayerNorm         (0.006 MB) |O                          |O
 87 | |84-Linear            (2.253 MB) |O                          |O
 88 | |85-Linear            (2.253 MB) |O                          |O
 89 | |86-Linear            (2.253 MB) |O                          |O
 90 | |87-Linear            (2.253 MB) |O                          |O
 91 | |88-LayerNorm         (0.006 MB) |O                          |O
 92 | |89-Linear            (9.012 MB) |O                          |O
 93 | |90-Linear            (9.003 MB) |O                          |O
 94 | |91-LayerNorm         (0.006 MB) |O                          |O
 95 | |92-Linear            (2.253 MB) |O                          |O
 96 | |93-Linear            (2.253 MB) |O                          |O
 97 | |94-Linear            (2.253 MB) |O                          |O
 98 | |95-Linear            (2.253 MB) |O                          |O
 99 | |96-LayerNorm         (0.006 MB) |O                          |O
100 | |97-Linear            (9.012 MB) |O                          |O
101 | |98-Linear            (9.003 MB) |O                          |O
102 | |99-LayerNorm         (0.006 MB) |O                          |O
103 | |100-Linear           (2.253 MB) |X (direct-host-access)     |O
104 | |101-Tanh             (0.000 MB) |X                          |X
105 | 


--------------------------------------------------------------------------------
/plans/V100/bert_large/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Embedding          (119.227 MB) |X (direct-host-access)     |X (direct-host-access)
  4 | |1-Embedding          (2.000 MB) |X (direct-host-access)     |O
  5 | |2-Embedding          (0.008 MB) |X (direct-host-access)     |O
  6 | |3-LayerNorm          (0.008 MB) |O                          |O
  7 | |4-Linear             (4.004 MB) |O                          |O
  8 | |5-Linear             (4.004 MB) |O                          |O
  9 | |6-Linear             (4.004 MB) |O                          |O
 10 | |7-Linear             (4.004 MB) |O                          |O
 11 | |8-LayerNorm          (0.008 MB) |O                          |O
 12 | |9-Linear             (16.016 MB) |O                          |O
 13 | |10-Linear            (16.004 MB) |O                          |O
 14 | |11-LayerNorm         (0.008 MB) |O                          |O
 15 | |12-Linear            (4.004 MB) |O                          |O
 16 | |13-Linear            (4.004 MB) |O                          |O
 17 | |14-Linear            (4.004 MB) |O                          |O
 18 | |15-Linear            (4.004 MB) |O                          |O
 19 | |16-LayerNorm         (0.008 MB) |O                          |O
 20 | |17-Linear            (16.016 MB) |O                          |O
 21 | |18-Linear            (16.004 MB) |O                          |O
 22 | |19-LayerNorm         (0.008 MB) |O                          |O
 23 | |20-Linear            (4.004 MB) |O                          |O
 24 | |21-Linear            (4.004 MB) |O                          |O
 25 | |22-Linear            (4.004 MB) |O                          |O
 26 | |23-Linear            (4.004 MB) |O                          |O
 27 | |24-LayerNorm         (0.008 MB) |O                          |O
 28 | |25-Linear            (16.016 MB) |O                          |O
 29 | |26-Linear            (16.004 MB) |O                          |O
 30 | |27-LayerNorm         (0.008 MB) |O                          |O
 31 | |28-Linear            (4.004 MB) |O                          |O
 32 | |29-Linear            (4.004 MB) |O                          |O
 33 | |30-Linear            (4.004 MB) |O                          |O
 34 | |31-Linear            (4.004 MB) |O                          |O
 35 | |32-LayerNorm         (0.008 MB) |O                          |O
 36 | |33-Linear            (16.016 MB) |O                          |O
 37 | |34-Linear            (16.004 MB) |O                          |O
 38 | |35-LayerNorm         (0.008 MB) |O                          |O
 39 | |36-Linear            (4.004 MB) |O                          |O
 40 | |37-Linear            (4.004 MB) |O                          |O
 41 | |38-Linear            (4.004 MB) |O                          |O
 42 | |39-Linear            (4.004 MB) |O                          |O
 43 | |40-LayerNorm         (0.008 MB) |O                          |O
 44 | |41-Linear            (16.016 MB) |O                          |O
 45 | |42-Linear            (16.004 MB) |O                          |O
 46 | |43-LayerNorm         (0.008 MB) |O                          |O
 47 | |44-Linear            (4.004 MB) |O                          |O
 48 | |45-Linear            (4.004 MB) |O                          |O
 49 | |46-Linear            (4.004 MB) |O                          |O
 50 | |47-Linear            (4.004 MB) |O                          |O
 51 | |48-LayerNorm         (0.008 MB) |O                          |O
 52 | |49-Linear            (16.016 MB) |O                          |O
 53 | |50-Linear            (16.004 MB) |O                          |O
 54 | |51-LayerNorm         (0.008 MB) |O                          |O
 55 | |52-Linear            (4.004 MB) |O                          |O
 56 | |53-Linear            (4.004 MB) |O                          |O
 57 | |54-Linear            (4.004 MB) |O                          |O
 58 | |55-Linear            (4.004 MB) |O                          |O
 59 | |56-LayerNorm         (0.008 MB) |O                          |O
 60 | |57-Linear            (16.016 MB) |O                          |O
 61 | |58-Linear            (16.004 MB) |O                          |O
 62 | |59-LayerNorm         (0.008 MB) |O                          |O
 63 | |60-Linear            (4.004 MB) |O                          |O
 64 | |61-Linear            (4.004 MB) |O                          |O
 65 | |62-Linear            (4.004 MB) |O                          |O
 66 | |63-Linear            (4.004 MB) |O                          |O
 67 | |64-LayerNorm         (0.008 MB) |O                          |O
 68 | |65-Linear            (16.016 MB) |O                          |O
 69 | |66-Linear            (16.004 MB) |O                          |O
 70 | |67-LayerNorm         (0.008 MB) |O                          |O
 71 | |68-Linear            (4.004 MB) |O                          |O
 72 | |69-Linear            (4.004 MB) |O                          |O
 73 | |70-Linear            (4.004 MB) |O                          |O
 74 | |71-Linear            (4.004 MB) |O                          |O
 75 | |72-LayerNorm         (0.008 MB) |O                          |O
 76 | |73-Linear            (16.016 MB) |O                          |O
 77 | |74-Linear            (16.004 MB) |O                          |O
 78 | |75-LayerNorm         (0.008 MB) |O                          |O
 79 | |76-Linear            (4.004 MB) |O                          |O
 80 | |77-Linear            (4.004 MB) |O                          |O
 81 | |78-Linear            (4.004 MB) |O                          |O
 82 | |79-Linear            (4.004 MB) |O                          |O
 83 | |80-LayerNorm         (0.008 MB) |O                          |O
 84 | |81-Linear            (16.016 MB) |O                          |O
 85 | |82-Linear            (16.004 MB) |O                          |O
 86 | |83-LayerNorm         (0.008 MB) |O                          |O
 87 | |84-Linear            (4.004 MB) |O                          |O
 88 | |85-Linear            (4.004 MB) |O                          |O
 89 | |86-Linear            (4.004 MB) |O                          |O
 90 | |87-Linear            (4.004 MB) |O                          |O
 91 | |88-LayerNorm         (0.008 MB) |O                          |O
 92 | |89-Linear            (16.016 MB) |O                          |O
 93 | |90-Linear            (16.004 MB) |O                          |O
 94 | |91-LayerNorm         (0.008 MB) |O                          |O
 95 | |92-Linear            (4.004 MB) |O                          |O
 96 | |93-Linear            (4.004 MB) |O                          |O
 97 | |94-Linear            (4.004 MB) |O                          |O
 98 | |95-Linear            (4.004 MB) |O                          |O
 99 | |96-LayerNorm         (0.008 MB) |O                          |O
100 | |97-Linear            (16.016 MB) |O                          |O
101 | |98-Linear            (16.004 MB) |O                          |O
102 | |99-LayerNorm         (0.008 MB) |O                          |O
103 | |100-Linear           (4.004 MB) |O                          |O
104 | |101-Linear           (4.004 MB) |O                          |O
105 | |102-Linear           (4.004 MB) |O                          |O
106 | |103-Linear           (4.004 MB) |O                          |O
107 | |104-LayerNorm        (0.008 MB) |O                          |O
108 | |105-Linear           (16.016 MB) |O                          |O
109 | |106-Linear           (16.004 MB) |O                          |O
110 | |107-LayerNorm        (0.008 MB) |O                          |O
111 | |108-Linear           (4.004 MB) |O                          |O
112 | |109-Linear           (4.004 MB) |O                          |O
113 | |110-Linear           (4.004 MB) |O                          |O
114 | |111-Linear           (4.004 MB) |O                          |O
115 | |112-LayerNorm        (0.008 MB) |O                          |O
116 | |113-Linear           (16.016 MB) |O                          |O
117 | |114-Linear           (16.004 MB) |O                          |O
118 | |115-LayerNorm        (0.008 MB) |O                          |O
119 | |116-Linear           (4.004 MB) |O                          |O
120 | |117-Linear           (4.004 MB) |O                          |O
121 | |118-Linear           (4.004 MB) |O                          |O
122 | |119-Linear           (4.004 MB) |O                          |O
123 | |120-LayerNorm        (0.008 MB) |O                          |O
124 | |121-Linear           (16.016 MB) |O                          |O
125 | |122-Linear           (16.004 MB) |O                          |O
126 | |123-LayerNorm        (0.008 MB) |O                          |O
127 | |124-Linear           (4.004 MB) |O                          |O
128 | |125-Linear           (4.004 MB) |O                          |O
129 | |126-Linear           (4.004 MB) |O                          |O
130 | |127-Linear           (4.004 MB) |O                          |O
131 | |128-LayerNorm        (0.008 MB) |O                          |O
132 | |129-Linear           (16.016 MB) |O                          |O
133 | |130-Linear           (16.004 MB) |O                          |O
134 | |131-LayerNorm        (0.008 MB) |O                          |O
135 | |132-Linear           (4.004 MB) |O                          |O
136 | |133-Linear           (4.004 MB) |O                          |O
137 | |134-Linear           (4.004 MB) |O                          |O
138 | |135-Linear           (4.004 MB) |O                          |O
139 | |136-LayerNorm        (0.008 MB) |O                          |O
140 | |137-Linear           (16.016 MB) |O                          |O
141 | |138-Linear           (16.004 MB) |O                          |O
142 | |139-LayerNorm        (0.008 MB) |O                          |O
143 | |140-Linear           (4.004 MB) |O                          |O
144 | |141-Linear           (4.004 MB) |O                          |O
145 | |142-Linear           (4.004 MB) |O                          |O
146 | |143-Linear           (4.004 MB) |O                          |O
147 | |144-LayerNorm        (0.008 MB) |O                          |O
148 | |145-Linear           (16.016 MB) |O                          |O
149 | |146-Linear           (16.004 MB) |O                          |O
150 | |147-LayerNorm        (0.008 MB) |O                          |O
151 | |148-Linear           (4.004 MB) |O                          |O
152 | |149-Linear           (4.004 MB) |O                          |O
153 | |150-Linear           (4.004 MB) |O                          |O
154 | |151-Linear           (4.004 MB) |O                          |O
155 | |152-LayerNorm        (0.008 MB) |O                          |O
156 | |153-Linear           (16.016 MB) |O                          |O
157 | |154-Linear           (16.004 MB) |O                          |O
158 | |155-LayerNorm        (0.008 MB) |O                          |O
159 | |156-Linear           (4.004 MB) |O                          |O
160 | |157-Linear           (4.004 MB) |O                          |O
161 | |158-Linear           (4.004 MB) |O                          |O
162 | |159-Linear           (4.004 MB) |O                          |O
163 | |160-LayerNorm        (0.008 MB) |O                          |O
164 | |161-Linear           (16.016 MB) |O                          |O
165 | |162-Linear           (16.004 MB) |O                          |O
166 | |163-LayerNorm        (0.008 MB) |O                          |O
167 | |164-Linear           (4.004 MB) |O                          |O
168 | |165-Linear           (4.004 MB) |O                          |O
169 | |166-Linear           (4.004 MB) |O                          |O
170 | |167-Linear           (4.004 MB) |O                          |O
171 | |168-LayerNorm        (0.008 MB) |O                          |O
172 | |169-Linear           (16.016 MB) |O                          |O
173 | |170-Linear           (16.004 MB) |O                          |O
174 | |171-LayerNorm        (0.008 MB) |O                          |O
175 | |172-Linear           (4.004 MB) |O                          |O
176 | |173-Linear           (4.004 MB) |O                          |O
177 | |174-Linear           (4.004 MB) |O                          |O
178 | |175-Linear           (4.004 MB) |O                          |O
179 | |176-LayerNorm        (0.008 MB) |O                          |O
180 | |177-Linear           (16.016 MB) |O                          |O
181 | |178-Linear           (16.004 MB) |O                          |O
182 | |179-LayerNorm        (0.008 MB) |O                          |O
183 | |180-Linear           (4.004 MB) |O                          |O
184 | |181-Linear           (4.004 MB) |O                          |O
185 | |182-Linear           (4.004 MB) |O                          |O
186 | |183-Linear           (4.004 MB) |O                          |O
187 | |184-LayerNorm        (0.008 MB) |O                          |O
188 | |185-Linear           (16.016 MB) |O                          |O
189 | |186-Linear           (16.004 MB) |O                          |O
190 | |187-LayerNorm        (0.008 MB) |O                          |O
191 | |188-Linear           (4.004 MB) |O                          |O
192 | |189-Linear           (4.004 MB) |O                          |O
193 | |190-Linear           (4.004 MB) |O                          |O
194 | |191-Linear           (4.004 MB) |O                          |O
195 | |192-LayerNorm        (0.008 MB) |O                          |O
196 | |193-Linear           (16.016 MB) |O                          |O
197 | |194-Linear           (16.004 MB) |O                          |O
198 | |195-LayerNorm        (0.008 MB) |O                          |O
199 | |196-Linear           (4.004 MB) |O                          |O
200 | |197-Tanh             (0.000 MB) |X                          |X
201 | 


--------------------------------------------------------------------------------
/plans/V100/gpt2/README.md:
--------------------------------------------------------------------------------
 1 | |Layer                    |Initial approach         |DeepPlan (DHA)
 2 | |-------------------------|-------------------------|-------------------------
 3 | |0-Embedding          (147.237 MB) |X (direct-host-access)     |X (direct-host-access)
 4 | |1-Embedding          (3.000 MB) |X (direct-host-access)     |O
 5 | |2-LayerNorm          (0.006 MB) |O                          |O
 6 | |3-Conv1D             (6.759 MB) |O                          |O
 7 | |4-Conv1D             (2.253 MB) |O                          |O
 8 | |5-LayerNorm          (0.006 MB) |O                          |O
 9 | |6-Conv1D             (9.012 MB) |O                          |O
10 | |7-Conv1D             (9.003 MB) |O                          |O
11 | |8-LayerNorm          (0.006 MB) |O                          |O
12 | |9-Conv1D             (6.759 MB) |O                          |O
13 | |10-Conv1D            (2.253 MB) |O                          |O
14 | |11-LayerNorm         (0.006 MB) |O                          |O
15 | |12-Conv1D            (9.012 MB) |O                          |O
16 | |13-Conv1D            (9.003 MB) |O                          |O
17 | |14-LayerNorm         (0.006 MB) |O                          |O
18 | |15-Conv1D            (6.759 MB) |O                          |O
19 | |16-Conv1D            (2.253 MB) |O                          |O
20 | |17-LayerNorm         (0.006 MB) |O                          |O
21 | |18-Conv1D            (9.012 MB) |O                          |O
22 | |19-Conv1D            (9.003 MB) |O                          |O
23 | |20-LayerNorm         (0.006 MB) |O                          |O
24 | |21-Conv1D            (6.759 MB) |O                          |O
25 | |22-Conv1D            (2.253 MB) |O                          |O
26 | |23-LayerNorm         (0.006 MB) |O                          |O
27 | |24-Conv1D            (9.012 MB) |O                          |O
28 | |25-Conv1D            (9.003 MB) |O                          |O
29 | |26-LayerNorm         (0.006 MB) |O                          |O
30 | |27-Conv1D            (6.759 MB) |O                          |O
31 | |28-Conv1D            (2.253 MB) |O                          |O
32 | |29-LayerNorm         (0.006 MB) |O                          |O
33 | |30-Conv1D            (9.012 MB) |O                          |O
34 | |31-Conv1D            (9.003 MB) |O                          |O
35 | |32-LayerNorm         (0.006 MB) |O                          |O
36 | |33-Conv1D            (6.759 MB) |O                          |O
37 | |34-Conv1D            (2.253 MB) |O                          |O
38 | |35-LayerNorm         (0.006 MB) |O                          |O
39 | |36-Conv1D            (9.012 MB) |O                          |O
40 | |37-Conv1D            (9.003 MB) |O                          |O
41 | |38-LayerNorm         (0.006 MB) |O                          |O
42 | |39-Conv1D            (6.759 MB) |O                          |O
43 | |40-Conv1D            (2.253 MB) |O                          |O
44 | |41-LayerNorm         (0.006 MB) |O                          |O
45 | |42-Conv1D            (9.012 MB) |O                          |O
46 | |43-Conv1D            (9.003 MB) |O                          |O
47 | |44-LayerNorm         (0.006 MB) |O                          |O
48 | |45-Conv1D            (6.759 MB) |O                          |O
49 | |46-Conv1D            (2.253 MB) |O                          |O
50 | |47-LayerNorm         (0.006 MB) |O                          |O
51 | |48-Conv1D            (9.012 MB) |O                          |O
52 | |49-Conv1D            (9.003 MB) |O                          |O
53 | |50-LayerNorm         (0.006 MB) |O                          |O
54 | |51-Conv1D            (6.759 MB) |O                          |O
55 | |52-Conv1D            (2.253 MB) |O                          |O
56 | |53-LayerNorm         (0.006 MB) |O                          |O
57 | |54-Conv1D            (9.012 MB) |O                          |O
58 | |55-Conv1D            (9.003 MB) |O                          |O
59 | |56-LayerNorm         (0.006 MB) |O                          |O
60 | |57-Conv1D            (6.759 MB) |O                          |O
61 | |58-Conv1D            (2.253 MB) |O                          |O
62 | |59-LayerNorm         (0.006 MB) |O                          |O
63 | |60-Conv1D            (9.012 MB) |O                          |O
64 | |61-Conv1D            (9.003 MB) |O                          |O
65 | |62-LayerNorm         (0.006 MB) |O                          |O
66 | |63-Conv1D            (6.759 MB) |O                          |O
67 | |64-Conv1D            (2.253 MB) |O                          |O
68 | |65-LayerNorm         (0.006 MB) |O                          |O
69 | |66-Conv1D            (9.012 MB) |O                          |O
70 | |67-Conv1D            (9.003 MB) |O                          |O
71 | |68-LayerNorm         (0.006 MB) |O                          |O
72 | |69-Conv1D            (6.759 MB) |O                          |O
73 | |70-Conv1D            (2.253 MB) |O                          |O
74 | |71-LayerNorm         (0.006 MB) |O                          |O
75 | |72-Conv1D            (9.012 MB) |O                          |O
76 | |73-Conv1D            (9.003 MB) |O                          |O
77 | |74-LayerNorm         (0.006 MB) |O                          |O
78 | 


--------------------------------------------------------------------------------
/plans/V100/gpt2_medium/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Embedding          (196.316 MB) |X (direct-host-access)     |X (direct-host-access)
  4 | |1-Embedding          (4.000 MB) |X (direct-host-access)     |O
  5 | |2-LayerNorm          (0.008 MB) |O                          |O
  6 | |3-Conv1D             (12.012 MB) |O                          |O
  7 | |4-Conv1D             (4.004 MB) |O                          |O
  8 | |5-LayerNorm          (0.008 MB) |O                          |O
  9 | |6-Conv1D             (16.016 MB) |O                          |O
 10 | |7-Conv1D             (16.004 MB) |O                          |O
 11 | |8-LayerNorm          (0.008 MB) |O                          |O
 12 | |9-Conv1D             (12.012 MB) |O                          |O
 13 | |10-Conv1D            (4.004 MB) |O                          |O
 14 | |11-LayerNorm         (0.008 MB) |O                          |O
 15 | |12-Conv1D            (16.016 MB) |O                          |O
 16 | |13-Conv1D            (16.004 MB) |O                          |O
 17 | |14-LayerNorm         (0.008 MB) |O                          |O
 18 | |15-Conv1D            (12.012 MB) |O                          |O
 19 | |16-Conv1D            (4.004 MB) |O                          |O
 20 | |17-LayerNorm         (0.008 MB) |O                          |O
 21 | |18-Conv1D            (16.016 MB) |O                          |O
 22 | |19-Conv1D            (16.004 MB) |O                          |O
 23 | |20-LayerNorm         (0.008 MB) |O                          |O
 24 | |21-Conv1D            (12.012 MB) |O                          |O
 25 | |22-Conv1D            (4.004 MB) |O                          |O
 26 | |23-LayerNorm         (0.008 MB) |O                          |O
 27 | |24-Conv1D            (16.016 MB) |O                          |O
 28 | |25-Conv1D            (16.004 MB) |O                          |O
 29 | |26-LayerNorm         (0.008 MB) |O                          |O
 30 | |27-Conv1D            (12.012 MB) |O                          |O
 31 | |28-Conv1D            (4.004 MB) |O                          |O
 32 | |29-LayerNorm         (0.008 MB) |O                          |O
 33 | |30-Conv1D            (16.016 MB) |O                          |O
 34 | |31-Conv1D            (16.004 MB) |O                          |O
 35 | |32-LayerNorm         (0.008 MB) |O                          |O
 36 | |33-Conv1D            (12.012 MB) |O                          |O
 37 | |34-Conv1D            (4.004 MB) |O                          |O
 38 | |35-LayerNorm         (0.008 MB) |O                          |O
 39 | |36-Conv1D            (16.016 MB) |O                          |O
 40 | |37-Conv1D            (16.004 MB) |O                          |O
 41 | |38-LayerNorm         (0.008 MB) |O                          |O
 42 | |39-Conv1D            (12.012 MB) |O                          |O
 43 | |40-Conv1D            (4.004 MB) |O                          |O
 44 | |41-LayerNorm         (0.008 MB) |O                          |O
 45 | |42-Conv1D            (16.016 MB) |O                          |O
 46 | |43-Conv1D            (16.004 MB) |O                          |O
 47 | |44-LayerNorm         (0.008 MB) |O                          |O
 48 | |45-Conv1D            (12.012 MB) |O                          |O
 49 | |46-Conv1D            (4.004 MB) |O                          |O
 50 | |47-LayerNorm         (0.008 MB) |O                          |O
 51 | |48-Conv1D            (16.016 MB) |O                          |O
 52 | |49-Conv1D            (16.004 MB) |O                          |O
 53 | |50-LayerNorm         (0.008 MB) |O                          |O
 54 | |51-Conv1D            (12.012 MB) |O                          |O
 55 | |52-Conv1D            (4.004 MB) |O                          |O
 56 | |53-LayerNorm         (0.008 MB) |O                          |O
 57 | |54-Conv1D            (16.016 MB) |O                          |O
 58 | |55-Conv1D            (16.004 MB) |O                          |O
 59 | |56-LayerNorm         (0.008 MB) |O                          |O
 60 | |57-Conv1D            (12.012 MB) |O                          |O
 61 | |58-Conv1D            (4.004 MB) |O                          |O
 62 | |59-LayerNorm         (0.008 MB) |O                          |O
 63 | |60-Conv1D            (16.016 MB) |O                          |O
 64 | |61-Conv1D            (16.004 MB) |O                          |O
 65 | |62-LayerNorm         (0.008 MB) |O                          |O
 66 | |63-Conv1D            (12.012 MB) |O                          |O
 67 | |64-Conv1D            (4.004 MB) |O                          |O
 68 | |65-LayerNorm         (0.008 MB) |O                          |O
 69 | |66-Conv1D            (16.016 MB) |O                          |O
 70 | |67-Conv1D            (16.004 MB) |O                          |O
 71 | |68-LayerNorm         (0.008 MB) |O                          |O
 72 | |69-Conv1D            (12.012 MB) |O                          |O
 73 | |70-Conv1D            (4.004 MB) |O                          |O
 74 | |71-LayerNorm         (0.008 MB) |O                          |O
 75 | |72-Conv1D            (16.016 MB) |O                          |O
 76 | |73-Conv1D            (16.004 MB) |O                          |O
 77 | |74-LayerNorm         (0.008 MB) |O                          |O
 78 | |75-Conv1D            (12.012 MB) |O                          |O
 79 | |76-Conv1D            (4.004 MB) |O                          |O
 80 | |77-LayerNorm         (0.008 MB) |O                          |O
 81 | |78-Conv1D            (16.016 MB) |O                          |O
 82 | |79-Conv1D            (16.004 MB) |O                          |O
 83 | |80-LayerNorm         (0.008 MB) |O                          |O
 84 | |81-Conv1D            (12.012 MB) |O                          |O
 85 | |82-Conv1D            (4.004 MB) |O                          |O
 86 | |83-LayerNorm         (0.008 MB) |O                          |O
 87 | |84-Conv1D            (16.016 MB) |O                          |O
 88 | |85-Conv1D            (16.004 MB) |O                          |O
 89 | |86-LayerNorm         (0.008 MB) |O                          |O
 90 | |87-Conv1D            (12.012 MB) |O                          |O
 91 | |88-Conv1D            (4.004 MB) |O                          |O
 92 | |89-LayerNorm         (0.008 MB) |O                          |O
 93 | |90-Conv1D            (16.016 MB) |O                          |O
 94 | |91-Conv1D            (16.004 MB) |O                          |O
 95 | |92-LayerNorm         (0.008 MB) |O                          |O
 96 | |93-Conv1D            (12.012 MB) |O                          |O
 97 | |94-Conv1D            (4.004 MB) |O                          |O
 98 | |95-LayerNorm         (0.008 MB) |O                          |O
 99 | |96-Conv1D            (16.016 MB) |O                          |O
100 | |97-Conv1D            (16.004 MB) |O                          |O
101 | |98-LayerNorm         (0.008 MB) |O                          |O
102 | |99-Conv1D            (12.012 MB) |O                          |O
103 | |100-Conv1D           (4.004 MB) |O                          |O
104 | |101-LayerNorm        (0.008 MB) |O                          |O
105 | |102-Conv1D           (16.016 MB) |O                          |O
106 | |103-Conv1D           (16.004 MB) |O                          |O
107 | |104-LayerNorm        (0.008 MB) |O                          |O
108 | |105-Conv1D           (12.012 MB) |O                          |O
109 | |106-Conv1D           (4.004 MB) |O                          |O
110 | |107-LayerNorm        (0.008 MB) |O                          |O
111 | |108-Conv1D           (16.016 MB) |O                          |O
112 | |109-Conv1D           (16.004 MB) |O                          |O
113 | |110-LayerNorm        (0.008 MB) |O                          |O
114 | |111-Conv1D           (12.012 MB) |O                          |O
115 | |112-Conv1D           (4.004 MB) |O                          |O
116 | |113-LayerNorm        (0.008 MB) |O                          |O
117 | |114-Conv1D           (16.016 MB) |O                          |O
118 | |115-Conv1D           (16.004 MB) |O                          |O
119 | |116-LayerNorm        (0.008 MB) |O                          |O
120 | |117-Conv1D           (12.012 MB) |O                          |O
121 | |118-Conv1D           (4.004 MB) |O                          |O
122 | |119-LayerNorm        (0.008 MB) |O                          |O
123 | |120-Conv1D           (16.016 MB) |O                          |O
124 | |121-Conv1D           (16.004 MB) |O                          |O
125 | |122-LayerNorm        (0.008 MB) |O                          |O
126 | |123-Conv1D           (12.012 MB) |O                          |O
127 | |124-Conv1D           (4.004 MB) |O                          |O
128 | |125-LayerNorm        (0.008 MB) |O                          |O
129 | |126-Conv1D           (16.016 MB) |O                          |O
130 | |127-Conv1D           (16.004 MB) |O                          |O
131 | |128-LayerNorm        (0.008 MB) |O                          |O
132 | |129-Conv1D           (12.012 MB) |O                          |O
133 | |130-Conv1D           (4.004 MB) |O                          |O
134 | |131-LayerNorm        (0.008 MB) |O                          |O
135 | |132-Conv1D           (16.016 MB) |O                          |O
136 | |133-Conv1D           (16.004 MB) |O                          |O
137 | |134-LayerNorm        (0.008 MB) |O                          |O
138 | |135-Conv1D           (12.012 MB) |O                          |O
139 | |136-Conv1D           (4.004 MB) |O                          |O
140 | |137-LayerNorm        (0.008 MB) |O                          |O
141 | |138-Conv1D           (16.016 MB) |O                          |O
142 | |139-Conv1D           (16.004 MB) |O                          |O
143 | |140-LayerNorm        (0.008 MB) |O                          |O
144 | |141-Conv1D           (12.012 MB) |O                          |O
145 | |142-Conv1D           (4.004 MB) |O                          |O
146 | |143-LayerNorm        (0.008 MB) |O                          |O
147 | |144-Conv1D           (16.016 MB) |O                          |O
148 | |145-Conv1D           (16.004 MB) |O                          |O
149 | |146-LayerNorm        (0.008 MB) |O                          |O
150 | 


--------------------------------------------------------------------------------
/plans/V100/resnet50/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Conv2d             (0.036 MB) |O                          |O
  4 | |1-BatchNorm2d        (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
  5 | |2-ReLU               (0.000 MB) |X                          |X
  6 | |3-MaxPool2d          (0.000 MB) |X                          |X
  7 | |4-Conv2d             (0.016 MB) |O                          |O
  8 | |5-BatchNorm2d        (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
  9 | |6-ReLU               (0.000 MB) |X                          |X
 10 | |7-Conv2d             (0.141 MB) |X (direct-host-access)     |X (direct-host-access)
 11 | |8-BatchNorm2d        (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
 12 | |9-ReLU               (0.000 MB) |X                          |X
 13 | |10-Conv2d            (0.062 MB) |O                          |O
 14 | |11-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 15 | |12-Conv2d            (0.062 MB) |O                          |O
 16 | |13-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 17 | |14-ReLU              (0.000 MB) |X                          |X
 18 | |15-Conv2d            (0.062 MB) |O                          |O
 19 | |16-BatchNorm2d       (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
 20 | |17-ReLU              (0.000 MB) |X                          |X
 21 | |18-Conv2d            (0.141 MB) |X (direct-host-access)     |X (direct-host-access)
 22 | |19-BatchNorm2d       (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
 23 | |20-ReLU              (0.000 MB) |X                          |X
 24 | |21-Conv2d            (0.062 MB) |O                          |O
 25 | |22-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 26 | |23-ReLU              (0.000 MB) |X                          |X
 27 | |24-Conv2d            (0.062 MB) |O                          |O
 28 | |25-BatchNorm2d       (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
 29 | |26-ReLU              (0.000 MB) |X                          |X
 30 | |27-Conv2d            (0.141 MB) |X (direct-host-access)     |X (direct-host-access)
 31 | |28-BatchNorm2d       (0.001 MB) |X (direct-host-access)     |X (direct-host-access)
 32 | |29-ReLU              (0.000 MB) |X                          |X
 33 | |30-Conv2d            (0.062 MB) |O                          |O
 34 | |31-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 35 | |32-ReLU              (0.000 MB) |X                          |X
 36 | |33-Conv2d            (0.125 MB) |O                          |O
 37 | |34-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 38 | |35-ReLU              (0.000 MB) |X                          |X
 39 | |36-Conv2d            (0.562 MB) |X (direct-host-access)     |X (direct-host-access)
 40 | |37-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 41 | |38-ReLU              (0.000 MB) |X                          |X
 42 | |39-Conv2d            (0.250 MB) |O                          |O
 43 | |40-BatchNorm2d       (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
 44 | |41-Conv2d            (0.500 MB) |O                          |O
 45 | |42-BatchNorm2d       (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
 46 | |43-ReLU              (0.000 MB) |X                          |X
 47 | |44-Conv2d            (0.250 MB) |X (direct-host-access)     |O
 48 | |45-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 49 | |46-ReLU              (0.000 MB) |X                          |X
 50 | |47-Conv2d            (0.562 MB) |X (direct-host-access)     |X (direct-host-access)
 51 | |48-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 52 | |49-ReLU              (0.000 MB) |X                          |X
 53 | |50-Conv2d            (0.250 MB) |O                          |O
 54 | |51-BatchNorm2d       (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
 55 | |52-ReLU              (0.000 MB) |X                          |X
 56 | |53-Conv2d            (0.250 MB) |X (direct-host-access)     |O
 57 | |54-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 58 | |55-ReLU              (0.000 MB) |X                          |X
 59 | |56-Conv2d            (0.562 MB) |X (direct-host-access)     |X (direct-host-access)
 60 | |57-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 61 | |58-ReLU              (0.000 MB) |X                          |X
 62 | |59-Conv2d            (0.250 MB) |O                          |O
 63 | |60-BatchNorm2d       (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
 64 | |61-ReLU              (0.000 MB) |X                          |X
 65 | |62-Conv2d            (0.250 MB) |X (direct-host-access)     |O
 66 | |63-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 67 | |64-ReLU              (0.000 MB) |X                          |X
 68 | |65-Conv2d            (0.562 MB) |X (direct-host-access)     |X (direct-host-access)
 69 | |66-BatchNorm2d       (0.002 MB) |X (direct-host-access)     |X (direct-host-access)
 70 | |67-ReLU              (0.000 MB) |X                          |X
 71 | |68-Conv2d            (0.250 MB) |O                          |O
 72 | |69-BatchNorm2d       (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
 73 | |70-ReLU              (0.000 MB) |X                          |X
 74 | |71-Conv2d            (0.500 MB) |O                          |O
 75 | |72-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 76 | |73-ReLU              (0.000 MB) |X                          |X
 77 | |74-Conv2d            (2.250 MB) |O                          |O
 78 | |75-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 79 | |76-ReLU              (0.000 MB) |X                          |X
 80 | |77-Conv2d            (1.000 MB) |X (direct-host-access)     |O
 81 | |78-BatchNorm2d       (0.020 MB) |X (direct-host-access)     |O
 82 | |79-Conv2d            (2.000 MB) |O                          |O
 83 | |80-BatchNorm2d       (0.020 MB) |X (direct-host-access)     |O
 84 | |81-ReLU              (0.000 MB) |X                          |X
 85 | |82-Conv2d            (1.000 MB) |X (direct-host-access)     |O
 86 | |83-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 87 | |84-ReLU              (0.000 MB) |X                          |X
 88 | |85-Conv2d            (2.250 MB) |O                          |O
 89 | |86-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 90 | |87-ReLU              (0.000 MB) |X                          |X
 91 | |88-Conv2d            (1.000 MB) |X (direct-host-access)     |O
 92 | |89-BatchNorm2d       (0.020 MB) |X (direct-host-access)     |X (direct-host-access)
 93 | |90-ReLU              (0.000 MB) |X                          |X
 94 | |91-Conv2d            (1.000 MB) |X (direct-host-access)     |O
 95 | |92-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 96 | |93-ReLU              (0.000 MB) |X                          |X
 97 | |94-Conv2d            (2.250 MB) |O                          |O
 98 | |95-BatchNorm2d       (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
 99 | |96-ReLU              (0.000 MB) |X                          |X
100 | |97-Conv2d            (1.000 MB) |X (direct-host-access)     |O
101 | |98-BatchNorm2d       (0.020 MB) |X (direct-host-access)     |O
102 | |99-ReLU              (0.000 MB) |X                          |X
103 | |100-Conv2d           (1.000 MB) |X (direct-host-access)     |O
104 | |101-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
105 | |102-ReLU             (0.000 MB) |X                          |X
106 | |103-Conv2d           (2.250 MB) |O                          |O
107 | |104-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
108 | |105-ReLU             (0.000 MB) |X                          |X
109 | |106-Conv2d           (1.000 MB) |X (direct-host-access)     |O
110 | |107-BatchNorm2d      (0.020 MB) |X (direct-host-access)     |O
111 | |108-ReLU             (0.000 MB) |X                          |X
112 | |109-Conv2d           (1.000 MB) |X (direct-host-access)     |O
113 | |110-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
114 | |111-ReLU             (0.000 MB) |X                          |X
115 | |112-Conv2d           (2.250 MB) |O                          |O
116 | |113-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
117 | |114-ReLU             (0.000 MB) |X                          |X
118 | |115-Conv2d           (1.000 MB) |X (direct-host-access)     |O
119 | |116-BatchNorm2d      (0.020 MB) |X (direct-host-access)     |O
120 | |117-ReLU             (0.000 MB) |X                          |X
121 | |118-Conv2d           (1.000 MB) |X (direct-host-access)     |O
122 | |119-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
123 | |120-ReLU             (0.000 MB) |X                          |X
124 | |121-Conv2d           (2.250 MB) |O                          |O
125 | |122-BatchNorm2d      (0.005 MB) |X (direct-host-access)     |X (direct-host-access)
126 | |123-ReLU             (0.000 MB) |X                          |X
127 | |124-Conv2d           (1.000 MB) |X (direct-host-access)     |O
128 | |125-BatchNorm2d      (0.020 MB) |X (direct-host-access)     |O
129 | |126-ReLU             (0.000 MB) |X                          |X
130 | |127-Conv2d           (2.000 MB) |X (direct-host-access)     |O
131 | |128-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
132 | |129-ReLU             (0.000 MB) |X                          |X
133 | |130-Conv2d           (9.000 MB) |O                          |O
134 | |131-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |O
135 | |132-ReLU             (0.000 MB) |X                          |X
136 | |133-Conv2d           (4.000 MB) |X (direct-host-access)     |O
137 | |134-BatchNorm2d      (0.039 MB) |X (direct-host-access)     |O
138 | |135-Conv2d           (8.000 MB) |X (direct-host-access)     |O
139 | |136-BatchNorm2d      (0.039 MB) |X (direct-host-access)     |O
140 | |137-ReLU             (0.000 MB) |X                          |X
141 | |138-Conv2d           (4.000 MB) |X (direct-host-access)     |O
142 | |139-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
143 | |140-ReLU             (0.000 MB) |X                          |X
144 | |141-Conv2d           (9.000 MB) |O                          |O
145 | |142-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |O
146 | |143-ReLU             (0.000 MB) |X                          |X
147 | |144-Conv2d           (4.000 MB) |X (direct-host-access)     |O
148 | |145-BatchNorm2d      (0.039 MB) |X (direct-host-access)     |O
149 | |146-ReLU             (0.000 MB) |X                          |X
150 | |147-Conv2d           (4.000 MB) |X (direct-host-access)     |O
151 | |148-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |X (direct-host-access)
152 | |149-ReLU             (0.000 MB) |X                          |X
153 | |150-Conv2d           (9.000 MB) |O                          |O
154 | |151-BatchNorm2d      (0.010 MB) |X (direct-host-access)     |O
155 | |152-ReLU             (0.000 MB) |X                          |X
156 | |153-Conv2d           (4.000 MB) |X (direct-host-access)     |O
157 | |154-BatchNorm2d      (0.039 MB) |X (direct-host-access)     |O
158 | |155-ReLU             (0.000 MB) |X                          |X
159 | |156-AdaptiveAvgPool2d (0.000 MB) |X                          |X
160 | |157-Linear           (7.816 MB) |X (direct-host-access)     |O
161 | 


--------------------------------------------------------------------------------
/plans/V100/roberta_base/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Embedding          (147.261 MB) |X (direct-host-access)     |X (direct-host-access)
  4 | |1-Embedding          (1.506 MB) |X (direct-host-access)     |O
  5 | |2-Embedding          (0.003 MB) |X (direct-host-access)     |O
  6 | |3-LayerNorm          (0.006 MB) |O                          |O
  7 | |4-Linear             (2.253 MB) |O                          |O
  8 | |5-Linear             (2.253 MB) |O                          |O
  9 | |6-Linear             (2.253 MB) |O                          |O
 10 | |7-Linear             (2.253 MB) |O                          |O
 11 | |8-LayerNorm          (0.006 MB) |O                          |O
 12 | |9-Linear             (9.012 MB) |O                          |O
 13 | |10-Linear            (9.003 MB) |O                          |O
 14 | |11-LayerNorm         (0.006 MB) |O                          |O
 15 | |12-Linear            (2.253 MB) |O                          |O
 16 | |13-Linear            (2.253 MB) |O                          |O
 17 | |14-Linear            (2.253 MB) |O                          |O
 18 | |15-Linear            (2.253 MB) |O                          |O
 19 | |16-LayerNorm         (0.006 MB) |O                          |O
 20 | |17-Linear            (9.012 MB) |O                          |O
 21 | |18-Linear            (9.003 MB) |O                          |O
 22 | |19-LayerNorm         (0.006 MB) |O                          |O
 23 | |20-Linear            (2.253 MB) |O                          |O
 24 | |21-Linear            (2.253 MB) |O                          |O
 25 | |22-Linear            (2.253 MB) |O                          |O
 26 | |23-Linear            (2.253 MB) |O                          |O
 27 | |24-LayerNorm         (0.006 MB) |O                          |O
 28 | |25-Linear            (9.012 MB) |O                          |O
 29 | |26-Linear            (9.003 MB) |O                          |O
 30 | |27-LayerNorm         (0.006 MB) |O                          |O
 31 | |28-Linear            (2.253 MB) |O                          |O
 32 | |29-Linear            (2.253 MB) |O                          |O
 33 | |30-Linear            (2.253 MB) |O                          |O
 34 | |31-Linear            (2.253 MB) |O                          |O
 35 | |32-LayerNorm         (0.006 MB) |O                          |O
 36 | |33-Linear            (9.012 MB) |O                          |O
 37 | |34-Linear            (9.003 MB) |O                          |O
 38 | |35-LayerNorm         (0.006 MB) |O                          |O
 39 | |36-Linear            (2.253 MB) |O                          |O
 40 | |37-Linear            (2.253 MB) |O                          |O
 41 | |38-Linear            (2.253 MB) |O                          |O
 42 | |39-Linear            (2.253 MB) |O                          |O
 43 | |40-LayerNorm         (0.006 MB) |O                          |O
 44 | |41-Linear            (9.012 MB) |O                          |O
 45 | |42-Linear            (9.003 MB) |O                          |O
 46 | |43-LayerNorm         (0.006 MB) |O                          |O
 47 | |44-Linear            (2.253 MB) |O                          |O
 48 | |45-Linear            (2.253 MB) |O                          |O
 49 | |46-Linear            (2.253 MB) |O                          |O
 50 | |47-Linear            (2.253 MB) |O                          |O
 51 | |48-LayerNorm         (0.006 MB) |O                          |O
 52 | |49-Linear            (9.012 MB) |O                          |O
 53 | |50-Linear            (9.003 MB) |O                          |O
 54 | |51-LayerNorm         (0.006 MB) |O                          |O
 55 | |52-Linear            (2.253 MB) |O                          |O
 56 | |53-Linear            (2.253 MB) |O                          |O
 57 | |54-Linear            (2.253 MB) |O                          |O
 58 | |55-Linear            (2.253 MB) |O                          |O
 59 | |56-LayerNorm         (0.006 MB) |O                          |O
 60 | |57-Linear            (9.012 MB) |O                          |O
 61 | |58-Linear            (9.003 MB) |O                          |O
 62 | |59-LayerNorm         (0.006 MB) |O                          |O
 63 | |60-Linear            (2.253 MB) |O                          |O
 64 | |61-Linear            (2.253 MB) |O                          |O
 65 | |62-Linear            (2.253 MB) |O                          |O
 66 | |63-Linear            (2.253 MB) |O                          |O
 67 | |64-LayerNorm         (0.006 MB) |O                          |O
 68 | |65-Linear            (9.012 MB) |O                          |O
 69 | |66-Linear            (9.003 MB) |O                          |O
 70 | |67-LayerNorm         (0.006 MB) |O                          |O
 71 | |68-Linear            (2.253 MB) |O                          |O
 72 | |69-Linear            (2.253 MB) |O                          |O
 73 | |70-Linear            (2.253 MB) |O                          |O
 74 | |71-Linear            (2.253 MB) |O                          |O
 75 | |72-LayerNorm         (0.006 MB) |O                          |O
 76 | |73-Linear            (9.012 MB) |O                          |O
 77 | |74-Linear            (9.003 MB) |O                          |O
 78 | |75-LayerNorm         (0.006 MB) |O                          |O
 79 | |76-Linear            (2.253 MB) |O                          |O
 80 | |77-Linear            (2.253 MB) |O                          |O
 81 | |78-Linear            (2.253 MB) |O                          |O
 82 | |79-Linear            (2.253 MB) |O                          |O
 83 | |80-LayerNorm         (0.006 MB) |O                          |O
 84 | |81-Linear            (9.012 MB) |O                          |O
 85 | |82-Linear            (9.003 MB) |O                          |O
 86 | |83-LayerNorm         (0.006 MB) |O                          |O
 87 | |84-Linear            (2.253 MB) |O                          |O
 88 | |85-Linear            (2.253 MB) |O                          |O
 89 | |86-Linear            (2.253 MB) |O                          |O
 90 | |87-Linear            (2.253 MB) |O                          |O
 91 | |88-LayerNorm         (0.006 MB) |O                          |O
 92 | |89-Linear            (9.012 MB) |O                          |O
 93 | |90-Linear            (9.003 MB) |O                          |O
 94 | |91-LayerNorm         (0.006 MB) |O                          |O
 95 | |92-Linear            (2.253 MB) |O                          |O
 96 | |93-Linear            (2.253 MB) |O                          |O
 97 | |94-Linear            (2.253 MB) |O                          |O
 98 | |95-Linear            (2.253 MB) |O                          |O
 99 | |96-LayerNorm         (0.006 MB) |O                          |O
100 | |97-Linear            (9.012 MB) |O                          |O
101 | |98-Linear            (9.003 MB) |O                          |O
102 | |99-LayerNorm         (0.006 MB) |O                          |O
103 | |100-Linear           (2.253 MB) |X (direct-host-access)     |O
104 | |101-Tanh             (0.000 MB) |X                          |X
105 | 


--------------------------------------------------------------------------------
/plans/V100/roberta_large/README.md:
--------------------------------------------------------------------------------
  1 | |Layer                    |Initial approach         |DeepPlan (DHA)
  2 | |-------------------------|-------------------------|-------------------------
  3 | |0-Embedding          (196.348 MB) |X (direct-host-access)     |X (direct-host-access)
  4 | |1-Embedding          (2.008 MB) |X (direct-host-access)     |O
  5 | |2-Embedding          (0.004 MB) |X (direct-host-access)     |O
  6 | |3-LayerNorm          (0.008 MB) |O                          |O
  7 | |4-Linear             (4.004 MB) |O                          |O
  8 | |5-Linear             (4.004 MB) |O                          |O
  9 | |6-Linear             (4.004 MB) |O                          |O
 10 | |7-Linear             (4.004 MB) |O                          |O
 11 | |8-LayerNorm          (0.008 MB) |O                          |O
 12 | |9-Linear             (16.016 MB) |O                          |O
 13 | |10-Linear            (16.004 MB) |O                          |O
 14 | |11-LayerNorm         (0.008 MB) |O                          |O
 15 | |12-Linear            (4.004 MB) |O                          |O
 16 | |13-Linear            (4.004 MB) |O                          |O
 17 | |14-Linear            (4.004 MB) |O                          |O
 18 | |15-Linear            (4.004 MB) |O                          |O
 19 | |16-LayerNorm         (0.008 MB) |O                          |O
 20 | |17-Linear            (16.016 MB) |O                          |O
 21 | |18-Linear            (16.004 MB) |O                          |O
 22 | |19-LayerNorm         (0.008 MB) |O                          |O
 23 | |20-Linear            (4.004 MB) |O                          |O
 24 | |21-Linear            (4.004 MB) |O                          |O
 25 | |22-Linear            (4.004 MB) |O                          |O
 26 | |23-Linear            (4.004 MB) |O                          |O
 27 | |24-LayerNorm         (0.008 MB) |O                          |O
 28 | |25-Linear            (16.016 MB) |O                          |O
 29 | |26-Linear            (16.004 MB) |O                          |O
 30 | |27-LayerNorm         (0.008 MB) |O                          |O
 31 | |28-Linear            (4.004 MB) |O                          |O
 32 | |29-Linear            (4.004 MB) |O                          |O
 33 | |30-Linear            (4.004 MB) |O                          |O
 34 | |31-Linear            (4.004 MB) |O                          |O
 35 | |32-LayerNorm         (0.008 MB) |O                          |O
 36 | |33-Linear            (16.016 MB) |O                          |O
 37 | |34-Linear            (16.004 MB) |O                          |O
 38 | |35-LayerNorm         (0.008 MB) |O                          |O
 39 | |36-Linear            (4.004 MB) |O                          |O
 40 | |37-Linear            (4.004 MB) |O                          |O
 41 | |38-Linear            (4.004 MB) |O                          |O
 42 | |39-Linear            (4.004 MB) |O                          |O
 43 | |40-LayerNorm         (0.008 MB) |O                          |O
 44 | |41-Linear            (16.016 MB) |O                          |O
 45 | |42-Linear            (16.004 MB) |O                          |O
 46 | |43-LayerNorm         (0.008 MB) |O                          |O
 47 | |44-Linear            (4.004 MB) |O                          |O
 48 | |45-Linear            (4.004 MB) |O                          |O
 49 | |46-Linear            (4.004 MB) |O                          |O
 50 | |47-Linear            (4.004 MB) |O                          |O
 51 | |48-LayerNorm         (0.008 MB) |O                          |O
 52 | |49-Linear            (16.016 MB) |O                          |O
 53 | |50-Linear            (16.004 MB) |O                          |O
 54 | |51-LayerNorm         (0.008 MB) |O                          |O
 55 | |52-Linear            (4.004 MB) |O                          |O
 56 | |53-Linear            (4.004 MB) |O                          |O
 57 | |54-Linear            (4.004 MB) |O                          |O
 58 | |55-Linear            (4.004 MB) |O                          |O
 59 | |56-LayerNorm         (0.008 MB) |O                          |O
 60 | |57-Linear            (16.016 MB) |O                          |O
 61 | |58-Linear            (16.004 MB) |O                          |O
 62 | |59-LayerNorm         (0.008 MB) |O                          |O
 63 | |60-Linear            (4.004 MB) |O                          |O
 64 | |61-Linear            (4.004 MB) |O                          |O
 65 | |62-Linear            (4.004 MB) |O                          |O
 66 | |63-Linear            (4.004 MB) |O                          |O
 67 | |64-LayerNorm         (0.008 MB) |O                          |O
 68 | |65-Linear            (16.016 MB) |O                          |O
 69 | |66-Linear            (16.004 MB) |O                          |O
 70 | |67-LayerNorm         (0.008 MB) |O                          |O
 71 | |68-Linear            (4.004 MB) |O                          |O
 72 | |69-Linear            (4.004 MB) |O                          |O
 73 | |70-Linear            (4.004 MB) |O                          |O
 74 | |71-Linear            (4.004 MB) |O                          |O
 75 | |72-LayerNorm         (0.008 MB) |O                          |O
 76 | |73-Linear            (16.016 MB) |O                          |O
 77 | |74-Linear            (16.004 MB) |O                          |O
 78 | |75-LayerNorm         (0.008 MB) |O                          |O
 79 | |76-Linear            (4.004 MB) |O                          |O
 80 | |77-Linear            (4.004 MB) |O                          |O
 81 | |78-Linear            (4.004 MB) |O                          |O
 82 | |79-Linear            (4.004 MB) |O                          |O
 83 | |80-LayerNorm         (0.008 MB) |O                          |O
 84 | |81-Linear            (16.016 MB) |O                          |O
 85 | |82-Linear            (16.004 MB) |O                          |O
 86 | |83-LayerNorm         (0.008 MB) |O                          |O
 87 | |84-Linear            (4.004 MB) |O                          |O
 88 | |85-Linear            (4.004 MB) |O                          |O
 89 | |86-Linear            (4.004 MB) |O                          |O
 90 | |87-Linear            (4.004 MB) |O                          |O
 91 | |88-LayerNorm         (0.008 MB) |O                          |O
 92 | |89-Linear            (16.016 MB) |O                          |O
 93 | |90-Linear            (16.004 MB) |O                          |O
 94 | |91-LayerNorm         (0.008 MB) |O                          |O
 95 | |92-Linear            (4.004 MB) |O                          |O
 96 | |93-Linear            (4.004 MB) |O                          |O
 97 | |94-Linear            (4.004 MB) |O                          |O
 98 | |95-Linear            (4.004 MB) |O                          |O
 99 | |96-LayerNorm         (0.008 MB) |O                          |O
100 | |97-Linear            (16.016 MB) |O                          |O
101 | |98-Linear            (16.004 MB) |O                          |O
102 | |99-LayerNorm         (0.008 MB) |O                          |O
103 | |100-Linear           (4.004 MB) |O                          |O
104 | |101-Linear           (4.004 MB) |O                          |O
105 | |102-Linear           (4.004 MB) |O                          |O
106 | |103-Linear           (4.004 MB) |O                          |O
107 | |104-LayerNorm        (0.008 MB) |O                          |O
108 | |105-Linear           (16.016 MB) |O                          |O
109 | |106-Linear           (16.004 MB) |O                          |O
110 | |107-LayerNorm        (0.008 MB) |O                          |O
111 | |108-Linear           (4.004 MB) |O                          |O
112 | |109-Linear           (4.004 MB) |O                          |O
113 | |110-Linear           (4.004 MB) |O                          |O
114 | |111-Linear           (4.004 MB) |O                          |O
115 | |112-LayerNorm        (0.008 MB) |O                          |O
116 | |113-Linear           (16.016 MB) |O                          |O
117 | |114-Linear           (16.004 MB) |O                          |O
118 | |115-LayerNorm        (0.008 MB) |O                          |O
119 | |116-Linear           (4.004 MB) |O                          |O
120 | |117-Linear           (4.004 MB) |O                          |O
121 | |118-Linear           (4.004 MB) |O                          |O
122 | |119-Linear           (4.004 MB) |O                          |O
123 | |120-LayerNorm        (0.008 MB) |O                          |O
124 | |121-Linear           (16.016 MB) |O                          |O
125 | |122-Linear           (16.004 MB) |O                          |O
126 | |123-LayerNorm        (0.008 MB) |O                          |O
127 | |124-Linear           (4.004 MB) |O                          |O
128 | |125-Linear           (4.004 MB) |O                          |O
129 | |126-Linear           (4.004 MB) |O                          |O
130 | |127-Linear           (4.004 MB) |O                          |O
131 | |128-LayerNorm        (0.008 MB) |O                          |O
132 | |129-Linear           (16.016 MB) |O                          |O
133 | |130-Linear           (16.004 MB) |O                          |O
134 | |131-LayerNorm        (0.008 MB) |O                          |O
135 | |132-Linear           (4.004 MB) |O                          |O
136 | |133-Linear           (4.004 MB) |O                          |O
137 | |134-Linear           (4.004 MB) |O                          |O
138 | |135-Linear           (4.004 MB) |O                          |O
139 | |136-LayerNorm        (0.008 MB) |O                          |O
140 | |137-Linear           (16.016 MB) |O                          |O
141 | |138-Linear           (16.004 MB) |O                          |O
142 | |139-LayerNorm        (0.008 MB) |O                          |O
143 | |140-Linear           (4.004 MB) |O                          |O
144 | |141-Linear           (4.004 MB) |O                          |O
145 | |142-Linear           (4.004 MB) |O                          |O
146 | |143-Linear           (4.004 MB) |O                          |O
147 | |144-LayerNorm        (0.008 MB) |O                          |O
148 | |145-Linear           (16.016 MB) |O                          |O
149 | |146-Linear           (16.004 MB) |O                          |O
150 | |147-LayerNorm        (0.008 MB) |O                          |O
151 | |148-Linear           (4.004 MB) |O                          |O
152 | |149-Linear           (4.004 MB) |O                          |O
153 | |150-Linear           (4.004 MB) |O                          |O
154 | |151-Linear           (4.004 MB) |O                          |O
155 | |152-LayerNorm        (0.008 MB) |O                          |O
156 | |153-Linear           (16.016 MB) |O                          |O
157 | |154-Linear           (16.004 MB) |O                          |O
158 | |155-LayerNorm        (0.008 MB) |O                          |O
159 | |156-Linear           (4.004 MB) |O                          |O
160 | |157-Linear           (4.004 MB) |O                          |O
161 | |158-Linear           (4.004 MB) |O                          |O
162 | |159-Linear           (4.004 MB) |O                          |O
163 | |160-LayerNorm        (0.008 MB) |O                          |O
164 | |161-Linear           (16.016 MB) |O                          |O
165 | |162-Linear           (16.004 MB) |O                          |O
166 | |163-LayerNorm        (0.008 MB) |O                          |O
167 | |164-Linear           (4.004 MB) |O                          |O
168 | |165-Linear           (4.004 MB) |O                          |O
169 | |166-Linear           (4.004 MB) |O                          |O
170 | |167-Linear           (4.004 MB) |O                          |O
171 | |168-LayerNorm        (0.008 MB) |O                          |O
172 | |169-Linear           (16.016 MB) |O                          |O
173 | |170-Linear           (16.004 MB) |O                          |O
174 | |171-LayerNorm        (0.008 MB) |O                          |O
175 | |172-Linear           (4.004 MB) |O                          |O
176 | |173-Linear           (4.004 MB) |O                          |O
177 | |174-Linear           (4.004 MB) |O                          |O
178 | |175-Linear           (4.004 MB) |O                          |O
179 | |176-LayerNorm        (0.008 MB) |O                          |O
180 | |177-Linear           (16.016 MB) |O                          |O
181 | |178-Linear           (16.004 MB) |O                          |O
182 | |179-LayerNorm        (0.008 MB) |O                          |O
183 | |180-Linear           (4.004 MB) |O                          |O
184 | |181-Linear           (4.004 MB) |O                          |O
185 | |182-Linear           (4.004 MB) |O                          |O
186 | |183-Linear           (4.004 MB) |O                          |O
187 | |184-LayerNorm        (0.008 MB) |O                          |O
188 | |185-Linear           (16.016 MB) |O                          |O
189 | |186-Linear           (16.004 MB) |O                          |O
190 | |187-LayerNorm        (0.008 MB) |O                          |O
191 | |188-Linear           (4.004 MB) |O                          |O
192 | |189-Linear           (4.004 MB) |O                          |O
193 | |190-Linear           (4.004 MB) |O                          |O
194 | |191-Linear           (4.004 MB) |O                          |O
195 | |192-LayerNorm        (0.008 MB) |O                          |O
196 | |193-Linear           (16.016 MB) |O                          |O
197 | |194-Linear           (16.004 MB) |O                          |O
198 | |195-LayerNorm        (0.008 MB) |O                          |O
199 | |196-Linear           (4.004 MB) |O                          |O
200 | |197-Tanh             (0.000 MB) |X                          |X
201 | 


--------------------------------------------------------------------------------
/proto/deepcache.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto2";
 2 | 
 3 | enum MsgType {
 4 | 	REQ_INFERENCE    = 1;
 5 | 	REQ_UPLOAD_MODEL = 2;
 6 | 	REQ_LS				   = 3;
 7 | 	REQ_CLOSE        = 4;
 8 | 
 9 | 	RSP_INFERENCE    = 101;
10 | 	RSP_UPLOAD_MODEL = 102;
11 | 	RSP_LS				   = 103;
12 | 	RSP_CLOSE			   = 104;
13 | }
14 | 
15 | message InferenceReqProto {
16 | 	required uint32 req_id = 1;
17 | 	required uint32 model_id = 2;
18 | 	required uint32 batch_size = 3;
19 | }
20 | 
21 | message InferenceRspProto {
22 | 	required uint32 req_id = 1;
23 | 	required bool is_cold = 2;
24 | }
25 | 
26 | message UploadModelReqProto {
27 | 	required uint32 req_id = 1;
28 | 	repeated string model_names = 2;
29 | 	required uint32 n_models = 3;
30 | 	required uint32 engine_type = 4;
31 | 	required uint32 mp_size = 5;
32 | }
33 | 
34 | message UploadModelRspProto {
35 | 	required uint32 req_id = 1;
36 | }
37 | 
38 | message CloseReqProto {
39 | 	required uint32 req_id = 1;
40 | }
41 | 
42 | message CloseRspProto {
43 | 	required uint32 req_id = 1;
44 | }
45 | 


--------------------------------------------------------------------------------
/proto/deepplan.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto2";
 2 | 
 3 | enum DataType {
 4 | 	TYPE_FP32 = 1;
 5 | 	TYPE_INT32 = 2;
 6 | 	TYPE_INT64 = 3;
 7 | }
 8 | 
 9 | message ModelInput {
10 | 	required DataType data_type = 1;
11 | 	repeated uint32 shape = 2;
12 | 	optional uint32 max_number = 3;
13 | }
14 | 
15 | message Plan {
16 | 	enum PlanType {
17 | 		STATIC = 1;
18 | 		DYNAMIC = 2;
19 | 		BENCH_DYNAMIC = 3;
20 | 	}
21 | 
22 | 	required PlanType plan_type = 1;
23 | 	repeated uint32 load_layers = 2;
24 | }
25 | 
26 | message ModelConfig {
27 | 	required string model_name = 1;
28 | 	repeated ModelInput inputs = 2;
29 | 	repeated Plan plans = 3;
30 | }
31 | 


--------------------------------------------------------------------------------
/proto/deepplan_pb2.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Generated by the protocol buffer compiler.  DO NOT EDIT!
  3 | # source: deepplan.proto
  4 | 
  5 | from google.protobuf.internal import enum_type_wrapper
  6 | from google.protobuf import descriptor as _descriptor
  7 | from google.protobuf import message as _message
  8 | from google.protobuf import reflection as _reflection
  9 | from google.protobuf import symbol_database as _symbol_database
 10 | # @@protoc_insertion_point(imports)
 11 | 
 12 | _sym_db = _symbol_database.Default()
 13 | 
 14 | 
 15 | 
 16 | 
 17 | DESCRIPTOR = _descriptor.FileDescriptor(
 18 |   name='deepplan.proto',
 19 |   package='',
 20 |   syntax='proto2',
 21 |   serialized_options=None,
 22 |   serialized_pb=b'\n\x0e\x64\x65\x65pplan.proto\"M\n\nModelInput\x12\x1c\n\tdata_type\x18\x01 \x02(\x0e\x32\t.DataType\x12\r\n\x05shape\x18\x02 \x03(\r\x12\x12\n\nmax_number\x18\x03 \x01(\r\"v\n\x04Plan\x12!\n\tplan_type\x18\x01 \x02(\x0e\x32\x0e.Plan.PlanType\x12\x13\n\x0bload_layers\x18\x02 \x03(\r\"6\n\x08PlanType\x12\n\n\x06STATIC\x10\x01\x12\x0b\n\x07\x44YNAMIC\x10\x02\x12\x11\n\rBENCH_DYNAMIC\x10\x03\"T\n\x0bModelConfig\x12\x12\n\nmodel_name\x18\x01 \x02(\t\x12\x1b\n\x06inputs\x18\x02 \x03(\x0b\x32\x0b.ModelInput\x12\x14\n\x05plans\x18\x03 \x03(\x0b\x32\x05.Plan*9\n\x08\x44\x61taType\x12\r\n\tTYPE_FP32\x10\x01\x12\x0e\n\nTYPE_INT32\x10\x02\x12\x0e\n\nTYPE_INT64\x10\x03'
 23 | )
 24 | 
 25 | _DATATYPE = _descriptor.EnumDescriptor(
 26 |   name='DataType',
 27 |   full_name='DataType',
 28 |   filename=None,
 29 |   file=DESCRIPTOR,
 30 |   values=[
 31 |     _descriptor.EnumValueDescriptor(
 32 |       name='TYPE_FP32', index=0, number=1,
 33 |       serialized_options=None,
 34 |       type=None),
 35 |     _descriptor.EnumValueDescriptor(
 36 |       name='TYPE_INT32', index=1, number=2,
 37 |       serialized_options=None,
 38 |       type=None),
 39 |     _descriptor.EnumValueDescriptor(
 40 |       name='TYPE_INT64', index=2, number=3,
 41 |       serialized_options=None,
 42 |       type=None),
 43 |   ],
 44 |   containing_type=None,
 45 |   serialized_options=None,
 46 |   serialized_start=303,
 47 |   serialized_end=360,
 48 | )
 49 | _sym_db.RegisterEnumDescriptor(_DATATYPE)
 50 | 
 51 | DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE)
 52 | TYPE_FP32 = 1
 53 | TYPE_INT32 = 2
 54 | TYPE_INT64 = 3
 55 | 
 56 | 
 57 | _PLAN_PLANTYPE = _descriptor.EnumDescriptor(
 58 |   name='PlanType',
 59 |   full_name='Plan.PlanType',
 60 |   filename=None,
 61 |   file=DESCRIPTOR,
 62 |   values=[
 63 |     _descriptor.EnumValueDescriptor(
 64 |       name='STATIC', index=0, number=1,
 65 |       serialized_options=None,
 66 |       type=None),
 67 |     _descriptor.EnumValueDescriptor(
 68 |       name='DYNAMIC', index=1, number=2,
 69 |       serialized_options=None,
 70 |       type=None),
 71 |     _descriptor.EnumValueDescriptor(
 72 |       name='BENCH_DYNAMIC', index=2, number=3,
 73 |       serialized_options=None,
 74 |       type=None),
 75 |   ],
 76 |   containing_type=None,
 77 |   serialized_options=None,
 78 |   serialized_start=161,
 79 |   serialized_end=215,
 80 | )
 81 | _sym_db.RegisterEnumDescriptor(_PLAN_PLANTYPE)
 82 | 
 83 | 
 84 | _MODELINPUT = _descriptor.Descriptor(
 85 |   name='ModelInput',
 86 |   full_name='ModelInput',
 87 |   filename=None,
 88 |   file=DESCRIPTOR,
 89 |   containing_type=None,
 90 |   fields=[
 91 |     _descriptor.FieldDescriptor(
 92 |       name='data_type', full_name='ModelInput.data_type', index=0,
 93 |       number=1, type=14, cpp_type=8, label=2,
 94 |       has_default_value=False, default_value=1,
 95 |       message_type=None, enum_type=None, containing_type=None,
 96 |       is_extension=False, extension_scope=None,
 97 |       serialized_options=None, file=DESCRIPTOR),
 98 |     _descriptor.FieldDescriptor(
 99 |       name='shape', full_name='ModelInput.shape', index=1,
100 |       number=2, type=13, cpp_type=3, label=3,
101 |       has_default_value=False, default_value=[],
102 |       message_type=None, enum_type=None, containing_type=None,
103 |       is_extension=False, extension_scope=None,
104 |       serialized_options=None, file=DESCRIPTOR),
105 |     _descriptor.FieldDescriptor(
106 |       name='max_number', full_name='ModelInput.max_number', index=2,
107 |       number=3, type=13, cpp_type=3, label=1,
108 |       has_default_value=False, default_value=0,
109 |       message_type=None, enum_type=None, containing_type=None,
110 |       is_extension=False, extension_scope=None,
111 |       serialized_options=None, file=DESCRIPTOR),
112 |   ],
113 |   extensions=[
114 |   ],
115 |   nested_types=[],
116 |   enum_types=[
117 |   ],
118 |   serialized_options=None,
119 |   is_extendable=False,
120 |   syntax='proto2',
121 |   extension_ranges=[],
122 |   oneofs=[
123 |   ],
124 |   serialized_start=18,
125 |   serialized_end=95,
126 | )
127 | 
128 | 
129 | _PLAN = _descriptor.Descriptor(
130 |   name='Plan',
131 |   full_name='Plan',
132 |   filename=None,
133 |   file=DESCRIPTOR,
134 |   containing_type=None,
135 |   fields=[
136 |     _descriptor.FieldDescriptor(
137 |       name='plan_type', full_name='Plan.plan_type', index=0,
138 |       number=1, type=14, cpp_type=8, label=2,
139 |       has_default_value=False, default_value=1,
140 |       message_type=None, enum_type=None, containing_type=None,
141 |       is_extension=False, extension_scope=None,
142 |       serialized_options=None, file=DESCRIPTOR),
143 |     _descriptor.FieldDescriptor(
144 |       name='load_layers', full_name='Plan.load_layers', index=1,
145 |       number=2, type=13, cpp_type=3, label=3,
146 |       has_default_value=False, default_value=[],
147 |       message_type=None, enum_type=None, containing_type=None,
148 |       is_extension=False, extension_scope=None,
149 |       serialized_options=None, file=DESCRIPTOR),
150 |   ],
151 |   extensions=[
152 |   ],
153 |   nested_types=[],
154 |   enum_types=[
155 |     _PLAN_PLANTYPE,
156 |   ],
157 |   serialized_options=None,
158 |   is_extendable=False,
159 |   syntax='proto2',
160 |   extension_ranges=[],
161 |   oneofs=[
162 |   ],
163 |   serialized_start=97,
164 |   serialized_end=215,
165 | )
166 | 
167 | 
168 | _MODELCONFIG = _descriptor.Descriptor(
169 |   name='ModelConfig',
170 |   full_name='ModelConfig',
171 |   filename=None,
172 |   file=DESCRIPTOR,
173 |   containing_type=None,
174 |   fields=[
175 |     _descriptor.FieldDescriptor(
176 |       name='model_name', full_name='ModelConfig.model_name', index=0,
177 |       number=1, type=9, cpp_type=9, label=2,
178 |       has_default_value=False, default_value=b"".decode('utf-8'),
179 |       message_type=None, enum_type=None, containing_type=None,
180 |       is_extension=False, extension_scope=None,
181 |       serialized_options=None, file=DESCRIPTOR),
182 |     _descriptor.FieldDescriptor(
183 |       name='inputs', full_name='ModelConfig.inputs', index=1,
184 |       number=2, type=11, cpp_type=10, label=3,
185 |       has_default_value=False, default_value=[],
186 |       message_type=None, enum_type=None, containing_type=None,
187 |       is_extension=False, extension_scope=None,
188 |       serialized_options=None, file=DESCRIPTOR),
189 |     _descriptor.FieldDescriptor(
190 |       name='plans', full_name='ModelConfig.plans', index=2,
191 |       number=3, type=11, cpp_type=10, label=3,
192 |       has_default_value=False, default_value=[],
193 |       message_type=None, enum_type=None, containing_type=None,
194 |       is_extension=False, extension_scope=None,
195 |       serialized_options=None, file=DESCRIPTOR),
196 |   ],
197 |   extensions=[
198 |   ],
199 |   nested_types=[],
200 |   enum_types=[
201 |   ],
202 |   serialized_options=None,
203 |   is_extendable=False,
204 |   syntax='proto2',
205 |   extension_ranges=[],
206 |   oneofs=[
207 |   ],
208 |   serialized_start=217,
209 |   serialized_end=301,
210 | )
211 | 
212 | _MODELINPUT.fields_by_name['data_type'].enum_type = _DATATYPE
213 | _PLAN.fields_by_name['plan_type'].enum_type = _PLAN_PLANTYPE
214 | _PLAN_PLANTYPE.containing_type = _PLAN
215 | _MODELCONFIG.fields_by_name['inputs'].message_type = _MODELINPUT
216 | _MODELCONFIG.fields_by_name['plans'].message_type = _PLAN
217 | DESCRIPTOR.message_types_by_name['ModelInput'] = _MODELINPUT
218 | DESCRIPTOR.message_types_by_name['Plan'] = _PLAN
219 | DESCRIPTOR.message_types_by_name['ModelConfig'] = _MODELCONFIG
220 | DESCRIPTOR.enum_types_by_name['DataType'] = _DATATYPE
221 | _sym_db.RegisterFileDescriptor(DESCRIPTOR)
222 | 
223 | ModelInput = _reflection.GeneratedProtocolMessageType('ModelInput', (_message.Message,), {
224 |   'DESCRIPTOR' : _MODELINPUT,
225 |   '__module__' : 'deepplan_pb2'
226 |   # @@protoc_insertion_point(class_scope:ModelInput)
227 |   })
228 | _sym_db.RegisterMessage(ModelInput)
229 | 
230 | Plan = _reflection.GeneratedProtocolMessageType('Plan', (_message.Message,), {
231 |   'DESCRIPTOR' : _PLAN,
232 |   '__module__' : 'deepplan_pb2'
233 |   # @@protoc_insertion_point(class_scope:Plan)
234 |   })
235 | _sym_db.RegisterMessage(Plan)
236 | 
237 | ModelConfig = _reflection.GeneratedProtocolMessageType('ModelConfig', (_message.Message,), {
238 |   'DESCRIPTOR' : _MODELCONFIG,
239 |   '__module__' : 'deepplan_pb2'
240 |   # @@protoc_insertion_point(class_scope:ModelConfig)
241 |   })
242 | _sym_db.RegisterMessage(ModelConfig)
243 | 
244 | 
245 | # @@protoc_insertion_point(module_scope)
246 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.36.0
2 | protobuf==3.20.1
3 | matplotlib==3.3.4
4 | 


--------------------------------------------------------------------------------
/scripts/create_all_plans.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | PLAN_REPO=${PLAN_REPO}
 4 | 
 5 | if [[ -z "$PLAN_REPO" ]]; then
 6 | 	echo "PLAN_REPO environment variable not set, please set this variable"
 7 | 	return
 8 | fi
 9 | 
10 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
11 | exec_path="$script_path/../"
12 | 
13 | TARGET="plan.py"
14 | 
15 | models=("resnet50" "resnet101" "bert_base" "bert_large" "roberta_base" "roberta_large" "gpt2" "gpt2_384" "gpt2_medium")
16 | 
17 | if [ ! -d "$PLAN_REPO" ]; then
18 | 	mkdir -p "$PLAN_REPO"
19 | 	echo "Create $PLAN_REPO directory"
20 | fi
21 | 
22 | for model in ${models[@]}; do
23 | 	cmd="python3 $exec_path/$TARGET -m $model -p $PLAN_REPO --trace --profile"
24 | 	$cmd
25 | done
26 | 


--------------------------------------------------------------------------------
/scripts/download_azure_trace_dataset.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 3 | output_dir="$script_path/azure-functions"
 4 | 
 5 | echo "Downloading Azure trace dataset"
 6 | wget https://azurecloudpublicdataset2.blob.core.windows.net/azurepublicdatasetv2/azurefunctions_dataset2019/azurefunctions-dataset2019.tar.xz
 7 | 
 8 | mkdir -p $output_dir
 9 | 
10 | echo "Extract azurefunctions-dataset2019.tar.xz"
11 | tar -xvf azurefunctions-dataset2019.tar.xz -C $output_dir
12 | 
13 | echo "The Azure trace datasets are saved to '$outpu_dir'"
14 | echo "To run azure experiments, follow the command below"
15 | echo "export AZURE_TRACE_DIR=\"$output_dir\""
16 | 
17 | 


--------------------------------------------------------------------------------
/scripts/fig10/graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[3]:
  5 | 
  6 | import numpy as np
  7 | import matplotlib.pyplot as plt
  8 | from mpl_toolkits.axes_grid1 import make_axes_locatable
  9 | from matplotlib.ticker import MaxNLocator
 10 | import sys
 11 | import os
 12 | import csv
 13 | 
 14 | baseline = np.array([])
 15 | pipeswitch = np.array([])
 16 | deepplan_dha = np.array([])
 17 | deepplan_parallel = np.array([])
 18 | deepplan_all = np.array([])
 19 | 
 20 | target = sys.argv[1]
 21 | target = target.strip()
 22 | if target[0] != '/':
 23 |     target = os.path.join(os.getcwd(), target)
 24 | 
 25 | def read_file(file):
 26 |     baseline = np.array([])
 27 |     pipeswitch = np.array([])
 28 |     deepplan_dha = np.array([])
 29 |     deepplan_parallel = np.array([])
 30 |     deepplan_all = np.array([])
 31 | 
 32 |     with open(file, 'r', encoding='utf-8') as f:
 33 |         rdr = csv.reader(f)
 34 |         for i, line in enumerate(rdr):
 35 |             baseline = np.append(baseline, float(line[0]))
 36 |             pipeswitch = np.append(pipeswitch, float(line[1]))
 37 |             deepplan_dha = np.append(deepplan_dha, float(line[2]))
 38 |             deepplan_parallel = np.append(deepplan_parallel, float(line[3]))
 39 |             deepplan_all = np.append(deepplan_all, float(line[4]))
 40 | 
 41 |     return np.array([baseline, pipeswitch, deepplan_dha, deepplan_parallel, deepplan_all])
 42 | 
 43 | pipeswitch = baseline / pipeswitch
 44 | deepplan_dha = baseline / deepplan_dha
 45 | deepplan_parallel = baseline / deepplan_parallel
 46 | deepplan_all = baseline / deepplan_all
 47 | baseline /= baseline
 48 | 
 49 | label_list = ["Baseline", "PipeSwitch", "DeepPlan (DHA)", "DeepPlan (PT)", "DeepPlan (PT+DHA)"]
 50 | 
 51 | color_list = ['#EAECEE', '#AEB6BF', '#85929E', '#5D6D7E', '#34495E', '#273746']
 52 | model_list = ["ResNet-50", "ResNet-101", "BERT-Base", "BERT-Large", "RoBERTa\nBase", "RoBERTa\nLarge", "GPT-2", "GPT-2 Medium"]
 53 | 
 54 | x_label = ""
 55 | y_label = "Inference speedup"
 56 | 
 57 | FONTSIZE_LABEL = 14
 58 | FONTSIZE_LEGEND = 14
 59 | WIDTH = 1.1
 60 | SIZE_FIGURE = (12, 3)
 61 | 
 62 | 
 63 | def create_x(t, w, n, d):
 64 |     return [t*x + w*n for x in range(d)]
 65 | 
 66 | value_base = create_x(8, 1.3, 1, 8)
 67 | value_pipe = create_x(8, 1.3, 2, 8)
 68 | value_deep_dha = create_x(8, 1.3, 3, 8)
 69 | value_deep_parallel = create_x(8, 1.3, 4, 8)
 70 | value_deep_all = create_x(8, 1.3, 5, 8)
 71 | 
 72 | fig, ax = plt.subplots(1, 1, figsize=SIZE_FIGURE)
 73 | 
 74 | avg_ret = read_file(sys.argv[1])
 75 | min_ret = read_file(sys.argv[2])
 76 | max_ret = read_file(sys.argv[3])
 77 | 
 78 | base = avg_ret[0]
 79 | avg_ret = base / avg_ret
 80 | min_ret = base / min_ret
 81 | max_ret = base / max_ret
 82 | 
 83 | lower_err = abs(avg_ret - min_ret)
 84 | upper_err = abs(avg_ret - max_ret)
 85 | 
 86 | ax.bar(value_base, avg_ret[0], color=color_list[0], edgecolor="black", zorder=3, width=WIDTH)
 87 | ax.bar(value_pipe, avg_ret[1], color=color_list[1], edgecolor="black", zorder=3, width=WIDTH)
 88 | ax.bar(value_deep_dha, avg_ret[2], color=color_list[2], edgecolor="black", zorder=3, width=WIDTH)
 89 | ax.bar(value_deep_parallel, avg_ret[3], color=color_list[3], edgecolor="black", zorder=3, width=WIDTH)
 90 | ax.bar(value_deep_all, avg_ret[4], color=color_list[4], edgecolor="black", zorder=3, width=WIDTH)
 91 | 
 92 | ax.errorbar(value_base, avg_ret[0], yerr=[lower_err[0], upper_err[0]], fmt='o', capsize=3, color="black", zorder=4, ms=1)
 93 | ax.errorbar(value_pipe, avg_ret[1], yerr=[lower_err[1], upper_err[1]], fmt='o', capsize=3, color="black", zorder=4, ms=1)
 94 | ax.errorbar(value_deep_dha, avg_ret[2], yerr=[lower_err[2], upper_err[2]], fmt='o', capsize=3, color="black", zorder=4, ms=1)
 95 | ax.errorbar(value_deep_parallel, avg_ret[3], yerr=[lower_err[3], upper_err[3]], fmt='o', capsize=3, color="black", zorder=4, ms=1)
 96 | ax.errorbar(value_deep_all, avg_ret[4], yerr=[lower_err[4], upper_err[4]], fmt='o', capsize=3, color="black", zorder=4, ms=1)
 97 | 
 98 | fig.legend(labels=label_list, bbox_to_anchor=(0.52, 1.00), ncol=5, loc='center',
 99 |            fontsize=FONTSIZE_LEGEND, frameon=False)
100 | 
101 | plt.xticks([3.9 + i * 8 for i in range(0, 8)], model_list)
102 | plt.tick_params(axis="x", direction="out", labelsize=FONTSIZE_LABEL, rotation=0)
103 | plt.ylabel(y_label, fontsize=FONTSIZE_LABEL, labelpad=8)
104 | plt.yticks(fontsize=FONTSIZE_LABEL)
105 | plt.grid(linestyle='-', axis='y', zorder=-10)
106 | plt.rcParams["font.family"] = "Helvetica"
107 | plt.axhline(y=1.0, color='gray', linestyle='--')
108 | 
109 | plt.tight_layout()
110 | #plt.show()
111 | plt.savefig(sys.argv[4], bbox_inches="tight", pad_inches=0.0)
112 | print("Saved graph to {}".format(sys.argv[4]))
113 | 


--------------------------------------------------------------------------------
/scripts/fig10/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | PLAN_REPO=${PLAN_REPO}
  4 | 
  5 | if [[ -z "$PLAN_REPO" ]]; then
  6 | 	echo "PLAN_REPO environment variable not set, please set this variable"
  7 | 	return
  8 | fi
  9 | 
 10 | export PLAN_REPO=${PLAN_REPO}
 11 | 
 12 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 13 | build_path="$script_path/../../build"
 14 | 
 15 | TARGET="benchmark"
 16 | 
 17 | device_maps=("0" "0 2")
 18 | models=("resnet50" "resnet101" "bert_base" "bert_large" "roberta_base" "roberta_large" "gpt2" "gpt2_medium")
 19 | engines=("pipeline" "deepplan")
 20 | batch_size=1
 21 | 
 22 | tmp_avg_file="/tmp/deepplan_fig10_avg"
 23 | tmp_min_file="/tmp/deepplan_fig10_min"
 24 | tmp_max_file="/tmp/deepplan_fig10_max"
 25 | 
 26 | printf "" > $tmp_avg_file
 27 | printf "" > $tmp_min_file
 28 | printf "" > $tmp_max_file
 29 | 
 30 | for model in "${models[@]}"; do
 31 | 	# Baseline
 32 | 	baseline_cmd="$build_path/$TARGET -m $model -e demand -b $batch_size -d 0"
 33 | 	echo "Run $baseline_cmd"
 34 | 
 35 | 	output=`$baseline_cmd`
 36 | 	echo "$output"
 37 | 	echo ""
 38 | 
 39 | 	avg_lat=$(echo "$output" | awk '{if ($1 == "Average") { print $(NF-1)}}')
 40 | 	min_lat=$(echo "$output" | awk '{if ($1 == "Min") { print $(NF-1)}}')
 41 | 	max_lat=$(echo "$output" | awk '{if ($1 == "Max") { print $(NF-1)}}')
 42 | 
 43 | 	printf "$avg_lat, " >> $tmp_avg_file
 44 | 	printf "$min_lat, " >> $tmp_min_file
 45 | 	printf "$max_lat, " >> $tmp_max_file
 46 | 
 47 | 	for device_map in "${device_maps[@]}"; do
 48 | 		for engine in "${engines[@]}"; do
 49 | 			cmd="$build_path/$TARGET -m $model -e $engine -b $batch_size -d $device_map"
 50 | 			echo "Run $cmd"
 51 | 
 52 | 			output=`$cmd`
 53 | 			echo "$output"
 54 | 			echo ""
 55 | 
 56 | 			avg_lat=$(echo "$output" | awk '{if ($1 == "Average") { print $(NF-1)}}')
 57 | 			min_lat=$(echo "$output" | awk '{if ($1 == "Min") { print $(NF-1)}}')
 58 | 			max_lat=$(echo "$output" | awk '{if ($1 == "Max") { print $(NF-1)}}')
 59 | 
 60 | 			printf "$avg_lat, " >> $tmp_avg_file
 61 | 			printf "$min_lat, " >> $tmp_min_file
 62 | 			printf "$max_lat, " >> $tmp_max_file
 63 | 		done
 64 | 	done
 65 | 
 66 | 	echo "" >> $tmp_avg_file
 67 | 	echo "" >> $tmp_min_file
 68 | 	echo "" >> $tmp_max_file
 69 | 
 70 | done
 71 | 
 72 | log_path="$script_path/logs"
 73 | 
 74 | # Check for log_path existence
 75 | if [ ! -d "$log_path" ]; then
 76 | 	echo "Created $log_path directory where log files will be stored"
 77 | 	mkdir -p $log_path
 78 | fi
 79 | 
 80 | date=`date +%y-%m-%d`
 81 | 
 82 | log_path="$log_path/$date"
 83 | if [ ! -d "$log_path" ]; then
 84 | 	mkdir -p "$log_path"
 85 | fi
 86 | 
 87 | output_path="$log_path/report"
 88 | 
 89 | version=0
 90 | while true; do
 91 | 	_output="${output_path}$version"
 92 | 	if [ -d "$_output" ]; then
 93 | 		((version++))
 94 | 	else
 95 | 		break
 96 | 	fi
 97 | done
 98 | 
 99 | output_path="$_output"
100 | 
101 | mkdir -p $output_path
102 | 
103 | avg_file="$output_path/result_avg.csv"
104 | min_file="$output_path/result_min.csv"
105 | max_file="$output_path/result_max.csv"
106 | 
107 | cp $tmp_avg_file $avg_file
108 | cp $tmp_min_file $min_file
109 | cp $tmp_max_file $max_file
110 | 
111 | echo "Created log files in '$output_path'"
112 | 
113 | is_installed=$(pip list | grep -F matplotlib)
114 | 
115 | if [ -z "$is_installed" ]; then
116 | 	echo "Matplotlib is not installed. So the graph can not be created."
117 | else
118 | 	eval "python3 graph.py $avg_file $min_file $max_file fig10.pdf"
119 | fi
120 | 


--------------------------------------------------------------------------------
/scripts/fig12/graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[3]:
  5 | 
  6 | import numpy as np
  7 | import matplotlib.pyplot as plt
  8 | from mpl_toolkits.axes_grid1 import make_axes_locatable
  9 | from matplotlib import gridspec
 10 | import sys
 11 | import os
 12 | import csv
 13 | 
 14 | def get_data(target):
 15 |     target = "{}/bert_base_{}".format(sys.argv[1], target)
 16 |     target = target.strip()
 17 |     if target[0] != '/':
 18 |         target = os.path.join(os.getcwd(), target)
 19 | 
 20 |     latency = np.array([])
 21 |     goodput = np.array([])
 22 |     cold = np.array([])
 23 | 
 24 |     result = []
 25 | 
 26 |     with open(target, 'r', encoding='utf-8') as f:
 27 |         rdr = csv.reader(f)
 28 |         for i, line in enumerate(rdr):
 29 |             latency = np.append(latency, float(line[0]))
 30 |             goodput = np.append(goodput, float(line[1]))
 31 |             cold = np.append(cold, float(line[2]))
 32 | 
 33 |         result.append(latency)
 34 |         result.append(goodput)
 35 |         result.append(cold)
 36 | 
 37 |         return result
 38 | 
 39 | x_value = [20 * i for i in range(1, 11)]
 40 | 
 41 | 
 42 | label_list = ["PipeSwitch", "DeepPlan (DHA)", "DeepPlan (PT+DHA)"]
 43 | # color_list = ['#AEB6BF', '#5D6D7E', '#34495E', '#273746', '#273746']
 44 | color_list = ['#AEB6BF', '#5D6D7E', '#273746']
 45 | marker_list = ['o', '^', 'P']
 46 | line_list = ['-', 'dotted', 'dashed']
 47 | 
 48 | # Prepare these files
 49 | engine_list = ["pipeline.csv", "deepplan.csv", "deepplan+.csv"]
 50 | 
 51 | ylim_list = {
 52 |             "bert_base": [(0, 300), (30, 105), (0, 60)],
 53 |             "bert_large": [],
 54 |             "roberta_base": [],
 55 |             "roberta_large": [],
 56 |             "gpt2": [],
 57 |             "gpt2_medium": []}
 58 | 
 59 | 
 60 | x_label = "# of model instances (concurrency)"
 61 | y_label = ["99 % latency (ms)", "Goodput (%)", "Cold-start (%)"]
 62 | 
 63 | FONTSIZE_LABEL = 16
 64 | FONTSIZE_TICK = 13
 65 | FONTSIZE_LEGEND = 14
 66 | SIZE_FIGURE = (7, 7)
 67 | LINE_WIDTH = 3
 68 | ARKER_SIZE = 10
 69 | MARKER_SIZE = 10
 70 | 
 71 | 
 72 | plt.figure(figsize=SIZE_FIGURE)
 73 | gs = gridspec.GridSpec(nrows=3,
 74 |                        ncols=1,
 75 |                        height_ratios=[1, 0.8, 0.8]
 76 |                       )
 77 | 
 78 | li_ax = []
 79 | for i in range(0, 3):
 80 |     li_ax.append(plt.subplot(gs[i]))
 81 | 
 82 | for i, engine in enumerate(engine_list):
 83 |     result = get_data(engine)
 84 | 
 85 |     for j, ax in enumerate(li_ax):
 86 | 
 87 |         ax.plot(x_value, result[j], linewidth = LINE_WIDTH, color=color_list[i], marker=marker_list[i], linestyle=line_list[i], markersize=MARKER_SIZE)
 88 | 
 89 |         ax.set_ylim(ylim_list["bert_base"][j])
 90 |         ax.tick_params(axis='both', labelsize=FONTSIZE_TICK)
 91 | 
 92 |         ax.set_xticks(x_value)
 93 |         if j < 2:
 94 |             ax.axes.xaxis.set_ticklabels([])
 95 |             ax.set_ylabel(y_label[j], fontsize=FONTSIZE_LABEL, labelpad=10)
 96 |         else:
 97 |             ax.set_ylabel(y_label[j], fontsize=FONTSIZE_LABEL, labelpad=18)
 98 | 
 99 | 
100 |         if j == 0:
101 |             ax.axhline(y=100, color='gray', linestyle='--')
102 |             ax.text(20, 150, "Target SLO", fontsize=FONTSIZE_TICK)
103 | 
104 |         ax.grid(alpha=0.5, linestyle='--')
105 | 
106 | plt.legend(labels=label_list, bbox_to_anchor=(0.43, 3.55), ncol=3, loc='center', columnspacing=0.6,
107 |            fontsize=FONTSIZE_LEGEND, edgecolor="#FFFFFF")
108 | 
109 | plt.xlabel(x_label, fontsize=FONTSIZE_LABEL, labelpad=10)
110 | 
111 | plt.subplots_adjust(hspace=0.06)
112 | plt.rcParams["font.family"] = "Helvetica"
113 | plt.savefig(sys.argv[2], bbox_inches="tight", pad_inches=0.0)
114 | 
115 | print("Saved graph to {}".format(sys.argv[2]))
116 | 


--------------------------------------------------------------------------------
/scripts/fig12/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | PLAN_REPO=${PLAN_REPO}
  4 | if [[ -z "$PLAN_REPO" ]]; then
  5 | 	echo "PLAN_REPO environment variable not set, please set this variable"
  6 | 	return
  7 | fi
  8 | 
  9 | export PLAN_REPO=${PLAN_REPO}
 10 | 
 11 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 12 | build_path="$script_path/../../build"
 13 | 
 14 | TARGET="client"
 15 | 
 16 | model_name="bert_base"
 17 | min_conc=20
 18 | max_conc=200
 19 | step_conc=20
 20 | rate=100
 21 | 
 22 | engines=("deepplan+" "deepplan" "pipeline")
 23 | 
 24 | server_cmd="$build_path/server"
 25 | 
 26 | echo "Run Server"
 27 | $server_cmd & 1> /dev/null
 28 | 
 29 | echo "Wait 30 seconds for the server to be ready."
 30 | sleep 30
 31 | 
 32 | for engine in "${engines[@]}"; do
 33 | 	p_option=1
 34 | 
 35 | 	_engine=$engine
 36 | 	if [ "$engine" = "deepplan+" ]; then
 37 | 		_engine="deepplan"
 38 | 		p_option=2
 39 | 	fi
 40 | 
 41 | 	tmp_file="/tmp/deepplan_${engine}_fig12"
 42 | 	printf "" > $tmp_file
 43 | 
 44 | 	echo "Model Setup"
 45 | 	client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c $max_conc -w simple -p $p_option"
 46 | 	$client_cmd 1> /dev/null
 47 | 
 48 | 	echo "Start Experiment ($engine)"
 49 | 	for ((c=$min_conc; c<=$max_conc; c+=$step_conc)); do
 50 | 		echo "== Concurrency $c =="
 51 | 		client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c $c -w simple -p $p_option"
 52 | 		output=`$client_cmd`
 53 | 
 54 | 		latency=$(echo "$output" | awk '{if ($2 == "Latency:") { print $(NF-1)}}')
 55 | 		goodput_rate=$(echo "$output" | awk '{if ($1 == "Goodput") { print $(NF-1)}}')
 56 | 		cold_rate=$(echo "$output" | awk '{if ($1 == "Cold") { print $(NF-1)}}')
 57 | 		echo "$output"
 58 | 		printf "$latency, $goodput_rate, $cold_rate" >> $tmp_file
 59 | 		echo "" >> $tmp_file
 60 | 	done
 61 | 
 62 | done
 63 | 
 64 | log_path="$script_path/logs"
 65 | 
 66 | # Check for log_path existence
 67 | if [ ! -d "$log_path" ]; then
 68 | 	mkdir -p $log_path
 69 | 	echo "Created $log_path directory where log files will be stored"
 70 | fi
 71 | 
 72 | date=`date +%y-%m-%d`
 73 | 
 74 | log_path="$log_path/$date/report"
 75 | 
 76 | version=0
 77 | while true; do
 78 | 	_log_path="${log_path}$version"
 79 | 	if [ -d "$_log_path" ]; then
 80 | 		((version++))
 81 | 	else
 82 | 		break
 83 | 	fi
 84 | done
 85 | 
 86 | log_path=$_log_path
 87 | mkdir -p "$log_path"
 88 | 
 89 | for engine in "${engines[@]}"; do
 90 | 	tmp_file="/tmp/deepplan_${engine}_fig12"
 91 | 
 92 | 	output_file="$log_path/${model_name}_${engine}.csv"
 93 | 
 94 | 	cp $tmp_file $output_file
 95 | done
 96 | 
 97 | server_pid=$(ps -ef | grep -v grep | grep "$server_cmd" | awk '{print $2}')
 98 | kill -s SIGINT $server_pid
 99 | 
100 | echo "Closing Server"
101 | 
102 | wait
103 | 
104 | is_installed=$(pip list | grep -F matplotlib)
105 | 
106 | if [ -z "$is_installed" ]; then
107 | 	echo "Matplotlib is not installed. So the graph can not be created."
108 | else
109 | 	eval "python3 graph.py $log_path fig12.pdf"
110 | fi
111 | 


--------------------------------------------------------------------------------
/scripts/fig13/graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[3]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | from mpl_toolkits.axes_grid1 import make_axes_locatable
 10 | import sys
 11 | import os
 12 | import csv
 13 | 
 14 | def get_data(model, engine):
 15 |     target = "{}/{}_{}.csv".format(sys.argv[1], model, engine)
 16 |     target = target.strip()
 17 |     if target[0] != '/':
 18 |         target = os.path.join(os.getcwd(), target)
 19 | 
 20 |     lat = np.array([])
 21 | 
 22 |     with open(target, 'r', encoding='utf-8') as f:
 23 |         rdr = csv.reader(f)
 24 |         for i, line in enumerate(rdr):
 25 |             lat = np.append(lat, float(line[0]))
 26 | 
 27 |     return lat
 28 | 
 29 | 
 30 | label_list = ["PipeSwitch", "DeepPlan (DHA)", "DeepPlan (PT+DHA)"]
 31 | #color_list = ['#AEB6BF', '#5D6D7E', '#34495E', '#273746', '#273746']
 32 | color_list = ['#AEB6BF', '#5D6D7E', '#273746']
 33 | model_list = ["bert_large", "gpt2"]
 34 | engine_list = ["pipeline", "deepplan", "deepplan+"]
 35 | marker_list = ['o', '^', 'P']
 36 | line_list = ['-', 'dotted', 'dashed']
 37 | 
 38 | x_value_list  = {
 39 |                 "bert_large": [5 * i for i in range(1, 12)],
 40 |                 "gpt2": [20 * i for i in range(1, 11)],
 41 |                 }
 42 | 
 43 | ylim_list = { # graph 모양 확인하고 조절해야함.
 44 |             "bert_base": [],
 45 |             "bert_large": [0, 850],
 46 |             "roberta_base": [],
 47 |             "roberta_large": [],
 48 |             "gpt2": [0, 900],
 49 |             "gpt2_medium": []
 50 |             }
 51 | 
 52 | 
 53 | x_label = "# of model instances (concurrency)"
 54 | y_label = "99 % latency (ms)"
 55 | 
 56 | FONTSIZE_LABEL = 16
 57 | FONTSIZE_TICK = 15
 58 | FONTSIZE_LEGEND = 14
 59 | SIZE_FIGURE = (7, 7)
 60 | LINE_WIDTH = 3
 61 | MARKER_SIZE = 10
 62 | 
 63 | plt.figure(figsize=SIZE_FIGURE)
 64 | 
 65 | li_ax = []
 66 | for i in range(1, len(model_list) + 1):
 67 |     li_ax.append(plt.subplot(len(model_list), 1, i))
 68 | 
 69 | for i, model in enumerate(model_list):
 70 | 
 71 |     graph_title = ""
 72 |     if model_list[i] == "bert_large":
 73 |         graph_title = "BERT-Large"
 74 | 
 75 |     elif model_list[i] == "gpt2":
 76 |         graph_title = "GPT-2"
 77 | 
 78 |     for j, engine in enumerate(engine_list):
 79 |         result = get_data(model, engine)
 80 | 
 81 |         li_ax[i].plot(x_value_list[model_list[i]], result, linewidth = LINE_WIDTH, color=color_list[j], marker=marker_list[j], linestyle=line_list[j], markersize=MARKER_SIZE)
 82 |         
 83 |         li_ax[i].set_title(graph_title, fontsize=FONTSIZE_LABEL+2)
 84 |         li_ax[i].set_xticks(x_value_list[model_list[i]]) #, fontsize=FONTSIZE_TICK)
 85 |         li_ax[i].set_ylim(ylim_list[model_list[i]])
 86 | 
 87 |         li_ax[i].set_ylabel(y_label, fontsize=FONTSIZE_LABEL, labelpad=10)
 88 |         li_ax[i].tick_params(which="major", labelsize=FONTSIZE_TICK)
 89 |         li_ax[i].grid(alpha=0.5, linestyle='--')
 90 | 
 91 | 
 92 | plt.legend(labels=label_list, bbox_to_anchor=(0.45, 2.60), ncol=3, loc='center', columnspacing=0.5,
 93 |            fontsize=FONTSIZE_LEGEND, edgecolor="#FFFFFF")
 94 | 
 95 | plt.xlabel(x_label, fontsize=FONTSIZE_LABEL, labelpad=10)
 96 | 
 97 | plt.subplots_adjust(hspace=0.35)
 98 | plt.rcParams["font.family"] = "Helvetica"
 99 | plt.savefig(sys.argv[2], bbox_inches="tight", pad_inches=0.0)
100 | print("Saved graph to {}".format(sys.argv[2]))
101 | 


--------------------------------------------------------------------------------
/scripts/fig13/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | PLAN_REPO=${PLAN_REPO}
  4 | 
  5 | if [[ -z "$PLAN_REPO" ]]; then
  6 | 	echo "PLAN_REPO environment variable not set, please set this variable"
  7 | 	return
  8 | fi
  9 | 
 10 | export PLAN_REPO=${PLAN_REPO}
 11 | 
 12 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd
 13 | ) build_path="$script_path/../../build"
 14 | 
 15 | TARGET="client"
 16 | 
 17 | engines=("deepplan+" "deepplan" "pipeline")
 18 | 
 19 | server_cmd="$build_path/server"
 20 | 
 21 | echo "Run Server"
 22 | $server_cmd & 1> /dev/null
 23 | 
 24 | echo "Wait 30 seconds for the server to be ready."
 25 | sleep 30
 26 | 
 27 | model_name="bert_large" min_conc=5 max_conc=55 step_conc=5 rate=30
 28 | 
 29 | for engine in "${engines[@]}"; do p_option=1
 30 | 
 31 | 	_engine=$engine
 32 | 	if [ "$engine" = "deepplan+" ]; then
 33 | 		_engine="deepplan"
 34 | 		p_option=2
 35 | 	fi
 36 | 
 37 | 	tmp_file="/tmp/deepplan_${model_name}_${engine}_fig13"
 38 | 	printf "" > $tmp_file
 39 | 
 40 | 	echo "Model Setup"
 41 | 	client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c 60 -w simple -p $p_option"
 42 | 	$client_cmd 1> /dev/null
 43 | 
 44 | 	echo "Start Experiment ($engine)"
 45 | 	for ((c=$min_conc; c<=$max_conc; c+=$step_conc)); do
 46 | 		echo "== Concurrency $c =="
 47 | 		client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c $c -w simple -p $p_option"
 48 | 		output=`$client_cmd`
 49 | 
 50 | 		latency=$(echo "$output" | awk '{if ($2 == "Latency:") { print $(NF-1)}}')
 51 | 		goodput_rate=$(echo "$output" | awk '{if ($1 == "Goodput") { print $(NF-1)}}')
 52 | 		cold_rate=$(echo "$output" | awk '{if ($1 == "Cold") { print $(NF-1)}}')
 53 | 		echo "$output"
 54 | 		printf "$latency, $goodput_rate, $cold_rate" >> $tmp_file
 55 | 		echo "" >> $tmp_file
 56 | 
 57 | 	done
 58 | done
 59 | 
 60 | server_pid=$(ps -ef | grep -v grep | grep "$server_cmd" | awk '{print $2}')
 61 | kill -s SIGINT $server_pid
 62 | 
 63 | echo "Closing Server"
 64 | 
 65 | wait
 66 | 
 67 | echo "Run Server"
 68 | $server_cmd & 1> /dev/null
 69 | 
 70 | echo "Wait 30 seconds for the server to be ready."
 71 | sleep 30
 72 | 
 73 | model_name="gpt2"
 74 | min_conc=20
 75 | max_conc=200
 76 | step_conc=20
 77 | rate=90
 78 | 
 79 | for engine in "${engines[@]}"; do
 80 | 	p_option=1
 81 | 
 82 | 	_engine=$engine
 83 | 	if [ "$engine" = "deepplan+" ]; then
 84 | 		_engine="deepplan"
 85 | 		p_option=2
 86 | 	fi
 87 | 
 88 | 	tmp_file="/tmp/deepplan_${model_name}_${engine}_fig13"
 89 | 	printf "" > $tmp_file
 90 | 
 91 | 	echo "Model Setup"
 92 | 	client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c $max_conc -w simple -p $p_option"
 93 | 	$client_cmd 1> /dev/null
 94 | 
 95 | 	echo "Start Experiment ($engine)"
 96 | 	for ((c=$min_conc; c<=$max_conc; c+=$step_conc)); do
 97 | 		echo "== Concurrency $c =="
 98 | 		client_cmd="$build_path/client -m $model_name -e $_engine -r $rate -c $c -w simple -p $p_option -s 200"
 99 | 		output=`$client_cmd`
100 | 
101 | 		latency=$(echo "$output" | awk '{if ($2 == "Latency:") { print $(NF-1)}}')
102 | 		goodput_rate=$(echo "$output" | awk '{if ($1 == "Goodput") { print $(NF-1)}}')
103 | 		cold_rate=$(echo "$output" | awk '{if ($1 == "Cold") { print $(NF-1)}}')
104 | 		echo "$output"
105 | 		printf "$latency, $goodput_rate, $cold_rate" >> $tmp_file
106 | 		echo "" >> $tmp_file
107 | 	done
108 | 
109 | 
110 | done
111 | 
112 | server_pid=$(ps -ef | grep -v grep | grep "$server_cmd" | awk '{print $2}')
113 | kill -s SIGINT $server_pid
114 | 
115 | echo "Closing Server"
116 | 
117 | wait
118 | 
119 | log_path="$script_path/logs"
120 | 
121 | # Check for log_path existence
122 | if [ ! -d "$log_path" ]; then
123 | 	mkdir -p $log_path
124 | 	echo "Created $log_path directory where log files will be stored"
125 | fi
126 | 
127 | date=`date +%y-%m-%d`
128 | 
129 | log_path="$log_path/$date/report"
130 | 
131 | version=0
132 | while true; do
133 | 	_log_path="${log_path}$version"
134 | 	if [ -d "$_log_path" ]; then
135 | 		((version++))
136 | 	else
137 | 		break
138 | 	fi
139 | done
140 | 
141 | log_path=$_log_path
142 | mkdir -p "$log_path"
143 | 
144 | model_names=("bert_large" "gpt2")
145 | 
146 | for model in "${model_names[@]}"; do
147 | 	for engine in "${engines[@]}"; do
148 | 		tmp_file="/tmp/deepplan_${model}_${engine}_fig13"
149 | 
150 | 		output_file="$log_path/${model}_${engine}.csv"
151 | 
152 | 		cp $tmp_file $output_file
153 | 
154 | 		echo "Created '$output_file' log file"
155 | 	done
156 | done
157 | 
158 | is_installed=$(pip list | grep -F matplotlib)
159 | 
160 | if [ -z "$is_installed" ]; then
161 | 	echo "Matplotlib is not installed. So the graph can not be created."
162 | else
163 | 	eval "python3 graph.py $log_path fig13.pdf"
164 | fi
165 | 


--------------------------------------------------------------------------------
/scripts/fig14/graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # coding: utf-8
  3 | 
  4 | # In[3]:
  5 | 
  6 | 
  7 | import numpy as np
  8 | import matplotlib.pyplot as plt
  9 | from mpl_toolkits.axes_grid1 import make_axes_locatable
 10 | from matplotlib import gridspec
 11 | import sys
 12 | import os
 13 | import csv
 14 | 
 15 | def get_data(target):
 16 |     target = "{}/{}".format(sys.argv[1], target)
 17 |     target = target.strip()
 18 |     if target[0] != '/':
 19 |         target = os.path.join(os.getcwd(), target)
 20 | 
 21 |     if "offered" in target:
 22 |         offered_load = np.array([])
 23 |     else :
 24 |         result = []
 25 | 
 26 |         latency = np.array([])
 27 |         goodput = np.array([])
 28 |         cold = np.array([])
 29 | 
 30 |     with open(target, 'r', encoding='utf-8') as f:
 31 |         rdr = csv.reader(f)
 32 |         if "offered" in target:
 33 |             for i, line in enumerate(rdr):
 34 |                 offered_load = np.append(offered_load, int(line[0]))
 35 |             return offered_load
 36 | 
 37 |         else:
 38 |             for i, line in enumerate(rdr):
 39 |                 latency = np.append(latency, float(line[0]))
 40 |                 cold = np.append(cold, float(line[1]))
 41 |                 goodput = np.append(goodput, float(line[2]))
 42 | 
 43 |             result.append(latency)
 44 |             result.append(goodput)
 45 |             result.append(cold)
 46 | 
 47 |         return result
 48 | 
 49 | 
 50 | 
 51 | x_value = [i for i in range(1, 181)]
 52 | x_ticks = [30 * i for i in range(0, 7)]
 53 | 
 54 | 
 55 | label_list = ["PipeSwitch", "DeepPlan (DHA)", "DeepPlan (PT+DHA)"]
 56 | #color_list = ['#EAECEE', '#AEB6BF', '#85929E', '#5D6D7E', '#34495E', '#273746']
 57 | 
 58 | # Prepare these files
 59 | engine_list = ["pipeline.csv", "deepplan.csv", "deepplan+.csv"]
 60 | 
 61 | color_list = ['#AEB6BF', '#5D6D7E', '#273746']
 62 | line_list = ['solid', 'dotted', 'dashdot']
 63 | 
 64 | ylim_list = [(5500, 10001), (0, 550), (50, 103), (0, 22.5)]
 65 | 
 66 | x_label = "Time (minutes)"
 67 | y_label = ["Offered load\n (req./min.)", "99 % latency\n (ms)", "Goodput\n (%)", "Cold-start\n (%)"]
 68 | 
 69 | FONTSIZE_XLABEL = 16
 70 | FONTSIZE_YLABEL = 14
 71 | FONTSIZE_TICK = 13
 72 | FONTSIZE_LEGEND = 14
 73 | SIZE_FIGURE = (7, 7)
 74 | LINE_WIDTH = 1.5
 75 | ARKER_SIZE = 10
 76 | MARKER_SIZE = 10
 77 | 
 78 | 
 79 | plt.figure(figsize=SIZE_FIGURE)
 80 | gs = gridspec.GridSpec(nrows=4, # row 몇 개
 81 |                        ncols=1, # col 몇 개
 82 |                        height_ratios=[0.8, 1, 0.8, 0.8]
 83 |                       )
 84 | 
 85 | li_ax = []
 86 | for i in range(0, 4):
 87 |     li_ax.append(plt.subplot(gs[i]))
 88 | 
 89 |     if i == 0: # Offered Load graph
 90 |         offered_load = get_data("offered_load.csv")
 91 | 
 92 |         li_ax[i].plot(x_value, offered_load, linewidth = LINE_WIDTH, color='#000000', linestyle="solid")
 93 |         li_ax[i].set_ylim(ylim_list[i])
 94 |         li_ax[i].tick_params(axis="both", labelsize=FONTSIZE_TICK)
 95 |         li_ax[i].set_xticks(x_ticks)
 96 | 
 97 |         li_ax[i].axes.xaxis.set_ticklabels([])
 98 | 
 99 |         li_ax[i].set_ylabel(y_label[i], fontsize=FONTSIZE_YLABEL)
100 |         li_ax[i].get_yaxis().set_label_coords(-0.13, 0.5)
101 | 
102 |         li_ax[i].set_xlim(0, 180)
103 | 
104 |         li_ax[i].grid(alpha=1, linestyle='--')
105 | 
106 | 
107 | for i, engine in enumerate(engine_list):
108 |     result = get_data(engine) # Read data
109 |     for j, ax in enumerate(li_ax):
110 |         if j > 0:
111 |             ax.plot(x_value, result[j-1], linewidth = LINE_WIDTH, color=color_list[i], linestyle=line_list[i], markersize=MARKER_SIZE)
112 | 
113 |             ax.set_ylim(ylim_list[j])
114 |             ax.tick_params(axis='both', labelsize=FONTSIZE_TICK)
115 | 
116 |             ax.set_xticks(x_ticks)
117 |             if j < 3:
118 |                 ax.axes.xaxis.set_ticklabels([])
119 | 
120 |             ax.set_ylabel(y_label[j], fontsize=FONTSIZE_YLABEL)
121 |             ax.get_yaxis().set_label_coords(-0.13, 0.5)
122 | 
123 |             ax.set_xlim(0, 180)
124 | 
125 |             ax.grid(alpha=1, linestyle='--')
126 | 
127 | plt.legend(labels=label_list, bbox_to_anchor=(0.43, 4.7), ncol=3, loc='center', columnspacing=0.6,
128 |            fontsize=FONTSIZE_LEGEND, edgecolor="#FFFFFF")
129 | 
130 | plt.xlabel(x_label, fontsize=FONTSIZE_XLABEL, labelpad=10)
131 | 
132 | plt.subplots_adjust(hspace=0.06)
133 | plt.rcParams["font.family"] = "Helvetica"
134 | plt.savefig(sys.argv[2], bbox_inches="tight", pad_inches=0.0)
135 | 
136 | print("Saved graph to {}".format(sys.argv[2]))
137 | 


--------------------------------------------------------------------------------
/scripts/fig14/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | PLAN_REPO=${PLAN_REPO}
  4 | if [[ -z "$PLAN_REPO" ]]; then
  5 | 	echo "PLAN_REPO environment variable not set, please set this variable"
  6 | 	return
  7 | fi
  8 | 
  9 | AZURE_TRACE_DIR=${AZURE_TRACE_DIR}
 10 | if [[ -z "$AZURE_TRACE_DIR" ]]; then
 11 | 	echo "AZURE_TRACE_DIR environment variable not set, please set thie variable"
 12 | 	return
 13 | fi
 14 | 
 15 | export PLAN_REPO=${PLAN_REPO}
 16 | 
 17 | script_path=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 18 | build_path="$script_path/../../build"
 19 | 
 20 | model_list="gpt2_384 bert_base bert_base bert_base bert_base roberta_base roberta_base roberta_base roberta_base"
 21 | conc=252
 22 | rate=150
 23 | 
 24 | engines=("deepplan+" "deepplan" "pipeline")
 25 | 
 26 | for engine in "${engines[@]}"; do
 27 | 	server_cmd="$build_path/server"
 28 | 
 29 | 	echo "Run Server"
 30 | 	$server_cmd & 1> /dev/null
 31 | 
 32 | 	echo "Wait 30 seconds for the server to be ready."
 33 | 	sleep 30
 34 | 
 35 | 	p_option=1
 36 | 
 37 | 	_engine=$engine
 38 | 	if [ "$engine" = "deepplan+" ]; then
 39 | 		_engine="deepplan"
 40 | 		p_option=2
 41 | 	fi
 42 | 
 43 | 	tmp_file="/tmp/deepplan_${engine}_fig14"
 44 | 	printf "" > $tmp_file
 45 | 
 46 | 	echo "Start Experiment ($engine)"
 47 | 	client_cmd="$build_path/client -m $model_list -e $_engine -r $rate -c $conc -w azure -p $p_option"
 48 | 	stdbuf --output=L $client_cmd | tee -a $tmp_file
 49 | 
 50 | 	server_pid=$(ps -ef | grep -v grep | grep "$server_cmd" | awk '{print $2}')
 51 | 	kill -s SIGINT $server_pid
 52 | 
 53 | 	echo "Closing Server"
 54 | 
 55 | 	wait
 56 | 
 57 | done
 58 | 
 59 | log_path="$script_path/logs"
 60 | 
 61 | # Check for log_path existence
 62 | if [ ! -d "$log_path" ]; then
 63 | 	mkdir -p $log_path
 64 | 	echo "Created $log_path directory where log files will be stored"
 65 | fi
 66 | 
 67 | date=`date +%y-%m-%d`
 68 | 
 69 | log_path="$log_path/$date/report"
 70 | 
 71 | version=0
 72 | while true; do
 73 | 	_log_path="${log_path}$version"
 74 | 	if [ -d "$_log_path" ]; then
 75 | 		((version++))
 76 | 	else
 77 | 		break
 78 | 	fi
 79 | done
 80 | 
 81 | log_path=$_log_path
 82 | mkdir -p "$log_path"
 83 | 
 84 | for engine in "${engines[@]}"; do
 85 | 	tmp_file="/tmp/deepplan_${engine}_fig14"
 86 | 
 87 | 	output_file="$log_path/${engine}.csv"
 88 | 
 89 | 	awk '$1 ~ /^[0-9]*,/ { print $3 $4 $5 }' $tmp_file > $output_file
 90 | 
 91 | 	echo "Created '$output_file' log file"
 92 | done
 93 | 
 94 | output_file="$log_path/offered_load.csv"
 95 | awk '$1 ~ /^[0-9]*,/ { print $2 }' $tmp_file > "$log_path/offered_load.csv"
 96 | echo "Created '$output_file' log file"
 97 | 
 98 | is_installed=$(pip list | grep -F matplotlib)
 99 | 
100 | if [ -z "$is_installed" ]; then
101 | 	echo "Matplotlib is not installed. So the graph can not be created."
102 | else
103 | 	eval "python3 graph.py $log_path fig14.pdf"
104 | fi
105 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 2 | 
 3 | set(DEEPPLAN_SRC
 4 | 		util.cpp
 5 | 		deepplan/model.cpp
 6 | 		deepplan/engine.cpp
 7 | )
 8 | 
 9 | set(NETWORK_SRC
10 | 		network/network.cpp
11 | 		network/session.cpp
12 | 		network/server_api.cpp)
13 | 
14 | 
15 | set(SERVER_SRC
16 | 		server.cpp
17 | 		server/server.cpp
18 | 		server/controller.cpp
19 | 		server/model_manager.cpp
20 | 		server/worker.cpp
21 | )
22 | 
23 | set(CLIENT_SRC
24 | 		client.cpp
25 | 		client/client.cpp
26 | 		client/workload.cpp
27 | )
28 | 
29 | set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
30 | 
31 | add_library(deepplan ${DEEPPLAN_SRC})
32 | target_link_libraries(deepplan PRIVATE
33 | 	deepplan_proto
34 | 	tbb
35 | 	${Boost_LIBRARIES}
36 | 	${TORCH_LIBRARIES}
37 | )
38 | 
39 | add_library(network ${NETWORK_SRC})
40 | target_link_libraries(network PRIVATE
41 | 	deepplan_proto
42 | 	deepcache_proto
43 | 	tbb
44 | 	${Boost_LIBRARIES}
45 | )
46 | 
47 | add_executable(benchmark benchmark.cpp)
48 | target_link_libraries(benchmark
49 | 	deepplan
50 | 	${Boost_LIBRARIES}
51 | 	${TORCH_LIBRARIES}
52 | )
53 | 
54 | add_executable(server ${SERVER_SRC})
55 | target_link_libraries(server
56 | 	deepplan
57 | 	network
58 | 	tbb
59 | 	${Boost_LIBRARIES}
60 | 	${TORCH_LIBRARIES}
61 | )
62 | 
63 | add_executable(client ${CLIENT_SRC})
64 | target_link_libraries(client
65 | 	deepplan
66 | 	network
67 | 	tbb
68 | 	${Boost_LIBRARIES}
69 | 	${TORCH_LIBRARIES}
70 | )
71 | 


--------------------------------------------------------------------------------
/src/benchmark.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <string>
  3 | #include <unistd.h>
  4 | #include <getopt.h>
  5 | #include <torch/script.h>
  6 | #include <c10/cuda/CUDAStream.h>
  7 | #include <torch/cuda.h>
  8 | #include <c10/cuda/CUDAGuard.h>
  9 | #include <cuda_runtime_api.h>
 10 | 
 11 | #include <deepplan/model.h>
 12 | #include <deepplan/engine.h>
 13 | #include <util.h>
 14 | 
 15 | struct BenchmarkOptions {
 16 |   std::string model_name;
 17 |   EngineType engine_type;
 18 |   std::vector<int> devices;
 19 |   int batch_size;
 20 |   int num_warmup;
 21 |   int num_test;
 22 | };
 23 | 
 24 | static struct option long_options[] =
 25 | {
 26 |   {"help",    no_argument,       0, 'h' },
 27 |   {"model",   required_argument, 0, 'm' },
 28 |   {"engine",  required_argument, 0, 'e' },
 29 |   {"devices", required_argument, 0, 'd' },
 30 |   {"batch",   required_argument, 0, 'b' },
 31 |   {0,         0,                 0,  0  }
 32 | };
 33 | 
 34 | static void print_usage(char* program_name) {
 35 |   fprintf(stderr,
 36 |       "Usage : %s [-h] --model/-m MODEL_NAME [--device/-d DEVICES [DEVICES ...]]\n"
 37 |       "\t\t[--engine/-e {in_memory,demand,pipeline,deepplan}]\n"
 38 |       "\t\t[--batch/-b BATCH_SIZE\n",
 39 |       program_name);
 40 | }
 41 | 
 42 | void parseOptions(BenchmarkOptions** benchmark_options, int argc, char** argv) {
 43 |   *benchmark_options = new BenchmarkOptions();
 44 |   auto options = *benchmark_options;
 45 |   char flag;
 46 | 
 47 |   char engine_types[][20] = { "in_memory", "demand", "pipeline", "deepplan"};
 48 |   int n_types = sizeof(engine_types) / 20;
 49 |   bool found = false;
 50 |   bool pass_model = false;
 51 | 
 52 |   options->num_warmup  = 20;
 53 |   options->num_test    = 200;
 54 |   options->batch_size  = 1;
 55 |   options->engine_type = EngineType::IN_MEMORY;
 56 |   options->devices     = std::vector<int>(1, 0); // = [0]
 57 | 
 58 |   while ((flag = getopt_long(argc, argv, "b:d:e:hm:", long_options, NULL)) != -1) { 
 59 |     switch (flag) {
 60 |       case 'h':
 61 |         print_usage(argv[0]);
 62 |         break;
 63 |       case 'm':
 64 |         options->model_name = std::string(optarg);
 65 |         pass_model = true;
 66 |         break;
 67 |       case 'e':
 68 |         found = false;
 69 |         for (int i = 0; i < n_types; i++) {
 70 |           if (!strcmp(engine_types[i], optarg)) {
 71 |             options->engine_type = EngineType(i);
 72 |             found = true;
 73 |             break;
 74 |           }
 75 |         }
 76 | 
 77 |         if (!found) {
 78 |           print_usage(argv[0]);
 79 |           fprintf(stderr, "[Error] argument --engine/-e: invalid choice: %s (choose from",
 80 |               optarg);
 81 |           for (int i = 0; i < n_types; i++) {
 82 |             fprintf(stderr, " \'%s\'", engine_types[i]);
 83 |           }
 84 |           fprintf(stderr, ")\n");
 85 |           exit(EXIT_FAILURE);
 86 |         }
 87 |         break;
 88 |       case 'b':
 89 |         options->batch_size = (int)strtol(optarg, NULL, 10);
 90 |         break;
 91 |       case 'd':
 92 |         optind--;
 93 |         {
 94 |           std::vector<int> devices;
 95 |           for ( ; optind < argc && *argv[optind] != '-'; optind++) {
 96 |             devices.push_back((int)strtol(argv[optind], NULL, 10));
 97 |           }
 98 |           options->devices = devices;
 99 |         }
100 |         break;
101 |       default:
102 |         print_usage(argv[0]);
103 |         exit(EXIT_FAILURE);
104 |         break;
105 |         bool found = false;
106 |     }
107 |   }
108 | 
109 |   if (!pass_model) {
110 |     print_usage(argv[0]);
111 |     fprintf(stderr, "[Error] the following arguments are required: --model_name/-m\n");
112 |     exit(EXIT_FAILURE);
113 |   }
114 | }
115 | 
116 | void benchmark(BenchmarkOptions* options) {
117 |   double t1, t2, total_ms = 0;
118 |   std::vector<double> latencies;
119 | 
120 |   int num_warmup = options->num_warmup;
121 |   int num_test   = options->num_test;
122 |   int batch_size  = options->batch_size;
123 |   at::Device target_device(at::kCUDA, options->devices[0]);
124 | 
125 |   torch::NoGradGuard no_grad;
126 | 
127 |   deepplan::Model* model = new deepplan::Model(
128 |                                             options->model_name,
129 |                                             options->engine_type,
130 |                                             options->devices);
131 | 
132 |   util::InputGenerator input_generator;
133 | 
134 |   ScriptModuleInput inputs;
135 |   input_generator.generate_input(options->model_name, batch_size, &inputs);
136 | 
137 |   for (auto& input : inputs) {
138 |     input = input.toTensor().to(model->target_device);
139 |   }
140 | 
141 |   if (options->engine_type == IN_MEMORY)
142 |     model->to(target_device);
143 | 
144 |   for (int step = 0; step < num_warmup+num_test; step++) {
145 |     t1 = util::now();
146 | 
147 |     if (options->engine_type == ON_DEMAND) {
148 |       model->to(target_device, true);
149 |       torch::cuda::synchronize(target_device.index());
150 |     }
151 | 
152 |     auto outputs = model->forward(inputs);
153 | 
154 |     torch::cuda::synchronize(target_device.index());
155 |     t2 = util::now();
156 | 
157 |     if (options->engine_type != IN_MEMORY) {
158 |       model->clear();
159 |     }
160 | 
161 |     if (step >= num_warmup) {
162 |       latencies.push_back((t2-t1) / 1e6);
163 |     }
164 |   }
165 | 
166 |   std::sort(latencies.begin(), latencies.end());
167 | 
168 |   total_ms = std::accumulate(latencies.begin(), latencies.end(), 0.f);
169 |   double avg_latency = total_ms / num_test;
170 | 
171 |   std::cout << "Average Latency : " << avg_latency << " ms\n";
172 |   std::cout << "Min Latency : " << latencies.front() << " ms\n";
173 |   std::cout << "Max Latency : " << latencies.back() << " ms\n";
174 | 
175 |   return;
176 | }
177 | 
178 | int main(int argc, char** argv) {
179 |   BenchmarkOptions* benchmark_options;
180 |   parseOptions(&benchmark_options, argc, argv);
181 | 
182 |   std::cout << "Benchmarking Inference " << benchmark_options->model_name << "\n";
183 | 
184 |   deepplan::Init();
185 | 
186 |   benchmark(benchmark_options);
187 | 
188 |   deepplan::Deinit();
189 | 
190 |   return 0;
191 | }
192 | 


--------------------------------------------------------------------------------
/src/client.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <getopt.h>
  3 | 
  4 | #include <client/workload.h>
  5 | #include <client/azure.h>
  6 | #include <util.h>
  7 | 
  8 | typedef enum {
  9 |   SIMPLE = 0,
 10 |   BURSTY,
 11 |   AZURE,
 12 | } WorkloadType;
 13 | 
 14 | struct ClientOptions {
 15 |   WorkloadType workload_type;
 16 |   std::vector<std::string> model_names;
 17 |   int concurrency;
 18 |   int rate;
 19 |   int mp_size;
 20 |   EngineType engine_type;
 21 |   int slo;
 22 |   int n_warmup;
 23 |   int n_test;
 24 | };
 25 | 
 26 | static struct option long_options[] =
 27 | {
 28 |   {"help",          no_argument,        0,  'h' },
 29 |   {"workload",      required_argument,  0,  'w' },
 30 |   {"model",         required_argument,  0,  'm' },
 31 |   {"concurrency",   required_argument,  0,  'c' },
 32 |   {"rate",          required_argument,  0,  'r' },
 33 |   {"mp_size",       required_argument,  0,  'p' },
 34 |   {"engine",        required_argument,  0,  'e' },
 35 |   {"slo",           required_argument,  0,  's' },
 36 |   {0, 0, 0, 0}
 37 | };
 38 | 
 39 | static void print_usage(char* program_name) {
 40 |   fprintf(stderr,
 41 |       "Usage : %s [-h] --workload/-w WORKLOAD --model/-m MODEL_NAME\n"
 42 |       "\t\t--concurrency/-c CONCURRENCY --rate/-r RATE [--mp_size/-p MP_SIZE]\n"
 43 |       "\t\t[--engine/-e {in_memory,demand,pipeline,deepplan}]\n"
 44 |       "\t\t[--slo/-s SLO]\n",
 45 |       program_name);
 46 | }
 47 | 
 48 | void parseOptions(ClientOptions** benchmark_options, int argc, char** argv) {
 49 |   *benchmark_options = new ClientOptions();
 50 |   auto options = *benchmark_options;
 51 |   char flag;
 52 | 
 53 |   char engine_types[][20] = { "in_memory", "demand", "pipeline", "deepplan" };
 54 |   char workload_types[][20] = { "simple", "bursty", "azure" };
 55 |   int n_engine_types = sizeof(engine_types) / 20;
 56 |   int n_workload_types = sizeof(workload_types) / 20;
 57 |   bool found = false;
 58 |   bool pass_model = false;
 59 |   bool pass_concurrency = false;
 60 |   bool pass_rate = false;
 61 | 
 62 |   options->mp_size   = 1;
 63 |   options->n_warmup  = 1000;
 64 |   options->n_test    = 10000;
 65 |   options->engine_type = EngineType::DEEPPLAN;
 66 |   options->slo       = 100;
 67 | 
 68 |   while ((flag = getopt_long(argc, argv, "c:e:hm:r:s:w:p:", long_options, NULL)) != -1) {
 69 |     switch (flag) {
 70 |       case 'h':
 71 |         print_usage(argv[0]);
 72 |         break;
 73 |       case 'm':
 74 |         optind--;
 75 |         {
 76 |           std::vector<std::string> model_names;
 77 |           for ( ; optind < argc && *argv[optind] != '-'; optind++) {
 78 |             model_names.push_back(std::string(argv[optind]));
 79 |           }
 80 |           options->model_names = model_names;
 81 |         }
 82 |         pass_model = true;
 83 |         break;
 84 |       case 'c':
 85 |         options->concurrency = (int)strtol(optarg, NULL, 10);
 86 |         pass_concurrency = true;
 87 |         break;
 88 |       case 'r':
 89 |         options->rate = (int)strtol(optarg, NULL, 10);
 90 |         pass_rate = true;
 91 |         break;
 92 |       case 'p':
 93 |         options->mp_size = (int)strtol(optarg, NULL, 10);
 94 |         break;
 95 |       case 's':
 96 |         options->slo = (int)strtol(optarg, NULL, 10);
 97 |         break;
 98 |       case 'e':
 99 |         found = false;
100 |         for (int i = 0; i < n_engine_types; i++) {
101 |           if (!strcmp(engine_types[i], optarg)) {
102 |             options->engine_type = EngineType(i);
103 |             found = true;
104 |             break;
105 |           }
106 |         }
107 | 
108 |         if (!found) {
109 |           print_usage(argv[0]);
110 |           fprintf(stderr, "[Error] argument --engine/-e: invalid choice: %s (choose from",
111 |               optarg);
112 |           for (int i = 0; i < n_engine_types; i++) {
113 |             fprintf(stderr, " \'%s\'", engine_types[i]);
114 |           }
115 |           fprintf(stderr, ")\n");
116 |           exit(EXIT_FAILURE);
117 |         }
118 |         break;
119 |       case 'w':
120 |         found = false;
121 |         for (int i = 0; i < n_workload_types; i++) {
122 |           if (!strcmp(workload_types[i], optarg)) {
123 |             options->workload_type = WorkloadType(i);
124 |             found = true;
125 |             break;
126 |           }
127 |         }
128 | 
129 |         if (!found) {
130 |           print_usage(argv[0]);
131 |           fprintf(stderr, "[Error] argument --workload/-w: invalid choice: %s (choose from",
132 |               optarg);
133 |           for (int i = 0; i < n_workload_types; i++) {
134 |             fprintf(stderr, " \'%s\'", workload_types[i]);
135 |           }
136 |           fprintf(stderr, ")\n");
137 |           exit(EXIT_FAILURE);
138 |         }
139 |         break;
140 |       default:
141 |         print_usage(argv[0]);
142 |         exit(EXIT_FAILURE);
143 |         break;
144 |         bool found = false;
145 |     }
146 |   }
147 | 
148 |   if (!(pass_model && pass_concurrency && pass_rate)) {
149 |     print_usage(argv[0]);
150 |     fprintf(stderr, "[Error] the following arguments are required:");
151 |     if (!pass_model)
152 |       fprintf(stderr, " --model_name/-m");
153 |     if (!pass_concurrency)
154 |       fprintf(stderr, " --concurrency/-c");
155 |     if (!pass_rate)
156 |       fprintf(stderr, " --rate/-r");
157 | 
158 |     exit(EXIT_FAILURE);
159 |   }
160 | 
161 | }
162 | 
163 | void simple_experiment(ClientOptions* options) {
164 |   std::vector<std::string> model_names = options->model_names;
165 |   int concurrency = options->concurrency;
166 |   int rate = options->rate;
167 |   int mp_size = options->mp_size;
168 |   EngineType engine_type = options->engine_type;
169 |   int slo = options->slo;
170 | 
171 |   int n_warmup = options->n_warmup;
172 |   int n_test = rate * 100;
173 | 
174 |   auto model_loader = new ModelLoader(model_names, concurrency, engine_type, mp_size,
175 |                                       "127.0.0.1", "4321");
176 | 
177 |   std::cout << "Upload Model...\n";
178 |   model_loader->run();
179 | 
180 |   auto warmup = new Workload(concurrency, rate, n_warmup, "127.0.0.1", "4321");
181 |   auto workload = new Workload(concurrency, rate, n_test, "127.0.0.1", "4321");
182 | 
183 |   std::cout << "Warmup...\n";
184 |   warmup->run(model_loader->inputs);
185 | 
186 |   std::cout << "Test...\n";
187 |   workload->run(model_loader->inputs);
188 | 
189 |   auto result = workload->result(slo);
190 | 
191 |   std::cout << "99% Latency: " << result.latency_99 << " ms\n";
192 |   std::cout << "Cold Start Rate: " << result.cold_rate << " %\n";
193 |   std::cout << "Goodput Rate: " << result.goodput_rate << " %\n";
194 | }
195 | 
196 | void bursty_experiment(ClientOptions* options) {
197 |   std::vector<std::string> model_names = options->model_names;
198 |   int concurrency = options->concurrency;
199 |   int rate = options->rate;
200 |   int mp_size = options->mp_size;
201 |   int slo = options->slo;
202 |   EngineType engine_type = options->engine_type;
203 | 
204 |   auto model_loader = new ModelLoader(model_names, concurrency, engine_type, mp_size,
205 |                                       "127.0.0.1", "4321");
206 | 
207 |   std::cout << "Upload Model...\n";
208 |   model_loader->run();
209 | 
210 |   std::vector<Workload*> warmups;
211 |   std::vector<Workload*> workloads;
212 |   for (int i = 1; i <= concurrency; i++) {
213 |     warmups.push_back(new Workload(i, rate, rate, "127.0.0.1", "4321"));
214 | 
215 |     workloads.push_back(new Workload(i, rate, rate, "127.0.0.1", "4321"));
216 |   }
217 | 
218 |   std::cout << "Bursty Experiment\n";
219 |   std::cout << "Concurrency, 99% Latecny(ms), Cold Start Rate(%), Goodput Rate(%)\n";
220 |   for (int i = 0; i < concurrency; i++) {
221 |     warmups[i]->run(model_loader->inputs);
222 |     workloads[i]->run(model_loader->inputs);
223 |     auto result = workloads[i]->result(slo);
224 | 
225 |     std::cout << i+1 << ", ";
226 |     std::cout << result.latency_99 << ", ";
227 |     std::cout << result.cold_rate << ", ";
228 |     std::cout << result.goodput_rate << "\n";
229 |   }
230 | }
231 | 
232 | void azure_experiment(ClientOptions* options) {
233 |   std::vector<std::string> model_names = options->model_names;
234 |   int concurrency = options->concurrency;
235 |   int rate = options->rate;
236 |   int mp_size = options->mp_size;
237 |   EngineType engine_type = options->engine_type;
238 |   int slo = options->slo;
239 | 
240 |   auto model_loader = new ModelLoader(model_names, concurrency, engine_type, mp_size,
241 |                                       "127.0.0.1", "4321");
242 | 
243 |   std::cout << "Upload Model...\n";
244 |   model_loader->run();
245 | 
246 |   auto scaled_traces = azure::load_scaled_trace(rate, concurrency, 2);
247 | 
248 |   azure::transpose(scaled_traces);
249 | 
250 |   int period = 180;
251 |   std::vector<Workload*> workloads;
252 |   for (int p = 0; p < period; p++) {
253 |     workloads.push_back(new Workload(scaled_traces[p], "127.0.0.1", "4321"));
254 |   }
255 | 
256 |   std::cout << "Azure Experiment\n";
257 |   std::cout << "Minutes, Offered Load, 99% Latecny(ms), Cold Start Rate(%), Goodput Rate(%)\n";
258 |   for (int p = 0; p < period; p++) {
259 |     workloads[p]->run(model_loader->inputs);
260 |     auto result = workloads[p]->result(slo);
261 | 
262 |     std::cout << p << ", ";
263 |     std::cout << workloads[p]->n_requests << ", ";
264 |     std::cout << result.latency_99 << ", ";
265 |     std::cout << result.cold_rate << ", ";
266 |     std::cout << result.goodput_rate << "\n";
267 |   }
268 | 
269 | }
270 | 
271 | 
272 | int main(int argc, char** argv) {
273 |   ClientOptions* client_options;
274 |   parseOptions(&client_options, argc, argv);
275 | 
276 |   try {
277 |     switch (client_options->workload_type) {
278 |       case WorkloadType::SIMPLE:
279 |         simple_experiment(client_options);
280 |         break;
281 |       case WorkloadType::BURSTY:
282 |         bursty_experiment(client_options);
283 |         break;
284 |       case WorkloadType::AZURE:
285 |         azure_experiment(client_options);
286 |         break;
287 |     }
288 |   }
289 |   catch (std::exception& e) {
290 |     std::cerr << e.what() << "\n";
291 |   }
292 | 
293 |   return 0;
294 | }
295 | 


--------------------------------------------------------------------------------
/src/client/azure.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | namespace azure {
  4 | 
  5 | std::string get_trace_dir() {
  6 |   auto trace_dir = std::getenv("AZURE_TRACE_DIR");
  7 | 
  8 |   if (trace_dir == nullptr) { return ""; }
  9 |   return trace_dir == nullptr ? "" : std::string(trace_dir);
 10 | }
 11 | 
 12 | std::string get_trace_file(std::string trace_dir, int id) {
 13 |   std::stringstream ss;
 14 | 
 15 |   ss << trace_dir << "/invocations_per_function_md.anon.d";
 16 |   if (id < 10)
 17 |     ss << "0";
 18 |   ss << id << ".csv";
 19 | 
 20 |   return ss.str();
 21 | }
 22 | 
 23 | std::string get_trace(int id) {
 24 |   std::string trace_dir = get_trace_dir();
 25 | 
 26 |   if (trace_dir == "") {
 27 |     std::cerr << "AZURE_TRACE_DIR variable not set, exiting\n";
 28 |     exit(EXIT_FAILURE);
 29 |   }
 30 | 
 31 |   if (1 > id || id > 14) {
 32 |     std::cerr << "Azure workload_id must be between 1 and 14 inclusive. Got "
 33 |               << id << "\n";
 34 |     exit(EXIT_FAILURE);
 35 |   }
 36 | 
 37 |   std::string trace_file = get_trace_file(trace_dir, id);
 38 | 
 39 |   return trace_file;
 40 | }
 41 | 
 42 | std::vector<std::string> split(std::string line) {
 43 |   std::vector<std::string> result;
 44 |   std::stringstream s(line);
 45 |   while (s.good()) {
 46 |     std::string substr;
 47 |     std::getline(s, substr, ',');
 48 |     result.push_back(substr);
 49 |   }
 50 |   return result;
 51 | }
 52 | 
 53 | std::vector<unsigned> process_trace_line(std::string line, unsigned start_index) {
 54 |   std::vector<std::string> splits = split(line);
 55 |   std::vector<unsigned> result;
 56 |   for (unsigned i = start_index; i < splits.size(); i++) {
 57 |     result.push_back(std::stoul(splits[i].c_str(), NULL, 10));
 58 |   }
 59 |   return result;
 60 | }
 61 | 
 62 | std::vector<std::vector<unsigned>> read_trace_data(std::string filename) {
 63 |   std::ifstream f(filename);
 64 | 
 65 |   std::vector<std::vector<unsigned>> results;
 66 |   std::vector<std::pair<int, int>> sizes;
 67 | 
 68 |   std::string line;
 69 |   std::getline(f, line); // Skip headers
 70 |   while (std::getline(f, line)) {
 71 |     auto traceline = process_trace_line(line, 4);
 72 |     int size = std::accumulate(traceline.begin(), traceline.end(), 0);
 73 |     sizes.push_back(std::make_pair(size, results.size()));
 74 |     results.push_back(traceline);
 75 |   }
 76 | 
 77 |   std::sort(sizes.begin(), sizes.end());
 78 | 
 79 |   std::vector<std::vector<unsigned>> ordered;
 80 |   for (int i = sizes.size()-1; i >= 0; i--) {
 81 |     ordered.push_back(results[sizes[i].second]);
 82 |   }
 83 | 
 84 |   return ordered;
 85 | }
 86 | 
 87 | std::vector<std::vector<unsigned>> load_trace(int workload_id = 1) {
 88 |   return read_trace_data(get_trace(workload_id));
 89 | }
 90 | 
 91 | std::vector<std::vector<unsigned>> scale_trace_rate(std::vector<std::vector<unsigned>>& traces, int rate) {
 92 |   std::vector<std::vector<unsigned>> scaled_traces(traces.size());
 93 | 
 94 |   unsigned total_size = 0;
 95 |   double total_rate = 0;
 96 |   double scale_ratio = 0;
 97 | 
 98 |   for (auto& trace : traces)
 99 |     total_size += std::accumulate(trace.begin(), trace.end(), 0);
100 | 
101 |   total_rate = total_size / 24.0 / 60.0 / 60.0; // caculate rate(r/s);
102 |   scale_ratio = rate / total_rate;
103 | 
104 |   std::transform(traces.begin(), traces.end(), scaled_traces.begin(),
105 |                 [scale_ratio](auto vec) {
106 |                   for (auto& v : vec) v *= scale_ratio;
107 |                   return vec;
108 |                 });
109 | 
110 |   return scaled_traces;
111 | }
112 | 
113 | std::vector<std::vector<unsigned>> scale_trace_size(std::vector<std::vector<unsigned>>& traces, int size) {
114 |   std::vector<std::vector<unsigned>> scaled_traces(size);
115 | 
116 |   for (int i = 0; i < size; i++) {
117 |     scaled_traces[i] = traces[i];
118 |   }
119 | 
120 |   // Compress the traces size
121 |   for (int i = size; i < traces.size(); i++) {
122 |     for (int j = 0; j < traces[i].size(); j++) {
123 |       scaled_traces[i % size][j] += traces[i][j];
124 |     }
125 |   }
126 | 
127 |   return scaled_traces;
128 | }
129 | 
130 | std::vector<std::vector<unsigned>> load_scaled_trace(int rate, int size, int workload_id = 1) {
131 |   auto traces = load_trace(workload_id);
132 | 
133 |   auto scaled_traces = scale_trace_rate(traces, rate);
134 |   scaled_traces = scale_trace_size(scaled_traces, size);
135 | 
136 |   return scaled_traces;
137 | }
138 | 
139 | template <typename T>
140 | void transpose(std::vector<std::vector<T>> &m) {
141 |   if (m.size() == 0)
142 |     return;
143 | 
144 |   std::vector<std::vector<T>> trans_vec(m[0].size(), std::vector<T>());
145 | 
146 |   for (int i = 0; i < m.size(); i++) {
147 |     for (int j = 0; j < m[i].size(); j++) {
148 |       trans_vec[j].push_back(m[i][j]);
149 |     }
150 |   }
151 | 
152 |   m = trans_vec;
153 | }
154 | 
155 | }
156 | 


--------------------------------------------------------------------------------
/src/client/client.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | 
 3 | #include <client/client.h>
 4 | #include <util.h>
 5 | #include <server_api.h>
 6 | 
 7 | Client::Client()
 8 |   : alive(true),
 9 |     network_thr(std::bind(&Client::run, this)) {};
10 | 
11 | void Client::infer_async(std::vector<char>& input, int model_id,
12 |                          std::function<void(serverapi::Response* rsp)> onSuccess) {
13 |   serverapi::InferenceRequest request;
14 | 
15 |   request.model_id = model_id;
16 |   request.batch_size = 1;
17 |   request.input_size = input.size();
18 |   request.input = input.data();
19 | 
20 |   session->send_request_async(request, onSuccess);
21 | }
22 | 
23 | Client::~Client() {
24 |   if (alive)
25 |     shutdown();
26 | }
27 | 
28 | serverapi::UploadModelResponse* Client::upload_model(std::vector<std::string> model_names, int n_models, EngineType engine_type, int mp_size) {
29 |   serverapi::UploadModelRequest request;
30 | 
31 |   request.model_names = model_names;
32 |   request.n_models = n_models;
33 |   request.engine_type = engine_type;
34 |   request.mp_size = mp_size;
35 | 
36 |   auto onSuccess = [this](serverapi::Response* rsp) {
37 |     std::cout << "Success Upload\n";
38 |   };
39 | 
40 |   auto response = dynamic_cast<serverapi::UploadModelResponse*>
41 |                       (session->send_request(request, onSuccess));
42 | 
43 |   return response;
44 | }
45 | 
46 | void Client::close() {
47 |   serverapi::CloseRequest request;
48 | 
49 |   auto onSuccess = [this](serverapi::Response* rsp) {
50 |   };
51 | 
52 |   auto response = session->send_request(request, onSuccess);
53 | }
54 | 
55 | void Client::connect(const std::string& srv_ip, const std::string& port) {
56 |   try {
57 |     session = new network::ClientSession(io_service_);
58 | 
59 |     session->connect(srv_ip, port);
60 |   }
61 |  catch (std::exception& e) { io_service_.stop();
62 |     std::cerr << e.what() << "\n";
63 |   }
64 | 
65 |   return;
66 | }
67 | 
68 | void Client::run() {
69 |   while (alive) {
70 |     try {
71 |       boost::asio::io_service::work work(io_service_);
72 |       io_service_.run();
73 |     } catch (std::exception& e) {
74 |       alive.store(false);
75 |       std::cerr << "Exception in network thread: " << e.what();
76 |     } catch (const char* m) {
77 |       alive.store(false);
78 |       std::cerr << "Exception in network thread: " << m;
79 |     }
80 |   }
81 | }
82 | 
83 | void Client::shutdown() {
84 |   session->await_completion();
85 | 
86 |   this->close();
87 | 
88 |   alive.store(false);
89 |   io_service_.stop();
90 |   if (network_thr.joinable())
91 |     network_thr.join();
92 | }
93 | 


--------------------------------------------------------------------------------
/src/client/client.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <network/session.h>
 4 | #include <util.h>
 5 | #include <atomic>
 6 | 
 7 | class Client {
 8 |  public:
 9 |   Client();
10 | 
11 |   ~Client();
12 | 
13 |   void connect(const std::string& srv_ip, const std::string& port);
14 | 
15 |   void run();
16 | 
17 |   void infer_async(std::vector<char>& input, int model_id,
18 |                    std::function<void(serverapi::Response* rsp)> onSuccess);
19 | 
20 |   serverapi::UploadModelResponse* upload_model(std::vector<std::string> model_name, int n_models, EngineType engine_type, int mp_size);
21 | 
22 |   void close();
23 | 
24 |   void sync_close();
25 | 
26 |   void shutdown();
27 | 
28 | private:
29 |   std::atomic_bool alive;
30 |   std::thread network_thr;
31 |   boost::asio::io_service io_service_;
32 |   network::ClientSession* session;
33 | };
34 | 


--------------------------------------------------------------------------------
/src/client/workload.cpp:
--------------------------------------------------------------------------------
  1 | #include <client/workload.h>
  2 | 
  3 | Workload::Workload(int concurrency, int rate,
  4 |                    int n_requests, std::string addr, std::string port)
  5 |     : concurrency(concurrency),
  6 |       rate(rate),
  7 |       n_requests(n_requests),
  8 |       _traces(n_requests),
  9 |       addr(addr),
 10 |       port(port) {
 11 |         std::minstd_rand gen(0);
 12 |         std::uniform_int_distribution<> udist(0, concurrency-1);
 13 |         std::exponential_distribution<double> edist(rate);
 14 | 
 15 |         for (auto& trace : _traces) {
 16 |           trace.first = edist(gen);
 17 |           trace.second = udist(gen);
 18 |         }
 19 |       };
 20 | 
 21 | Workload::Workload(std::vector<unsigned>& rates,
 22 |                    std::string addr, std::string port)
 23 |   : _traces(0),
 24 |     addr(addr),
 25 |     port(port) {
 26 |       std::minstd_rand gen(0);
 27 | 
 28 |       int cnt = 0;
 29 |       for (int i = 0; i < rates.size(); i++) {
 30 |         double itv = 0;
 31 |         std::exponential_distribution<double> edist(rates[i]/60.0);
 32 |         cnt += rates[i];
 33 | 
 34 |         itv = edist(gen);
 35 |         while (itv < 60) {
 36 |           _traces.push_back({itv, i});
 37 |           itv += edist(gen);
 38 |         }
 39 |       }
 40 | 
 41 |       sort(_traces.begin(), _traces.end(),
 42 |           [](auto& a, auto& b) { return a.first < b.first;});
 43 | 
 44 |       for (int i = _traces.size()-1; i > 0; i--) {
 45 |         _traces[i].first -= _traces[i-1].first;
 46 |       }
 47 | 
 48 |       n_requests = _traces.size();
 49 |     };
 50 | 
 51 | void Workload::run(std::vector<std::vector<char>>& inputs) {
 52 |   client.connect(addr, port);
 53 | 
 54 |   for (auto& trace : _traces) {
 55 |     double interval = trace.first;
 56 |     int model_id = trace.second;
 57 | 
 58 |     usleep(interval*1e6);
 59 | 
 60 |     uint64_t t_send = util::now();
 61 |     auto onSuccess = [this, t_send](serverapi::Response* rsp) {
 62 |       auto response = dynamic_cast<serverapi::InferenceResponse*>(rsp);
 63 |       uint64_t t_receive = util::now();
 64 |       uint64_t latency = (t_receive-t_send) / 1e6;
 65 | 
 66 |       this->latencies.push_back(latency);
 67 |       if (response->is_cold) this->cold_start_cnt++;
 68 |     };
 69 | 
 70 |     client.infer_async(inputs[model_id], model_id, onSuccess);
 71 |   }
 72 | 
 73 |   client.shutdown();
 74 | }
 75 | 
 76 | WorkloadResult Workload::result(int slo) {
 77 |   WorkloadResult result;
 78 | 
 79 |   std::sort(latencies.begin(), latencies.end());
 80 | 
 81 |   int index_99 = latencies.size() * 0.99 - 1;
 82 |   int goodput_cnt = 0;
 83 | 
 84 |   for (auto& latency : latencies)
 85 |     if (latency <= slo) goodput_cnt++;
 86 | 
 87 |   result.latency_99 = latencies[index_99];
 88 |   result.cold_rate = (double)cold_start_cnt / n_requests * 100;
 89 |   result.goodput_rate = (double)goodput_cnt / n_requests * 100;
 90 | 
 91 |   return result;
 92 | }
 93 | 
 94 | ModelLoader::ModelLoader(std::vector<std::string> model_names,
 95 |                          int n_models, EngineType engine_type,
 96 |                          int mp_size, std::string addr, std::string port)
 97 |   : model_names(model_names),
 98 |     n_models(n_models),
 99 |     engine_type(engine_type),
100 |     mp_size(mp_size),
101 |     addr(addr),
102 |     port(port) {};
103 | 
104 | void ModelLoader::run() {
105 |   client.connect(addr, port);
106 | 
107 |   util::InputGenerator input_generator;
108 | 
109 |   inputs.resize(n_models);
110 | 
111 |   int n_models_per_type = n_models / model_names.size();
112 |   for (int i = 0; i < n_models; i++) {
113 |     input_generator.generate_input(model_names[i/n_models_per_type], 1, &inputs[i]);
114 |   }
115 | 
116 |   client.upload_model(model_names, n_models, engine_type, mp_size);
117 | 
118 |   for (int i = 0; i < n_models; i++) {
119 |     auto onSuccess = [this](serverapi::Response* rsp) {};
120 | 
121 |     client.infer_async(inputs[i], i, onSuccess);
122 |   }
123 | 
124 |   client.shutdown();
125 | }
126 | 
127 | 


--------------------------------------------------------------------------------
/src/client/workload.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <client/client.h>
 3 | #include <util.h>
 4 | 
 5 | #include <random>
 6 | 
 7 | struct WorkloadResult {
 8 |   double latency_99;
 9 |   double cold_rate;
10 |   double goodput_rate;
11 | };
12 | 
13 | class Workload {
14 |  public:
15 |   Workload(int concurrency, int rate,
16 |            int n_requests, std::string addr, std::string port);
17 | 
18 |   Workload(std::vector<unsigned>& rates,
19 |            std::string addr, std::string port);
20 | 
21 |   void run(std::vector<std::vector<char>>& inputs);
22 | 
23 |   WorkloadResult result(int slo);
24 | 
25 |   Client client;
26 | 
27 |   std::vector<std::string> model_names;
28 |   int concurrency;
29 |   int rate;
30 |   int n_requests;
31 |   std::string addr;
32 |   std::string port;
33 | 
34 |  private:
35 |   std::vector<std::pair<double, int>> _traces;
36 |   std::vector<double> latencies;
37 |   int cold_start_cnt = 0;
38 | };
39 | 
40 | class ModelLoader {
41 |  public:
42 |   ModelLoader(std::vector<std::string> model_name,
43 |               int n_models, EngineType engine_type,
44 |               int mp_size, std::string addr, std::string port);
45 | 
46 |   void run();
47 | 
48 |   Client client;
49 | 
50 |   std::vector<std::vector<char>> inputs;
51 |   std::vector<std::string> model_names;
52 |   int n_models;
53 |   EngineType engine_type;
54 |   int mp_size;
55 |   std::string addr;
56 |   std::string port;
57 | };
58 | 


--------------------------------------------------------------------------------
/src/deepplan/engine.cpp:
--------------------------------------------------------------------------------
  1 | #include <deepplan/model.h>
  2 | #include <deepplan/engine.h>
  3 | #include <util.h>
  4 | 
  5 | #include <cassert>
  6 | #include <cuda_runtime_api.h>
  7 | #include <c10/cuda/CUDAStream.h>
  8 | #include <c10/cuda/CUDAGuard.h>
  9 | #include "tbb/concurrent_queue.h"
 10 | #include <torch/script.h>
 11 | #include <torch/csrc/jit/runtime/graph_executor.h>
 12 | 
 13 | namespace deepplan {
 14 | 
 15 | class PCIeThread;
 16 | class NVLinkThread;
 17 | 
 18 | static std::vector<c10::cuda::CUDAStream> g_exec_streams;
 19 | static std::vector<PCIeThread*> g_pcie_thrs;
 20 | static std::vector<NVLinkThread*> g_nvlink_thrs;
 21 | static int n_device;
 22 | 
 23 | class LoadThread {
 24 |  public:
 25 |   LoadThread(int device)
 26 |    : device_(device),
 27 |      is_finished(false),
 28 |      stream(c10::cuda::getStreamFromPool(false, device)) {};
 29 | 
 30 |   struct Task {
 31 |    public:
 32 |     Task(std::vector<ScriptModule> modules, int device)
 33 |       : type(Type::request),
 34 |         modules(modules),
 35 |         device(device) {};
 36 | 
 37 |     Task()
 38 |       : type(Type::end) {};
 39 | 
 40 |     enum class Type {
 41 |       request = 0,
 42 |       end
 43 |     } type;
 44 | 
 45 |     std::vector<ScriptModule> modules;
 46 |     int device;
 47 |   };
 48 | 
 49 |   void transfer_modules(std::vector<ScriptModule>& modules, int target_device) {
 50 |     if (!modules.empty())
 51 |       queue.push(std::make_shared<Task>(modules, target_device));
 52 |   }
 53 | 
 54 |   virtual void init() = 0;
 55 | 
 56 |   virtual void Loop() = 0;
 57 | 
 58 |   void stop() {
 59 |     is_finished = true;
 60 |     queue.push(std::make_shared<Task>()); // Insert EndofItem
 61 |     if (thr.joinable())
 62 |       thr.join();
 63 |   }
 64 | 
 65 |  protected:
 66 |   tbb::concurrent_bounded_queue<std::shared_ptr<Task>> queue;
 67 |   c10::cuda::CUDAStream stream;
 68 |   std::thread thr;
 69 |   std::atomic<bool> is_finished;
 70 |   int device_;
 71 | };
 72 | 
 73 | class NVLinkThread : public LoadThread {
 74 |  public:
 75 |   NVLinkThread(int device)
 76 |     : LoadThread(device) { init(); };
 77 | 
 78 |   void init() {
 79 |     thr = std::thread(std::bind(&NVLinkThread::Loop, this));
 80 |   }
 81 | 
 82 |   void Loop() {
 83 |     at::Device device(at::kCUDA, device_);
 84 |     at::cuda::CUDAStreamGuard guard(stream);
 85 |     c10::cuda::CUDAGuard device_guard(device);
 86 | 
 87 |     std::shared_ptr<Task> task;
 88 | 
 89 |     while (!is_finished) {
 90 |       queue.pop(task);
 91 |       if (task->type == Task::Type::end) {
 92 |         break;
 93 |       }
 94 |       at::Device target_device(at::kCUDA, task->device);
 95 | 
 96 |       for (auto& module : task->modules) {
 97 |         module.synchronize(device);
 98 |         module.to_and_record(target_device, true);
 99 |       }
100 |     }
101 |   }
102 | 
103 |   void transfer_modules(ScriptModule module, int target_device) {
104 |     std::vector<ScriptModule> modules;
105 |     modules.push_back(std::move(module));
106 |     queue.push(std::make_shared<Task>(modules, target_device));
107 |   }
108 | };
109 | 
110 | class PCIeThread : public LoadThread {
111 |  public:
112 |   PCIeThread(int device)
113 |    : LoadThread(device) { init(); };
114 | 
115 |   void init() {
116 |     thr = std::thread(std::bind(&PCIeThread::Loop, this));
117 |   }
118 | 
119 |   void Loop() {
120 |     at::Device device(at::kCUDA, device_);
121 |     at::cuda::CUDAStreamGuard guard(stream);
122 |     c10::cuda::CUDAGuard device_guard(device);
123 | 
124 |     std::shared_ptr<Task> task;
125 | 
126 |     while (!is_finished) {
127 |       queue.pop(task);
128 |       if (task->type == Task::Type::end) {
129 |         break;
130 |       }
131 | 
132 |       int target_device = task->device;
133 | 
134 |       for (auto& module : task->modules) {
135 |         module.to_and_record(device, true);
136 | 
137 |         if (target_device != device_) {
138 |           g_nvlink_thrs[device_]->transfer_modules(module, target_device);
139 |         }
140 |       }
141 |     }
142 |   }
143 | };
144 | 
145 | void Init(void) {
146 |   n_device = torch::cuda::device_count();
147 |   torch::jit::getBailoutDepth() = 0;
148 | 
149 |   g_pcie_thrs.resize(n_device);
150 |   g_nvlink_thrs.resize(n_device);
151 | 
152 |   for (int i = 0; i < n_device; i++) {
153 |     g_pcie_thrs[i] = new PCIeThread(i);
154 |     g_nvlink_thrs[i] = new NVLinkThread(i);
155 |     g_exec_streams.push_back(std::move(c10::cuda::getStreamFromPool(false, i)));
156 |   }
157 | }
158 | 
159 | void Deinit(void) {
160 |   for (int i = 0; i < n_device; i++) {
161 |     g_pcie_thrs[i]->stop();
162 |     g_nvlink_thrs[i]->stop();
163 |   }
164 | }
165 | 
166 | class PipelineEngine : public Engine {
167 |  public:
168 |   PipelineEngine()
169 |     : Engine() {};
170 | 
171 |   torch::jit::IValue run(Model* model, ScriptModuleInput& x) {
172 |     int target_device = model->target_device.index();
173 |     torch::jit::IValue outputs;
174 | 
175 |     assert(n_device > target_device);
176 | 
177 |     if (!model->is_cuda) {
178 | 
179 |       for (int device : model->devices) {
180 |         std::vector<ScriptModule> modules;
181 |         for (auto idx : model->device_map[device]) {
182 |           modules.push_back(model->layers[idx]);
183 |         }
184 |         g_pcie_thrs[device]->transfer_modules(modules, target_device);
185 |       }
186 |     }
187 | 
188 |     {
189 |       at::cuda::CUDAStreamGuard stream_guard(g_exec_streams[target_device]);
190 |       outputs = model->model.forward(x);
191 |     }
192 |     model->is_cuda = true;
193 | 
194 |     return outputs;
195 |   }
196 | };
197 | 
198 | static PipelineEngine engine;
199 | 
200 | torch::jit::IValue RunEngine(Model* model, ScriptModuleInput& x) {
201 |   c10::cuda::CUDAGuard device_guard(model->target_device);
202 |   auto outputs = engine.run(model, x);
203 | 
204 |   return outputs;
205 | }
206 | 
207 | }
208 | 


--------------------------------------------------------------------------------
/src/deepplan/engine.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <c10/cuda/CUDAStream.h>
 4 | #include <deepplan/model.h>
 5 | #include <util.h>
 6 | 
 7 | namespace deepplan {
 8 | 
 9 | class Engine {
10 |  public:
11 |   virtual torch::jit::IValue run(Model* model, ScriptModuleInput& x) = 0;
12 | };
13 | 
14 | torch::jit::IValue RunEngine(Model* model, ScriptModuleInput& x);
15 | 
16 | void Init(void);
17 | 
18 | void Deinit(void);
19 | 
20 | }
21 | 


--------------------------------------------------------------------------------
/src/deepplan/model.cpp:
--------------------------------------------------------------------------------
  1 | #include <deepplan/model.h>
  2 | #include <deepplan/engine.h>
  3 | #include <util.h>
  4 | #include <deepplan.pb.h>
  5 | #include <c10/cuda/CUDAGuard.h>
  6 | 
  7 | #include <torch/script.h>
  8 | 
  9 | namespace deepplan {
 10 | 
 11 | static std::vector<ScriptModule> travel_layers(ScriptModule module, std::string name="") {
 12 |   std::vector<ScriptModule> traveled_layers;
 13 | 
 14 |   if (module.children().size() == 0) {
 15 |     traveled_layers.push_back(module);
 16 |     return traveled_layers;
 17 |   }
 18 |   else {
 19 |     for (auto name_child : module.named_children()) {
 20 |       if (name_child.name.find("drop") != std::string::npos) continue;
 21 |       auto layers = travel_layers(name_child.value, name_child.name);
 22 |       traveled_layers.insert(traveled_layers.end(), layers.begin(), layers.end());
 23 |     }
 24 |     return traveled_layers;
 25 |   }
 26 | }
 27 | 
 28 | Model::Model(const std::string name, const EngineType type, const std::vector<int> devices)
 29 |   : model_name(name),
 30 |     engine_type(type) {
 31 |       if (!devices.empty()) {
 32 |         this->devices = devices;
 33 |       }
 34 |       this->target_device = at::Device(at::kCUDA, this->devices[0]);
 35 |       init();
 36 |     }
 37 | 
 38 | void Model::init() {
 39 |   auto model_repo = std::getenv("PLAN_REPO");
 40 | 
 41 |   if (model_repo == nullptr) {
 42 |     std::cerr << "PLAN_REPO variable not set, exiting\n";
 43 |     exit(EXIT_FAILURE);
 44 |   }
 45 | 
 46 |   std::string model_prefix;
 47 |   std::string script_name;
 48 |   std::string script_path;
 49 |   std::string config_path;
 50 | 
 51 |   model_prefix = std::string(model_repo) + "/" + model_name;
 52 |   {
 53 |     std::ostringstream ss;
 54 |     ss << "model" << int(target_device.index()) << ".pt";
 55 |     script_name = ss.str();
 56 |   }
 57 |   script_path = model_prefix + "/" + script_name;
 58 |   config_path = model_prefix + "/config.pbtxt";
 59 | 
 60 |   try {
 61 |     this->model = torch::jit::load(script_path);
 62 |     if (!util::read_from_pbtxt(this->model_config, config_path)) {
 63 |       std::stringstream msg;
 64 |       msg << "Failed to read " << config_path;
 65 |       throw std::runtime_error(msg.str());
 66 |     }
 67 |     for (auto io : model_config.inputs()) {
 68 |       this->input_configs.emplace_back(io);
 69 |     }
 70 |   }
 71 |   catch (const c10::Error& e) {
 72 |     std::cerr << "Error loading the model\n";
 73 |     throw e;
 74 |   }
 75 |   catch (const std::exception& e) {
 76 |     std::cerr << e.what() << "\n";
 77 |     throw e;
 78 |   }
 79 | 
 80 |   this->layers = travel_layers(this->model);
 81 |   this->n_layers = this->layers.size();
 82 |   this->model.eval();
 83 |   this->model.to(at::kCPU);
 84 |   {
 85 |     c10::cuda::CUDAGuard device_guard(this->target_device);
 86 |     this->model.cuda_host();
 87 |   }
 88 | 
 89 |   switch (engine_type) {
 90 |     case EngineType::IN_MEMORY:
 91 |     case EngineType::ON_DEMAND:
 92 |     case EngineType::PIPESWITCH:
 93 |       for (int i = 0; i < this->n_layers; i++) {
 94 |         this->load_layer_idxs.push_back(i);
 95 |       }
 96 |       break;
 97 | 
 98 |     case EngineType::DEEPPLAN:
 99 |       for (auto plan : this->model_config.plans()) {
100 |         if (Plan::DYNAMIC == plan.plan_type()) {
101 |           auto ll = plan.load_layers();
102 |           this->load_layer_idxs = std::vector<int>(ll.begin(), ll.end());
103 |           break;
104 |         }
105 |       }
106 |       break;
107 |     default:
108 |       std::cerr << "Found incorrect EngineType\n";
109 |       break;
110 |   }
111 | 
112 |   for (auto& i : this->load_layer_idxs) {
113 |     this->layers[i].to(at::kCPU);
114 |     this->layers[i].pin_memory();
115 |   }
116 | 
117 |   // Set device_map
118 |   this->model_size = util::getModuleSize(this->model, true);
119 |   {
120 |     int n_device = devices.size();
121 |     size_t block_size = model_size / n_device;
122 |     auto iter = load_layer_idxs.begin();
123 | 
124 |     for (int i = 0; i < n_device; i++) {
125 |       int device = devices[i];
126 |       size_t cumm_size = 0;
127 |       size_t layer_size = 0;
128 |       std::vector<int> layer_list;
129 | 
130 |       for (iter; iter != load_layer_idxs.end(); iter++) {
131 |         layer_size = util::getModuleSize(layers[*iter]);
132 |         cumm_size += layer_size;
133 |         if (cumm_size > block_size) {
134 |           break;
135 |         }
136 | 
137 |         layer_list.push_back(*iter);
138 |       }
139 | 
140 |       // Insert remain layers to last device
141 |       if (i == n_device-1) {
142 |         for (iter; iter != load_layer_idxs.end(); iter++) {
143 |           layer_list.push_back(*iter);
144 |         }
145 |       }
146 | 
147 |       device_map[device] = layer_list;
148 |     }
149 |   }
150 | 
151 |   // TODO
152 |   // If using parallel transfer, the devices other than the target device
153 |   // convert cuda_host to pin_memory
154 | 
155 |   model.cuda_backup();
156 |   this->is_cuda = false;
157 | }
158 | 
159 | torch::jit::IValue Model::forward(ScriptModuleInput& x) {
160 |   auto outputs = RunEngine(this, x);
161 |   return outputs;
162 | }
163 | 
164 | void Model::to(at::Device device, bool non_blocking) {
165 |   model.to(device, non_blocking);
166 |   if (device.is_cuda())
167 |     is_cuda = true;
168 |   else
169 |     is_cuda = false;
170 | }
171 | 
172 | void Model::clear()
173 | {
174 |   if (this->is_cuda) {
175 |     model.clear();
176 |     is_cuda = false;
177 |   }
178 | }
179 | 
180 | }
181 | 


--------------------------------------------------------------------------------
/src/deepplan/model.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <torch/script.h>
 4 | #include <util.h>
 5 | #include <unordered_map>
 6 | #include <deepplan.pb.h>
 7 | 
 8 | namespace deepplan {
 9 | 
10 | class Model {
11 |  public:
12 |   Model(const std::string name, const EngineType type, const std::vector<int> devices);
13 | 
14 |   Model() {};
15 | 
16 |   void init();
17 | 
18 | 	torch::jit::IValue forward(ScriptModuleInput& x);
19 | 
20 |   void to(at::Device device, bool non_blocking = false);
21 | 
22 |   void clear();
23 | 
24 |   std::string model_name;
25 | 
26 |   EngineType engine_type;
27 | 
28 |   std::vector<int> devices = {0};
29 | 
30 |   at::Device target_device = at::kCUDA;
31 | 
32 |   ScriptModule model;
33 | 
34 |   size_t model_size;
35 | 
36 |   std::vector<ScriptModule> layers;
37 | 
38 |   std::unordered_map<int, std::vector<int>> device_map;
39 | 
40 |   int n_layers;
41 | 
42 |   std::vector<int> load_layer_idxs;
43 | 
44 |   std::atomic<bool> is_cuda;
45 | 
46 |   ModelConfig model_config;
47 | 
48 |   std::vector<InputConfig> input_configs;
49 | };
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/src/network/message.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | #include <iostream>
  3 | #include <google/protobuf/text_format.h>
  4 | 
  5 | namespace network {
  6 | class message_tx {
  7 |  public:
  8 |   virtual uint64_t get_tx_hdr_len() const = 0;
  9 |   virtual uint64_t get_tx_body_len() const = 0;
 10 |   virtual uint64_t get_tx_req_id() const = 0;
 11 |   virtual uint64_t get_tx_msg_type() const = 0;
 12 |   virtual const void* tx_body_buf() = 0;
 13 | 
 14 |   virtual void serialize_header(void* dest) = 0;
 15 | };
 16 | 
 17 | class message_rx {
 18 |  public:
 19 |   virtual void header_received(const void* hdr, size_t hdr_len) = 0;
 20 |   virtual uint64_t get_rx_body_len() const = 0;
 21 |   virtual uint64_t get_rx_req_id() const = 0;
 22 |   virtual uint64_t get_rx_msg_type() const = 0;
 23 |   virtual void* rx_body_buf() = 0;
 24 | 
 25 |   virtual void body_buf_received(size_t len) = 0;
 26 | };
 27 | 
 28 | template <uint64_t TMsgType, class TMsg, class TReq>
 29 | class msg_protobuf_tx : public message_tx {
 30 |  protected:
 31 |   uint64_t req_id_;
 32 | 
 33 |  public:
 34 |   TMsg msg;
 35 |   static const uint64_t MsgType = TMsgType;
 36 | 
 37 |   void set_req_id(uint64_t req_id) { req_id_ = req_id; };
 38 | 
 39 |   virtual uint64_t get_tx_hdr_len() const { return msg.ByteSizeLong(); };
 40 |   virtual uint64_t get_tx_body_len() const { return 0; };
 41 |   virtual uint64_t get_tx_req_id() const { return req_id_; };
 42 |   virtual uint64_t get_tx_msg_type() const { return MsgType; };
 43 | 
 44 |   virtual void serialize_header(void* dest) {
 45 |     msg.SerializeToArray(dest, get_tx_hdr_len());
 46 |   }
 47 | 
 48 |   virtual const void* tx_body_buf() {
 49 |     throw "Should not be called";
 50 |   }
 51 | 
 52 |   virtual void set(TReq &request) = 0;
 53 | };
 54 | 
 55 | template <uint64_t TMsgType, class TMsg, class TRsp>
 56 | class msg_protobuf_rx : public message_rx {
 57 |  protected:
 58 |   uint64_t req_id_;
 59 | 
 60 |  public:
 61 |   TMsg msg;
 62 |   static const uint64_t MsgType = TMsgType;
 63 | 
 64 |   virtual void header_received(const void* hdr, size_t hdr_len) {
 65 |     if (!msg.ParseFromArray(hdr, hdr_len))
 66 |       std::cerr << "parsing failed\n";
 67 |   }
 68 | 
 69 |   void set_req_id(uint64_t req_id) { req_id_ = req_id; };
 70 | 
 71 |   virtual uint64_t get_rx_req_id() const { return req_id_; };
 72 |   virtual uint64_t get_rx_body_len() const { return 0; };
 73 |   virtual uint64_t get_rx_msg_type() const { return MsgType; };
 74 | 
 75 |   virtual void* rx_body_buf() {
 76 |     throw "Should not be called";
 77 |   }
 78 | 
 79 |   virtual void body_buf_received(size_t len) {
 80 |     throw "Should not be called";
 81 |   }
 82 | 
 83 |   virtual void get(TRsp& response) = 0;
 84 | 
 85 | };
 86 | 
 87 | template <uint64_t TMsgType, class TMsg, class TRsp>
 88 | class msg_protobuf_tx_with_body : public msg_protobuf_tx<TMsgType, TMsg, TRsp> {
 89 |  protected:
 90 |   size_t body_len_ = 0;
 91 |   void* body_ = nullptr;
 92 | 
 93 |  public:
 94 |   virtual void set_body_len(size_t body_len) { body_len_ = body_len; }
 95 | 
 96 |   virtual uint64_t get_tx_body_len() const { return body_len_; }
 97 | 
 98 |   virtual const void* tx_body_buf() {
 99 |     return body_;
100 |   }
101 | 
102 | };
103 | 
104 | template <uint64_t TMsgType, class TMsg, class TRsp>
105 | class msg_protobuf_rx_with_body : public msg_protobuf_rx<TMsgType, TMsg, TRsp> {
106 |  protected:
107 |   size_t body_len_ = 0;
108 |   void* body_ = nullptr;
109 | 
110 |  public:
111 |   virtual void set_body_len(size_t body_len) {
112 |     body_len_ = body_len;
113 |     body_ = new uint8_t[body_len];
114 |   }
115 | 
116 |   virtual uint64_t get_rx_body_len() const { return body_len_; }
117 | 
118 |   virtual void* rx_body_buf() {
119 |     return body_;
120 |   }
121 | 
122 |   virtual void body_buf_received(size_t len) {
123 |   }
124 | };
125 | 
126 | }
127 | 


--------------------------------------------------------------------------------
/src/network/network.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <network/network.h>
  3 | 
  4 | namespace network {
  5 | 
  6 | message_receiver::message_receiver(message_connection* conn, message_handler& handler)
  7 |   : socket_(conn->get_socket()),
  8 |     conn_(conn),
  9 |     handler_(handler) {};
 10 | 
 11 | void message_receiver::start() {
 12 |   read_new_message();
 13 | }
 14 | 
 15 | void message_receiver::read_new_message() {
 16 |   boost::asio::async_read(socket_, boost::asio::buffer(pre_header, 32),
 17 |       boost::bind(&message_receiver::handle_pre_hdr_read, this,
 18 |         boost::asio::placeholders::error,
 19 |         boost::asio::placeholders::bytes_transferred));
 20 | }
 21 | 
 22 | void message_receiver::handle_pre_hdr_read(const boost::system::error_code& error,
 23 |     size_t bytes_transferred) {
 24 |   if (error) {
 25 |     std::cerr << "[Error:handle_pre_hdr_read] " << error.message().data() << "\n";
 26 |     return;
 27 |   }
 28 | 
 29 |   boost::asio::async_read(socket_, boost::asio::buffer(header_buf, pre_header[0]),
 30 |       boost::bind(&message_receiver::handle_hdr_read, this,
 31 |         boost::asio::placeholders::error,
 32 |         boost::asio::placeholders::bytes_transferred));
 33 | }
 34 | 
 35 | void message_receiver::handle_hdr_read(const boost::system::error_code& error,
 36 |     size_t bytes_transferred) {
 37 |   if (error) {
 38 |     std::cerr << "[Error:hanlde_hdr_read] " << error.message().data() << "\n";
 39 |     return;
 40 |   }
 41 | 
 42 |   res_ = handler_.new_rx_message(pre_header[0], pre_header[1], pre_header[2], pre_header[3]);
 43 |   res_->header_received(header_buf, pre_header[0]);
 44 | 
 45 |   int64_t body_len = res_->get_rx_body_len();
 46 | 
 47 |   if (body_len > 0) {
 48 |     boost::asio::async_read(socket_, boost::asio::buffer(res_->rx_body_buf(), body_len),
 49 |       boost::bind(&message_receiver::handle_body_read, this,
 50 |         boost::asio::placeholders::error,
 51 |         boost::asio::placeholders::bytes_transferred));
 52 | 
 53 |   }
 54 |   else {
 55 |     handle_read_end();
 56 |   }
 57 | 
 58 | }
 59 | 
 60 | void message_receiver::handle_read_end() {
 61 |   bool is_continue;
 62 |   is_continue = handler_.completed_receive(conn_, res_);
 63 |   if (!is_continue)
 64 |     return;
 65 | 
 66 |   res_ = 0;
 67 |   read_new_message();
 68 | }
 69 | 
 70 | void message_receiver::handle_body_read(const boost::system::error_code& error,
 71 |     size_t bytes_transferred) {
 72 |   if (error) {
 73 |     std::cerr << "[Error:handle_body_read] " << error.message().data() << "\n";
 74 |     return;
 75 |   }
 76 | 
 77 |   res_->body_buf_received(bytes_transferred);
 78 | 
 79 |   handle_read_end();
 80 | }
 81 | 
 82 | message_sender::message_sender(message_connection* conn, message_handler& handler)
 83 |   : socket_(conn->get_socket()),
 84 |     conn_(conn),
 85 |     handler_(handler),
 86 |     req_(0) {};
 87 | 
 88 | void message_sender::send_message(message_tx& req) {
 89 |   tx_queue_.push(&req);
 90 |   conn_->io_service_.post(boost::bind(&message_sender::try_send, this));
 91 | }
 92 | 
 93 | void message_sender::try_send() {
 94 |   std::lock_guard<std::mutex> lock(queue_mutex);
 95 | 
 96 |   if (!req_) send_next_message();
 97 | }
 98 | 
 99 | void message_sender::send_next_message() {
100 |   message_tx *req;
101 |   if (!tx_queue_.try_pop(req)) {
102 |     return;
103 |   }
104 |   start_send(*req);
105 | }
106 | 
107 | void message_sender::start_send(message_tx& req) {
108 |   pre_header[0] = req.get_tx_hdr_len();
109 |   pre_header[1] = req.get_tx_body_len();
110 |   pre_header[2] = req.get_tx_req_id();
111 |   pre_header[3] = req.get_tx_msg_type();
112 | 
113 |   req.serialize_header(header_buf);
114 | 
115 |   req_ = &req;
116 | 
117 |   boost::asio::async_write(socket_, boost::asio::buffer(pre_header, 32),
118 |       boost::bind(&message_sender::handle_pre_hdr_write, this,
119 |         boost::asio::placeholders::error,
120 |         boost::asio::placeholders::bytes_transferred));
121 | }
122 | 
123 | void message_sender::handle_pre_hdr_write(const boost::system::error_code& error,
124 |     size_t bytes_transferred) {
125 |   if (error) {
126 |     std::cerr << "[Error:handle_pre_hdr_write] " << error.message().data() << "\n";
127 |     return;
128 |   }
129 | 
130 |   boost::asio::async_write(socket_, boost::asio::buffer(header_buf, pre_header[0]),
131 |       boost::bind(&message_sender::handle_hdr_write, this,
132 |         boost::asio::placeholders::error,
133 |         boost::asio::placeholders::bytes_transferred));
134 | }
135 | 
136 | void message_sender::handle_hdr_write(const boost::system::error_code& error,
137 |     size_t bytes_transferred) {
138 |   if (error) {
139 |     std::cerr << "[Error:handle_hdr_write] " << error.message().data() << "\n";
140 |     return;
141 |   }
142 | 
143 |   uint64_t body_len = req_->get_tx_body_len();
144 | 
145 |   if (body_len > 0) {
146 |     boost::asio::async_write(socket_, boost::asio::buffer(req_->tx_body_buf(), body_len),
147 |         boost::bind(&message_sender::handle_body_write, this,
148 |           boost::asio::placeholders::error,
149 |           boost::asio::placeholders::bytes_transferred));
150 |   }
151 |   else {
152 |     handle_write_end();
153 |   }
154 | }
155 | 
156 | void message_sender::handle_write_end() {
157 |   handler_.completed_transmit(conn_, req_);
158 | 
159 |   std::lock_guard<std::mutex> lock(queue_mutex);
160 |   req_ = 0;
161 |   send_next_message();
162 | }
163 | 
164 | void message_sender::handle_body_write(const boost::system::error_code& error,
165 |     size_t bytes_transferred) {
166 |   if (error) {
167 |     std::cerr << "[Error:handle_body_write] " << error.message().data() << "\n";
168 |     return;
169 |   }
170 | 
171 |   handle_write_end();
172 | }
173 | 
174 | message_connection::message_connection(boost::asio::io_service& io_service, message_handler& handler)
175 |   : socket_(io_service),
176 |     resolver_(io_service),
177 |     io_service_(io_service),
178 |     msg_rx_(message_receiver(this, handler)),
179 |     handler_(handler),
180 |     is_connected(false) {};
181 | 
182 | boost::asio::ip::tcp::socket& message_connection::get_socket() {
183 |   return socket_;
184 | }
185 | 
186 | void message_connection::connect(const std::string& server, const std::string& port) {
187 |   boost::asio::ip::tcp::resolver::query query(server, port);
188 |   resolver_.async_resolve(query,
189 |       boost::bind(&message_connection::handle_resolved, this,
190 |         boost::asio::placeholders::error,
191 |         boost::asio::placeholders::iterator));
192 | 
193 |   while (!is_connected.load());
194 | }
195 | 
196 | void message_connection::established() {
197 |   boost::asio::ip::tcp::no_delay option(true);
198 |   socket_.set_option(option);
199 | 
200 |   msg_rx_.start();
201 |   ready();
202 | }
203 | 
204 | void message_connection::ready() {
205 |   is_connected.store(true);
206 | }
207 | 
208 | void message_connection::handle_resolved(const boost::system::error_code& error,
209 |     boost::asio::ip::tcp::resolver::iterator endpoint_iterator) {
210 |   if (error) {
211 |     std::cerr << "[Error:handle_resolved] " << error.message().data() << "\n";
212 |     return;
213 |   }
214 | 
215 |   boost::asio::ip::tcp::endpoint endpoint = *endpoint_iterator;
216 |   socket_.async_connect(endpoint,
217 |       boost::bind(&message_connection::handle_established, this,
218 |         boost::asio::placeholders::error));
219 | }
220 | 
221 | void message_connection::handle_established(const boost::system::error_code& error) {
222 |   if (error) {
223 |     std::cerr << "[Error:handle_established] " << error.message().data() << "\n";
224 |     return;
225 |   }
226 | 
227 |   established();
228 | }
229 | }
230 | 
231 | 


--------------------------------------------------------------------------------
/src/network/network.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <boost/asio.hpp>
  4 | #include <boost/bind.hpp>
  5 | #include <atomic>
  6 | #include <mutex>
  7 | #include "tbb/concurrent_queue.h"
  8 | 
  9 | #include <network/message.h>
 10 | 
 11 | namespace network {
 12 | 
 13 | class message_connection;
 14 | 
 15 | class message_handler {
 16 |  public:
 17 |   virtual message_rx* new_rx_message(
 18 |       uint64_t hdr_len,
 19 |       uint64_t body_len,
 20 |       uint64_t req_id,
 21 |       uint64_t msg_type) = 0;
 22 | 
 23 |   virtual bool completed_receive(message_connection *conn, message_rx *req) = 0;
 24 |   virtual void completed_transmit(message_connection *conn, message_tx *req) = 0;
 25 | };
 26 | 
 27 | class message_receiver {
 28 |  public:
 29 |   message_receiver(message_connection* conn, message_handler& handler);
 30 | 
 31 |   void start();
 32 | 
 33 |   void read_new_message();
 34 | 
 35 |  private:
 36 |   void handle_pre_hdr_read(const boost::system::error_code& error,
 37 |       size_t bytes_transferred);
 38 | 
 39 |   void handle_hdr_read(const boost::system::error_code& error,
 40 |       size_t bytes_transferred);
 41 | 
 42 |   void handle_body_read(const boost::system::error_code& error,
 43 |       size_t bytes_transferred);
 44 | 
 45 |   void handle_read_end();
 46 | 
 47 |   void abort_connection(const char* msg);
 48 | 
 49 |   boost::asio::ip::tcp::socket& socket_;
 50 | 
 51 |   message_connection* conn_;
 52 |   message_handler& handler_;
 53 |   message_rx* res_;
 54 | 
 55 |   size_t body_left;
 56 |   /* header_len, body_len, req_id, message_type */
 57 |   uint64_t pre_header[4];
 58 |   char header_buf[1024];
 59 | };
 60 | 
 61 | class message_sender {
 62 |  public:
 63 |   message_sender(message_connection* conn, message_handler& handler);
 64 | 
 65 |   void send_message(message_tx& req);
 66 | 
 67 |  private:
 68 |   void try_send();
 69 | 
 70 |   void send_next_message();
 71 | 
 72 |   void start_send(message_tx& req);
 73 | 
 74 |   void handle_pre_hdr_write(const boost::system::error_code& error,
 75 |       size_t bytes_transferred);
 76 | 
 77 |   void handle_hdr_write(const boost::system::error_code& error,
 78 |       size_t bytes_transferred);
 79 | 
 80 |   void handle_body_write(const boost::system::error_code& error,
 81 |       size_t bytes_transferred);
 82 | 
 83 |   void handle_write_end();
 84 | 
 85 |   void abort_connection(const char* msg);
 86 | 
 87 |   boost::asio::ip::tcp::socket& socket_;
 88 |   message_connection* conn_;
 89 |   message_handler& handler_;
 90 |   message_tx* req_;
 91 |   uint64_t pre_header[4];
 92 |   char header_buf[1024];
 93 | 
 94 |   std::mutex queue_mutex;
 95 |   tbb::concurrent_queue<message_tx*> tx_queue_;
 96 | };
 97 | 
 98 | class message_connection {
 99 |  public:
100 |   message_connection(boost::asio::io_service& io_service, message_handler& handler);
101 | 
102 |   boost::asio::ip::tcp::socket& get_socket();
103 | 
104 |   void connect(const std::string& host, const std::string& port);
105 | 
106 |   void established();
107 | 
108 |   void abort_connection(const char* msg);
109 | 
110 |   void close(const char* reason);
111 | 
112 |   virtual void ready();
113 | 
114 |  private:
115 |   void handle_resolved(const boost::system::error_code& error,
116 |       boost::asio::ip::tcp::resolver::iterator endpoint_iterator);
117 | 
118 |   void handle_established(const boost::system::error_code& error);
119 | 
120 |   boost::asio::ip::tcp::socket socket_;
121 |   boost::asio::ip::tcp::resolver resolver_;
122 |   message_receiver msg_rx_;
123 |   message_handler& handler_;
124 | 
125 |  protected:
126 |   std::atomic_bool is_connected;
127 | 
128 |  public:
129 |   boost::asio::io_service& io_service_;
130 | };
131 | 
132 | }
133 | 


--------------------------------------------------------------------------------
/src/network/server_api.cpp:
--------------------------------------------------------------------------------
 1 | #include <network/server_api.h>
 2 | 
 3 | namespace network {
 4 | 
 5 | void msg_inference_req_tx::set(serverapi::InferenceRequest& request) {
 6 |   set_req_id(request.req_id);
 7 |   msg.set_req_id(request.req_id);
 8 |   msg.set_model_id(request.model_id);
 9 |   msg.set_batch_size(request.batch_size);
10 |   body_len_ = request.input_size;
11 |   body_ = request.input;
12 | }
13 | 
14 | void msg_inference_req_rx::get(serverapi::InferenceRequest& request) {
15 |   request.req_id = get_rx_req_id();
16 |   request.model_id = msg.model_id();
17 |   request.batch_size = msg.batch_size();
18 |   request.input_size = body_len_;
19 |   request.input = body_;
20 | }
21 | 
22 | void msg_inference_rsp_tx::set(serverapi::InferenceResponse& response) {
23 |   set_req_id(response.req_id);
24 |   msg.set_req_id(response.req_id);
25 |   msg.set_is_cold(response.is_cold);
26 | }
27 | 
28 | void msg_inference_rsp_rx::get(serverapi::InferenceResponse& response) {
29 |   response.req_id = get_rx_req_id();
30 |   response.is_cold = msg.is_cold();
31 | }
32 | 
33 | void msg_upload_model_req_tx::set(serverapi::UploadModelRequest& request) {
34 |   set_req_id(request.req_id);
35 |   msg.set_req_id(request.req_id);
36 |   *msg.mutable_model_names() = {request.model_names.begin(), request.model_names.end()};
37 |   msg.set_n_models(request.n_models);
38 |   msg.set_engine_type(request.engine_type);
39 |   msg.set_mp_size(request.mp_size);
40 | }
41 | 
42 | void msg_upload_model_req_rx::get(serverapi::UploadModelRequest& request) {
43 |   request.req_id = get_rx_req_id();
44 |   request.model_names = std::vector<std::string>(msg.model_names().begin(), msg.model_names().end());
45 |   request.n_models = msg.n_models();
46 |   request.engine_type = msg.engine_type();
47 |   request.mp_size = msg.mp_size();
48 | }
49 | 
50 | void msg_upload_model_rsp_tx::set(serverapi::UploadModelResponse& response) {
51 |   set_req_id(response.req_id);
52 |   msg.set_req_id(response.req_id);
53 | }
54 | 
55 | void msg_upload_model_rsp_rx::get(serverapi::UploadModelResponse& response) {
56 |   response.req_id = get_rx_req_id();
57 | }
58 | 
59 | void msg_close_req_tx::set(serverapi::CloseRequest& request) {
60 |   set_req_id(request.req_id);
61 |   msg.set_req_id(request.req_id);
62 | }
63 | 
64 | void msg_close_req_rx::get(serverapi::CloseRequest& request) {
65 |   request.req_id = get_rx_req_id();
66 | }
67 | 
68 | void msg_close_rsp_tx::set(serverapi::CloseResponse& response) {
69 |   set_req_id(response.req_id);
70 |   msg.set_req_id(response.req_id);
71 | }
72 | 
73 | void msg_close_rsp_rx::get(serverapi::CloseResponse& response) {
74 |   response.req_id = get_rx_req_id();
75 | }
76 | 
77 | }
78 | 


--------------------------------------------------------------------------------
/src/network/server_api.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <deepcache.pb.h>
 4 | #include <server_api.h>
 5 | #include <network/message.h>
 6 | 
 7 | namespace network {
 8 | 
 9 | class msg_inference_req_tx : public msg_protobuf_tx_with_body<REQ_INFERENCE, InferenceReqProto, serverapi::InferenceRequest> {
10 |  public:
11 |   virtual void set(serverapi::InferenceRequest& request);
12 | };
13 | 
14 | class msg_inference_req_rx : public msg_protobuf_rx_with_body<REQ_INFERENCE, InferenceReqProto, serverapi::InferenceRequest> {
15 |  public:
16 |   virtual void get(serverapi::InferenceRequest& request);
17 | };
18 | 
19 | class msg_inference_rsp_tx : public msg_protobuf_tx<RSP_INFERENCE, InferenceRspProto, serverapi::InferenceResponse> {
20 |  public:
21 |   virtual void set(serverapi::InferenceResponse& response);
22 | };
23 | 
24 | class msg_inference_rsp_rx : public msg_protobuf_rx<RSP_INFERENCE, InferenceRspProto, serverapi::InferenceResponse> {
25 |  public:
26 |   virtual void get(serverapi::InferenceResponse& response);
27 | };
28 | 
29 | class msg_upload_model_req_tx : public msg_protobuf_tx<REQ_UPLOAD_MODEL, UploadModelReqProto, serverapi::UploadModelRequest> {
30 |  public:
31 |   virtual void set(serverapi::UploadModelRequest& request);
32 | };
33 | 
34 | class msg_upload_model_req_rx : public msg_protobuf_rx<REQ_UPLOAD_MODEL, UploadModelReqProto, serverapi::UploadModelRequest> {
35 |  public:
36 |   virtual void get(serverapi::UploadModelRequest& request);
37 | };
38 | 
39 | class msg_upload_model_rsp_tx : public msg_protobuf_tx<RSP_UPLOAD_MODEL, UploadModelRspProto, serverapi::UploadModelResponse> {
40 |  public:
41 |   virtual void set(serverapi::UploadModelResponse& response);
42 | };
43 | 
44 | class msg_upload_model_rsp_rx : public msg_protobuf_rx<RSP_UPLOAD_MODEL, UploadModelRspProto, serverapi::UploadModelResponse> {
45 |  public:
46 |   virtual void get(serverapi::UploadModelResponse& response);
47 | };
48 | 
49 | class msg_close_req_tx : public msg_protobuf_tx<REQ_CLOSE, CloseReqProto, serverapi::CloseRequest> {
50 |  public:
51 |   virtual void set(serverapi::CloseRequest& request);
52 | };
53 | 
54 | class msg_close_req_rx : public msg_protobuf_rx<REQ_CLOSE, CloseReqProto, serverapi::CloseRequest> {
55 |  public:
56 |   virtual void get(serverapi::CloseRequest& request);
57 | };
58 | 
59 | class msg_close_rsp_tx : public msg_protobuf_tx<RSP_CLOSE, CloseRspProto, serverapi::CloseResponse> {
60 |  public:
61 |   virtual void set(serverapi::CloseResponse& response);
62 | };
63 | 
64 | class msg_close_rsp_rx : public msg_protobuf_rx<RSP_CLOSE, CloseRspProto, serverapi::CloseResponse> {
65 |  public:
66 |   virtual void get(serverapi::CloseResponse& response);
67 | };
68 | 
69 | }
70 | 
71 | 


--------------------------------------------------------------------------------
/src/network/session.cpp:
--------------------------------------------------------------------------------
  1 | #include <network/session.h>
  2 | #include <network/server_api.h>
  3 | 
  4 | #include <future>
  5 | 
  6 | namespace network {
  7 | 
  8 | SrvSession::SrvSession(boost::asio::io_service& io_service, MessageQueue& messages)
  9 |   : Session(io_service),
 10 |     messages_(messages) {};
 11 | 
 12 | message_rx* SrvSession::new_rx_message(uint64_t hdr_len, uint64_t body_len,
 13 |                                        uint64_t req_id, uint64_t msg_type) {
 14 |   message_rx* msg_rx;
 15 | 
 16 |   if (msg_type == REQ_INFERENCE) {
 17 |     auto msg = new msg_inference_req_rx();
 18 |     msg->set_req_id(req_id);
 19 |     msg->set_body_len(body_len);
 20 | 
 21 |     msg_rx = msg;
 22 |   }
 23 |   else if (msg_type == REQ_UPLOAD_MODEL) {
 24 |     auto msg = new msg_upload_model_req_rx();
 25 |     msg->set_req_id(req_id);
 26 | 
 27 |     msg_rx = msg;
 28 |   }
 29 |   else if(msg_type == REQ_CLOSE) {
 30 |     auto msg = new msg_close_req_rx();
 31 |     msg->set_req_id(req_id);
 32 | 
 33 |     msg_rx = msg;
 34 |   }
 35 | 
 36 |   return msg_rx;
 37 | }
 38 | 
 39 | bool SrvSession::completed_receive(message_connection* conn, message_rx* req) {
 40 |   bool is_continue = true;
 41 | 
 42 |   if (auto infer = dynamic_cast<msg_inference_req_rx*>(req)) {
 43 |     auto request = new serverapi::InferenceRequest();
 44 |     infer->get(*request);
 45 | 
 46 |     messages_.push({this, request});
 47 |   }
 48 |   else if (auto upload_model = dynamic_cast<msg_upload_model_req_rx*>(req)) {
 49 |     auto request = new serverapi::UploadModelRequest();
 50 |     upload_model->get(*request);
 51 | 
 52 |     messages_.push({this, request});
 53 |   }
 54 |   else if (auto close = dynamic_cast<msg_close_req_rx*>(req)) {
 55 |     auto response = new serverapi::CloseResponse();
 56 | 
 57 |     response->req_id = req->get_rx_req_id();
 58 | 
 59 |     send_response(response);
 60 | 
 61 |     is_continue = false;
 62 |   }
 63 | 
 64 |   delete req;
 65 | 
 66 |   return is_continue;
 67 | }
 68 | 
 69 | void SrvSession::completed_transmit(message_connection* conn, message_tx* req) {
 70 | }
 71 | 
 72 | void SrvSession::send_response(serverapi::Response* response) {
 73 |   message_tx* msg_tx;
 74 | 
 75 |   if (auto infer = dynamic_cast<serverapi::InferenceResponse*>(response)) {
 76 |     auto infer_rsp = new msg_inference_rsp_tx();
 77 | 
 78 |     infer_rsp->set(*infer);
 79 |     msg_tx = infer_rsp;
 80 |   }
 81 |   else if (auto upload_model = dynamic_cast<serverapi::UploadModelResponse*>(response)) {
 82 |     auto upload_model_rsp = new msg_upload_model_rsp_tx();
 83 | 
 84 |     upload_model_rsp->set(*upload_model);
 85 |     msg_tx = upload_model_rsp;
 86 |   }
 87 |   else if (auto close = dynamic_cast<serverapi::CloseResponse*>(response)) {
 88 |     auto close_rsp = new msg_close_rsp_tx();
 89 | 
 90 |     close_rsp->set(*close);
 91 |     msg_tx = close_rsp;
 92 |   }
 93 | 
 94 |   msg_tx_.send_message(*msg_tx);
 95 | }
 96 | 
 97 | ClientSession::ClientSession(boost::asio::io_service& io_service)
 98 |   : Session(io_service),
 99 |     request_seed_id(0),
100 |     received_rsp_cnt(0) {}
101 | 
102 | std::future<serverapi::Response*> ClientSession::send_request_async(serverapi::Request& request, std::function<void(serverapi::Response*)> onSuccess) {
103 |   auto promise = std::make_shared<std::promise<serverapi::Response*>>();
104 |   auto cb = [this, promise, onSuccess](serverapi::Response* response) {
105 |     onSuccess(response);
106 |     promise->set_value(response);
107 |   };
108 | 
109 |   message_tx* msg_tx;
110 | 
111 |   uint64_t request_id = request_seed_id++;
112 | 
113 |   request.req_id = request_id;
114 |   requests[request_id] = cb;
115 | 
116 |   if (auto infer = dynamic_cast<serverapi::InferenceRequest*>(&request)) {
117 |     auto infer_req = new msg_inference_req_tx();
118 | 
119 |     infer_req->set(*infer);
120 |     msg_tx = infer_req;
121 |   }
122 |   else if (auto upload_model = dynamic_cast<serverapi::UploadModelRequest*>(&request)) {
123 |     auto upload_model_req = new msg_upload_model_req_tx();
124 | 
125 |     upload_model_req->set(*upload_model);
126 |     msg_tx = upload_model_req;
127 |   }
128 |   else if (auto close = dynamic_cast<serverapi::CloseRequest*>(&request)) {
129 |     auto close_req = new msg_close_req_tx();
130 | 
131 |     close_req->set(*close);
132 |     msg_tx = close_req;
133 |   }
134 | 
135 |   msg_tx_.send_message(*msg_tx);
136 | 
137 |   return promise->get_future();
138 | }
139 | 
140 | serverapi::Response* ClientSession::send_request(serverapi::Request& request, std::function<void(serverapi::Response*)> onSuccess) {
141 |   return send_request_async(request, onSuccess).get();
142 | }
143 | 
144 | void ClientSession::await_completion() {
145 |   while (request_seed_id > received_rsp_cnt) {
146 |     usleep(100000);
147 |   }
148 | 
149 |   return;
150 | }
151 | 
152 | message_rx* ClientSession::new_rx_message(uint64_t hdr_len, uint64_t body_len,
153 |                                           uint64_t req_id, uint64_t msg_type) {
154 |   message_rx* msg_rx;
155 | 
156 |   if (msg_type == RSP_INFERENCE) {
157 |     auto msg = new msg_inference_rsp_rx();
158 |     msg->set_req_id(req_id);
159 | 
160 |     msg_rx = msg;
161 |   }
162 |   else if (msg_type == RSP_UPLOAD_MODEL) {
163 |     auto msg = new msg_upload_model_rsp_rx();
164 |     msg->set_req_id(req_id);
165 | 
166 |     msg_rx = msg;
167 |   }
168 |   else if (msg_type == RSP_CLOSE) {
169 |     auto msg = new msg_close_rsp_rx();
170 |     msg->set_req_id(req_id);
171 | 
172 |     msg_rx = msg;
173 |   }
174 | 
175 |   return msg_rx;
176 | }
177 | 
178 | bool ClientSession::completed_receive(message_connection* conn, message_rx* req) {
179 |   uint64_t req_id = req->get_rx_req_id();
180 |   serverapi::Response* response;
181 |   bool is_continue = true;
182 | 
183 |   if (auto infer = dynamic_cast<msg_inference_rsp_rx*>(req)) {
184 |     auto response_ = new serverapi::InferenceResponse();
185 |     infer->get(*response_);
186 |     response = response_;
187 |   }
188 |   else if (auto upload_model = dynamic_cast<msg_upload_model_rsp_rx*>(req)) {
189 |     auto response_ = new serverapi::UploadModelResponse();
190 |     upload_model->get(*response_);
191 |     response = response_;
192 |   }
193 |   else if (auto close = dynamic_cast<msg_close_rsp_rx*>(req)) {
194 |     auto response_ = new serverapi::CloseResponse();
195 |     close->get(*response_);
196 | 
197 |     is_continue = false;
198 |     response = response_;
199 |   }
200 | 
201 |   requests[req_id](response);
202 |   received_rsp_cnt++;
203 | 
204 |   delete req;
205 | 
206 |   return is_continue;
207 | }
208 | 
209 | void ClientSession::completed_transmit(message_connection* conn, message_tx* req) {
210 | }
211 | 
212 | }
213 | 


--------------------------------------------------------------------------------
/src/network/session.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <network/network.h>
 3 | #include <network/server_api.h>
 4 | 
 5 | #include <atomic>
 6 | #include <thread>
 7 | #include <future>
 8 | 
 9 | #include "deepcache.pb.h"
10 | #include "tbb/concurrent_queue.h"
11 | 
12 | namespace network {
13 | 
14 | class Session : public message_connection, message_handler {
15 |  public:
16 |   Session(boost::asio::io_service& io_service)
17 |     : message_connection(io_service, *this),
18 |       msg_tx_(this, *this) {};
19 | 
20 |   virtual message_rx* new_rx_message(uint64_t hdr_len, uint64_t body_len,
21 |                                      uint64_t req_id, uint64_t msg_type) {};
22 | 
23 |   virtual bool completed_receive(message_connection* conn, message_rx* req) {};
24 | 
25 |   virtual void completed_transmit(message_connection* conn, message_tx* req) {};
26 | 
27 |  protected:
28 |   message_sender msg_tx_;
29 | };
30 | 
31 | class SrvSession;
32 | 
33 | struct Message { SrvSession* srv_session; serverapi::Request* req; };
34 | 
35 | typedef tbb::concurrent_bounded_queue<Message> MessageQueue;
36 | 
37 | class SrvSession : public Session {
38 |  public:
39 |   SrvSession(boost::asio::io_service& io_service,
40 |              MessageQueue& messages);
41 | 
42 |   void send_response(serverapi::Response* response);
43 | 
44 |   message_rx* new_rx_message(uint64_t hdr_len, uint64_t body_len,
45 |                              uint64_t req_id, uint64_t msg_type);
46 | 
47 |   bool completed_receive(message_connection* conn, message_rx* req);
48 | 
49 |   void completed_transmit(message_connection* conn, message_tx* req);
50 | 
51 |  private:
52 |   MessageQueue& messages_;
53 | };
54 | 
55 | class ClientSession : public Session {
56 |  public:
57 |   ClientSession(boost::asio::io_service& io_service);
58 | 
59 |   std::future<serverapi::Response*> send_request_async(serverapi::Request& request, std::function<void(serverapi::Response*)> onSuccess);
60 | 
61 |   serverapi::Response* send_request(serverapi::Request& request, std::function<void(serverapi::Response*)> onSuccess);
62 | 
63 |   void await_completion();
64 | 
65 |   message_rx* new_rx_message(uint64_t hdr_len, uint64_t body_len,
66 |                              uint64_t req_id, uint64_t msg_type);
67 | 
68 |   bool completed_receive(message_connection* conn, message_rx* req);
69 | 
70 |   void completed_transmit(message_connection* conn, message_tx* req);
71 | 
72 |  private:
73 |   std::atomic_int request_seed_id;
74 |   std::atomic_int received_rsp_cnt;
75 |   std::map<uint64_t, std::function<void(serverapi::Response*)>> requests;
76 | };
77 | 
78 | }
79 | 


--------------------------------------------------------------------------------
/src/server.cpp:
--------------------------------------------------------------------------------
 1 | #include <server/server.h>
 2 | #include <signal.h>
 3 | 
 4 | class InterruptException : public std::exception
 5 | {
 6 |   public:
 7 |     InterruptException(int s) : S(s) {}
 8 |     int S;
 9 | };
10 | 
11 | void sig_to_exception(int s)
12 | {
13 |   throw InterruptException(s);
14 | }
15 | 
16 | int main(int argc, char** argv) {
17 |   {
18 |     // setupt handling interrupt
19 |     struct sigaction sigIntHandler;
20 |     sigIntHandler.sa_handler = sig_to_exception;
21 |     sigemptyset(&sigIntHandler.sa_mask);
22 |     sigIntHandler.sa_flags = 0;
23 |     sigaction(SIGINT, &sigIntHandler, NULL);
24 |   }
25 | 
26 |   Server* server;
27 |   try {
28 |     server = new Server(DEFAULT_PORT);
29 |     server->run();
30 |   }
31 |   catch(InterruptException& e) {
32 |     server->shutdown();
33 |   }
34 |   catch (std::exception& e) {
35 |     std::cerr << "Exception: " << e.what() << "\n";
36 |   }
37 | 
38 |   return 0;
39 | }
40 | 


--------------------------------------------------------------------------------
/src/server/controller.cpp:
--------------------------------------------------------------------------------
  1 | #include <server/controller.h>
  2 | #include <server/worker.h>
  3 | #include <network/session.h>
  4 | #include <util.h>
  5 | #include <deepplan/engine.h>
  6 | 
  7 | #include <thread>
  8 | 
  9 | Controller::Controller(network::MessageQueue& messages)
 10 |   : messages_(messages),
 11 |     alive(false) {init();};
 12 | 
 13 | void Controller::init() {
 14 |   deepplan::Init();
 15 | 
 16 |   alive = true;
 17 |   ctrl_thr = std::thread(std::bind(&Controller::run, this));
 18 | 
 19 |   int rank = torch::cuda::device_count();
 20 |   workers.resize(rank);
 21 |   for (int i = 0; i < workers.size(); i++) {
 22 |     workers[i] = new Worker(i);
 23 |   }
 24 | }
 25 | 
 26 | void Controller::run() {
 27 |   while (alive) {
 28 |     network::Message message;
 29 | 
 30 |     if (messages_.try_pop(message)) {
 31 |       if (auto infer = dynamic_cast<serverapi::InferenceRequest*>(message.req)) {
 32 |         int model_id = infer->model_id;
 33 |         int n_workers = workers.size();
 34 |         int worker_id;
 35 | 
 36 |         worker_id = model_id % n_workers;
 37 |         infer->model_id = model_id / n_workers;
 38 | 
 39 |         auto cb = [message](serverapi::InferenceResponse* response) {
 40 |           message.srv_session->send_response(response);
 41 |         };
 42 | 
 43 |         workers[worker_id]->infer(infer, cb);
 44 |       }
 45 |       else if (auto upload_model = dynamic_cast<serverapi::UploadModelRequest*>(message.req)) {
 46 |         std::vector<std::string> model_names = upload_model->model_names;
 47 |         int n_models = upload_model->n_models;
 48 |         EngineType engine_type = static_cast<EngineType>(upload_model->engine_type);
 49 |         int mp_size = upload_model->mp_size;
 50 | 
 51 |         auto response = new serverapi::UploadModelResponse();
 52 | 
 53 |         setup_models(model_names, n_models, engine_type, mp_size);
 54 | 
 55 |         response->req_id = upload_model->req_id;
 56 |         message.srv_session->send_response(response);
 57 |       }
 58 |     }
 59 |   }
 60 | }
 61 | 
 62 | void Controller::setup_models(std::vector<std::string> model_names, int n_models, EngineType engine_type, int mp_size) {
 63 |   bool should_setup = false;
 64 | 
 65 |   // Update if the setting parameters are different
 66 |   if ((model_names_ != model_names) ||
 67 |       (n_models_ < n_models) ||
 68 |       (engine_type_ != engine_type) ||
 69 |       (mp_size_ != mp_size)) {
 70 |     should_setup = true;
 71 |   }
 72 | 
 73 |   if (should_setup) {
 74 |     int n_workers = workers.size();
 75 |     int n_models_per_worker = n_models / n_workers;
 76 |     std::vector<std::vector<int>> partitions(n_workers);
 77 | 
 78 |     for (int i = 0; i < n_workers; i++) {
 79 |       std::vector<int> p;
 80 |       for (int d = 0; d < mp_size; d++)
 81 |         p.push_back((i + 2*d) % n_workers);
 82 | 
 83 |       partitions[i] = p;
 84 |     }
 85 | 
 86 |     std::cout << "Models setup...\n";
 87 |     for (int i = 0; i < n_workers; i++) {
 88 |       workers[i]->reset_model();
 89 |       workers[i]->init_model(model_names, n_models_per_worker,
 90 |                              engine_type, partitions[i]);
 91 |     }
 92 | 
 93 |     model_names_ = model_names;
 94 |     n_models_ = n_models;
 95 |     engine_type_ = engine_type;
 96 |     mp_size_ = mp_size;
 97 | 
 98 |     std::cout << "Modele setup complete\n";
 99 |   }
100 |   else return;
101 | 
102 | }
103 | 
104 | void Controller::shutdown() {
105 |   alive = false;
106 |   if (ctrl_thr.joinable())
107 |     ctrl_thr.join();
108 | 
109 |   for (auto worker : workers)
110 |     worker->stop();
111 | }
112 | 


--------------------------------------------------------------------------------
/src/server/controller.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <network/server_api.h>
 4 | #include <network/session.h>
 5 | #include <server/worker.h>
 6 | #include <util.h>
 7 | 
 8 | #include <thread>
 9 | #include <atomic>
10 | 
11 | class Controller {
12 |  public:
13 |   Controller(network::MessageQueue& messages);
14 | 
15 |   void init();
16 | 
17 |   void run();
18 | 
19 |   void shutdown();
20 | 
21 |   void setup_models(std::vector<std::string> model_name, int n_models, EngineType engine_type, int mp_size);
22 | 
23 |  private:
24 |   std::atomic_bool alive;
25 | 
26 |   std::vector<Worker*> workers;
27 | 
28 |   network::MessageQueue& messages_;
29 | 
30 |   std::thread ctrl_thr;
31 | 
32 |   std::vector<std::string> model_names_;
33 |   int n_models_ = 0;
34 |   int mp_size_ = 0;
35 |   EngineType engine_type_ = EngineType::NONE;
36 | };
37 | 


--------------------------------------------------------------------------------
/src/server/model_manager.cpp:
--------------------------------------------------------------------------------
 1 | #include <c10/cuda/CUDACachingAllocator.h>
 2 | #include <server/model_manager.h>
 3 | #include <deepplan/model.h>
 4 | 
 5 | void ModelManager::add_model(std::string model_name, std::vector<int> devices) {
 6 |   deepplan::Model* model = new deepplan::Model(model_name, engine_type, devices);
 7 | 
 8 |   models.push_back(std::move(model));
 9 | }
10 | 
11 | deepplan::Model* ModelManager::get_model(int model_id) {
12 |   return models[model_id];
13 | }
14 | 
15 | void ModelManager::clear() {
16 |   for (auto model : models) {
17 |     model->clear();
18 |     delete model;
19 |   }
20 | }
21 | 
22 | size_t getDeviceActiveMemorySize(int device){
23 |   using c10::cuda::CUDACachingAllocator::StatArray;
24 |   using c10::cuda::CUDACachingAllocator::DeviceStats;
25 | 
26 |   const DeviceStats stats = c10::cuda::CUDACachingAllocator::getDeviceStats(device);
27 | 
28 |   return stats.active_bytes[0].current;
29 | }
30 | 


--------------------------------------------------------------------------------
/src/server/model_manager.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <deepplan/model.h>
 4 | #include <util.h>
 5 | 
 6 | size_t getDeviceActiveMemorySize(int deivce);
 7 | 
 8 | class ModelManager {
 9 |  public:
10 |   ModelManager(EngineType engine_type)
11 |     : engine_type(engine_type) {};
12 | 
13 |   void add_model(std::string model_name, std::vector<int> devices);
14 | 
15 |   deepplan::Model* get_model(int model_id);
16 | 
17 |   void clear();
18 | 
19 |   EngineType engine_type;
20 | 
21 |  private:
22 |   std::vector<deepplan::Model*> models;
23 | };
24 | 


--------------------------------------------------------------------------------
/src/server/server.cpp:
--------------------------------------------------------------------------------
 1 | #include <server/server.h>
 2 | #include <network/session.h>
 3 | 
 4 | #include <boost/asio.hpp>
 5 | 
 6 | Server::Server(int port)
 7 |   : io_service_(),
 8 |     acceptor_(io_service_, boost::asio::ip::tcp::endpoint(boost::asio::ip::tcp::v4(), port)),
 9 |     alive(false) {};
10 | 
11 | Server::~Server() {
12 |   shutdown();
13 | }
14 | 
15 | void Server::run() {
16 |   controller = new Controller(messages);
17 | 
18 |   start_accept();
19 | 
20 |   alive = true;
21 | 
22 |   std::cout << "Server Ready\n";
23 |   io_service_.run();
24 | }
25 | 
26 | void Server::shutdown() {
27 |   if (alive) {
28 |     std::cout << "Closing Server\n";
29 |     alive = false;
30 |     controller->shutdown();
31 | 
32 |     boost::system::error_code ec;
33 |     acceptor_.close(ec);
34 |     if (ec){
35 |       std::cerr << "Acceptor Error occured\n";
36 |     }
37 |     // If connecting session, close the session.
38 |   }
39 | }
40 | 
41 | void Server::send_response(serverapi::Response* response) {
42 |   current_session->send_response(response);
43 | }
44 | 
45 | void Server::start_accept() {
46 |   network::SrvSession* new_session = new network::SrvSession(io_service_, messages);
47 |   acceptor_.async_accept(new_session->get_socket(),
48 |       boost::bind(&Server::handle_accept, this, new_session,
49 |         boost::asio::placeholders::error));
50 | }
51 | 
52 | void Server::handle_accept(network::SrvSession* new_session,
53 |     const boost::system::error_code& error) {
54 |   if (error) {
55 |     std::cerr << "[Error] " << error.message() << std::endl;
56 |     delete new_session;
57 |     return;
58 |   }
59 | 
60 |   // FIXME: Should enable to handle multi client
61 |   new_session->established();
62 |   current_session = new_session;
63 | 
64 |   // wait when the connection from the client is disconnected.
65 |   start_accept();
66 | }
67 | 


--------------------------------------------------------------------------------
/src/server/server.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <network/session.h>
 4 | #include <network/message.h>
 5 | #include <server/controller.h>
 6 | 
 7 | #include <server_api.h>
 8 | #include <boost/asio.hpp>
 9 | #include <atomic>
10 | 
11 | #include "tbb/concurrent_queue.h"
12 | 
13 | #define DEFAULT_PORT 4321
14 | 
15 | class Server {
16 |  public:
17 |   Server(int port);
18 | 
19 |   ~Server();
20 | 
21 |   void init();
22 | 
23 |   void run();
24 | 
25 |   void send_response(serverapi::Response* response);
26 | 
27 |   void shutdown();
28 | 
29 |  private:
30 |   void start_accept();
31 | 
32 |   void handle_accept(network::SrvSession* new_session, const boost::system::error_code& error);
33 | 
34 |   boost::asio::io_service io_service_;
35 |   boost::asio::ip::tcp::acceptor acceptor_;
36 | 
37 |   network::MessageQueue messages;
38 | 
39 |   network::SrvSession* current_session;
40 | 
41 |   Controller* controller;
42 | 
43 |   std::atomic_bool alive;
44 | };
45 | 


--------------------------------------------------------------------------------
/src/server/worker.cpp:
--------------------------------------------------------------------------------
  1 | #include <torch/cuda.h>
  2 | #include <server/worker.h>
  3 | #include <server/model_manager.h>
  4 | #include <deepplan/model.h>
  5 | #include <cuda_runtime_api.h>
  6 | 
  7 | Worker::Worker(int device)
  8 |   : device(at::kCUDA, device),
  9 |     alive(true) {
 10 |       worker_thr = std::thread(std::bind(&Worker::run, this));
 11 |     }
 12 | 
 13 | void Worker::run() {
 14 |   torch::NoGradGuard no_grad;
 15 | 
 16 |   InferTask task;
 17 | 
 18 |   while (alive) {
 19 |     while (queue_.try_pop(task)) {
 20 |       auto request = task.request;
 21 |       auto response = new serverapi::InferenceResponse();
 22 |       bool is_cold = false;
 23 | 
 24 |       int model_id = request->model_id;
 25 |       deepplan::Model* model;
 26 | 
 27 |       if (running_models->exist(model_id)) {
 28 |         model = running_models->get(model_id);
 29 |       }
 30 |       else {
 31 |         auto new_model = model_manager->get_model(request->model_id);
 32 | 
 33 |         while (getDeviceActiveMemorySize(device.index()) >= capacity_) {
 34 |           auto evict_model = running_models->pop();
 35 |           evict_model->clear();
 36 |         }
 37 | 
 38 |         is_cold = true;
 39 |         running_models->put(model_id, new_model);
 40 |         model = new_model;
 41 |       }
 42 | 
 43 |       ScriptModuleInput inputs;
 44 | 
 45 |       for (auto input_config : model->input_configs) {
 46 |         inputs.push_back(
 47 |             input_config.get(request->input, request->batch_size).to(device));
 48 |       }
 49 | 
 50 |       model->forward(inputs);
 51 | 
 52 |       torch::cuda::synchronize(device.index());
 53 | 
 54 |       response->req_id = request->req_id;
 55 |       response->is_cold = is_cold;
 56 |       task.cb(response);
 57 |     }
 58 |   }
 59 | }
 60 | 
 61 | void Worker::init_model(std::vector<std::string> model_names, int n_models,
 62 |                         EngineType engine_type, std::vector<int> devices) {
 63 |   if (model_manager == nullptr) {
 64 |     size_t free;
 65 |     size_t total;
 66 |     size_t padding_size = (size_t)(5.5 * (1 << 30)); // 6GB
 67 |     int n_models_per = n_models / model_names.size();
 68 | 
 69 |     model_manager = new ModelManager(engine_type);
 70 |     for (auto model_name : model_names)
 71 |       for (int i = 0; i < n_models_per; i++)
 72 |         model_manager->add_model(model_name, devices);
 73 | 
 74 |     cudaError_t err = cudaMemGetInfo(&free, &total);
 75 |     if (err != cudaSuccess) {
 76 |       throw "cudaMemGetInfo Error\n";
 77 |     }
 78 | 
 79 |     capacity_ = total - padding_size;
 80 |     running_models = new LRUCache<int, deepplan::Model*>();
 81 |   }
 82 | }
 83 | 
 84 | void Worker::reset_model() {
 85 |   if (model_manager) {
 86 |     model_manager->clear();
 87 |     delete model_manager;
 88 |     model_manager = nullptr;
 89 |     delete running_models;
 90 |   }
 91 | }
 92 | 
 93 | void Worker::stop() {
 94 |   alive = false;
 95 |   if (worker_thr.joinable())
 96 |     worker_thr.join();
 97 | 
 98 |   reset_model();
 99 | }
100 | 
101 | void Worker::infer(
102 |     serverapi::InferenceRequest* request,
103 |     std::function<void(serverapi::InferenceResponse*)> cb) {
104 |   InferTask task(request, cb);
105 |   queue_.push(task);
106 | }
107 | 


--------------------------------------------------------------------------------
/src/server/worker.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <network/session.h>
 3 | #include <server/model_manager.h>
 4 | #include <deepplan/model.h>
 5 | #include <optional>
 6 | #include "tbb/concurrent_queue.h"
 7 | 
 8 | struct InferTask {
 9 |   InferTask() {};
10 |   InferTask(
11 |       serverapi::InferenceRequest* request,
12 |       std::function<void(serverapi::InferenceResponse*)> cb)
13 |       : request(request),
14 |         cb(cb) {};
15 | 
16 |   serverapi::InferenceRequest* request;
17 |   std::function<void(serverapi::InferenceResponse*)> cb;
18 | };
19 | 
20 | template<typename K, typename V>
21 | class LRUCache {
22 |  public:
23 |   bool put(const K& k, const V& v) {
24 |     if(exist(k)) {
25 |       return false;
26 |     }
27 | 
28 |     items.emplace_front(k, v);
29 | 
30 |     index.emplace(k, items.begin());
31 |   }
32 | 
33 |   bool exist(const K& k) {
34 |     return (index.count(k)>0);
35 |   }
36 | 
37 |   V get(const K& k) {
38 |     assert(exist(k));
39 |     auto itr = index.find(k);
40 | 
41 |     items.splice(items.begin(), items, itr->second);
42 | 
43 |     return itr->second->second;
44 |   }
45 | 
46 |   V pop() {
47 |     auto v = items.back().second;
48 |     index.erase(items.back().first);
49 |     items.pop_back();
50 | 
51 |     return v;
52 |   }
53 | 
54 |   size_t size() {
55 |     return index.size();
56 |   }
57 |  private:
58 |   std::list<std::pair<K,V>> items;
59 | 
60 |   std::unordered_map<K, typename std::list<std::pair<K,V>>::iterator> index;
61 | };
62 | 
63 | class Worker {
64 |  public:
65 |   Worker(int device);
66 | 
67 |   void run();
68 | 
69 |   void infer(
70 |       serverapi::InferenceRequest* request,
71 |       std::function<void(serverapi::InferenceResponse*)> cb);
72 | 
73 |   void init_model(std::vector<std::string> model_names, int n_models,
74 |                   EngineType engine_type, std::vector<int> devices);
75 | 
76 |   void reset_model();
77 | 
78 |   void stop();
79 | 
80 |   at::Device device;
81 | 
82 |  private:
83 |   size_t capacity_;
84 |   std::atomic_bool alive;
85 |   std::thread worker_thr;
86 |   ModelManager* model_manager = nullptr;
87 |   LRUCache<int, deepplan::Model*>* running_models;
88 |   tbb::concurrent_queue<InferTask> queue_;
89 | };
90 | 


--------------------------------------------------------------------------------
/src/server_api.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | namespace serverapi {
 4 | 
 5 | struct Request {
 6 |  public:
 7 |   virtual ~Request() {};
 8 |   uint64_t req_id;
 9 | };
10 | 
11 | struct Response {
12 |  public:
13 |   virtual ~Response() {};
14 |   uint64_t req_id;
15 | };
16 | 
17 | struct InferenceRequest : public Request {
18 |  public:
19 |   uint32_t model_id;
20 |   uint32_t batch_size;
21 |   size_t input_size;
22 |   void* input;
23 | };
24 | 
25 | struct InferenceResponse : public Response {
26 |  public:
27 |   bool is_cold;
28 | };
29 | 
30 | struct UploadModelRequest : public Request {
31 |  public:
32 |   std::vector<std::string> model_names;
33 |   uint32_t n_models;
34 |   uint32_t engine_type;
35 |   uint32_t mp_size;
36 | };
37 | 
38 | struct UploadModelResponse : public Response {
39 | };
40 | 
41 | struct CloseRequest : public Request {
42 | };
43 | 
44 | struct CloseResponse : public Response {
45 | };
46 | 
47 | }
48 | 


--------------------------------------------------------------------------------
/src/util.cpp:
--------------------------------------------------------------------------------
  1 | #include <util.h>
  2 | 
  3 | namespace util {
  4 | 
  5 | std::uint64_t now() {
  6 |   return nanos(hrt());
  7 | }
  8 | 
  9 | time_point hrt()
 10 | {
 11 |   return std::chrono::steady_clock::now();
 12 | }
 13 | 
 14 | time_point epoch = hrt();
 15 | 
 16 | uint64_t epoch_time = std::chrono::duration_cast<std::chrono::nanoseconds>(
 17 |     std::chrono::system_clock::now().time_since_epoch()).count();
 18 | 
 19 | 
 20 | std::uint64_t nanos(time_point t) {
 21 |   return std::chrono::duration_cast<std::chrono::nanoseconds>(t - epoch).count() + epoch_time;
 22 | }
 23 | 
 24 | size_t getModuleSize(ScriptModule module, bool ignore_cuda) {
 25 |   size_t size = 0;
 26 |   for (auto param : module.parameters()) {
 27 |     if (ignore_cuda && param.is_cuda()) continue;
 28 |     size += param.nbytes();
 29 |   }
 30 |   return size;
 31 | }
 32 | 
 33 | 
 34 | InputGenerator::InputGenerator(const char* model_repo)
 35 |   : model_repo_(model_repo) {assert(model_repo);}
 36 | 
 37 | InputGenerator::InputGenerator()
 38 |   : model_repo_(getenv("PLAN_REPO")) {assert(model_repo_);}
 39 | 
 40 | 
 41 | void InputGenerator::extend_rdata(DataType data_type, size_t size) {
 42 |   auto it = rdata_map.find(data_type);
 43 |   auto& data = it->second;
 44 |   char rdata[ALIGN(size)];
 45 | 
 46 |   switch (data_type) {
 47 |     case TYPE_FP32:
 48 |       {
 49 |         for (int i = 0; i < size; i += sizeof(float)) {
 50 |           float value = (float)rand() / RAND_MAX;
 51 |           memcpy(rdata+i, &value, sizeof(float));
 52 |         }
 53 |         break;
 54 |       }
 55 |     case TYPE_INT64:
 56 |       {
 57 |         for (int i = 0; i < size; i += sizeof(int64_t)) {
 58 |           int64_t value = (int64_t)rand() % 30522;
 59 |           memcpy(rdata+i, &value, sizeof(int64_t));
 60 |         }
 61 |         break;
 62 |       }
 63 |     default:
 64 |       throw std::runtime_error("Incorrect DataType");
 65 |       break;
 66 |   }
 67 | 
 68 |   data.insert(data.begin(), rdata, rdata+sizeof(rdata));
 69 | }
 70 | 
 71 | void InputGenerator::generate_rdata(size_t size, DataType data_type, char** buf_ptr) {
 72 |   *buf_ptr = new char[size];
 73 |   generate_rdata(size, data_type, *buf_ptr);
 74 | }
 75 | 
 76 | void InputGenerator::generate_rdata(size_t size, DataType data_type, char* buf) {
 77 |   auto it = rdata_map.find(data_type);
 78 |   std::vector<char> rdata;
 79 | 
 80 |   if (it == rdata_map.end()) {
 81 |     it = rdata_map.insert({data_type, {}}).first;
 82 |   }
 83 | 
 84 |   if (it->second.size() < size) {
 85 |     size_t extend_size = std::max((size_t)STEP_SIZE, size);
 86 |     extend_rdata(data_type, extend_size);
 87 |   }
 88 | 
 89 |   rdata = it->second;
 90 | 
 91 |   // TODO select random range
 92 |   std::memcpy(buf, rdata.data(), size);
 93 | }
 94 | 
 95 | void InputGenerator::add_input_config(const std::string& model_name) {
 96 |   std::vector<InputConfig> input_configs;
 97 |   ModelConfig model_config;
 98 |   std::string model_prefix;
 99 |   std::string config_path;
100 | 
101 |   model_prefix = std::string(model_repo_) + "/" + model_name;
102 |   config_path = model_prefix + "/config.pbtxt";
103 | 
104 |   try {
105 |     if (!util::read_from_pbtxt(model_config, config_path)) {
106 |       std::stringstream msg;
107 |       msg << "Failed to read " << config_path;
108 |       throw std::runtime_error(msg.str());
109 |     }
110 |   }
111 |   catch (const std::exception& e) {
112 |     std::cerr << e.what() << "\n";
113 |     throw e;
114 |   }
115 | 
116 |   for (auto io : model_config.inputs()) {
117 |     input_configs.emplace_back(io);
118 |   }
119 | 
120 |   input_config_map.insert(std::make_pair(model_name, input_configs));
121 | }
122 | 
123 | void InputGenerator::generate_input(std::string model_name, int batch_size, ScriptModuleInput* out) {
124 |   ScriptModuleInput inputs;
125 |   std::vector<InputConfig> input_configs;
126 | 
127 |   auto it = input_config_map.find(model_name);
128 |   if (it == input_config_map.end()) {
129 |     add_input_config(model_name);
130 |     it = input_config_map.find(model_name);
131 |   }
132 | 
133 |   input_configs = it->second;
134 | 
135 |   for (auto input_config : input_configs) {
136 |     auto shape = input_config.shape;
137 |     shape.insert(shape.begin(), batch_size);
138 | 
139 |     size_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
140 |     char* data;
141 | 
142 |     auto options = torch::TensorOptions();
143 |     switch (input_config.data_type) {
144 |       case TYPE_FP32:
145 |         size *= sizeof(float);
146 |         options = options.dtype(torch::kFloat32);
147 |         break;
148 |       case TYPE_INT64:
149 |         size *= sizeof(int64_t);
150 |         options = options.dtype(torch::kInt64);
151 |         break;
152 |     }
153 |     generate_rdata(size, input_config.data_type, &data);
154 | 
155 |     inputs.push_back(torch::from_blob(data, shape, options));
156 |   }
157 | 
158 | 
159 |   *out = inputs;
160 | }
161 | 
162 | // FIXME: Maybe convert double ptr
163 | void InputGenerator::generate_input(std::string model_name, int batch_size, std::vector<char>* out) {
164 |   std::vector<char> inputs;
165 |   std::vector<InputConfig> input_configs;
166 | 
167 |   auto it = input_config_map.find(model_name);
168 |   if (it == input_config_map.end()) {
169 |     add_input_config(model_name);
170 |     it = input_config_map.find(model_name);
171 |   }
172 | 
173 |   input_configs = it->second;
174 | 
175 |   for (auto input_config : input_configs) {
176 |     auto shape = input_config.shape;
177 |     shape.insert(shape.begin(), batch_size);
178 | 
179 |     size_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
180 |     char* data;
181 | 
182 |     switch (input_config.data_type) {
183 |       case TYPE_FP32:
184 |         size *= sizeof(float);
185 |         break;
186 |       case TYPE_INT64:
187 |         size *= sizeof(int64_t);
188 |         break;
189 |     }
190 | 
191 |     generate_rdata(size, input_config.data_type, &data);
192 |     inputs.insert(inputs.end(), data, data+size);
193 |   }
194 | 
195 |   *out = inputs;
196 | }
197 | 
198 | }
199 | 


--------------------------------------------------------------------------------
/src/util.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <torch/script.h>
  4 | #include <vector>
  5 | #include <map>
  6 | #include <chrono>
  7 | #include <cstdint>
  8 | #include <deepplan.pb.h>
  9 | #include <google/protobuf/text_format.h>
 10 | #include <exception>
 11 | 
 12 | typedef std::vector<torch::jit::IValue> ScriptModuleInput;
 13 | typedef torch::jit::script::Module ScriptModule;
 14 | 
 15 | #define STEP_SIZE (1024*1024)
 16 | #define ALIGNMENT 8
 17 | #define ALIGN(size) (((size) + (ALIGNMENT-1)) & ~(ALIGNMENT-1))
 18 | 
 19 | typedef enum
 20 | {
 21 |   IN_MEMORY = 0,
 22 |   ON_DEMAND,
 23 |   PIPESWITCH,
 24 |   DEEPPLAN,
 25 |   DEEPCACHE,
 26 |   NONE,
 27 | } EngineType;
 28 | 
 29 | struct InputConfig {
 30 |  public:
 31 |   InputConfig(ModelInput io)
 32 |     : shape(io.shape().begin(), io.shape().end()),
 33 |       data_type(io.data_type()) {};
 34 | 
 35 |   std::vector<int64_t> shape;
 36 |   DataType data_type;
 37 | 
 38 |   at::Tensor get(void* data, int batch_size) {
 39 |     auto shape_ = shape;
 40 |     shape_.insert(shape_.begin(), batch_size);
 41 | 
 42 |     auto options = torch::TensorOptions();
 43 |     switch (data_type) {
 44 |       case TYPE_FP32:
 45 |         options = options.dtype(torch::kFloat32);
 46 |         break;
 47 |       case TYPE_INT64:
 48 |         options = options.dtype(torch::kInt64);
 49 |         break;
 50 |     }
 51 |     return torch::from_blob(data, shape_, options);
 52 |   }
 53 | };
 54 | 
 55 | namespace util {
 56 | 
 57 | typedef std::chrono::steady_clock::time_point time_point;
 58 | 
 59 | time_point hrt();
 60 | 
 61 | std::uint64_t now();
 62 | 
 63 | std::uint64_t nanos(time_point t);
 64 | 
 65 | template <typename T>
 66 | bool read_from_pbtxt(T& config, const std::string path) {
 67 |   std::ifstream fin(path);
 68 |   if (!fin.is_open()) return false;
 69 |   std::stringstream ss;
 70 |   ss << fin.rdbuf();
 71 |   return google::protobuf::TextFormat::ParseFromString(ss.str(), &config);
 72 | }
 73 | 
 74 | size_t getModuleSize(ScriptModule module, bool ignore_cuda=false);
 75 | 
 76 | class InputGenerator {
 77 |  public:
 78 |   InputGenerator();
 79 | 
 80 |   InputGenerator(const char* model_repo);
 81 | 
 82 |   void generate_input(std::string model_name, int batch_size, ScriptModuleInput* out);
 83 | 
 84 |   void generate_input(std::string model_name, int batch_size, std::vector<char>* out);
 85 | 
 86 |  private:
 87 |   void generate_rdata(size_t size, DataType data_type, char** buf_ptr);
 88 | 
 89 |   void generate_rdata(size_t size, DataType data_type, char* buf);
 90 | 
 91 |   void extend_rdata(DataType data_type, size_t size);
 92 | 
 93 |   void add_input_config(const std::string& model_name);
 94 | 
 95 |   std::map<std::string, std::vector<InputConfig>> input_config_map;
 96 |   std::map<DataType, std::vector<char>> rdata_map;
 97 | 
 98 |   const char* model_repo_;
 99 | };
100 | 
101 | } // namespace util
102 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import os
 3 | import numpy as np
 4 | from google.protobuf import text_format
 5 | 
 6 | def read_from_pbtxt(config, path):
 7 |     if os.path.isfile(path):
 8 |         with open(path, 'r') as f:
 9 |             text_format.Parse(f.read(), config)
10 |             f.close()
11 | 
12 | def write_to_pbtxt(config, path):
13 |     with open(path, 'w') as f:
14 |         f.write(text_format.MessageToString(config, use_short_repeated_primitives=True))
15 |         f.close()
16 | 
17 | def travel_layers(mod, name_path=None):
18 |     layers = []
19 |     if name_path is None:
20 |         name_path = mod.__class__.__qualname__
21 | 
22 |     if len(list(mod.children())) == 0:
23 |         if isinstance(mod, torch.nn.Dropout):
24 |             return []
25 | 
26 |         _name_path = f"{name_path}.{mod.__class__.__qualname__}"
27 |         setattr(mod, "__qualname__", _name_path)
28 | 
29 |         return [mod]
30 |     else:
31 |         for i, (name, child) in enumerate(mod.named_children()):
32 |             _name_path = f"{name_path}.{child.__class__.__qualname__}{i}"
33 | 
34 |             layers += travel_layers(child, _name_path)
35 | 
36 |         return layers
37 | 
38 | def get_module_size(module, ignore_cuda=False):
39 |     size = 0
40 |     for key, parm in module._parameters.items():
41 |         if parm is not None:
42 |             if ignore_cuda is True and parm.is_cuda:
43 |                 continue
44 |             size += np.prod(np.array(parm.size())) * 4
45 |     for key, buf in module._buffers.items():
46 |         if buf is not None:
47 |             if ignore_cuda is True and buf.is_cuda:
48 |                 continue
49 |             size += np.prod(np.array(buf.size())) * 4
50 | 
51 |     for child in module.children():
52 |         size += get_module_size(child, ignore_cuda)
53 | 
54 |     return size
55 | 


--------------------------------------------------------------------------------