├── tests ├── data │ ├── model_db │ │ ├── store │ │ │ └── .placeholder │ │ ├── profiles │ │ │ └── TITAN_X_(Pascal) │ │ │ │ └── darknet:yolo9000:1.txt │ │ └── db │ │ │ └── model_db.yml │ └── images │ │ ├── ILSVRC2012_val_00000001.JPEG │ │ ├── ILSVRC2012_val_00000002.JPEG │ │ ├── ILSVRC2012_val_00000003.JPEG │ │ ├── ILSVRC2012_val_00000004.JPEG │ │ └── ILSVRC2012_val_00000005.JPEG ├── cpp │ ├── test_main.cpp │ └── scheduler │ │ └── backend_delegate_test.cpp └── python │ ├── test_client.py │ └── test_async_client.py ├── python ├── requirements.txt ├── nexus │ ├── __init__.py │ ├── client.py │ └── async_client.py └── setup.py ├── .gitignore ├── src └── nexus │ ├── common │ ├── buffer.cpp │ ├── config.h │ ├── image.h │ ├── util.h │ ├── spinlock.h │ ├── server_base.h │ ├── server_base.cpp │ ├── metric.h │ ├── backend_pool.h │ ├── buffer.h │ ├── rpc_service_base.h │ ├── message.cpp │ ├── time_util.h │ ├── connection.h │ ├── device.cpp │ ├── model_def.h │ ├── time_util.cpp │ ├── device.h │ ├── metric.cpp │ ├── rpc_call.h │ ├── image.cpp │ ├── connection.cpp │ ├── message.h │ ├── util.cpp │ ├── data_type.h │ └── model_db.h │ ├── app │ ├── user_session.h │ ├── worker.h │ ├── rpc_service.h │ ├── worker.cpp │ ├── exec_block.h │ ├── rpc_service.cpp │ ├── app_base.h │ ├── query_processor.h │ ├── app_base.cpp │ ├── model_handler.h │ └── frontend.h │ ├── backend │ ├── utils.h │ ├── rpc_service.h │ ├── worker.h │ ├── backup_client.h │ ├── tf_share_model.h │ ├── darknet_model.h │ ├── backup_client.cpp │ ├── slice.h │ ├── caffe_model.h │ ├── share_prefix_model.h │ ├── rpc_service.cpp │ ├── task.cpp │ ├── slice.cpp │ ├── caffe_densecap_model.h │ ├── tensorflow_model.h │ ├── gpu_executor.h │ ├── caffe2_model.h │ ├── backend_main.cpp │ ├── utils.cpp │ ├── batch_task.cpp │ ├── model_exec.h │ ├── model_ins.cpp │ ├── batch_task.h │ ├── worker.cpp │ └── task.h │ ├── scheduler │ ├── scheduler_main.cpp │ ├── frontend_delegate.h │ ├── complex_query.h │ ├── sch_info.cpp │ ├── frontend_delegate.cpp │ ├── sch_info.h │ └── backend_delegate.h │ └── proto │ └── nnquery.proto ├── .gitmodules ├── examples ├── simple_app │ └── src │ │ ├── client.py │ │ └── frontend.cpp ├── obj_rec │ └── src │ │ └── obj_rec.cpp ├── face_rec │ └── src │ │ └── face_rec.cpp ├── README.md └── traffic_complex │ └── src │ └── traffic_complex.cpp ├── LICENSE ├── README.md ├── Dockerfile ├── tools └── test_complex_query.cpp └── BUILDING.md /tests/data/model_db/store/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.11.2 2 | -------------------------------------------------------------------------------- /python/nexus/__init__.py: -------------------------------------------------------------------------------- 1 | from .proto.nnquery_pb2 import * 2 | from .client import Client 3 | from .async_client import AsyncClient 4 | -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000001.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000001.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000002.JPEG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000002.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000003.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000003.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000004.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000004.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000005.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000005.JPEG -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open('requirements.txt') as f: 4 | required = f.read().splitlines() 5 | 6 | setup( 7 | name='nexus', 8 | packages=['nexus'], 9 | include_package_data=True, 10 | install_requires=required, 11 | ) 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.o 3 | *.pyc 4 | *.d 5 | .DS_Store* 6 | *.swp 7 | 8 | obj/ 9 | lib/ 10 | bin/ 11 | build/ 12 | build-dep-src/ 13 | build-dep-install/ 14 | python/nexus/proto/ 15 | python/nexus.egg-info/ 16 | 17 | cmake-build*/ 18 | .clion.source.upload.marker 19 | .clangd/ 20 | compile_commands.json 21 | -------------------------------------------------------------------------------- /src/nexus/common/buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/common/buffer.h" 2 | #include 3 | 4 | namespace nexus { 5 | std::shared_ptr Buffer::Slice(size_t offset, size_t nbytes) { 6 | CHECK_LE(offset + nbytes, nbytes_) << "Slice exceeds buffer boundary"; 7 | return std::shared_ptr(new Buffer( 8 | shared_from_this(), offset, nbytes)); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/cpp/test_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | DECLARE_string(model_root); 6 | 7 | int main(int argc, char ** argv) { 8 | testing::InitGoogleTest(&argc, argv); 9 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 10 | google::ParseCommandLineFlags(&argc, &argv, true); 11 | return RUN_ALL_TESTS(); 12 | } 13 | -------------------------------------------------------------------------------- /src/nexus/common/config.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_CONFIG_H_ 2 | #define NEXUS_CONFIG_H_ 3 | 4 | #define BACKEND_DEFAULT_PORT 8001 5 | #define BACKEND_DEFAULT_RPC_PORT 8002 6 | #define FRONTEND_DEFAULT_PORT 9001 7 | #define FRONTEND_DEFAULT_RPC_PORT 9002 8 | #define SCHEDULER_DEFAULT_PORT 10001 9 | #define BEACON_INTERVAL_SEC 2 10 | #define EPOCH_INTERVAL_SEC 10 11 | 
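// BEACON_INTERVAL_SEC appears to be the heartbeat period at which frontends
// and backends report liveness to the scheduler (FrontendDelegate receives it
// as beacon_sec), and EPOCH_INTERVAL_SEC the period at which the scheduler
// recomputes its schedule; both readings are inferred from usage, not stated
// in this header.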
12 | #endif // NEXUS_CONFIG_H_ 13 | -------------------------------------------------------------------------------- /src/nexus/common/image.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_IMAGE_H_ 2 | #define NEXUS_COMMON_IMAGE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/proto/nnquery.pb.h" 7 | 8 | namespace nexus { 9 | 10 | enum ChannelOrder { 11 | CO_RGB = 0, 12 | CO_BGR = 1, 13 | }; 14 | 15 | cv::Mat _Hack_DecodeImageByFilename(const ImageProto &image, 16 | ChannelOrder order); 17 | 18 | cv::Mat DecodeImage(const ImageProto &image, ChannelOrder order); 19 | 20 | } // namespace nexus 21 | 22 | #endif // NEXUS_COMMON_IMAGE_H_ 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | # [submodule "darknet"] 2 | # path = frameworks/darknet 3 | # url = https://github.com/icemelon9/darknet.git 4 | # branch = nexus 5 | # [submodule "caffe"] 6 | # path = frameworks/caffe 7 | # url = https://github.com/icemelon9/caffe.git 8 | # branch = nexus 9 | # [submodule "frameworks/caffe2"] 10 | # path = frameworks/caffe2 11 | # url = https://github.com/icemelon9/caffe2.git 12 | # branch = nexus 13 | [submodule "frameworks/caffe2"] 14 | path = frameworks/caffe2 15 | url = https://github.com/abcdabcd987/caffe2-nexus.git 16 | branch = nexus 17 | -------------------------------------------------------------------------------- /src/nexus/app/user_session.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_USER_SESSION_H_ 2 | #define NEXUS_APP_USER_SESSION_H_ 3 | 4 | #include "nexus/common/connection.h" 5 | 6 | namespace nexus { 7 | namespace app { 8 | 9 | class UserSession : public Connection { 10 | public: 11 | UserSession(boost::asio::ip::tcp::socket socket, MessageHandler* handler) : 12 | Connection(std::move(socket), handler), user_id_(0) {} 13 | 14 | uint32_t user_id() const { return user_id_; } 15 | 16 | void set_user_id(uint32_t user_id) { user_id_ = user_id; } 17 | 18 | private: 19 | uint32_t user_id_; 20 | }; 21 | 22 | } // namespace app 23 | } // namespace nexus 24 | 25 | #endif // NEXUS_APP_USER_SESSION_H_ 26 | -------------------------------------------------------------------------------- /src/nexus/backend/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_UTILS_H_ 2 | #define NEXUS_BACKEND_UTILS_H_ 3 | 4 | #include 5 | 6 | #include "nexus/proto/nnquery.pb.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | void LoadClassnames(const std::string& filepath, 12 | std::unordered_map* classnames); 13 | 14 | void PostprocessClassification( 15 | const QueryProto& query, const float* prob, size_t nprobs, 16 | QueryResultProto* result, 17 | const std::unordered_map* classnames = nullptr); 18 | 19 | 20 | } // namespace backend 21 | } // namespace nexus 22 | 23 | #endif // NEXUS_BACKEND_UTILS_H_ 24 | -------------------------------------------------------------------------------- /src/nexus/common/util.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_UTIL_H_ 2 | #define NEXUS_COMMON_UTIL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/common/device.h" 8 | 9 | DECLARE_bool(hack_reply_omit_output); 10 | 11 | namespace nexus { 12 | 13 | void SplitString(const std::string &str, char delim, 14 | std::vector *tokens); 15 | 16 | void 
Memcpy(void *dst, const Device *dst_device, const void *src, 17 | const Device *src_device, size_t nbytes); 18 | 19 | // GetIpAddress returns the first IP address that is not localhost (127.0.0.1) 20 | std::string GetIpAddress(const std::string &prefix); 21 | 22 | } // namespace nexus 23 | 24 | #endif // NEXUS_COMMON_UTIL_H_ 25 | -------------------------------------------------------------------------------- /src/nexus/app/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_WORKER_H_ 2 | #define NEXUS_APP_WORKER_H_ 3 | 4 | #include <atomic> 5 | #include <thread> 6 | 7 | #include "nexus/app/query_processor.h" 8 | #include "nexus/app/request_context.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | class Frontend; 14 | 15 | class Worker { 16 | public: 17 | Worker(QueryProcessor* qp, RequestPool& req_pool); 18 | 19 | void Start(); 20 | 21 | void Stop(); 22 | 23 | void Join(); 24 | 25 | void Run(); 26 | 27 | private: 28 | QueryProcessor* qp_; 29 | RequestPool& req_pool_; 30 | volatile std::atomic_bool running_; 31 | std::thread thread_; 32 | }; 33 | 34 | } // namespace app 35 | } // namespace nexus 36 | 37 | #endif // NEXUS_APP_WORKER_H_ 38 | -------------------------------------------------------------------------------- /src/nexus/app/rpc_service.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_RPC_SERVICE_H_ 2 | #define NEXUS_APP_RPC_SERVICE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/rpc_call.h" 7 | #include "nexus/common/rpc_service_base.h" 8 | #include "nexus/proto/control.grpc.pb.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | using AsyncService = nexus::FrontendCtrl::AsyncService; 14 | 15 | class Frontend; 16 | 17 | class RpcService : public AsyncRpcServiceBase { 18 | public: 19 | RpcService(Frontend* frontend, std::string port, size_t nthreads = 1); 20 | 21 | protected: 22 | void HandleRpcs() final; 23 | 24 | private: 25 | Frontend* frontend_; 26 | }; 27 | 28 | } // namespace app 29 | } // namespace nexus 30 | 31 | #endif // NEXUS_APP_RPC_SERVICE_H_ 32 | -------------------------------------------------------------------------------- /src/nexus/backend/rpc_service.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_RPC_SERVICE_H_ 2 | #define NEXUS_BACKEND_RPC_SERVICE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/rpc_service_base.h" 7 | #include "nexus/proto/control.grpc.pb.h" 8 | 9 | namespace nexus { 10 | namespace backend { 11 | 12 | using AsyncService = nexus::BackendCtrl::AsyncService; 13 | 14 | class BackendServer; 15 | 16 | class BackendRpcService : public AsyncRpcServiceBase { 17 | public: 18 | BackendRpcService(BackendServer* backend, std::string port, 19 | size_t nthreads = 1); 20 | 21 | protected: 22 | void HandleRpcs() final; 23 | 24 | private: 25 | BackendServer* backend_; 26 | }; 27 | 28 | } // namespace backend 29 | } // namespace nexus 30 | 31 | #endif // NEXUS_BACKEND_RPC_SERVICE_H_ 32 | -------------------------------------------------------------------------------- /src/nexus/common/spinlock.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_SPINLOCK_H_ 2 | #define NEXUS_COMMON_SPINLOCK_H_ 3 | 4 | #include <atomic> 5 | 6 | namespace nexus { 7 | 8 | class Spinlock { 9 | public: 10 | Spinlock(): flag_(ATOMIC_FLAG_INIT) {} 11 | 12 | inline void Acquire() { 13 | while (flag_.test_and_set(std::memory_order_acquire)) 14 | ;
// spin 15 | } 16 | 17 | inline void Release() { 18 | flag_.clear(std::memory_order_release); 19 | } 20 | 21 | private: 22 | std::atomic_flag flag_; 23 | }; 24 | 25 | class SpinlockGuard { 26 | public: 27 | SpinlockGuard(Spinlock& lock): lock_(lock) { 28 | lock.Acquire(); 29 | } 30 | 31 | ~SpinlockGuard() { 32 | lock_.Release(); 33 | } 34 | 35 | private: 36 | Spinlock& lock_; 37 | }; 38 | 39 | } // namespace nexus 40 | 41 | #endif // NEXUS_COMMON_SPINLOCK_H_ 42 | -------------------------------------------------------------------------------- /tests/python/test_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import nexus 4 | 5 | _test_data = os.path.abspath(os.path.join(__file__, '../../data')) 6 | 7 | service_addr = "127.0.0.1:9001" 8 | 9 | def load_images(root): 10 | images = {} 11 | for fn in os.listdir(root): 12 | with open(os.path.join(root, fn), 'rb') as f: 13 | im = f.read() 14 | images[fn] = im 15 | return images 16 | 17 | 18 | def test_client(): 19 | user_id = random.randint(1, 1000000000) 20 | client = nexus.Client(service_addr, user_id) 21 | images = load_images(os.path.join(_test_data, 'images')) 22 | for fn in images: 23 | reply = client.request(images[fn]) 24 | print(fn) 25 | print(reply) 26 | 27 | 28 | if __name__ == "__main__": 29 | print("Test client...") 30 | test_client() 31 | -------------------------------------------------------------------------------- /src/nexus/scheduler/scheduler_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/config.h" 4 | #include "nexus/common/util.h" 5 | #include "nexus/scheduler/scheduler.h" 6 | 7 | using namespace nexus::scheduler; 8 | 9 | DEFINE_string(port, "10001", "RPC port"); 10 | DEFINE_string(workload, "", "Static workload config file"); 11 | 12 | int main(int argc, char** argv) { 13 | // Init glog 14 | google::InitGoogleLogging(argv[0]); 15 | // Parse command line flags 16 | google::ParseCommandLineFlags(&argc, &argv, true); 17 | // Setup backtrace on segfault 18 | google::InstallFailureSignalHandler(); 19 | // Create scheduler 20 | Scheduler scheduler(FLAGS_port, 4); 21 | if (FLAGS_workload.length() > 0) { 22 | scheduler.LoadWorkloadFile(FLAGS_workload); 23 | } 24 | scheduler.Run(); 25 | while (true) { 26 | ; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/nexus/app/worker.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/frontend.h" 2 | #include "nexus/app/worker.h" 3 | 4 | namespace nexus { 5 | namespace app { 6 | 7 | Worker::Worker(QueryProcessor* qp, RequestPool& req_pool) : 8 | qp_(qp), 9 | req_pool_(req_pool), 10 | running_(false) { 11 | } 12 | 13 | void Worker::Start() { 14 | running_ = true; 15 | thread_ = std::thread(&Worker::Run, this); 16 | } 17 | 18 | void Worker::Stop() { 19 | running_ = false; 20 | } 21 | 22 | void Worker::Join() { 23 | if (thread_.joinable()) { 24 | thread_.join(); 25 | } 26 | } 27 | 28 | void Worker::Run() { 29 | auto timeout = std::chrono::milliseconds(50); 30 | while (running_) { 31 | auto req = req_pool_.GetRequest(timeout); 32 | if (req == nullptr) { 33 | continue; 34 | } 35 | qp_->Process(req); 36 | } 37 | } 38 | 39 | } // namespace app 40 | } // namespace nexus 41 | -------------------------------------------------------------------------------- /src/nexus/backend/worker.h: 
-------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_WORKER_H_ 2 | #define NEXUS_BACKEND_WORKER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/common/block_queue.h" 9 | #include "nexus/backend/task.h" 10 | 11 | namespace nexus { 12 | namespace backend { 13 | 14 | class BackendServer; 15 | 16 | class Worker { 17 | public: 18 | Worker(int index, BackendServer* server, 19 | BlockPriorityQueue& task_queue); 20 | 21 | void Start(int core = -1); 22 | 23 | void Stop(); 24 | 25 | void Run(); 26 | 27 | private: 28 | void Process(std::shared_ptr task); 29 | 30 | void SendReply(std::shared_ptr task); 31 | 32 | private: 33 | int index_; 34 | BackendServer* server_; 35 | BlockPriorityQueue& task_queue_; 36 | volatile bool running_; 37 | std::thread thread_; 38 | }; 39 | 40 | } // namespace backend 41 | } // namespace nexus 42 | 43 | #endif // NEXUS_BACKEND_WORKER_H_ 44 | -------------------------------------------------------------------------------- /examples/simple_app/src/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import argparse 3 | import random 4 | import sys 5 | 6 | import nexus 7 | 8 | 9 | def read_image(img): 10 | if img == "-": 11 | return sys.stdin.buffer.read() 12 | with open(img, "rb") as f: 13 | return f.read() 14 | 15 | 16 | async def query(server, image): 17 | user_id = random.randint(0, 2 ** 31 - 1) 18 | async with nexus.AsyncClient(server, user_id) as client: 19 | _send_time, _recv_time, reply = await client.request(image) 20 | print(reply) 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("image", help="Path to image file. `-` to read from stdin.") 26 | parser.add_argument("--server", help="Frontend server", default="localhost:9001") 27 | args = parser.parse_args() 28 | 29 | image = read_image(args.image) 30 | asyncio.run(query(args.server, image)) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /tests/data/model_db/profiles/TITAN_X_(Pascal)/darknet:yolo9000:1.txt: -------------------------------------------------------------------------------- 1 | darknet:yolo9000:1:480x480 2 | TITAN_X_(Pascal) 3 | Forward latency 4 | batch,latency(us),std(us),memory(B) 5 | 1,21675.7,168.307,1069481984 6 | 2,41176.2,387.454,1566507008 7 | 3,60536,274.659,2088697856 8 | 4,80793.9,544.708,2564751360 9 | 5,98803,1305.68,3078553600 10 | 6,117068,849.329,3552509952 11 | 7,136328,1503.97,4053729280 12 | 8,155704,834.623,4536074240 13 | 9,174362,610.383,4915658752 14 | 10,194981,2471.87,5463015424 15 | 11,213786,1710.47,5913903104 16 | 12,234452,2751.93,6436093952 17 | 13,252422,1663.66,6857621504 18 | 14,273129,2958.86,7402881024 19 | 15,293524,3428.02,7853768704 20 | 16,311602,3175.26,8386445312 21 | 17,334098,2368.35,8774418432 22 | 18,350095,2430.07,9248374784 23 | 19,369662,1933.19,9772662784 24 | 20,390291,1212.97,10246619136 25 | 21,411059,2740.85,10714284032 26 | 22,428823,1291.49,11190337536 27 | 23,449042,2275.25,11720916992 28 | 24,467635,1058.77,12194873344 29 | Preprocess latency 30 | mean(us),std(us) 31 | 9480.56,7046.88 32 | Postprocess latency 33 | mean(us),std(us) 34 | 47495.1,3389.64 35 | -------------------------------------------------------------------------------- /src/nexus/backend/backup_client.h: -------------------------------------------------------------------------------- 1 | #ifndef 
NEXUS_BACKEND_BACKUP_CLIENT_H_ 2 | #define NEXUS_BACKEND_BACKUP_CLIENT_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/backend/task.h" 8 | #include "nexus/common/backend_pool.h" 9 | 10 | namespace nexus { 11 | namespace backend { 12 | 13 | class BackupClient : public BackendSession { 14 | public: 15 | explicit BackupClient(const BackendInfo& info, 16 | boost::asio::io_context& io_context, 17 | MessageHandler* handler); 18 | 19 | void Forward(std::shared_ptr task); 20 | 21 | void Reply(std::shared_ptr message); 22 | 23 | private: 24 | /*! \brief Map from task id to frontend connection. Guarded by relay_mu_. */ 25 | std::unordered_map > conns_; 26 | /*! \brief Map from task id to query id. Guarded by relay_mu_. */ 27 | std::unordered_map qid_lookup_; 28 | std::mutex relay_mu_; 29 | }; 30 | 31 | } // namespace backend 32 | } // namespace nexus 33 | 34 | #endif // NEXUS_BACKEND_BACKUP_CLIENT_H_ 35 | -------------------------------------------------------------------------------- /src/nexus/app/exec_block.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_EXEC_BLOCK_H_ 2 | #define NEXUS_APP_EXEC_BLOCK_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/app/model_handler.h" 8 | #include "nexus/app/request_context.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | using ExecFunc = std::function( 14 | std::shared_ptr ctx)>; 15 | 16 | class ExecBlock { 17 | public: 18 | ExecBlock(int id, ExecFunc func, std::vector required_vars) : 19 | id_(id), 20 | func_(func) { 21 | for (auto var_name : required_vars) { 22 | dependency_.insert(var_name); 23 | } 24 | } 25 | 26 | int id() const { return id_; } 27 | 28 | std::unordered_set dependency() const { return dependency_; } 29 | 30 | std::vector Run(std::shared_ptr ctx) { 31 | return func_(ctx); 32 | } 33 | 34 | private: 35 | int id_; 36 | ExecFunc func_; 37 | std::unordered_set dependency_; 38 | }; 39 | 40 | } // namespace app 41 | } // namespace nexus 42 | 43 | #endif // NEXUS_APP_EXEC_BLOCK_H_ 44 | -------------------------------------------------------------------------------- /src/nexus/app/rpc_service.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/frontend.h" 2 | #include "nexus/app/rpc_service.h" 3 | 4 | namespace nexus { 5 | namespace app { 6 | 7 | INSTANTIATE_RPC_CALL(AsyncService, UpdateModelRoutes, ModelRouteUpdates, 8 | RpcReply); 9 | INSTANTIATE_RPC_CALL(AsyncService, CheckAlive, CheckAliveRequest, RpcReply); 10 | 11 | RpcService::RpcService(Frontend* frontend, std::string port, size_t nthreads): 12 | AsyncRpcServiceBase(port, nthreads), 13 | frontend_(frontend) { 14 | } 15 | 16 | void RpcService::HandleRpcs() { 17 | new UpdateModelRoutes_Call( 18 | &service_, cq_.get(), 19 | [this](const grpc::ServerContext&, const ModelRouteUpdates& req, 20 | RpcReply* reply) { 21 | frontend_->UpdateModelRoutes(req, reply); 22 | }); 23 | new CheckAlive_Call( 24 | &service_, cq_.get(), 25 | [](const grpc::ServerContext&, const CheckAliveRequest&, 26 | RpcReply* reply) { 27 | reply->set_status(CTRL_OK); 28 | }); 29 | void* tag; 30 | bool ok; 31 | while (running_) { 32 | cq_->Next(&tag, &ok); 33 | if (ok) { 34 | static_cast(tag)->Proceed(); 35 | } 36 | } 37 | } 38 | 39 | } // namespace app 40 | } // namespace nexus 41 | -------------------------------------------------------------------------------- /src/nexus/common/server_base.h: -------------------------------------------------------------------------------- 1 | #ifndef 
NEXUS_COMMON_SERVER_BASE_H_ 2 | #define NEXUS_COMMON_SERVER_BASE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nexus { 9 | 10 | class ServerBase { 11 | public: 12 | // Disable copy 13 | ServerBase(const ServerBase&) = delete; 14 | ServerBase& operator=(const ServerBase&) = delete; 15 | // Construct the server given a port. 16 | ServerBase(std::string port); 17 | // Construct the server given the IP address and port. 18 | ServerBase(std::string ip, std::string port); 19 | // Get the server address 20 | std::string address() const { return ip_ + ":" + port_; } 21 | // Get listening port 22 | std::string port() const { return port_; } 23 | // Start the server. 24 | virtual void Run(); 25 | // Handle a stop operation. 26 | virtual void Stop(); 27 | protected: 28 | // Asynchronously wait for an accept request. 29 | void DoAccept(); 30 | // Asynchronously wait for a stop request. 31 | void DoAwaitStop(); 32 | // Handle an accept operation. 33 | virtual void HandleAccept() = 0; 34 | // data fields 35 | std::string ip_; 36 | std::string port_; 37 | boost::asio::io_context io_context_; 38 | boost::asio::signal_set signals_; 39 | boost::asio::ip::tcp::acceptor acceptor_; 40 | boost::asio::ip::tcp::socket socket_; 41 | }; 42 | 43 | } // namespace nexus 44 | 45 | #endif // NEXUS_COMMON_SERVER_BASE_H_ 46 | -------------------------------------------------------------------------------- /src/nexus/backend/tf_share_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_TFSHAREMODEL_H 2 | #define NEXUS_TFSHAREMODEL_H 3 | 4 | #include 5 | #include 6 | #include "nexus/backend/model_ins.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | class TensorflowModel; 12 | 13 | class TFShareModel : public ModelInstance { 14 | public: 15 | void set_batch(size_t batch) override; 16 | Shape InputShape() override; 17 | std::unordered_map OutputShapes() override; 18 | ArrayPtr CreateInputGpuArray() override; 19 | std::unordered_map GetOutputGpuArrays() override; 20 | void Preprocess(std::shared_ptr task) override; 21 | void Forward(std::shared_ptr batch_task) override; 22 | void Postprocess(std::shared_ptr task) override; 23 | 24 | TFShareModel(int gpu_id, const ModelInstanceConfig& config); 25 | bool AddModelSession(const ModelSession& model_sess); 26 | bool RemoveModelSession(const ModelSession& model_sess); 27 | size_t num_model_sessions(); 28 | 29 | private: 30 | size_t num_suffixes_; 31 | std::shared_ptr tf_share_info_; 32 | std::unique_ptr tf_model_; 33 | std::mutex loaded_suffixes_mutex_; 34 | std::unordered_set loaded_suffixes_; 35 | std::unordered_map> classnames_; 36 | }; 37 | 38 | } 39 | } 40 | 41 | #endif //NEXUS_TFSHAREMODEL_H 42 | -------------------------------------------------------------------------------- /src/nexus/app/app_base.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_APP_BASE_H_ 2 | #define NEXUS_APP_APP_BASE_H_ 3 | 4 | #include <gflags/gflags.h> 5 | #include "nexus/app/frontend.h" 6 | 7 | DECLARE_int32(load_balance); 8 | 9 | namespace nexus { 10 | namespace app { 11 | 12 | class AppBase : public Frontend { 13 | public: 14 | AppBase(const std::string& port, 15 | const std::string& rpc_port, 16 | const std::string& sch_addr, 17 | size_t nthreads); 18 | 19 | ~AppBase() override; 20 | 21 | void Start(); 22 | 23 | virtual void Setup() {} 24 | 25 | bool IsComplexQuery() const; 26 | 27 | void ComplexQuerySetup(const std::string &cq_name, uint32_t slo_us, uint32_t step_us); 28 | 29 | void
ComplexQueryAddEdge(const std::shared_ptr& source, 30 | const std::shared_ptr& target); 31 | 32 | protected: 33 | std::shared_ptr GetModelHandler( 34 | const std::string& framework, const std::string& model_name, 35 | uint32_t version, uint64_t latency_sla, float estimate_workload=0., 36 | std::vector image_size={}, 37 | LoadBalancePolicy lb_policy=LoadBalancePolicy(FLAGS_load_balance)); 38 | size_t nthreads_; 39 | QueryProcessor* qp_; 40 | 41 | std::string cq_id_; 42 | uint32_t slo_us_; 43 | uint32_t step_us_; 44 | }; 45 | 46 | void LaunchApp(AppBase* app); 47 | 48 | } // namespace app 49 | } // namespace nexus 50 | 51 | #endif // NEXUS_APP_APP_BASE_H_ 52 | -------------------------------------------------------------------------------- /tests/data/model_db/db/model_db.yml: -------------------------------------------------------------------------------- 1 | models: 2 | - framework: darknet 3 | model_name: yolo9000 4 | type: detection 5 | version: 1 6 | cfg_file: darknet/yolo9000.cfg 7 | weight_file: darknet/yolo9000.weights 8 | class_names: darknet/data/9k.names 9 | resizable: true 10 | image_height: 480 11 | image_width: 480 12 | - framework: darknet 13 | model_name: darknet 14 | type: classification 15 | version: 1 16 | cfg_file: darknet/darknet.cfg 17 | weight_file: darknet/darknet.weights 18 | class_name: darknet/data/imagenet.shortnames.list 19 | - framework: caffe 20 | model_name: vgg_face 21 | type: classification 22 | version: 1 23 | cfg_file: caffe/vgg_face/1/VGG_FACE_deploy.prototxt 24 | weight_file: caffe/vgg_face/1/VGG_FACE.caffemodel 25 | class_names: caffe/vgg_face/names.txt 26 | mean_value: [99.5503, 115.7630, 151.2761] 27 | - framework: caffe 28 | model_name: vgg_s 29 | type: classification 30 | version: 1 31 | cfg_file: caffe/vgg_s/1/vgg_s.prototxt 32 | weight_file: caffe/vgg_s/1/vgg_s.caffemodel 33 | class_names: caffe/vgg_s/synset_words.txt 34 | mean_file: caffe/vgg_s/1/vgg_s_mean.binaryproto 35 | - framework: caffe 36 | model_name: vgg16 37 | type: classification 38 | version: 1 39 | cfg_file: caffe/vgg16/1/vgg16.prototxt 40 | weight_file: caffe/vgg16/1/vgg16.caffemodel 41 | class_names: caffe/vgg16/synset_words.txt 42 | mean_value: [103.939, 116.779, 123.68] 43 | -------------------------------------------------------------------------------- /src/nexus/scheduler/frontend_delegate.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 2 | #define NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nexus/proto/control.grpc.pb.h" 11 | 12 | namespace nexus { 13 | namespace scheduler { 14 | 15 | class Scheduler; 16 | 17 | class FrontendDelegate { 18 | public: 19 | FrontendDelegate(uint32_t node_id, const std::string& ip, 20 | const std::string& server_port, const std::string& rpc_addr, 21 | int beacon_sec); 22 | 23 | uint32_t node_id() const { return node_id_; } 24 | 25 | std::time_t LastAliveTime(); 26 | 27 | void Tick(); 28 | 29 | bool IsAlive(); 30 | 31 | void SubscribeModel(const std::string& model_session_id); 32 | 33 | const std::unordered_set& subscribe_models() const { 34 | return subscribe_models_; 35 | } 36 | 37 | CtrlStatus UpdateModelRoutesRpc(const ModelRouteUpdates& request); 38 | 39 | private: 40 | uint32_t node_id_; 41 | std::string ip_; 42 | std::string server_port_; 43 | std::string rpc_port_; 44 | int beacon_sec_; 45 | long timeout_ms_; 46 | std::unique_ptr stub_; 47 | std::chrono::time_point last_time_; 
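// last_time_ presumably records the most recent heartbeat: Tick() refreshes
// it, and IsAlive() compares the elapsed time against beacon_sec_ to detect
// dead frontends (inferred from the API above; not stated in the original).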
48 | std::unordered_set subscribe_models_; 49 | }; 50 | 51 | } // namespace scheduler 52 | } // namespace nexus 53 | 54 | #endif // NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018-2020 University of Washington All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | Neither the name of the University of Washington nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | 13 | 14 | -------------------------------------------------------------------------------- /src/nexus/common/server_base.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/common/server_base.h" 5 | 6 | namespace nexus { 7 | 8 | ServerBase::ServerBase(std::string port) : 9 | ServerBase("0.0.0.0", port) { 10 | } 11 | 12 | ServerBase::ServerBase(std::string ip, std::string port) 13 | : ip_(ip), 14 | port_(port), 15 | io_context_(), 16 | signals_(io_context_), 17 | acceptor_(io_context_), 18 | socket_(io_context_) { 19 | // handle stop signal 20 | signals_.add(SIGINT); 21 | signals_.add(SIGTERM); 22 | 23 | DoAwaitStop(); 24 | 25 | boost::asio::ip::tcp::resolver resolver(io_context_); 26 | boost::asio::ip::tcp::endpoint endpoint = *resolver.resolve({ip, port}); 27 | acceptor_.open(endpoint.protocol()); 28 | acceptor_.set_option(boost::asio::ip::tcp::acceptor::reuse_address(true)); 29 | acceptor_.bind(endpoint); 30 | acceptor_.listen(); 31 | 32 | DoAccept(); 33 | } 34 | 35 | void ServerBase::Run() { 36 | io_context_.run(); 37 | } 38 | 39 | void ServerBase::Stop() { 40 | acceptor_.close(); 41 | } 42 | 43 | void ServerBase::DoAccept() { 44 | acceptor_.async_accept( 45 | socket_, 46 | [this](boost::system::error_code ec){ 47 | if (!acceptor_.is_open()) { 48 | return; 49 | } 50 | if (!ec) { 51 | HandleAccept(); 52 | } 53 | DoAccept(); 54 | }); 55 | } 56 | 57 | void ServerBase::DoAwaitStop() { 58 | signals_.async_wait( 59 | [this](boost::system::error_code /*ec*/, int /*signo*/) { 60 | Stop(); 61 | }); 62 | } 63 | 64 | } // namespace nexus 65 | -------------------------------------------------------------------------------- /src/nexus/backend/darknet_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_DARKNET_MODEL_H_ 2 | #define NEXUS_BACKEND_DARKNET_MODEL_H_ 3 | 4 | #ifdef USE_DARKNET 5 | 6 | #include 7 | #include 8 | 9 | #include "nexus/backend/model_ins.h" 10 | // Darknet headers 11 | extern "C" { 12 | #include "darknet_server.h" 13 | } 14 | 15 | namespace nexus { 16 | namespace backend { 17 | 18 | class DarknetModel : public ModelInstance { 19 | public: 20 | DarknetModel(int gpu_id, const ModelInstanceConfig& config); 21 | 22 | ~DarknetModel(); 23 | 24 | Shape InputShape() final; 25 | 26 | std::unordered_map OutputShapes() final; 27 | 28 | ArrayPtr CreateInputGpuArray() final; 29 | 30 | std::unordered_map GetOutputGpuArrays() final; 31 | 32 | void Preprocess(std::shared_ptr task) final; 33 | 34 | void Forward(std::shared_ptr batch_task) final; 35 | 36 | void Postprocess(std::shared_ptr task) final; 37 | 38 | private: 39 | void MarshalDetectionResult( 40 | const QueryProto& query, const float* probs, size_t nprobs, 41 | const int* boxes, size_t nboxes, QueryResultProto* result); 42 | 43 | network* net_; 44 | int image_height_; 45 | int image_width_; 46 | Shape input_shape_; 47 | Shape output_shape_; 48 | size_t input_size_; 49 | size_t output_size_; 50 | size_t output_layer_id_; 51 | std::string output_name_; 52 | std::unordered_map classnames_; 53 | bool first_input_array_; 54 | }; 55 | 56 | } // namespace backend 57 | } // namespace nexus 58 | 59 | #endif // USE_DARKNET 60 | 61 | #endif // NEXUS_BACKEND_DARKNET_MODEL_H_ 62 | -------------------------------------------------------------------------------- /examples/obj_rec/src/obj_rec.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 
#include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class ObjRecApp : public AppBase { 9 | public: 10 | ObjRecApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads) : 12 | AppBase(port, rpc_port, sch_addr, nthreads) { 13 | } 14 | 15 | void Setup() final { 16 | model_ = GetModelHandler("caffe2", "vgg16", 1, 1000); 17 | } 18 | 19 | void Process(const RequestProto& request, ReplyProto* reply) final { 20 | auto output = model_->Execute(request.input(), 21 | {"class_id", "class_prob", "class_name"}); 22 | output->FillReply(reply); 23 | } 24 | 25 | private: 26 | std::shared_ptr model_; 27 | }; 28 | 29 | DEFINE_string(port, "9001", "Server port"); 30 | DEFINE_string(rpc_port, "9002", "RPC port"); 31 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler IP address"); 32 | DEFINE_int32(nthread, 1000, "Number of threads processing requests " 33 | "(default: 1000)"); 34 | 35 | int main(int argc, char** argv) { 36 | // log to stderr 37 | FLAGS_logtostderr = 1; 38 | // Init glog 39 | google::InitGoogleLogging(argv[0]); 40 | // Parse command line flags 41 | google::ParseCommandLineFlags(&argc, &argv, true); 42 | // Setup backtrace on segfault 43 | google::InstallFailureSignalHandler(); 44 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 45 | // Create the frontend server 46 | ObjRecApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread); 47 | LaunchApp(&app); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /examples/face_rec/src/face_rec.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class FaceRecApp : public AppBase { 9 | public: 10 | FaceRecApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads) : 12 | AppBase(port, rpc_port, sch_addr, nthreads) { 13 | } 14 | 15 | void Setup() final { 16 | model_ = GetModelHandler("caffe2", "vgg_face", 1, 1000); 17 | } 18 | 19 | void Process(const RequestProto& request, ReplyProto* reply) final { 20 | auto output = model_->Execute(request.input(), 21 | {"class_id", "class_prob", "class_name"}); 22 | output->FillReply(reply); 23 | } 24 | 25 | private: 26 | std::shared_ptr model_; 27 | }; 28 | 29 | DEFINE_string(port, "9001", "Server port"); 30 | DEFINE_string(rpc_port, "9002", "RPC port"); 31 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address"); 32 | DEFINE_int32(nthread, 1000, "Number of threads processing requests " 33 | "(default: 1000)"); 34 | 35 | int main(int argc, char** argv) { 36 | // log to stderr 37 | FLAGS_logtostderr = 1; 38 | // Init glog 39 | google::InitGoogleLogging(argv[0]); 40 | // Parse command line flags 41 | google::ParseCommandLineFlags(&argc, &argv, true); 42 | // Setup backtrace on segfault 43 | google::InstallFailureSignalHandler(); 44 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 45 | // Create the frontend server 46 | FaceRecApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread); 47 | LaunchApp(&app); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /src/nexus/app/query_processor.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_QUERY_PROCESSOR_H_ 2 | #define NEXUS_APP_QUERY_PROCESSOR_H_ 3 | 4 | 
#include "nexus/app/exec_block.h" 5 | #include "nexus/app/request_context.h" 6 | #include 7 | 8 | namespace nexus { 9 | namespace app { 10 | 11 | class QueryProcessor { 12 | public: 13 | QueryProcessor(std::vector blocks) : 14 | blocks_(blocks) { 15 | std::unordered_set block_ids; 16 | for (auto block : blocks) { 17 | if (block_ids.count(block->id()) > 0) { 18 | LOG(FATAL) << "Block id " << block->id() << " already exists"; 19 | } 20 | block_ids.insert(block->id()); 21 | } 22 | } 23 | 24 | void Process(std::shared_ptr ctx) { 25 | if (ctx->state() == kUninitialized) { 26 | // LOG(INFO) << "Init req " << ctx->const_request().user_id() << ":" << 27 | // ctx->const_request().req_id(); 28 | ctx->SetExecBlocks(blocks_); 29 | } 30 | while (!ctx->finished()) { 31 | auto block = ctx->NextReadyBlock(); 32 | if (block == nullptr) { 33 | ctx->SetState(kBlocking); 34 | return; 35 | } 36 | // LOG(INFO) << "Exec req " << ctx->const_request().user_id() << ":" << 37 | // ctx->const_request().req_id() << ", block " << block->id(); 38 | auto ret = block->Run(ctx); 39 | if (ctx->state() == kError) { 40 | break; 41 | } 42 | ctx->AddBlockReturn(ret); 43 | } 44 | // LOG(INFO) << "Reply req " << ctx->const_request().user_id() << ":" << 45 | // ctx->const_request().req_id(); 46 | ctx->SendReply(); 47 | } 48 | 49 | private: 50 | std::vector blocks_; 51 | }; 52 | 53 | } // namespace app 54 | } // namespace nexus 55 | 56 | #endif // NEXUS_APP_QUERY_PROCESSOR_H_ 57 | -------------------------------------------------------------------------------- /src/nexus/scheduler/complex_query.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_COMPLEXQUERY_H 2 | #define NEXUS_SCHEDULER_COMPLEXQUERY_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/scheduler/sch_info.h" 9 | 10 | namespace nexus { 11 | namespace scheduler { 12 | 13 | class ComplexQuery { 14 | public: 15 | struct NodeID { 16 | std::string framework; 17 | std::string model_name; 18 | NodeID(std::string framework_, std::string model_name_); 19 | std::string ToString() const; 20 | }; 21 | ComplexQuery(std::string cq_id, int slo_us, int segments); 22 | ~ComplexQuery(); 23 | ComplexQuery(ComplexQuery &&other) noexcept; 24 | ComplexQuery& operator=(ComplexQuery &&other) noexcept; 25 | 26 | void AddNode(NodeID node_id, std::string current_model_sess_id, 27 | const ModelProfile& profile); 28 | void AddChild(const NodeID &parent, const NodeID &child); 29 | void SetRequestRate(const NodeID &node_id, double request_rate); 30 | std::unordered_map GetSLOms(); 31 | double GetMinimalGPUs(); 32 | void DynamicProgramming(); 33 | void Finalize(); 34 | bool IsFinalized(); 35 | 36 | private: 37 | class Impl; 38 | std::unique_ptr impl_; 39 | }; 40 | 41 | inline bool operator==(const ComplexQuery::NodeID& lhs, const ComplexQuery::NodeID& rhs) { 42 | return lhs.framework == rhs.framework && lhs.model_name == rhs.model_name; 43 | } 44 | 45 | } // namespace scheduler 46 | } // namespace nexus 47 | 48 | namespace std { 49 | template<> struct hash { 50 | std::size_t operator()(const nexus::scheduler::ComplexQuery::NodeID &v) const { 51 | size_t h = std::hash{}(v.framework); 52 | h = h * 31 + std::hash{}(v.model_name); 53 | return h; 54 | } 55 | }; 56 | } 57 | 58 | #endif //NEXUS_SCHEDULER_COMPLEXQUERY_H 59 | -------------------------------------------------------------------------------- /src/nexus/backend/backup_client.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | 3 | #include "nexus/backend/backup_client.h" 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | BackupClient::BackupClient(const BackendInfo& info, 9 | boost::asio::io_context& io_context, 10 | MessageHandler* handler) : 11 | BackendSession(info, io_context, handler) {} 12 | 13 | void BackupClient::Forward(std::shared_ptr task) { 14 | uint64_t qid = task->query.query_id(); 15 | task->query.set_query_id(task->task_id); 16 | auto msg = std::make_shared(kBackendRelay, 17 | task->query.ByteSizeLong()); 18 | msg->EncodeBody(task->query); 19 | Write(std::move(msg)); 20 | std::lock_guard lock(relay_mu_); 21 | qid_lookup_.emplace(task->task_id, qid); 22 | conns_.emplace(task->task_id, task->connection); 23 | } 24 | 25 | void BackupClient::Reply(std::shared_ptr message) { 26 | QueryResultProto result; 27 | message->DecodeBody(&result); 28 | uint64_t tid = result.query_id(); 29 | std::lock_guard lock(relay_mu_); 30 | auto qid_iter = qid_lookup_.find(tid); 31 | if (qid_iter == qid_lookup_.end()) { 32 | LOG(ERROR) << "Cannot find query ID for task " << tid; 33 | return; 34 | } 35 | uint64_t qid = qid_iter->second; 36 | result.set_query_id(qid); 37 | // LOG(INFO) << "Convert " << result.model_session_id() << " tid " << tid << 38 | // " to qid " << qid; 39 | auto reply_msg = std::make_shared(kBackendReply, 40 | result.ByteSizeLong()); 41 | reply_msg->EncodeBody(result); 42 | auto conn_iter = conns_.find(tid); 43 | conn_iter->second->Write(std::move(reply_msg)); 44 | qid_lookup_.erase(qid_iter); 45 | conns_.erase(conn_iter); 46 | } 47 | 48 | } // namespace backend 49 | } // namespace nexus 50 | -------------------------------------------------------------------------------- /src/nexus/backend/slice.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_SLICE_H_ 2 | #define NEXUS_BACKEND_SLICE_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace nexus { 8 | namespace backend { 9 | 10 | class Slice { 11 | public: 12 | /*! 13 | * \brief construct a slice with equal size splits. 14 | * \param nsplits Number of splits 15 | * \param nfloats Number of floats in a slice 16 | */ 17 | Slice(size_t nsplits, size_t nfloats); 18 | /*! 19 | * \brief construct a slice with varied sizes. 20 | * \param nfloats A vector of number of floats 21 | * \param multiplier Multiplier to number of floats 22 | */ 23 | Slice(std::vector nfloats, size_t multiplier = 1); 24 | Slice(std::vector nfloats, size_t multiplier = 1); 25 | /*! 26 | * \brief construct a slice with varied sizes. 27 | * \param nsplits Number of splits 28 | * \param nfloats An array of number of floats 29 | * \param multiplier Multiplier to number of floats 30 | */ 31 | Slice(size_t nslices, float* nfloats, size_t multiplier = 1); 32 | /*! 33 | * \brief get the offset for idx-th slice 34 | * \param idx Index of the slice 35 | * \return offset of idx-th slice 36 | */ 37 | size_t offset(int idx) const; 38 | /*! 39 | * \brief get the number of floats in slice idx 40 | * \param idx Index of the split 41 | * \return number of floats 42 | */ 43 | size_t num_elements(int idx) const; 44 | /*! \brief get number of splits */ 45 | size_t num_splits() const { return offsets_.size(); } 46 | /*! 
\brief total number of floats in the buffer */ 47 | size_t total_elements() const { return total_elements_; } 48 | 49 | private: 50 | bool equal_split_; 51 | size_t equal_slice_size_; 52 | std::vector slice_sizes_; 53 | std::vector offsets_; 54 | size_t total_elements_; 55 | }; 56 | 57 | } // namespace backend 58 | } // namespace nexus 59 | 60 | #endif // NEXUS_BACKEND_SLICE_H_ 61 | -------------------------------------------------------------------------------- /src/nexus/scheduler/sch_info.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/scheduler/sch_info.h" 2 | #include 3 | 4 | namespace nexus { 5 | namespace scheduler { 6 | 7 | void SessionInfo::UpdateWorkload(uint32_t frontend_id, const ModelStatsProto &model_stats) { 8 | auto iter = workloads.find(frontend_id); 9 | if (iter == workloads.end()) { 10 | LOG(ERROR) << "Cannot find rps for " << frontend_id << " in " << 11 | model_stats.model_session_id(); 12 | return; 13 | } 14 | auto rps = iter->second; 15 | for (auto num_requests : model_stats.num_requests()) { 16 | if (rps->rate() < 0 && num_requests == 0) { 17 | continue; 18 | } 19 | rps->AddSample(num_requests); 20 | } 21 | } 22 | double SessionInfo::TotalThroughput() const { 23 | double total = 0.; 24 | for (auto iter : backend_weights) { 25 | total += iter.second; 26 | } 27 | return total; 28 | } 29 | void SessionInfo::SubscribeModelSession(uint32_t frontend_id, const std::string &model_sess_id) { 30 | if (session_subscribers.count(model_sess_id) == 0) { 31 | session_subscribers.emplace(model_sess_id, ServerList{frontend_id}); 32 | } else { 33 | session_subscribers.at(model_sess_id).insert(frontend_id); 34 | } 35 | workloads.emplace(frontend_id, 36 | std::make_shared(1, FLAGS_avg_interval)); 37 | } 38 | bool SessionInfo::UnsubscribleModelSession(uint32_t frontend_id, const std::string &model_sess_id) { 39 | session_subscribers.at(model_sess_id).erase(frontend_id); 40 | workloads.erase(frontend_id); 41 | if (has_static_workload || !session_subscribers.at(model_sess_id).empty()) { 42 | return false; 43 | } 44 | // Remove this model session 45 | session_subscribers.erase(model_sess_id); 46 | for (auto iter = model_sessions.begin(); iter != model_sessions.end(); 47 | ++iter) { 48 | if (ModelSessionToString(*iter) == model_sess_id) { 49 | model_sessions.erase(iter); 50 | break; 51 | } 52 | } 53 | return true; 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /src/nexus/backend/caffe_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE 5 | 6 | #include 7 | 8 | #include "nexus/backend/model_ins.h" 9 | 10 | // Caffe headers 11 | // avoid redefined keywords from darknet 12 | #ifdef GPU 13 | #undef GPU 14 | #endif 15 | #ifdef CUDNN 16 | #undef CUDNN 17 | #endif 18 | // flag to include OpenCV related functions in Caffe 19 | #define USE_OPENCV 20 | #include "caffe/caffe.hpp" 21 | #include "caffe/data_transformer.hpp" 22 | 23 | namespace nexus { 24 | namespace backend { 25 | 26 | class CaffeModel : public ModelInstance { 27 | public: 28 | CaffeModel(int gpu_id, const ModelInstanceConfig& config); 29 | 30 | Shape InputShape() final; 31 | 32 | std::unordered_map OutputShapes() final; 33 | 34 | ArrayPtr CreateInputGpuArray() final; 35 | 36 | std::unordered_map GetOutputGpuArrays() final; 37 | 38 | void Preprocess(std::shared_ptr task) final; 39 | 
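// Preprocess (above), Forward, and Postprocess (below) presumably correspond
// to the per-task pipeline stages (each Task starts at kPreprocess; see
// task.cpp): CPU-side input preparation, batched GPU inference, and result
// marshalling.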
40 | void Forward(std::shared_ptr batch_task) final; 41 | 42 | void Postprocess(std::shared_ptr task) final; 43 | 44 | // Caffe neural network for serving 45 | std::unique_ptr > net_; 46 | // image size 47 | int image_height_; 48 | int image_width_; 49 | // input shape of neural network 50 | Shape input_shape_; 51 | // output shape of neural network 52 | Shape output_shape_; 53 | // size of input in a single batch 54 | size_t input_size_; 55 | // size of output in a single batch 56 | size_t output_size_; 57 | int input_blob_idx_; 58 | std::string output_blob_name_; 59 | std::unordered_map classnames_; 60 | // transformer for input 61 | std::unique_ptr > transformer_; 62 | std::vector > > input_blobs_; 63 | std::string prefix_layer_; 64 | int prefix_index_; 65 | }; 66 | 67 | } // namespace backend 68 | } // namespace nexus 69 | 70 | #endif // USE_CAFFE 71 | 72 | #endif // NEXUS_BACKEND_CAFFE_MODEL_H_ 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nexus 2 | ===== 3 | 4 | [![Docker Image](https://img.shields.io/microbadger/image-size/abcdabcd987/nexus)](https://hub.docker.com/repository/docker/abcdabcd987/nexus) 5 | 6 | Nexus is a scalable and efficient serving system for DNN applications on a GPU 7 | cluster. 8 | 9 | ## SOSP 2019 Paper 10 | 11 | * Check out our SOSP 2019 paper [here](https://doi.org/10.1145/3341301.3359658). 12 | * Check out the [Google Drive](https://drive.google.com/open?id=104UqrlNrfJoQnGdkxTQ56mfxSBFyJTcr) that contains a sample of the video dataset. 13 | 14 | ## Building Nexus 15 | 16 | See [BUILDING.md](BUILDING.md) for details. 17 | 18 | ## Docker and Examples 19 | 20 | We provide a [Docker image](https://hub.docker.com/repository/docker/abcdabcd987/nexus) 21 | so that you can try Nexus quickly. There is also an example that walks step by 22 | step through running Nexus with a simple application. We recommend taking a 23 | look [here](examples/README.md). 24 | 25 | ## Deployment 26 | 27 | ### Download Model Zoo 28 | 29 | Nexus publishes a public model zoo on our department-hosted GitLab. To download it, 30 | you need to install [Git LFS](https://git-lfs.github.com/) first. Then, run: 31 | 32 | ```bash 33 | git clone https://gitlab.cs.washington.edu/syslab/nexus-models 34 | cd nexus-models 35 | git lfs checkout 36 | ``` 37 | 38 | ### Run the Profiler 39 | 40 | Nexus is a profile-based system, so before running it, make sure you have 41 | profiled all the GPUs. To profile a certain model on a certain GPU, run: 42 | 43 | ```bash 44 | nexus/tools/profiler/profiler.py --gpu_list=GPU_INDEX --gpu_uuid \ 45 | --framework=tensorflow --model=MODEL_NAME \ 46 | --model_root=nexus-models/ --dataset=/path/to/datasets/ 47 | ``` 48 | 49 | The profile will be saved to the `--model_root` directory. 50 | See [examples](examples/README.md) for more concrete usage. 51 | 52 | ### Run Nexus 53 | 54 | To run Nexus, you need to run the **scheduler** first, then spawn a **backend** for each 55 | GPU card, and finally run the Nexus **frontend** of your application. 56 | See [examples](examples/README.md) for more concrete usage; a rough sketch of the sequence is shown below.
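The following is a minimal sketch, assuming the built binaries land in `bin/`. The backend invocation is a placeholder (its flags are not shown in this snapshot); the scheduler flags match `src/nexus/scheduler/scheduler_main.cpp` and the frontend flags match `examples/obj_rec`:

```bash
# 1. Start the scheduler. It listens on port 10001 by default and can
#    optionally preload a static workload config via --workload.
bin/scheduler --port=10001

# 2. Start one backend per GPU card (flags here are illustrative placeholders).
bin/backend --gpu=0 --sch_addr=127.0.0.1

# 3. Start the application frontend, e.g. the object recognition example,
#    which serves clients on port 9001 and control RPCs on port 9002.
bin/obj_rec --port=9001 --rpc_port=9002 --sch_addr=127.0.0.1
```

Clients such as `nexus.Client` in `python/nexus` then connect to the frontend's server port.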
57 | -------------------------------------------------------------------------------- /src/nexus/common/metric.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_METRIC_H_ 2 | #define NEXUS_COMMON_METRIC_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nexus/common/time_util.h" 11 | 12 | namespace nexus { 13 | 14 | class Metric { 15 | public: 16 | virtual void Reset() = 0; 17 | }; 18 | 19 | class Counter : public Metric { 20 | public: 21 | Counter(); 22 | 23 | void Increase(uint64_t value); 24 | 25 | void Reset() final; 26 | 27 | private: 28 | std::atomic count_; 29 | }; 30 | 31 | class IntervalCounter : public Metric, public Tickable { 32 | public: 33 | IntervalCounter(uint32_t interval_sec); 34 | 35 | virtual ~IntervalCounter() = default; 36 | 37 | void Increase(uint64_t value); 38 | 39 | void Reset() override; 40 | 41 | std::vector GetHistory(); 42 | 43 | protected: 44 | void TickImpl() final; 45 | 46 | private: 47 | uint32_t tick_interval_sec_; 48 | TimePoint last_tick_time_; 49 | std::atomic count_; 50 | std::vector history_; 51 | std::mutex history_mutex_; 52 | std::atomic_bool running_; 53 | }; 54 | 55 | class EWMA { 56 | public: 57 | EWMA(uint32_t sample_interval_sec, uint32_t avg_interval_sec); 58 | 59 | EWMA(const EWMA& other); 60 | 61 | double rate() const { return rate_; } 62 | 63 | void AddSample(uint64_t count); 64 | 65 | EWMA& operator=(const EWMA& other); 66 | 67 | private: 68 | uint32_t sample_interval_sec_; 69 | uint32_t avg_interval_sec_; 70 | double rate_; 71 | double alpha_; 72 | }; 73 | 74 | class MetricRegistry { 75 | public: 76 | static MetricRegistry& Singleton(); 77 | 78 | std::shared_ptr CreateCounter(); 79 | 80 | std::shared_ptr CreateIntervalCounter(uint32_t interval_sec); 81 | 82 | void RemoveMetric(std::shared_ptr metric); 83 | 84 | private: 85 | MetricRegistry() {} 86 | 87 | std::mutex mutex_; 88 | std::unordered_set > metrics_; 89 | }; 90 | 91 | } // namespace nexus 92 | 93 | #endif // NEXUS_COMMON_METRIC_H_ 94 | -------------------------------------------------------------------------------- /src/nexus/backend/share_prefix_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 2 | #define NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 3 | 4 | #include 5 | 6 | #include "nexus/backend/model_ins.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | class SharePrefixModel : public ModelInstance { 12 | public: 13 | SharePrefixModel(int gpu_id, const ModelInstanceConfig& config); 14 | 15 | virtual void set_batch(size_t batch) override; 16 | 17 | Shape InputShape() final; 18 | 19 | std::unordered_map OutputShapes() final; 20 | 21 | ArrayPtr CreateInputGpuArray() final; 22 | 23 | std::unordered_map GetOutputGpuArrays() final; 24 | 25 | void Preprocess(std::shared_ptr task) final; 26 | 27 | void Forward(std::shared_ptr batch_task) final; 28 | 29 | void Postprocess(std::shared_ptr task) final; 30 | 31 | int num_model_sessions(); 32 | 33 | std::vector ModelSessions(); 34 | 35 | bool HasModelSession(const std::string& model_sess_id); 36 | 37 | bool AddModelSession(const ModelSession& model_sess); 38 | 39 | void RemoveModelSession(const std::string& model_sess_id); 40 | 41 | private: 42 | // Prefix model information 43 | int prefix_length_; 44 | std::unique_ptr prefix_model_; 45 | std::string prefix_output_name_; 46 | Shape prefix_output_shape_; 47 | std::unordered_map 
prefix_batch_output_arr_; 48 | // Suffix models information 49 | std::unordered_map > suffix_models_; 51 | std::unordered_map suffix_input_arrays_; 52 | std::unordered_map suffix_output_names_; 53 | std::unordered_map suffix_output_sizes_; 54 | size_t max_suffix_output_size_; 55 | // Guard suffix_models_, suffix_input_arrays_, suffix_output_names_, 56 | // suffix_output_sizes_, max_suffix_output_size_ 57 | std::mutex suffix_mu_; 58 | }; 59 | 60 | } // namespace backend 61 | } // namespace nexus 62 | 63 | #endif // NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 64 | -------------------------------------------------------------------------------- /src/nexus/common/backend_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_BACKEND_POOL_H_ 2 | #define NEXUS_COMMON_BACKEND_POOL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/common/connection.h" 8 | #include "nexus/common/time_util.h" 9 | #include "nexus/proto/control.grpc.pb.h" 10 | 11 | namespace nexus { 12 | 13 | class BackendPool; 14 | 15 | class BackendSession : public Connection { 16 | public: 17 | explicit BackendSession(const BackendInfo& info, 18 | boost::asio::io_context& io_context, 19 | MessageHandler* handler); 20 | 21 | ~BackendSession(); 22 | 23 | inline uint32_t node_id() const { return node_id_; } 24 | 25 | inline std::string ip() const { return ip_; } 26 | 27 | inline std::string server_port() const { return server_port_; } 28 | 29 | inline std::string rpc_port() const { return rpc_port_; } 30 | 31 | virtual void Start(); 32 | 33 | virtual void Stop(); 34 | 35 | double GetUtilization(); 36 | 37 | protected: 38 | /*! \brief Asynchronously connect to backend server. */ 39 | void DoConnect(); 40 | 41 | /*! \brief Boost io service */ 42 | boost::asio::io_context& io_context_; 43 | uint32_t node_id_; 44 | std::string ip_; 45 | std::string server_port_; 46 | std::string rpc_port_; 47 | std::atomic_bool running_; 48 | std::unique_ptr stub_; 49 | double utilization_; 50 | TimePoint expire_; 51 | std::mutex util_mu_; 52 | }; 53 | 54 | class BackendPool { 55 | public: 56 | BackendPool() {} 57 | 58 | std::shared_ptr GetBackend(uint32_t backend_id); 59 | 60 | void AddBackend(std::shared_ptr backend); 61 | 62 | void RemoveBackend(std::shared_ptr backend); 63 | 64 | void RemoveBackend(uint32_t backend_id); 65 | 66 | std::vector UpdateBackendList(std::unordered_set list); 67 | 68 | void StopAll(); 69 | 70 | protected: 71 | std::unordered_map > backends_; 72 | std::mutex mu_; 73 | }; 74 | 75 | } // namespace nexus 76 | 77 | #endif // NEXUS_COMMON_BACKEND_POOL_H_ 78 | -------------------------------------------------------------------------------- /src/nexus/backend/rpc_service.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/backend/backend_server.h" 5 | #include "nexus/backend/rpc_service.h" 6 | #include "nexus/common/rpc_call.h" 7 | 8 | DECLARE_int32(occupancy_valid); 9 | 10 | namespace nexus { 11 | namespace backend { 12 | 13 | INSTANTIATE_RPC_CALL(AsyncService, UpdateModelTable, ModelTableConfig, 14 | RpcReply); 15 | INSTANTIATE_RPC_CALL(AsyncService, CheckAlive, CheckAliveRequest, RpcReply); 16 | #ifdef USE_GPU 17 | INSTANTIATE_RPC_CALL(AsyncService, CurrentUtilization, UtilizationRequest, 18 | UtilizationReply); 19 | #endif 20 | 21 | BackendRpcService::BackendRpcService(BackendServer* backend, std::string port, 22 | size_t nthreads): 23 | AsyncRpcServiceBase(port, nthreads), 24 | 
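`SharePrefixModel` above implements one of Nexus's signature optimizations: when several model sessions share the same leading layers (for example, models fine-tuned from a common base network), the backend loads the shared prefix once, batches every session's requests through it together, and runs only the lightweight per-session suffixes separately. A hedged usage sketch against the interface above (`DemoSharePrefix` and the two session arguments are illustrative, not from the repository):

```cpp
// Sketch: two fine-tuned variants served through one shared prefix.
// AddModelSession/HasModelSession/num_model_sessions are the real methods
// declared in share_prefix_model.h; everything else here is illustrative.
void DemoSharePrefix(nexus::backend::SharePrefixModel& model,
                     const nexus::ModelSession& variant_a,
                     const nexus::ModelSession& variant_b) {
  if (!model.HasModelSession(nexus::ModelSessionToString(variant_a))) {
    model.AddModelSession(variant_a);  // loads a suffix; prefix stays resident
  }
  model.AddModelSession(variant_b);
  // Requests for either session can now run through the prefix layers in one
  // batched Forward call; only the suffix layers execute per session.
  LOG(INFO) << "sessions sharing this prefix: " << model.num_model_sessions();
}
```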
    backend_(backend) {
}

void BackendRpcService::HandleRpcs() {
  new UpdateModelTable_Call(
      &service_, cq_.get(),
      [this](const grpc::ServerContext&, const ModelTableConfig& req,
             RpcReply* reply) {
        //std::thread (&BackendServer::UpdateModelTable, backend_, req).detach();
        backend_->UpdateModelTableAsync(req);
        reply->set_status(CTRL_OK);
      });
  new CheckAlive_Call(
      &service_, cq_.get(),
      [](const grpc::ServerContext&, const CheckAliveRequest&,
         RpcReply* reply) {
        reply->set_status(CTRL_OK);
      });
#ifdef USE_GPU
  new CurrentUtilization_Call(
      &service_, cq_.get(),
      [this](const grpc::ServerContext&, const UtilizationRequest&,
             UtilizationReply* reply) {
        reply->set_node_id(backend_->node_id());
        reply->set_utilization(backend_->CurrentUtilization());
        reply->set_valid_ms(FLAGS_occupancy_valid);
      });
#endif
  void* tag;
  bool ok;
  while (running_) {
    cq_->Next(&tag, &ok);
    if (ok) {
      static_cast<RpcCallBase*>(tag)->Proceed();
    }
  }
}

} // namespace backend
} // namespace nexus
--------------------------------------------------------------------------------
/src/nexus/backend/task.cpp:
--------------------------------------------------------------------------------
#include "nexus/backend/task.h"
#include "nexus/common/model_def.h"

namespace nexus {
namespace backend {

Input::Input(TimePoint deadline, uint64_t tid, int idx, ArrayPtr arr) :
    DeadlineItem(deadline),
    task_id(tid),
    index(idx),
    array(arr) {}

Output::Output(uint64_t tid, int idx,
               const std::unordered_map<std::string, ArrayPtr>& arrs) :
    task_id(tid),
    index(idx),
    arrays(arrs) {}

std::atomic<uint64_t> Task::global_task_id_(0);

Task::Task() : Task(nullptr) {}

Task::Task(std::shared_ptr<Connection> conn) :
    DeadlineItem(),
    connection(conn),
    model(nullptr),
    stage(kPreprocess),
    filled_outputs(0) {
  task_id = global_task_id_.fetch_add(1, std::memory_order_relaxed);
  timer.Record("begin");
}

void Task::DecodeQuery(std::shared_ptr<Message> message) {
  msg_type = message->type();
  message->DecodeBody(&query);
  ModelSession sess;
  ParseModelSession(query.model_session_id(), &sess);
  uint32_t budget = sess.latency_sla();
  if (query.slack_ms() > 0) {
    budget += query.slack_ms();
    // LOG(INFO) << "slack " << query.slack_ms() << " ms";
  }
  SetDeadline(std::chrono::milliseconds(budget));
}

void Task::AppendInput(ArrayPtr arr) {
  auto input = std::make_shared<Input>(deadline(), task_id, inputs.size(), arr);
  inputs.push_back(input);
  // Put a placeholder in the outputs
  outputs.push_back(nullptr);
}

bool Task::AddOutput(std::shared_ptr<Output> output) {
  outputs[output->index] = output;
  uint32_t filled = ++filled_outputs;
  if (filled == outputs.size()) {
    return true;
  }
  return false;
}

bool Task::AddVirtualOutput(int index) {
  result.set_status(TIMEOUT);
  uint32_t filled = ++filled_outputs;
  if (filled == outputs.size()) {
    return true;
  }
  return false;
}

} // namespace backend
} // namespace nexus
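`Task::DecodeQuery` above turns the model session's latency SLA, plus any slack the frontend grants, into an absolute deadline that the batching logic schedules against. The same arithmetic with concrete numbers (the values are illustrative, not from the repository):

```cpp
#include <chrono>
#include <cstdint>

// Sketch of the budget computation in Task::DecodeQuery: a 100 ms SLA with
// 20 ms of slack yields a deadline 120 ms after the query is decoded.
std::chrono::time_point<std::chrono::high_resolution_clock>
ComputeDeadlineSketch(uint32_t latency_sla_ms, uint32_t slack_ms) {
  uint32_t budget_ms = latency_sla_ms;  // e.g. 100
  if (slack_ms > 0) {
    budget_ms += slack_ms;              // e.g. 100 + 20 = 120
  }
  return std::chrono::high_resolution_clock::now() +
         std::chrono::milliseconds(budget_ms);
}
```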
"nexus/backend/slice.h" 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | Slice::Slice(size_t nsplits, size_t nfloats) : 9 | equal_split_(true) { 10 | size_t offset = 0; 11 | for (size_t i = 0; i < nsplits; ++i) { 12 | offsets_.push_back(offset); 13 | offset += nfloats; 14 | } 15 | total_elements_ = offset; 16 | equal_slice_size_ = nfloats; 17 | } 18 | 19 | Slice::Slice(std::vector nfloats, size_t multiplier) : 20 | equal_split_(false) { 21 | size_t offset = 0; 22 | for (auto size : nfloats) { 23 | offsets_.push_back(offset); 24 | size_t slice_size = size * multiplier; 25 | slice_sizes_.push_back(slice_size); 26 | offset += slice_size; 27 | } 28 | total_elements_ = offset; 29 | } 30 | 31 | Slice::Slice(std::vector nfloats, size_t multiplier) : 32 | equal_split_(false) { 33 | size_t offset = 0; 34 | for (auto size : nfloats) { 35 | offsets_.push_back(offset); 36 | size_t slice_size = size_t(size) * multiplier; 37 | slice_sizes_.push_back(slice_size); 38 | offset += slice_size; 39 | } 40 | total_elements_ = offset; 41 | } 42 | 43 | Slice::Slice(size_t nsplits, float* nfloats, size_t multiplier) : 44 | equal_split_(false) { 45 | size_t offset = 0; 46 | for (size_t i = 0; i < nsplits; ++i) { 47 | offsets_.push_back(offset); 48 | size_t slice_size = size_t(nfloats[i]) * multiplier; 49 | slice_sizes_.push_back(slice_size); 50 | offset += slice_size; 51 | } 52 | total_elements_ = offset; 53 | } 54 | 55 | size_t Slice::offset(int idx) const { 56 | CHECK_LT(idx, offsets_.size()) << "Index " << idx << " exceeds the boundary " 57 | << offsets_.size(); 58 | return offsets_[idx]; 59 | } 60 | 61 | size_t Slice::num_elements(int idx) const { 62 | CHECK_LT(idx, offsets_.size()) << "Index " << idx << " exceeds the boundary " 63 | << offsets_.size(); 64 | if (equal_split_) { 65 | return equal_slice_size_; 66 | } 67 | return slice_sizes_[idx]; 68 | } 69 | 70 | } // namespace backend 71 | } // namespace nexus 72 | -------------------------------------------------------------------------------- /tests/python/test_async_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import asyncio 4 | from datetime import datetime, timedelta 5 | 6 | import nexus 7 | 8 | vgg_face_dir = '/home/abcdabcd987/datasets/vgg_face' 9 | 10 | service_addr = "127.0.0.1:9001" 11 | 12 | def load_images(root, maxlen): 13 | images = {} 14 | for fn in os.listdir(root)[:maxlen]: 15 | with open(os.path.join(root, fn), 'rb') as f: 16 | im = f.read() 17 | images[fn] = im 18 | return images 19 | 20 | 21 | async def test_client(images, interval): 22 | images = iter(images) 23 | interval = timedelta(seconds=interval) 24 | user_id = random.randint(1, 1000000000) 25 | async with nexus.AsyncClient(service_addr, user_id) as client: 26 | pending = set() 27 | next_time = datetime.now() 28 | try: 29 | next_image = next(images) 30 | except StopIteration: 31 | return 32 | while True: 33 | timeout = (next_time - datetime.now()).total_seconds() 34 | if timeout > 0: 35 | await asyncio.sleep(timeout) 36 | else: 37 | while timeout <= 0 and next_image is not None: 38 | next_time += interval 39 | timeout = (next_time - datetime.now()).total_seconds() 40 | pending.add(client.request(next_image)) 41 | try: 42 | next_image = next(images) 43 | except StopIteration: 44 | next_image = None 45 | done, pending = await asyncio.wait(pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED) 46 | for task in done: 47 | print('==========', datetime.now(), task.result()) 48 | if not 
pending and next_image is None: 49 | break 50 | 51 | 52 | if __name__ == "__main__": 53 | print('Test client...') 54 | images = list(load_images(vgg_face_dir, 20).values()) 55 | 56 | print('Testing the non concurrent case') 57 | asyncio.run(test_client(images, 0.5)) 58 | 59 | print('Testing the concurrent case') 60 | asyncio.run(test_client(images, 0.0001)) 61 | -------------------------------------------------------------------------------- /src/nexus/common/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_BUFFER_H_ 2 | #define NEXUS_COMMON_BUFFER_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/device.h" 7 | 8 | namespace nexus { 9 | 10 | class Buffer : public std::enable_shared_from_this { 11 | public: 12 | // disable copy 13 | Buffer(const Buffer&) = delete; 14 | Buffer& operator=(const Buffer&) = delete; 15 | 16 | Buffer() : 17 | data_(nullptr), 18 | nbytes_(0), 19 | own_data_(false), 20 | shared_from_(nullptr) {} 21 | 22 | explicit Buffer(size_t nbytes, Device* device) : 23 | nbytes_(nbytes), 24 | device_(device), 25 | own_data_(true), 26 | shared_from_(nullptr) { 27 | data_ = device->Allocate(nbytes_); 28 | // LOG(INFO) << "Allocate " << nbytes_ << " on " << device->name() << 29 | // ", own data " << own_data_; 30 | } 31 | 32 | explicit Buffer(void* data, size_t nbytes, Device* device, 33 | bool own_data = false) : 34 | data_(data), 35 | nbytes_(nbytes), 36 | device_(device), 37 | own_data_(own_data), 38 | shared_from_(nullptr) {} 39 | 40 | ~Buffer() { 41 | // LOG(INFO) << "Destroy buffer, size: " << nbytes_ << ", device: " << 42 | // device_->name() << ", own data: " << own_data_; 43 | if (own_data_) { 44 | device_->Free(data_); 45 | // LOG(INFO) << "Free " << nbytes_ << " on " << device_->name(); 46 | } 47 | } 48 | 49 | size_t nbytes() { return nbytes_; } 50 | 51 | void* data() { return data_; } 52 | 53 | const void* data() const { return data_; } 54 | 55 | Device* device() const { return device_; } 56 | 57 | std::shared_ptr Slice(size_t offset, size_t nbytes); 58 | 59 | private: 60 | Buffer(std::shared_ptr origin, size_t offset, size_t nbytes) : 61 | data_((char*) origin->data_ + offset), 62 | nbytes_(nbytes), 63 | device_(origin->device_), 64 | own_data_(false), 65 | shared_from_(origin) { 66 | // LOG(INFO) << "Slice buffer, offset: " << offset << ", size: " << nbytes << 67 | // ", own data: " << own_data_; 68 | } 69 | 70 | void* data_; 71 | size_t nbytes_; 72 | Device* device_; 73 | bool own_data_; 74 | std::shared_ptr shared_from_; 75 | }; 76 | 77 | } // namespace nexus 78 | 79 | #endif // NEXUS_COMMON_BUFFER_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/common/rpc_service_base.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_RPC_SERVICE_BASE_H_ 2 | #define NEXUS_COMMON_RPC_SERVICE_BASE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "nexus/common/rpc_call.h" 9 | 10 | namespace nexus { 11 | 12 | template 13 | class AsyncRpcServiceBase { 14 | public: 15 | AsyncRpcServiceBase(std::string port, size_t nthreads): 16 | AsyncRpcServiceBase("0.0.0.0", port, nthreads) {} 17 | 18 | AsyncRpcServiceBase(std::string ip, std::string port, size_t nthreads): 19 | ip_(ip), 20 | port_(port), 21 | nthreads_(nthreads), 22 | running_(false) { 23 | } 24 | 25 | virtual ~AsyncRpcServiceBase() { 26 | if (running_) { 27 | Stop(); 28 | } 29 | } 30 | 31 | std::string port() const { return port_; } 32 
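`Buffer::Slice` above hands out a view into an existing allocation without copying: the child buffer records its parent in `shared_from_`, so the underlying memory stays alive for as long as any slice does. A minimal usage sketch (CPU device and sizes are illustrative):

```cpp
#include <glog/logging.h>
#include <memory>

// Sketch: a slice keeps the parent allocation alive via shared ownership,
// so dropping the parent handle does not free the memory under the slice.
void DemoBufferSlice() {
  auto* cpu = nexus::DeviceManager::Singleton().GetCPUDevice();
  auto parent = std::make_shared<nexus::Buffer>(12, cpu);      // 12-byte buffer
  std::shared_ptr<nexus::Buffer> view = parent->Slice(4, 8);   // bytes [4, 12)
  parent.reset();  // parent handle gone; the view still owns the allocation
  CHECK_EQ(view->nbytes(), 8u);
}
```

This is the mechanism the backend uses to carve one batch buffer into per-request input arrays without extra copies or manual lifetime bookkeeping.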
| 33 | std::string address() const { return ip_ + ":" + port_; } 34 | 35 | void Start() { 36 | grpc::ServerBuilder builder; 37 | std::string addr = ip_ + ":" + port_; 38 | builder.AddListeningPort(addr, grpc::InsecureServerCredentials()); 39 | builder.RegisterService(&service_); 40 | cq_ = builder.AddCompletionQueue(); 41 | server_ = builder.BuildAndStart(); 42 | running_ = true; 43 | for (size_t i = 0; i < nthreads_; ++i) { 44 | thread_pool_.emplace_back(&AsyncRpcServiceBase::HandleRpcs, this); 45 | } 46 | LOG(INFO) << "RPC service is listening on " << addr; 47 | } 48 | 49 | void Stop() { 50 | running_ = false; 51 | server_->Shutdown(); 52 | cq_->Shutdown(); 53 | 54 | void *tag; 55 | bool ok; 56 | while (cq_->Next(&tag, &ok)) { 57 | LOG(WARNING) << "There is a event in the grpc::ServerCompletionQueue not handled at " << tag; 58 | } 59 | 60 | for (auto& thread : thread_pool_) { 61 | thread.join(); 62 | } 63 | 64 | LOG(INFO) << "RPC service stopped"; 65 | } 66 | 67 | protected: 68 | virtual void HandleRpcs() = 0; 69 | 70 | protected: 71 | std::string ip_; 72 | std::string port_; 73 | size_t nthreads_; 74 | volatile bool running_; 75 | std::vector thread_pool_; 76 | ServiceType service_; 77 | std::unique_ptr cq_; 78 | std::unique_ptr server_; 79 | }; 80 | 81 | } // namespace nexus 82 | 83 | #endif // NEXUS_COMMON_RPC_SERVICE_BASE_H_ 84 | -------------------------------------------------------------------------------- /src/nexus/common/message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/common/message.h" 5 | 6 | namespace nexus { 7 | 8 | #if 0 9 | #define htonll(x) \ 10 | ((1==htonl(1)) ? (x) : \ 11 | ((uint64_t) htonl((x) & 0xFFFFFFFF) << 32) | htonl((uint64_t)(x) >> 32)) 12 | 13 | #define ntohll(x) \ 14 | ((1==ntohl(1)) ? 
(x) : \ 15 | ((uint64_t) ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((uint64_t)(x) >> 32)) 16 | #endif 17 | 18 | bool DecodeHeader(const char* buffer, MessageHeader* header) { 19 | header->magic_number = ntohl(*(const uint32_t*) buffer); 20 | if (header->magic_number != NEXUS_SERVICE_MAGIC_NUMBER) { 21 | return false; 22 | } 23 | header->msg_type = ntohl(*(const uint32_t*) (buffer + 4)); 24 | header->body_length = ntohl(*(const uint32_t*) (buffer + 8)); 25 | return true; 26 | } 27 | 28 | Message::Message(const MessageHeader& header) { 29 | type_ = static_cast(header.msg_type); 30 | body_length_ = header.body_length; 31 | data_ = new char[MESSAGE_HEADER_SIZE + body_length_]; 32 | *((uint32_t*) data_) = htonl(NEXUS_SERVICE_MAGIC_NUMBER); 33 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type_); 34 | *((uint32_t*) (data_ + 8)) = htonl(body_length_); 35 | } 36 | 37 | Message::Message(MessageType type, size_t body_length) : 38 | type_(type), 39 | body_length_(body_length) { 40 | data_ = new char[MESSAGE_HEADER_SIZE + body_length]; 41 | *((uint32_t*) data_) = htonl(NEXUS_SERVICE_MAGIC_NUMBER); 42 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type); 43 | *((uint32_t*) (data_ + 8)) = htonl(body_length_); 44 | } 45 | 46 | Message::~Message() { 47 | delete[] data_; 48 | } 49 | 50 | void Message::set_type(MessageType type) { 51 | type_ = type; 52 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type); 53 | } 54 | 55 | void Message::DecodeBody(google::protobuf::Message* message) const { 56 | message->ParseFromArray(body(), body_length_); 57 | } 58 | 59 | void Message::EncodeBody(const google::protobuf::Message& message) { 60 | CHECK_GE(body_length_, message.ByteSizeLong()) << "Buffer is too small to " 61 | "store the message"; 62 | message.SerializeToArray(body(), body_length_); 63 | } 64 | 65 | } // namespace nexus 66 | -------------------------------------------------------------------------------- /src/nexus/backend/caffe_densecap_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE 5 | 6 | #include 7 | 8 | #include "nexus/backend/model_ins.h" 9 | 10 | // Caffe headers 11 | // avoid redefined keywords from darknet 12 | #ifdef GPU 13 | #undef GPU 14 | #endif 15 | #ifdef CUDNN 16 | #undef CUDNN 17 | #endif 18 | #include "caffe/caffe.hpp" 19 | 20 | namespace nexus { 21 | namespace backend { 22 | 23 | class CaffeDenseCapModel : public ModelInstance { 24 | public: 25 | CaffeDenseCapModel(int gpu_id, const ModelInstanceConfig& config); 26 | 27 | Shape InputShape() final; 28 | 29 | std::unordered_map OutputShapes() final; 30 | 31 | ArrayPtr CreateInputGpuArray() final; 32 | 33 | std::unordered_map GetOutputGpuArrays() final; 34 | 35 | void Preprocess(std::shared_ptr task) final; 36 | 37 | void Forward(std::shared_ptr batch_task) final; 38 | 39 | void Postprocess(std::shared_ptr task) final; 40 | 41 | private: 42 | void LoadVocabulary(const std::string& filename); 43 | 44 | void TransformBbox(int im_height, int im_width, float scale, int nboxes, 45 | const float* rois, const float* bbox_deltas, float* out); 46 | 47 | // parameters 48 | int max_timestep_; 49 | int max_boxes_; 50 | float nms_threshold_; 51 | float score_threshold_; 52 | std::vector mean_values_; 53 | std::vector bbox_mean_; 54 | std::vector bbox_stds_; 55 | // networks and data 56 | std::unique_ptr > feature_net_; 57 | std::unique_ptr > rnn_net_; 58 | std::unique_ptr > embed_net_; 59 | 
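`DecodeHeader` and the `Message` constructors above pin down Nexus's wire format: every message starts with a 12-byte header of three big-endian 32-bit fields (magic number, message type, body length), followed by the protobuf-encoded body. A round-trip sketch of just the header (the field order and 0/4/8 offsets follow `message.cpp`; `MESSAGE_HEADER_SIZE` is assumed to equal 12):

```cpp
#include <arpa/inet.h>
#include <cstdint>
#include <cstring>

// Sketch of the 12-byte Nexus message header: big-endian magic number,
// message type, and body length at offsets 0, 4, and 8.
void EncodeHeaderSketch(char* buf, uint32_t magic, uint32_t type,
                        uint32_t body_length) {
  uint32_t fields[3] = {htonl(magic), htonl(type), htonl(body_length)};
  std::memcpy(buf, fields, sizeof(fields));
}

bool DecodeHeaderSketch(const char* buf, uint32_t expected_magic,
                        uint32_t* type, uint32_t* body_length) {
  uint32_t fields[3];
  std::memcpy(fields, buf, sizeof(fields));
  if (ntohl(fields[0]) != expected_magic) return false;  // reject junk early
  *type = ntohl(fields[1]);
  *body_length = ntohl(fields[2]);
  return true;
}
```

Using `memcpy` here instead of the casts in `message.cpp` sidesteps alignment concerns; the bytes on the wire are identical.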
std::vector vocabulary_; 60 | // shapes and sizes of input and output 61 | int image_height_; 62 | int image_width_; 63 | size_t input_size_; 64 | Shape input_shape_; 65 | std::unordered_map output_shapes_; 66 | //caffe::Blob* input_blob_; 67 | int feature_net_input_idx_; 68 | std::vector > > input_blobs_; 69 | // temporary buffer 70 | std::vector best_words_; 71 | std::unique_ptr > multiplier_; 72 | }; 73 | 74 | } // namespace backend 75 | } // namespace nexus 76 | 77 | #endif // USE_CAFFE 78 | 79 | #endif // NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/backend/tensorflow_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 2 | #define NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 3 | 4 | #ifdef USE_TENSORFLOW 5 | 6 | #include "nexus/backend/model_ins.h" 7 | // Tensorflow headers 8 | #include "tensorflow/core/public/session.h" 9 | 10 | namespace tf = tensorflow; 11 | 12 | 13 | namespace nexus { 14 | namespace backend { 15 | 16 | class TFShareModel; 17 | 18 | class TensorflowModel : public ModelInstance { 19 | public: 20 | TensorflowModel(int gpu_id, const ModelInstanceConfig& config); 21 | 22 | ~TensorflowModel(); 23 | 24 | Shape InputShape() final; 25 | 26 | std::unordered_map OutputShapes() final; 27 | 28 | ArrayPtr CreateInputGpuArray() final; 29 | 30 | std::unordered_map GetOutputGpuArrays() final; 31 | 32 | void Preprocess(std::shared_ptr task) final; 33 | 34 | void Forward(std::shared_ptr batch_task) final; 35 | 36 | void Postprocess(std::shared_ptr task) final; 37 | 38 | uint64_t GetPeakBytesInUse() override; 39 | 40 | private: 41 | tf::Tensor* NewInputTensor(); 42 | 43 | void MarshalDetectionResult( 44 | const QueryProto& query, std::shared_ptr output, 45 | int im_height, int im_width, QueryResultProto* result); 46 | 47 | tf::SessionOptions gpu_option_; 48 | tf::SessionOptions cpu_option_; 49 | std::unique_ptr session_; 50 | int image_height_; 51 | int image_width_; 52 | std::string input_layer_; 53 | Shape input_shape_; 54 | size_t input_size_; 55 | DataType input_data_type_; 56 | std::vector output_layers_; 57 | std::unordered_map output_shapes_; 58 | std::unordered_map output_sizes_; 59 | std::vector input_mean_; 60 | std::vector input_std_; 61 | std::unordered_map classnames_; 62 | tf::Allocator* gpu_allocator_; 63 | std::vector > input_tensors_; 64 | bool first_input_array_; 65 | 66 | // supports for TFShareModel 67 | friend class TFShareModel; 68 | size_t num_suffixes_; 69 | std::unique_ptr slice_beg_tensor_; 70 | std::unique_ptr slice_end_tensor_; 71 | void set_slice_tensor(const std::unique_ptr& dst, const std::vector &src); 72 | }; 73 | 74 | } // namespace backend 75 | } // namespace nexus 76 | 77 | #endif // USE_TENSORFLOW 78 | 79 | #endif // NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/backend/gpu_executor.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 2 | #define NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 3 | 4 | #ifdef USE_GPU 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "nexus/backend/model_exec.h" 13 | 14 | namespace nexus { 15 | namespace backend { 16 | 17 | class GpuExecutor { 18 | public: 19 | GpuExecutor() : duty_cycle_us_(0.) 
{} 20 | 21 | virtual ~GpuExecutor() {} 22 | 23 | void SetDutyCycle(double duty_cycle_us) { 24 | duty_cycle_us_.store(duty_cycle_us); 25 | } 26 | 27 | virtual void Start(int core = -1) = 0; 28 | virtual void Stop() = 0; 29 | virtual void AddModel(std::shared_ptr model) = 0; 30 | virtual void RemoveModel(std::shared_ptr model) = 0; 31 | virtual double CurrentUtilization() = 0; 32 | 33 | protected: 34 | std::atomic duty_cycle_us_; 35 | }; 36 | 37 | class GpuExecutorMultiBatching : public GpuExecutor { 38 | public: 39 | GpuExecutorMultiBatching(int gpu_id); 40 | 41 | inline int gpu_id() { return gpu_id_; } 42 | 43 | void Start(int core = -1) final; 44 | 45 | void Stop() final; 46 | 47 | void AddModel(std::shared_ptr model) final; 48 | 49 | void RemoveModel(std::shared_ptr model) final; 50 | 51 | double CurrentUtilization() final; 52 | 53 | private: 54 | void Run(); 55 | 56 | int gpu_id_; 57 | std::atomic_bool running_; 58 | std::thread thread_; 59 | std::vector > models_; 60 | std::vector > backup_models_; 61 | std::mutex models_mu_; 62 | double utilization_; 63 | TimePoint last_check_time_; 64 | std::mutex util_mu_; 65 | }; 66 | 67 | class GpuExecutorNoMultiBatching : public GpuExecutor { 68 | public: 69 | GpuExecutorNoMultiBatching(int gpu_id); 70 | 71 | inline int gpu_id() { return gpu_id_; } 72 | 73 | void Start(int core = -1); 74 | 75 | void Stop(); 76 | 77 | void AddModel(std::shared_ptr model) final; 78 | 79 | void RemoveModel(std::shared_ptr model) final; 80 | 81 | double CurrentUtilization() final; 82 | 83 | private: 84 | int gpu_id_; 85 | int core_; 86 | std::mutex mu_; 87 | std::unordered_map > threads_; 89 | }; 90 | 91 | } // namespace backend 92 | } // namespace nexus 93 | 94 | #endif // USE_GPU 95 | 96 | #endif // NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 AS builder 2 | COPY . /nexus 3 | RUN apt-get update \ 4 | && apt-get install -y unzip build-essential git autoconf automake libtool pkg-config curl make zlib1g-dev wget \ 5 | libswscale-dev libjpeg-dev libpng-dev libsm6 libxext6 libxrender-dev \ 6 | python-dev python-pip \ 7 | libcurl4-openssl-dev \ 8 | software-properties-common \ 9 | && python -m pip install --upgrade six numpy wheel setuptools mock 'future>=0.17.1' \ 10 | \ 11 | && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - \ 12 | && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \ 13 | && apt-get update \ 14 | && apt-get install -y cmake \ 15 | && rm -rf /var/lib/apt/lists/* \ 16 | \ 17 | && /nexus/build-deps.bash \ 18 | && /nexus/build-tensorflow.bash \ 19 | && cd /nexus/build-dep-install/tensorflow/ \ 20 | && rm -rf c cc compiler core stream_executor \ 21 | && rm -rf /nexus/build-dep-src /root/.cache/bazel \ 22 | \ 23 | && mkdir /nexus/build \ 24 | && cd /nexus/build \ 25 | && cmake .. 
    -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCUDA_PATH=/usr/local/cuda-10.0 -DUSE_TENSORFLOW=ON -DUSE_CAFFE2=OFF \
    && make -j$(nproc) \
    \
    && find /nexus/build-dep-install -type d \( -name "bin" -o -name "include" -o -name "share" \) -exec rm -rf {} + \
    && find /nexus/build-dep-install -type f -name "*.a" -exec rm -f {} + \
    && rm -rf /nexus/build-dep-install/bazel \
    && cd /nexus/build \
    && rm -rf CMakeFiles gen *.a *.txt *.cmake Makefile bench_tfshare test_*


FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
LABEL maintainer="Lequn Chen "
COPY --from=builder /nexus /nexus
RUN apt-get update \
    && apt-get install -y libswscale4 libjpeg8 libpng16-16 \
        software-properties-common wget \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.7 python3.7-dev \
    && wget https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py \
    && python3.7 /tmp/get-pip.py \
    && rm /tmp/get-pip.py \
    && python3.7 -m pip install --upgrade numpy protobuf pyyaml Pillow \
    && python3.7 -m pip install --editable /nexus/python \
    && python3.7 -m pip uninstall -y pip \
    && apt-get purge -y python3.7-dev software-properties-common wget \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* /root/.cache/pip
WORKDIR /nexus
--------------------------------------------------------------------------------
/src/nexus/common/time_util.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_TIME_UTIL_H_
#define NEXUS_COMMON_TIME_UTIL_H_

#include
#include
#include
#include
#include
#include
#include

namespace nexus {

using Clock = std::chrono::high_resolution_clock;
using TimePoint = std::chrono::time_point<Clock>;

/*! \brief Timer helps to record time and count duration between two time
    points */
class Timer {
 public:
  /*!
   * \brief Records the time point with tag
   * \param tag Tag of time point
   */
  void Record(const std::string& tag);
  /*!
   * \brief Get the interval between two tags in milliseconds
   * \param beg_tag Tag of beginning time point
   * \param end_tag Tag of end time point
   * \return Duration in milliseconds
   */
  uint64_t GetLatencyMillis(const std::string& beg_tag,
                            const std::string& end_tag);
  /*!
   * \brief Get the interval between two tags in microseconds
   * \param beg_tag Tag of beginning time point
   * \param end_tag Tag of end time point
   * \return Duration in microseconds
   */
  uint64_t GetLatencyMicros(const std::string& beg_tag,
                            const std::string& end_tag);

 private:
  /*!
   * \brief Get the time point given the tag
   * \param tag Tag of time point
   * \return TimePoint pointer
   */
  TimePoint* GetTimepoint(const std::string& tag);
  /*!
\brief Map from tag to time points */ 51 | std::unordered_map time_points_; 52 | }; 53 | 54 | class Tickable { 55 | public: 56 | Tickable(uint32_t tick_interval_sec); 57 | 58 | virtual ~Tickable(); 59 | 60 | void Tick(); 61 | 62 | protected: 63 | virtual void TickImpl() = 0; 64 | 65 | protected: 66 | uint32_t tick_interval_sec_; 67 | uint32_t sec_since_last_tick_; 68 | }; 69 | 70 | class TimeSystem { 71 | public: 72 | static TimeSystem& Singleton(); 73 | 74 | ~TimeSystem(); 75 | 76 | void Stop(); 77 | 78 | bool AddTickable(std::shared_ptr tickable); 79 | 80 | bool RemoveTickable(std::shared_ptr tickable); 81 | 82 | private: 83 | TimeSystem(); 84 | 85 | void Run(); 86 | 87 | std::unordered_set> tickables_; 88 | std::mutex mutex_; 89 | std::atomic_bool running_; 90 | std::thread thread_; 91 | }; 92 | 93 | } // namespace nexus 94 | 95 | #endif // NEXUS_COMMON_TIME_UTIL_H_ 96 | -------------------------------------------------------------------------------- /tools/test_complex_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "nexus/common/model_db.h" 5 | #include "nexus/scheduler/complex_query.h" 6 | 7 | using namespace nexus; 8 | using namespace nexus::scheduler; 9 | 10 | DEFINE_int32(avg_interval, 10, "Moving average interval for backend rate"); // for the sch_info.cpp linking error 11 | 12 | ComplexQuery::NodeID add_node(ComplexQuery &cq, const std::string &gpu, 13 | const std::string &framework, const std::string &model_name, 14 | int image_width, int image_height) { 15 | ComplexQuery::NodeID node(framework, model_name); 16 | auto model_sess_id = framework + ':' + model_name + ":0"; 17 | auto profile_id = framework + ':' + model_name + ":1"; 18 | if (image_height > 0) { 19 | profile_id += ":" + std::to_string(image_height) + "x" + std::to_string(image_width); 20 | } 21 | auto *profile = ModelDatabase::Singleton().GetModelProfile(gpu, "generic", profile_id); 22 | CHECK(profile != nullptr); 23 | cq.AddNode(node, model_sess_id, *profile); 24 | return node; 25 | } 26 | 27 | void add_node(ComplexQuery &cq, ComplexQuery::NodeID &node, const std::string &profile_id, const std::string &gpu) { 28 | auto model_sess_id = node.framework + ':' + node.model_name + ":0"; 29 | auto *profile = ModelDatabase::Singleton().GetModelProfile(gpu, "generic", profile_id); 30 | cq.AddNode(node, model_sess_id, *profile); 31 | } 32 | 33 | int main(int argc, char** argv) { 34 | FLAGS_logtostderr = 1; 35 | google::InitGoogleLogging(argv[0]); 36 | google::ParseCommandLineFlags(&argc, &argv, true); 37 | google::InstallFailureSignalHandler(); 38 | 39 | const int SLO_MS = 400; 40 | const int SEGMENTS = 500; 41 | const std::string gpu = "GeForce_GTX_1080_Ti"; 42 | 43 | ComplexQuery cq("cq_id", SLO_MS * 1000, SEGMENTS); 44 | auto node_ssd = add_node(cq, gpu, "tensorflow", "ssd_mobilenet", 300, 300); 45 | auto node_inception = add_node(cq, gpu, "tensorflow", "inception_0", 0, 0); 46 | auto node_vgg = add_node(cq, gpu, "tensorflow", "vgg16_0", 0, 0); 47 | cq.AddChild(node_ssd, node_inception); 48 | cq.AddChild(node_ssd, node_vgg); 49 | cq.Finalize(); 50 | 51 | cq.SetRequestRate(node_ssd, 200); 52 | cq.SetRequestRate(node_inception, 50); 53 | cq.SetRequestRate(node_vgg, 100); 54 | cq.DynamicProgramming(); 55 | std::cout << "minimal number of GPUs: " << cq.GetMinimalGPUs() << std::endl; 56 | auto split = cq.GetSLOms(); 57 | for (auto &node : split) 58 | std::cout << " " << node.first.ToString() << ": " << node.second << "ms" << 
std::endl; 59 | } 60 | -------------------------------------------------------------------------------- /src/nexus/common/connection.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_CONNECTION_H_ 2 | #define NEXUS_COMMON_CONNECTION_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "nexus/common/message.h" 10 | 11 | namespace nexus { 12 | 13 | class Connection; // forward declare 14 | 15 | class MessageHandler { 16 | public: 17 | /*! 18 | * \brief Handles a new message 19 | * \param conn Connection that receives the message 20 | * \param message Received message 21 | */ 22 | virtual void HandleMessage(std::shared_ptr conn, 23 | std::shared_ptr message) = 0; 24 | /*! 25 | * \brief Handles error in connection 26 | * \param conn Connection that encounters an error 27 | * \param ec Boost error code 28 | */ 29 | virtual void HandleError(std::shared_ptr conn, 30 | boost::system::error_code ec) = 0; 31 | }; 32 | 33 | class Connection : public std::enable_shared_from_this { 34 | public: 35 | // disable copy 36 | Connection(const Connection&) = delete; 37 | Connection& operator=(const Connection&) = delete; 38 | // constructor 39 | explicit Connection(boost::asio::ip::tcp::socket socket, 40 | MessageHandler* handler); 41 | /*! \brief starts processing packets received from socket */ 42 | virtual void Start(); 43 | /*! \brief stops the socket */ 44 | virtual void Stop(); 45 | /*! 46 | * \brief sends a message through socket 47 | * \param msg Shared pointer of message, yield the ownership to the function 48 | */ 49 | virtual void Write(std::shared_ptr msg); 50 | 51 | protected: 52 | Connection(boost::asio::io_context& io_context, MessageHandler* handler); 53 | /*! \brief reads the header from the connection */ 54 | void DoReadHeader(); 55 | /*! \brief reads the body of message and invoke the handler */ 56 | void DoReadBody(std::shared_ptr msg); 57 | /*! \brief sends the message to the peer */ 58 | void DoWrite(); 59 | 60 | protected: 61 | /*! \brief Socket */ 62 | boost::asio::ip::tcp::socket socket_; 63 | std::mutex socket_mutex_; 64 | /*! \brief Message handler */ 65 | MessageHandler* handler_; 66 | /*! \brief Wrong header indicator */ 67 | bool wrong_header_; 68 | /*! \brief Receiving message */ 69 | //std::shared_ptr recv_message_; 70 | char msg_header_buffer_[MESSAGE_HEADER_SIZE]; 71 | /*! \brief Queue for outbound messages */ 72 | std::deque > write_queue_; 73 | /*! \brief Mutex for write_queue_ */ 74 | std::mutex write_queue_mutex_; 75 | }; 76 | 77 | } // namespace nexus 78 | 79 | #endif // NEXUS_COMMON_CONNECTION_H_ 80 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Running Nexus with the Simple Example 2 | 3 | We have prepared a simple example application to walk you through how to run 4 | Nexus in concrete steps. We provide a Docker image so that you don't have to 5 | spend hours on building the dependencies. To download the Docker image, you 6 | can run: 7 | 8 | ```bash 9 | docker pull abcdabcd987/nexus 10 | ``` 11 | 12 | If you want to run Nexus on the host OS, make sure you have followed the 13 | [building instructions](../BUILDING.md) and have Nexus and its dependencies 14 | built. 
The commands in the following sections assume Docker, but to run on the host,
you can simply drop the lines containing `docker`, omit the command-line
arguments that specify server addresses, and replace `/nexus` with the path to
your Nexus build.

## Download the Model Zoo

```bash
git clone https://gitlab.cs.washington.edu/syslab/nexus-models
cd nexus-models
export MODEL_DIR=$(pwd)
git lfs checkout
```

## Profile ResNet-50 on GPU 0

```bash
docker run -it --rm --gpus all -v $MODEL_DIR:$MODEL_DIR abcdabcd987/nexus \
    python3.7 /nexus/tools/profiler/profiler.py --gpu_list=0 --gpu_uuid --model_root=$MODEL_DIR \
    --framework=tensorflow --model=resnet_0 --width=224 --height=224
```

## Run Nexus Scheduler and Backend, and Application Frontend

```bash
docker network create nexus-net

docker run -it --rm --gpus all --network=nexus-net -v=$MODEL_DIR:$MODEL_DIR --name=nexus-scheduler -p=10001 abcdabcd987/nexus \
    /nexus/build/scheduler -model_root=$MODEL_DIR -alsologtostderr -colorlogtostderr -v 1

docker run -it --rm --gpus all --network=nexus-net -v=$MODEL_DIR:$MODEL_DIR --name=nexus-gpu0 -p=8001 -p=8002 abcdabcd987/nexus \
    /nexus/build/backend -model_root=$MODEL_DIR -gpu=0 -alsologtostderr -colorlogtostderr \
    -sch_addr=nexus-scheduler:10001

docker run -it --rm --gpus all --network=nexus-net --name=nexus-simple-frontend -p=9001 -p=9002 abcdabcd987/nexus \
    /nexus/build/simple -framework=tensorflow -model=resnet_0 -latency=50 -width=224 -height=224 -alsologtostderr -colorlogtostderr \
    -sch_addr=nexus-scheduler:10001
```

## Send a Client Request

```bash
curl https://upload.wikimedia.org/wikipedia/commons/4/4c/Chihuahua1_bvdb.jpg | docker run --rm -i --network=nexus-net abcdabcd987/nexus \
    python3.7 /nexus/examples/simple_app/src/client.py - --server=nexus-simple-frontend:9001
```

The [image](https://upload.wikimedia.org/wikipedia/commons/4/4c/Chihuahua1_bvdb.jpg)
should be classified as a *chihuahua*.
--------------------------------------------------------------------------------
/src/nexus/common/device.cpp:
--------------------------------------------------------------------------------
#include "nexus/common/device.h"
#include

namespace nexus {

#ifdef USE_GPU

DEFINE_bool(generic_profile, false, "Use the generic profile for all GPUs of the same model instead of using profiles for each GPU card. 
(Applicable to Backend only)"); 9 | 10 | GPUDevice::GPUDevice(int gpu_id) : 11 | Device(kGPU), gpu_id_(gpu_id) { 12 | std::stringstream ss; 13 | ss << "gpu:" << gpu_id; 14 | name_ = ss.str(); 15 | cudaDeviceProp prop; 16 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 17 | NEXUS_CUDA_CHECK(cudaGetDeviceProperties(&prop, gpu_id_)); 18 | device_name_.assign(prop.name, strlen(prop.name)); 19 | std::replace(device_name_.begin(), device_name_.end(), ' ', '_'); 20 | total_memory_ = prop.totalGlobalMem; 21 | 22 | if (FLAGS_generic_profile) { 23 | uuid_ = "generic"; 24 | } else { 25 | auto *u = reinterpret_cast(&prop.uuid); 26 | char uuid_hex[37] = {}; 27 | sprintf(uuid_hex, 28 | "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", 29 | u[0], u[1], u[2], u[3], 30 | u[4], u[5], 31 | u[6], u[7], 32 | u[8], u[9], 33 | u[10], u[11], u[12], u[13], u[14], u[15]); 34 | uuid_ = uuid_hex; 35 | } 36 | 37 | LOG(INFO) << "GPU " << gpu_id << " " << device_name_ 38 | << "(" << uuid_ << ")" 39 | << ": total memory " << total_memory_ / 1024. / 1024. / 1024. << "GB"; 40 | } 41 | 42 | void *GPUDevice::Allocate(size_t nbytes) { 43 | void* buf; 44 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 45 | cudaError_t err = cudaMalloc(&buf, nbytes); 46 | if (err != cudaSuccess) { 47 | throw cudaGetErrorString(err); 48 | } 49 | return buf; 50 | } 51 | 52 | size_t GPUDevice::FreeMemory() const { 53 | size_t free_mem, total_mem; 54 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 55 | NEXUS_CUDA_CHECK(cudaMemGetInfo(&free_mem, &total_mem)); 56 | return free_mem; 57 | } 58 | 59 | void GPUDevice::Free(void *buf) { 60 | NEXUS_CUDA_CHECK(cudaFree(buf)); 61 | } 62 | 63 | 64 | GPUDevice *DeviceManager::GetGPUDevice(int gpu_id) const { 65 | CHECK_LT(gpu_id, gpu_devices_.size()) << "GPU id " << gpu_id << 66 | " exceeds number of GPU devices (" << gpu_devices_.size() << ")"; 67 | return gpu_devices_[gpu_id]; 68 | } 69 | #endif 70 | 71 | DeviceManager::DeviceManager() { 72 | cpu_device_ = new CPUDevice(); 73 | int gpu_count; 74 | #ifdef USE_GPU 75 | NEXUS_CUDA_CHECK(cudaGetDeviceCount(&gpu_count)); 76 | #endif 77 | for (int i = 0; i < gpu_count; ++i) { 78 | gpu_devices_.push_back(new GPUDevice(i)); 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/nexus/backend/caffe2_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE2_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE2_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE2 5 | 6 | #include "nexus/backend/model_ins.h" 7 | // Caffe2 headers 8 | #include "caffe2/core/context_gpu.h" 9 | #include "caffe2/core/predictor.h" 10 | 11 | namespace nexus { 12 | namespace backend { 13 | 14 | class Caffe2Model : public ModelInstance { 15 | public: 16 | Caffe2Model(int gpu_id, const ModelInstanceConfig& config); 17 | 18 | Shape InputShape() final; 19 | 20 | std::unordered_map OutputShapes() final; 21 | 22 | ArrayPtr CreateInputGpuArray() final; 23 | 24 | ArrayPtr CreateInputGpuArrayWithRawPointer(float* ptr, size_t nfloats) final; 25 | 26 | void RemoveInputGpuArray(ArrayPtr arr) final; 27 | 28 | std::unordered_map GetOutputGpuArrays() final; 29 | 30 | void Preprocess(std::shared_ptr task) final; 31 | 32 | void Forward(std::shared_ptr batch_task) final; 33 | 34 | void ForwardAsync(std::shared_ptr batch_task) final; 35 | 36 | void WaitOutput(std::shared_ptr batch_task) final; 37 | 38 | void Postprocess(std::shared_ptr task) final; 39 | 40 | private: 41 | void LoadModel(const std::string& 
init_path, const std::string& predict_path, 42 | const ModelInstanceConfig& config, caffe2::NetDef* init_net, 43 | caffe2::NetDef* predict_net); 44 | 45 | std::pair NewInputBlob(); 46 | 47 | std::pair NewInputBlob(float* ptr, size_t nfloats); 48 | 49 | std::unique_ptr gpu_ctx_; 50 | std::string net_name_; 51 | std::unique_ptr workspace_; 52 | caffe2::NetBase* net_; 53 | std::string input_blob_name_; 54 | std::string output_blob_name_; 55 | // image size 56 | int image_height_; 57 | int image_width_; 58 | // input shape of neural network 59 | Shape input_shape_; 60 | // output shape of neural network 61 | Shape output_shape_; 62 | // size of input in a single input 63 | size_t input_size_; 64 | // size of output in a single batch 65 | size_t output_size_; 66 | // Input tensor 67 | std::unordered_map > input_blobs_; 69 | bool first_input_array_; 70 | // Output tensor 71 | caffe2::TensorCUDA* output_tensor_; 72 | 73 | std::unordered_map classnames_; 74 | bool has_mean_file_; 75 | std::vector mean_value_; 76 | std::vector mean_blob_; 77 | float scale_; 78 | 79 | // transformer for input 80 | //std::unique_ptr > transformer_; 81 | }; 82 | 83 | } // namespace backend 84 | } // namespace nexus 85 | 86 | #endif // USE_CAFFE2 87 | 88 | #endif // NEXUS_BACKEND_CAFFE2_MODEL_H_ 89 | -------------------------------------------------------------------------------- /src/nexus/scheduler/frontend_delegate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/scheduler/frontend_delegate.h" 4 | #include "nexus/scheduler/scheduler.h" 5 | 6 | namespace nexus { 7 | namespace scheduler { 8 | 9 | FrontendDelegate::FrontendDelegate(uint32_t node_id, const std::string& ip, 10 | const std::string& server_port, 11 | const std::string& rpc_port, 12 | int beacon_sec): 13 | node_id_(node_id), 14 | ip_(ip), 15 | server_port_(server_port), 16 | rpc_port_(rpc_port), 17 | beacon_sec_(beacon_sec), 18 | timeout_ms_(beacon_sec * 3 * 1000) { 19 | std::stringstream rpc_addr; 20 | rpc_addr << ip_ << ":" << rpc_port_; 21 | auto channel = grpc::CreateChannel(rpc_addr.str(), 22 | grpc::InsecureChannelCredentials()); 23 | stub_ = FrontendCtrl::NewStub(channel); 24 | last_time_ = std::chrono::system_clock::now(); 25 | } 26 | 27 | std::time_t FrontendDelegate::LastAliveTime() { 28 | return std::chrono::system_clock::to_time_t(last_time_); 29 | } 30 | 31 | void FrontendDelegate::Tick() { 32 | last_time_ = std::chrono::system_clock::now(); 33 | } 34 | 35 | bool FrontendDelegate::IsAlive() { 36 | long elapse = std::chrono::duration_cast( 37 | std::chrono::system_clock::now() - last_time_).count(); 38 | if (elapse < timeout_ms_) { 39 | return true; 40 | } 41 | CheckAliveRequest request; 42 | request.set_node_type(FRONTEND_NODE); 43 | request.set_node_id(node_id_); 44 | RpcReply reply; 45 | 46 | // Inovke RPC CheckAlive 47 | grpc::ClientContext context; 48 | grpc::Status status = stub_->CheckAlive(&context, request, &reply); 49 | if (!status.ok()) { 50 | LOG(ERROR) << status.error_code() << ": " << status.error_message(); 51 | return false; 52 | } 53 | last_time_ = std::chrono::system_clock::now(); 54 | return true; 55 | } 56 | 57 | void FrontendDelegate::SubscribeModel(const std::string& model_session_id) { 58 | subscribe_models_.insert(model_session_id); 59 | } 60 | 61 | CtrlStatus FrontendDelegate::UpdateModelRoutesRpc( 62 | const ModelRouteUpdates& request) { 63 | RpcReply reply; 64 | // Inovke RPC CheckAlive 65 | grpc::ClientContext context; 66 | grpc::Status status = 
stub_->UpdateModelRoutes(&context, request, &reply); 67 | if (!status.ok()) { 68 | LOG(ERROR) << status.error_code() << ": " << status.error_message(); 69 | return CTRL_SERVER_UNREACHABLE; 70 | } 71 | last_time_ = std::chrono::system_clock::now(); 72 | if (reply.status() != CTRL_OK) { 73 | LOG(ERROR) << "Frontend " << node_id_ << " UpdateModelRoutes error: " << 74 | CtrlStatus_Name(reply.status()); 75 | } 76 | return reply.status(); 77 | } 78 | 79 | } // namespace scheduler 80 | } // namespace nexus 81 | -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # Building Nexus on Ubuntu 18.04 2 | 3 | ## Install system-wide packages 4 | 5 | ```bash 6 | # Build system and utilities 7 | sudo apt-get install -y unzip build-essential git autoconf automake libtool pkg-config curl make zlib1g-dev wget 8 | 9 | # For OpenCV 10 | sudo apt-get install -y libswscale-dev libjpeg-dev libpng-dev 11 | 12 | # Python 2.7 for building Tensorflow 13 | sudo apt-get install -y python-dev python-pip 14 | pip install --upgrade --user pip six numpy wheel setuptools mock 'future>=0.17.1' 15 | 16 | # Python 3.7 for Nexus 17 | sudo apt-get install -y software-properties-common 18 | sudo add-apt-repository -y ppa:deadsnakes/ppa 19 | sudo apt-get update 20 | sudo apt-get install -y python3.7 python3.7-dev 21 | curl https://bootstrap.pypa.io/get-pip.py | python3.7 22 | python3.7 -m pip install --upgrade --user numpy protobuf Pillow pyyaml 23 | 24 | # CMake > 3.12 25 | # See https://apt.kitware.com/ for more details. 26 | wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add - 27 | sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' 28 | sudo apt-get update 29 | sudo apt-get install -y cmake 30 | ``` 31 | 32 | ## Install NVIDIA driver 33 | 34 | ```bash 35 | sudo apt-get install -y software-properties-common 36 | sudo add-apt-repository -y ppa:graphics-drivers/ppa 37 | sudo apt-get update 38 | sudo apt-get install -y nvidia-headless-440 39 | ``` 40 | 41 | ## Install CUDA 10.0 42 | 43 | ```bash 44 | wget -n -O cuda_10.0.130_410.48_linux.run https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux 45 | sudo sh cuda_10.0.130_410.48_linux.run -silent -toolkit 46 | sudo unlink /usr/local/cuda 47 | ``` 48 | 49 | ## Install cuDNN 7.6.5 50 | 51 | Download cuDNN 7.6.5 for CUDA 10.0 from [NVIDIA](https://developer.nvidia.com/rdp/cudnn-download) 52 | 53 | ```bash 54 | tar xf cudnn-10.0-linux-x64-v7.6.5.32.tgz 55 | sudo mv cuda/include/cudnn.h /usr/local/cuda-10.0/include 56 | sudo mv cuda/lib64/libcudnn* /usr/local/cuda-10.0/lib64 57 | sudo chmod a+r /usr/local/cuda-10.0/include/cudnn.h /usr/local/cuda-10.0/lib64/libcudnn* 58 | sudo ldconfig 59 | ``` 60 | 61 | ## Clone Nexus 62 | 63 | ```bash 64 | git clone https://github.com/uwsampl/nexus.git 65 | cd nexus 66 | ``` 67 | 68 | ## Build Nexus dependencies 69 | 70 | ```bash 71 | ./build-deps.bash 72 | ./build-tensorflow.bash 73 | ``` 74 | 75 | By default, the script will build TensorFlow with the following 76 | [CUDA compute capabilities](https://en.wikipedia.org/wiki/CUDA#GPUs_supported): 77 | `5.2, 6.1, 7.5`. If you want to change any build options 78 | for TensorFlow, set the environment variables specified in 79 | [`./build-tensorflow.bash`](build-tensorflow.bash) 80 | 81 | ## Build Nexus 82 | 83 | ```bash 84 | mkdir build 85 | cd build 86 | cmake .. 
    -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCUDA_PATH=/usr/local/cuda-10.0 -DUSE_TENSORFLOW=ON -DUSE_CAFFE2=OFF
make -j$(nproc)
python3.7 -m pip install --user --editable ./python
```
--------------------------------------------------------------------------------
/src/nexus/common/model_def.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_DEF_H_
#define NEXUS_COMMON_MODEL_DEF_H_

#include
#include
#include
#include

#include "nexus/common/util.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {

// Builds a model ID string, e.g. ModelID("tensorflow", "resnet_0", 1)
// returns "tensorflow:resnet_0:1".
inline std::string ModelID(const std::string& framework,
                           const std::string& model_name,
                           uint32_t version) {
  std::stringstream ss;
  ss << framework << ":" << model_name << ":" << version;
  return ss.str();
}

inline void ParseModelID(const std::string model_id,
                         ModelSession* model_session) {
  std::vector<std::string> tokens;
  SplitString(model_id, ':', &tokens);
  model_session->set_framework(tokens[0]);
  model_session->set_model_name(tokens[1]);
  model_session->set_version(std::stoi(tokens[2]));
}

inline std::string ModelSessionToModelID(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":" << model_session.model_name() << ":"
     << model_session.version();
  return ss.str();
}

// Profile IDs append the input resolution when one is set, e.g.
// "tensorflow:ssd_mobilenet:1:300x300".
inline std::string ModelSessionToProfileID(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":" << model_session.model_name() << ":"
     << model_session.version();
  if (model_session.image_height() > 0) {
    ss << ":" << model_session.image_height() << "x"
       << model_session.image_width();
  }
  return ss.str();
}

inline std::string ModelSessionToString(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":"
     << model_session.model_name() << ":" << model_session.version();
  if (model_session.image_height() > 0) {
    ss << ":" << model_session.image_height() << "x"
       << model_session.image_width();
  }
  ss << ":" << model_session.latency_sla();
  return ss.str();
}

// Parses "framework:model:version:latency" or
// "framework:model:version:HxW:latency",
// e.g. "tensorflow:resnet_0:1:224x224:50".
inline bool ParseModelSession(const std::string& str, ModelSession* sess) {
  std::vector<std::string> tokens;
  SplitString(str, ':', &tokens);
  if (tokens.size() < 4) {
    return false;
  }
  sess->set_framework(tokens[0]);
  sess->set_model_name(tokens[1]);
  sess->set_version(std::stoi(tokens[2]));
  if (tokens.size() == 4) {
    sess->set_latency_sla(std::stoi(tokens[3]));
  } else {
    sess->set_latency_sla(std::stoi(tokens[4]));
    // decode image size
    std::vector<std::string> image_dims;
    SplitString(tokens[3], 'x', &image_dims);
    if (image_dims.size() != 2) {
      return false;
    }
    sess->set_image_height(std::stoi(image_dims[0]));
    sess->set_image_width(std::stoi(image_dims[1]));
  }
  return true;
}

} // namespace nexus

#endif // NEXUS_COMMON_MODEL_DEF_H_
--------------------------------------------------------------------------------
/src/nexus/backend/backend_main.cpp:
--------------------------------------------------------------------------------
#include
#include

#include
#include
#include
#include

#include
#include

#include "nexus/backend/backend_server.h"
#include
"nexus/common/config.h" 14 | #include "nexus/common/image.h" 15 | #include "nexus/common/util.h" 16 | #include "nexus/proto/nnquery.pb.h" 17 | 18 | using namespace nexus; 19 | using namespace nexus::backend; 20 | 21 | DEFINE_string(port, std::to_string(BACKEND_DEFAULT_PORT), "server port"); 22 | DEFINE_string(rpc_port, std::to_string(BACKEND_DEFAULT_RPC_PORT), "RPC port"); 23 | DEFINE_string(sch_addr, "127.0.0.1", 24 | "scheduler IP address " 25 | "(use default port 10001 if no port specified)"); 26 | DEFINE_int32(gpu, 0, "gpu device ID (default: 0)"); 27 | DEFINE_uint64(num_workers, 0, "number of workers (default: 0)"); 28 | DEFINE_string(cores, "", "Specify cores to use, e.g., \"0-4\", or \"0-3,5\""); 29 | 30 | std::vector ParseCores(std::string s) { 31 | std::vector cores; 32 | std::vector segs; 33 | SplitString(s, ',', &segs); 34 | for (auto seg : segs) { 35 | if (seg.find('-') == std::string::npos) { 36 | cores.push_back(std::stoi(seg)); 37 | } else { 38 | std::vector range; 39 | SplitString(seg, '-', &range); 40 | CHECK_EQ(range.size(), 2) << "Wrong format of cores"; 41 | int beg = std::stoi(range[0]); 42 | int end = std::stoi(range[1]); 43 | for (int i = beg; i <= end; ++i) { 44 | cores.push_back(i); 45 | } 46 | } 47 | } 48 | return cores; 49 | } 50 | 51 | BackendServer *server_ptr; 52 | 53 | void sigint_handler(int _sig) { 54 | if (server_ptr) { 55 | server_ptr->Stop(); 56 | } 57 | std::exit(0); 58 | } 59 | 60 | int main(int argc, char **argv) { 61 | struct sigaction sig_handle; 62 | sig_handle.sa_handler = sigint_handler; 63 | sigemptyset(&sig_handle.sa_mask); 64 | sig_handle.sa_flags = 0; 65 | sigaction(SIGINT, &sig_handle, NULL); 66 | 67 | // Init glog 68 | google::InitGoogleLogging(argv[0]); 69 | // Parse command line flags 70 | google::ParseCommandLineFlags(&argc, &argv, true); 71 | // Setup backtrace on segfault 72 | google::InstallFailureSignalHandler(); 73 | // Decide server IP address 74 | LOG(INFO) << "Backend server: port " << FLAGS_port << ", rpc port " 75 | << FLAGS_rpc_port << ", workers " << FLAGS_num_workers << ", gpu " 76 | << FLAGS_gpu; 77 | // Initialize _Hack_Images 78 | { 79 | ImageProto image; 80 | image.set_hack_filename("__init_Hack_Images"); 81 | (void)_Hack_DecodeImageByFilename(image, ChannelOrder::CO_BGR); 82 | } 83 | // Create the backend server 84 | std::vector cores = ParseCores(FLAGS_cores); 85 | BackendServer server(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_gpu, 86 | FLAGS_num_workers, cores); 87 | server_ptr = &server; 88 | server.Run(); 89 | return 0; 90 | } 91 | -------------------------------------------------------------------------------- /src/nexus/common/time_util.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/common/time_util.h" 2 | 3 | namespace nexus { 4 | 5 | void Timer::Record(const std::string& tag) { 6 | time_points_.emplace(tag, Clock::now()); 7 | } 8 | 9 | uint64_t Timer::GetLatencyMillis(const std::string& beg_tag, 10 | const std::string& end_tag) { 11 | auto beg = GetTimepoint(beg_tag); 12 | auto end = GetTimepoint(end_tag); 13 | if (beg == nullptr || end == nullptr) { 14 | return 0; 15 | } 16 | auto d = std::chrono::duration_cast(*end - *beg); 17 | return d.count(); 18 | } 19 | 20 | uint64_t Timer::GetLatencyMicros(const std::string& beg_tag, 21 | const std::string& end_tag) { 22 | auto beg = GetTimepoint(beg_tag); 23 | auto end = GetTimepoint(end_tag); 24 | if (beg == nullptr || end == nullptr) { 25 | return 0; 26 | } 27 | auto d = std::chrono::duration_cast(*end 
- *beg); 28 | return d.count(); 29 | } 30 | 31 | TimePoint* Timer::GetTimepoint(const std::string& tag) { 32 | auto itr = time_points_.find(tag); 33 | if (itr == time_points_.end()) { 34 | return nullptr; 35 | } 36 | return &itr->second; 37 | } 38 | 39 | Tickable::Tickable(uint32_t tick_interval_sec) : 40 | tick_interval_sec_(tick_interval_sec), 41 | sec_since_last_tick_(0) { 42 | } 43 | 44 | Tickable::~Tickable() { 45 | } 46 | 47 | void Tickable::Tick() { 48 | ++sec_since_last_tick_; 49 | if (sec_since_last_tick_ == tick_interval_sec_) { 50 | TickImpl(); 51 | sec_since_last_tick_ = 0; 52 | } 53 | } 54 | 55 | TimeSystem& TimeSystem::Singleton() { 56 | static TimeSystem time_system_; 57 | return time_system_; 58 | } 59 | 60 | TimeSystem::TimeSystem() : 61 | running_(true) { 62 | thread_ = std::thread(&TimeSystem::Run, this); 63 | } 64 | 65 | TimeSystem::~TimeSystem() { 66 | running_ = false; 67 | thread_.join(); 68 | } 69 | 70 | void TimeSystem::Stop() { 71 | running_ = false; 72 | thread_.join(); 73 | } 74 | 75 | bool TimeSystem::AddTickable(std::shared_ptr tickable) { 76 | std::lock_guard lock(mutex_); 77 | if (tickables_.find(tickable) != tickables_.end()) { 78 | return false; 79 | } 80 | tickables_.insert(tickable); 81 | return true; 82 | } 83 | 84 | bool TimeSystem::RemoveTickable(std::shared_ptr tickable) { 85 | std::lock_guard lock(mutex_); 86 | auto iter = tickables_.find(tickable); 87 | if (iter == tickables_.end()) { 88 | return false; 89 | } 90 | tickables_.erase(iter); 91 | return true; 92 | } 93 | 94 | void TimeSystem::Run() { 95 | while (running_) { 96 | auto next_time = Clock::now() + std::chrono::seconds(1); 97 | { 98 | std::lock_guard lock(mutex_); 99 | for (auto item : tickables_) { 100 | item->Tick(); 101 | } 102 | } 103 | std::this_thread::sleep_until(next_time); 104 | } 105 | } 106 | 107 | } // namespace nexus 108 | -------------------------------------------------------------------------------- /src/nexus/common/device.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_DEVICE_H_ 2 | #define NEXUS_COMMON_DEVICE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef USE_GPU 10 | #include 11 | #endif 12 | 13 | namespace nexus { 14 | 15 | enum DeviceType { 16 | kCPU = 0, 17 | kGPU = 1, 18 | }; 19 | 20 | class DeviceManager; // forward declare 21 | 22 | class Device { 23 | public: 24 | virtual void* Allocate(size_t nbytes) = 0; 25 | 26 | virtual void Free(void* buf) = 0; 27 | 28 | virtual std::string name() const = 0; 29 | 30 | DeviceType type() const { return type_; } 31 | 32 | bool operator==(const Device& other) const { 33 | return name() == other.name(); 34 | } 35 | 36 | protected: 37 | Device(DeviceType type) : type_(type) {} 38 | // disable copy 39 | Device(const Device&) = delete; 40 | Device& operator=(const Device&) = delete; 41 | 42 | private: 43 | DeviceType type_; 44 | }; 45 | 46 | class CPUDevice : public Device { 47 | public: 48 | void* Allocate(size_t nbytes) final { 49 | void* buf = malloc(nbytes); 50 | return buf; 51 | } 52 | 53 | void Free(void* buf) final { 54 | free(buf); 55 | } 56 | 57 | std::string name() const final { return "cpu"; } 58 | 59 | private: 60 | CPUDevice() : Device(kCPU) {} 61 | friend class DeviceManager; 62 | }; 63 | 64 | #ifdef USE_GPU 65 | 66 | #define NEXUS_CUDA_CHECK(condition) \ 67 | do { \ 68 | cudaError_t err = (condition); \ 69 | CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err); \ 70 | } while (0) 71 | 72 | class GPUDevice : public Device { 73 | 
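`Timer`, `Tickable`, and `TimeSystem` above form Nexus's lightweight timing plumbing: `Timer` stores named time points and reports the gap between any two tags, while `TimeSystem` runs a once-per-second loop that calls `Tick()` on every registered `Tickable`, which fires `TickImpl()` once `tick_interval_sec_` ticks have accumulated (this is how `IntervalCounter` in `metric.h` snapshots its history). A short `Timer` usage sketch (the `"begin"` tag mirrors the one recorded in `Task`'s constructor; the sleep duration is illustrative):

```cpp
#include <chrono>
#include <glog/logging.h>
#include <thread>

// Sketch: timing one stage with Timer's named time points.
void DemoTimer() {
  nexus::Timer timer;
  timer.Record("begin");
  std::this_thread::sleep_for(std::chrono::milliseconds(25));
  timer.Record("end");
  // Logs roughly 25; GetLatencyMillis returns 0 if either tag is absent.
  LOG(INFO) << "stage took " << timer.GetLatencyMillis("begin", "end")
            << " ms";
}
```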
public: 74 | int gpu_id() const { return gpu_id_; } 75 | 76 | void* Allocate(size_t nbytes) final; 77 | 78 | void Free(void* buf) final; 79 | 80 | std::string name() const final { return name_; } 81 | 82 | std::string device_name() const { return device_name_; } 83 | 84 | std::string uuid() const { return uuid_; } 85 | 86 | size_t FreeMemory() const; 87 | 88 | size_t TotalMemory() const { return total_memory_; } 89 | 90 | private: 91 | explicit GPUDevice(int gpu_id); 92 | friend class DeviceManager; 93 | 94 | private: 95 | int gpu_id_; 96 | std::string name_; 97 | std::string device_name_; 98 | std::string uuid_; 99 | size_t total_memory_; 100 | }; 101 | 102 | #endif 103 | 104 | class DeviceManager { 105 | public: 106 | static DeviceManager& Singleton() { 107 | static DeviceManager device_manager; 108 | return device_manager; 109 | } 110 | 111 | CPUDevice* GetCPUDevice() const { 112 | return cpu_device_; 113 | } 114 | 115 | #ifdef USE_GPU 116 | GPUDevice* GetGPUDevice(int gpu_id) const; 117 | #endif 118 | 119 | private: 120 | DeviceManager(); 121 | 122 | CPUDevice* cpu_device_; 123 | #ifdef USE_GPU 124 | std::vector<GPUDevice*> gpu_devices_; 125 | #endif 126 | }; 127 | 128 | } // namespace nexus 129 | 130 | #endif // NEXUS_COMMON_DEVICE_H_ 131 | -------------------------------------------------------------------------------- /src/nexus/backend/utils.cpp: -------------------------------------------------------------------------------- 1 | #include <fstream> 2 | #include <unordered_set> 3 | #include <gflags/gflags.h> 4 | #include <glog/logging.h> 5 | 6 | #include "nexus/backend/utils.h" 7 | #include "nexus/common/util.h" 8 | 9 | DEFINE_bool(hack_reply_omit_output, false, 10 | "HACK: omit output field in ReplyProto"); 11 | 12 | namespace nexus { 13 | namespace backend { 14 | 15 | void LoadClassnames(const std::string &filepath, 16 | std::unordered_map<int, std::string> *classnames) { 17 | std::ifstream infile(filepath); 18 | CHECK(infile.good()) << "Classname file " << filepath << " doesn't exist"; 19 | std::string line; 20 | int class_id = 0; 21 | while (std::getline(infile, line)) { 22 | std::vector<std::string> items; 23 | SplitString(line, ',', &items); 24 | if (items.size() == 1) { 25 | classnames->emplace(class_id++, line); 26 | } else { 27 | int idx = std::stoi(items[0]); 28 | classnames->emplace(idx, items[1]); 29 | } 30 | } 31 | infile.close(); 32 | } 33 | 34 | void PostprocessClassification( 35 | const QueryProto &query, const float *prob, size_t nprobs, 36 | QueryResultProto *result, 37 | const std::unordered_map<int, std::string> *classnames) { 38 | // TODO: handle top k and threshold in the query 39 | if (classnames != nullptr) { 40 | CHECK_EQ(classnames->size(), nprobs) << "Mismatch between number of " 41 | << "class names and number of outputs"; 42 | } 43 | std::unordered_set<std::string> output_fields(query.output_field().begin(), 44 | query.output_field().end()); 45 | if (output_fields.empty()) { 46 | output_fields.insert("class_id"); 47 | output_fields.insert("class_prob"); 48 | output_fields.insert("class_name"); 49 | } 50 | float max_prob = 0.; 51 | int max_idx = -1; 52 | for (int i = 0; i < (int)nprobs; ++i) { 53 | float p = prob[i]; 54 | if (p > max_prob) { 55 | max_prob = p; 56 | max_idx = i; 57 | } 58 | } 59 | if (max_idx > -1) { 60 | auto record = result->add_output(); 61 | if (FLAGS_hack_reply_omit_output) 62 | return; 63 | for (const auto& field : output_fields) { 64 | if (field == "class_id") { 65 | auto value = record->add_named_value(); 66 | value->set_name("class_id"); 67 | value->set_data_type(DT_INT32); 68 | value->set_i(max_idx); 69 | } else if (field == "class_prob") { 70 | auto value = 
record->add_named_value(); 71 | value->set_name("class_prob"); 72 | value->set_data_type(DT_FLOAT); 73 | value->set_f(max_prob); 74 | } else if (field == "class_name") { 75 | auto value = record->add_named_value(); 76 | value->set_name("class_name"); 77 | value->set_data_type(DT_STRING); 78 | if (classnames != nullptr) { 79 | auto iter = classnames->find(max_idx); 80 | if (iter == classnames->end()) { 81 | LOG(ERROR) << "Cannot find class name for class id " << max_idx; 82 | } else { 83 | value->set_s(iter->second); 84 | } 85 | } 86 | } 87 | } 88 | } 89 | } 90 | 91 | } // namespace backend 92 | } // namespace nexus 93 | -------------------------------------------------------------------------------- /src/nexus/common/metric.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/metric.h" 4 | 5 | namespace nexus { 6 | 7 | Counter::Counter() : 8 | count_(0) { 9 | } 10 | 11 | void Counter::Increase(uint64_t value) { 12 | count_.fetch_add(value, std::memory_order_relaxed); 13 | } 14 | 15 | void Counter::Reset() { 16 | count_.exchange(0, std::memory_order_relaxed); 17 | } 18 | 19 | IntervalCounter::IntervalCounter(uint32_t interval_sec) : 20 | Tickable(interval_sec), 21 | count_(0) { 22 | } 23 | 24 | void IntervalCounter::Increase(uint64_t value) { 25 | count_.fetch_add(value, std::memory_order_relaxed); 26 | } 27 | 28 | void IntervalCounter::Reset() { 29 | std::lock_guard guard(history_mutex_); 30 | count_.exchange(0, std::memory_order_relaxed); 31 | history_.clear(); 32 | } 33 | 34 | std::vector IntervalCounter::GetHistory() { 35 | std::lock_guard guard(history_mutex_); 36 | std::vector ret(std::move(history_)); 37 | history_.clear(); 38 | return ret; 39 | } 40 | 41 | void IntervalCounter::TickImpl() { 42 | uint64_t count = count_.exchange(0, std::memory_order_relaxed); 43 | std::lock_guard guard(history_mutex_); 44 | history_.push_back(count); 45 | } 46 | 47 | EWMA::EWMA(uint32_t sample_interval_sec, uint32_t avg_interval_sec) : 48 | sample_interval_sec_(sample_interval_sec), 49 | avg_interval_sec_(avg_interval_sec), 50 | rate_() { 51 | alpha_ = 1 - exp(-1. 
* sample_interval_sec_ / avg_interval_sec_); 52 | } 53 | 54 | EWMA::EWMA(const EWMA& other) : 55 | sample_interval_sec_(other.sample_interval_sec_), 56 | avg_interval_sec_(other.avg_interval_sec_), 57 | rate_(other.rate_), 58 | alpha_(other.alpha_) {} 59 | 60 | void EWMA::AddSample(uint64_t count) { 61 | double current_rate = static_cast(count) / sample_interval_sec_; 62 | if (rate_ < 0) { 63 | rate_ = current_rate; 64 | } else { 65 | rate_ += (current_rate - rate_) * alpha_; 66 | } 67 | } 68 | 69 | EWMA& EWMA::operator=(const EWMA& other) { 70 | if (this != &other) { 71 | sample_interval_sec_ = other.sample_interval_sec_; 72 | avg_interval_sec_ = other.avg_interval_sec_; 73 | rate_ = other.rate_; 74 | alpha_ = other.alpha_; 75 | } 76 | return *this; 77 | } 78 | 79 | MetricRegistry& MetricRegistry::Singleton() { 80 | static MetricRegistry metric_registry_; 81 | return metric_registry_; 82 | } 83 | 84 | std::shared_ptr MetricRegistry::CreateCounter() { 85 | std::lock_guard lock(mutex_); 86 | auto metric = std::make_shared(); 87 | metrics_.insert(metric); 88 | return metric; 89 | } 90 | 91 | std::shared_ptr MetricRegistry::CreateIntervalCounter( 92 | uint32_t interval_sec) { 93 | std::lock_guard lock(mutex_); 94 | auto metric = std::make_shared(interval_sec); 95 | metrics_.insert(metric); 96 | TimeSystem::Singleton().AddTickable(metric); 97 | return metric; 98 | } 99 | 100 | void MetricRegistry::RemoveMetric(std::shared_ptr metric) { 101 | std::lock_guard lock(mutex_); 102 | TimeSystem::Singleton().RemoveTickable(metric); 103 | metrics_.erase(metric); 104 | } 105 | 106 | } // namespace nexus 107 | -------------------------------------------------------------------------------- /src/nexus/app/app_base.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/app_base.h" 2 | 3 | namespace nexus { 4 | namespace app { 5 | 6 | AppBase::AppBase(const std::string& port, 7 | const std::string& rpc_port, 8 | const std::string& sch_addr, 9 | size_t nthreads) : 10 | Frontend(port, rpc_port, sch_addr), 11 | nthreads_(nthreads), 12 | qp_(nullptr), 13 | step_us_(0) 14 | { 15 | } 16 | 17 | AppBase::~AppBase() { 18 | if (qp_ != nullptr) { 19 | delete qp_; 20 | } 21 | } 22 | 23 | void AppBase::Start() { 24 | CHECK(qp_ != nullptr) << "Query processor is not initialized"; 25 | Run(qp_, nthreads_); 26 | } 27 | 28 | std::shared_ptr AppBase::GetModelHandler( 29 | const std::string& framework, const std::string& model_name, 30 | uint32_t version, uint64_t latency_sla, float estimate_workload, 31 | std::vector image_size, LoadBalancePolicy lb_policy) { 32 | LoadModelRequest req; 33 | req.set_node_id(node_id()); 34 | auto model_sess = req.mutable_model_session(); 35 | model_sess->set_framework(framework); 36 | model_sess->set_model_name(model_name); 37 | model_sess->set_version(version); 38 | model_sess->set_latency_sla(latency_sla); 39 | if (image_size.size() > 0) { 40 | if (image_size.size() != 2) { 41 | LOG(ERROR) << "Image size is not 2"; 42 | return nullptr; 43 | } 44 | model_sess->set_image_height(image_size[0]); 45 | model_sess->set_image_width(image_size[1]); 46 | } 47 | if (estimate_workload < 0) { 48 | LOG(ERROR) << "Estimate workload must be non-negative value"; 49 | return nullptr; 50 | } 51 | if (estimate_workload > 0) { 52 | req.set_estimate_workload(estimate_workload); 53 | } 54 | 55 | auto model_handler = LoadModel(req, lb_policy); 56 | if (model_handler == nullptr) { 57 | // TODO: load model failed, should retry after some time, 58 | // or wait for 
callback from scheduler 59 | LOG(FATAL) << "Load model failed"; 60 | } 61 | return model_handler; 62 | } 63 | 64 | bool AppBase::IsComplexQuery() const { 65 | return slo_us_ != 0; 66 | } 67 | 68 | void AppBase::ComplexQuerySetup(const std::string &cq_id, uint32_t slo_us, uint32_t step_us) { 69 | CHECK(!IsComplexQuery()) << "The complex query has been set up."; 70 | CHECK(!cq_id.empty()) << "cq_id cannot be empty."; 71 | CHECK(slo_us != 0) << "slo_us cannot be 0."; 72 | CHECK(step_us != 0) << "step_us cannot be 0."; 73 | cq_id_ = cq_id; 74 | slo_us_ = slo_us; 75 | step_us_ = step_us; 76 | 77 | ComplexQuerySetupRequest req; 78 | req.set_cq_id(cq_id_); 79 | req.set_slo_us(slo_us_); 80 | req.set_step_us(step_us); 81 | Frontend::ComplexQuerySetup(req); 82 | } 83 | 84 | void AppBase::ComplexQueryAddEdge(const std::shared_ptr& source, 85 | const std::shared_ptr& target) { 86 | ComplexQueryAddEdgeRequest req; 87 | req.set_cq_id(cq_id_); 88 | req.mutable_source()->CopyFrom(source->model_session()); 89 | req.mutable_target()->CopyFrom(target->model_session()); 90 | Frontend::ComplexQueryAddEdge(req); 91 | } 92 | 93 | void LaunchApp(AppBase* app) { 94 | app->Setup(); 95 | app->Start(); 96 | } 97 | 98 | } // namespace app 99 | } // namespace nexus 100 | -------------------------------------------------------------------------------- /src/nexus/common/rpc_call.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_RPC_CALL_H_ 2 | #define NEXUS_COMMON_RPC_CALL_H_ 3 | 4 | #include 5 | 6 | namespace nexus { 7 | 8 | enum RpcCallStatus { 9 | RPC_CALL_CREATE, 10 | RPC_CALL_PROCESS, 11 | RPC_CALL_FINISH, 12 | }; 13 | 14 | class RpcCallBase { 15 | public: 16 | RpcCallBase(grpc::ServerCompletionQueue* cq) : 17 | cq_(cq), 18 | status_(RPC_CALL_CREATE) { 19 | } 20 | 21 | virtual ~RpcCallBase() {} 22 | 23 | virtual void Proceed() = 0; 24 | 25 | protected: 26 | grpc::ServerCompletionQueue* cq_; 27 | grpc::ServerContext ctx_; 28 | RpcCallStatus status_; 29 | }; 30 | 31 | #define INSTANTIATE_RPC_CALL(SERVICE, RPCCALL, REQUEST, REPLY) \ 32 | class RPCCALL##_Call : public RpcCallBase { \ 33 | public: \ 34 | using Handler = std::function; \ 36 | RPCCALL##_Call(SERVICE* service, grpc::ServerCompletionQueue* cq, \ 37 | Handler handle) : \ 38 | RpcCallBase(cq), \ 39 | service_(service), \ 40 | handle_(std::move(handle)), \ 41 | responder_(&ctx_) { \ 42 | Proceed(); \ 43 | } \ 44 | void Proceed() { \ 45 | if (status_ == RPC_CALL_CREATE) { \ 46 | status_ = RPC_CALL_PROCESS; \ 47 | service_->Request##RPCCALL( \ 48 | &ctx_, &request_, &responder_, cq_, cq_, this); \ 49 | } else if (status_ == RPC_CALL_PROCESS) { \ 50 | new RPCCALL##_Call(service_, cq_, handle_); \ 51 | handle_(ctx_, request_, &reply_); \ 52 | status_ = RPC_CALL_FINISH; \ 53 | responder_.Finish(reply_, grpc::Status::OK, this); \ 54 | } else { \ 55 | CHECK_EQ(status_, RPC_CALL_FINISH); \ 56 | delete this; \ 57 | } \ 58 | } \ 59 | private: \ 60 | SERVICE* service_; \ 61 | Handler handle_; \ 62 | grpc::ServerAsyncResponseWriter responder_; \ 63 | REQUEST request_; \ 64 | REPLY reply_; \ 65 | } 66 | 67 | } // namespace nexus 68 | 69 | #endif // NEXUS_COMMON_RPC_CALL_H_ 70 | -------------------------------------------------------------------------------- /python/nexus/client.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import socket 3 | 4 | from .proto import nnquery_pb2 as npb 5 | 6 | MAGIC_NUMBER = 0xDEADBEEF 7 | HEADER_SIZE = 12 8 | # Message type 9 
| MSG_USER_REGISTER = 1 10 | MSG_USER_REQUEST = 2 11 | MSG_USER_REPLY = 3 12 | 13 | 14 | class Client: 15 | def __init__(self, server_addr, user_id): 16 | self.server_addr = server_addr 17 | self.user_id = user_id 18 | self.req_id = 0 19 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 20 | self.sock.settimeout(1) # timeout after 1 second 21 | host, port = server_addr.split(':') 22 | try: 23 | self.sock.connect((host, int(port))) 24 | except OSError as e: 25 | raise RuntimeError("Error in connecting to %s" % server_addr) from e 26 | self.register() 27 | 28 | 29 | def __del__(self): 30 | self.sock.close() 31 | 32 | 33 | def register(self): 34 | req = npb.RequestProto(user_id=self.user_id) 35 | msg = self._prepare_message(MSG_USER_REGISTER, req) 36 | self.sock.sendall(msg) 37 | reply = self._recv_reply() 38 | assert reply.status == 0 39 | 40 | 41 | def request(self, img): 42 | req = self._prepare_req(img) 43 | msg = self._prepare_message(MSG_USER_REQUEST, req) 44 | failed = 0 45 | while True: 46 | try: 47 | self.sock.sendall(msg) 48 | reply = self._recv_reply() 49 | break 50 | except socket.timeout: 51 | failed += 1 52 | if failed == 3: 53 | return None 54 | return reply 55 | 56 | 57 | def _prepare_req(self, img): 58 | req = npb.RequestProto() 59 | req.user_id = self.user_id 60 | req.req_id = self.req_id 61 | req.input.data_type = npb.DT_IMAGE 62 | req.input.image.data = img 63 | req.input.image.format = npb.ImageProto.JPEG 64 | req.input.image.color = True 65 | self.req_id += 1 66 | return req 67 | 68 | 69 | def _prepare_message(self, msg_type, request): 70 | body = request.SerializeToString() 71 | header = struct.pack('!LLL', MAGIC_NUMBER, msg_type, len(body)) 72 | return header + body 73 | 74 | 75 | def _recv_reply(self): 76 | body_length = self._recv_header() 77 | buf = self._read_nbytes(body_length) 78 | reply = npb.ReplyProto() 79 | reply.ParseFromString(buf) 80 | return reply 81 | 82 | 83 | def _recv_header(self): 84 | buf = self._read_nbytes(HEADER_SIZE) 85 | magic_no, msg_type, length = struct.unpack('!LLL', buf) 86 | assert magic_no == MAGIC_NUMBER 87 | assert msg_type == MSG_USER_REPLY 88 | return length 89 | 90 | 91 | def _read_nbytes(self, n): 92 | """ Read exactly n bytes from the socket. 93 | Raise RuntimeError if the connection closed before 94 | n bytes were read. 
95 | """ 96 | buf = '' 97 | while n > 0: 98 | data = self.sock.recv(n) 99 | if data == '': 100 | raise RuntimeError("Unexpected connection close") 101 | buf += data 102 | n -= len(data) 103 | return buf 104 | -------------------------------------------------------------------------------- /src/nexus/backend/batch_task.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/backend/batch_task.h" 2 | #include "nexus/common/util.h" 3 | #include 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | BatchTask::BatchTask(uint32_t max_batch) : 9 | max_batch_(max_batch), 10 | input_write_pt_(nullptr), 11 | input_elements_(0) {} 12 | 13 | void BatchTask::SetInputArray(ArrayPtr arr) { 14 | input_array_ = arr; 15 | input_write_pt_ = input_array_->Data(); 16 | input_elements_ = 0; 17 | } 18 | 19 | void BatchTask::CreateInputArray(DataType data_type, 20 | size_t num_elements_per_input, 21 | Device* device) { 22 | input_array_ = std::make_shared( 23 | data_type, max_batch_ * num_elements_per_input, device); 24 | input_write_pt_ = input_array_->Data(); 25 | input_elements_ = 0; 26 | } 27 | 28 | void BatchTask::SetOutputArrays( 29 | const std::unordered_map& arrays) { 30 | output_arrays_ = arrays; 31 | } 32 | 33 | void BatchTask::CreateOutputArrays( 34 | const std::unordered_map& sizes, Device* device) { 35 | uint32_t batch = max_batch_; 36 | if (inputs_.size() > 0) { 37 | batch = inputs_.size(); 38 | } 39 | for (auto iter : sizes) { 40 | auto arr = std::make_shared(DT_FLOAT, batch * iter.second, 41 | device); 42 | output_arrays_.emplace(iter.first, arr); 43 | } 44 | } 45 | 46 | ArrayPtr BatchTask::GetOutputArray(const std::string& name) const { 47 | CHECK_GT(output_arrays_.count(name), 0) << "Output array " << name << 48 | " doesn't exist"; 49 | return output_arrays_.at(name); 50 | } 51 | 52 | void BatchTask::AppendInput(std::shared_ptr input, 53 | std::shared_ptr task) { 54 | CHECK_EQ(input_array_->data_type(), input->array->data_type()) << 55 | "Input data type mismatch"; 56 | CHECK_LT(inputs_.size(), max_batch_) << "Exceed max batch size"; 57 | CHECK_LE(input_elements_ + input->array->num_elements(), 58 | input_array_->num_elements()) << "Exceeds batch input array capacity"; 59 | inputs_.push_back(input); 60 | tasks_.push_back(task); 61 | auto in_arr = input->array; 62 | const char* src_data = in_arr->Data(); 63 | size_t nbytes = in_arr->num_elements() * type_size(input_array_->data_type()); 64 | Memcpy(input_write_pt_, input_array_->device(), src_data, in_arr->device(), 65 | nbytes); 66 | input_write_pt_ += nbytes; 67 | } 68 | 69 | void BatchTask::SliceOutputBatch( 70 | const std::unordered_map& slices) { 71 | CHECK(outputs_.empty()) << "Batch output is already sliced"; 72 | CHECK_EQ(output_arrays_.size(), slices.size()) << "Number of outputs must " 73 | "match the number of slices"; 74 | for (uint i = 0; i < inputs_.size(); ++i) { 75 | auto input = inputs_[i]; 76 | std::unordered_map slice_arrays; 77 | for (auto iter : output_arrays_) { 78 | auto const& slice = slices.at(iter.first); 79 | slice_arrays.emplace(iter.first, iter.second->Slice( 80 | slice.offset(i), slice.num_elements(i))); 81 | } 82 | outputs_.push_back(std::make_shared(input->task_id, input->index, 83 | slice_arrays)); 84 | } 85 | } 86 | 87 | void BatchTask::set_outputs( 88 | const std::vector>& outputs) { 89 | CHECK_EQ(outputs.size(), inputs_.size()) << "Number of outputs must match " 90 | "number of inputs"; 91 | outputs_.clear(); 92 | outputs_ = outputs; 93 | } 94 | 95 | } // 
namespace backend 96 | } // namespace nexus 97 | -------------------------------------------------------------------------------- /src/nexus/common/image.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "nexus/common/image.h" 14 | 15 | DEFINE_string(hack_image_root, "", "HACK: path to directory of images"); 16 | 17 | class _Hack_Images { 18 | public: 19 | _Hack_Images(const std::string &root) { 20 | if (root.empty()) 21 | return; 22 | LOG(INFO) << "Initializing _Hack_Images, root: " << root; 23 | auto root_path = boost::filesystem::absolute(root); 24 | for (auto it = boost::filesystem::recursive_directory_iterator(root_path), 25 | end = boost::filesystem::recursive_directory_iterator(); 26 | it != end; ++it) { 27 | if (!boost::filesystem::is_regular_file(it->path())) 28 | continue; 29 | if (it->path().extension().string() != ".jpg") 30 | continue; 31 | 32 | std::ifstream fin(it->path().string(), std::ios::binary); 33 | std::istreambuf_iterator fin_beg(fin), fin_end; 34 | std::vector data(fin_beg, fin_end); 35 | 36 | auto rel_path = boost::filesystem::relative(it->path(), root_path); 37 | data_.emplace(rel_path.string(), std::move(data)); 38 | } 39 | LOG(INFO) << "Found " << data_.size() << " images in " << root; 40 | 41 | LOG(INFO) << "Keys of some random images:"; 42 | auto iter = data_.begin(); 43 | for (int i = 0; i < 10 && iter != data_.end(); ++i, ++iter) { 44 | LOG(INFO) << " " << iter->first; 45 | } 46 | 47 | LOG(INFO) << "_Hack_Images initilization finished"; 48 | } 49 | 50 | const std::vector &get(const std::string &filename) const { 51 | auto iter = data_.find(filename); 52 | return iter != data_.end() ? iter->second : empty_; 53 | } 54 | 55 | private: 56 | std::unordered_map> data_; 57 | std::vector empty_; 58 | }; 59 | 60 | namespace nexus { 61 | 62 | cv::Mat DecodeImageImpl(const std::vector &vec_data, bool color, 63 | ChannelOrder order) { 64 | cv::Mat img_bgr; 65 | int cv_read_flag = color ? 
cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE; 66 | img_bgr = cv::imdecode(vec_data, cv_read_flag); 67 | if (!img_bgr.data) { 68 | LOG(ERROR) << "Could not decode image"; 69 | } 70 | if (order == CO_BGR) { 71 | return img_bgr; 72 | } 73 | cv::Mat img_rgb; 74 | cv::cvtColor(img_bgr, img_rgb, cv::COLOR_BGR2RGB); 75 | return img_rgb; 76 | } 77 | 78 | cv::Mat _Hack_DecodeImageByFilename(const ImageProto &image, 79 | ChannelOrder order) { 80 | static _Hack_Images *_images = new _Hack_Images(FLAGS_hack_image_root); 81 | const auto &vec_data = _images->get(image.hack_filename()); 82 | if (vec_data.empty()) { 83 | if (image.hack_filename() != "__init_Hack_Images") 84 | LOG(ERROR) << "Cannot find image by filename: " << image.hack_filename(); 85 | return {}; 86 | } 87 | return DecodeImageImpl(vec_data, image.color(), order); 88 | } 89 | 90 | cv::Mat DecodeImage(const ImageProto &image, ChannelOrder order) { 91 | if (image.hack_filename().empty()) { 92 | const std::string &data = image.data(); 93 | std::vector vec_data(data.c_str(), data.c_str() + data.size()); 94 | return DecodeImageImpl(vec_data, image.color(), order); 95 | } else { 96 | return _Hack_DecodeImageByFilename(image, order); 97 | } 98 | } 99 | 100 | } // namespace nexus 101 | -------------------------------------------------------------------------------- /examples/simple_app/src/frontend.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class SimpleApp : public AppBase { 9 | public: 10 | SimpleApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads, const std::string& framework, 12 | const std::string& model_name, int version, int latency_sla_ms, 13 | float estimate_workload, int image_height, int image_width) : 14 | AppBase(port, rpc_port, sch_addr, nthreads), 15 | framework_(framework), 16 | model_name_(model_name), 17 | version_(version), 18 | latency_sla_ms_(latency_sla_ms), 19 | estimate_workload_(estimate_workload) { 20 | CHECK_GE(image_height, 0) << "Image height must be no less than 0"; 21 | CHECK_GE(image_width, 0) << "Image width must be no less than 0"; 22 | if (image_height == 0 || image_width == 0) { 23 | image_height_ = 0; 24 | image_width_ = 0; 25 | } else { 26 | image_height_ = image_height; 27 | image_width_ = image_width; 28 | } 29 | } 30 | 31 | void Setup() final { 32 | model_ = GetModelHandler(framework_, model_name_, version_, 33 | latency_sla_ms_, estimate_workload_, 34 | {image_height_, image_width_}); 35 | auto func1 = [&](std::shared_ptr ctx) { 36 | auto output = model_->Execute(ctx, ctx->const_request().input()); 37 | return std::vector{ 38 | std::make_shared("output", output)}; 39 | }; 40 | auto func2 = [&](std::shared_ptr ctx) { 41 | auto output = ctx->GetVariable("output")->result(); 42 | output->ToProto(ctx->reply()); 43 | return std::vector{}; 44 | }; 45 | ExecBlock* b1 = new ExecBlock(0, func1, {}); 46 | ExecBlock* b2 = new ExecBlock(1, func2, {"output"}); 47 | qp_ = new QueryProcessor({b1, b2}); 48 | } 49 | 50 | private: 51 | std::string framework_; 52 | std::string model_name_; 53 | int version_; 54 | int latency_sla_ms_; 55 | float estimate_workload_; 56 | uint image_height_; 57 | uint image_width_; 58 | std::shared_ptr model_; 59 | }; 60 | 61 | DEFINE_string(port, "9001", "Server port"); 62 | DEFINE_string(rpc_port, "9002", "RPC port"); 63 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address"); 64 | 
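// A typical invocation of this example frontend might look like the
// following (binary name, framework, and model are illustrative; any
// model/latency pair present in the profile database should work):
//   ./simple_app -sch_addr 127.0.0.1 -port 9001 -rpc_port 9002 \
//                -framework caffe -model vgg16 -latency 200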
DEFINE_int32(nthread, 4, "Number of threads processing requests"); 65 | DEFINE_string(framework, "", "Framework (caffe2, caffe, darknet, tensorflow)"); 66 | DEFINE_string(model, "", "Model name"); 67 | DEFINE_int32(model_version, 1, "Model version"); 68 | DEFINE_int32(latency, 500, "Latency SLA in ms"); 69 | DEFINE_double(workload, 0, "Estimated request rate"); 70 | DEFINE_int32(height, 0, "Image height"); 71 | DEFINE_int32(width, 0, "Image width"); 72 | 73 | int main(int argc, char** argv) { 74 | // log to stderr 75 | FLAGS_logtostderr = 1; 76 | // Init glog 77 | google::InitGoogleLogging(argv[0]); 78 | // Parse command line flags 79 | google::ParseCommandLineFlags(&argc, &argv, true); 80 | // Setup backtrace on segfault 81 | google::InstallFailureSignalHandler(); 82 | 83 | CHECK_GT(FLAGS_framework.length(), 0) << "Missing framework"; 84 | CHECK_GT(FLAGS_model.length(), 0) << "Missing model"; 85 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 86 | // Create the frontend server 87 | SimpleApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread, 88 | FLAGS_framework, FLAGS_model, FLAGS_model_version, 89 | FLAGS_latency, FLAGS_workload, FLAGS_height, FLAGS_width); 90 | LaunchApp(&app); 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /src/nexus/common/connection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/connection.h" 4 | 5 | namespace nexus { 6 | 7 | Connection::Connection(boost::asio::ip::tcp::socket socket, 8 | MessageHandler* handler) : 9 | socket_(std::move(socket)), 10 | handler_(handler), 11 | wrong_header_(false) { 12 | boost::asio::ip::tcp::no_delay option(true); 13 | socket_.set_option(option); 14 | } 15 | 16 | Connection::Connection(boost::asio::io_context& io_context, 17 | MessageHandler* handler) : 18 | socket_(io_context), 19 | handler_(handler), 20 | wrong_header_(false) { 21 | } 22 | 23 | void Connection::Start() { 24 | DoReadHeader(); 25 | } 26 | 27 | void Connection::Stop() { 28 | LOG(INFO) << "Connection Stop"; 29 | std::lock_guard socket_guard(socket_mutex_); 30 | socket_.close(); 31 | } 32 | 33 | void Connection::Write(std::shared_ptr msg) { 34 | std::lock_guard lock(write_queue_mutex_); 35 | bool write_in_progress = !write_queue_.empty(); 36 | write_queue_.push_back(std::move(msg)); 37 | if (!write_in_progress) { 38 | DoWrite(); 39 | } 40 | } 41 | 42 | void Connection::DoReadHeader() { 43 | auto self(shared_from_this()); 44 | std::lock_guard socket_guard(socket_mutex_); 45 | boost::asio::async_read( 46 | socket_, 47 | boost::asio::buffer(msg_header_buffer_, MESSAGE_HEADER_SIZE), 48 | [this, self](boost::system::error_code ec, size_t nbytes) { 49 | if (ec) { 50 | if (ec != boost::asio::error::operation_aborted) { 51 | handler_->HandleError(self, ec); 52 | } 53 | return; 54 | } 55 | MessageHeader msg_header; 56 | if (!DecodeHeader(msg_header_buffer_, &msg_header)) { 57 | if (!wrong_header_) { 58 | LOG(ERROR) << "Wrong header detected"; 59 | wrong_header_ = true; 60 | } 61 | DoReadHeader(); 62 | } else { 63 | wrong_header_ = false; 64 | auto msg = std::make_shared(msg_header); 65 | // LOG(INFO) << "msg type: " << msg->type() << ", body length: " << 66 | // msg->body_length(); 67 | DoReadBody(std::move(msg)); 68 | } 69 | }); 70 | } 71 | 72 | void Connection::DoReadBody(std::shared_ptr msg) { 73 | auto self(shared_from_this()); 74 | std::lock_guard socket_guard(socket_mutex_); 75 | 
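// At this point DoReadHeader() has already validated the 12-byte header
// (magic number 0xDEADBEEF, message type, body length) and constructed a
// Message whose buffer can hold body_length bytes. async_read completes
// only once the full body has arrived, so the handler always receives a
// complete message.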
boost::asio::async_read( 76 | socket_, 77 | boost::asio::buffer(msg->body(), msg->body_length()), 78 | [this, self, msg](boost::system::error_code ec, 79 | size_t /* bytes_transferred */) { 80 | if (ec) { 81 | if (ec != boost::asio::error::operation_aborted) { 82 | handler_->HandleError(self, ec); 83 | } 84 | } else { 85 | handler_->HandleMessage(self, std::move(msg)); 86 | DoReadHeader(); 87 | } 88 | }); 89 | } 90 | 91 | void Connection::DoWrite() { 92 | auto self(shared_from_this()); 93 | std::lock_guard socket_guard(socket_mutex_); 94 | boost::asio::async_write( 95 | socket_, 96 | boost::asio::buffer(write_queue_.front()->data(), 97 | write_queue_.front()->length()), 98 | [this, self](boost::system::error_code ec, size_t) { 99 | std::lock_guard lock(write_queue_mutex_); 100 | if (ec) { 101 | if (ec != boost::asio::error::operation_aborted) { 102 | handler_->HandleError(self, ec); 103 | } 104 | } else { 105 | write_queue_.pop_front(); 106 | if (!write_queue_.empty()) { 107 | DoWrite(); 108 | } 109 | } 110 | }); 111 | } 112 | 113 | } // namespace nexus 114 | -------------------------------------------------------------------------------- /tests/cpp/scheduler/backend_delegate_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "nexus/common/config.h" 7 | #include "nexus/common/model_db.h" 8 | #include "nexus/proto/control.pb.h" 9 | #include "nexus/proto/nnquery.pb.h" 10 | #include "nexus/scheduler/backend_delegate.h" 11 | 12 | //DECLARE_string(model_db); 13 | DECLARE_int32(beacon); 14 | DECLARE_int32(epoch); 15 | 16 | namespace nexus { 17 | namespace scheduler { 18 | 19 | class BackendDelegateTest : public ::testing::Test { 20 | protected: 21 | virtual void SetUp() { 22 | gpu_device_ = "TITAN_X_(Pascal)"; 23 | gpu_available_memory_ = 12L * 1024L * 1024L * 1024L; 24 | FLAGS_beacon = 1; 25 | FLAGS_epoch = 5; 26 | backend_.reset(new BackendDelegate( 27 | 1, "127.0.0.1", "8001", "8002", gpu_device_, gpu_available_memory_, 28 | FLAGS_beacon)); 29 | } 30 | 31 | std::string gpu_device_; 32 | size_t gpu_available_memory_; 33 | std::unique_ptr backend_; 34 | }; 35 | 36 | TEST_F(BackendDelegateTest, PrepareLoadModel) { 37 | ModelSession vgg16_sess; 38 | vgg16_sess.set_framework("caffe"); 39 | vgg16_sess.set_model_name("vgg16"); 40 | vgg16_sess.set_version(1); 41 | vgg16_sess.set_latency_sla(500); 42 | 43 | ModelSession vgg_face_sess; 44 | vgg_face_sess.set_framework("caffe"); 45 | vgg_face_sess.set_model_name("vgg_face"); 46 | vgg_face_sess.set_version(1); 47 | vgg_face_sess.set_latency_sla(300); 48 | 49 | // Residue workload 50 | for (float workload : {50., 100., 150., 200., 250.}) { 51 | InstanceInfo info; 52 | double occupancy; 53 | bool ret = backend_->PrepareLoadModel(vgg16_sess, workload, &info, 54 | &occupancy); 55 | ASSERT_TRUE(ret); 56 | ASSERT_GE(info.throughput, workload); 57 | ASSERT_GT(info.batch, 0); 58 | ASSERT_LE(occupancy, 1.); 59 | } 60 | 61 | // Saturate entire gpu when workload > 298 62 | for (float workload : {300., 400., 500.}) { 63 | InstanceInfo info; 64 | double occupancy; 65 | bool ret = backend_->PrepareLoadModel(vgg16_sess, workload, &info, 66 | &occupancy); 67 | ASSERT_TRUE(ret); 68 | ASSERT_GT(info.batch, 0); 69 | ASSERT_EQ(occupancy, 1.); 70 | } 71 | 72 | InstanceInfo vgg16_info; 73 | double occupancy; 74 | backend_->PrepareLoadModel(vgg16_sess, 150., &vgg16_info, &occupancy); 75 | backend_->LoadModel(vgg16_info); 76 | ASSERT_NEAR(backend_->Occupancy(), 
occupancy, 1e-3); 77 | 78 | // Try to load a second model 79 | for (float workload : {50, 100, 125}) { 80 | InstanceInfo info; 81 | double occupancy; 82 | bool ret = backend_->PrepareLoadModel(vgg_face_sess, workload, &info, 83 | &occupancy); 84 | LOG(INFO) << occupancy; 85 | ASSERT_TRUE(ret); 86 | ASSERT_GE(info.throughput, workload); 87 | ASSERT_GT(info.batch, 0); 88 | ASSERT_LE(occupancy, 1.); 89 | } 90 | 91 | for (float workload : {150, 200, 250}) { 92 | InstanceInfo info; 93 | double occupancy; 94 | bool ret = backend_->PrepareLoadModel(vgg_face_sess, workload, &info, 95 | &occupancy); 96 | ASSERT_FALSE(ret); 97 | } 98 | 99 | InstanceInfo vgg_face_info; 100 | backend_->PrepareLoadModel(vgg_face_sess, 125., &vgg_face_info, &occupancy); 101 | 102 | backend_->LoadModel(vgg_face_info); 103 | ASSERT_NEAR(backend_->Occupancy(), occupancy, 1e-3); 104 | } 105 | 106 | TEST_F(BackendDelegateTest, CheckAlive) { 107 | std::this_thread::sleep_for(std::chrono::milliseconds(2100)); 108 | ASSERT_FALSE(backend_->IsAlive()); 109 | backend_->Tick(); 110 | ASSERT_TRUE(backend_->IsAlive()); 111 | } 112 | 113 | } // namespace scheduler 114 | } // namespace nexus 115 | -------------------------------------------------------------------------------- /python/nexus/async_client.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import socket 3 | import asyncio 4 | from datetime import datetime 5 | 6 | from .proto import nnquery_pb2 as npb 7 | 8 | MAGIC_NUMBER = 0xDEADBEEF 9 | HEADER_SIZE = 12 10 | # Message type 11 | MSG_USER_REGISTER = 1 12 | MSG_USER_REQUEST = 2 13 | MSG_USER_REPLY = 3 14 | 15 | 16 | class AsyncClient: 17 | def __init__(self, server_addr, user_id): 18 | self._server_addr = server_addr 19 | self._user_id = user_id 20 | self._req_id = 0 21 | self._reader_lock = asyncio.Lock() 22 | self._replies = {} 23 | 24 | @property 25 | def next_req_id(self): 26 | return self._req_id 27 | 28 | async def __aenter__(self): 29 | host, port = self._server_addr.split(':') 30 | self._reader, self._writer = await asyncio.open_connection(host, port) 31 | return self 32 | 33 | async def __aexit__(self, exc_type, exc, tb): 34 | self._writer.close() 35 | await self._writer.wait_closed() 36 | 37 | async def register(self): 38 | req = npb.RequestProto(user_id=self._user_id) 39 | msg = self._prepare_message(MSG_USER_REGISTER, req) 40 | 41 | self._writer.write(msg) 42 | await self._writer.drain() 43 | 44 | reply, _ = await self._wait_reply(req.req_id) 45 | assert reply.status == 0 46 | 47 | async def _do_request(self, req, msg): 48 | send_time = datetime.now() 49 | self._writer.write(msg) 50 | await self._writer.drain() 51 | 52 | reply, recv_time = await self._wait_reply(req.req_id) 53 | return send_time, recv_time, reply 54 | 55 | def request(self, img): 56 | req = self._prepare_req(img) 57 | msg = self._prepare_message(MSG_USER_REQUEST, req) 58 | return self._do_request(req, msg) 59 | 60 | def _prepare_req(self, img): 61 | req = npb.RequestProto() 62 | req.user_id = self._user_id 63 | req.req_id = self._req_id 64 | req.input.data_type = npb.DT_IMAGE 65 | req.input.image.data = img 66 | req.input.image.format = npb.ImageProto.JPEG 67 | req.input.image.color = True 68 | self._req_id += 1 69 | return req 70 | 71 | def request_with_hack_filename(self, filename): 72 | req = npb.RequestProto() 73 | req.user_id = self._user_id 74 | req.req_id = self._req_id 75 | req.input.data_type = npb.DT_IMAGE 76 | req.input.image.hack_filename = filename 77 | req.input.image.format = 
npb.ImageProto.JPEG 78 | req.input.image.color = True 79 | self._req_id += 1 80 | 81 | msg = self._prepare_message(MSG_USER_REQUEST, req) 82 | return self._do_request(req, msg) 83 | 84 | def _prepare_message(self, msg_type, request): 85 | body = request.SerializeToString() 86 | header = struct.pack('!LLL', MAGIC_NUMBER, msg_type, len(body)) 87 | return header + body 88 | 89 | async def _wait_reply(self, req_id): 90 | while True: 91 | async with self._reader_lock: 92 | reply = self._replies.pop(req_id, None) 93 | if reply is not None: 94 | return reply 95 | 96 | buf = await self._reader.readexactly(HEADER_SIZE) 97 | magic_no, msg_type, body_length = struct.unpack('!LLL', buf) 98 | assert magic_no == MAGIC_NUMBER 99 | assert msg_type == MSG_USER_REPLY 100 | 101 | buf = await self._reader.readexactly(body_length) 102 | reply = npb.ReplyProto() 103 | reply.ParseFromString(buf) 104 | self._replies[reply.req_id] = (reply, datetime.now()) 105 | 106 | # return early to avoid lock competition 107 | reply = self._replies.pop(req_id, None) 108 | if reply is not None: 109 | return reply 110 | -------------------------------------------------------------------------------- /src/nexus/scheduler/sch_info.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_SCH_INFO_H_ 2 | #define NEXUS_SCHEDULER_SCH_INFO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "nexus/common/metric.h" 11 | #include "nexus/common/model_db.h" 12 | #include "nexus/common/model_def.h" 13 | #include "nexus/proto/nnquery.pb.h" 14 | #include "nexus/proto/control.pb.h" 15 | 16 | DECLARE_int32(avg_interval); 17 | 18 | namespace nexus { 19 | namespace scheduler { 20 | 21 | using SessionGroup = std::vector; 22 | using ServerList = std::unordered_set; 23 | 24 | struct SessionInfo { 25 | SessionInfo() : 26 | has_static_workload(false), 27 | unassigned_workload(0) {} 28 | 29 | double TotalThroughput() const; 30 | 31 | void SubscribeModelSession(uint32_t frontend_id, 32 | const std::string& model_sess_id); 33 | 34 | bool UnsubscribleModelSession(uint32_t frontend_id, const std::string& model_sess_id); 35 | 36 | void UpdateWorkload(uint32_t frontend_id, const ModelStatsProto& model_stats); 37 | 38 | SessionGroup model_sessions; 39 | /*! \brief Mapping from backend id to throughput */ 40 | std::unordered_map backend_weights; 41 | 42 | std::unordered_set backup_backends; 43 | /*! \brief Whether there is a static workload for this session */ 44 | bool has_static_workload; 45 | 46 | std::unordered_map session_subscribers; 47 | /*! \brief Map from frontend id to workload */ 48 | std::unordered_map > workloads; 49 | /*! \brief Workload request rate history */ 50 | std::deque rps_history; 51 | /*! \brief Gap between workload and throughput */ 52 | double unassigned_workload; 53 | /*! 
\brief Complex Query ID */ 54 | std::string complex_query_id; 55 | }; 56 | 57 | struct InstanceInfo { 58 | SessionGroup model_sessions; 59 | uint32_t batch; 60 | uint32_t max_batch; 61 | const ModelProfile* profile; 62 | double fwd_latency_us; 63 | double max_duty_cycle_us; 64 | double workload; 65 | double throughput; 66 | double weight; 67 | uint64_t memory_usage; 68 | bool backup; 69 | std::unordered_map backup_backends; 70 | 71 | InstanceInfo() : 72 | batch(0), 73 | max_batch(0), 74 | profile(nullptr), 75 | fwd_latency_us(0.), 76 | max_duty_cycle_us(0.), 77 | workload(0.), 78 | throughput(0.), 79 | weight(0.), 80 | memory_usage(0), 81 | backup(false) {} 82 | 83 | InstanceInfo(const InstanceInfo& other) : 84 | model_sessions(other.model_sessions), 85 | batch(other.batch), 86 | max_batch(other.max_batch), 87 | profile(other.profile), 88 | fwd_latency_us(other.fwd_latency_us), 89 | max_duty_cycle_us(other.max_duty_cycle_us), 90 | workload(other.workload), 91 | throughput(other.throughput), 92 | weight(other.weight), 93 | memory_usage(other.memory_usage), 94 | backup(other.backup) {} 95 | 96 | InstanceInfo& operator=(const InstanceInfo& other) { 97 | if (this != &other) { 98 | model_sessions = other.model_sessions; 99 | batch = other.batch; 100 | max_batch = other.max_batch; 101 | profile = other.profile; 102 | fwd_latency_us = other.fwd_latency_us; 103 | max_duty_cycle_us = other.max_duty_cycle_us; 104 | workload = other.workload; 105 | throughput = other.throughput; 106 | weight = other.weight; 107 | memory_usage = other.memory_usage; 108 | backup = other.backup; 109 | } 110 | return *this; 111 | } 112 | 113 | double GetWeight() const { 114 | return (weight > 0) ? weight : throughput; 115 | } 116 | }; 117 | 118 | } // namespace scheduler 119 | } // namespace nexus 120 | 121 | #endif // NEXUS_SCHEDULE_SCH_INFO_H_ 122 | -------------------------------------------------------------------------------- /src/nexus/common/message.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_MESSAGE_H_ 2 | #define NEXUS_COMMON_MESSAGE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nexus { 9 | 10 | /*! \brief Message types */ 11 | enum MessageType { 12 | /*! \brief register user to frontend */ 13 | kUserRegister = 1, 14 | /*! \brief request from user to fronend */ 15 | kUserRequest = 2, 16 | /*! \brief reply from frontend to user */ 17 | kUserReply = 3, 18 | 19 | // Internal message type 20 | /*! \brief request from frontend to backend */ 21 | kBackendRequest = 100, 22 | /*! \brief reply from backend to frontend */ 23 | kBackendReply = 101, 24 | /*! \brief relay request from backend to backup */ 25 | kBackendRelay = 102, 26 | /*! \brief relay reply from backup */ 27 | kBackendRelayReply = 103, 28 | }; 29 | 30 | /*! \brief Message header format */ 31 | struct MessageHeader { 32 | /*! \brief magic number field */ 33 | uint32_t magic_number; 34 | /*! \brief message type */ 35 | uint32_t msg_type; 36 | /*! \brief length of payload */ 37 | uint32_t body_length; 38 | }; 39 | 40 | /*! \brief Magic number for Nexus service */ 41 | #define NEXUS_SERVICE_MAGIC_NUMBER 0xDEADBEEF 42 | /*! \brief Header length in bytes */ 43 | #define MESSAGE_HEADER_SIZE sizeof(MessageHeader) 44 | 45 | bool DecodeHeader(const char* buffer, MessageHeader* header); 46 | 47 | /*! 48 | * \brief Message is used to hold the packets that are communicated between 49 | * client and frontend server, and between frontend server and backend server. 
50 | */ 51 | class Message { 52 | public: 53 | /*! 54 | * \brief Construct a message. 55 | * 56 | * It allocates the data buffer with maximal size. This constructor is mainly 57 | * used to hold an inbound packet when the message size is unknown. 58 | */ 59 | //Message(); 60 | Message(const MessageHeader& header); 61 | /*! 62 | * \brief Construct a message with explicit body length. 63 | * 64 | * It allocates the data buffer with body length plus header size. This 65 | * constructor is mainly used to hold an outbound packet when the message 66 | * size is known. 67 | * 68 | * \param body_length Length of payload in bytes 69 | */ 70 | Message(MessageType type, size_t body_length); 71 | /*! \brief Destruct a message. */ 72 | ~Message(); 73 | /*! \brief Get the data pointer */ 74 | char* data() { return data_; } 75 | /*! \brief Get the read-only data pointer */ 76 | const char* data() const { return data_; } 77 | /*! \brief Get the body pointer */ 78 | char* body() { return data_ + MESSAGE_HEADER_SIZE; } 79 | /*! \brief Get the read-only body pointer */ 80 | const char* body() const { return data_ + MESSAGE_HEADER_SIZE; } 81 | /*! \brief Get the length of entire message in bytes */ 82 | size_t length() const { return MESSAGE_HEADER_SIZE + body_length_; } 83 | /*! \brief Get the length of body in bytes */ 84 | size_t body_length() const { return body_length_; } 85 | /*! \brief Get the type of message */ 86 | MessageType type() const { return type_; } 87 | /*! 88 | * \brief Set the message type 89 | * \param type Message type 90 | */ 91 | void set_type(MessageType type); 92 | /*! 93 | * \brief Decode the message from the body 94 | * \param message Protobuf message for the decoding result 95 | */ 96 | void DecodeBody(google::protobuf::Message* message) const; 97 | /*! 98 | * \brief Encode the protobuf message and store in the body 99 | * \param message Protobuf message to encode 100 | */ 101 | void EncodeBody(const google::protobuf::Message& message); 102 | 103 | private: 104 | /*! \brief Data buffer */ 105 | char* data_; 106 | /*! \brief Message type */ 107 | MessageType type_; 108 | /*! \brief Length of message body in bytes */ 109 | size_t body_length_; 110 | }; 111 | 112 | } // namespace nexus 113 | 114 | #endif // NEXUS_COMMON_MESSAGE_H_ 115 | -------------------------------------------------------------------------------- /src/nexus/backend/model_exec.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_MODEL_EXEC_H_ 2 | #define NEXUS_BACKEND_MODEL_EXEC_H_ 3 | 4 | #include <atomic> 5 | #include <memory> 6 | #include <mutex> 7 | 8 | #include "nexus/backend/model_ins.h" 9 | #include "nexus/common/block_queue.h" 10 | #include "nexus/common/metric.h" 11 | #include "nexus/common/model_db.h" 12 | 13 | namespace nexus { 14 | namespace backend { 15 | 16 | class ModelExecutor { 17 | public: 18 | ModelExecutor(int gpu_id, const ModelInstanceConfig& config, 19 | BlockPriorityQueue<Task>& task_queue); 20 | 21 | ~ModelExecutor(); 22 | 23 | ModelInstance* model() { return model_.get(); } 24 | 25 | const ModelInstance* model() const { return model_.get(); } 26 | /*! \brief Return whether this model is a backup model. 
*/ 27 | bool backup() const { return backup_; } 28 | 29 | const ModelProfile* profile() const { return profile_; } 30 | 31 | void SetBatch(uint32_t batch) { model_->set_batch(batch); } 32 | 33 | double GetRequestRate(); 34 | 35 | double GetDropRate(); 36 | 37 | bool IsSharePrefixModel() const; 38 | bool IsTFShareModel() const; 39 | 40 | bool HasBackup(); 41 | 42 | std::vector<uint32_t> BackupBackends(); 43 | 44 | void UpdateBackupBackends(const ModelInstanceConfig& config); 45 | 46 | bool Preprocess(std::shared_ptr<Task> task, bool force=false); 47 | 48 | bool AddPreprocessedTask(std::shared_ptr<Task> task, bool force=false); 49 | 50 | void Postprocess(std::shared_ptr<Task> task); 51 | 52 | uint64_t Execute(uint32_t batch = 0); 53 | 54 | TimePoint LastExecuteFinishTime(); 55 | 56 | int NumberOfOpenRequests() const; 57 | 58 | uint64_t GetPeakMemoryUsage(); 59 | 60 | private: 61 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTaskSlidingWindow(uint32_t batch_size); 62 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTaskEarliest(uint32_t batch_size); 63 | 64 | bool IncreaseOpenRequests(int cnt, bool limit_max_batch); 65 | 66 | void DecreaseOpenRequests(int cnt); 67 | /*! 68 | * \brief Get batch task from the task queue. 69 | * \param batch_size Expected batch size in the batch task. 70 | * \return Batch task and the number of inputs dequeued from input queue. 71 | */ 72 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTask(uint32_t batch_size); 73 | 74 | void RemoveTask(std::shared_ptr<Task> task); 75 | 76 | std::unique_ptr<ModelInstance> model_; 77 | bool backup_; 78 | const ModelProfile* profile_; 79 | BlockPriorityQueue<Task>& task_queue_; 80 | /*! 81 | * \brief Map from task id to current processing tasks. 82 | * Guarded by task_mu_. 83 | */ 84 | std::unordered_map<uint64_t, std::shared_ptr<Task>> processing_tasks_; 85 | /*! \brief Priority queue of inputs based on deadline. Guarded by task_mu_. */ 86 | std::priority_queue<std::shared_ptr<Input>, 87 | std::vector<std::shared_ptr<Input>>, 88 | CompareDeadlineItem> input_queue_; 89 | /*! \brief Input array allocated in GPU memory to hold batch inputs. */ 90 | std::shared_ptr<Array> input_array_; 91 | /*! \brief Batch index. */ 92 | std::atomic<uint64_t> batch_id_; 93 | /*! \brief Number of open requests. */ 94 | std::atomic<int> open_requests_; 95 | /*! \brief Interval counter to count number of requests within each interval. 96 | */ 97 | std::shared_ptr<IntervalCounter> req_counter_; 98 | std::shared_ptr<IntervalCounter> drop_counter_; 99 | 100 | EWMA req_rate_; 101 | EWMA drop_rate_; 102 | 103 | std::vector<uint32_t> backup_backends_; 104 | /*! 105 | * \brief Last time point that finishes the batch execution. 106 | * Guarded by time_mu_. 107 | */ 108 | TimePoint last_exec_finish_; 109 | /*! \brief Mutex to protect processing_tasks_ and input_queue_. */ 110 | std::mutex task_mu_; 111 | /*! \brief Mutex to protect last_exec_finish_. 
*/ 112 | std::mutex time_mu_; 113 | 114 | std::mutex backup_mu_; 115 | }; 116 | 117 | using ModelExecutorPtr = std::shared_ptr; 118 | 119 | } // namespace backend 120 | } // namespace nexus 121 | 122 | #endif // NEXUS_BACKEND_MODEL_EXEC_H_ 123 | -------------------------------------------------------------------------------- /src/nexus/backend/model_ins.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/backend/caffe_densecap_model.h" 2 | #include "nexus/backend/caffe_model.h" 3 | #include "nexus/backend/caffe2_model.h" 4 | #include "nexus/backend/darknet_model.h" 5 | #include "nexus/backend/model_ins.h" 6 | #include "nexus/backend/share_prefix_model.h" 7 | #include "nexus/backend/tensorflow_model.h" 8 | #include "nexus/backend/tf_share_model.h" 9 | 10 | #include 11 | 12 | namespace nexus { 13 | namespace backend { 14 | 15 | void CreateModelInstance(int gpu_id, const ModelInstanceConfig& config, 16 | std::unique_ptr* model) { 17 | auto beg = Clock::now(); 18 | std::string framework = config.model_session(0).framework(); 19 | #ifdef USE_TENSORFLOW 20 | if (framework == "tf_share") { 21 | model->reset(new TFShareModel(gpu_id, config)); 22 | } else 23 | #endif 24 | if (config.model_session_size() > 1) { 25 | LOG(INFO) << "Create prefix model"; 26 | model->reset(new SharePrefixModel(gpu_id, config)); 27 | } else { 28 | std::string model_name = config.model_session(0).model_name(); 29 | #ifdef USE_DARKNET 30 | if (framework == "darknet") { 31 | model->reset(new DarknetModel(gpu_id, config)); 32 | } else 33 | #endif 34 | #ifdef USE_CAFFE 35 | if (framework == "caffe") { 36 | if (model_name == "densecap") { 37 | model->reset(new CaffeDenseCapModel(gpu_id, config)); 38 | } else { 39 | model->reset(new CaffeModel(gpu_id, config)); 40 | } 41 | } else 42 | #endif 43 | #ifdef USE_CAFFE2 44 | if (framework == "caffe2") { 45 | model->reset(new Caffe2Model(gpu_id, config)); 46 | } else 47 | #endif 48 | #ifdef USE_TENSORFLOW 49 | if (framework == "tensorflow") { 50 | model->reset(new TensorflowModel(gpu_id, config)); 51 | } else 52 | #endif 53 | { 54 | LOG(FATAL) << "Unknown framework " << framework; 55 | } 56 | } 57 | 58 | auto end = Clock::now(); 59 | auto duration = std::chrono::duration_cast( 60 | end - beg); 61 | LOG(INFO) << "Loading model time: " << duration.count() << "ms"; 62 | } 63 | 64 | ModelInstance::ModelInstance(int gpu_id, const ModelInstanceConfig &config) : 65 | gpu_id_(gpu_id), 66 | model_session_(config.model_session(0)), 67 | batch_(config.batch()), 68 | max_batch_(config.max_batch()) { 69 | CHECK_GT(batch_, 0) << "batch must be greater than 0"; 70 | CHECK_GE(max_batch_, batch_) << "max_batch must be greater than batch"; 71 | std::string model_id = ModelSessionToModelID(model_session_); 72 | auto info = ModelDatabase::Singleton().GetModelInfo(model_id); 73 | CHECK(info != nullptr) << "Model not found in the database"; 74 | model_info_ = *info; 75 | model_session_id_ = ModelSessionToString(model_session_); 76 | cpu_device_ = DeviceManager::Singleton().GetCPUDevice(); 77 | #ifdef USE_GPU 78 | gpu_device_ = DeviceManager::Singleton().GetGPUDevice(gpu_id); 79 | #endif 80 | LOG(INFO) << "Construct model " << model_session_id_ << ", batch " << 81 | batch_ << ", max batch " << max_batch_; 82 | } 83 | 84 | ModelInstance::~ModelInstance() { 85 | LOG(INFO) << "Deconstruct model " << model_session_id_; 86 | } 87 | void ModelInstance::set_batch(size_t batch) { 88 | CHECK_LE(batch, max_batch_) << "Batch size must be less than max_batch"; 89 | 
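// batch_ is atomic, so a batch-size update (e.g., applied when the
// scheduler adjusts this instance's batch) can safely race with worker
// threads that read batch_ on the execution path.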
batch_.store(batch); 90 | } 91 | ArrayPtr ModelInstance::CreateInputGpuArrayWithRawPointer(float *ptr, size_t nfloats) { 92 | LOG(ERROR) << "Don't support create input gpu array with raw pointer"; 93 | return nullptr; 94 | } 95 | void ModelInstance::RemoveInputGpuArray(ArrayPtr arr) { 96 | LOG(WARNING) << "Don't support remove input gpu array"; 97 | } 98 | void ModelInstance::ForwardAsync(std::shared_ptr batch_task) { 99 | LOG(WARNING) << "Don't support async forward"; 100 | Forward(batch_task); 101 | } 102 | void ModelInstance::WaitOutput(std::shared_ptr batch_task) { 103 | LOG(WARNING) << "Don't support async forward"; 104 | } 105 | uint64_t ModelInstance::GetPeakBytesInUse() { 106 | LOG(FATAL) << "GetPeakBytesInUse not implemented"; 107 | } 108 | } // namespace backend 109 | } // namespace nexus 110 | -------------------------------------------------------------------------------- /src/nexus/common/util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef USE_GPU 8 | #include 9 | #endif 10 | 11 | #include "nexus/common/util.h" 12 | 13 | namespace nexus { 14 | 15 | void SplitString(const std::string& str, char delim, 16 | std::vector* tokens) { 17 | std::stringstream ss; 18 | ss.str(str); 19 | std::string token; 20 | tokens->clear(); 21 | while (std::getline(ss, token, delim)) { 22 | tokens->push_back(token); 23 | } 24 | } 25 | 26 | void Memcpy(void* dst, const Device* dst_device, const void* src, 27 | const Device* src_device, size_t nbytes) { 28 | if (dst == src && dst_device == src_device) { 29 | return; 30 | } 31 | DeviceType dst_type = dst_device->type(); 32 | DeviceType src_type = src_device->type(); 33 | #ifdef USE_GPU 34 | if (dst_type == kCPU) { 35 | if (src_type == kCPU) { 36 | memcpy(dst, src, nbytes); 37 | } else { // src_type == kGPU 38 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToHost)); 39 | } 40 | } else { // dst_type == kGPU 41 | if (src_type == kCPU) { 42 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToDevice)); 43 | } else { // src_type == kGPU 44 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice)); 45 | } 46 | } 47 | #else 48 | CHECK_EQ(dst_type, kCPU); 49 | CHECK_EQ(src_type, kCPU); 50 | memcpy(dst, src, nbytes); 51 | #endif 52 | } 53 | 54 | namespace { 55 | /*! 
\brief the list of all IPv4 addresses */ 56 | std::vector Ipv4Interfaces; 57 | } // namespace 58 | 59 | void ListIpv4Address() { 60 | if (Ipv4Interfaces.size() > 0) { 61 | Ipv4Interfaces.clear(); 62 | } 63 | struct ifaddrs* ifAddrStruct = nullptr; 64 | struct ifaddrs* ifa = nullptr; 65 | // get network interface addresses 66 | getifaddrs(&ifAddrStruct); 67 | // iterate over all addresses 68 | for (ifa = ifAddrStruct; ifa != nullptr; ifa = ifa->ifa_next) { 69 | if (!ifa->ifa_addr) { 70 | continue; 71 | } 72 | if (ifa->ifa_addr->sa_family == AF_INET) { 73 | // IPv4 Address 74 | in_addr* addr = &((sockaddr_in*) ifa->ifa_addr)->sin_addr; 75 | Ipv4Interfaces.push_back(*addr); 76 | } else if (ifa->ifa_addr->sa_family == AF_INET6) { 77 | continue; 78 | // IPv6 Address 79 | /*in6_addr* addr = &((sockaddr_in6*) ifa->ifa_addr)->sin6_addr; 80 | char ipv6[INET6_ADDRSTRLEN]; 81 | inet_ntop(AF_INET6, addr, ipv6, INET6_ADDRSTRLEN); 82 | //printf("%s IP Address %s\n", ifa->ifa_name, ipv6); 83 | ret = std::string(ipv6);*/ 84 | } 85 | } 86 | if (ifAddrStruct != nullptr) { 87 | freeifaddrs(ifAddrStruct); 88 | } 89 | } 90 | 91 | void ConvertPrefix(const std::string& prefix, uint32_t* addr, uint32_t* mask) { 92 | char *pref = new char[prefix.length() + 1]; 93 | strcpy(pref, prefix.c_str()); 94 | char *pch = strchr(pref, '/'); 95 | if (pch == nullptr) { 96 | *mask = 0xffffffff; 97 | } else { 98 | *pch = 0; 99 | ++pch; 100 | int prefix_len = atoi(pch); 101 | if (prefix_len > 32 || prefix_len < 0) { 102 | LOG(FATAL) << "Wrong prefix length: " << prefix_len; 103 | } 104 | *mask = ~(uint32_t)((1 << (32 - prefix_len)) - 1); 105 | } 106 | uint32_t prefix_addr = 0; 107 | pch = strtok(pref, "."); 108 | while (pch != nullptr) { 109 | prefix_addr = (prefix_addr << 8) | (uint8_t) atoi(pch); 110 | pch = strtok(NULL, "."); 111 | } 112 | *addr = prefix_addr & *mask; 113 | delete[] pref; 114 | } 115 | 116 | std::string GetIpAddress(const std::string& prefix) { 117 | if (Ipv4Interfaces.empty()) { 118 | ListIpv4Address(); 119 | } 120 | uint32_t prefix_addr; 121 | uint32_t prefix_mask; 122 | ConvertPrefix(prefix, &prefix_addr, &prefix_mask); 123 | for (size_t i = 0; i < Ipv4Interfaces.size(); ++i) { 124 | const in_addr* addr = &Ipv4Interfaces[i]; 125 | if ((ntohl(addr->s_addr) & prefix_mask) == prefix_addr) { 126 | char addr_str[INET_ADDRSTRLEN]; 127 | inet_ntop(AF_INET, addr, addr_str, INET_ADDRSTRLEN); 128 | return std::string(addr_str); 129 | } 130 | } 131 | return ""; 132 | } 133 | 134 | } // namespace nexus 135 | -------------------------------------------------------------------------------- /src/nexus/common/data_type.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_DATA_TYPE_H_ 2 | #define NEXUS_COMMON_DATA_TYPE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/common/buffer.h" 9 | #include "nexus/proto/nnquery.pb.h" 10 | 11 | namespace nexus { 12 | 13 | template struct TypeMap; 14 | 15 | template<> struct TypeMap { 16 | using type = bool; 17 | static constexpr size_t size = 1; 18 | }; 19 | 20 | template<> struct TypeMap { 21 | using type = int8_t; 22 | static constexpr size_t size = 1; 23 | }; 24 | 25 | template<> struct TypeMap { 26 | using type = uint8_t; 27 | static constexpr size_t size = 1; 28 | }; 29 | 30 | template<> struct TypeMap { 31 | using type = int32_t; 32 | static constexpr size_t size = 4; 33 | }; 34 | 35 | template<> struct TypeMap { 36 | using type = uint32_t; 37 | static constexpr size_t size = 4; 38 | }; 39 | 40 | 
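// Each TypeMap specialization here pairs a DataType enum value with its
// C++ type and byte size; type_size() below simply dispatches on the enum,
// e.g. type_size(DT_FLOAT) == 4 and type_size(DT_DOUBLE) == 8.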
--------------------------------------------------------------------------------
/examples/traffic_complex/src/traffic_complex.cpp:
--------------------------------------------------------------------------------
#include <gflags/gflags.h>
#include <glog/logging.h>

#include "nexus/app/app_base.h"

using namespace nexus;
using namespace nexus::app;

class TrafficApp : public AppBase {
 public:
  TrafficApp(std::string port, std::string rpc_port, std::string sch_addr,
             size_t nthreads, int latency_slo, int ssd_latency_ms) :
      AppBase(port, rpc_port, sch_addr, nthreads),
      latency_slo_(latency_slo),
      ssd_latency_ms_(ssd_latency_ms),
      rec_latency_ms_(latency_slo - ssd_latency_ms) {}

  void Setup() final {
    ssd_model_ = GetModelHandler("tensorflow", "ssd_mobilenet", 1,
                                 ssd_latency_ms_, 0, {}, LB_DeficitRR);
    car_model_ = GetModelHandler("caffe2", "googlenet_cars", 1, rec_latency_ms_);
    face_model_ = GetModelHandler("caffe2", "vgg_face_0", 1, rec_latency_ms_);
    auto func1 = [&](std::shared_ptr<RequestContext> ctx) {
      auto ssd_output = ssd_model_->Execute(ctx, ctx->const_request().input());
      return std::vector<VariablePtr>{
          std::make_shared<Variable>("ssd_output", ssd_output)};
    };
    auto func2 = [&](std::shared_ptr<RequestContext> ctx) {
      auto ssd_output = ctx->GetVariable("ssd_output")->result();
      std::vector<std::shared_ptr<QueryResult> > results;
      std::vector<RectProto> car_boxes;
      std::vector<RectProto> face_boxes;
      for (uint32_t i = 0; i < ssd_output->num_records(); ++i) {
        auto& rec = (*ssd_output)[i];
        auto name = rec["class_name"].as<std::string>();
        if (name == "car" || name == "truck") {
          car_boxes.push_back(rec["rect"].as<RectProto>());
        } else if (name == "person") {
          face_boxes.push_back(rec["rect"].as<RectProto>());
        }
      }
      if (!car_boxes.empty()) {
        results.push_back(
            car_model_->Execute(ctx, ctx->const_request().input(), {}, 1,
                                car_boxes));
      }
      if (!face_boxes.empty()) {
        results.push_back(
            face_model_->Execute(ctx, ctx->const_request().input(), {}, 1,
                                 face_boxes));
      }
      return std::vector<VariablePtr>{
          std::make_shared<Variable>("rec_output", results)};
    };
    auto func3 = [&](std::shared_ptr<RequestContext> ctx) {
      auto rec_output = ctx->GetVariable("rec_output");
      if (rec_output->count() > 0) {
        rec_output->result()->ToProto(ctx->reply());
      }
      return std::vector<VariablePtr>{};
    };
    ExecBlock* b1 = new ExecBlock(0, func1, {});
    ExecBlock* b2 = new ExecBlock(1, func2, {"ssd_output"});
    ExecBlock* b3 = new ExecBlock(2, func3, {"rec_output"});
    qp_ = new QueryProcessor({b1, b2, b3});
  }

 private:
  RectProto GetRect(int left, int right, int top, int bottom) {
    RectProto rect;
    rect.set_left(left);
    rect.set_right(right);
    rect.set_top(top);
    rect.set_bottom(bottom);
    return rect;
  }

  int latency_slo_;
  int ssd_latency_ms_;
  int rec_latency_ms_;
  std::shared_ptr<ModelHandler> ssd_model_;
  std::shared_ptr<ModelHandler> car_model_;
  std::shared_ptr<ModelHandler> face_model_;
};

DEFINE_string(port, "9001", "Server port");
DEFINE_string(rpc_port, "9002", "RPC port");
DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address");
DEFINE_int32(nthread, 4, "Number of threads processing requests");
DEFINE_int32(latency, 0, "Latency SLO for query in ms");
DEFINE_int32(ssd_latency, 0, "Latency SLO for SSD model in ms");

int main(int argc, char** argv) {
  // log to stderr
  FLAGS_logtostderr = 1;
  // Init glog
  google::InitGoogleLogging(argv[0]);
  // Parse command line flags
  google::ParseCommandLineFlags(&argc, &argv, true);
  // Setup backtrace on segfault
  google::InstallFailureSignalHandler();

  LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port;
  // Create the frontend server
  TrafficApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread,
                 FLAGS_latency, FLAGS_ssd_latency);
  LaunchApp(&app);

  return 0;
}
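
// The three ExecBlocks above form a small dataflow graph: block 0 runs the
// SSD detector and publishes "ssd_output"; block 1 becomes runnable once
// "ssd_output" is filled, fans out to the car/face recognizers, and
// publishes "rec_output"; block 2 waits on "rec_output" and serializes the
// reply. A reduced sketch of the same pattern with a single dependency,
// using the same types as the file above:
auto detect = [&](std::shared_ptr<RequestContext> ctx) {
  auto out = ssd_model_->Execute(ctx, ctx->const_request().input());
  return std::vector<VariablePtr>{std::make_shared<Variable>("det", out)};
};
auto reply = [&](std::shared_ptr<RequestContext> ctx) {
  ctx->GetVariable("det")->result()->ToProto(ctx->reply());
  return std::vector<VariablePtr>{};
};
qp_ = new QueryProcessor({new ExecBlock(0, detect, {}),
                          new ExecBlock(1, reply, {"det"})});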
--------------------------------------------------------------------------------
/src/nexus/backend/batch_task.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_BACKEND_BATCH_TASK_H_
#define NEXUS_BACKEND_BATCH_TASK_H_

#include <unordered_map>

#include "nexus/backend/slice.h"
#include "nexus/backend/task.h"

namespace nexus {
namespace backend {

/*!
 * \brief BatchTask holds a batch of inputs and outputs, and is used for
 * batch forwarding through a DNN model.
 */
class BatchTask {
 public:
  /*!
   * \brief Construct a batch task.
   * \param max_batch Max batch size.
   */
  BatchTask(uint32_t max_batch);
  /*!
   * \brief Set batch id.
   * \param batch_id Batch id.
   */
  inline void set_batch_id(uint64_t batch_id) { batch_id_ = batch_id; }
  /*! \brief Return batch id */
  inline uint64_t batch_id() const { return batch_id_; }
  /*! \brief Return batch size */
  inline uint32_t batch_size() const { return inputs_.size(); }
  /*! \brief Return max batch size */
  inline uint32_t max_batch() const { return max_batch_; }
  /*!
   * \brief Set input array for holding the batch input data.
   * \param arr Array pointer.
   */
  void SetInputArray(ArrayPtr arr);
  /*!
   * \brief Create input array to hold the batch input data.
   * \param data_type Data type of input.
   * \param num_elements_per_input Number of elements in a single input.
   * \param device Device for allocation of input array.
   */
  void CreateInputArray(DataType data_type, size_t num_elements_per_input,
                        Device* device);
  /*!
   * \brief Set output arrays for holding the batch output results.
   * \param arrays Map from name to arrays.
   */
  void SetOutputArrays(const std::unordered_map<std::string, ArrayPtr>& arrays);
  /*!
   * \brief Create output arrays to hold the batch output results.
   * \param sizes Map from name to output sizes (in floats) for a single input.
   * \param device Device for allocation of output arrays.
   */
  void CreateOutputArrays(const std::unordered_map<std::string, size_t>& sizes,
                          Device* device);
  /*! \brief Return input batch array */
  inline ArrayPtr GetInputArray() const { return input_array_; }
  /*!
   * \brief Get the output batch array given name.
   * \param name Name of array.
   * \return Array corresponding to the name.
   */
  ArrayPtr GetOutputArray(const std::string& name) const;
  /*!
   * \brief Append a new input into the batch input.
   * \param input A single input.
   * \param task Task that the input belongs to.
   */
  void AppendInput(std::shared_ptr<Input> input, std::shared_ptr<Task> task);
  /*!
   * \brief Slice the batch output into individual outputs.
   * \param slices Slices for all arrays.
   */
  void SliceOutputBatch(const std::unordered_map<std::string, Slice>& slices);
  /*! \brief Get all individual inputs in the batch. */
  inline const std::vector<std::shared_ptr<Input> >& inputs() const {
    return inputs_;
  }
  /*! \brief Get all individual outputs in the batch. */
  inline const std::vector<std::shared_ptr<Output> >& outputs() const {
    return outputs_;
  }
  /*! \brief Set individual outputs. */
  void set_outputs(const std::vector<std::shared_ptr<Output> >& outputs);
  /*! \brief Get all tasks in the batch. */
  inline const std::vector<std::shared_ptr<Task> >& tasks() const {
    return tasks_;
  }

 private:
  /*! \brief Batch ID. */
  uint64_t batch_id_;
  /*! \brief Max batch size. */
  uint32_t max_batch_;
  /*! \brief Array that holds batch input data. */
  ArrayPtr input_array_;
  /*! \brief Write pointer to input_array_. */
  char* input_write_pt_;
  /*! \brief Number of elements added in the input_array_. */
  size_t input_elements_;
  /*! \brief Map from name to output array. */
  std::unordered_map<std::string, ArrayPtr> output_arrays_;
  /*! \brief Tasks in the batch. */
  std::vector<std::shared_ptr<Task> > tasks_;
  /*! \brief Individual inputs in the batch. */
  std::vector<std::shared_ptr<Input> > inputs_;
  /*! \brief Individual outputs in the batch. */
  std::vector<std::shared_ptr<Output> > outputs_;
};

} // namespace backend
} // namespace nexus

#endif // NEXUS_BACKEND_BATCH_TASK_H_
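
// A sketch of the call sequence BatchTask is designed around, roughly as a
// GPU executor would drive it. `gpu`, `pending`, and `task_of` are
// placeholders, and the Slice constructor arguments here are assumed rather
// than taken from slice.h.
BatchTask batch(/*max_batch=*/8);
batch.CreateInputArray(DT_FLOAT, 3 * 224 * 224, gpu);
while (batch.batch_size() < batch.max_batch() && !pending.empty()) {
  std::shared_ptr<Input> input = pending.front();
  pending.pop();
  batch.AppendInput(input, task_of(input->task_id));
}
batch.CreateOutputArrays({{"prob", 1000}}, gpu);
// ... the model forwards GetInputArray() into GetOutputArray("prob") ...
batch.SliceOutputBatch({{"prob", Slice(batch.batch_size(), 1000)}});
// outputs() now holds one Output per appended Input, in the same order.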
--------------------------------------------------------------------------------
/src/nexus/app/model_handler.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_HANDLER_H_
#define NEXUS_COMMON_MODEL_HANDLER_H_

#include <atomic>
#include <memory>
#include <mutex>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>

#include "nexus/common/backend_pool.h"
#include "nexus/common/data_type.h"
#include "nexus/common/metric.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {
namespace app {

/*!
 * \brief QueryResult provides a mechanism to access the result of
 * asynchronous model execution.
 */
class QueryResult {
 public:
  /*!
   * \brief Construct a QueryResult.
   * \param qid Query ID.
   */
  QueryResult(uint64_t qid);

  bool ready() const { return ready_; }

  uint64_t query_id() const { return qid_; }
  /*! \brief Gets the status of output result */
  uint32_t status() const;
  /*! \brief Gets the error message if any error happens in the execution */
  std::string error_message() const;
  /*!
   * \brief Output the result to reply protobuf.
   * \param reply ReplyProto to be filled.
   */
  void ToProto(ReplyProto* reply) const;
  /*!
   * \brief Get the record given the index.
   * \param idx Index of record.
   * \return Record at idx.
   */
  const Record& operator[](uint32_t idx) const;
  /*! \brief Get number of records in the output */
  uint32_t num_records() const;

  void SetResult(const QueryResultProto& result);

 private:
  void CheckReady() const;

  void SetError(uint32_t error, const std::string& error_msg);

 private:
  uint64_t qid_;
  std::atomic<bool> ready_;
  uint32_t status_;
  std::string error_message_;
  std::vector<Record> records_;
};

class RequestContext;

enum LoadBalancePolicy {
  // Weighted round robin
  LB_WeightedRR = 1,
  // Query 2 backends and pick the one with the lowest utilization
  LB_Query = 2,
  // Deficit round robin
  LB_DeficitRR = 3,
};

class ModelHandler {
 public:
  ModelHandler(const std::string& model_session_id, BackendPool& pool,
               LoadBalancePolicy lb_policy);

  ~ModelHandler();

  ModelSession model_session() const { return model_session_; }

  std::string model_session_id() const { return model_session_id_; }

  std::shared_ptr<IntervalCounter> counter() const { return counter_; }

  std::shared_ptr<QueryResult> Execute(
      std::shared_ptr<RequestContext> ctx, const ValueProto& input,
      std::vector<std::string> output_fields={}, uint32_t topk=1,
      std::vector<RectProto> windows={});

  void HandleReply(const QueryResultProto& result);

  void UpdateRoute(const ModelRouteProto& route);

  std::vector<uint32_t> BackendList();

 private:
  std::shared_ptr<BackendSession> GetBackend();

  std::shared_ptr<BackendSession> GetBackendWeightedRoundRobin();

  std::shared_ptr<BackendSession> GetBackendDeficitRoundRobin();

  ModelSession model_session_;
  std::string model_session_id_;
  BackendPool& backend_pool_;
  LoadBalancePolicy lb_policy_;
  static std::atomic<uint64_t> global_query_id_;

  std::vector<uint32_t> backends_;
  /*!
   * \brief Mapping from backend id to its serving rate.
   *
   * Guarded by route_mu_.
   */
  std::unordered_map<uint32_t, double> backend_rates_;

  std::unordered_map<uint32_t, double> backend_quanta_;
  double quantum_to_rate_ratio_ = 0;
  size_t current_drr_index_ = 0;
  float total_throughput_;
  /*! \brief Interval counter that counts the number of requests within each
   * interval.
   */
  std::shared_ptr<IntervalCounter> counter_;

  std::unordered_map<uint64_t, std::shared_ptr<RequestContext> > query_ctx_;
  std::mutex route_mu_;
  std::mutex query_ctx_mu_;
  /*! \brief Random number generator */
  std::random_device rd_;
  std::mt19937 rand_gen_;

  std::atomic<bool> running_;
};

} // namespace app
} // namespace nexus

#endif // NEXUS_COMMON_MODEL_HANDLER_H_
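
// A sketch of the deficit round robin policy behind
// GetBackendDeficitRoundRobin() above (LB_DeficitRR): each backend holds a
// quantum that is replenished in proportion to its serving rate, and a
// request goes to the first backend whose quantum covers one request (the
// per-request cost is normalized to 1 here). This mirrors the member names
// above but is a sketch, not the repository implementation; it assumes the
// surrounding header's includes.
uint32_t PickBackendDrr(const std::vector<uint32_t>& backends,
                        const std::unordered_map<uint32_t, double>& rates,
                        std::unordered_map<uint32_t, double>* quanta,
                        size_t* drr_index, double quantum_to_rate_ratio) {
  for (size_t step = 0; step < backends.size(); ++step) {
    size_t idx = (*drr_index + step) % backends.size();
    uint32_t backend_id = backends[idx];
    if ((*quanta)[backend_id] >= 1.0) {
      (*quanta)[backend_id] -= 1.0;  // charge one request to the quantum
      *drr_index = idx;
      return backend_id;
    }
    // Not enough deficit accumulated: top up in proportion to the backend's
    // serving rate, then try the next backend.
    (*quanta)[backend_id] += rates.at(backend_id) * quantum_to_rate_ratio;
  }
  return 0;  // no backend can absorb the request right now
}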
--------------------------------------------------------------------------------
/src/nexus/common/model_db.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_DB_H_
#define NEXUS_COMMON_MODEL_DB_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <yaml-cpp/yaml.h>

namespace nexus {

struct ProfileEntry {
  // latency in us unit
  float latency_mean;
  float latency_std;
  size_t memory_usage;
  int repeat;
};

class ModelProfile {
 public:
  ModelProfile() {}

  ModelProfile(const std::string& file_path);

  void MergeProfile(const ModelProfile& rhs);

  void LoadProfile(const std::string& file_path);

  std::string profile_id() const { return profile_id_; }

  std::string gpu_device_name() const { return gpu_device_name_; }

  std::string gpu_uuid() const { return gpu_uuid_; }

  float GetForwardLatency(uint32_t batch) const;

  float GetPreprocessLatency() const;

  float GetPostprocessLatency() const;

  size_t GetMemoryUsage(uint32_t batch) const;
  /*!
   * \brief Computes the maximum batch size that fits within the latency SLA.
   * \param latency_sla_ms Latency SLA in ms.
   * \return Max batch size.
   */
  uint32_t GetMaxBatch(float latency_sla_ms) const;
  /*!
   * \brief Computes the maximum throughput that can be achieved within the
   * latency SLA.
   * \param latency_sla_ms Latency SLA in ms.
   * \return Pair of best batch size and max throughput.
   */
  std::pair<uint32_t, float> GetMaxThroughput(float latency_sla_ms) const;

 private:
  std::string profile_id_;
  std::string gpu_device_name_;
  std::string gpu_uuid_;
  std::unordered_map<uint32_t, ProfileEntry> forward_lats_;
  ProfileEntry preprocess_;
  ProfileEntry postprocess_;
  float network_latency_us_ = 2000; // us
};

struct TFShareSuffixInfo {
  size_t suffix_index;
  std::string model_name;
  std::string output_layer;
  std::string type;
  std::string class_names;

  TFShareSuffixInfo(size_t suffix_index_, const YAML::Node &node);
};

struct TFShareInfo {
  std::string model_file;
  std::string input_layer;
  std::string slice_beg_vector;
  std::string slice_len_vector;
  int image_height;
  int image_width;
  std::unordered_map<std::string, TFShareSuffixInfo> suffix_models;

  std::string hack_internal_id;
  explicit TFShareInfo(const YAML::Node &node);
};

class ModelDatabase {
 public:
  static ModelDatabase& Singleton();

  const YAML::Node* GetModelInfo(const std::string& model_id) const;

  const YAML::Node* GetModelInfo(const std::string& framework,
                                 const std::string& model_name,
                                 uint32_t version) const;

  const ModelProfile* GetModelProfile(const std::string& gpu_device,
                                      const std::string& gpu_uuid,
                                      const std::string& profile_id) const;

  int GetSharePrefixLength(const std::string& model_id1,
                           const std::string& model_id2) const;

  std::vector<std::string> GetPrefixShareModels(
      const std::string& model_id) const;

  std::shared_ptr<TFShareInfo> GetTFShareInfo(
      const std::string& model_name) const;

 private:
  ModelDatabase(const std::string& model_root);

  void LoadModelInfo(const std::string& db_file);

  void LoadModelProfiles(const std::string& profile_dir);

 private:
  /*! \brief Map from profile ID to model profile. */
  using ProfileTable = std::unordered_map<std::string, ModelProfile>;
  /*! \brief Map from model ID to shared prefix length. */
  using PrefixMap = std::unordered_map<std::string, int>;

  /*! \brief Model database root directory */
  std::string db_root_dir_;
  /*! \brief Model store directory */
  std::string model_store_dir_;
  /*! \brief Map from model ID to model information */
  std::unordered_map<std::string, YAML::Node> model_info_table_;
  /*! \brief Map from device name to profile table */
  std::unordered_map<std::string, ProfileTable> device_profile_table_;
  /*! \brief Map from model ID to the prefix maps of sharable models */
  std::unordered_map<std::string, PrefixMap> share_prefix_models_;
  /*! \brief Map from model name to TFShareInfo */
  std::unordered_map<std::string, std::shared_ptr<TFShareInfo>> tf_share_models_;
};

} // namespace nexus

#endif // NEXUS_COMMON_MODEL_DB_H_
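
// How the profile numbers above combine: GetForwardLatency(b) grows with
// batch size b while throughput b / forward(b) improves, so the best batch
// is the largest one whose end-to-end latency still fits the SLA. A hedged
// sketch of GetMaxBatch-style arithmetic; the budget split (half the SLA,
// leaving room for queueing) and the zero-latency sentinel for unprofiled
// batch sizes are assumptions, not quotes of the implementation.
uint32_t MaxBatchWithinSla(const ModelProfile& profile, float latency_sla_ms) {
  float budget_us = latency_sla_ms * 1000 / 2;
  uint32_t best_batch = 0;
  // Assumes GetForwardLatency returns 0 once b exceeds the profiled range.
  for (uint32_t b = 1; profile.GetForwardLatency(b) > 0; ++b) {
    float total_us = profile.GetPreprocessLatency() +
                     profile.GetForwardLatency(b) +
                     profile.GetPostprocessLatency();
    if (total_us <= budget_us) {
      best_batch = b;
    }
  }
  return best_batch;
}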
--------------------------------------------------------------------------------
/src/nexus/scheduler/backend_delegate.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_SCHEDULER_BACKEND_DELEGATE_H_
#define NEXUS_SCHEDULER_BACKEND_DELEGATE_H_

#include <chrono>
#include <ctime>
#include <memory>
#include <string>
#include <vector>

#include "nexus/common/metric.h"
#include "nexus/common/model_db.h"
#include "nexus/common/model_def.h"
#include "nexus/proto/control.grpc.pb.h"
#include "nexus/scheduler/sch_info.h"

namespace nexus {
namespace scheduler {

class Scheduler;

using InstanceInfoPtr = std::shared_ptr<InstanceInfo>;

class BackendDelegate {
 public:
  BackendDelegate(uint32_t node_id, const std::string& ip,
                  const std::string& server_port, const std::string& rpc_port,
                  const std::string& gpu_device, const std::string& gpu_uuid,
                  size_t gpu_available_memory, int beacon_sec);

  uint32_t node_id() const { return node_id_; }

  std::string gpu_device() const { return gpu_device_; }

  size_t gpu_available_memory() const { return gpu_available_memory_; }

  int workload_id() const { return workload_id_; }

  void set_workload_id(int id) { workload_id_ = id; }

  bool overload() const { return overload_; }

  double Occupancy() const;

  void GetInfo(BackendInfo* info) const;

  std::time_t LastAliveTime() const;

  void Tick();

  bool Assign(const BackendDelegate& other);

  bool PrepareLoadModel(const ModelSession& model_sess, double workload,
                        InstanceInfo* inst_info, double* occupancy) const;

  void LoadModel(const InstanceInfo& inst_info);

  void LoadModel(const YAML::Node& model_info);

  void LoadPrefixModel(const ModelSession& model_session,
                       const ModelSession& shared_session);

  void UnloadModel(const std::string& model_sess_id);

  void AddBackupForModel(const std::string& model_sess_id,
                         const BackendInfo& info);

  void RemoveBackupForModel(const std::string& model_sess_id,
                            uint32_t backend_id);
  /*!
   * \brief Update model throughput given model session id and throughput.
   * \param model_sess_id Model session ID.
   * \param throughput Expected throughput to be achieved.
   * \return Leftover throughput if the expected throughput is not achieved,
   * otherwise 0.
   */
  double UpdateModelThroughput(const std::string& model_sess_id,
                               double throughput);

  void SpillOutWorkload(std::vector<std::pair<SessionGroup, double> >* spillout);

  CtrlStatus UpdateModelTableRpc();

  std::vector<std::string> GetModelSessions() const;

  std::vector<std::string> GetBackupModelSessions() const;

  std::vector<InstanceInfoPtr> GetModels() const { return models_; }

  const InstanceInfo* GetInstanceInfo(const std::string& model_sess_id) const;

  double GetModelThroughput(const std::string& model_sess_id) const;

  double GetModelGPUShare(const std::string& model_sess_id) const;

  double GetModelWeight(const std::string& model_sess_id) const;

  bool IsAlive();

  bool IsIdle() const;

 private:
  void ComputeBatchSize(InstanceInfo* inst_info, double workload) const;

  void UpdateCycle();

  uint32_t node_id_;
  std::string ip_;
  std::string server_port_;
  std::string rpc_port_;
  std::string gpu_device_;
  std::string gpu_uuid_;
  size_t gpu_available_memory_;
  int beacon_sec_;
  long timeout_ms_;
  std::unique_ptr<BackendCtrl::Stub> stub_;

  int workload_id_;

  std::vector<InstanceInfoPtr> models_;
  std::vector<InstanceInfoPtr> backup_models_;
  /*!
   * \brief Mapping from model session id to instance information.
   * It's possible that multiple model session ids map to the same instance
   * info due to prefix batching.
   */
  std::unordered_map<std::string, InstanceInfoPtr> session_model_map_;
  double exec_cycle_us_;
  double duty_cycle_us_;
  bool overload_;
  /*! \brief Indicates whether the model table is dirty. */
  bool dirty_model_table_;
  std::chrono::time_point<std::chrono::system_clock> last_time_;
};

} // namespace scheduler
} // namespace nexus

#endif // NEXUS_SCHEDULER_BACKEND_DELEGATE_H_
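
// The exec_cycle_us_/duty_cycle_us_ pair above is the heart of the
// scheduler's bin packing: each loaded instance contributes its batch
// forward latency to the exec cycle, and the duty cycle is the period at
// which batches must be issued to meet the tightest SLO. One plausible
// reading of Occupancy() under those definitions (a sketch, not the
// repository implementation):
double OccupancySketch(double exec_cycle_us, double duty_cycle_us) {
  if (duty_cycle_us <= 0) {
    return 0.0;  // nothing loaded yet
  }
  // Fraction of each duty cycle spent executing; values near 1.0 mean the
  // GPU has no slack left and the delegate reports overload.
  return exec_cycle_us / duty_cycle_us;
}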
--------------------------------------------------------------------------------
/src/nexus/backend/worker.cpp:
--------------------------------------------------------------------------------
#include <glog/logging.h>
#include <pthread.h>
#include <sstream>

#include "nexus/backend/backend_server.h"
#include "nexus/backend/model_ins.h"
#include "nexus/backend/worker.h"

namespace nexus {
namespace backend {

Worker::Worker(int index, BackendServer* server,
               BlockPriorityQueue<Task>& task_queue) :
    index_(index),
    server_(server),
    task_queue_(task_queue),
    running_(false) {}

void Worker::Start(int core) {
  running_ = true;
  thread_ = std::thread(&Worker::Run, this);
  if (core >= 0) {
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(core, &cpuset);
    int rc = pthread_setaffinity_np(thread_.native_handle(),
                                    sizeof(cpu_set_t), &cpuset);
    if (rc != 0) {
      LOG(ERROR) << "Error calling pthread_setaffinity_np: " << rc;
    }
    LOG(INFO) << "Worker " << index_ << " is pinned on CPU " << core;
  }
}

void Worker::Stop() {
  running_ = false;
  if (thread_.joinable()) {
    thread_.join();
  }
}

void Worker::Run() {
  std::this_thread::sleep_for(std::chrono::milliseconds(20));
  LOG(INFO) << "Worker " << index_ << " starts";
  auto timeout = std::chrono::milliseconds(50);
  while (running_) {
    std::shared_ptr<Task> task = task_queue_.pop(timeout);
    if (task == nullptr) {
      continue;
    }
    Process(task);
  }
  LOG(INFO) << "Worker " << index_ << " stopped";
}

void Worker::Process(std::shared_ptr<Task> task) {
  switch (task->stage) {
    case kPreprocess: {
      task->model = server_->GetModel(task->query.model_session_id());
      if (task->model == nullptr) {
        std::stringstream ss;
        ss << "Model session is not loaded: " << task->query.model_session_id();
        task->result.set_status(MODEL_SESSION_NOT_LOADED);
        SendReply(std::move(task));
        break;
      }
      // Preprocess task
      if (!task->model->Preprocess(task)) {
        if (task->result.status() != CTRL_OK) {
          SendReply(std::move(task));
        } else {
          // Relay the request to the backup server with the lowest utilization
          std::vector<uint32_t> backups = task->model->BackupBackends();
          double min_util = 1.;
          std::shared_ptr<BackupClient> best_backup = nullptr;
          for (auto backend_id : backups) {
            auto backup = server_->GetBackupClient(backend_id);
            double util = backup->GetUtilization();
            if (util < min_util) {
              min_util = util;
              best_backup = backup;
            }
          }
          if (best_backup != nullptr) {
            // LOG(INFO) << "Relay request " << task->query.model_session_id() <<
            //     " to backup " << best_backup->node_id() <<
            //     " with utilization " << min_util;
            best_backup->Forward(std::move(task));
          } else {
            LOG(INFO) << "All backup servers are full";
            task->model->Preprocess(task, true);
          }
        }
      }
      break;
    }
    case kPostprocess: {
      if (task->result.status() != CTRL_OK) {
        SendReply(std::move(task));
      } else {
        task->model->Postprocess(task);
        SendReply(std::move(task));
      }
      break;
    }
    default:
      LOG(ERROR) << "Wrong task stage: " << task->stage;
  }
}

void Worker::SendReply(std::shared_ptr<Task> task) {
  task->timer.Record("end");
  task->result.set_query_id(task->query.query_id());
  task->result.set_model_session_id(task->query.model_session_id());
  task->result.set_latency_us(task->timer.GetLatencyMicros("begin", "end"));
  task->result.set_queuing_us(task->timer.GetLatencyMicros("begin", "exec"));
  if (task->model != nullptr && task->model->backup()) {
    task->result.set_use_backup(true);
  } else {
    task->result.set_use_backup(false);
  }
  MessageType reply_type = kBackendReply;
  if (task->msg_type == kBackendRelay) {
    reply_type = kBackendRelayReply;
  }
  auto msg = std::make_shared<Message>(reply_type,
                                       task->result.ByteSizeLong());
  msg->EncodeBody(task->result);
  task->connection->Write(std::move(msg));
}

} // namespace backend
} // namespace nexus
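
// A sketch of how a backend might spin up the Worker pool above, pinning
// each worker to its own CPU core via Start(core). `server`, `task_queue`,
// and `num_workers` are placeholders for values owned by BackendServer.
std::vector<std::unique_ptr<Worker>> workers;
for (int i = 0; i < num_workers; ++i) {
  workers.emplace_back(new Worker(i, server, task_queue));
  workers.back()->Start(/*core=*/i);
}
// ... serve traffic ...
for (auto& worker : workers) {
  worker->Stop();  // joins the worker thread
}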
--------------------------------------------------------------------------------
/src/nexus/app/frontend.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_APP_FRONTEND_H_
#define NEXUS_APP_FRONTEND_H_

#include <atomic>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <unordered_set>

#include "nexus/app/model_handler.h"
#include "nexus/app/query_processor.h"
#include "nexus/app/request_context.h"
#include "nexus/app/rpc_service.h"
#include "nexus/app/user_session.h"
#include "nexus/app/worker.h"
#include "nexus/common/backend_pool.h"
#include "nexus/common/block_queue.h"
#include "nexus/common/connection.h"
#include "nexus/common/model_def.h"
#include "nexus/common/server_base.h"
#include "nexus/common/spinlock.h"
#include "nexus/proto/control.grpc.pb.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {
namespace app {

class Frontend : public ServerBase, public MessageHandler {
 public:
  Frontend(std::string port, std::string rpc_port, std::string sch_addr);

  virtual ~Frontend();

  //virtual void Process(const RequestProto& request, ReplyProto* reply) = 0;

  uint32_t node_id() const { return node_id_; }

  std::string rpc_port() const { return rpc_service_.port(); }

  void Run(QueryProcessor* qp, size_t nthreads);

  void Stop();
  /*! \brief Accepts new user connection */
  void HandleAccept() final;
  /*!
   * \brief Handles new messages from user or backend connections.
   * \param conn Shared pointer of Connection.
   * \param message Received message.
   */
  void HandleMessage(std::shared_ptr<Connection> conn,
                     std::shared_ptr<Message> message) final;
  /*!
   * \brief Handles connection error.
   * \param conn Shared pointer of Connection.
   * \param ec Boost error code.
   */
  void HandleError(std::shared_ptr<Connection> conn,
                   boost::system::error_code ec) final;

  void UpdateModelRoutes(const ModelRouteUpdates& request, RpcReply* reply);

  std::shared_ptr<UserSession> GetUserSession(uint32_t uid);

 protected:
  std::shared_ptr<ModelHandler> LoadModel(const LoadModelRequest& req);

  std::shared_ptr<ModelHandler> LoadModel(const LoadModelRequest& req,
                                          LoadBalancePolicy lb_policy);

  void ComplexQuerySetup(const ComplexQuerySetupRequest& req);

  void ComplexQueryAddEdge(const ComplexQueryAddEdgeRequest& req);

 private:
  void Register();

  void Unregister();

  void KeepAlive();

  bool UpdateBackendPoolAndModelRoute(const ModelRouteProto& route);

  void RegisterUser(std::shared_ptr<UserSession> user_sess,
                    const RequestProto& request, ReplyProto* reply);

  void Daemon();

  void ReportWorkload(const WorkloadStatsProto& request);

 private:
  /*! \brief Indicator whether the frontend is running */
  std::atomic_bool running_;
  /*! \brief Interval to update stats to scheduler in seconds */
  uint32_t beacon_interval_sec_;
  /*! \brief Frontend node ID */
  uint32_t node_id_;
  /*! \brief RPC service */
  RpcService rpc_service_;
  /*! \brief RPC client connected to scheduler */
  std::unique_ptr<SchedulerCtrl::Stub> sch_stub_;
  /*! \brief Backend pool */
  BackendPool backend_pool_;
  /*!
   * \brief Map from backend ID to model sessions served at this backend.
   * Guarded by backend_sessions_mu_.
   */
  std::unordered_map<uint32_t,
                     std::unordered_set<std::string> > backend_sessions_;
  /*! \brief Request pool */
  RequestPool request_pool_;
  /*! \brief Worker pool for processing requests */
  std::vector<std::unique_ptr<Worker> > workers_;
  /*! \brief User connection pool. Guarded by user_mutex_. */
  std::unordered_set<std::shared_ptr<Connection> > connection_pool_;
  /*! \brief Map from user id to user session. Guarded by user_mutex_. */
  std::unordered_map<uint32_t, std::shared_ptr<UserSession> > user_sessions_;
  /*! \brief Map from model session ID to model handler. */
  std::unordered_map<std::string,
                     std::shared_ptr<ModelHandler> > model_pool_;

  std::thread daemon_thread_;
  /*! \brief Mutex for connection_pool_ and user_sessions_ */
  std::mutex user_mutex_;

  std::mutex backend_sessions_mu_;
  /*! \brief Random number generator */
  std::random_device rd_;
  std::mt19937 rand_gen_;
};

} // namespace app
} // namespace nexus

#endif // NEXUS_APP_FRONTEND_H_
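
// How the pieces above fit together at runtime: Register() announces the
// node to the scheduler, LoadModel() populates model_pool_ with
// ModelHandlers, HandleMessage() feeds user requests into request_pool_,
// and Run() drains it with `nthreads` Workers. A minimal launch sketch,
// assuming an AppBase-style subclass as in the examples and a hypothetical
// BuildQueryProcessor() helper:
MyApp app("9001", "9002", "127.0.0.1");   // derives from AppBase/Frontend
QueryProcessor* qp = BuildQueryProcessor();
app.Run(qp, /*nthreads=*/4);              // starts workers, serves requests
// ... on shutdown
app.Stop();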
--------------------------------------------------------------------------------
/src/nexus/backend/task.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_BACKEND_TASK_H_
#define NEXUS_BACKEND_TASK_H_

#include <atomic>
#include <memory>
#include <unordered_map>
#include <vector>
#include <yaml-cpp/yaml.h>

#include "nexus/common/block_queue.h"
#include "nexus/common/connection.h"
#include "nexus/common/data_type.h"
#include "nexus/proto/nnquery.pb.h"
#include "nexus/proto/control.pb.h"

namespace nexus {
namespace backend {

class ModelExecutor;
class ModelInstance;
class Task;

/*!
 * \brief Input contains the data of a single input and related information
 * for the neural network.
 */
class Input : public DeadlineItem {
 public:
  /*!
   * \brief Construct an Input.
   * \param deadline Deadline of the corresponding task.
   * \param tid Task id of the corresponding task.
   * \param idx Index in the inputs of the task.
   * \param arr Input array that contains the input data.
   */
  Input(TimePoint deadline, uint64_t tid, int idx, ArrayPtr arr);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Index in the input vector of the task. */
  int index;
  /*! \brief Input array that contains the data. */
  std::shared_ptr<Array> array;
};

/*!
 * \brief Output contains the data of a single output.
 */
class Output {
 public:
  /*!
   * \brief Construct an Output.
   * \param tid Task id of the corresponding task.
   * \param idx Index in the outputs of the task.
   * \param arrs Map from name to output arrays.
   */
  Output(uint64_t tid, int idx,
         const std::unordered_map<std::string, ArrayPtr>& arrs);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Index in the output vector of the task. */
  int index;
  /*! \brief Map from array name to array. */
  std::unordered_map<std::string, ArrayPtr> arrays;
};

/*! \brief Stage indicates the task processing stage */
enum Stage {
  /*! \brief Task at the pre-processing stage */
  kPreprocess = 0,
  /*! \brief Task at the model forwarding stage */
  kForward,
  /*! \brief Task at the post-processing stage */
  kPostprocess,
};

class Task : public DeadlineItem, public std::enable_shared_from_this<Task> {
 public:
  /*! \brief Construct a task without connection. */
  Task();
  /*!
   * \brief Construct a task with connection to frontend.
   * \param conn Connection to frontend server.
   */
  Task(std::shared_ptr<Connection> conn);
  /*!
   * \brief Decode query from message.
   * \param message Message received from frontend.
   */
  void DecodeQuery(std::shared_ptr<Message> message);
  /*!
   * \brief Append preprocessed input array to task.
   * \param arr Input array.
   */
  void AppendInput(ArrayPtr arr);
  /*!
   * \brief Add output at index location.
   * \param output Output content.
   * \return Whether all outputs have been filled in.
   */
  bool AddOutput(std::shared_ptr<Output> output);
  /*!
   * \brief Add virtual output at index location due to error such as timeout.
   * \param index Index of the virtual output.
   * \return Whether all outputs have been filled in.
   */
  bool AddVirtualOutput(int index);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Connection to frontend. */
  std::shared_ptr<Connection> connection;
  /*! \brief Message type */
  MessageType msg_type;
  /*! \brief Query to process */
  QueryProto query;
  /*! \brief Query result */
  QueryResultProto result;
  /*! \brief Model instance to execute for the task */
  std::shared_ptr<ModelExecutor> model;
  /*!
   * \brief Suffix model for postprocessing, only used in the share prefix
   * model.
   */
  std::shared_ptr<ModelInstance> suffix_model;
  /*! \brief Current task processing stage */
  volatile Stage stage;
  /*! \brief Individual inputs of the task */
  std::vector<std::shared_ptr<Input> > inputs;
  /*! \brief Outputs of the task */
  std::vector<std::shared_ptr<Output> > outputs;
  /*! \brief Number of outputs that have been filled in */
  std::atomic<uint32_t> filled_outputs;
  /*! \brief Attributes that need to be kept during the task */
  YAML::Node attrs;
  /*! \brief Timer that counts the time spent in each stage */
  Timer timer;

 private:
  /*! \brief Global task ID */
  static std::atomic<uint64_t> global_task_id_;
};

} // namespace backend
} // namespace nexus

#endif // NEXUS_BACKEND_TASK_H_
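
// Task lifecycle through the Stage enum above: a Worker handles
// kPreprocess, the GPU executor consumes the Task's Input items during
// kForward, and once every Output is filled the Task re-enters the queue at
// kPostprocess. A sketch of the bookkeeping, with `conn`, `msg`, `arr`,
// `output`, and `task_queue` as placeholders:
auto task = std::make_shared<Task>(conn);
task->DecodeQuery(msg);        // fills task->query from the wire message
task->AppendInput(arr);        // one Input per image/crop at kPreprocess
task->stage = kForward;
// ... batch execution happens elsewhere; when a result arrives:
if (task->AddOutput(output)) { // true once all outputs are filled in
  task->stage = kPostprocess;
  task_queue.push(task);
}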
--------------------------------------------------------------------------------
/src/nexus/proto/nnquery.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

package nexus;

message RectProto {
  uint32 left = 1;
  uint32 top = 2;
  uint32 right = 3;
  uint32 bottom = 4;
}

message ImageProto {
  enum ImageFormat {
    JPEG = 0;
    PNG = 1;
    GIF = 2;
  }
  bytes data = 1;
  ImageFormat format = 2;
  bool color = 3;

  // This is a hack. Provide the filename of the image instead of transferring
  // the real bytes. Hopefully this should save a lot of bandwidth so that a
  // single frontend server could handle all requests, thus we don't need to
  // deal with the problem of imbalanced load at backends.
  string hack_filename = 4;
}

enum DataType {
  DT_UNKNOWN = 0;
  DT_BOOL = 1;
  DT_INT8 = 2;
  DT_UINT8 = 3;
  DT_INT32 = 4;
  DT_UINT32 = 5;
  DT_FLOAT = 6;
  DT_DOUBLE = 7;
  DT_STRING = 8;
  DT_TENSOR = 50;
  DT_IMAGE = 51;
  DT_RECT = 52;
}

message TensorProto {
  // Shape of tensor
  repeated uint32 shape = 1;
  // Data type can only be int8, int32, float, double, or string
  DataType data_type = 2;
  // Exactly ONE of the following fields must be present
  repeated bool bools = 10;     // bool tensor
  repeated int32 ints = 11;     // int tensor
  repeated float floats = 12;   // float tensor
  repeated double doubles = 13; // double tensor
  repeated bytes strings = 14;  // string tensor
}

message ValueProto {
  // Name of value
  string name = 1;
  DataType data_type = 2;

  // Exactly ONE of the following fields must be present.
  bool b = 10;    // bool
  int32 i = 11;   // int
  float f = 13;   // float
  double d = 14;  // double
  bytes s = 15;   // string

  TensorProto tensor = 20; // tensor
  ImageProto image = 21;   // image
  RectProto rect = 22;     // bbox
}

message RecordProto {
  repeated ValueProto named_value = 1;
}

message RequestProto {
  // User ID
  uint32 user_id = 1;
  // Request ID
  uint32 req_id = 2;
  // Input
  ValueProto input = 3;
}

message ReplyProto {
  // User ID
  uint32 user_id = 1;
  // Request ID
  uint32 req_id = 2;
  // Status
  int32 status = 3;
  // Error message
  string error_message = 4;
  // Output
  repeated RecordProto output = 5;
  // Latency
  uint64 latency_us = 100;
  // Breakdown latency for each query
  repeated QueryLatency query_latency = 101;
}

message ModelSession {
  // Framework
  string framework = 1;
  // Model name
  string model_name = 2;
  // Model version
  uint32 version = 3;
  // Latency SLA in milliseconds
  uint32 latency_sla = 4;
  // Specify image height and width for models whose input is resizable,
  // otherwise ignored
  uint32 image_height = 10;
  uint32 image_width = 11;
}

message QueryProto {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Input of query
  ValueProto input = 3;
  // Include top k records
  uint32 topk = 10;
  // Cropped windows in the image
  repeated RectProto window = 11;
  // Output fields
  repeated string output_field = 12;
  // Threshold for confidence, default is 0
  repeated ValueProto filter = 13;
  // Latency slack in milliseconds
  int32 slack_ms = 40;
  // Show breakdown latency in the result
  bool debug = 100;
}

message QueryResultProto {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Status
  int32 status = 3;
  // Error message
  string error_message = 4;
  // Output
  repeated RecordProto output = 5;
  // Latency
  uint64 latency_us = 20;
  uint64 queuing_us = 21;

  bool use_backup = 22;
}

message QueryLatency {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Timestamp of sending the query, relative to the time the user request
  // was received
  uint64 frontend_send_timestamp_us = 3;
  // Timestamp of receiving the query result, relative to the time the user
  // request was received
  uint64 frontend_recv_timestamp_us = 4;
  // Backend processing latency
  uint64 backend_latency_us = 5;
  // Backend queuing latency
  uint64 backend_queuing_us = 6;

  bool use_backup = 7;
}
--------------------------------------------------------------------------------
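
// A hedged end-to-end sketch of the wire format above, using the standard
// protobuf-generated C++ API (nnquery.pb.h): the client fills a
// RequestProto whose input is a JPEG image; records come back in the
// ReplyProto's repeated output field.
#include <string>

#include "nexus/proto/nnquery.pb.h"

nexus::RequestProto MakeRequest(uint32_t user_id, uint32_t req_id,
                                const std::string& jpeg_bytes) {
  nexus::RequestProto request;
  request.set_user_id(user_id);
  request.set_req_id(req_id);
  auto* input = request.mutable_input();
  input->set_data_type(nexus::DT_IMAGE);
  auto* image = input->mutable_image();
  image->set_data(jpeg_bytes);                      // raw JPEG bytes
  image->set_format(nexus::ImageProto::JPEG);
  image->set_color(true);
  return request;
}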