├── tests ├── data │ ├── model_db │ │ ├── store │ │ │ └── .placeholder │ │ ├── profiles │ │ │ └── TITAN_X_(Pascal) │ │ │ │ └── darknet:yolo9000:1.txt │ │ └── db │ │ │ └── model_db.yml │ └── images │ │ ├── ILSVRC2012_val_00000001.JPEG │ │ ├── ILSVRC2012_val_00000002.JPEG │ │ ├── ILSVRC2012_val_00000003.JPEG │ │ ├── ILSVRC2012_val_00000004.JPEG │ │ └── ILSVRC2012_val_00000005.JPEG ├── cpp │ ├── test_main.cpp │ └── scheduler │ │ └── backend_delegate_test.cpp └── python │ ├── test_client.py │ └── test_async_client.py ├── python ├── requirements.txt ├── nexus │ ├── __init__.py │ ├── client.py │ └── async_client.py └── setup.py ├── .gitignore ├── src └── nexus │ ├── common │ ├── buffer.cpp │ ├── config.h │ ├── image.h │ ├── util.h │ ├── spinlock.h │ ├── server_base.h │ ├── server_base.cpp │ ├── metric.h │ ├── backend_pool.h │ ├── buffer.h │ ├── rpc_service_base.h │ ├── message.cpp │ ├── time_util.h │ ├── connection.h │ ├── device.cpp │ ├── model_def.h │ ├── time_util.cpp │ ├── device.h │ ├── metric.cpp │ ├── rpc_call.h │ ├── image.cpp │ ├── connection.cpp │ ├── message.h │ ├── util.cpp │ ├── data_type.h │ └── model_db.h │ ├── app │ ├── user_session.h │ ├── worker.h │ ├── rpc_service.h │ ├── worker.cpp │ ├── exec_block.h │ ├── rpc_service.cpp │ ├── app_base.h │ ├── query_processor.h │ ├── app_base.cpp │ ├── model_handler.h │ └── frontend.h │ ├── backend │ ├── utils.h │ ├── rpc_service.h │ ├── worker.h │ ├── backup_client.h │ ├── tf_share_model.h │ ├── darknet_model.h │ ├── backup_client.cpp │ ├── slice.h │ ├── caffe_model.h │ ├── share_prefix_model.h │ ├── rpc_service.cpp │ ├── task.cpp │ ├── slice.cpp │ ├── caffe_densecap_model.h │ ├── tensorflow_model.h │ ├── gpu_executor.h │ ├── caffe2_model.h │ ├── backend_main.cpp │ ├── utils.cpp │ ├── batch_task.cpp │ ├── model_exec.h │ ├── model_ins.cpp │ ├── batch_task.h │ ├── worker.cpp │ └── task.h │ ├── scheduler │ ├── scheduler_main.cpp │ ├── frontend_delegate.h │ ├── complex_query.h │ ├── sch_info.cpp │ ├── frontend_delegate.cpp │ ├── sch_info.h │ └── backend_delegate.h │ └── proto │ └── nnquery.proto ├── .gitmodules ├── examples ├── simple_app │ └── src │ │ ├── client.py │ │ └── frontend.cpp ├── obj_rec │ └── src │ │ └── obj_rec.cpp ├── face_rec │ └── src │ │ └── face_rec.cpp ├── README.md └── traffic_complex │ └── src │ └── traffic_complex.cpp ├── LICENSE ├── README.md ├── Dockerfile ├── tools └── test_complex_query.cpp └── BUILDING.md /tests/data/model_db/store/.placeholder: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/requirements.txt: -------------------------------------------------------------------------------- 1 | protobuf==3.11.2 2 | -------------------------------------------------------------------------------- /python/nexus/__init__.py: -------------------------------------------------------------------------------- 1 | from .proto.nnquery_pb2 import * 2 | from .client import Client 3 | from .async_client import AsyncClient 4 | -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000001.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000001.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000002.JPEG: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000002.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000003.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000003.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000004.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000004.JPEG -------------------------------------------------------------------------------- /tests/data/images/ILSVRC2012_val_00000005.JPEG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uwsampl/nexus/HEAD/tests/data/images/ILSVRC2012_val_00000005.JPEG -------------------------------------------------------------------------------- /python/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open('requirements.txt') as f: 4 | required = f.read().splitlines() 5 | 6 | setup( 7 | name='nexus', 8 | packages=['nexus'], 9 | include_package_data=True, 10 | install_requires=required, 11 | ) 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.o 3 | *.pyc 4 | *.d 5 | .DS_Store* 6 | *.swp 7 | 8 | obj/ 9 | lib/ 10 | bin/ 11 | build/ 12 | build-dep-src/ 13 | build-dep-install/ 14 | python/nexus/proto/ 15 | python/nexus.egg-info/ 16 | 17 | cmake-build*/ 18 | .clion.source.upload.marker 19 | .clangd/ 20 | compile_commands.json 21 | -------------------------------------------------------------------------------- /src/nexus/common/buffer.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/common/buffer.h" 2 | #include 3 | 4 | namespace nexus { 5 | std::shared_ptr Buffer::Slice(size_t offset, size_t nbytes) { 6 | CHECK_LE(offset + nbytes, nbytes_) << "Slice exceeds buffer boundary"; 7 | return std::shared_ptr(new Buffer( 8 | shared_from_this(), offset, nbytes)); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tests/cpp/test_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | DECLARE_string(model_root); 6 | 7 | int main(int argc, char ** argv) { 8 | testing::InitGoogleTest(&argc, argv); 9 | testing::FLAGS_gtest_death_test_style = "threadsafe"; 10 | google::ParseCommandLineFlags(&argc, &argv, true); 11 | return RUN_ALL_TESTS(); 12 | } 13 | -------------------------------------------------------------------------------- /src/nexus/common/config.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_CONFIG_H_ 2 | #define NEXUS_CONFIG_H_ 3 | 4 | #define BACKEND_DEFAULT_PORT 8001 5 | #define BACKEND_DEFAULT_RPC_PORT 8002 6 | #define FRONTEND_DEFAULT_PORT 9001 7 | #define FRONTEND_DEFAULT_RPC_PORT 9002 8 | #define SCHEDULER_DEFAULT_PORT 10001 9 | #define BEACON_INTERVAL_SEC 2 10 | #define EPOCH_INTERVAL_SEC 10 11 | 
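// BEACON_INTERVAL_SEC appears to be the heartbeat period at which frontends
// and backends report liveness to the scheduler (FrontendDelegate receives it
// as beacon_sec), and EPOCH_INTERVAL_SEC the period at which the scheduler
// recomputes its schedule; both readings are inferred from usage, not stated
// in this header.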
12 | #endif // NEXUS_CONFIG_H_ 13 | -------------------------------------------------------------------------------- /src/nexus/common/image.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_IMAGE_H_ 2 | #define NEXUS_COMMON_IMAGE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/proto/nnquery.pb.h" 7 | 8 | namespace nexus { 9 | 10 | enum ChannelOrder { 11 | CO_RGB = 0, 12 | CO_BGR = 1, 13 | }; 14 | 15 | cv::Mat _Hack_DecodeImageByFilename(const ImageProto &image, 16 | ChannelOrder order); 17 | 18 | cv::Mat DecodeImage(const ImageProto &image, ChannelOrder order); 19 | 20 | } // namespace nexus 21 | 22 | #endif // NEXUS_COMMON_IMAGE_H_ 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | # [submodule "darknet"] 2 | # path = frameworks/darknet 3 | # url = https://github.com/icemelon9/darknet.git 4 | # branch = nexus 5 | # [submodule "caffe"] 6 | # path = frameworks/caffe 7 | # url = https://github.com/icemelon9/caffe.git 8 | # branch = nexus 9 | # [submodule "frameworks/caffe2"] 10 | # path = frameworks/caffe2 11 | # url = https://github.com/icemelon9/caffe2.git 12 | # branch = nexus 13 | [submodule "frameworks/caffe2"] 14 | path = frameworks/caffe2 15 | url = https://github.com/abcdabcd987/caffe2-nexus.git 16 | branch = nexus 17 | -------------------------------------------------------------------------------- /src/nexus/app/user_session.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_USER_SESSION_H_ 2 | #define NEXUS_APP_USER_SESSION_H_ 3 | 4 | #include "nexus/common/connection.h" 5 | 6 | namespace nexus { 7 | namespace app { 8 | 9 | class UserSession : public Connection { 10 | public: 11 | UserSession(boost::asio::ip::tcp::socket socket, MessageHandler* handler) : 12 | Connection(std::move(socket), handler), user_id_(0) {} 13 | 14 | uint32_t user_id() const { return user_id_; } 15 | 16 | void set_user_id(uint32_t user_id) { user_id_ = user_id; } 17 | 18 | private: 19 | uint32_t user_id_; 20 | }; 21 | 22 | } // namespace app 23 | } // namespace nexus 24 | 25 | #endif // NEXUS_APP_USER_SESSION_H_ 26 | -------------------------------------------------------------------------------- /src/nexus/backend/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_UTILS_H_ 2 | #define NEXUS_BACKEND_UTILS_H_ 3 | 4 | #include 5 | 6 | #include "nexus/proto/nnquery.pb.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | void LoadClassnames(const std::string& filepath, 12 | std::unordered_map* classnames); 13 | 14 | void PostprocessClassification( 15 | const QueryProto& query, const float* prob, size_t nprobs, 16 | QueryResultProto* result, 17 | const std::unordered_map* classnames = nullptr); 18 | 19 | 20 | } // namespace backend 21 | } // namespace nexus 22 | 23 | #endif // NEXUS_BACKEND_UTILS_H_ 24 | -------------------------------------------------------------------------------- /src/nexus/common/util.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_UTIL_H_ 2 | #define NEXUS_COMMON_UTIL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/common/device.h" 8 | 9 | DECLARE_bool(hack_reply_omit_output); 10 | 11 | namespace nexus { 12 | 13 | void SplitString(const std::string &str, char delim, 14 | std::vector *tokens); 15 | 16 | void 
Memcpy(void *dst, const Device *dst_device, const void *src, 17 | const Device *src_device, size_t nbytes); 18 | 19 | // GetIpAddress returns the first IP address that is not localhost (127.0.0.1) 20 | std::string GetIpAddress(const std::string &prefix); 21 | 22 | } // namespace nexus 23 | 24 | #endif // NEXUS_COMMON_UTIL_H_ 25 | -------------------------------------------------------------------------------- /src/nexus/app/worker.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_WORKER_H_ 2 | #define NEXUS_APP_WORKER_H_ 3 | 4 | #include <atomic> 5 | #include <thread> 6 | 7 | #include "nexus/app/query_processor.h" 8 | #include "nexus/app/request_context.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | class Frontend; 14 | 15 | class Worker { 16 | public: 17 | Worker(QueryProcessor* qp, RequestPool& req_pool); 18 | 19 | void Start(); 20 | 21 | void Stop(); 22 | 23 | void Join(); 24 | 25 | void Run(); 26 | 27 | private: 28 | QueryProcessor* qp_; 29 | RequestPool& req_pool_; 30 | volatile std::atomic_bool running_; 31 | std::thread thread_; 32 | }; 33 | 34 | } // namespace app 35 | } // namespace nexus 36 | 37 | #endif // NEXUS_APP_WORKER_H_ 38 | -------------------------------------------------------------------------------- /src/nexus/app/rpc_service.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_RPC_SERVICE_H_ 2 | #define NEXUS_APP_RPC_SERVICE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/rpc_call.h" 7 | #include "nexus/common/rpc_service_base.h" 8 | #include "nexus/proto/control.grpc.pb.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | using AsyncService = nexus::FrontendCtrl::AsyncService; 14 | 15 | class Frontend; 16 | 17 | class RpcService : public AsyncRpcServiceBase { 18 | public: 19 | RpcService(Frontend* frontend, std::string port, size_t nthreads = 1); 20 | 21 | protected: 22 | void HandleRpcs() final; 23 | 24 | private: 25 | Frontend* frontend_; 26 | }; 27 | 28 | } // namespace app 29 | } // namespace nexus 30 | 31 | #endif // NEXUS_APP_RPC_SERVICE_H_ 32 | -------------------------------------------------------------------------------- /src/nexus/backend/rpc_service.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_RPC_SERVICE_H_ 2 | #define NEXUS_BACKEND_RPC_SERVICE_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/rpc_service_base.h" 7 | #include "nexus/proto/control.grpc.pb.h" 8 | 9 | namespace nexus { 10 | namespace backend { 11 | 12 | using AsyncService = nexus::BackendCtrl::AsyncService; 13 | 14 | class BackendServer; 15 | 16 | class BackendRpcService : public AsyncRpcServiceBase { 17 | public: 18 | BackendRpcService(BackendServer* backend, std::string port, 19 | size_t nthreads = 1); 20 | 21 | protected: 22 | void HandleRpcs() final; 23 | 24 | private: 25 | BackendServer* backend_; 26 | }; 27 | 28 | } // namespace backend 29 | } // namespace nexus 30 | 31 | #endif // NEXUS_BACKEND_RPC_SERVICE_H_ 32 | -------------------------------------------------------------------------------- /src/nexus/common/spinlock.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_SPINLOCK_H_ 2 | #define NEXUS_COMMON_SPINLOCK_H_ 3 | 4 | #include <atomic> 5 | 6 | namespace nexus { 7 | 8 | class Spinlock { 9 | public: 10 | Spinlock(): flag_(ATOMIC_FLAG_INIT) {} 11 | 12 | inline void Acquire() { 13 | while (flag_.test_and_set(std::memory_order_acquire)) 14 | ;
// spin 15 | } 16 | 17 | inline void Release() { 18 | flag_.clear(std::memory_order_release); 19 | } 20 | 21 | private: 22 | std::atomic_flag flag_; 23 | }; 24 | 25 | class SpinlockGuard { 26 | public: 27 | SpinlockGuard(Spinlock& lock): lock_(lock) { 28 | lock.Acquire(); 29 | } 30 | 31 | ~SpinlockGuard() { 32 | lock_.Release(); 33 | } 34 | 35 | private: 36 | Spinlock& lock_; 37 | }; 38 | 39 | } // namespace nexus 40 | 41 | #endif // NEXUS_COMMON_SPINLOCK_H_ 42 | -------------------------------------------------------------------------------- /tests/python/test_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import nexus 4 | 5 | _test_data = os.path.abspath(os.path.join(__file__, '../../data')) 6 | 7 | service_addr = "127.0.0.1:9001" 8 | 9 | def load_images(root): 10 | images = {} 11 | for fn in os.listdir(root): 12 | with open(os.path.join(root, fn), 'rb') as f: 13 | im = f.read() 14 | images[fn] = im 15 | return images 16 | 17 | 18 | def test_client(): 19 | user_id = random.randint(1, 1000000000) 20 | client = nexus.Client(service_addr, user_id) 21 | images = load_images(os.path.join(_test_data, 'images')) 22 | for fn in images: 23 | reply = client.request(images[fn]) 24 | print(fn) 25 | print(reply) 26 | 27 | 28 | if __name__ == "__main__": 29 | print("Test client...") 30 | test_client() 31 | -------------------------------------------------------------------------------- /src/nexus/scheduler/scheduler_main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/config.h" 4 | #include "nexus/common/util.h" 5 | #include "nexus/scheduler/scheduler.h" 6 | 7 | using namespace nexus::scheduler; 8 | 9 | DEFINE_string(port, "10001", "RPC port"); 10 | DEFINE_string(workload, "", "Static workload config file"); 11 | 12 | int main(int argc, char** argv) { 13 | // Init glog 14 | google::InitGoogleLogging(argv[0]); 15 | // Parse command line flags 16 | google::ParseCommandLineFlags(&argc, &argv, true); 17 | // Setup backtrace on segfault 18 | google::InstallFailureSignalHandler(); 19 | // Create scheduler 20 | Scheduler scheduler(FLAGS_port, 4); 21 | if (FLAGS_workload.length() > 0) { 22 | scheduler.LoadWorkloadFile(FLAGS_workload); 23 | } 24 | scheduler.Run(); 25 | while (true) { 26 | ; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/nexus/app/worker.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/frontend.h" 2 | #include "nexus/app/worker.h" 3 | 4 | namespace nexus { 5 | namespace app { 6 | 7 | Worker::Worker(QueryProcessor* qp, RequestPool& req_pool) : 8 | qp_(qp), 9 | req_pool_(req_pool), 10 | running_(false) { 11 | } 12 | 13 | void Worker::Start() { 14 | running_ = true; 15 | thread_ = std::thread(&Worker::Run, this); 16 | } 17 | 18 | void Worker::Stop() { 19 | running_ = false; 20 | } 21 | 22 | void Worker::Join() { 23 | if (thread_.joinable()) { 24 | thread_.join(); 25 | } 26 | } 27 | 28 | void Worker::Run() { 29 | auto timeout = std::chrono::milliseconds(50); 30 | while (running_) { 31 | auto req = req_pool_.GetRequest(timeout); 32 | if (req == nullptr) { 33 | continue; 34 | } 35 | qp_->Process(req); 36 | } 37 | } 38 | 39 | } // namespace app 40 | } // namespace nexus 41 | -------------------------------------------------------------------------------- /src/nexus/backend/worker.h: 
-------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_WORKER_H_ 2 | #define NEXUS_BACKEND_WORKER_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/common/block_queue.h" 9 | #include "nexus/backend/task.h" 10 | 11 | namespace nexus { 12 | namespace backend { 13 | 14 | class BackendServer; 15 | 16 | class Worker { 17 | public: 18 | Worker(int index, BackendServer* server, 19 | BlockPriorityQueue& task_queue); 20 | 21 | void Start(int core = -1); 22 | 23 | void Stop(); 24 | 25 | void Run(); 26 | 27 | private: 28 | void Process(std::shared_ptr task); 29 | 30 | void SendReply(std::shared_ptr task); 31 | 32 | private: 33 | int index_; 34 | BackendServer* server_; 35 | BlockPriorityQueue& task_queue_; 36 | volatile bool running_; 37 | std::thread thread_; 38 | }; 39 | 40 | } // namespace backend 41 | } // namespace nexus 42 | 43 | #endif // NEXUS_BACKEND_WORKER_H_ 44 | -------------------------------------------------------------------------------- /examples/simple_app/src/client.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import argparse 3 | import random 4 | import sys 5 | 6 | import nexus 7 | 8 | 9 | def read_image(img): 10 | if img == "-": 11 | return sys.stdin.buffer.read() 12 | with open(img, "rb") as f: 13 | return f.read() 14 | 15 | 16 | async def query(server, image): 17 | user_id = random.randint(0, 2 ** 31 - 1) 18 | async with nexus.AsyncClient(server, user_id) as client: 19 | _send_time, _recv_time, reply = await client.request(image) 20 | print(reply) 21 | 22 | 23 | def main(): 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("image", help="Path to image file. `-` to read from stdin.") 26 | parser.add_argument("--server", help="Frontend server", default="localhost:9001") 27 | args = parser.parse_args() 28 | 29 | image = read_image(args.image) 30 | asyncio.run(query(args.server, image)) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /tests/data/model_db/profiles/TITAN_X_(Pascal)/darknet:yolo9000:1.txt: -------------------------------------------------------------------------------- 1 | darknet:yolo9000:1:480x480 2 | TITAN_X_(Pascal) 3 | Forward latency 4 | batch,latency(us),std(us),memory(B) 5 | 1,21675.7,168.307,1069481984 6 | 2,41176.2,387.454,1566507008 7 | 3,60536,274.659,2088697856 8 | 4,80793.9,544.708,2564751360 9 | 5,98803,1305.68,3078553600 10 | 6,117068,849.329,3552509952 11 | 7,136328,1503.97,4053729280 12 | 8,155704,834.623,4536074240 13 | 9,174362,610.383,4915658752 14 | 10,194981,2471.87,5463015424 15 | 11,213786,1710.47,5913903104 16 | 12,234452,2751.93,6436093952 17 | 13,252422,1663.66,6857621504 18 | 14,273129,2958.86,7402881024 19 | 15,293524,3428.02,7853768704 20 | 16,311602,3175.26,8386445312 21 | 17,334098,2368.35,8774418432 22 | 18,350095,2430.07,9248374784 23 | 19,369662,1933.19,9772662784 24 | 20,390291,1212.97,10246619136 25 | 21,411059,2740.85,10714284032 26 | 22,428823,1291.49,11190337536 27 | 23,449042,2275.25,11720916992 28 | 24,467635,1058.77,12194873344 29 | Preprocess latency 30 | mean(us),std(us) 31 | 9480.56,7046.88 32 | Postprocess latency 33 | mean(us),std(us) 34 | 47495.1,3389.64 35 | -------------------------------------------------------------------------------- /src/nexus/backend/backup_client.h: -------------------------------------------------------------------------------- 1 | #ifndef 
NEXUS_BACKEND_BACKUP_CLIENT_H_ 2 | #define NEXUS_BACKEND_BACKUP_CLIENT_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/backend/task.h" 8 | #include "nexus/common/backend_pool.h" 9 | 10 | namespace nexus { 11 | namespace backend { 12 | 13 | class BackupClient : public BackendSession { 14 | public: 15 | explicit BackupClient(const BackendInfo& info, 16 | boost::asio::io_context& io_context, 17 | MessageHandler* handler); 18 | 19 | void Forward(std::shared_ptr task); 20 | 21 | void Reply(std::shared_ptr message); 22 | 23 | private: 24 | /*! \brief Map from task id to frontend connection. Guarded by relay_mu_. */ 25 | std::unordered_map > conns_; 26 | /*! \brief Map from task id to query id. Guarded by relay_mu_. */ 27 | std::unordered_map qid_lookup_; 28 | std::mutex relay_mu_; 29 | }; 30 | 31 | } // namespace backend 32 | } // namespace nexus 33 | 34 | #endif // NEXUS_BACKEND_BACKUP_CLIENT_H_ 35 | -------------------------------------------------------------------------------- /src/nexus/app/exec_block.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_EXEC_BLOCK_H_ 2 | #define NEXUS_APP_EXEC_BLOCK_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/app/model_handler.h" 8 | #include "nexus/app/request_context.h" 9 | 10 | namespace nexus { 11 | namespace app { 12 | 13 | using ExecFunc = std::function( 14 | std::shared_ptr ctx)>; 15 | 16 | class ExecBlock { 17 | public: 18 | ExecBlock(int id, ExecFunc func, std::vector required_vars) : 19 | id_(id), 20 | func_(func) { 21 | for (auto var_name : required_vars) { 22 | dependency_.insert(var_name); 23 | } 24 | } 25 | 26 | int id() const { return id_; } 27 | 28 | std::unordered_set dependency() const { return dependency_; } 29 | 30 | std::vector Run(std::shared_ptr ctx) { 31 | return func_(ctx); 32 | } 33 | 34 | private: 35 | int id_; 36 | ExecFunc func_; 37 | std::unordered_set dependency_; 38 | }; 39 | 40 | } // namespace app 41 | } // namespace nexus 42 | 43 | #endif // NEXUS_APP_EXEC_BLOCK_H_ 44 | -------------------------------------------------------------------------------- /src/nexus/app/rpc_service.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/frontend.h" 2 | #include "nexus/app/rpc_service.h" 3 | 4 | namespace nexus { 5 | namespace app { 6 | 7 | INSTANTIATE_RPC_CALL(AsyncService, UpdateModelRoutes, ModelRouteUpdates, 8 | RpcReply); 9 | INSTANTIATE_RPC_CALL(AsyncService, CheckAlive, CheckAliveRequest, RpcReply); 10 | 11 | RpcService::RpcService(Frontend* frontend, std::string port, size_t nthreads): 12 | AsyncRpcServiceBase(port, nthreads), 13 | frontend_(frontend) { 14 | } 15 | 16 | void RpcService::HandleRpcs() { 17 | new UpdateModelRoutes_Call( 18 | &service_, cq_.get(), 19 | [this](const grpc::ServerContext&, const ModelRouteUpdates& req, 20 | RpcReply* reply) { 21 | frontend_->UpdateModelRoutes(req, reply); 22 | }); 23 | new CheckAlive_Call( 24 | &service_, cq_.get(), 25 | [](const grpc::ServerContext&, const CheckAliveRequest&, 26 | RpcReply* reply) { 27 | reply->set_status(CTRL_OK); 28 | }); 29 | void* tag; 30 | bool ok; 31 | while (running_) { 32 | cq_->Next(&tag, &ok); 33 | if (ok) { 34 | static_cast(tag)->Proceed(); 35 | } 36 | } 37 | } 38 | 39 | } // namespace app 40 | } // namespace nexus 41 | -------------------------------------------------------------------------------- /src/nexus/common/server_base.h: -------------------------------------------------------------------------------- 1 | #ifndef 
NEXUS_COMMON_SERVER_BASE_H_ 2 | #define NEXUS_COMMON_SERVER_BASE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nexus { 9 | 10 | class ServerBase { 11 | public: 12 | // Disable copy 13 | ServerBase(const ServerBase&) = delete; 14 | ServerBase& operator=(const ServerBase&) = delete; 15 | // Construct the server given a port. 16 | ServerBase(std::string port); 17 | // Construct the server given the IP address and port. 18 | ServerBase(std::string ip, std::string port); 19 | // Get the server address 20 | std::string address() const { return ip_ + ":" + port_; } 21 | // Get listening port 22 | std::string port() const { return port_; } 23 | // Start the server. 24 | virtual void Run(); 25 | // Handle a stop operation. 26 | virtual void Stop(); 27 | protected: 28 | // Asynchronously wait for an accept request. 29 | void DoAccept(); 30 | // Asynchronously wait for a stop request. 31 | void DoAwaitStop(); 32 | // Handle an accept operation. 33 | virtual void HandleAccept() = 0; 34 | // data fields 35 | std::string ip_; 36 | std::string port_; 37 | boost::asio::io_context io_context_; 38 | boost::asio::signal_set signals_; 39 | boost::asio::ip::tcp::acceptor acceptor_; 40 | boost::asio::ip::tcp::socket socket_; 41 | }; 42 | 43 | } // namespace nexus 44 | 45 | #endif // NEXUS_COMMON_SERVER_BASE_H_ 46 | -------------------------------------------------------------------------------- /src/nexus/backend/tf_share_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_TFSHAREMODEL_H 2 | #define NEXUS_TFSHAREMODEL_H 3 | 4 | #include 5 | #include 6 | #include "nexus/backend/model_ins.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | class TensorflowModel; 12 | 13 | class TFShareModel : public ModelInstance { 14 | public: 15 | void set_batch(size_t batch) override; 16 | Shape InputShape() override; 17 | std::unordered_map OutputShapes() override; 18 | ArrayPtr CreateInputGpuArray() override; 19 | std::unordered_map GetOutputGpuArrays() override; 20 | void Preprocess(std::shared_ptr task) override; 21 | void Forward(std::shared_ptr batch_task) override; 22 | void Postprocess(std::shared_ptr task) override; 23 | 24 | TFShareModel(int gpu_id, const ModelInstanceConfig& config); 25 | bool AddModelSession(const ModelSession& model_sess); 26 | bool RemoveModelSession(const ModelSession& model_sess); 27 | size_t num_model_sessions(); 28 | 29 | private: 30 | size_t num_suffixes_; 31 | std::shared_ptr tf_share_info_; 32 | std::unique_ptr tf_model_; 33 | std::mutex loaded_suffixes_mutex_; 34 | std::unordered_set loaded_suffixes_; 35 | std::unordered_map> classnames_; 36 | }; 37 | 38 | } 39 | } 40 | 41 | #endif //NEXUS_TFSHAREMODEL_H 42 | -------------------------------------------------------------------------------- /src/nexus/app/app_base.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_APP_BASE_H_ 2 | #define NEXUS_APP_APP_BASE_H_ 3 | 4 | #include <gflags/gflags.h> 5 | #include "nexus/app/frontend.h" 6 | 7 | DECLARE_int32(load_balance); 8 | 9 | namespace nexus { 10 | namespace app { 11 | 12 | class AppBase : public Frontend { 13 | public: 14 | AppBase(const std::string& port, 15 | const std::string& rpc_port, 16 | const std::string& sch_addr, 17 | size_t nthreads); 18 | 19 | ~AppBase() override; 20 | 21 | void Start(); 22 | 23 | virtual void Setup() {} 24 | 25 | bool IsComplexQuery() const; 26 | 27 | void ComplexQuerySetup(const std::string &cq_name, uint32_t slo_us, uint32_t step_us); 28 | 29 | void
ComplexQueryAddEdge(const std::shared_ptr& source, 30 | const std::shared_ptr& target); 31 | 32 | protected: 33 | std::shared_ptr GetModelHandler( 34 | const std::string& framework, const std::string& model_name, 35 | uint32_t version, uint64_t latency_sla, float estimate_workload=0., 36 | std::vector image_size={}, 37 | LoadBalancePolicy lb_policy=LoadBalancePolicy(FLAGS_load_balance)); 38 | size_t nthreads_; 39 | QueryProcessor* qp_; 40 | 41 | std::string cq_id_; 42 | uint32_t slo_us_; 43 | uint32_t step_us_; 44 | }; 45 | 46 | void LaunchApp(AppBase* app); 47 | 48 | } // namespace app 49 | } // namespace nexus 50 | 51 | #endif // NEXUS_APP_APP_BASE_H_ 52 | -------------------------------------------------------------------------------- /tests/data/model_db/db/model_db.yml: -------------------------------------------------------------------------------- 1 | models: 2 | - framework: darknet 3 | model_name: yolo9000 4 | type: detection 5 | version: 1 6 | cfg_file: darknet/yolo9000.cfg 7 | weight_file: darknet/yolo9000.weights 8 | class_names: darknet/data/9k.names 9 | resizable: true 10 | image_height: 480 11 | image_width: 480 12 | - framework: darknet 13 | model_name: darknet 14 | type: classification 15 | version: 1 16 | cfg_file: darknet/darknet.cfg 17 | weight_file: darknet/darknet.weights 18 | class_name: darknet/data/imagenet.shortnames.list 19 | - framework: caffe 20 | model_name: vgg_face 21 | type: classification 22 | version: 1 23 | cfg_file: caffe/vgg_face/1/VGG_FACE_deploy.prototxt 24 | weight_file: caffe/vgg_face/1/VGG_FACE.caffemodel 25 | class_names: caffe/vgg_face/names.txt 26 | mean_value: [99.5503, 115.7630, 151.2761] 27 | - framework: caffe 28 | model_name: vgg_s 29 | type: classification 30 | version: 1 31 | cfg_file: caffe/vgg_s/1/vgg_s.prototxt 32 | weight_file: caffe/vgg_s/1/vgg_s.caffemodel 33 | class_names: caffe/vgg_s/synset_words.txt 34 | mean_file: caffe/vgg_s/1/vgg_s_mean.binaryproto 35 | - framework: caffe 36 | model_name: vgg16 37 | type: classification 38 | version: 1 39 | cfg_file: caffe/vgg16/1/vgg16.prototxt 40 | weight_file: caffe/vgg16/1/vgg16.caffemodel 41 | class_names: caffe/vgg16/synset_words.txt 42 | mean_value: [103.939, 116.779, 123.68] 43 | -------------------------------------------------------------------------------- /src/nexus/scheduler/frontend_delegate.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 2 | #define NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nexus/proto/control.grpc.pb.h" 11 | 12 | namespace nexus { 13 | namespace scheduler { 14 | 15 | class Scheduler; 16 | 17 | class FrontendDelegate { 18 | public: 19 | FrontendDelegate(uint32_t node_id, const std::string& ip, 20 | const std::string& server_port, const std::string& rpc_addr, 21 | int beacon_sec); 22 | 23 | uint32_t node_id() const { return node_id_; } 24 | 25 | std::time_t LastAliveTime(); 26 | 27 | void Tick(); 28 | 29 | bool IsAlive(); 30 | 31 | void SubscribeModel(const std::string& model_session_id); 32 | 33 | const std::unordered_set& subscribe_models() const { 34 | return subscribe_models_; 35 | } 36 | 37 | CtrlStatus UpdateModelRoutesRpc(const ModelRouteUpdates& request); 38 | 39 | private: 40 | uint32_t node_id_; 41 | std::string ip_; 42 | std::string server_port_; 43 | std::string rpc_port_; 44 | int beacon_sec_; 45 | long timeout_ms_; 46 | std::unique_ptr stub_; 47 | std::chrono::time_point last_time_; 
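// last_time_ presumably records the most recent heartbeat: Tick() refreshes
// it, and IsAlive() compares the elapsed time against beacon_sec_ to detect
// dead frontends (inferred from the API above; not stated in the original).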
48 | std::unordered_set subscribe_models_; 49 | }; 50 | 51 | } // namespace scheduler 52 | } // namespace nexus 53 | 54 | #endif // NEXUS_SCHEDULER_FRONTEND_DELEGATE_H_ 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018-2020 University of Washington All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | Neither the name of the University of Washington nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF WASHINGTON AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF WASHINGTON OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
12 | 13 | 14 | -------------------------------------------------------------------------------- /src/nexus/common/server_base.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/common/server_base.h" 5 | 6 | namespace nexus { 7 | 8 | ServerBase::ServerBase(std::string port) : 9 | ServerBase("0.0.0.0", port) { 10 | } 11 | 12 | ServerBase::ServerBase(std::string ip, std::string port) 13 | : ip_(ip), 14 | port_(port), 15 | io_context_(), 16 | signals_(io_context_), 17 | acceptor_(io_context_), 18 | socket_(io_context_) { 19 | // handle stop signal 20 | signals_.add(SIGINT); 21 | signals_.add(SIGTERM); 22 | 23 | DoAwaitStop(); 24 | 25 | boost::asio::ip::tcp::resolver resolver(io_context_); 26 | boost::asio::ip::tcp::endpoint endpoint = *resolver.resolve({ip, port}); 27 | acceptor_.open(endpoint.protocol()); 28 | acceptor_.set_option(boost::asio::ip::tcp::acceptor::reuse_address(true)); 29 | acceptor_.bind(endpoint); 30 | acceptor_.listen(); 31 | 32 | DoAccept(); 33 | } 34 | 35 | void ServerBase::Run() { 36 | io_context_.run(); 37 | } 38 | 39 | void ServerBase::Stop() { 40 | acceptor_.close(); 41 | } 42 | 43 | void ServerBase::DoAccept() { 44 | acceptor_.async_accept( 45 | socket_, 46 | [this](boost::system::error_code ec){ 47 | if (!acceptor_.is_open()) { 48 | return; 49 | } 50 | if (!ec) { 51 | HandleAccept(); 52 | } 53 | DoAccept(); 54 | }); 55 | } 56 | 57 | void ServerBase::DoAwaitStop() { 58 | signals_.async_wait( 59 | [this](boost::system::error_code /*ec*/, int /*signo*/) { 60 | Stop(); 61 | }); 62 | } 63 | 64 | } // namespace nexus 65 | -------------------------------------------------------------------------------- /src/nexus/backend/darknet_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_DARKNET_MODEL_H_ 2 | #define NEXUS_BACKEND_DARKNET_MODEL_H_ 3 | 4 | #ifdef USE_DARKNET 5 | 6 | #include 7 | #include 8 | 9 | #include "nexus/backend/model_ins.h" 10 | // Darknet headers 11 | extern "C" { 12 | #include "darknet_server.h" 13 | } 14 | 15 | namespace nexus { 16 | namespace backend { 17 | 18 | class DarknetModel : public ModelInstance { 19 | public: 20 | DarknetModel(int gpu_id, const ModelInstanceConfig& config); 21 | 22 | ~DarknetModel(); 23 | 24 | Shape InputShape() final; 25 | 26 | std::unordered_map OutputShapes() final; 27 | 28 | ArrayPtr CreateInputGpuArray() final; 29 | 30 | std::unordered_map GetOutputGpuArrays() final; 31 | 32 | void Preprocess(std::shared_ptr task) final; 33 | 34 | void Forward(std::shared_ptr batch_task) final; 35 | 36 | void Postprocess(std::shared_ptr task) final; 37 | 38 | private: 39 | void MarshalDetectionResult( 40 | const QueryProto& query, const float* probs, size_t nprobs, 41 | const int* boxes, size_t nboxes, QueryResultProto* result); 42 | 43 | network* net_; 44 | int image_height_; 45 | int image_width_; 46 | Shape input_shape_; 47 | Shape output_shape_; 48 | size_t input_size_; 49 | size_t output_size_; 50 | size_t output_layer_id_; 51 | std::string output_name_; 52 | std::unordered_map classnames_; 53 | bool first_input_array_; 54 | }; 55 | 56 | } // namespace backend 57 | } // namespace nexus 58 | 59 | #endif // USE_DARKNET 60 | 61 | #endif // NEXUS_BACKEND_DARKNET_MODEL_H_ 62 | -------------------------------------------------------------------------------- /examples/obj_rec/src/obj_rec.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 
#include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class ObjRecApp : public AppBase { 9 | public: 10 | ObjRecApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads) : 12 | AppBase(port, rpc_port, sch_addr, nthreads) { 13 | } 14 | 15 | void Setup() final { 16 | model_ = GetModelHandler("caffe2", "vgg16", 1, 1000); 17 | } 18 | 19 | void Process(const RequestProto& request, ReplyProto* reply) final { 20 | auto output = model_->Execute(request.input(), 21 | {"class_id", "class_prob", "class_name"}); 22 | output->FillReply(reply); 23 | } 24 | 25 | private: 26 | std::shared_ptr model_; 27 | }; 28 | 29 | DEFINE_string(port, "9001", "Server port"); 30 | DEFINE_string(rpc_port, "9002", "RPC port"); 31 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler IP address"); 32 | DEFINE_int32(nthread, 1000, "Number of threads processing requests " 33 | "(default: 1000)"); 34 | 35 | int main(int argc, char** argv) { 36 | // log to stderr 37 | FLAGS_logtostderr = 1; 38 | // Init glog 39 | google::InitGoogleLogging(argv[0]); 40 | // Parse command line flags 41 | google::ParseCommandLineFlags(&argc, &argv, true); 42 | // Setup backtrace on segfault 43 | google::InstallFailureSignalHandler(); 44 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 45 | // Create the frontend server 46 | ObjRecApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread); 47 | LaunchApp(&app); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /examples/face_rec/src/face_rec.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class FaceRecApp : public AppBase { 9 | public: 10 | FaceRecApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads) : 12 | AppBase(port, rpc_port, sch_addr, nthreads) { 13 | } 14 | 15 | void Setup() final { 16 | model_ = GetModelHandler("caffe2", "vgg_face", 1, 1000); 17 | } 18 | 19 | void Process(const RequestProto& request, ReplyProto* reply) final { 20 | auto output = model_->Execute(request.input(), 21 | {"class_id", "class_prob", "class_name"}); 22 | output->FillReply(reply); 23 | } 24 | 25 | private: 26 | std::shared_ptr model_; 27 | }; 28 | 29 | DEFINE_string(port, "9001", "Server port"); 30 | DEFINE_string(rpc_port, "9002", "RPC port"); 31 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address"); 32 | DEFINE_int32(nthread, 1000, "Number of threads processing requests " 33 | "(default: 1000)"); 34 | 35 | int main(int argc, char** argv) { 36 | // log to stderr 37 | FLAGS_logtostderr = 1; 38 | // Init glog 39 | google::InitGoogleLogging(argv[0]); 40 | // Parse command line flags 41 | google::ParseCommandLineFlags(&argc, &argv, true); 42 | // Setup backtrace on segfault 43 | google::InstallFailureSignalHandler(); 44 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 45 | // Create the frontend server 46 | FaceRecApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread); 47 | LaunchApp(&app); 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /src/nexus/app/query_processor.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_APP_QUERY_PROCESSOR_H_ 2 | #define NEXUS_APP_QUERY_PROCESSOR_H_ 3 | 4 | 
#include "nexus/app/exec_block.h" 5 | #include "nexus/app/request_context.h" 6 | #include 7 | 8 | namespace nexus { 9 | namespace app { 10 | 11 | class QueryProcessor { 12 | public: 13 | QueryProcessor(std::vector blocks) : 14 | blocks_(blocks) { 15 | std::unordered_set block_ids; 16 | for (auto block : blocks) { 17 | if (block_ids.count(block->id()) > 0) { 18 | LOG(FATAL) << "Block id " << block->id() << " already exists"; 19 | } 20 | block_ids.insert(block->id()); 21 | } 22 | } 23 | 24 | void Process(std::shared_ptr ctx) { 25 | if (ctx->state() == kUninitialized) { 26 | // LOG(INFO) << "Init req " << ctx->const_request().user_id() << ":" << 27 | // ctx->const_request().req_id(); 28 | ctx->SetExecBlocks(blocks_); 29 | } 30 | while (!ctx->finished()) { 31 | auto block = ctx->NextReadyBlock(); 32 | if (block == nullptr) { 33 | ctx->SetState(kBlocking); 34 | return; 35 | } 36 | // LOG(INFO) << "Exec req " << ctx->const_request().user_id() << ":" << 37 | // ctx->const_request().req_id() << ", block " << block->id(); 38 | auto ret = block->Run(ctx); 39 | if (ctx->state() == kError) { 40 | break; 41 | } 42 | ctx->AddBlockReturn(ret); 43 | } 44 | // LOG(INFO) << "Reply req " << ctx->const_request().user_id() << ":" << 45 | // ctx->const_request().req_id(); 46 | ctx->SendReply(); 47 | } 48 | 49 | private: 50 | std::vector blocks_; 51 | }; 52 | 53 | } // namespace app 54 | } // namespace nexus 55 | 56 | #endif // NEXUS_APP_QUERY_PROCESSOR_H_ 57 | -------------------------------------------------------------------------------- /src/nexus/scheduler/complex_query.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_COMPLEXQUERY_H 2 | #define NEXUS_SCHEDULER_COMPLEXQUERY_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/scheduler/sch_info.h" 9 | 10 | namespace nexus { 11 | namespace scheduler { 12 | 13 | class ComplexQuery { 14 | public: 15 | struct NodeID { 16 | std::string framework; 17 | std::string model_name; 18 | NodeID(std::string framework_, std::string model_name_); 19 | std::string ToString() const; 20 | }; 21 | ComplexQuery(std::string cq_id, int slo_us, int segments); 22 | ~ComplexQuery(); 23 | ComplexQuery(ComplexQuery &&other) noexcept; 24 | ComplexQuery& operator=(ComplexQuery &&other) noexcept; 25 | 26 | void AddNode(NodeID node_id, std::string current_model_sess_id, 27 | const ModelProfile& profile); 28 | void AddChild(const NodeID &parent, const NodeID &child); 29 | void SetRequestRate(const NodeID &node_id, double request_rate); 30 | std::unordered_map GetSLOms(); 31 | double GetMinimalGPUs(); 32 | void DynamicProgramming(); 33 | void Finalize(); 34 | bool IsFinalized(); 35 | 36 | private: 37 | class Impl; 38 | std::unique_ptr impl_; 39 | }; 40 | 41 | inline bool operator==(const ComplexQuery::NodeID& lhs, const ComplexQuery::NodeID& rhs) { 42 | return lhs.framework == rhs.framework && lhs.model_name == rhs.model_name; 43 | } 44 | 45 | } // namespace scheduler 46 | } // namespace nexus 47 | 48 | namespace std { 49 | template<> struct hash { 50 | std::size_t operator()(const nexus::scheduler::ComplexQuery::NodeID &v) const { 51 | size_t h = std::hash{}(v.framework); 52 | h = h * 31 + std::hash{}(v.model_name); 53 | return h; 54 | } 55 | }; 56 | } 57 | 58 | #endif //NEXUS_SCHEDULER_COMPLEXQUERY_H 59 | -------------------------------------------------------------------------------- /src/nexus/backend/backup_client.cpp: -------------------------------------------------------------------------------- 1 | 
#include 2 | 3 | #include "nexus/backend/backup_client.h" 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | BackupClient::BackupClient(const BackendInfo& info, 9 | boost::asio::io_context& io_context, 10 | MessageHandler* handler) : 11 | BackendSession(info, io_context, handler) {} 12 | 13 | void BackupClient::Forward(std::shared_ptr task) { 14 | uint64_t qid = task->query.query_id(); 15 | task->query.set_query_id(task->task_id); 16 | auto msg = std::make_shared(kBackendRelay, 17 | task->query.ByteSizeLong()); 18 | msg->EncodeBody(task->query); 19 | Write(std::move(msg)); 20 | std::lock_guard lock(relay_mu_); 21 | qid_lookup_.emplace(task->task_id, qid); 22 | conns_.emplace(task->task_id, task->connection); 23 | } 24 | 25 | void BackupClient::Reply(std::shared_ptr message) { 26 | QueryResultProto result; 27 | message->DecodeBody(&result); 28 | uint64_t tid = result.query_id(); 29 | std::lock_guard lock(relay_mu_); 30 | auto qid_iter = qid_lookup_.find(tid); 31 | if (qid_iter == qid_lookup_.end()) { 32 | LOG(ERROR) << "Cannot find query ID for task " << tid; 33 | return; 34 | } 35 | uint64_t qid = qid_iter->second; 36 | result.set_query_id(qid); 37 | // LOG(INFO) << "Convert " << result.model_session_id() << " tid " << tid << 38 | // " to qid " << qid; 39 | auto reply_msg = std::make_shared(kBackendReply, 40 | result.ByteSizeLong()); 41 | reply_msg->EncodeBody(result); 42 | auto conn_iter = conns_.find(tid); 43 | conn_iter->second->Write(std::move(reply_msg)); 44 | qid_lookup_.erase(qid_iter); 45 | conns_.erase(conn_iter); 46 | } 47 | 48 | } // namespace backend 49 | } // namespace nexus 50 | -------------------------------------------------------------------------------- /src/nexus/backend/slice.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_SLICE_H_ 2 | #define NEXUS_BACKEND_SLICE_H_ 3 | 4 | #include 5 | #include 6 | 7 | namespace nexus { 8 | namespace backend { 9 | 10 | class Slice { 11 | public: 12 | /*! 13 | * \brief construct a slice with equal size splits. 14 | * \param nsplits Number of splits 15 | * \param nfloats Number of floats in a slice 16 | */ 17 | Slice(size_t nsplits, size_t nfloats); 18 | /*! 19 | * \brief construct a slice with varied sizes. 20 | * \param nfloats A vector of number of floats 21 | * \param multiplier Multiplier to number of floats 22 | */ 23 | Slice(std::vector nfloats, size_t multiplier = 1); 24 | Slice(std::vector nfloats, size_t multiplier = 1); 25 | /*! 26 | * \brief construct a slice with varied sizes. 27 | * \param nsplits Number of splits 28 | * \param nfloats An array of number of floats 29 | * \param multiplier Multiplier to number of floats 30 | */ 31 | Slice(size_t nslices, float* nfloats, size_t multiplier = 1); 32 | /*! 33 | * \brief get the offset for idx-th slice 34 | * \param idx Index of the slice 35 | * \return offset of idx-th slice 36 | */ 37 | size_t offset(int idx) const; 38 | /*! 39 | * \brief get the number of floats in slice idx 40 | * \param idx Index of the split 41 | * \return number of floats 42 | */ 43 | size_t num_elements(int idx) const; 44 | /*! \brief get number of splits */ 45 | size_t num_splits() const { return offsets_.size(); } 46 | /*! 
\brief total number of floats in the buffer */ 47 | size_t total_elements() const { return total_elements_; } 48 | 49 | private: 50 | bool equal_split_; 51 | size_t equal_slice_size_; 52 | std::vector slice_sizes_; 53 | std::vector offsets_; 54 | size_t total_elements_; 55 | }; 56 | 57 | } // namespace backend 58 | } // namespace nexus 59 | 60 | #endif // NEXUS_BACKEND_SLICE_H_ 61 | -------------------------------------------------------------------------------- /src/nexus/scheduler/sch_info.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/scheduler/sch_info.h" 2 | #include 3 | 4 | namespace nexus { 5 | namespace scheduler { 6 | 7 | void SessionInfo::UpdateWorkload(uint32_t frontend_id, const ModelStatsProto &model_stats) { 8 | auto iter = workloads.find(frontend_id); 9 | if (iter == workloads.end()) { 10 | LOG(ERROR) << "Cannot find rps for " << frontend_id << " in " << 11 | model_stats.model_session_id(); 12 | return; 13 | } 14 | auto rps = iter->second; 15 | for (auto num_requests : model_stats.num_requests()) { 16 | if (rps->rate() < 0 && num_requests == 0) { 17 | continue; 18 | } 19 | rps->AddSample(num_requests); 20 | } 21 | } 22 | double SessionInfo::TotalThroughput() const { 23 | double total = 0.; 24 | for (auto iter : backend_weights) { 25 | total += iter.second; 26 | } 27 | return total; 28 | } 29 | void SessionInfo::SubscribeModelSession(uint32_t frontend_id, const std::string &model_sess_id) { 30 | if (session_subscribers.count(model_sess_id) == 0) { 31 | session_subscribers.emplace(model_sess_id, ServerList{frontend_id}); 32 | } else { 33 | session_subscribers.at(model_sess_id).insert(frontend_id); 34 | } 35 | workloads.emplace(frontend_id, 36 | std::make_shared(1, FLAGS_avg_interval)); 37 | } 38 | bool SessionInfo::UnsubscribleModelSession(uint32_t frontend_id, const std::string &model_sess_id) { 39 | session_subscribers.at(model_sess_id).erase(frontend_id); 40 | workloads.erase(frontend_id); 41 | if (has_static_workload || !session_subscribers.at(model_sess_id).empty()) { 42 | return false; 43 | } 44 | // Remove this model session 45 | session_subscribers.erase(model_sess_id); 46 | for (auto iter = model_sessions.begin(); iter != model_sessions.end(); 47 | ++iter) { 48 | if (ModelSessionToString(*iter) == model_sess_id) { 49 | model_sessions.erase(iter); 50 | break; 51 | } 52 | } 53 | return true; 54 | } 55 | } 56 | } -------------------------------------------------------------------------------- /src/nexus/backend/caffe_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE 5 | 6 | #include 7 | 8 | #include "nexus/backend/model_ins.h" 9 | 10 | // Caffe headers 11 | // avoid redefined keywords from darknet 12 | #ifdef GPU 13 | #undef GPU 14 | #endif 15 | #ifdef CUDNN 16 | #undef CUDNN 17 | #endif 18 | // flag to include OpenCV related functions in Caffe 19 | #define USE_OPENCV 20 | #include "caffe/caffe.hpp" 21 | #include "caffe/data_transformer.hpp" 22 | 23 | namespace nexus { 24 | namespace backend { 25 | 26 | class CaffeModel : public ModelInstance { 27 | public: 28 | CaffeModel(int gpu_id, const ModelInstanceConfig& config); 29 | 30 | Shape InputShape() final; 31 | 32 | std::unordered_map OutputShapes() final; 33 | 34 | ArrayPtr CreateInputGpuArray() final; 35 | 36 | std::unordered_map GetOutputGpuArrays() final; 37 | 38 | void Preprocess(std::shared_ptr task) final; 39 | 
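// Preprocess (above), Forward, and Postprocess (below) presumably correspond
// to the per-task pipeline stages (each Task starts at kPreprocess; see
// task.cpp): CPU-side input preparation, batched GPU inference, and result
// marshalling.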
40 | void Forward(std::shared_ptr batch_task) final; 41 | 42 | void Postprocess(std::shared_ptr task) final; 43 | 44 | // Caffe neural network for serving 45 | std::unique_ptr > net_; 46 | // image size 47 | int image_height_; 48 | int image_width_; 49 | // input shape of neural network 50 | Shape input_shape_; 51 | // output shape of neural network 52 | Shape output_shape_; 53 | // size of input in a single batch 54 | size_t input_size_; 55 | // size of output in a single batch 56 | size_t output_size_; 57 | int input_blob_idx_; 58 | std::string output_blob_name_; 59 | std::unordered_map classnames_; 60 | // transformer for input 61 | std::unique_ptr > transformer_; 62 | std::vector > > input_blobs_; 63 | std::string prefix_layer_; 64 | int prefix_index_; 65 | }; 66 | 67 | } // namespace backend 68 | } // namespace nexus 69 | 70 | #endif // USE_CAFFE 71 | 72 | #endif // NEXUS_BACKEND_CAFFE_MODEL_H_ 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Nexus 2 | ===== 3 | 4 | [![Docker Image](https://img.shields.io/microbadger/image-size/abcdabcd987/nexus)](https://hub.docker.com/repository/docker/abcdabcd987/nexus) 5 | 6 | Nexus is a scalable and efficient serving system for DNN applications on a GPU 7 | cluster. 8 | 9 | ## SOSP 2019 Paper 10 | 11 | * Check out our SOSP 2019 paper [here](https://doi.org/10.1145/3341301.3359658). 12 | * Check out the [Google Drive](https://drive.google.com/open?id=104UqrlNrfJoQnGdkxTQ56mfxSBFyJTcr) that contains a sample of the video dataset. 13 | 14 | ## Building Nexus 15 | 16 | See [BUILDING.md](BUILDING.md) for details. 17 | 18 | ## Docker and Examples 19 | 20 | We provide a [Docker image](https://hub.docker.com/repository/docker/abcdabcd987/nexus) 21 | so that you can try Nexus quickly. There is also an example that walks step by 22 | step through running Nexus with a simple application. We recommend taking a 23 | look [here](examples/README.md). 24 | 25 | ## Deployment 26 | 27 | ### Download Model Zoo 28 | 29 | Nexus publishes a public model zoo on our department-hosted GitLab. To download it, 30 | you need to install [Git LFS](https://git-lfs.github.com/) first. Then, run: 31 | 32 | ```bash 33 | git clone https://gitlab.cs.washington.edu/syslab/nexus-models 34 | cd nexus-models 35 | git lfs checkout 36 | ``` 37 | 38 | ### Run the Profiler 39 | 40 | Nexus is a profile-based system, so before running it, make sure you have 41 | profiled all the GPUs. To profile a certain model on a certain GPU, run: 42 | 43 | ```bash 44 | nexus/tools/profiler/profiler.py --gpu_list=GPU_INDEX --gpu_uuid \ 45 | --framework=tensorflow --model=MODEL_NAME \ 46 | --model_root=nexus-models/ --dataset=/path/to/datasets/ 47 | ``` 48 | 49 | The profile will be saved to the `--model_root` directory. 50 | See [examples](examples/README.md) for more concrete usage. 51 | 52 | ### Run Nexus 53 | 54 | To run Nexus, you need to run the **scheduler** first, then spawn a **backend** for each 55 | GPU card, and finally run the Nexus **frontend** of your application. 56 | See [examples](examples/README.md) for more concrete usage; a rough sketch of the sequence is shown below.
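The following is a minimal sketch, assuming the built binaries land in `bin/`. The backend invocation is a placeholder (its flags are not shown in this snapshot); the scheduler flags match `src/nexus/scheduler/scheduler_main.cpp` and the frontend flags match `examples/obj_rec`:

```bash
# 1. Start the scheduler. It listens on port 10001 by default and can
#    optionally preload a static workload config via --workload.
bin/scheduler --port=10001

# 2. Start one backend per GPU card (flags here are illustrative placeholders).
bin/backend --gpu=0 --sch_addr=127.0.0.1

# 3. Start the application frontend, e.g. the object recognition example,
#    which serves clients on port 9001 and control RPCs on port 9002.
bin/obj_rec --port=9001 --rpc_port=9002 --sch_addr=127.0.0.1
```

Clients such as `nexus.Client` in `python/nexus` then connect to the frontend's server port.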
57 | -------------------------------------------------------------------------------- /src/nexus/common/metric.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_METRIC_H_ 2 | #define NEXUS_COMMON_METRIC_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "nexus/common/time_util.h" 11 | 12 | namespace nexus { 13 | 14 | class Metric { 15 | public: 16 | virtual void Reset() = 0; 17 | }; 18 | 19 | class Counter : public Metric { 20 | public: 21 | Counter(); 22 | 23 | void Increase(uint64_t value); 24 | 25 | void Reset() final; 26 | 27 | private: 28 | std::atomic count_; 29 | }; 30 | 31 | class IntervalCounter : public Metric, public Tickable { 32 | public: 33 | IntervalCounter(uint32_t interval_sec); 34 | 35 | virtual ~IntervalCounter() = default; 36 | 37 | void Increase(uint64_t value); 38 | 39 | void Reset() override; 40 | 41 | std::vector GetHistory(); 42 | 43 | protected: 44 | void TickImpl() final; 45 | 46 | private: 47 | uint32_t tick_interval_sec_; 48 | TimePoint last_tick_time_; 49 | std::atomic count_; 50 | std::vector history_; 51 | std::mutex history_mutex_; 52 | std::atomic_bool running_; 53 | }; 54 | 55 | class EWMA { 56 | public: 57 | EWMA(uint32_t sample_interval_sec, uint32_t avg_interval_sec); 58 | 59 | EWMA(const EWMA& other); 60 | 61 | double rate() const { return rate_; } 62 | 63 | void AddSample(uint64_t count); 64 | 65 | EWMA& operator=(const EWMA& other); 66 | 67 | private: 68 | uint32_t sample_interval_sec_; 69 | uint32_t avg_interval_sec_; 70 | double rate_; 71 | double alpha_; 72 | }; 73 | 74 | class MetricRegistry { 75 | public: 76 | static MetricRegistry& Singleton(); 77 | 78 | std::shared_ptr CreateCounter(); 79 | 80 | std::shared_ptr CreateIntervalCounter(uint32_t interval_sec); 81 | 82 | void RemoveMetric(std::shared_ptr metric); 83 | 84 | private: 85 | MetricRegistry() {} 86 | 87 | std::mutex mutex_; 88 | std::unordered_set > metrics_; 89 | }; 90 | 91 | } // namespace nexus 92 | 93 | #endif // NEXUS_COMMON_METRIC_H_ 94 | -------------------------------------------------------------------------------- /src/nexus/backend/share_prefix_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 2 | #define NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 3 | 4 | #include 5 | 6 | #include "nexus/backend/model_ins.h" 7 | 8 | namespace nexus { 9 | namespace backend { 10 | 11 | class SharePrefixModel : public ModelInstance { 12 | public: 13 | SharePrefixModel(int gpu_id, const ModelInstanceConfig& config); 14 | 15 | virtual void set_batch(size_t batch) override; 16 | 17 | Shape InputShape() final; 18 | 19 | std::unordered_map OutputShapes() final; 20 | 21 | ArrayPtr CreateInputGpuArray() final; 22 | 23 | std::unordered_map GetOutputGpuArrays() final; 24 | 25 | void Preprocess(std::shared_ptr task) final; 26 | 27 | void Forward(std::shared_ptr batch_task) final; 28 | 29 | void Postprocess(std::shared_ptr task) final; 30 | 31 | int num_model_sessions(); 32 | 33 | std::vector ModelSessions(); 34 | 35 | bool HasModelSession(const std::string& model_sess_id); 36 | 37 | bool AddModelSession(const ModelSession& model_sess); 38 | 39 | void RemoveModelSession(const std::string& model_sess_id); 40 | 41 | private: 42 | // Prefix model information 43 | int prefix_length_; 44 | std::unique_ptr prefix_model_; 45 | std::string prefix_output_name_; 46 | Shape prefix_output_shape_; 47 | std::unordered_map 
prefix_batch_output_arr_; 48 | // Suffix models information 49 | std::unordered_map > suffix_models_; 51 | std::unordered_map suffix_input_arrays_; 52 | std::unordered_map suffix_output_names_; 53 | std::unordered_map suffix_output_sizes_; 54 | size_t max_suffix_output_size_; 55 | // Guard suffix_models_, suffix_input_arrays_, suffix_output_names_, 56 | // suffix_output_sizes_, max_suffix_output_size_ 57 | std::mutex suffix_mu_; 58 | }; 59 | 60 | } // namespace backend 61 | } // namespace nexus 62 | 63 | #endif // NEXUS_BACKEND_SHARE_PREFIX_MODEL_H_ 64 | -------------------------------------------------------------------------------- /src/nexus/common/backend_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_BACKEND_POOL_H_ 2 | #define NEXUS_COMMON_BACKEND_POOL_H_ 3 | 4 | #include 5 | #include 6 | 7 | #include "nexus/common/connection.h" 8 | #include "nexus/common/time_util.h" 9 | #include "nexus/proto/control.grpc.pb.h" 10 | 11 | namespace nexus { 12 | 13 | class BackendPool; 14 | 15 | class BackendSession : public Connection { 16 | public: 17 | explicit BackendSession(const BackendInfo& info, 18 | boost::asio::io_context& io_context, 19 | MessageHandler* handler); 20 | 21 | ~BackendSession(); 22 | 23 | inline uint32_t node_id() const { return node_id_; } 24 | 25 | inline std::string ip() const { return ip_; } 26 | 27 | inline std::string server_port() const { return server_port_; } 28 | 29 | inline std::string rpc_port() const { return rpc_port_; } 30 | 31 | virtual void Start(); 32 | 33 | virtual void Stop(); 34 | 35 | double GetUtilization(); 36 | 37 | protected: 38 | /*! \brief Asynchronously connect to backend server. */ 39 | void DoConnect(); 40 | 41 | /*! \brief Boost io service */ 42 | boost::asio::io_context& io_context_; 43 | uint32_t node_id_; 44 | std::string ip_; 45 | std::string server_port_; 46 | std::string rpc_port_; 47 | std::atomic_bool running_; 48 | std::unique_ptr stub_; 49 | double utilization_; 50 | TimePoint expire_; 51 | std::mutex util_mu_; 52 | }; 53 | 54 | class BackendPool { 55 | public: 56 | BackendPool() {} 57 | 58 | std::shared_ptr GetBackend(uint32_t backend_id); 59 | 60 | void AddBackend(std::shared_ptr backend); 61 | 62 | void RemoveBackend(std::shared_ptr backend); 63 | 64 | void RemoveBackend(uint32_t backend_id); 65 | 66 | std::vector UpdateBackendList(std::unordered_set list); 67 | 68 | void StopAll(); 69 | 70 | protected: 71 | std::unordered_map > backends_; 72 | std::mutex mu_; 73 | }; 74 | 75 | } // namespace nexus 76 | 77 | #endif // NEXUS_COMMON_BACKEND_POOL_H_ 78 | -------------------------------------------------------------------------------- /src/nexus/backend/rpc_service.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/backend/backend_server.h" 5 | #include "nexus/backend/rpc_service.h" 6 | #include "nexus/common/rpc_call.h" 7 | 8 | DECLARE_int32(occupancy_valid); 9 | 10 | namespace nexus { 11 | namespace backend { 12 | 13 | INSTANTIATE_RPC_CALL(AsyncService, UpdateModelTable, ModelTableConfig, 14 | RpcReply); 15 | INSTANTIATE_RPC_CALL(AsyncService, CheckAlive, CheckAliveRequest, RpcReply); 16 | #ifdef USE_GPU 17 | INSTANTIATE_RPC_CALL(AsyncService, CurrentUtilization, UtilizationRequest, 18 | UtilizationReply); 19 | #endif 20 | 21 | BackendRpcService::BackendRpcService(BackendServer* backend, std::string port, 22 | size_t nthreads): 23 | AsyncRpcServiceBase(port, nthreads), 24 | 
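`SharePrefixModel` above implements one of Nexus's signature optimizations: when several model sessions share the same leading layers (for example, models fine-tuned from a common base network), the backend loads the shared prefix once, batches every session's requests through it together, and runs only the lightweight per-session suffixes separately. A hedged usage sketch against the interface above (`DemoSharePrefix` and the two session arguments are illustrative, not from the repository):

```cpp
// Sketch: two fine-tuned variants served through one shared prefix.
// AddModelSession/HasModelSession/num_model_sessions are the real methods
// declared in share_prefix_model.h; everything else here is illustrative.
void DemoSharePrefix(nexus::backend::SharePrefixModel& model,
                     const nexus::ModelSession& variant_a,
                     const nexus::ModelSession& variant_b) {
  if (!model.HasModelSession(nexus::ModelSessionToString(variant_a))) {
    model.AddModelSession(variant_a);  // loads a suffix; prefix stays resident
  }
  model.AddModelSession(variant_b);
  // Requests for either session can now run through the prefix layers in one
  // batched Forward call; only the suffix layers execute per session.
  LOG(INFO) << "sessions sharing this prefix: " << model.num_model_sessions();
}
```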
    backend_(backend) {
}

void BackendRpcService::HandleRpcs() {
  new UpdateModelTable_Call(
      &service_, cq_.get(),
      [this](const grpc::ServerContext&, const ModelTableConfig& req,
             RpcReply* reply) {
        //std::thread (&BackendServer::UpdateModelTable, backend_, req).detach();
        backend_->UpdateModelTableAsync(req);
        reply->set_status(CTRL_OK);
      });
  new CheckAlive_Call(
      &service_, cq_.get(),
      [](const grpc::ServerContext&, const CheckAliveRequest&,
         RpcReply* reply) {
        reply->set_status(CTRL_OK);
      });
#ifdef USE_GPU
  new CurrentUtilization_Call(
      &service_, cq_.get(),
      [this](const grpc::ServerContext&, const UtilizationRequest&,
             UtilizationReply* reply) {
        reply->set_node_id(backend_->node_id());
        reply->set_utilization(backend_->CurrentUtilization());
        reply->set_valid_ms(FLAGS_occupancy_valid);
      });
#endif
  void* tag;
  bool ok;
  while (running_) {
    cq_->Next(&tag, &ok);
    if (ok) {
      static_cast<RpcCallBase*>(tag)->Proceed();
    }
  }
}

} // namespace backend
} // namespace nexus
--------------------------------------------------------------------------------
/src/nexus/backend/task.cpp:
--------------------------------------------------------------------------------
#include "nexus/backend/task.h"
#include "nexus/common/model_def.h"

namespace nexus {
namespace backend {

Input::Input(TimePoint deadline, uint64_t tid, int idx, ArrayPtr arr) :
    DeadlineItem(deadline),
    task_id(tid),
    index(idx),
    array(arr) {}

Output::Output(uint64_t tid, int idx,
               const std::unordered_map<std::string, ArrayPtr>& arrs) :
    task_id(tid),
    index(idx),
    arrays(arrs) {}

std::atomic<uint64_t> Task::global_task_id_(0);

Task::Task() : Task(nullptr) {}

Task::Task(std::shared_ptr<Connection> conn) :
    DeadlineItem(),
    connection(conn),
    model(nullptr),
    stage(kPreprocess),
    filled_outputs(0) {
  task_id = global_task_id_.fetch_add(1, std::memory_order_relaxed);
  timer.Record("begin");
}

void Task::DecodeQuery(std::shared_ptr<Message> message) {
  msg_type = message->type();
  message->DecodeBody(&query);
  ModelSession sess;
  ParseModelSession(query.model_session_id(), &sess);
  uint32_t budget = sess.latency_sla();
  if (query.slack_ms() > 0) {
    budget += query.slack_ms();
    // LOG(INFO) << "slack " << query.slack_ms() << " ms";
  }
  SetDeadline(std::chrono::milliseconds(budget));
}

void Task::AppendInput(ArrayPtr arr) {
  auto input = std::make_shared<Input>(deadline(), task_id, inputs.size(), arr);
  inputs.push_back(input);
  // Put a placeholder in the outputs
  outputs.push_back(nullptr);
}

bool Task::AddOutput(std::shared_ptr<Output> output) {
  outputs[output->index] = output;
  uint32_t filled = ++filled_outputs;
  if (filled == outputs.size()) {
    return true;
  }
  return false;
}

bool Task::AddVirtualOutput(int index) {
  result.set_status(TIMEOUT);
  uint32_t filled = ++filled_outputs;
  if (filled == outputs.size()) {
    return true;
  }
  return false;
}

} // namespace backend
} // namespace nexus
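`Task::DecodeQuery` above turns the model session's latency SLA, plus any slack the frontend grants, into an absolute deadline that the batching logic schedules against. The same arithmetic with concrete numbers (the values are illustrative, not from the repository):

```cpp
#include <chrono>
#include <cstdint>

// Sketch of the budget computation in Task::DecodeQuery: a 100 ms SLA with
// 20 ms of slack yields a deadline 120 ms after the query is decoded.
std::chrono::time_point<std::chrono::high_resolution_clock>
ComputeDeadlineSketch(uint32_t latency_sla_ms, uint32_t slack_ms) {
  uint32_t budget_ms = latency_sla_ms;  // e.g. 100
  if (slack_ms > 0) {
    budget_ms += slack_ms;              // e.g. 100 + 20 = 120
  }
  return std::chrono::high_resolution_clock::now() +
         std::chrono::milliseconds(budget_ms);
}
```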
"nexus/backend/slice.h" 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | Slice::Slice(size_t nsplits, size_t nfloats) : 9 | equal_split_(true) { 10 | size_t offset = 0; 11 | for (size_t i = 0; i < nsplits; ++i) { 12 | offsets_.push_back(offset); 13 | offset += nfloats; 14 | } 15 | total_elements_ = offset; 16 | equal_slice_size_ = nfloats; 17 | } 18 | 19 | Slice::Slice(std::vector nfloats, size_t multiplier) : 20 | equal_split_(false) { 21 | size_t offset = 0; 22 | for (auto size : nfloats) { 23 | offsets_.push_back(offset); 24 | size_t slice_size = size * multiplier; 25 | slice_sizes_.push_back(slice_size); 26 | offset += slice_size; 27 | } 28 | total_elements_ = offset; 29 | } 30 | 31 | Slice::Slice(std::vector nfloats, size_t multiplier) : 32 | equal_split_(false) { 33 | size_t offset = 0; 34 | for (auto size : nfloats) { 35 | offsets_.push_back(offset); 36 | size_t slice_size = size_t(size) * multiplier; 37 | slice_sizes_.push_back(slice_size); 38 | offset += slice_size; 39 | } 40 | total_elements_ = offset; 41 | } 42 | 43 | Slice::Slice(size_t nsplits, float* nfloats, size_t multiplier) : 44 | equal_split_(false) { 45 | size_t offset = 0; 46 | for (size_t i = 0; i < nsplits; ++i) { 47 | offsets_.push_back(offset); 48 | size_t slice_size = size_t(nfloats[i]) * multiplier; 49 | slice_sizes_.push_back(slice_size); 50 | offset += slice_size; 51 | } 52 | total_elements_ = offset; 53 | } 54 | 55 | size_t Slice::offset(int idx) const { 56 | CHECK_LT(idx, offsets_.size()) << "Index " << idx << " exceeds the boundary " 57 | << offsets_.size(); 58 | return offsets_[idx]; 59 | } 60 | 61 | size_t Slice::num_elements(int idx) const { 62 | CHECK_LT(idx, offsets_.size()) << "Index " << idx << " exceeds the boundary " 63 | << offsets_.size(); 64 | if (equal_split_) { 65 | return equal_slice_size_; 66 | } 67 | return slice_sizes_[idx]; 68 | } 69 | 70 | } // namespace backend 71 | } // namespace nexus 72 | -------------------------------------------------------------------------------- /tests/python/test_async_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import asyncio 4 | from datetime import datetime, timedelta 5 | 6 | import nexus 7 | 8 | vgg_face_dir = '/home/abcdabcd987/datasets/vgg_face' 9 | 10 | service_addr = "127.0.0.1:9001" 11 | 12 | def load_images(root, maxlen): 13 | images = {} 14 | for fn in os.listdir(root)[:maxlen]: 15 | with open(os.path.join(root, fn), 'rb') as f: 16 | im = f.read() 17 | images[fn] = im 18 | return images 19 | 20 | 21 | async def test_client(images, interval): 22 | images = iter(images) 23 | interval = timedelta(seconds=interval) 24 | user_id = random.randint(1, 1000000000) 25 | async with nexus.AsyncClient(service_addr, user_id) as client: 26 | pending = set() 27 | next_time = datetime.now() 28 | try: 29 | next_image = next(images) 30 | except StopIteration: 31 | return 32 | while True: 33 | timeout = (next_time - datetime.now()).total_seconds() 34 | if timeout > 0: 35 | await asyncio.sleep(timeout) 36 | else: 37 | while timeout <= 0 and next_image is not None: 38 | next_time += interval 39 | timeout = (next_time - datetime.now()).total_seconds() 40 | pending.add(client.request(next_image)) 41 | try: 42 | next_image = next(images) 43 | except StopIteration: 44 | next_image = None 45 | done, pending = await asyncio.wait(pending, timeout=timeout, return_when=asyncio.FIRST_COMPLETED) 46 | for task in done: 47 | print('==========', datetime.now(), task.result()) 48 | if not 
pending and next_image is None: 49 | break 50 | 51 | 52 | if __name__ == "__main__": 53 | print('Test client...') 54 | images = list(load_images(vgg_face_dir, 20).values()) 55 | 56 | print('Testing the non concurrent case') 57 | asyncio.run(test_client(images, 0.5)) 58 | 59 | print('Testing the concurrent case') 60 | asyncio.run(test_client(images, 0.0001)) 61 | -------------------------------------------------------------------------------- /src/nexus/common/buffer.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_BUFFER_H_ 2 | #define NEXUS_COMMON_BUFFER_H_ 3 | 4 | #include 5 | 6 | #include "nexus/common/device.h" 7 | 8 | namespace nexus { 9 | 10 | class Buffer : public std::enable_shared_from_this { 11 | public: 12 | // disable copy 13 | Buffer(const Buffer&) = delete; 14 | Buffer& operator=(const Buffer&) = delete; 15 | 16 | Buffer() : 17 | data_(nullptr), 18 | nbytes_(0), 19 | own_data_(false), 20 | shared_from_(nullptr) {} 21 | 22 | explicit Buffer(size_t nbytes, Device* device) : 23 | nbytes_(nbytes), 24 | device_(device), 25 | own_data_(true), 26 | shared_from_(nullptr) { 27 | data_ = device->Allocate(nbytes_); 28 | // LOG(INFO) << "Allocate " << nbytes_ << " on " << device->name() << 29 | // ", own data " << own_data_; 30 | } 31 | 32 | explicit Buffer(void* data, size_t nbytes, Device* device, 33 | bool own_data = false) : 34 | data_(data), 35 | nbytes_(nbytes), 36 | device_(device), 37 | own_data_(own_data), 38 | shared_from_(nullptr) {} 39 | 40 | ~Buffer() { 41 | // LOG(INFO) << "Destroy buffer, size: " << nbytes_ << ", device: " << 42 | // device_->name() << ", own data: " << own_data_; 43 | if (own_data_) { 44 | device_->Free(data_); 45 | // LOG(INFO) << "Free " << nbytes_ << " on " << device_->name(); 46 | } 47 | } 48 | 49 | size_t nbytes() { return nbytes_; } 50 | 51 | void* data() { return data_; } 52 | 53 | const void* data() const { return data_; } 54 | 55 | Device* device() const { return device_; } 56 | 57 | std::shared_ptr Slice(size_t offset, size_t nbytes); 58 | 59 | private: 60 | Buffer(std::shared_ptr origin, size_t offset, size_t nbytes) : 61 | data_((char*) origin->data_ + offset), 62 | nbytes_(nbytes), 63 | device_(origin->device_), 64 | own_data_(false), 65 | shared_from_(origin) { 66 | // LOG(INFO) << "Slice buffer, offset: " << offset << ", size: " << nbytes << 67 | // ", own data: " << own_data_; 68 | } 69 | 70 | void* data_; 71 | size_t nbytes_; 72 | Device* device_; 73 | bool own_data_; 74 | std::shared_ptr shared_from_; 75 | }; 76 | 77 | } // namespace nexus 78 | 79 | #endif // NEXUS_COMMON_BUFFER_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/common/rpc_service_base.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_RPC_SERVICE_BASE_H_ 2 | #define NEXUS_COMMON_RPC_SERVICE_BASE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "nexus/common/rpc_call.h" 9 | 10 | namespace nexus { 11 | 12 | template 13 | class AsyncRpcServiceBase { 14 | public: 15 | AsyncRpcServiceBase(std::string port, size_t nthreads): 16 | AsyncRpcServiceBase("0.0.0.0", port, nthreads) {} 17 | 18 | AsyncRpcServiceBase(std::string ip, std::string port, size_t nthreads): 19 | ip_(ip), 20 | port_(port), 21 | nthreads_(nthreads), 22 | running_(false) { 23 | } 24 | 25 | virtual ~AsyncRpcServiceBase() { 26 | if (running_) { 27 | Stop(); 28 | } 29 | } 30 | 31 | std::string port() const { return port_; } 32 
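`Buffer::Slice` above hands out a view into an existing allocation without copying: the child buffer records its parent in `shared_from_`, so the underlying memory stays alive for as long as any slice does. A minimal usage sketch (CPU device and sizes are illustrative):

```cpp
#include <glog/logging.h>
#include <memory>

// Sketch: a slice keeps the parent allocation alive via shared ownership,
// so dropping the parent handle does not free the memory under the slice.
void DemoBufferSlice() {
  auto* cpu = nexus::DeviceManager::Singleton().GetCPUDevice();
  auto parent = std::make_shared<nexus::Buffer>(12, cpu);      // 12-byte buffer
  std::shared_ptr<nexus::Buffer> view = parent->Slice(4, 8);   // bytes [4, 12)
  parent.reset();  // parent handle gone; the view still owns the allocation
  CHECK_EQ(view->nbytes(), 8u);
}
```

This is the mechanism the backend uses to carve one batch buffer into per-request input arrays without extra copies or manual lifetime bookkeeping.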
| 33 | std::string address() const { return ip_ + ":" + port_; } 34 | 35 | void Start() { 36 | grpc::ServerBuilder builder; 37 | std::string addr = ip_ + ":" + port_; 38 | builder.AddListeningPort(addr, grpc::InsecureServerCredentials()); 39 | builder.RegisterService(&service_); 40 | cq_ = builder.AddCompletionQueue(); 41 | server_ = builder.BuildAndStart(); 42 | running_ = true; 43 | for (size_t i = 0; i < nthreads_; ++i) { 44 | thread_pool_.emplace_back(&AsyncRpcServiceBase::HandleRpcs, this); 45 | } 46 | LOG(INFO) << "RPC service is listening on " << addr; 47 | } 48 | 49 | void Stop() { 50 | running_ = false; 51 | server_->Shutdown(); 52 | cq_->Shutdown(); 53 | 54 | void *tag; 55 | bool ok; 56 | while (cq_->Next(&tag, &ok)) { 57 | LOG(WARNING) << "There is a event in the grpc::ServerCompletionQueue not handled at " << tag; 58 | } 59 | 60 | for (auto& thread : thread_pool_) { 61 | thread.join(); 62 | } 63 | 64 | LOG(INFO) << "RPC service stopped"; 65 | } 66 | 67 | protected: 68 | virtual void HandleRpcs() = 0; 69 | 70 | protected: 71 | std::string ip_; 72 | std::string port_; 73 | size_t nthreads_; 74 | volatile bool running_; 75 | std::vector thread_pool_; 76 | ServiceType service_; 77 | std::unique_ptr cq_; 78 | std::unique_ptr server_; 79 | }; 80 | 81 | } // namespace nexus 82 | 83 | #endif // NEXUS_COMMON_RPC_SERVICE_BASE_H_ 84 | -------------------------------------------------------------------------------- /src/nexus/common/message.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "nexus/common/message.h" 5 | 6 | namespace nexus { 7 | 8 | #if 0 9 | #define htonll(x) \ 10 | ((1==htonl(1)) ? (x) : \ 11 | ((uint64_t) htonl((x) & 0xFFFFFFFF) << 32) | htonl((uint64_t)(x) >> 32)) 12 | 13 | #define ntohll(x) \ 14 | ((1==ntohl(1)) ? 
(x) : \ 15 | ((uint64_t) ntohl((x) & 0xFFFFFFFF) << 32) | ntohl((uint64_t)(x) >> 32)) 16 | #endif 17 | 18 | bool DecodeHeader(const char* buffer, MessageHeader* header) { 19 | header->magic_number = ntohl(*(const uint32_t*) buffer); 20 | if (header->magic_number != NEXUS_SERVICE_MAGIC_NUMBER) { 21 | return false; 22 | } 23 | header->msg_type = ntohl(*(const uint32_t*) (buffer + 4)); 24 | header->body_length = ntohl(*(const uint32_t*) (buffer + 8)); 25 | return true; 26 | } 27 | 28 | Message::Message(const MessageHeader& header) { 29 | type_ = static_cast(header.msg_type); 30 | body_length_ = header.body_length; 31 | data_ = new char[MESSAGE_HEADER_SIZE + body_length_]; 32 | *((uint32_t*) data_) = htonl(NEXUS_SERVICE_MAGIC_NUMBER); 33 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type_); 34 | *((uint32_t*) (data_ + 8)) = htonl(body_length_); 35 | } 36 | 37 | Message::Message(MessageType type, size_t body_length) : 38 | type_(type), 39 | body_length_(body_length) { 40 | data_ = new char[MESSAGE_HEADER_SIZE + body_length]; 41 | *((uint32_t*) data_) = htonl(NEXUS_SERVICE_MAGIC_NUMBER); 42 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type); 43 | *((uint32_t*) (data_ + 8)) = htonl(body_length_); 44 | } 45 | 46 | Message::~Message() { 47 | delete[] data_; 48 | } 49 | 50 | void Message::set_type(MessageType type) { 51 | type_ = type; 52 | *((uint32_t*) (data_ + 4)) = htonl((uint32_t) type); 53 | } 54 | 55 | void Message::DecodeBody(google::protobuf::Message* message) const { 56 | message->ParseFromArray(body(), body_length_); 57 | } 58 | 59 | void Message::EncodeBody(const google::protobuf::Message& message) { 60 | CHECK_GE(body_length_, message.ByteSizeLong()) << "Buffer is too small to " 61 | "store the message"; 62 | message.SerializeToArray(body(), body_length_); 63 | } 64 | 65 | } // namespace nexus 66 | -------------------------------------------------------------------------------- /src/nexus/backend/caffe_densecap_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE 5 | 6 | #include 7 | 8 | #include "nexus/backend/model_ins.h" 9 | 10 | // Caffe headers 11 | // avoid redefined keywords from darknet 12 | #ifdef GPU 13 | #undef GPU 14 | #endif 15 | #ifdef CUDNN 16 | #undef CUDNN 17 | #endif 18 | #include "caffe/caffe.hpp" 19 | 20 | namespace nexus { 21 | namespace backend { 22 | 23 | class CaffeDenseCapModel : public ModelInstance { 24 | public: 25 | CaffeDenseCapModel(int gpu_id, const ModelInstanceConfig& config); 26 | 27 | Shape InputShape() final; 28 | 29 | std::unordered_map OutputShapes() final; 30 | 31 | ArrayPtr CreateInputGpuArray() final; 32 | 33 | std::unordered_map GetOutputGpuArrays() final; 34 | 35 | void Preprocess(std::shared_ptr task) final; 36 | 37 | void Forward(std::shared_ptr batch_task) final; 38 | 39 | void Postprocess(std::shared_ptr task) final; 40 | 41 | private: 42 | void LoadVocabulary(const std::string& filename); 43 | 44 | void TransformBbox(int im_height, int im_width, float scale, int nboxes, 45 | const float* rois, const float* bbox_deltas, float* out); 46 | 47 | // parameters 48 | int max_timestep_; 49 | int max_boxes_; 50 | float nms_threshold_; 51 | float score_threshold_; 52 | std::vector mean_values_; 53 | std::vector bbox_mean_; 54 | std::vector bbox_stds_; 55 | // networks and data 56 | std::unique_ptr > feature_net_; 57 | std::unique_ptr > rnn_net_; 58 | std::unique_ptr > embed_net_; 59 | 
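`DecodeHeader` and the `Message` constructors above pin down Nexus's wire format: every message starts with a 12-byte header of three big-endian 32-bit fields (magic number, message type, body length), followed by the protobuf-encoded body. A round-trip sketch of just the header (the field order and 0/4/8 offsets follow `message.cpp`; `MESSAGE_HEADER_SIZE` is assumed to equal 12):

```cpp
#include <arpa/inet.h>
#include <cstdint>
#include <cstring>

// Sketch of the 12-byte Nexus message header: big-endian magic number,
// message type, and body length at offsets 0, 4, and 8.
void EncodeHeaderSketch(char* buf, uint32_t magic, uint32_t type,
                        uint32_t body_length) {
  uint32_t fields[3] = {htonl(magic), htonl(type), htonl(body_length)};
  std::memcpy(buf, fields, sizeof(fields));
}

bool DecodeHeaderSketch(const char* buf, uint32_t expected_magic,
                        uint32_t* type, uint32_t* body_length) {
  uint32_t fields[3];
  std::memcpy(fields, buf, sizeof(fields));
  if (ntohl(fields[0]) != expected_magic) return false;  // reject junk early
  *type = ntohl(fields[1]);
  *body_length = ntohl(fields[2]);
  return true;
}
```

Using `memcpy` here instead of the casts in `message.cpp` sidesteps alignment concerns; the bytes on the wire are identical.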
std::vector vocabulary_; 60 | // shapes and sizes of input and output 61 | int image_height_; 62 | int image_width_; 63 | size_t input_size_; 64 | Shape input_shape_; 65 | std::unordered_map output_shapes_; 66 | //caffe::Blob* input_blob_; 67 | int feature_net_input_idx_; 68 | std::vector > > input_blobs_; 69 | // temporary buffer 70 | std::vector best_words_; 71 | std::unique_ptr > multiplier_; 72 | }; 73 | 74 | } // namespace backend 75 | } // namespace nexus 76 | 77 | #endif // USE_CAFFE 78 | 79 | #endif // NEXUS_BACKEND_CAFFE_DENSECAP_MODEL_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/backend/tensorflow_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 2 | #define NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 3 | 4 | #ifdef USE_TENSORFLOW 5 | 6 | #include "nexus/backend/model_ins.h" 7 | // Tensorflow headers 8 | #include "tensorflow/core/public/session.h" 9 | 10 | namespace tf = tensorflow; 11 | 12 | 13 | namespace nexus { 14 | namespace backend { 15 | 16 | class TFShareModel; 17 | 18 | class TensorflowModel : public ModelInstance { 19 | public: 20 | TensorflowModel(int gpu_id, const ModelInstanceConfig& config); 21 | 22 | ~TensorflowModel(); 23 | 24 | Shape InputShape() final; 25 | 26 | std::unordered_map OutputShapes() final; 27 | 28 | ArrayPtr CreateInputGpuArray() final; 29 | 30 | std::unordered_map GetOutputGpuArrays() final; 31 | 32 | void Preprocess(std::shared_ptr task) final; 33 | 34 | void Forward(std::shared_ptr batch_task) final; 35 | 36 | void Postprocess(std::shared_ptr task) final; 37 | 38 | uint64_t GetPeakBytesInUse() override; 39 | 40 | private: 41 | tf::Tensor* NewInputTensor(); 42 | 43 | void MarshalDetectionResult( 44 | const QueryProto& query, std::shared_ptr output, 45 | int im_height, int im_width, QueryResultProto* result); 46 | 47 | tf::SessionOptions gpu_option_; 48 | tf::SessionOptions cpu_option_; 49 | std::unique_ptr session_; 50 | int image_height_; 51 | int image_width_; 52 | std::string input_layer_; 53 | Shape input_shape_; 54 | size_t input_size_; 55 | DataType input_data_type_; 56 | std::vector output_layers_; 57 | std::unordered_map output_shapes_; 58 | std::unordered_map output_sizes_; 59 | std::vector input_mean_; 60 | std::vector input_std_; 61 | std::unordered_map classnames_; 62 | tf::Allocator* gpu_allocator_; 63 | std::vector > input_tensors_; 64 | bool first_input_array_; 65 | 66 | // supports for TFShareModel 67 | friend class TFShareModel; 68 | size_t num_suffixes_; 69 | std::unique_ptr slice_beg_tensor_; 70 | std::unique_ptr slice_end_tensor_; 71 | void set_slice_tensor(const std::unique_ptr& dst, const std::vector &src); 72 | }; 73 | 74 | } // namespace backend 75 | } // namespace nexus 76 | 77 | #endif // USE_TENSORFLOW 78 | 79 | #endif // NEXUS_BACKEND_TENSORFLOW_MODEL_H_ 80 | -------------------------------------------------------------------------------- /src/nexus/backend/gpu_executor.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 2 | #define NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 3 | 4 | #ifdef USE_GPU 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "nexus/backend/model_exec.h" 13 | 14 | namespace nexus { 15 | namespace backend { 16 | 17 | class GpuExecutor { 18 | public: 19 | GpuExecutor() : duty_cycle_us_(0.) 
{} 20 | 21 | virtual ~GpuExecutor() {} 22 | 23 | void SetDutyCycle(double duty_cycle_us) { 24 | duty_cycle_us_.store(duty_cycle_us); 25 | } 26 | 27 | virtual void Start(int core = -1) = 0; 28 | virtual void Stop() = 0; 29 | virtual void AddModel(std::shared_ptr model) = 0; 30 | virtual void RemoveModel(std::shared_ptr model) = 0; 31 | virtual double CurrentUtilization() = 0; 32 | 33 | protected: 34 | std::atomic duty_cycle_us_; 35 | }; 36 | 37 | class GpuExecutorMultiBatching : public GpuExecutor { 38 | public: 39 | GpuExecutorMultiBatching(int gpu_id); 40 | 41 | inline int gpu_id() { return gpu_id_; } 42 | 43 | void Start(int core = -1) final; 44 | 45 | void Stop() final; 46 | 47 | void AddModel(std::shared_ptr model) final; 48 | 49 | void RemoveModel(std::shared_ptr model) final; 50 | 51 | double CurrentUtilization() final; 52 | 53 | private: 54 | void Run(); 55 | 56 | int gpu_id_; 57 | std::atomic_bool running_; 58 | std::thread thread_; 59 | std::vector > models_; 60 | std::vector > backup_models_; 61 | std::mutex models_mu_; 62 | double utilization_; 63 | TimePoint last_check_time_; 64 | std::mutex util_mu_; 65 | }; 66 | 67 | class GpuExecutorNoMultiBatching : public GpuExecutor { 68 | public: 69 | GpuExecutorNoMultiBatching(int gpu_id); 70 | 71 | inline int gpu_id() { return gpu_id_; } 72 | 73 | void Start(int core = -1); 74 | 75 | void Stop(); 76 | 77 | void AddModel(std::shared_ptr model) final; 78 | 79 | void RemoveModel(std::shared_ptr model) final; 80 | 81 | double CurrentUtilization() final; 82 | 83 | private: 84 | int gpu_id_; 85 | int core_; 86 | std::mutex mu_; 87 | std::unordered_map > threads_; 89 | }; 90 | 91 | } // namespace backend 92 | } // namespace nexus 93 | 94 | #endif // USE_GPU 95 | 96 | #endif // NEXUS_BACKEND_BASE_GPU_EXECUTOR_H_ 97 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04 AS builder 2 | COPY . /nexus 3 | RUN apt-get update \ 4 | && apt-get install -y unzip build-essential git autoconf automake libtool pkg-config curl make zlib1g-dev wget \ 5 | libswscale-dev libjpeg-dev libpng-dev libsm6 libxext6 libxrender-dev \ 6 | python-dev python-pip \ 7 | libcurl4-openssl-dev \ 8 | software-properties-common \ 9 | && python -m pip install --upgrade six numpy wheel setuptools mock 'future>=0.17.1' \ 10 | \ 11 | && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | apt-key add - \ 12 | && apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' \ 13 | && apt-get update \ 14 | && apt-get install -y cmake \ 15 | && rm -rf /var/lib/apt/lists/* \ 16 | \ 17 | && /nexus/build-deps.bash \ 18 | && /nexus/build-tensorflow.bash \ 19 | && cd /nexus/build-dep-install/tensorflow/ \ 20 | && rm -rf c cc compiler core stream_executor \ 21 | && rm -rf /nexus/build-dep-src /root/.cache/bazel \ 22 | \ 23 | && mkdir /nexus/build \ 24 | && cd /nexus/build \ 25 | && cmake .. 
    -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCUDA_PATH=/usr/local/cuda-10.0 -DUSE_TENSORFLOW=ON -DUSE_CAFFE2=OFF \
    && make -j$(nproc) \
    \
    && find /nexus/build-dep-install -type d \( -name "bin" -o -name "include" -o -name "share" \) -exec rm -rf {} + \
    && find /nexus/build-dep-install -type f -name "*.a" -exec rm -f {} + \
    && rm -rf /nexus/build-dep-install/bazel \
    && cd /nexus/build \
    && rm -rf CMakeFiles gen *.a *.txt *.cmake Makefile bench_tfshare test_*


FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
LABEL maintainer="Lequn Chen "
COPY --from=builder /nexus /nexus
RUN apt-get update \
    && apt-get install -y libswscale4 libjpeg8 libpng16-16 \
        software-properties-common wget \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y python3.7 python3.7-dev \
    && wget https://bootstrap.pypa.io/get-pip.py -O /tmp/get-pip.py \
    && python3.7 /tmp/get-pip.py \
    && rm /tmp/get-pip.py \
    && python3.7 -m pip install --upgrade numpy protobuf pyyaml Pillow \
    && python3.7 -m pip install --editable /nexus/python \
    && python3.7 -m pip uninstall -y pip \
    && apt-get purge -y python3.7-dev software-properties-common wget \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* /root/.cache/pip
WORKDIR /nexus
--------------------------------------------------------------------------------
/src/nexus/common/time_util.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_TIME_UTIL_H_
#define NEXUS_COMMON_TIME_UTIL_H_

#include
#include
#include
#include
#include
#include
#include

namespace nexus {

using Clock = std::chrono::high_resolution_clock;
using TimePoint = std::chrono::time_point<Clock>;

/*! \brief Timer helps to record time and count duration between two time
    points */
class Timer {
 public:
  /*!
   * \brief Records the time point with tag
   * \param tag Tag of time point
   */
  void Record(const std::string& tag);
  /*!
   * \brief Get the interval between two tags in milliseconds
   * \param beg_tag Tag of beginning time point
   * \param end_tag Tag of end time point
   * \return Duration in milliseconds
   */
  uint64_t GetLatencyMillis(const std::string& beg_tag,
                            const std::string& end_tag);
  /*!
   * \brief Get the interval between two tags in microseconds
   * \param beg_tag Tag of beginning time point
   * \param end_tag Tag of end time point
   * \return Duration in microseconds
   */
  uint64_t GetLatencyMicros(const std::string& beg_tag,
                            const std::string& end_tag);

 private:
  /*!
   * \brief Get the time point given the tag
   * \param tag Tag of time point
   * \return TimePoint pointer
   */
  TimePoint* GetTimepoint(const std::string& tag);
  /*!
\brief Map from tag to time points */ 51 | std::unordered_map time_points_; 52 | }; 53 | 54 | class Tickable { 55 | public: 56 | Tickable(uint32_t tick_interval_sec); 57 | 58 | virtual ~Tickable(); 59 | 60 | void Tick(); 61 | 62 | protected: 63 | virtual void TickImpl() = 0; 64 | 65 | protected: 66 | uint32_t tick_interval_sec_; 67 | uint32_t sec_since_last_tick_; 68 | }; 69 | 70 | class TimeSystem { 71 | public: 72 | static TimeSystem& Singleton(); 73 | 74 | ~TimeSystem(); 75 | 76 | void Stop(); 77 | 78 | bool AddTickable(std::shared_ptr tickable); 79 | 80 | bool RemoveTickable(std::shared_ptr tickable); 81 | 82 | private: 83 | TimeSystem(); 84 | 85 | void Run(); 86 | 87 | std::unordered_set> tickables_; 88 | std::mutex mutex_; 89 | std::atomic_bool running_; 90 | std::thread thread_; 91 | }; 92 | 93 | } // namespace nexus 94 | 95 | #endif // NEXUS_COMMON_TIME_UTIL_H_ 96 | -------------------------------------------------------------------------------- /tools/test_complex_query.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "nexus/common/model_db.h" 5 | #include "nexus/scheduler/complex_query.h" 6 | 7 | using namespace nexus; 8 | using namespace nexus::scheduler; 9 | 10 | DEFINE_int32(avg_interval, 10, "Moving average interval for backend rate"); // for the sch_info.cpp linking error 11 | 12 | ComplexQuery::NodeID add_node(ComplexQuery &cq, const std::string &gpu, 13 | const std::string &framework, const std::string &model_name, 14 | int image_width, int image_height) { 15 | ComplexQuery::NodeID node(framework, model_name); 16 | auto model_sess_id = framework + ':' + model_name + ":0"; 17 | auto profile_id = framework + ':' + model_name + ":1"; 18 | if (image_height > 0) { 19 | profile_id += ":" + std::to_string(image_height) + "x" + std::to_string(image_width); 20 | } 21 | auto *profile = ModelDatabase::Singleton().GetModelProfile(gpu, "generic", profile_id); 22 | CHECK(profile != nullptr); 23 | cq.AddNode(node, model_sess_id, *profile); 24 | return node; 25 | } 26 | 27 | void add_node(ComplexQuery &cq, ComplexQuery::NodeID &node, const std::string &profile_id, const std::string &gpu) { 28 | auto model_sess_id = node.framework + ':' + node.model_name + ":0"; 29 | auto *profile = ModelDatabase::Singleton().GetModelProfile(gpu, "generic", profile_id); 30 | cq.AddNode(node, model_sess_id, *profile); 31 | } 32 | 33 | int main(int argc, char** argv) { 34 | FLAGS_logtostderr = 1; 35 | google::InitGoogleLogging(argv[0]); 36 | google::ParseCommandLineFlags(&argc, &argv, true); 37 | google::InstallFailureSignalHandler(); 38 | 39 | const int SLO_MS = 400; 40 | const int SEGMENTS = 500; 41 | const std::string gpu = "GeForce_GTX_1080_Ti"; 42 | 43 | ComplexQuery cq("cq_id", SLO_MS * 1000, SEGMENTS); 44 | auto node_ssd = add_node(cq, gpu, "tensorflow", "ssd_mobilenet", 300, 300); 45 | auto node_inception = add_node(cq, gpu, "tensorflow", "inception_0", 0, 0); 46 | auto node_vgg = add_node(cq, gpu, "tensorflow", "vgg16_0", 0, 0); 47 | cq.AddChild(node_ssd, node_inception); 48 | cq.AddChild(node_ssd, node_vgg); 49 | cq.Finalize(); 50 | 51 | cq.SetRequestRate(node_ssd, 200); 52 | cq.SetRequestRate(node_inception, 50); 53 | cq.SetRequestRate(node_vgg, 100); 54 | cq.DynamicProgramming(); 55 | std::cout << "minimal number of GPUs: " << cq.GetMinimalGPUs() << std::endl; 56 | auto split = cq.GetSLOms(); 57 | for (auto &node : split) 58 | std::cout << " " << node.first.ToString() << ": " << node.second << "ms" << 
std::endl; 59 | } 60 | -------------------------------------------------------------------------------- /src/nexus/common/connection.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_CONNECTION_H_ 2 | #define NEXUS_COMMON_CONNECTION_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "nexus/common/message.h" 10 | 11 | namespace nexus { 12 | 13 | class Connection; // forward declare 14 | 15 | class MessageHandler { 16 | public: 17 | /*! 18 | * \brief Handles a new message 19 | * \param conn Connection that receives the message 20 | * \param message Received message 21 | */ 22 | virtual void HandleMessage(std::shared_ptr conn, 23 | std::shared_ptr message) = 0; 24 | /*! 25 | * \brief Handles error in connection 26 | * \param conn Connection that encounters an error 27 | * \param ec Boost error code 28 | */ 29 | virtual void HandleError(std::shared_ptr conn, 30 | boost::system::error_code ec) = 0; 31 | }; 32 | 33 | class Connection : public std::enable_shared_from_this { 34 | public: 35 | // disable copy 36 | Connection(const Connection&) = delete; 37 | Connection& operator=(const Connection&) = delete; 38 | // constructor 39 | explicit Connection(boost::asio::ip::tcp::socket socket, 40 | MessageHandler* handler); 41 | /*! \brief starts processing packets received from socket */ 42 | virtual void Start(); 43 | /*! \brief stops the socket */ 44 | virtual void Stop(); 45 | /*! 46 | * \brief sends a message through socket 47 | * \param msg Shared pointer of message, yield the ownership to the function 48 | */ 49 | virtual void Write(std::shared_ptr msg); 50 | 51 | protected: 52 | Connection(boost::asio::io_context& io_context, MessageHandler* handler); 53 | /*! \brief reads the header from the connection */ 54 | void DoReadHeader(); 55 | /*! \brief reads the body of message and invoke the handler */ 56 | void DoReadBody(std::shared_ptr msg); 57 | /*! \brief sends the message to the peer */ 58 | void DoWrite(); 59 | 60 | protected: 61 | /*! \brief Socket */ 62 | boost::asio::ip::tcp::socket socket_; 63 | std::mutex socket_mutex_; 64 | /*! \brief Message handler */ 65 | MessageHandler* handler_; 66 | /*! \brief Wrong header indicator */ 67 | bool wrong_header_; 68 | /*! \brief Receiving message */ 69 | //std::shared_ptr recv_message_; 70 | char msg_header_buffer_[MESSAGE_HEADER_SIZE]; 71 | /*! \brief Queue for outbound messages */ 72 | std::deque > write_queue_; 73 | /*! \brief Mutex for write_queue_ */ 74 | std::mutex write_queue_mutex_; 75 | }; 76 | 77 | } // namespace nexus 78 | 79 | #endif // NEXUS_COMMON_CONNECTION_H_ 80 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Running Nexus with the Simple Example 2 | 3 | We have prepared a simple example application to walk you through how to run 4 | Nexus in concrete steps. We provide a Docker image so that you don't have to 5 | spend hours on building the dependencies. To download the Docker image, you 6 | can run: 7 | 8 | ```bash 9 | docker pull abcdabcd987/nexus 10 | ``` 11 | 12 | If you want to run Nexus on the host OS, make sure you have followed the 13 | [building instructions](../BUILDING.md) and have Nexus and its dependencies 14 | built. 
The commands in the following sections assume Docker, but to run on the host,
you can simply drop the lines containing `docker`, omit the command-line
arguments that specify server addresses, and replace `/nexus` with the path to
your Nexus build.

## Download the Model Zoo

```bash
git clone https://gitlab.cs.washington.edu/syslab/nexus-models
cd nexus-models
export MODEL_DIR=$(pwd)
git lfs checkout
```

## Profile ResNet-50 on GPU 0

```bash
docker run -it --rm --gpus all -v $MODEL_DIR:$MODEL_DIR abcdabcd987/nexus \
    python3.7 /nexus/tools/profiler/profiler.py --gpu_list=0 --gpu_uuid --model_root=$MODEL_DIR \
    --framework=tensorflow --model=resnet_0 --width=224 --height=224
```

## Run Nexus Scheduler and Backend, and Application Frontend

```bash
docker network create nexus-net

docker run -it --rm --gpus all --network=nexus-net -v=$MODEL_DIR:$MODEL_DIR --name=nexus-scheduler -p=10001 abcdabcd987/nexus \
    /nexus/build/scheduler -model_root=$MODEL_DIR -alsologtostderr -colorlogtostderr -v 1

docker run -it --rm --gpus all --network=nexus-net -v=$MODEL_DIR:$MODEL_DIR --name=nexus-gpu0 -p=8001 -p=8002 abcdabcd987/nexus \
    /nexus/build/backend -model_root=$MODEL_DIR -gpu=0 -alsologtostderr -colorlogtostderr \
    -sch_addr=nexus-scheduler:10001

docker run -it --rm --gpus all --network=nexus-net --name=nexus-simple-frontend -p=9001 -p=9002 abcdabcd987/nexus \
    /nexus/build/simple -framework=tensorflow -model=resnet_0 -latency=50 -width=224 -height=224 -alsologtostderr -colorlogtostderr \
    -sch_addr=nexus-scheduler:10001
```

## Send a Client Request

```bash
curl https://upload.wikimedia.org/wikipedia/commons/4/4c/Chihuahua1_bvdb.jpg | docker run --rm -i --network=nexus-net abcdabcd987/nexus \
    python3.7 /nexus/examples/simple_app/src/client.py - --server=nexus-simple-frontend:9001
```

The [image](https://upload.wikimedia.org/wikipedia/commons/4/4c/Chihuahua1_bvdb.jpg)
should be classified as a *chihuahua*.
--------------------------------------------------------------------------------
/src/nexus/common/device.cpp:
--------------------------------------------------------------------------------
#include "nexus/common/device.h"
#include

namespace nexus {

#ifdef USE_GPU

DEFINE_bool(generic_profile, false, "Use the generic profile for all GPUs of the same model instead of using profiles for each GPU card. 
(Applicable to Backend only)"); 9 | 10 | GPUDevice::GPUDevice(int gpu_id) : 11 | Device(kGPU), gpu_id_(gpu_id) { 12 | std::stringstream ss; 13 | ss << "gpu:" << gpu_id; 14 | name_ = ss.str(); 15 | cudaDeviceProp prop; 16 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 17 | NEXUS_CUDA_CHECK(cudaGetDeviceProperties(&prop, gpu_id_)); 18 | device_name_.assign(prop.name, strlen(prop.name)); 19 | std::replace(device_name_.begin(), device_name_.end(), ' ', '_'); 20 | total_memory_ = prop.totalGlobalMem; 21 | 22 | if (FLAGS_generic_profile) { 23 | uuid_ = "generic"; 24 | } else { 25 | auto *u = reinterpret_cast(&prop.uuid); 26 | char uuid_hex[37] = {}; 27 | sprintf(uuid_hex, 28 | "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x", 29 | u[0], u[1], u[2], u[3], 30 | u[4], u[5], 31 | u[6], u[7], 32 | u[8], u[9], 33 | u[10], u[11], u[12], u[13], u[14], u[15]); 34 | uuid_ = uuid_hex; 35 | } 36 | 37 | LOG(INFO) << "GPU " << gpu_id << " " << device_name_ 38 | << "(" << uuid_ << ")" 39 | << ": total memory " << total_memory_ / 1024. / 1024. / 1024. << "GB"; 40 | } 41 | 42 | void *GPUDevice::Allocate(size_t nbytes) { 43 | void* buf; 44 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 45 | cudaError_t err = cudaMalloc(&buf, nbytes); 46 | if (err != cudaSuccess) { 47 | throw cudaGetErrorString(err); 48 | } 49 | return buf; 50 | } 51 | 52 | size_t GPUDevice::FreeMemory() const { 53 | size_t free_mem, total_mem; 54 | NEXUS_CUDA_CHECK(cudaSetDevice(gpu_id_)); 55 | NEXUS_CUDA_CHECK(cudaMemGetInfo(&free_mem, &total_mem)); 56 | return free_mem; 57 | } 58 | 59 | void GPUDevice::Free(void *buf) { 60 | NEXUS_CUDA_CHECK(cudaFree(buf)); 61 | } 62 | 63 | 64 | GPUDevice *DeviceManager::GetGPUDevice(int gpu_id) const { 65 | CHECK_LT(gpu_id, gpu_devices_.size()) << "GPU id " << gpu_id << 66 | " exceeds number of GPU devices (" << gpu_devices_.size() << ")"; 67 | return gpu_devices_[gpu_id]; 68 | } 69 | #endif 70 | 71 | DeviceManager::DeviceManager() { 72 | cpu_device_ = new CPUDevice(); 73 | int gpu_count; 74 | #ifdef USE_GPU 75 | NEXUS_CUDA_CHECK(cudaGetDeviceCount(&gpu_count)); 76 | #endif 77 | for (int i = 0; i < gpu_count; ++i) { 78 | gpu_devices_.push_back(new GPUDevice(i)); 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/nexus/backend/caffe2_model.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_CAFFE2_MODEL_H_ 2 | #define NEXUS_BACKEND_CAFFE2_MODEL_H_ 3 | 4 | #ifdef USE_CAFFE2 5 | 6 | #include "nexus/backend/model_ins.h" 7 | // Caffe2 headers 8 | #include "caffe2/core/context_gpu.h" 9 | #include "caffe2/core/predictor.h" 10 | 11 | namespace nexus { 12 | namespace backend { 13 | 14 | class Caffe2Model : public ModelInstance { 15 | public: 16 | Caffe2Model(int gpu_id, const ModelInstanceConfig& config); 17 | 18 | Shape InputShape() final; 19 | 20 | std::unordered_map OutputShapes() final; 21 | 22 | ArrayPtr CreateInputGpuArray() final; 23 | 24 | ArrayPtr CreateInputGpuArrayWithRawPointer(float* ptr, size_t nfloats) final; 25 | 26 | void RemoveInputGpuArray(ArrayPtr arr) final; 27 | 28 | std::unordered_map GetOutputGpuArrays() final; 29 | 30 | void Preprocess(std::shared_ptr task) final; 31 | 32 | void Forward(std::shared_ptr batch_task) final; 33 | 34 | void ForwardAsync(std::shared_ptr batch_task) final; 35 | 36 | void WaitOutput(std::shared_ptr batch_task) final; 37 | 38 | void Postprocess(std::shared_ptr task) final; 39 | 40 | private: 41 | void LoadModel(const std::string& 
init_path, const std::string& predict_path, 42 | const ModelInstanceConfig& config, caffe2::NetDef* init_net, 43 | caffe2::NetDef* predict_net); 44 | 45 | std::pair NewInputBlob(); 46 | 47 | std::pair NewInputBlob(float* ptr, size_t nfloats); 48 | 49 | std::unique_ptr gpu_ctx_; 50 | std::string net_name_; 51 | std::unique_ptr workspace_; 52 | caffe2::NetBase* net_; 53 | std::string input_blob_name_; 54 | std::string output_blob_name_; 55 | // image size 56 | int image_height_; 57 | int image_width_; 58 | // input shape of neural network 59 | Shape input_shape_; 60 | // output shape of neural network 61 | Shape output_shape_; 62 | // size of input in a single input 63 | size_t input_size_; 64 | // size of output in a single batch 65 | size_t output_size_; 66 | // Input tensor 67 | std::unordered_map > input_blobs_; 69 | bool first_input_array_; 70 | // Output tensor 71 | caffe2::TensorCUDA* output_tensor_; 72 | 73 | std::unordered_map classnames_; 74 | bool has_mean_file_; 75 | std::vector mean_value_; 76 | std::vector mean_blob_; 77 | float scale_; 78 | 79 | // transformer for input 80 | //std::unique_ptr > transformer_; 81 | }; 82 | 83 | } // namespace backend 84 | } // namespace nexus 85 | 86 | #endif // USE_CAFFE2 87 | 88 | #endif // NEXUS_BACKEND_CAFFE2_MODEL_H_ 89 | -------------------------------------------------------------------------------- /src/nexus/scheduler/frontend_delegate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/scheduler/frontend_delegate.h" 4 | #include "nexus/scheduler/scheduler.h" 5 | 6 | namespace nexus { 7 | namespace scheduler { 8 | 9 | FrontendDelegate::FrontendDelegate(uint32_t node_id, const std::string& ip, 10 | const std::string& server_port, 11 | const std::string& rpc_port, 12 | int beacon_sec): 13 | node_id_(node_id), 14 | ip_(ip), 15 | server_port_(server_port), 16 | rpc_port_(rpc_port), 17 | beacon_sec_(beacon_sec), 18 | timeout_ms_(beacon_sec * 3 * 1000) { 19 | std::stringstream rpc_addr; 20 | rpc_addr << ip_ << ":" << rpc_port_; 21 | auto channel = grpc::CreateChannel(rpc_addr.str(), 22 | grpc::InsecureChannelCredentials()); 23 | stub_ = FrontendCtrl::NewStub(channel); 24 | last_time_ = std::chrono::system_clock::now(); 25 | } 26 | 27 | std::time_t FrontendDelegate::LastAliveTime() { 28 | return std::chrono::system_clock::to_time_t(last_time_); 29 | } 30 | 31 | void FrontendDelegate::Tick() { 32 | last_time_ = std::chrono::system_clock::now(); 33 | } 34 | 35 | bool FrontendDelegate::IsAlive() { 36 | long elapse = std::chrono::duration_cast( 37 | std::chrono::system_clock::now() - last_time_).count(); 38 | if (elapse < timeout_ms_) { 39 | return true; 40 | } 41 | CheckAliveRequest request; 42 | request.set_node_type(FRONTEND_NODE); 43 | request.set_node_id(node_id_); 44 | RpcReply reply; 45 | 46 | // Inovke RPC CheckAlive 47 | grpc::ClientContext context; 48 | grpc::Status status = stub_->CheckAlive(&context, request, &reply); 49 | if (!status.ok()) { 50 | LOG(ERROR) << status.error_code() << ": " << status.error_message(); 51 | return false; 52 | } 53 | last_time_ = std::chrono::system_clock::now(); 54 | return true; 55 | } 56 | 57 | void FrontendDelegate::SubscribeModel(const std::string& model_session_id) { 58 | subscribe_models_.insert(model_session_id); 59 | } 60 | 61 | CtrlStatus FrontendDelegate::UpdateModelRoutesRpc( 62 | const ModelRouteUpdates& request) { 63 | RpcReply reply; 64 | // Inovke RPC CheckAlive 65 | grpc::ClientContext context; 66 | grpc::Status status = 
stub_->UpdateModelRoutes(&context, request, &reply); 67 | if (!status.ok()) { 68 | LOG(ERROR) << status.error_code() << ": " << status.error_message(); 69 | return CTRL_SERVER_UNREACHABLE; 70 | } 71 | last_time_ = std::chrono::system_clock::now(); 72 | if (reply.status() != CTRL_OK) { 73 | LOG(ERROR) << "Frontend " << node_id_ << " UpdateModelRoutes error: " << 74 | CtrlStatus_Name(reply.status()); 75 | } 76 | return reply.status(); 77 | } 78 | 79 | } // namespace scheduler 80 | } // namespace nexus 81 | -------------------------------------------------------------------------------- /BUILDING.md: -------------------------------------------------------------------------------- 1 | # Building Nexus on Ubuntu 18.04 2 | 3 | ## Install system-wide packages 4 | 5 | ```bash 6 | # Build system and utilities 7 | sudo apt-get install -y unzip build-essential git autoconf automake libtool pkg-config curl make zlib1g-dev wget 8 | 9 | # For OpenCV 10 | sudo apt-get install -y libswscale-dev libjpeg-dev libpng-dev 11 | 12 | # Python 2.7 for building Tensorflow 13 | sudo apt-get install -y python-dev python-pip 14 | pip install --upgrade --user pip six numpy wheel setuptools mock 'future>=0.17.1' 15 | 16 | # Python 3.7 for Nexus 17 | sudo apt-get install -y software-properties-common 18 | sudo add-apt-repository -y ppa:deadsnakes/ppa 19 | sudo apt-get update 20 | sudo apt-get install -y python3.7 python3.7-dev 21 | curl https://bootstrap.pypa.io/get-pip.py | python3.7 22 | python3.7 -m pip install --upgrade --user numpy protobuf Pillow pyyaml 23 | 24 | # CMake > 3.12 25 | # See https://apt.kitware.com/ for more details. 26 | wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | sudo apt-key add - 27 | sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main' 28 | sudo apt-get update 29 | sudo apt-get install -y cmake 30 | ``` 31 | 32 | ## Install NVIDIA driver 33 | 34 | ```bash 35 | sudo apt-get install -y software-properties-common 36 | sudo add-apt-repository -y ppa:graphics-drivers/ppa 37 | sudo apt-get update 38 | sudo apt-get install -y nvidia-headless-440 39 | ``` 40 | 41 | ## Install CUDA 10.0 42 | 43 | ```bash 44 | wget -n -O cuda_10.0.130_410.48_linux.run https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux 45 | sudo sh cuda_10.0.130_410.48_linux.run -silent -toolkit 46 | sudo unlink /usr/local/cuda 47 | ``` 48 | 49 | ## Install cuDNN 7.6.5 50 | 51 | Download cuDNN 7.6.5 for CUDA 10.0 from [NVIDIA](https://developer.nvidia.com/rdp/cudnn-download) 52 | 53 | ```bash 54 | tar xf cudnn-10.0-linux-x64-v7.6.5.32.tgz 55 | sudo mv cuda/include/cudnn.h /usr/local/cuda-10.0/include 56 | sudo mv cuda/lib64/libcudnn* /usr/local/cuda-10.0/lib64 57 | sudo chmod a+r /usr/local/cuda-10.0/include/cudnn.h /usr/local/cuda-10.0/lib64/libcudnn* 58 | sudo ldconfig 59 | ``` 60 | 61 | ## Clone Nexus 62 | 63 | ```bash 64 | git clone https://github.com/uwsampl/nexus.git 65 | cd nexus 66 | ``` 67 | 68 | ## Build Nexus dependencies 69 | 70 | ```bash 71 | ./build-deps.bash 72 | ./build-tensorflow.bash 73 | ``` 74 | 75 | By default, the script will build TensorFlow with the following 76 | [CUDA compute capabilities](https://en.wikipedia.org/wiki/CUDA#GPUs_supported): 77 | `5.2, 6.1, 7.5`. If you want to change any build options 78 | for TensorFlow, set the environment variables specified in 79 | [`./build-tensorflow.bash`](build-tensorflow.bash) 80 | 81 | ## Build Nexus 82 | 83 | ```bash 84 | mkdir build 85 | cd build 86 | cmake .. 
    -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCUDA_PATH=/usr/local/cuda-10.0 -DUSE_TENSORFLOW=ON -DUSE_CAFFE2=OFF
make -j$(nproc)
python3.7 -m pip install --user --editable ./python
```
--------------------------------------------------------------------------------
/src/nexus/common/model_def.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_DEF_H_
#define NEXUS_COMMON_MODEL_DEF_H_

#include
#include
#include
#include

#include "nexus/common/util.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {

// Builds a model ID string, e.g. ModelID("tensorflow", "resnet_0", 1)
// returns "tensorflow:resnet_0:1".
inline std::string ModelID(const std::string& framework,
                           const std::string& model_name,
                           uint32_t version) {
  std::stringstream ss;
  ss << framework << ":" << model_name << ":" << version;
  return ss.str();
}

inline void ParseModelID(const std::string model_id,
                         ModelSession* model_session) {
  std::vector<std::string> tokens;
  SplitString(model_id, ':', &tokens);
  model_session->set_framework(tokens[0]);
  model_session->set_model_name(tokens[1]);
  model_session->set_version(std::stoi(tokens[2]));
}

inline std::string ModelSessionToModelID(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":" << model_session.model_name() << ":"
     << model_session.version();
  return ss.str();
}

// Profile IDs append the input resolution when one is set, e.g.
// "tensorflow:ssd_mobilenet:1:300x300".
inline std::string ModelSessionToProfileID(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":" << model_session.model_name() << ":"
     << model_session.version();
  if (model_session.image_height() > 0) {
    ss << ":" << model_session.image_height() << "x"
       << model_session.image_width();
  }
  return ss.str();
}

inline std::string ModelSessionToString(const ModelSession& model_session) {
  std::stringstream ss;
  ss << model_session.framework() << ":"
     << model_session.model_name() << ":" << model_session.version();
  if (model_session.image_height() > 0) {
    ss << ":" << model_session.image_height() << "x"
       << model_session.image_width();
  }
  ss << ":" << model_session.latency_sla();
  return ss.str();
}

// Parses "framework:model:version:latency" or
// "framework:model:version:HxW:latency",
// e.g. "tensorflow:resnet_0:1:224x224:50".
inline bool ParseModelSession(const std::string& str, ModelSession* sess) {
  std::vector<std::string> tokens;
  SplitString(str, ':', &tokens);
  if (tokens.size() < 4) {
    return false;
  }
  sess->set_framework(tokens[0]);
  sess->set_model_name(tokens[1]);
  sess->set_version(std::stoi(tokens[2]));
  if (tokens.size() == 4) {
    sess->set_latency_sla(std::stoi(tokens[3]));
  } else {
    sess->set_latency_sla(std::stoi(tokens[4]));
    // decode image size
    std::vector<std::string> image_dims;
    SplitString(tokens[3], 'x', &image_dims);
    if (image_dims.size() != 2) {
      return false;
    }
    sess->set_image_height(std::stoi(image_dims[0]));
    sess->set_image_width(std::stoi(image_dims[1]));
  }
  return true;
}

} // namespace nexus

#endif // NEXUS_COMMON_MODEL_DEF_H_
--------------------------------------------------------------------------------
/src/nexus/backend/backend_main.cpp:
--------------------------------------------------------------------------------
#include
#include

#include
#include
#include
#include

#include
#include

#include "nexus/backend/backend_server.h"
#include
"nexus/common/config.h" 14 | #include "nexus/common/image.h" 15 | #include "nexus/common/util.h" 16 | #include "nexus/proto/nnquery.pb.h" 17 | 18 | using namespace nexus; 19 | using namespace nexus::backend; 20 | 21 | DEFINE_string(port, std::to_string(BACKEND_DEFAULT_PORT), "server port"); 22 | DEFINE_string(rpc_port, std::to_string(BACKEND_DEFAULT_RPC_PORT), "RPC port"); 23 | DEFINE_string(sch_addr, "127.0.0.1", 24 | "scheduler IP address " 25 | "(use default port 10001 if no port specified)"); 26 | DEFINE_int32(gpu, 0, "gpu device ID (default: 0)"); 27 | DEFINE_uint64(num_workers, 0, "number of workers (default: 0)"); 28 | DEFINE_string(cores, "", "Specify cores to use, e.g., \"0-4\", or \"0-3,5\""); 29 | 30 | std::vector ParseCores(std::string s) { 31 | std::vector cores; 32 | std::vector segs; 33 | SplitString(s, ',', &segs); 34 | for (auto seg : segs) { 35 | if (seg.find('-') == std::string::npos) { 36 | cores.push_back(std::stoi(seg)); 37 | } else { 38 | std::vector range; 39 | SplitString(seg, '-', &range); 40 | CHECK_EQ(range.size(), 2) << "Wrong format of cores"; 41 | int beg = std::stoi(range[0]); 42 | int end = std::stoi(range[1]); 43 | for (int i = beg; i <= end; ++i) { 44 | cores.push_back(i); 45 | } 46 | } 47 | } 48 | return cores; 49 | } 50 | 51 | BackendServer *server_ptr; 52 | 53 | void sigint_handler(int _sig) { 54 | if (server_ptr) { 55 | server_ptr->Stop(); 56 | } 57 | std::exit(0); 58 | } 59 | 60 | int main(int argc, char **argv) { 61 | struct sigaction sig_handle; 62 | sig_handle.sa_handler = sigint_handler; 63 | sigemptyset(&sig_handle.sa_mask); 64 | sig_handle.sa_flags = 0; 65 | sigaction(SIGINT, &sig_handle, NULL); 66 | 67 | // Init glog 68 | google::InitGoogleLogging(argv[0]); 69 | // Parse command line flags 70 | google::ParseCommandLineFlags(&argc, &argv, true); 71 | // Setup backtrace on segfault 72 | google::InstallFailureSignalHandler(); 73 | // Decide server IP address 74 | LOG(INFO) << "Backend server: port " << FLAGS_port << ", rpc port " 75 | << FLAGS_rpc_port << ", workers " << FLAGS_num_workers << ", gpu " 76 | << FLAGS_gpu; 77 | // Initialize _Hack_Images 78 | { 79 | ImageProto image; 80 | image.set_hack_filename("__init_Hack_Images"); 81 | (void)_Hack_DecodeImageByFilename(image, ChannelOrder::CO_BGR); 82 | } 83 | // Create the backend server 84 | std::vector cores = ParseCores(FLAGS_cores); 85 | BackendServer server(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_gpu, 86 | FLAGS_num_workers, cores); 87 | server_ptr = &server; 88 | server.Run(); 89 | return 0; 90 | } 91 | -------------------------------------------------------------------------------- /src/nexus/common/time_util.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/common/time_util.h" 2 | 3 | namespace nexus { 4 | 5 | void Timer::Record(const std::string& tag) { 6 | time_points_.emplace(tag, Clock::now()); 7 | } 8 | 9 | uint64_t Timer::GetLatencyMillis(const std::string& beg_tag, 10 | const std::string& end_tag) { 11 | auto beg = GetTimepoint(beg_tag); 12 | auto end = GetTimepoint(end_tag); 13 | if (beg == nullptr || end == nullptr) { 14 | return 0; 15 | } 16 | auto d = std::chrono::duration_cast(*end - *beg); 17 | return d.count(); 18 | } 19 | 20 | uint64_t Timer::GetLatencyMicros(const std::string& beg_tag, 21 | const std::string& end_tag) { 22 | auto beg = GetTimepoint(beg_tag); 23 | auto end = GetTimepoint(end_tag); 24 | if (beg == nullptr || end == nullptr) { 25 | return 0; 26 | } 27 | auto d = std::chrono::duration_cast(*end 
- *beg); 28 | return d.count(); 29 | } 30 | 31 | TimePoint* Timer::GetTimepoint(const std::string& tag) { 32 | auto itr = time_points_.find(tag); 33 | if (itr == time_points_.end()) { 34 | return nullptr; 35 | } 36 | return &itr->second; 37 | } 38 | 39 | Tickable::Tickable(uint32_t tick_interval_sec) : 40 | tick_interval_sec_(tick_interval_sec), 41 | sec_since_last_tick_(0) { 42 | } 43 | 44 | Tickable::~Tickable() { 45 | } 46 | 47 | void Tickable::Tick() { 48 | ++sec_since_last_tick_; 49 | if (sec_since_last_tick_ == tick_interval_sec_) { 50 | TickImpl(); 51 | sec_since_last_tick_ = 0; 52 | } 53 | } 54 | 55 | TimeSystem& TimeSystem::Singleton() { 56 | static TimeSystem time_system_; 57 | return time_system_; 58 | } 59 | 60 | TimeSystem::TimeSystem() : 61 | running_(true) { 62 | thread_ = std::thread(&TimeSystem::Run, this); 63 | } 64 | 65 | TimeSystem::~TimeSystem() { 66 | running_ = false; 67 | thread_.join(); 68 | } 69 | 70 | void TimeSystem::Stop() { 71 | running_ = false; 72 | thread_.join(); 73 | } 74 | 75 | bool TimeSystem::AddTickable(std::shared_ptr tickable) { 76 | std::lock_guard lock(mutex_); 77 | if (tickables_.find(tickable) != tickables_.end()) { 78 | return false; 79 | } 80 | tickables_.insert(tickable); 81 | return true; 82 | } 83 | 84 | bool TimeSystem::RemoveTickable(std::shared_ptr tickable) { 85 | std::lock_guard lock(mutex_); 86 | auto iter = tickables_.find(tickable); 87 | if (iter == tickables_.end()) { 88 | return false; 89 | } 90 | tickables_.erase(iter); 91 | return true; 92 | } 93 | 94 | void TimeSystem::Run() { 95 | while (running_) { 96 | auto next_time = Clock::now() + std::chrono::seconds(1); 97 | { 98 | std::lock_guard lock(mutex_); 99 | for (auto item : tickables_) { 100 | item->Tick(); 101 | } 102 | } 103 | std::this_thread::sleep_until(next_time); 104 | } 105 | } 106 | 107 | } // namespace nexus 108 | -------------------------------------------------------------------------------- /src/nexus/common/device.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_DEVICE_H_ 2 | #define NEXUS_COMMON_DEVICE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifdef USE_GPU 10 | #include 11 | #endif 12 | 13 | namespace nexus { 14 | 15 | enum DeviceType { 16 | kCPU = 0, 17 | kGPU = 1, 18 | }; 19 | 20 | class DeviceManager; // forward declare 21 | 22 | class Device { 23 | public: 24 | virtual void* Allocate(size_t nbytes) = 0; 25 | 26 | virtual void Free(void* buf) = 0; 27 | 28 | virtual std::string name() const = 0; 29 | 30 | DeviceType type() const { return type_; } 31 | 32 | bool operator==(const Device& other) const { 33 | return name() == other.name(); 34 | } 35 | 36 | protected: 37 | Device(DeviceType type) : type_(type) {} 38 | // disable copy 39 | Device(const Device&) = delete; 40 | Device& operator=(const Device&) = delete; 41 | 42 | private: 43 | DeviceType type_; 44 | }; 45 | 46 | class CPUDevice : public Device { 47 | public: 48 | void* Allocate(size_t nbytes) final { 49 | void* buf = malloc(nbytes); 50 | return buf; 51 | } 52 | 53 | void Free(void* buf) final { 54 | free(buf); 55 | } 56 | 57 | std::string name() const final { return "cpu"; } 58 | 59 | private: 60 | CPUDevice() : Device(kCPU) {} 61 | friend class DeviceManager; 62 | }; 63 | 64 | #ifdef USE_GPU 65 | 66 | #define NEXUS_CUDA_CHECK(condition) \ 67 | do { \ 68 | cudaError_t err = (condition); \ 69 | CHECK_EQ(err, cudaSuccess) << cudaGetErrorString(err); \ 70 | } while (0) 71 | 72 | class GPUDevice : public Device { 73 | 
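`Timer`, `Tickable`, and `TimeSystem` above form Nexus's lightweight timing plumbing: `Timer` stores named time points and reports the gap between any two tags, while `TimeSystem` runs a once-per-second loop that calls `Tick()` on every registered `Tickable`, which fires `TickImpl()` once `tick_interval_sec_` ticks have accumulated (this is how `IntervalCounter` in `metric.h` snapshots its history). A short `Timer` usage sketch (the `"begin"` tag mirrors the one recorded in `Task`'s constructor; the sleep duration is illustrative):

```cpp
#include <chrono>
#include <glog/logging.h>
#include <thread>

// Sketch: timing one stage with Timer's named time points.
void DemoTimer() {
  nexus::Timer timer;
  timer.Record("begin");
  std::this_thread::sleep_for(std::chrono::milliseconds(25));
  timer.Record("end");
  // Logs roughly 25; GetLatencyMillis returns 0 if either tag is absent.
  LOG(INFO) << "stage took " << timer.GetLatencyMillis("begin", "end")
            << " ms";
}
```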
public: 74 | int gpu_id() const { return gpu_id_; } 75 | 76 | void* Allocate(size_t nbytes) final; 77 | 78 | void Free(void* buf) final; 79 | 80 | std::string name() const final { return name_; } 81 | 82 | std::string device_name() const { return device_name_; } 83 | 84 | std::string uuid() const { return uuid_; } 85 | 86 | size_t FreeMemory() const; 87 | 88 | size_t TotalMemory() const { return total_memory_; } 89 | 90 | private: 91 | explicit GPUDevice(int gpu_id); 92 | friend class DeviceManager; 93 | 94 | private: 95 | int gpu_id_; 96 | std::string name_; 97 | std::string device_name_; 98 | std::string uuid_; 99 | size_t total_memory_; 100 | }; 101 | 102 | #endif 103 | 104 | class DeviceManager { 105 | public: 106 | static DeviceManager& Singleton() { 107 | static DeviceManager device_manager; 108 | return device_manager; 109 | } 110 | 111 | CPUDevice* GetCPUDevice() const { 112 | return cpu_device_; 113 | } 114 | 115 | #ifdef USE_GPU 116 | GPUDevice* GetGPUDevice(int gpu_id) const; 117 | #endif 118 | 119 | private: 120 | DeviceManager(); 121 | 122 | CPUDevice* cpu_device_; 123 | #ifdef USE_GPU 124 | std::vector<GPUDevice*> gpu_devices_; 125 | #endif 126 | }; 127 | 128 | } // namespace nexus 129 | 130 | #endif // NEXUS_COMMON_DEVICE_H_ 131 | -------------------------------------------------------------------------------- /src/nexus/backend/utils.cpp: -------------------------------------------------------------------------------- 1 | #include <fstream> 2 | #include <unordered_set> 3 | #include <gflags/gflags.h> 4 | #include <glog/logging.h> 5 | 6 | #include "nexus/backend/utils.h" 7 | #include "nexus/common/util.h" 8 | 9 | DEFINE_bool(hack_reply_omit_output, false, 10 | "HACK: omit output field in ReplyProto"); 11 | 12 | namespace nexus { 13 | namespace backend { 14 | 15 | void LoadClassnames(const std::string &filepath, 16 | std::unordered_map<int, std::string> *classnames) { 17 | std::ifstream infile(filepath); 18 | CHECK(infile.good()) << "Classname file " << filepath << " doesn't exist"; 19 | std::string line; 20 | int class_id = 0; 21 | while (std::getline(infile, line)) { 22 | std::vector<std::string> items; 23 | SplitString(line, ',', &items); 24 | if (items.size() == 1) { 25 | classnames->emplace(class_id++, line); 26 | } else { 27 | int idx = std::stoi(items[0]); 28 | classnames->emplace(idx, items[1]); 29 | } 30 | } 31 | infile.close(); 32 | } 33 | 34 | void PostprocessClassification( 35 | const QueryProto &query, const float *prob, size_t nprobs, 36 | QueryResultProto *result, 37 | const std::unordered_map<int, std::string> *classnames) { 38 | // TODO: handle top k and threshold in the query 39 | if (classnames != nullptr) { 40 | CHECK_EQ(classnames->size(), nprobs) << "Mismatch between number of " 41 | << "class names and number of outputs"; 42 | } 43 | std::unordered_set<std::string> output_fields(query.output_field().begin(), 44 | query.output_field().end()); 45 | if (output_fields.empty()) { 46 | output_fields.insert("class_id"); 47 | output_fields.insert("class_prob"); 48 | output_fields.insert("class_name"); 49 | } 50 | float max_prob = 0.; 51 | int max_idx = -1; 52 | for (int i = 0; i < (int)nprobs; ++i) { 53 | float p = prob[i]; 54 | if (p > max_prob) { 55 | max_prob = p; 56 | max_idx = i; 57 | } 58 | } 59 | if (max_idx > -1) { 60 | auto record = result->add_output(); 61 | if (FLAGS_hack_reply_omit_output) 62 | return; 63 | for (const auto& field : output_fields) { 64 | if (field == "class_id") { 65 | auto value = record->add_named_value(); 66 | value->set_name("class_id"); 67 | value->set_data_type(DT_INT32); 68 | value->set_i(max_idx); 69 | } else if (field == "class_prob") { 70 | auto value = 
record->add_named_value(); 71 | value->set_name("class_prob"); 72 | value->set_data_type(DT_FLOAT); 73 | value->set_f(max_prob); 74 | } else if (field == "class_name") { 75 | auto value = record->add_named_value(); 76 | value->set_name("class_name"); 77 | value->set_data_type(DT_STRING); 78 | if (classnames != nullptr) { 79 | auto iter = classnames->find(max_idx); 80 | if (iter == classnames->end()) { 81 | LOG(ERROR) << "Cannot find class name for class id " << max_idx; 82 | } else { 83 | value->set_s(iter->second); 84 | } 85 | } 86 | } 87 | } 88 | } 89 | } 90 | 91 | } // namespace backend 92 | } // namespace nexus 93 | -------------------------------------------------------------------------------- /src/nexus/common/metric.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/metric.h" 4 | 5 | namespace nexus { 6 | 7 | Counter::Counter() : 8 | count_(0) { 9 | } 10 | 11 | void Counter::Increase(uint64_t value) { 12 | count_.fetch_add(value, std::memory_order_relaxed); 13 | } 14 | 15 | void Counter::Reset() { 16 | count_.exchange(0, std::memory_order_relaxed); 17 | } 18 | 19 | IntervalCounter::IntervalCounter(uint32_t interval_sec) : 20 | Tickable(interval_sec), 21 | count_(0) { 22 | } 23 | 24 | void IntervalCounter::Increase(uint64_t value) { 25 | count_.fetch_add(value, std::memory_order_relaxed); 26 | } 27 | 28 | void IntervalCounter::Reset() { 29 | std::lock_guard guard(history_mutex_); 30 | count_.exchange(0, std::memory_order_relaxed); 31 | history_.clear(); 32 | } 33 | 34 | std::vector IntervalCounter::GetHistory() { 35 | std::lock_guard guard(history_mutex_); 36 | std::vector ret(std::move(history_)); 37 | history_.clear(); 38 | return ret; 39 | } 40 | 41 | void IntervalCounter::TickImpl() { 42 | uint64_t count = count_.exchange(0, std::memory_order_relaxed); 43 | std::lock_guard guard(history_mutex_); 44 | history_.push_back(count); 45 | } 46 | 47 | EWMA::EWMA(uint32_t sample_interval_sec, uint32_t avg_interval_sec) : 48 | sample_interval_sec_(sample_interval_sec), 49 | avg_interval_sec_(avg_interval_sec), 50 | rate_() { 51 | alpha_ = 1 - exp(-1. 
* sample_interval_sec_ / avg_interval_sec_); 52 | } 53 | 54 | EWMA::EWMA(const EWMA& other) : 55 | sample_interval_sec_(other.sample_interval_sec_), 56 | avg_interval_sec_(other.avg_interval_sec_), 57 | rate_(other.rate_), 58 | alpha_(other.alpha_) {} 59 | 60 | void EWMA::AddSample(uint64_t count) { 61 | double current_rate = static_cast(count) / sample_interval_sec_; 62 | if (rate_ < 0) { 63 | rate_ = current_rate; 64 | } else { 65 | rate_ += (current_rate - rate_) * alpha_; 66 | } 67 | } 68 | 69 | EWMA& EWMA::operator=(const EWMA& other) { 70 | if (this != &other) { 71 | sample_interval_sec_ = other.sample_interval_sec_; 72 | avg_interval_sec_ = other.avg_interval_sec_; 73 | rate_ = other.rate_; 74 | alpha_ = other.alpha_; 75 | } 76 | return *this; 77 | } 78 | 79 | MetricRegistry& MetricRegistry::Singleton() { 80 | static MetricRegistry metric_registry_; 81 | return metric_registry_; 82 | } 83 | 84 | std::shared_ptr MetricRegistry::CreateCounter() { 85 | std::lock_guard lock(mutex_); 86 | auto metric = std::make_shared(); 87 | metrics_.insert(metric); 88 | return metric; 89 | } 90 | 91 | std::shared_ptr MetricRegistry::CreateIntervalCounter( 92 | uint32_t interval_sec) { 93 | std::lock_guard lock(mutex_); 94 | auto metric = std::make_shared(interval_sec); 95 | metrics_.insert(metric); 96 | TimeSystem::Singleton().AddTickable(metric); 97 | return metric; 98 | } 99 | 100 | void MetricRegistry::RemoveMetric(std::shared_ptr metric) { 101 | std::lock_guard lock(mutex_); 102 | TimeSystem::Singleton().RemoveTickable(metric); 103 | metrics_.erase(metric); 104 | } 105 | 106 | } // namespace nexus 107 | -------------------------------------------------------------------------------- /src/nexus/app/app_base.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/app/app_base.h" 2 | 3 | namespace nexus { 4 | namespace app { 5 | 6 | AppBase::AppBase(const std::string& port, 7 | const std::string& rpc_port, 8 | const std::string& sch_addr, 9 | size_t nthreads) : 10 | Frontend(port, rpc_port, sch_addr), 11 | nthreads_(nthreads), 12 | qp_(nullptr), 13 | step_us_(0) 14 | { 15 | } 16 | 17 | AppBase::~AppBase() { 18 | if (qp_ != nullptr) { 19 | delete qp_; 20 | } 21 | } 22 | 23 | void AppBase::Start() { 24 | CHECK(qp_ != nullptr) << "Query processor is not initialized"; 25 | Run(qp_, nthreads_); 26 | } 27 | 28 | std::shared_ptr AppBase::GetModelHandler( 29 | const std::string& framework, const std::string& model_name, 30 | uint32_t version, uint64_t latency_sla, float estimate_workload, 31 | std::vector image_size, LoadBalancePolicy lb_policy) { 32 | LoadModelRequest req; 33 | req.set_node_id(node_id()); 34 | auto model_sess = req.mutable_model_session(); 35 | model_sess->set_framework(framework); 36 | model_sess->set_model_name(model_name); 37 | model_sess->set_version(version); 38 | model_sess->set_latency_sla(latency_sla); 39 | if (image_size.size() > 0) { 40 | if (image_size.size() != 2) { 41 | LOG(ERROR) << "Image size is not 2"; 42 | return nullptr; 43 | } 44 | model_sess->set_image_height(image_size[0]); 45 | model_sess->set_image_width(image_size[1]); 46 | } 47 | if (estimate_workload < 0) { 48 | LOG(ERROR) << "Estimate workload must be non-negative value"; 49 | return nullptr; 50 | } 51 | if (estimate_workload > 0) { 52 | req.set_estimate_workload(estimate_workload); 53 | } 54 | 55 | auto model_handler = LoadModel(req, lb_policy); 56 | if (model_handler == nullptr) { 57 | // TODO: load model failed, should retry after some time, 58 | // or wait for 
callback from scheduler 59 | LOG(FATAL) << "Load model failed"; 60 | } 61 | return model_handler; 62 | } 63 | 64 | bool AppBase::IsComplexQuery() const { 65 | return slo_us_ != 0; 66 | } 67 | 68 | void AppBase::ComplexQuerySetup(const std::string &cq_id, uint32_t slo_us, uint32_t step_us) { 69 | CHECK(!IsComplexQuery()) << "The complex query has been set up."; 70 | CHECK(!cq_id.empty()) << "cq_id cannot be empty."; 71 | CHECK(slo_us != 0) << "slo_us cannot be 0."; 72 | CHECK(step_us != 0) << "step_us cannot be 0."; 73 | cq_id_ = cq_id; 74 | slo_us_ = slo_us; 75 | step_us_ = step_us; 76 | 77 | ComplexQuerySetupRequest req; 78 | req.set_cq_id(cq_id_); 79 | req.set_slo_us(slo_us_); 80 | req.set_step_us(step_us); 81 | Frontend::ComplexQuerySetup(req); 82 | } 83 | 84 | void AppBase::ComplexQueryAddEdge(const std::shared_ptr& source, 85 | const std::shared_ptr& target) { 86 | ComplexQueryAddEdgeRequest req; 87 | req.set_cq_id(cq_id_); 88 | req.mutable_source()->CopyFrom(source->model_session()); 89 | req.mutable_target()->CopyFrom(target->model_session()); 90 | Frontend::ComplexQueryAddEdge(req); 91 | } 92 | 93 | void LaunchApp(AppBase* app) { 94 | app->Setup(); 95 | app->Start(); 96 | } 97 | 98 | } // namespace app 99 | } // namespace nexus 100 | -------------------------------------------------------------------------------- /src/nexus/common/rpc_call.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_RPC_CALL_H_ 2 | #define NEXUS_COMMON_RPC_CALL_H_ 3 | 4 | #include 5 | 6 | namespace nexus { 7 | 8 | enum RpcCallStatus { 9 | RPC_CALL_CREATE, 10 | RPC_CALL_PROCESS, 11 | RPC_CALL_FINISH, 12 | }; 13 | 14 | class RpcCallBase { 15 | public: 16 | RpcCallBase(grpc::ServerCompletionQueue* cq) : 17 | cq_(cq), 18 | status_(RPC_CALL_CREATE) { 19 | } 20 | 21 | virtual ~RpcCallBase() {} 22 | 23 | virtual void Proceed() = 0; 24 | 25 | protected: 26 | grpc::ServerCompletionQueue* cq_; 27 | grpc::ServerContext ctx_; 28 | RpcCallStatus status_; 29 | }; 30 | 31 | #define INSTANTIATE_RPC_CALL(SERVICE, RPCCALL, REQUEST, REPLY) \ 32 | class RPCCALL##_Call : public RpcCallBase { \ 33 | public: \ 34 | using Handler = std::function; \ 36 | RPCCALL##_Call(SERVICE* service, grpc::ServerCompletionQueue* cq, \ 37 | Handler handle) : \ 38 | RpcCallBase(cq), \ 39 | service_(service), \ 40 | handle_(std::move(handle)), \ 41 | responder_(&ctx_) { \ 42 | Proceed(); \ 43 | } \ 44 | void Proceed() { \ 45 | if (status_ == RPC_CALL_CREATE) { \ 46 | status_ = RPC_CALL_PROCESS; \ 47 | service_->Request##RPCCALL( \ 48 | &ctx_, &request_, &responder_, cq_, cq_, this); \ 49 | } else if (status_ == RPC_CALL_PROCESS) { \ 50 | new RPCCALL##_Call(service_, cq_, handle_); \ 51 | handle_(ctx_, request_, &reply_); \ 52 | status_ = RPC_CALL_FINISH; \ 53 | responder_.Finish(reply_, grpc::Status::OK, this); \ 54 | } else { \ 55 | CHECK_EQ(status_, RPC_CALL_FINISH); \ 56 | delete this; \ 57 | } \ 58 | } \ 59 | private: \ 60 | SERVICE* service_; \ 61 | Handler handle_; \ 62 | grpc::ServerAsyncResponseWriter responder_; \ 63 | REQUEST request_; \ 64 | REPLY reply_; \ 65 | } 66 | 67 | } // namespace nexus 68 | 69 | #endif // NEXUS_COMMON_RPC_CALL_H_ 70 | -------------------------------------------------------------------------------- /python/nexus/client.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import socket 3 | 4 | from .proto import nnquery_pb2 as npb 5 | 6 | MAGIC_NUMBER = 0xDEADBEEF 7 | HEADER_SIZE = 12 8 | # Message type 9 
| MSG_USER_REGISTER = 1 10 | MSG_USER_REQUEST = 2 11 | MSG_USER_REPLY = 3 12 | 13 | 14 | class Client: 15 | def __init__(self, server_addr, user_id): 16 | self.server_addr = server_addr 17 | self.user_id = user_id 18 | self.req_id = 0 19 | self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 20 | self.sock.settimeout(1) # timeout after 1 second 21 | host, port = server_addr.split(':') 22 | try: 23 | self.sock.connect((host, int(port))) 24 | except OSError as e: 25 | raise RuntimeError("Error in connecting to %s" % server_addr) from e 26 | self.register() 27 | 28 | 29 | def __del__(self): 30 | self.sock.close() 31 | 32 | 33 | def register(self): 34 | req = npb.RequestProto(user_id=self.user_id) 35 | msg = self._prepare_message(MSG_USER_REGISTER, req) 36 | self.sock.sendall(msg) 37 | reply = self._recv_reply() 38 | assert reply.status == 0 39 | 40 | 41 | def request(self, img): 42 | req = self._prepare_req(img) 43 | msg = self._prepare_message(MSG_USER_REQUEST, req) 44 | failed = 0 45 | while True: 46 | try: 47 | self.sock.sendall(msg) 48 | reply = self._recv_reply() 49 | break 50 | except socket.timeout: 51 | failed += 1 52 | if failed == 3: 53 | return None 54 | return reply 55 | 56 | 57 | def _prepare_req(self, img): 58 | req = npb.RequestProto() 59 | req.user_id = self.user_id 60 | req.req_id = self.req_id 61 | req.input.data_type = npb.DT_IMAGE 62 | req.input.image.data = img 63 | req.input.image.format = npb.ImageProto.JPEG 64 | req.input.image.color = True 65 | self.req_id += 1 66 | return req 67 | 68 | 69 | def _prepare_message(self, msg_type, request): 70 | body = request.SerializeToString() 71 | header = struct.pack('!LLL', MAGIC_NUMBER, msg_type, len(body)) 72 | return header + body 73 | 74 | 75 | def _recv_reply(self): 76 | body_length = self._recv_header() 77 | buf = self._read_nbytes(body_length) 78 | reply = npb.ReplyProto() 79 | reply.ParseFromString(buf) 80 | return reply 81 | 82 | 83 | def _recv_header(self): 84 | buf = self._read_nbytes(HEADER_SIZE) 85 | magic_no, msg_type, length = struct.unpack('!LLL', buf) 86 | assert magic_no == MAGIC_NUMBER 87 | assert msg_type == MSG_USER_REPLY 88 | return length 89 | 90 | 91 | def _read_nbytes(self, n): 92 | """ Read exactly n bytes from the socket. 93 | Raise RuntimeError if the connection closed before 94 | n bytes were read. 
95 | """ 96 | buf = '' 97 | while n > 0: 98 | data = self.sock.recv(n) 99 | if data == '': 100 | raise RuntimeError("Unexpected connection close") 101 | buf += data 102 | n -= len(data) 103 | return buf 104 | -------------------------------------------------------------------------------- /src/nexus/backend/batch_task.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/backend/batch_task.h" 2 | #include "nexus/common/util.h" 3 | #include 4 | 5 | namespace nexus { 6 | namespace backend { 7 | 8 | BatchTask::BatchTask(uint32_t max_batch) : 9 | max_batch_(max_batch), 10 | input_write_pt_(nullptr), 11 | input_elements_(0) {} 12 | 13 | void BatchTask::SetInputArray(ArrayPtr arr) { 14 | input_array_ = arr; 15 | input_write_pt_ = input_array_->Data(); 16 | input_elements_ = 0; 17 | } 18 | 19 | void BatchTask::CreateInputArray(DataType data_type, 20 | size_t num_elements_per_input, 21 | Device* device) { 22 | input_array_ = std::make_shared( 23 | data_type, max_batch_ * num_elements_per_input, device); 24 | input_write_pt_ = input_array_->Data(); 25 | input_elements_ = 0; 26 | } 27 | 28 | void BatchTask::SetOutputArrays( 29 | const std::unordered_map& arrays) { 30 | output_arrays_ = arrays; 31 | } 32 | 33 | void BatchTask::CreateOutputArrays( 34 | const std::unordered_map& sizes, Device* device) { 35 | uint32_t batch = max_batch_; 36 | if (inputs_.size() > 0) { 37 | batch = inputs_.size(); 38 | } 39 | for (auto iter : sizes) { 40 | auto arr = std::make_shared(DT_FLOAT, batch * iter.second, 41 | device); 42 | output_arrays_.emplace(iter.first, arr); 43 | } 44 | } 45 | 46 | ArrayPtr BatchTask::GetOutputArray(const std::string& name) const { 47 | CHECK_GT(output_arrays_.count(name), 0) << "Output array " << name << 48 | " doesn't exist"; 49 | return output_arrays_.at(name); 50 | } 51 | 52 | void BatchTask::AppendInput(std::shared_ptr input, 53 | std::shared_ptr task) { 54 | CHECK_EQ(input_array_->data_type(), input->array->data_type()) << 55 | "Input data type mismatch"; 56 | CHECK_LT(inputs_.size(), max_batch_) << "Exceed max batch size"; 57 | CHECK_LE(input_elements_ + input->array->num_elements(), 58 | input_array_->num_elements()) << "Exceeds batch input array capacity"; 59 | inputs_.push_back(input); 60 | tasks_.push_back(task); 61 | auto in_arr = input->array; 62 | const char* src_data = in_arr->Data(); 63 | size_t nbytes = in_arr->num_elements() * type_size(input_array_->data_type()); 64 | Memcpy(input_write_pt_, input_array_->device(), src_data, in_arr->device(), 65 | nbytes); 66 | input_write_pt_ += nbytes; 67 | } 68 | 69 | void BatchTask::SliceOutputBatch( 70 | const std::unordered_map& slices) { 71 | CHECK(outputs_.empty()) << "Batch output is already sliced"; 72 | CHECK_EQ(output_arrays_.size(), slices.size()) << "Number of outputs must " 73 | "match the number of slices"; 74 | for (uint i = 0; i < inputs_.size(); ++i) { 75 | auto input = inputs_[i]; 76 | std::unordered_map slice_arrays; 77 | for (auto iter : output_arrays_) { 78 | auto const& slice = slices.at(iter.first); 79 | slice_arrays.emplace(iter.first, iter.second->Slice( 80 | slice.offset(i), slice.num_elements(i))); 81 | } 82 | outputs_.push_back(std::make_shared(input->task_id, input->index, 83 | slice_arrays)); 84 | } 85 | } 86 | 87 | void BatchTask::set_outputs( 88 | const std::vector>& outputs) { 89 | CHECK_EQ(outputs.size(), inputs_.size()) << "Number of outputs must match " 90 | "number of inputs"; 91 | outputs_.clear(); 92 | outputs_ = outputs; 93 | } 94 | 95 | } // 
namespace backend 96 | } // namespace nexus 97 | -------------------------------------------------------------------------------- /src/nexus/common/image.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include "nexus/common/image.h" 14 | 15 | DEFINE_string(hack_image_root, "", "HACK: path to directory of images"); 16 | 17 | class _Hack_Images { 18 | public: 19 | _Hack_Images(const std::string &root) { 20 | if (root.empty()) 21 | return; 22 | LOG(INFO) << "Initializing _Hack_Images, root: " << root; 23 | auto root_path = boost::filesystem::absolute(root); 24 | for (auto it = boost::filesystem::recursive_directory_iterator(root_path), 25 | end = boost::filesystem::recursive_directory_iterator(); 26 | it != end; ++it) { 27 | if (!boost::filesystem::is_regular_file(it->path())) 28 | continue; 29 | if (it->path().extension().string() != ".jpg") 30 | continue; 31 | 32 | std::ifstream fin(it->path().string(), std::ios::binary); 33 | std::istreambuf_iterator fin_beg(fin), fin_end; 34 | std::vector data(fin_beg, fin_end); 35 | 36 | auto rel_path = boost::filesystem::relative(it->path(), root_path); 37 | data_.emplace(rel_path.string(), std::move(data)); 38 | } 39 | LOG(INFO) << "Found " << data_.size() << " images in " << root; 40 | 41 | LOG(INFO) << "Keys of some random images:"; 42 | auto iter = data_.begin(); 43 | for (int i = 0; i < 10 && iter != data_.end(); ++i, ++iter) { 44 | LOG(INFO) << " " << iter->first; 45 | } 46 | 47 | LOG(INFO) << "_Hack_Images initilization finished"; 48 | } 49 | 50 | const std::vector &get(const std::string &filename) const { 51 | auto iter = data_.find(filename); 52 | return iter != data_.end() ? iter->second : empty_; 53 | } 54 | 55 | private: 56 | std::unordered_map> data_; 57 | std::vector empty_; 58 | }; 59 | 60 | namespace nexus { 61 | 62 | cv::Mat DecodeImageImpl(const std::vector &vec_data, bool color, 63 | ChannelOrder order) { 64 | cv::Mat img_bgr; 65 | int cv_read_flag = color ? 
cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE; 66 | img_bgr = cv::imdecode(vec_data, cv_read_flag); 67 | if (!img_bgr.data) { 68 | LOG(ERROR) << "Could not decode image"; 69 | } 70 | if (order == CO_BGR) { 71 | return img_bgr; 72 | } 73 | cv::Mat img_rgb; 74 | cv::cvtColor(img_bgr, img_rgb, cv::COLOR_BGR2RGB); 75 | return img_rgb; 76 | } 77 | 78 | cv::Mat _Hack_DecodeImageByFilename(const ImageProto &image, 79 | ChannelOrder order) { 80 | static _Hack_Images *_images = new _Hack_Images(FLAGS_hack_image_root); 81 | const auto &vec_data = _images->get(image.hack_filename()); 82 | if (vec_data.empty()) { 83 | if (image.hack_filename() != "__init_Hack_Images") 84 | LOG(ERROR) << "Cannot find image by filename: " << image.hack_filename(); 85 | return {}; 86 | } 87 | return DecodeImageImpl(vec_data, image.color(), order); 88 | } 89 | 90 | cv::Mat DecodeImage(const ImageProto &image, ChannelOrder order) { 91 | if (image.hack_filename().empty()) { 92 | const std::string &data = image.data(); 93 | std::vector vec_data(data.c_str(), data.c_str() + data.size()); 94 | return DecodeImageImpl(vec_data, image.color(), order); 95 | } else { 96 | return _Hack_DecodeImageByFilename(image, order); 97 | } 98 | } 99 | 100 | } // namespace nexus 101 | -------------------------------------------------------------------------------- /examples/simple_app/src/frontend.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/app/app_base.h" 4 | 5 | using namespace nexus; 6 | using namespace nexus::app; 7 | 8 | class SimpleApp : public AppBase { 9 | public: 10 | SimpleApp(std::string port, std::string rpc_port, std::string sch_addr, 11 | size_t nthreads, const std::string& framework, 12 | const std::string& model_name, int version, int latency_sla_ms, 13 | float estimate_workload, int image_height, int image_width) : 14 | AppBase(port, rpc_port, sch_addr, nthreads), 15 | framework_(framework), 16 | model_name_(model_name), 17 | version_(version), 18 | latency_sla_ms_(latency_sla_ms), 19 | estimate_workload_(estimate_workload) { 20 | CHECK_GE(image_height, 0) << "Image height must be no less than 0"; 21 | CHECK_GE(image_width, 0) << "Image width must be no less than 0"; 22 | if (image_height == 0 || image_width == 0) { 23 | image_height_ = 0; 24 | image_width_ = 0; 25 | } else { 26 | image_height_ = image_height; 27 | image_width_ = image_width; 28 | } 29 | } 30 | 31 | void Setup() final { 32 | model_ = GetModelHandler(framework_, model_name_, version_, 33 | latency_sla_ms_, estimate_workload_, 34 | {image_height_, image_width_}); 35 | auto func1 = [&](std::shared_ptr ctx) { 36 | auto output = model_->Execute(ctx, ctx->const_request().input()); 37 | return std::vector{ 38 | std::make_shared("output", output)}; 39 | }; 40 | auto func2 = [&](std::shared_ptr ctx) { 41 | auto output = ctx->GetVariable("output")->result(); 42 | output->ToProto(ctx->reply()); 43 | return std::vector{}; 44 | }; 45 | ExecBlock* b1 = new ExecBlock(0, func1, {}); 46 | ExecBlock* b2 = new ExecBlock(1, func2, {"output"}); 47 | qp_ = new QueryProcessor({b1, b2}); 48 | } 49 | 50 | private: 51 | std::string framework_; 52 | std::string model_name_; 53 | int version_; 54 | int latency_sla_ms_; 55 | float estimate_workload_; 56 | uint image_height_; 57 | uint image_width_; 58 | std::shared_ptr model_; 59 | }; 60 | 61 | DEFINE_string(port, "9001", "Server port"); 62 | DEFINE_string(rpc_port, "9002", "RPC port"); 63 | DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address"); 64 | 
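// A typical invocation of this example frontend might look like the
// following (binary name, framework, and model are illustrative; any
// model/latency pair present in the profile database should work):
//   ./simple_app -sch_addr 127.0.0.1 -port 9001 -rpc_port 9002 \
//                -framework caffe -model vgg16 -latency 200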
DEFINE_int32(nthread, 4, "Number of threads processing requests"); 65 | DEFINE_string(framework, "", "Framework (caffe2, caffe, darknet, tensorflow)"); 66 | DEFINE_string(model, "", "Model name"); 67 | DEFINE_int32(model_version, 1, "Model version"); 68 | DEFINE_int32(latency, 500, "Latency SLA in ms"); 69 | DEFINE_double(workload, 0, "Estimated request rate"); 70 | DEFINE_int32(height, 0, "Image height"); 71 | DEFINE_int32(width, 0, "Image width"); 72 | 73 | int main(int argc, char** argv) { 74 | // log to stderr 75 | FLAGS_logtostderr = 1; 76 | // Init glog 77 | google::InitGoogleLogging(argv[0]); 78 | // Parse command line flags 79 | google::ParseCommandLineFlags(&argc, &argv, true); 80 | // Setup backtrace on segfault 81 | google::InstallFailureSignalHandler(); 82 | 83 | CHECK_GT(FLAGS_framework.length(), 0) << "Missing framework"; 84 | CHECK_GT(FLAGS_model.length(), 0) << "Missing model"; 85 | LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port; 86 | // Create the frontend server 87 | SimpleApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread, 88 | FLAGS_framework, FLAGS_model, FLAGS_model_version, 89 | FLAGS_latency, FLAGS_workload, FLAGS_height, FLAGS_width); 90 | LaunchApp(&app); 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /src/nexus/common/connection.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "nexus/common/connection.h" 4 | 5 | namespace nexus { 6 | 7 | Connection::Connection(boost::asio::ip::tcp::socket socket, 8 | MessageHandler* handler) : 9 | socket_(std::move(socket)), 10 | handler_(handler), 11 | wrong_header_(false) { 12 | boost::asio::ip::tcp::no_delay option(true); 13 | socket_.set_option(option); 14 | } 15 | 16 | Connection::Connection(boost::asio::io_context& io_context, 17 | MessageHandler* handler) : 18 | socket_(io_context), 19 | handler_(handler), 20 | wrong_header_(false) { 21 | } 22 | 23 | void Connection::Start() { 24 | DoReadHeader(); 25 | } 26 | 27 | void Connection::Stop() { 28 | LOG(INFO) << "Connection Stop"; 29 | std::lock_guard socket_guard(socket_mutex_); 30 | socket_.close(); 31 | } 32 | 33 | void Connection::Write(std::shared_ptr msg) { 34 | std::lock_guard lock(write_queue_mutex_); 35 | bool write_in_progress = !write_queue_.empty(); 36 | write_queue_.push_back(std::move(msg)); 37 | if (!write_in_progress) { 38 | DoWrite(); 39 | } 40 | } 41 | 42 | void Connection::DoReadHeader() { 43 | auto self(shared_from_this()); 44 | std::lock_guard socket_guard(socket_mutex_); 45 | boost::asio::async_read( 46 | socket_, 47 | boost::asio::buffer(msg_header_buffer_, MESSAGE_HEADER_SIZE), 48 | [this, self](boost::system::error_code ec, size_t nbytes) { 49 | if (ec) { 50 | if (ec != boost::asio::error::operation_aborted) { 51 | handler_->HandleError(self, ec); 52 | } 53 | return; 54 | } 55 | MessageHeader msg_header; 56 | if (!DecodeHeader(msg_header_buffer_, &msg_header)) { 57 | if (!wrong_header_) { 58 | LOG(ERROR) << "Wrong header detected"; 59 | wrong_header_ = true; 60 | } 61 | DoReadHeader(); 62 | } else { 63 | wrong_header_ = false; 64 | auto msg = std::make_shared(msg_header); 65 | // LOG(INFO) << "msg type: " << msg->type() << ", body length: " << 66 | // msg->body_length(); 67 | DoReadBody(std::move(msg)); 68 | } 69 | }); 70 | } 71 | 72 | void Connection::DoReadBody(std::shared_ptr msg) { 73 | auto self(shared_from_this()); 74 | std::lock_guard socket_guard(socket_mutex_); 75 | 
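// At this point DoReadHeader() has already validated the 12-byte header
// (magic number 0xDEADBEEF, message type, body length) and constructed a
// Message whose buffer can hold body_length bytes. async_read completes
// only once the full body has arrived, so the handler always receives a
// complete message.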
boost::asio::async_read( 76 | socket_, 77 | boost::asio::buffer(msg->body(), msg->body_length()), 78 | [this, self, msg](boost::system::error_code ec, 79 | size_t /* bytes_transferred */) { 80 | if (ec) { 81 | if (ec != boost::asio::error::operation_aborted) { 82 | handler_->HandleError(self, ec); 83 | } 84 | } else { 85 | handler_->HandleMessage(self, std::move(msg)); 86 | DoReadHeader(); 87 | } 88 | }); 89 | } 90 | 91 | void Connection::DoWrite() { 92 | auto self(shared_from_this()); 93 | std::lock_guard socket_guard(socket_mutex_); 94 | boost::asio::async_write( 95 | socket_, 96 | boost::asio::buffer(write_queue_.front()->data(), 97 | write_queue_.front()->length()), 98 | [this, self](boost::system::error_code ec, size_t) { 99 | std::lock_guard lock(write_queue_mutex_); 100 | if (ec) { 101 | if (ec != boost::asio::error::operation_aborted) { 102 | handler_->HandleError(self, ec); 103 | } 104 | } else { 105 | write_queue_.pop_front(); 106 | if (!write_queue_.empty()) { 107 | DoWrite(); 108 | } 109 | } 110 | }); 111 | } 112 | 113 | } // namespace nexus 114 | -------------------------------------------------------------------------------- /tests/cpp/scheduler/backend_delegate_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "nexus/common/config.h" 7 | #include "nexus/common/model_db.h" 8 | #include "nexus/proto/control.pb.h" 9 | #include "nexus/proto/nnquery.pb.h" 10 | #include "nexus/scheduler/backend_delegate.h" 11 | 12 | //DECLARE_string(model_db); 13 | DECLARE_int32(beacon); 14 | DECLARE_int32(epoch); 15 | 16 | namespace nexus { 17 | namespace scheduler { 18 | 19 | class BackendDelegateTest : public ::testing::Test { 20 | protected: 21 | virtual void SetUp() { 22 | gpu_device_ = "TITAN_X_(Pascal)"; 23 | gpu_available_memory_ = 12L * 1024L * 1024L * 1024L; 24 | FLAGS_beacon = 1; 25 | FLAGS_epoch = 5; 26 | backend_.reset(new BackendDelegate( 27 | 1, "127.0.0.1", "8001", "8002", gpu_device_, gpu_available_memory_, 28 | FLAGS_beacon)); 29 | } 30 | 31 | std::string gpu_device_; 32 | size_t gpu_available_memory_; 33 | std::unique_ptr backend_; 34 | }; 35 | 36 | TEST_F(BackendDelegateTest, PrepareLoadModel) { 37 | ModelSession vgg16_sess; 38 | vgg16_sess.set_framework("caffe"); 39 | vgg16_sess.set_model_name("vgg16"); 40 | vgg16_sess.set_version(1); 41 | vgg16_sess.set_latency_sla(500); 42 | 43 | ModelSession vgg_face_sess; 44 | vgg_face_sess.set_framework("caffe"); 45 | vgg_face_sess.set_model_name("vgg_face"); 46 | vgg_face_sess.set_version(1); 47 | vgg_face_sess.set_latency_sla(300); 48 | 49 | // Residue workload 50 | for (float workload : {50., 100., 150., 200., 250.}) { 51 | InstanceInfo info; 52 | double occupancy; 53 | bool ret = backend_->PrepareLoadModel(vgg16_sess, workload, &info, 54 | &occupancy); 55 | ASSERT_TRUE(ret); 56 | ASSERT_GE(info.throughput, workload); 57 | ASSERT_GT(info.batch, 0); 58 | ASSERT_LE(occupancy, 1.); 59 | } 60 | 61 | // Saturate entire gpu when workload > 298 62 | for (float workload : {300., 400., 500.}) { 63 | InstanceInfo info; 64 | double occupancy; 65 | bool ret = backend_->PrepareLoadModel(vgg16_sess, workload, &info, 66 | &occupancy); 67 | ASSERT_TRUE(ret); 68 | ASSERT_GT(info.batch, 0); 69 | ASSERT_EQ(occupancy, 1.); 70 | } 71 | 72 | InstanceInfo vgg16_info; 73 | double occupancy; 74 | backend_->PrepareLoadModel(vgg16_sess, 150., &vgg16_info, &occupancy); 75 | backend_->LoadModel(vgg16_info); 76 | ASSERT_NEAR(backend_->Occupancy(), 
occupancy, 1e-3); 77 | 78 | // Try to load a second model 79 | for (float workload : {50, 100, 125}) { 80 | InstanceInfo info; 81 | double occupancy; 82 | bool ret = backend_->PrepareLoadModel(vgg_face_sess, workload, &info, 83 | &occupancy); 84 | LOG(INFO) << occupancy; 85 | ASSERT_TRUE(ret); 86 | ASSERT_GE(info.throughput, workload); 87 | ASSERT_GT(info.batch, 0); 88 | ASSERT_LE(occupancy, 1.); 89 | } 90 | 91 | for (float workload : {150, 200, 250}) { 92 | InstanceInfo info; 93 | double occupancy; 94 | bool ret = backend_->PrepareLoadModel(vgg_face_sess, workload, &info, 95 | &occupancy); 96 | ASSERT_FALSE(ret); 97 | } 98 | 99 | InstanceInfo vgg_face_info; 100 | backend_->PrepareLoadModel(vgg_face_sess, 125., &vgg_face_info, &occupancy); 101 | 102 | backend_->LoadModel(vgg_face_info); 103 | ASSERT_NEAR(backend_->Occupancy(), occupancy, 1e-3); 104 | } 105 | 106 | TEST_F(BackendDelegateTest, CheckAlive) { 107 | std::this_thread::sleep_for(std::chrono::milliseconds(2100)); 108 | ASSERT_FALSE(backend_->IsAlive()); 109 | backend_->Tick(); 110 | ASSERT_TRUE(backend_->IsAlive()); 111 | } 112 | 113 | } // namespace scheduler 114 | } // namespace nexus 115 | -------------------------------------------------------------------------------- /python/nexus/async_client.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import socket 3 | import asyncio 4 | from datetime import datetime 5 | 6 | from .proto import nnquery_pb2 as npb 7 | 8 | MAGIC_NUMBER = 0xDEADBEEF 9 | HEADER_SIZE = 12 10 | # Message type 11 | MSG_USER_REGISTER = 1 12 | MSG_USER_REQUEST = 2 13 | MSG_USER_REPLY = 3 14 | 15 | 16 | class AsyncClient: 17 | def __init__(self, server_addr, user_id): 18 | self._server_addr = server_addr 19 | self._user_id = user_id 20 | self._req_id = 0 21 | self._reader_lock = asyncio.Lock() 22 | self._replies = {} 23 | 24 | @property 25 | def next_req_id(self): 26 | return self._req_id 27 | 28 | async def __aenter__(self): 29 | host, port = self._server_addr.split(':') 30 | self._reader, self._writer = await asyncio.open_connection(host, port) 31 | return self 32 | 33 | async def __aexit__(self, exc_type, exc, tb): 34 | self._writer.close() 35 | await self._writer.wait_closed() 36 | 37 | async def register(self): 38 | req = npb.RequestProto(user_id=self._user_id) 39 | msg = self._prepare_message(MSG_USER_REGISTER, req) 40 | 41 | self._writer.write(msg) 42 | await self._writer.drain() 43 | 44 | reply, _ = await self._wait_reply(req.req_id) 45 | assert reply.status == 0 46 | 47 | async def _do_request(self, req, msg): 48 | send_time = datetime.now() 49 | self._writer.write(msg) 50 | await self._writer.drain() 51 | 52 | reply, recv_time = await self._wait_reply(req.req_id) 53 | return send_time, recv_time, reply 54 | 55 | def request(self, img): 56 | req = self._prepare_req(img) 57 | msg = self._prepare_message(MSG_USER_REQUEST, req) 58 | return self._do_request(req, msg) 59 | 60 | def _prepare_req(self, img): 61 | req = npb.RequestProto() 62 | req.user_id = self._user_id 63 | req.req_id = self._req_id 64 | req.input.data_type = npb.DT_IMAGE 65 | req.input.image.data = img 66 | req.input.image.format = npb.ImageProto.JPEG 67 | req.input.image.color = True 68 | self._req_id += 1 69 | return req 70 | 71 | def request_with_hack_filename(self, filename): 72 | req = npb.RequestProto() 73 | req.user_id = self._user_id 74 | req.req_id = self._req_id 75 | req.input.data_type = npb.DT_IMAGE 76 | req.input.image.hack_filename = filename 77 | req.input.image.format = 
npb.ImageProto.JPEG 78 | req.input.image.color = True 79 | self._req_id += 1 80 | 81 | msg = self._prepare_message(MSG_USER_REQUEST, req) 82 | return self._do_request(req, msg) 83 | 84 | def _prepare_message(self, msg_type, request): 85 | body = request.SerializeToString() 86 | header = struct.pack('!LLL', MAGIC_NUMBER, msg_type, len(body)) 87 | return header + body 88 | 89 | async def _wait_reply(self, req_id): 90 | while True: 91 | async with self._reader_lock: 92 | reply = self._replies.pop(req_id, None) 93 | if reply is not None: 94 | return reply 95 | 96 | buf = await self._reader.readexactly(HEADER_SIZE) 97 | magic_no, msg_type, body_length = struct.unpack('!LLL', buf) 98 | assert magic_no == MAGIC_NUMBER 99 | assert msg_type == MSG_USER_REPLY 100 | 101 | buf = await self._reader.readexactly(body_length) 102 | reply = npb.ReplyProto() 103 | reply.ParseFromString(buf) 104 | self._replies[reply.req_id] = (reply, datetime.now()) 105 | 106 | # return early to avoid lock competition 107 | reply = self._replies.pop(req_id, None) 108 | if reply is not None: 109 | return reply 110 | -------------------------------------------------------------------------------- /src/nexus/scheduler/sch_info.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_SCHEDULER_SCH_INFO_H_ 2 | #define NEXUS_SCHEDULER_SCH_INFO_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "nexus/common/metric.h" 11 | #include "nexus/common/model_db.h" 12 | #include "nexus/common/model_def.h" 13 | #include "nexus/proto/nnquery.pb.h" 14 | #include "nexus/proto/control.pb.h" 15 | 16 | DECLARE_int32(avg_interval); 17 | 18 | namespace nexus { 19 | namespace scheduler { 20 | 21 | using SessionGroup = std::vector; 22 | using ServerList = std::unordered_set; 23 | 24 | struct SessionInfo { 25 | SessionInfo() : 26 | has_static_workload(false), 27 | unassigned_workload(0) {} 28 | 29 | double TotalThroughput() const; 30 | 31 | void SubscribeModelSession(uint32_t frontend_id, 32 | const std::string& model_sess_id); 33 | 34 | bool UnsubscribleModelSession(uint32_t frontend_id, const std::string& model_sess_id); 35 | 36 | void UpdateWorkload(uint32_t frontend_id, const ModelStatsProto& model_stats); 37 | 38 | SessionGroup model_sessions; 39 | /*! \brief Mapping from backend id to throughput */ 40 | std::unordered_map backend_weights; 41 | 42 | std::unordered_set backup_backends; 43 | /*! \brief Whether there is a static workload for this session */ 44 | bool has_static_workload; 45 | 46 | std::unordered_map session_subscribers; 47 | /*! \brief Map from frontend id to workload */ 48 | std::unordered_map > workloads; 49 | /*! \brief Workload request rate history */ 50 | std::deque rps_history; 51 | /*! \brief Gap between workload and throughput */ 52 | double unassigned_workload; 53 | /*! 
\brief Complex Query ID */ 54 | std::string complex_query_id; 55 | }; 56 | 57 | struct InstanceInfo { 58 | SessionGroup model_sessions; 59 | uint32_t batch; 60 | uint32_t max_batch; 61 | const ModelProfile* profile; 62 | double fwd_latency_us; 63 | double max_duty_cycle_us; 64 | double workload; 65 | double throughput; 66 | double weight; 67 | uint64_t memory_usage; 68 | bool backup; 69 | std::unordered_map backup_backends; 70 | 71 | InstanceInfo() : 72 | batch(0), 73 | max_batch(0), 74 | profile(nullptr), 75 | fwd_latency_us(0.), 76 | max_duty_cycle_us(0.), 77 | workload(0.), 78 | throughput(0.), 79 | weight(0.), 80 | memory_usage(0), 81 | backup(false) {} 82 | 83 | InstanceInfo(const InstanceInfo& other) : 84 | model_sessions(other.model_sessions), 85 | batch(other.batch), 86 | max_batch(other.max_batch), 87 | profile(other.profile), 88 | fwd_latency_us(other.fwd_latency_us), 89 | max_duty_cycle_us(other.max_duty_cycle_us), 90 | workload(other.workload), 91 | throughput(other.throughput), 92 | weight(other.weight), 93 | memory_usage(other.memory_usage), 94 | backup(other.backup) {} 95 | 96 | InstanceInfo& operator=(const InstanceInfo& other) { 97 | if (this != &other) { 98 | model_sessions = other.model_sessions; 99 | batch = other.batch; 100 | max_batch = other.max_batch; 101 | profile = other.profile; 102 | fwd_latency_us = other.fwd_latency_us; 103 | max_duty_cycle_us = other.max_duty_cycle_us; 104 | workload = other.workload; 105 | throughput = other.throughput; 106 | weight = other.weight; 107 | memory_usage = other.memory_usage; 108 | backup = other.backup; 109 | } 110 | return *this; 111 | } 112 | 113 | double GetWeight() const { 114 | return (weight > 0) ? weight : throughput; 115 | } 116 | }; 117 | 118 | } // namespace scheduler 119 | } // namespace nexus 120 | 121 | #endif // NEXUS_SCHEDULE_SCH_INFO_H_ 122 | -------------------------------------------------------------------------------- /src/nexus/common/message.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_MESSAGE_H_ 2 | #define NEXUS_COMMON_MESSAGE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | namespace nexus { 9 | 10 | /*! \brief Message types */ 11 | enum MessageType { 12 | /*! \brief register user to frontend */ 13 | kUserRegister = 1, 14 | /*! \brief request from user to fronend */ 15 | kUserRequest = 2, 16 | /*! \brief reply from frontend to user */ 17 | kUserReply = 3, 18 | 19 | // Internal message type 20 | /*! \brief request from frontend to backend */ 21 | kBackendRequest = 100, 22 | /*! \brief reply from backend to frontend */ 23 | kBackendReply = 101, 24 | /*! \brief relay request from backend to backup */ 25 | kBackendRelay = 102, 26 | /*! \brief relay reply from backup */ 27 | kBackendRelayReply = 103, 28 | }; 29 | 30 | /*! \brief Message header format */ 31 | struct MessageHeader { 32 | /*! \brief magic number field */ 33 | uint32_t magic_number; 34 | /*! \brief message type */ 35 | uint32_t msg_type; 36 | /*! \brief length of payload */ 37 | uint32_t body_length; 38 | }; 39 | 40 | /*! \brief Magic number for Nexus service */ 41 | #define NEXUS_SERVICE_MAGIC_NUMBER 0xDEADBEEF 42 | /*! \brief Header length in bytes */ 43 | #define MESSAGE_HEADER_SIZE sizeof(MessageHeader) 44 | 45 | bool DecodeHeader(const char* buffer, MessageHeader* header); 46 | 47 | /*! 48 | * \brief Message is used to hold the packets that are communicated between 49 | * client and frontend server, and between frontend server and backend server. 
50 | */ 51 | class Message { 52 | public: 53 | /*! 54 | * \brief Construct a message. 55 | * 56 | * It allocates the data buffer with maximal size. This constructor is mainly 57 | * used to hold an inbound packet when the message size is unknown. 58 | */ 59 | //Message(); 60 | Message(const MessageHeader& header); 61 | /*! 62 | * \brief Construct a message with explicit body length. 63 | * 64 | * It allocates the data buffer with body length plus header size. This 65 | * constructor is mainly used to hold an outbound packet when the message 66 | * size is known. 67 | * 68 | * \param body_length Length of payload in bytes 69 | */ 70 | Message(MessageType type, size_t body_length); 71 | /*! \brief Destruct a message. */ 72 | ~Message(); 73 | /*! \brief Get the data pointer */ 74 | char* data() { return data_; } 75 | /*! \brief Get the read-only data pointer */ 76 | const char* data() const { return data_; } 77 | /*! \brief Get the body pointer */ 78 | char* body() { return data_ + MESSAGE_HEADER_SIZE; } 79 | /*! \brief Get the read-only body pointer */ 80 | const char* body() const { return data_ + MESSAGE_HEADER_SIZE; } 81 | /*! \brief Get the length of entire message in bytes */ 82 | size_t length() const { return MESSAGE_HEADER_SIZE + body_length_; } 83 | /*! \brief Get the length of body in bytes */ 84 | size_t body_length() const { return body_length_; } 85 | /*! \brief Get the type of message */ 86 | MessageType type() const { return type_; } 87 | /*! 88 | * \brief Set the message type 89 | * \param type Message type 90 | */ 91 | void set_type(MessageType type); 92 | /*! 93 | * \brief Decode the message from the body 94 | * \param message Protobuf message for the decoding result 95 | */ 96 | void DecodeBody(google::protobuf::Message* message) const; 97 | /*! 98 | * \brief Encode the protobuf message and store in the body 99 | * \param message Protobuf message to encode 100 | */ 101 | void EncodeBody(const google::protobuf::Message& message); 102 | 103 | private: 104 | /*! \brief Data buffer */ 105 | char* data_; 106 | /*! \brief Message type */ 107 | MessageType type_; 108 | /*! \brief Length of message body in bytes */ 109 | size_t body_length_; 110 | }; 111 | 112 | } // namespace nexus 113 | 114 | #endif // NEXUS_COMMON_MESSAGE_H_ 115 | -------------------------------------------------------------------------------- /src/nexus/backend/model_exec.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_BACKEND_MODEL_EXEC_H_ 2 | #define NEXUS_BACKEND_MODEL_EXEC_H_ 3 | 4 | #include <atomic> 5 | #include <memory> 6 | #include <mutex> 7 | 8 | #include "nexus/backend/model_ins.h" 9 | #include "nexus/common/block_queue.h" 10 | #include "nexus/common/metric.h" 11 | #include "nexus/common/model_db.h" 12 | 13 | namespace nexus { 14 | namespace backend { 15 | 16 | class ModelExecutor { 17 | public: 18 | ModelExecutor(int gpu_id, const ModelInstanceConfig& config, 19 | BlockPriorityQueue<Task>& task_queue); 20 | 21 | ~ModelExecutor(); 22 | 23 | ModelInstance* model() { return model_.get(); } 24 | 25 | const ModelInstance* model() const { return model_.get(); } 26 | /*! \brief Return whether this model is a backup model. 
*/ 27 | bool backup() const { return backup_; } 28 | 29 | const ModelProfile* profile() const { return profile_; } 30 | 31 | void SetBatch(uint32_t batch) { model_->set_batch(batch); } 32 | 33 | double GetRequestRate(); 34 | 35 | double GetDropRate(); 36 | 37 | bool IsSharePrefixModel() const; 38 | bool IsTFShareModel() const; 39 | 40 | bool HasBackup(); 41 | 42 | std::vector<uint32_t> BackupBackends(); 43 | 44 | void UpdateBackupBackends(const ModelInstanceConfig& config); 45 | 46 | bool Preprocess(std::shared_ptr<Task> task, bool force=false); 47 | 48 | bool AddPreprocessedTask(std::shared_ptr<Task> task, bool force=false); 49 | 50 | void Postprocess(std::shared_ptr<Task> task); 51 | 52 | uint64_t Execute(uint32_t batch = 0); 53 | 54 | TimePoint LastExecuteFinishTime(); 55 | 56 | int NumberOfOpenRequests() const; 57 | 58 | uint64_t GetPeakMemoryUsage(); 59 | 60 | private: 61 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTaskSlidingWindow(uint32_t batch_size); 62 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTaskEarliest(uint32_t batch_size); 63 | 64 | bool IncreaseOpenRequests(int cnt, bool limit_max_batch); 65 | 66 | void DecreaseOpenRequests(int cnt); 67 | /*! 68 | * \brief Get batch task from the task queue. 69 | * \param batch_size Expected batch size in the batch task. 70 | * \return Batch task and the number of inputs dequeued from input queue. 71 | */ 72 | std::pair<std::shared_ptr<BatchTask>, int> GetBatchTask(uint32_t batch_size); 73 | 74 | void RemoveTask(std::shared_ptr<Task> task); 75 | 76 | std::unique_ptr<ModelInstance> model_; 77 | bool backup_; 78 | const ModelProfile* profile_; 79 | BlockPriorityQueue<Task>& task_queue_; 80 | /*! 81 | * \brief Map from task id to current processing tasks. 82 | * Guarded by task_mu_. 83 | */ 84 | std::unordered_map<uint64_t, std::shared_ptr<Task>> processing_tasks_; 85 | /*! \brief Priority queue of inputs based on deadline. Guarded by task_mu_. */ 86 | std::priority_queue<std::shared_ptr<Input>, 87 | std::vector<std::shared_ptr<Input>>, 88 | CompareDeadlineItem> input_queue_; 89 | /*! \brief Input array allocated in GPU memory to hold batch inputs. */ 90 | std::shared_ptr<Array> input_array_; 91 | /*! \brief Batch index. */ 92 | std::atomic<uint64_t> batch_id_; 93 | /*! \brief Number of open requests. */ 94 | std::atomic<int> open_requests_; 95 | /*! \brief Interval counter to count number of requests within each interval. 96 | */ 97 | std::shared_ptr<IntervalCounter> req_counter_; 98 | std::shared_ptr<IntervalCounter> drop_counter_; 99 | 100 | EWMA req_rate_; 101 | EWMA drop_rate_; 102 | 103 | std::vector<uint32_t> backup_backends_; 104 | /*! 105 | * \brief Last time point that finishes the batch execution. 106 | * Guarded by time_mu_. 107 | */ 108 | TimePoint last_exec_finish_; 109 | /*! \brief Mutex to protect processing_tasks_ and input_queue_. */ 110 | std::mutex task_mu_; 111 | /*! \brief Mutex to protect last_exec_finish_. 
*/ 112 | std::mutex time_mu_; 113 | 114 | std::mutex backup_mu_; 115 | }; 116 | 117 | using ModelExecutorPtr = std::shared_ptr; 118 | 119 | } // namespace backend 120 | } // namespace nexus 121 | 122 | #endif // NEXUS_BACKEND_MODEL_EXEC_H_ 123 | -------------------------------------------------------------------------------- /src/nexus/backend/model_ins.cpp: -------------------------------------------------------------------------------- 1 | #include "nexus/backend/caffe_densecap_model.h" 2 | #include "nexus/backend/caffe_model.h" 3 | #include "nexus/backend/caffe2_model.h" 4 | #include "nexus/backend/darknet_model.h" 5 | #include "nexus/backend/model_ins.h" 6 | #include "nexus/backend/share_prefix_model.h" 7 | #include "nexus/backend/tensorflow_model.h" 8 | #include "nexus/backend/tf_share_model.h" 9 | 10 | #include 11 | 12 | namespace nexus { 13 | namespace backend { 14 | 15 | void CreateModelInstance(int gpu_id, const ModelInstanceConfig& config, 16 | std::unique_ptr* model) { 17 | auto beg = Clock::now(); 18 | std::string framework = config.model_session(0).framework(); 19 | #ifdef USE_TENSORFLOW 20 | if (framework == "tf_share") { 21 | model->reset(new TFShareModel(gpu_id, config)); 22 | } else 23 | #endif 24 | if (config.model_session_size() > 1) { 25 | LOG(INFO) << "Create prefix model"; 26 | model->reset(new SharePrefixModel(gpu_id, config)); 27 | } else { 28 | std::string model_name = config.model_session(0).model_name(); 29 | #ifdef USE_DARKNET 30 | if (framework == "darknet") { 31 | model->reset(new DarknetModel(gpu_id, config)); 32 | } else 33 | #endif 34 | #ifdef USE_CAFFE 35 | if (framework == "caffe") { 36 | if (model_name == "densecap") { 37 | model->reset(new CaffeDenseCapModel(gpu_id, config)); 38 | } else { 39 | model->reset(new CaffeModel(gpu_id, config)); 40 | } 41 | } else 42 | #endif 43 | #ifdef USE_CAFFE2 44 | if (framework == "caffe2") { 45 | model->reset(new Caffe2Model(gpu_id, config)); 46 | } else 47 | #endif 48 | #ifdef USE_TENSORFLOW 49 | if (framework == "tensorflow") { 50 | model->reset(new TensorflowModel(gpu_id, config)); 51 | } else 52 | #endif 53 | { 54 | LOG(FATAL) << "Unknown framework " << framework; 55 | } 56 | } 57 | 58 | auto end = Clock::now(); 59 | auto duration = std::chrono::duration_cast( 60 | end - beg); 61 | LOG(INFO) << "Loading model time: " << duration.count() << "ms"; 62 | } 63 | 64 | ModelInstance::ModelInstance(int gpu_id, const ModelInstanceConfig &config) : 65 | gpu_id_(gpu_id), 66 | model_session_(config.model_session(0)), 67 | batch_(config.batch()), 68 | max_batch_(config.max_batch()) { 69 | CHECK_GT(batch_, 0) << "batch must be greater than 0"; 70 | CHECK_GE(max_batch_, batch_) << "max_batch must be greater than batch"; 71 | std::string model_id = ModelSessionToModelID(model_session_); 72 | auto info = ModelDatabase::Singleton().GetModelInfo(model_id); 73 | CHECK(info != nullptr) << "Model not found in the database"; 74 | model_info_ = *info; 75 | model_session_id_ = ModelSessionToString(model_session_); 76 | cpu_device_ = DeviceManager::Singleton().GetCPUDevice(); 77 | #ifdef USE_GPU 78 | gpu_device_ = DeviceManager::Singleton().GetGPUDevice(gpu_id); 79 | #endif 80 | LOG(INFO) << "Construct model " << model_session_id_ << ", batch " << 81 | batch_ << ", max batch " << max_batch_; 82 | } 83 | 84 | ModelInstance::~ModelInstance() { 85 | LOG(INFO) << "Deconstruct model " << model_session_id_; 86 | } 87 | void ModelInstance::set_batch(size_t batch) { 88 | CHECK_LE(batch, max_batch_) << "Batch size must be less than max_batch"; 89 | 
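// batch_ is atomic, so a batch-size update (e.g., applied when the
// scheduler adjusts this instance's batch) can safely race with worker
// threads that read batch_ on the execution path.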
batch_.store(batch); 90 | } 91 | ArrayPtr ModelInstance::CreateInputGpuArrayWithRawPointer(float *ptr, size_t nfloats) { 92 | LOG(ERROR) << "Don't support create input gpu array with raw pointer"; 93 | return nullptr; 94 | } 95 | void ModelInstance::RemoveInputGpuArray(ArrayPtr arr) { 96 | LOG(WARNING) << "Don't support remove input gpu array"; 97 | } 98 | void ModelInstance::ForwardAsync(std::shared_ptr batch_task) { 99 | LOG(WARNING) << "Don't support async forward"; 100 | Forward(batch_task); 101 | } 102 | void ModelInstance::WaitOutput(std::shared_ptr batch_task) { 103 | LOG(WARNING) << "Don't support async forward"; 104 | } 105 | uint64_t ModelInstance::GetPeakBytesInUse() { 106 | LOG(FATAL) << "GetPeakBytesInUse not implemented"; 107 | } 108 | } // namespace backend 109 | } // namespace nexus 110 | -------------------------------------------------------------------------------- /src/nexus/common/util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef USE_GPU 8 | #include 9 | #endif 10 | 11 | #include "nexus/common/util.h" 12 | 13 | namespace nexus { 14 | 15 | void SplitString(const std::string& str, char delim, 16 | std::vector* tokens) { 17 | std::stringstream ss; 18 | ss.str(str); 19 | std::string token; 20 | tokens->clear(); 21 | while (std::getline(ss, token, delim)) { 22 | tokens->push_back(token); 23 | } 24 | } 25 | 26 | void Memcpy(void* dst, const Device* dst_device, const void* src, 27 | const Device* src_device, size_t nbytes) { 28 | if (dst == src && dst_device == src_device) { 29 | return; 30 | } 31 | DeviceType dst_type = dst_device->type(); 32 | DeviceType src_type = src_device->type(); 33 | #ifdef USE_GPU 34 | if (dst_type == kCPU) { 35 | if (src_type == kCPU) { 36 | memcpy(dst, src, nbytes); 37 | } else { // src_type == kGPU 38 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToHost)); 39 | } 40 | } else { // dst_type == kGPU 41 | if (src_type == kCPU) { 42 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyHostToDevice)); 43 | } else { // src_type == kGPU 44 | NEXUS_CUDA_CHECK(cudaMemcpy(dst, src, nbytes, cudaMemcpyDeviceToDevice)); 45 | } 46 | } 47 | #else 48 | CHECK_EQ(dst_type, kCPU); 49 | CHECK_EQ(src_type, kCPU); 50 | memcpy(dst, src, nbytes); 51 | #endif 52 | } 53 | 54 | namespace { 55 | /*! 
\brief the list of all IPv4 addresses */ 56 | std::vector Ipv4Interfaces; 57 | } // namespace 58 | 59 | void ListIpv4Address() { 60 | if (Ipv4Interfaces.size() > 0) { 61 | Ipv4Interfaces.clear(); 62 | } 63 | struct ifaddrs* ifAddrStruct = nullptr; 64 | struct ifaddrs* ifa = nullptr; 65 | // get network interface addresses 66 | getifaddrs(&ifAddrStruct); 67 | // iterate over all addresses 68 | for (ifa = ifAddrStruct; ifa != nullptr; ifa = ifa->ifa_next) { 69 | if (!ifa->ifa_addr) { 70 | continue; 71 | } 72 | if (ifa->ifa_addr->sa_family == AF_INET) { 73 | // IPv4 Address 74 | in_addr* addr = &((sockaddr_in*) ifa->ifa_addr)->sin_addr; 75 | Ipv4Interfaces.push_back(*addr); 76 | } else if (ifa->ifa_addr->sa_family == AF_INET6) { 77 | continue; 78 | // IPv6 Address 79 | /*in6_addr* addr = &((sockaddr_in6*) ifa->ifa_addr)->sin6_addr; 80 | char ipv6[INET6_ADDRSTRLEN]; 81 | inet_ntop(AF_INET6, addr, ipv6, INET6_ADDRSTRLEN); 82 | //printf("%s IP Address %s\n", ifa->ifa_name, ipv6); 83 | ret = std::string(ipv6);*/ 84 | } 85 | } 86 | if (ifAddrStruct != nullptr) { 87 | freeifaddrs(ifAddrStruct); 88 | } 89 | } 90 | 91 | void ConvertPrefix(const std::string& prefix, uint32_t* addr, uint32_t* mask) { 92 | char *pref = new char[prefix.length() + 1]; 93 | strcpy(pref, prefix.c_str()); 94 | char *pch = strchr(pref, '/'); 95 | if (pch == nullptr) { 96 | *mask = 0xffffffff; 97 | } else { 98 | *pch = 0; 99 | ++pch; 100 | int prefix_len = atoi(pch); 101 | if (prefix_len > 32 || prefix_len < 0) { 102 | LOG(FATAL) << "Wrong prefix length: " << prefix_len; 103 | } 104 | *mask = ~(uint32_t)((1 << (32 - prefix_len)) - 1); 105 | } 106 | uint32_t prefix_addr = 0; 107 | pch = strtok(pref, "."); 108 | while (pch != nullptr) { 109 | prefix_addr = (prefix_addr << 8) | (uint8_t) atoi(pch); 110 | pch = strtok(NULL, "."); 111 | } 112 | *addr = prefix_addr & *mask; 113 | delete[] pref; 114 | } 115 | 116 | std::string GetIpAddress(const std::string& prefix) { 117 | if (Ipv4Interfaces.empty()) { 118 | ListIpv4Address(); 119 | } 120 | uint32_t prefix_addr; 121 | uint32_t prefix_mask; 122 | ConvertPrefix(prefix, &prefix_addr, &prefix_mask); 123 | for (size_t i = 0; i < Ipv4Interfaces.size(); ++i) { 124 | const in_addr* addr = &Ipv4Interfaces[i]; 125 | if ((ntohl(addr->s_addr) & prefix_mask) == prefix_addr) { 126 | char addr_str[INET_ADDRSTRLEN]; 127 | inet_ntop(AF_INET, addr, addr_str, INET_ADDRSTRLEN); 128 | return std::string(addr_str); 129 | } 130 | } 131 | return ""; 132 | } 133 | 134 | } // namespace nexus 135 | -------------------------------------------------------------------------------- /src/nexus/common/data_type.h: -------------------------------------------------------------------------------- 1 | #ifndef NEXUS_COMMON_DATA_TYPE_H_ 2 | #define NEXUS_COMMON_DATA_TYPE_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #include "nexus/common/buffer.h" 9 | #include "nexus/proto/nnquery.pb.h" 10 | 11 | namespace nexus { 12 | 13 | template struct TypeMap; 14 | 15 | template<> struct TypeMap { 16 | using type = bool; 17 | static constexpr size_t size = 1; 18 | }; 19 | 20 | template<> struct TypeMap { 21 | using type = int8_t; 22 | static constexpr size_t size = 1; 23 | }; 24 | 25 | template<> struct TypeMap { 26 | using type = uint8_t; 27 | static constexpr size_t size = 1; 28 | }; 29 | 30 | template<> struct TypeMap { 31 | using type = int32_t; 32 | static constexpr size_t size = 4; 33 | }; 34 | 35 | template<> struct TypeMap { 36 | using type = uint32_t; 37 | static constexpr size_t size = 4; 38 | }; 39 | 40 | 
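// Each TypeMap specialization here pairs a DataType enum value with its
// C++ type and byte size; type_size() below simply dispatches on the enum,
// e.g. type_size(DT_FLOAT) == 4 and type_size(DT_DOUBLE) == 8.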
--------------------------------------------------------------------------------
/examples/traffic_complex/src/traffic_complex.cpp:
--------------------------------------------------------------------------------
#include <gflags/gflags.h>
#include <glog/logging.h>

#include "nexus/app/app_base.h"

using namespace nexus;
using namespace nexus::app;

class TrafficApp : public AppBase {
 public:
  TrafficApp(std::string port, std::string rpc_port, std::string sch_addr,
             size_t nthreads, int latency_slo, int ssd_latency_ms) :
      AppBase(port, rpc_port, sch_addr, nthreads),
      latency_slo_(latency_slo),
      ssd_latency_ms_(ssd_latency_ms),
      rec_latency_ms_(latency_slo - ssd_latency_ms) {}

  void Setup() final {
    ssd_model_ = GetModelHandler("tensorflow", "ssd_mobilenet", 1,
                                 ssd_latency_ms_, 0, {}, LB_DeficitRR);
    car_model_ = GetModelHandler("caffe2", "googlenet_cars", 1, rec_latency_ms_);
    face_model_ = GetModelHandler("caffe2", "vgg_face_0", 1, rec_latency_ms_);
    auto func1 = [&](std::shared_ptr<RequestContext> ctx) {
      auto ssd_output = ssd_model_->Execute(ctx, ctx->const_request().input());
      return std::vector<VariablePtr>{
          std::make_shared<Variable>("ssd_output", ssd_output)};
    };
    auto func2 = [&](std::shared_ptr<RequestContext> ctx) {
      auto ssd_output = ctx->GetVariable("ssd_output")->result();
      std::vector<std::shared_ptr<QueryResult> > results;
      std::vector<RectProto> car_boxes;
      std::vector<RectProto> face_boxes;
      for (uint32_t i = 0; i < ssd_output->num_records(); ++i) {
        auto& rec = (*ssd_output)[i];
        auto name = rec["class_name"].as<std::string>();
        if (name == "car" || name == "truck") {
          car_boxes.push_back(rec["rect"].as<RectProto>());
        } else if (name == "person") {
          face_boxes.push_back(rec["rect"].as<RectProto>());
        }
      }
      if (!car_boxes.empty()) {
        results.push_back(
            car_model_->Execute(ctx, ctx->const_request().input(), {}, 1,
                                car_boxes));
      }
      if (!face_boxes.empty()) {
        results.push_back(
            face_model_->Execute(ctx, ctx->const_request().input(), {}, 1,
                                 face_boxes));
      }
      return std::vector<VariablePtr>{
          std::make_shared<Variable>("rec_output", results)};
    };
    auto func3 = [&](std::shared_ptr<RequestContext> ctx) {
      auto rec_output = ctx->GetVariable("rec_output");
      if (rec_output->count() > 0) {
        rec_output->result()->ToProto(ctx->reply());
      }
      return std::vector<VariablePtr>{};
    };
    ExecBlock* b1 = new ExecBlock(0, func1, {});
    ExecBlock* b2 = new ExecBlock(1, func2, {"ssd_output"});
    ExecBlock* b3 = new ExecBlock(2, func3, {"rec_output"});
    qp_ = new QueryProcessor({b1, b2, b3});
  }

 private:
  RectProto GetRect(int left, int right, int top, int bottom) {
    RectProto rect;
    rect.set_left(left);
    rect.set_right(right);
    rect.set_top(top);
    rect.set_bottom(bottom);
    return rect;
  }

  int latency_slo_;
  int ssd_latency_ms_;
  int rec_latency_ms_;
  std::shared_ptr<ModelHandler> ssd_model_;
  std::shared_ptr<ModelHandler> car_model_;
  std::shared_ptr<ModelHandler> face_model_;
};

DEFINE_string(port, "9001", "Server port");
DEFINE_string(rpc_port, "9002", "RPC port");
DEFINE_string(sch_addr, "127.0.0.1", "Scheduler address");
DEFINE_int32(nthread, 4, "Number of threads processing requests");
DEFINE_int32(latency, 0, "Latency SLO for query in ms");
DEFINE_int32(ssd_latency, 0, "Latency SLO for SSD model in ms");

int main(int argc, char** argv) {
  // log to stderr
  FLAGS_logtostderr = 1;
  // Init glog
  google::InitGoogleLogging(argv[0]);
  // Parse command line flags
  google::ParseCommandLineFlags(&argc, &argv, true);
  // Setup backtrace on segfault
  google::InstallFailureSignalHandler();

  LOG(INFO) << "App port " << FLAGS_port << ", rpc port " << FLAGS_rpc_port;
  // Create the frontend server
  TrafficApp app(FLAGS_port, FLAGS_rpc_port, FLAGS_sch_addr, FLAGS_nthread,
                 FLAGS_latency, FLAGS_ssd_latency);
  LaunchApp(&app);

  return 0;
}
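
// The three ExecBlocks above form a small dataflow graph: block 0 runs the
// SSD detector and publishes "ssd_output"; block 1 becomes runnable once
// "ssd_output" is filled, fans out to the car/face recognizers, and
// publishes "rec_output"; block 2 waits on "rec_output" and serializes the
// reply. A reduced sketch of the same pattern with a single dependency,
// using the same types as the file above:
auto detect = [&](std::shared_ptr<RequestContext> ctx) {
  auto out = ssd_model_->Execute(ctx, ctx->const_request().input());
  return std::vector<VariablePtr>{std::make_shared<Variable>("det", out)};
};
auto reply = [&](std::shared_ptr<RequestContext> ctx) {
  ctx->GetVariable("det")->result()->ToProto(ctx->reply());
  return std::vector<VariablePtr>{};
};
qp_ = new QueryProcessor({new ExecBlock(0, detect, {}),
                          new ExecBlock(1, reply, {"det"})});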
--------------------------------------------------------------------------------
/src/nexus/backend/batch_task.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_BACKEND_BATCH_TASK_H_
#define NEXUS_BACKEND_BATCH_TASK_H_

#include <unordered_map>

#include "nexus/backend/slice.h"
#include "nexus/backend/task.h"

namespace nexus {
namespace backend {

/*!
 * \brief BatchTask holds a batch of inputs and outputs, and is used for
 * batch forwarding through a DNN model.
 */
class BatchTask {
 public:
  /*!
   * \brief Construct a batch task.
   * \param max_batch Max batch size.
   */
  BatchTask(uint32_t max_batch);
  /*!
   * \brief Set batch id.
   * \param batch_id Batch id.
   */
  inline void set_batch_id(uint64_t batch_id) { batch_id_ = batch_id; }
  /*! \brief Return batch id */
  inline uint64_t batch_id() const { return batch_id_; }
  /*! \brief Return batch size */
  inline uint32_t batch_size() const { return inputs_.size(); }
  /*! \brief Return max batch size */
  inline uint32_t max_batch() const { return max_batch_; }
  /*!
   * \brief Set input array for holding the batch input data.
   * \param arr Array pointer.
   */
  void SetInputArray(ArrayPtr arr);
  /*!
   * \brief Create input array to hold the batch input data.
   * \param data_type Data type of input.
   * \param num_elements_per_input Number of elements in a single input.
   * \param device Device for allocation of input array.
   */
  void CreateInputArray(DataType data_type, size_t num_elements_per_input,
                        Device* device);
  /*!
   * \brief Set output arrays for holding the batch output results.
   * \param arrays Map from name to arrays.
   */
  void SetOutputArrays(const std::unordered_map<std::string, ArrayPtr>& arrays);
  /*!
   * \brief Create output arrays to hold the batch output results.
   * \param sizes Map from name to output sizes (in floats) for a single input.
   * \param device Device for allocation of output arrays.
   */
  void CreateOutputArrays(const std::unordered_map<std::string, size_t>& sizes,
                          Device* device);
  /*! \brief Return input batch array */
  inline ArrayPtr GetInputArray() const { return input_array_; }
  /*!
   * \brief Get the output batch array given name.
   * \param name Name of array.
   * \return Array corresponding to the name.
   */
  ArrayPtr GetOutputArray(const std::string& name) const;
  /*!
   * \brief Append a new input into the batch input.
   * \param input A single input.
   * \param task Task that the input belongs to.
   */
  void AppendInput(std::shared_ptr<Input> input, std::shared_ptr<Task> task);
  /*!
   * \brief Slice the batch output into individual outputs.
   * \param slices Slices for all arrays.
   */
  void SliceOutputBatch(const std::unordered_map<std::string, Slice>& slices);
  /*! \brief Get all individual inputs in the batch. */
  inline const std::vector<std::shared_ptr<Input> >& inputs() const {
    return inputs_;
  }
  /*! \brief Get all individual outputs in the batch. */
  inline const std::vector<std::shared_ptr<Output> >& outputs() const {
    return outputs_;
  }
  /*! \brief Set individual outputs. */
  void set_outputs(const std::vector<std::shared_ptr<Output> >& outputs);
  /*! \brief Get all tasks in the batch. */
  inline const std::vector<std::shared_ptr<Task> >& tasks() const {
    return tasks_;
  }

 private:
  /*! \brief Batch ID. */
  uint64_t batch_id_;
  /*! \brief Max batch size. */
  uint32_t max_batch_;
  /*! \brief Array that holds batch input data. */
  ArrayPtr input_array_;
  /*! \brief Write pointer to input_array_. */
  char* input_write_pt_;
  /*! \brief Number of elements added in the input_array_. */
  size_t input_elements_;
  /*! \brief Map from name to output array. */
  std::unordered_map<std::string, ArrayPtr> output_arrays_;
  /*! \brief Tasks in the batch. */
  std::vector<std::shared_ptr<Task> > tasks_;
  /*! \brief Individual inputs in the batch. */
  std::vector<std::shared_ptr<Input> > inputs_;
  /*! \brief Individual outputs in the batch. */
  std::vector<std::shared_ptr<Output> > outputs_;
};

} // namespace backend
} // namespace nexus

#endif // NEXUS_BACKEND_BATCH_TASK_H_
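
// A sketch of the call sequence BatchTask is designed around, roughly as a
// GPU executor would drive it. `gpu`, `pending`, and `task_of` are
// placeholders, and the Slice constructor arguments here are assumed rather
// than taken from slice.h.
BatchTask batch(/*max_batch=*/8);
batch.CreateInputArray(DT_FLOAT, 3 * 224 * 224, gpu);
while (batch.batch_size() < batch.max_batch() && !pending.empty()) {
  std::shared_ptr<Input> input = pending.front();
  pending.pop();
  batch.AppendInput(input, task_of(input->task_id));
}
batch.CreateOutputArrays({{"prob", 1000}}, gpu);
// ... the model forwards GetInputArray() into GetOutputArray("prob") ...
batch.SliceOutputBatch({{"prob", Slice(batch.batch_size(), 1000)}});
// outputs() now holds one Output per appended Input, in the same order.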
--------------------------------------------------------------------------------
/src/nexus/app/model_handler.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_HANDLER_H_
#define NEXUS_COMMON_MODEL_HANDLER_H_

#include <atomic>
#include <memory>
#include <mutex>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>

#include "nexus/common/backend_pool.h"
#include "nexus/common/data_type.h"
#include "nexus/common/metric.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {
namespace app {

/*!
 * \brief QueryResult provides a mechanism to access the result of
 * asynchronous model execution.
 */
class QueryResult {
 public:
  /*!
   * \brief Construct a QueryResult.
   * \param qid Query ID.
   */
  QueryResult(uint64_t qid);

  bool ready() const { return ready_; }

  uint64_t query_id() const { return qid_; }
  /*! \brief Gets the status of output result */
  uint32_t status() const;
  /*! \brief Gets the error message if any error happens in the execution */
  std::string error_message() const;
  /*!
   * \brief Output the result to reply protobuf.
   * \param reply ReplyProto to be filled.
   */
  void ToProto(ReplyProto* reply) const;
  /*!
   * \brief Get the record given the index.
   * \param idx Index of record.
   * \return Record at idx.
   */
  const Record& operator[](uint32_t idx) const;
  /*! \brief Get number of records in the output */
  uint32_t num_records() const;

  void SetResult(const QueryResultProto& result);

 private:
  void CheckReady() const;

  void SetError(uint32_t error, const std::string& error_msg);

 private:
  uint64_t qid_;
  std::atomic<bool> ready_;
  uint32_t status_;
  std::string error_message_;
  std::vector<Record> records_;
};

class RequestContext;

enum LoadBalancePolicy {
  // Weighted round robin
  LB_WeightedRR = 1,
  // Query 2 backends and pick the one with the lowest utilization
  LB_Query = 2,
  // Deficit round robin
  LB_DeficitRR = 3,
};

class ModelHandler {
 public:
  ModelHandler(const std::string& model_session_id, BackendPool& pool,
               LoadBalancePolicy lb_policy);

  ~ModelHandler();

  ModelSession model_session() const { return model_session_; }

  std::string model_session_id() const { return model_session_id_; }

  std::shared_ptr<IntervalCounter> counter() const { return counter_; }

  std::shared_ptr<QueryResult> Execute(
      std::shared_ptr<RequestContext> ctx, const ValueProto& input,
      std::vector<std::string> output_fields={}, uint32_t topk=1,
      std::vector<RectProto> windows={});

  void HandleReply(const QueryResultProto& result);

  void UpdateRoute(const ModelRouteProto& route);

  std::vector<uint32_t> BackendList();

 private:
  std::shared_ptr<BackendSession> GetBackend();

  std::shared_ptr<BackendSession> GetBackendWeightedRoundRobin();

  std::shared_ptr<BackendSession> GetBackendDeficitRoundRobin();

  ModelSession model_session_;
  std::string model_session_id_;
  BackendPool& backend_pool_;
  LoadBalancePolicy lb_policy_;
  static std::atomic<uint64_t> global_query_id_;

  std::vector<uint32_t> backends_;
  /*!
   * \brief Mapping from backend id to its serving rate.
   *
   * Guarded by route_mu_.
   */
  std::unordered_map<uint32_t, double> backend_rates_;

  std::unordered_map<uint32_t, double> backend_quanta_;
  double quantum_to_rate_ratio_ = 0;
  size_t current_drr_index_ = 0;
  float total_throughput_;
  /*! \brief Interval counter that counts the number of requests within each
   * interval.
   */
  std::shared_ptr<IntervalCounter> counter_;

  std::unordered_map<uint64_t, std::shared_ptr<RequestContext> > query_ctx_;
  std::mutex route_mu_;
  std::mutex query_ctx_mu_;
  /*! \brief Random number generator */
  std::random_device rd_;
  std::mt19937 rand_gen_;

  std::atomic<bool> running_;
};

} // namespace app
} // namespace nexus

#endif // NEXUS_COMMON_MODEL_HANDLER_H_
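
// A sketch of the deficit round robin policy behind
// GetBackendDeficitRoundRobin() above (LB_DeficitRR): each backend holds a
// quantum that is replenished in proportion to its serving rate, and a
// request goes to the first backend whose quantum covers one request (the
// per-request cost is normalized to 1 here). This mirrors the member names
// above but is a sketch, not the repository implementation; it assumes the
// surrounding header's includes.
uint32_t PickBackendDrr(const std::vector<uint32_t>& backends,
                        const std::unordered_map<uint32_t, double>& rates,
                        std::unordered_map<uint32_t, double>* quanta,
                        size_t* drr_index, double quantum_to_rate_ratio) {
  for (size_t step = 0; step < backends.size(); ++step) {
    size_t idx = (*drr_index + step) % backends.size();
    uint32_t backend_id = backends[idx];
    if ((*quanta)[backend_id] >= 1.0) {
      (*quanta)[backend_id] -= 1.0;  // charge one request to the quantum
      *drr_index = idx;
      return backend_id;
    }
    // Not enough deficit accumulated: top up in proportion to the backend's
    // serving rate, then try the next backend.
    (*quanta)[backend_id] += rates.at(backend_id) * quantum_to_rate_ratio;
  }
  return 0;  // no backend can absorb the request right now
}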
--------------------------------------------------------------------------------
/src/nexus/common/model_db.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_COMMON_MODEL_DB_H_
#define NEXUS_COMMON_MODEL_DB_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include <yaml-cpp/yaml.h>

namespace nexus {

struct ProfileEntry {
  // latency in us unit
  float latency_mean;
  float latency_std;
  size_t memory_usage;
  int repeat;
};

class ModelProfile {
 public:
  ModelProfile() {}

  ModelProfile(const std::string& file_path);

  void MergeProfile(const ModelProfile& rhs);

  void LoadProfile(const std::string& file_path);

  std::string profile_id() const { return profile_id_; }

  std::string gpu_device_name() const { return gpu_device_name_; }

  std::string gpu_uuid() const { return gpu_uuid_; }

  float GetForwardLatency(uint32_t batch) const;

  float GetPreprocessLatency() const;

  float GetPostprocessLatency() const;

  size_t GetMemoryUsage(uint32_t batch) const;
  /*!
   * \brief Computes the maximum batch size that fits within the latency SLA.
   * \param latency_sla_ms Latency SLA in ms.
   * \return Max batch size.
   */
  uint32_t GetMaxBatch(float latency_sla_ms) const;
  /*!
   * \brief Computes the maximum throughput that can be achieved within the
   * latency SLA.
   * \param latency_sla_ms Latency SLA in ms.
   * \return Pair of best batch size and max throughput.
   */
  std::pair<uint32_t, float> GetMaxThroughput(float latency_sla_ms) const;

 private:
  std::string profile_id_;
  std::string gpu_device_name_;
  std::string gpu_uuid_;
  std::unordered_map<uint32_t, ProfileEntry> forward_lats_;
  ProfileEntry preprocess_;
  ProfileEntry postprocess_;
  float network_latency_us_ = 2000; // us
};

struct TFShareSuffixInfo {
  size_t suffix_index;
  std::string model_name;
  std::string output_layer;
  std::string type;
  std::string class_names;

  TFShareSuffixInfo(size_t suffix_index_, const YAML::Node &node);
};

struct TFShareInfo {
  std::string model_file;
  std::string input_layer;
  std::string slice_beg_vector;
  std::string slice_len_vector;
  int image_height;
  int image_width;
  std::unordered_map<std::string, TFShareSuffixInfo> suffix_models;

  std::string hack_internal_id;
  explicit TFShareInfo(const YAML::Node &node);
};

class ModelDatabase {
 public:
  static ModelDatabase& Singleton();

  const YAML::Node* GetModelInfo(const std::string& model_id) const;

  const YAML::Node* GetModelInfo(const std::string& framework,
                                 const std::string& model_name,
                                 uint32_t version) const;

  const ModelProfile* GetModelProfile(const std::string& gpu_device,
                                      const std::string& gpu_uuid,
                                      const std::string& profile_id) const;

  int GetSharePrefixLength(const std::string& model_id1,
                           const std::string& model_id2) const;

  std::vector<std::string> GetPrefixShareModels(
      const std::string& model_id) const;

  std::shared_ptr<TFShareInfo> GetTFShareInfo(
      const std::string& model_name) const;

 private:
  ModelDatabase(const std::string& model_root);

  void LoadModelInfo(const std::string& db_file);

  void LoadModelProfiles(const std::string& profile_dir);

 private:
  /*! \brief Map from profile ID to model profile. */
  using ProfileTable = std::unordered_map<std::string, ModelProfile>;
  /*! \brief Map from model ID to shared prefix length. */
  using PrefixMap = std::unordered_map<std::string, int>;

  /*! \brief Model database root directory */
  std::string db_root_dir_;
  /*! \brief Model store directory */
  std::string model_store_dir_;
  /*! \brief Map from model ID to model information */
  std::unordered_map<std::string, YAML::Node> model_info_table_;
  /*! \brief Map from device name to profile table */
  std::unordered_map<std::string, ProfileTable> device_profile_table_;
  /*! \brief Map from model ID to the prefix maps of sharable models */
  std::unordered_map<std::string, PrefixMap> share_prefix_models_;
  /*! \brief Map from model name to TFShareInfo */
  std::unordered_map<std::string, std::shared_ptr<TFShareInfo>> tf_share_models_;
};

} // namespace nexus

#endif // NEXUS_COMMON_MODEL_DB_H_
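
// How the profile numbers above combine: GetForwardLatency(b) grows with
// batch size b while throughput b / forward(b) improves, so the best batch
// is the largest one whose end-to-end latency still fits the SLA. A hedged
// sketch of GetMaxBatch-style arithmetic; the budget split (half the SLA,
// leaving room for queueing) and the zero-latency sentinel for unprofiled
// batch sizes are assumptions, not quotes of the implementation.
uint32_t MaxBatchWithinSla(const ModelProfile& profile, float latency_sla_ms) {
  float budget_us = latency_sla_ms * 1000 / 2;
  uint32_t best_batch = 0;
  // Assumes GetForwardLatency returns 0 once b exceeds the profiled range.
  for (uint32_t b = 1; profile.GetForwardLatency(b) > 0; ++b) {
    float total_us = profile.GetPreprocessLatency() +
                     profile.GetForwardLatency(b) +
                     profile.GetPostprocessLatency();
    if (total_us <= budget_us) {
      best_batch = b;
    }
  }
  return best_batch;
}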
--------------------------------------------------------------------------------
/src/nexus/scheduler/backend_delegate.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_SCHEDULER_BACKEND_DELEGATE_H_
#define NEXUS_SCHEDULER_BACKEND_DELEGATE_H_

#include <chrono>
#include <ctime>
#include <memory>
#include <string>
#include <vector>

#include "nexus/common/metric.h"
#include "nexus/common/model_db.h"
#include "nexus/common/model_def.h"
#include "nexus/proto/control.grpc.pb.h"
#include "nexus/scheduler/sch_info.h"

namespace nexus {
namespace scheduler {

class Scheduler;

using InstanceInfoPtr = std::shared_ptr<InstanceInfo>;

class BackendDelegate {
 public:
  BackendDelegate(uint32_t node_id, const std::string& ip,
                  const std::string& server_port, const std::string& rpc_port,
                  const std::string& gpu_device, const std::string& gpu_uuid,
                  size_t gpu_available_memory, int beacon_sec);

  uint32_t node_id() const { return node_id_; }

  std::string gpu_device() const { return gpu_device_; }

  size_t gpu_available_memory() const { return gpu_available_memory_; }

  int workload_id() const { return workload_id_; }

  void set_workload_id(int id) { workload_id_ = id; }

  bool overload() const { return overload_; }

  double Occupancy() const;

  void GetInfo(BackendInfo* info) const;

  std::time_t LastAliveTime() const;

  void Tick();

  bool Assign(const BackendDelegate& other);

  bool PrepareLoadModel(const ModelSession& model_sess, double workload,
                        InstanceInfo* inst_info, double* occupancy) const;

  void LoadModel(const InstanceInfo& inst_info);

  void LoadModel(const YAML::Node& model_info);

  void LoadPrefixModel(const ModelSession& model_session,
                       const ModelSession& shared_session);

  void UnloadModel(const std::string& model_sess_id);

  void AddBackupForModel(const std::string& model_sess_id,
                         const BackendInfo& info);

  void RemoveBackupForModel(const std::string& model_sess_id,
                            uint32_t backend_id);
  /*!
   * \brief Update model throughput given model session id and throughput.
   * \param model_sess_id Model session ID.
   * \param throughput Expected throughput to be achieved.
   * \return Leftover throughput if the expected throughput is not achieved,
   * otherwise 0.
   */
  double UpdateModelThroughput(const std::string& model_sess_id,
                               double throughput);

  void SpillOutWorkload(std::vector<std::pair<SessionGroup, double> >* spillout);

  CtrlStatus UpdateModelTableRpc();

  std::vector<std::string> GetModelSessions() const;

  std::vector<std::string> GetBackupModelSessions() const;

  std::vector<InstanceInfoPtr> GetModels() const { return models_; }

  const InstanceInfo* GetInstanceInfo(const std::string& model_sess_id) const;

  double GetModelThroughput(const std::string& model_sess_id) const;

  double GetModelGPUShare(const std::string& model_sess_id) const;

  double GetModelWeight(const std::string& model_sess_id) const;

  bool IsAlive();

  bool IsIdle() const;

 private:
  void ComputeBatchSize(InstanceInfo* inst_info, double workload) const;

  void UpdateCycle();

  uint32_t node_id_;
  std::string ip_;
  std::string server_port_;
  std::string rpc_port_;
  std::string gpu_device_;
  std::string gpu_uuid_;
  size_t gpu_available_memory_;
  int beacon_sec_;
  long timeout_ms_;
  std::unique_ptr<BackendCtrl::Stub> stub_;

  int workload_id_;

  std::vector<InstanceInfoPtr> models_;
  std::vector<InstanceInfoPtr> backup_models_;
  /*!
   * \brief Mapping from model session id to instance information.
   * It's possible that multiple model session ids map to the same instance
   * info due to prefix batching.
   */
  std::unordered_map<std::string, InstanceInfoPtr> session_model_map_;
  double exec_cycle_us_;
  double duty_cycle_us_;
  bool overload_;
  /*! \brief Indicates whether the model table is dirty. */
  bool dirty_model_table_;
  std::chrono::time_point<std::chrono::system_clock> last_time_;
};

} // namespace scheduler
} // namespace nexus

#endif // NEXUS_SCHEDULER_BACKEND_DELEGATE_H_
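
// The exec_cycle_us_/duty_cycle_us_ pair above is the heart of the
// scheduler's bin packing: each loaded instance contributes its batch
// forward latency to the exec cycle, and the duty cycle is the period at
// which batches must be issued to meet the tightest SLO. One plausible
// reading of Occupancy() under those definitions (a sketch, not the
// repository implementation):
double OccupancySketch(double exec_cycle_us, double duty_cycle_us) {
  if (duty_cycle_us <= 0) {
    return 0.0;  // nothing loaded yet
  }
  // Fraction of each duty cycle spent executing; values near 1.0 mean the
  // GPU has no slack left and the delegate reports overload.
  return exec_cycle_us / duty_cycle_us;
}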
--------------------------------------------------------------------------------
/src/nexus/backend/worker.cpp:
--------------------------------------------------------------------------------
#include <glog/logging.h>
#include <pthread.h>
#include <sstream>

#include "nexus/backend/backend_server.h"
#include "nexus/backend/model_ins.h"
#include "nexus/backend/worker.h"

namespace nexus {
namespace backend {

Worker::Worker(int index, BackendServer* server,
               BlockPriorityQueue<Task>& task_queue) :
    index_(index),
    server_(server),
    task_queue_(task_queue),
    running_(false) {}

void Worker::Start(int core) {
  running_ = true;
  thread_ = std::thread(&Worker::Run, this);
  if (core >= 0) {
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(core, &cpuset);
    int rc = pthread_setaffinity_np(thread_.native_handle(),
                                    sizeof(cpu_set_t), &cpuset);
    if (rc != 0) {
      LOG(ERROR) << "Error calling pthread_setaffinity_np: " << rc;
    }
    LOG(INFO) << "Worker " << index_ << " is pinned on CPU " << core;
  }
}

void Worker::Stop() {
  running_ = false;
  if (thread_.joinable()) {
    thread_.join();
  }
}

void Worker::Run() {
  std::this_thread::sleep_for(std::chrono::milliseconds(20));
  LOG(INFO) << "Worker " << index_ << " starts";
  auto timeout = std::chrono::milliseconds(50);
  while (running_) {
    std::shared_ptr<Task> task = task_queue_.pop(timeout);
    if (task == nullptr) {
      continue;
    }
    Process(task);
  }
  LOG(INFO) << "Worker " << index_ << " stopped";
}

void Worker::Process(std::shared_ptr<Task> task) {
  switch (task->stage) {
    case kPreprocess: {
      task->model = server_->GetModel(task->query.model_session_id());
      if (task->model == nullptr) {
        std::stringstream ss;
        ss << "Model session is not loaded: " << task->query.model_session_id();
        task->result.set_status(MODEL_SESSION_NOT_LOADED);
        SendReply(std::move(task));
        break;
      }
      // Preprocess task
      if (!task->model->Preprocess(task)) {
        if (task->result.status() != CTRL_OK) {
          SendReply(std::move(task));
        } else {
          // Relay the request to the backup server with the lowest utilization
          std::vector<uint32_t> backups = task->model->BackupBackends();
          double min_util = 1.;
          std::shared_ptr<BackupClient> best_backup = nullptr;
          for (auto backend_id : backups) {
            auto backup = server_->GetBackupClient(backend_id);
            double util = backup->GetUtilization();
            if (util < min_util) {
              min_util = util;
              best_backup = backup;
            }
          }
          if (best_backup != nullptr) {
            // LOG(INFO) << "Relay request " << task->query.model_session_id() <<
            //     " to backup " << best_backup->node_id() <<
            //     " with utilization " << min_util;
            best_backup->Forward(std::move(task));
          } else {
            LOG(INFO) << "All backup servers are full";
            task->model->Preprocess(task, true);
          }
        }
      }
      break;
    }
    case kPostprocess: {
      if (task->result.status() != CTRL_OK) {
        SendReply(std::move(task));
      } else {
        task->model->Postprocess(task);
        SendReply(std::move(task));
      }
      break;
    }
    default:
      LOG(ERROR) << "Wrong task stage: " << task->stage;
  }
}

void Worker::SendReply(std::shared_ptr<Task> task) {
  task->timer.Record("end");
  task->result.set_query_id(task->query.query_id());
  task->result.set_model_session_id(task->query.model_session_id());
  task->result.set_latency_us(task->timer.GetLatencyMicros("begin", "end"));
  task->result.set_queuing_us(task->timer.GetLatencyMicros("begin", "exec"));
  if (task->model != nullptr && task->model->backup()) {
    task->result.set_use_backup(true);
  } else {
    task->result.set_use_backup(false);
  }
  MessageType reply_type = kBackendReply;
  if (task->msg_type == kBackendRelay) {
    reply_type = kBackendRelayReply;
  }
  auto msg = std::make_shared<Message>(reply_type,
                                       task->result.ByteSizeLong());
  msg->EncodeBody(task->result);
  task->connection->Write(std::move(msg));
}

} // namespace backend
} // namespace nexus
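
// A sketch of how a backend might spin up the Worker pool above, pinning
// each worker to its own CPU core via Start(core). `server`, `task_queue`,
// and `num_workers` are placeholders for values owned by BackendServer.
std::vector<std::unique_ptr<Worker>> workers;
for (int i = 0; i < num_workers; ++i) {
  workers.emplace_back(new Worker(i, server, task_queue));
  workers.back()->Start(/*core=*/i);
}
// ... serve traffic ...
for (auto& worker : workers) {
  worker->Stop();  // joins the worker thread
}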
--------------------------------------------------------------------------------
/src/nexus/app/frontend.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_APP_FRONTEND_H_
#define NEXUS_APP_FRONTEND_H_

#include <atomic>
#include <memory>
#include <mutex>
#include <random>
#include <thread>
#include <unordered_set>

#include "nexus/app/model_handler.h"
#include "nexus/app/query_processor.h"
#include "nexus/app/request_context.h"
#include "nexus/app/rpc_service.h"
#include "nexus/app/user_session.h"
#include "nexus/app/worker.h"
#include "nexus/common/backend_pool.h"
#include "nexus/common/block_queue.h"
#include "nexus/common/connection.h"
#include "nexus/common/model_def.h"
#include "nexus/common/server_base.h"
#include "nexus/common/spinlock.h"
#include "nexus/proto/control.grpc.pb.h"
#include "nexus/proto/nnquery.pb.h"

namespace nexus {
namespace app {

class Frontend : public ServerBase, public MessageHandler {
 public:
  Frontend(std::string port, std::string rpc_port, std::string sch_addr);

  virtual ~Frontend();

  //virtual void Process(const RequestProto& request, ReplyProto* reply) = 0;

  uint32_t node_id() const { return node_id_; }

  std::string rpc_port() const { return rpc_service_.port(); }

  void Run(QueryProcessor* qp, size_t nthreads);

  void Stop();
  /*! \brief Accepts new user connection */
  void HandleAccept() final;
  /*!
   * \brief Handles new messages from user or backend connections.
   * \param conn Shared pointer of Connection.
   * \param message Received message.
   */
  void HandleMessage(std::shared_ptr<Connection> conn,
                     std::shared_ptr<Message> message) final;
  /*!
   * \brief Handles connection error.
   * \param conn Shared pointer of Connection.
   * \param ec Boost error code.
   */
  void HandleError(std::shared_ptr<Connection> conn,
                   boost::system::error_code ec) final;

  void UpdateModelRoutes(const ModelRouteUpdates& request, RpcReply* reply);

  std::shared_ptr<UserSession> GetUserSession(uint32_t uid);

 protected:
  std::shared_ptr<ModelHandler> LoadModel(const LoadModelRequest& req);

  std::shared_ptr<ModelHandler> LoadModel(const LoadModelRequest& req,
                                          LoadBalancePolicy lb_policy);

  void ComplexQuerySetup(const ComplexQuerySetupRequest& req);

  void ComplexQueryAddEdge(const ComplexQueryAddEdgeRequest& req);

 private:
  void Register();

  void Unregister();

  void KeepAlive();

  bool UpdateBackendPoolAndModelRoute(const ModelRouteProto& route);

  void RegisterUser(std::shared_ptr<UserSession> user_sess,
                    const RequestProto& request, ReplyProto* reply);

  void Daemon();

  void ReportWorkload(const WorkloadStatsProto& request);

 private:
  /*! \brief Indicator whether the frontend is running */
  std::atomic_bool running_;
  /*! \brief Interval to update stats to scheduler in seconds */
  uint32_t beacon_interval_sec_;
  /*! \brief Frontend node ID */
  uint32_t node_id_;
  /*! \brief RPC service */
  RpcService rpc_service_;
  /*! \brief RPC client connected to scheduler */
  std::unique_ptr<SchedulerCtrl::Stub> sch_stub_;
  /*! \brief Backend pool */
  BackendPool backend_pool_;
  /*!
   * \brief Map from backend ID to model sessions served at this backend.
   * Guarded by backend_sessions_mu_.
   */
  std::unordered_map<uint32_t,
                     std::unordered_set<std::string> > backend_sessions_;
  /*! \brief Request pool */
  RequestPool request_pool_;
  /*! \brief Worker pool for processing requests */
  std::vector<std::unique_ptr<Worker> > workers_;
  /*! \brief User connection pool. Guarded by user_mutex_. */
  std::unordered_set<std::shared_ptr<Connection> > connection_pool_;
  /*! \brief Map from user id to user session. Guarded by user_mutex_. */
  std::unordered_map<uint32_t, std::shared_ptr<UserSession> > user_sessions_;
  /*! \brief Map from model session ID to model handler. */
  std::unordered_map<std::string,
                     std::shared_ptr<ModelHandler> > model_pool_;

  std::thread daemon_thread_;
  /*! \brief Mutex for connection_pool_ and user_sessions_ */
  std::mutex user_mutex_;

  std::mutex backend_sessions_mu_;
  /*! \brief Random number generator */
  std::random_device rd_;
  std::mt19937 rand_gen_;
};

} // namespace app
} // namespace nexus

#endif // NEXUS_APP_FRONTEND_H_
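
// How the pieces above fit together at runtime: Register() announces the
// node to the scheduler, LoadModel() populates model_pool_ with
// ModelHandlers, HandleMessage() feeds user requests into request_pool_,
// and Run() drains it with `nthreads` Workers. A minimal launch sketch,
// assuming an AppBase-style subclass as in the examples and a hypothetical
// BuildQueryProcessor() helper:
MyApp app("9001", "9002", "127.0.0.1");   // derives from AppBase/Frontend
QueryProcessor* qp = BuildQueryProcessor();
app.Run(qp, /*nthreads=*/4);              // starts workers, serves requests
// ... on shutdown
app.Stop();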
--------------------------------------------------------------------------------
/src/nexus/backend/task.h:
--------------------------------------------------------------------------------
#ifndef NEXUS_BACKEND_TASK_H_
#define NEXUS_BACKEND_TASK_H_

#include <atomic>
#include <memory>
#include <unordered_map>
#include <vector>
#include <yaml-cpp/yaml.h>

#include "nexus/common/block_queue.h"
#include "nexus/common/connection.h"
#include "nexus/common/data_type.h"
#include "nexus/proto/nnquery.pb.h"
#include "nexus/proto/control.pb.h"

namespace nexus {
namespace backend {

class ModelExecutor;
class ModelInstance;
class Task;

/*!
 * \brief Input contains the data of a single input and related information
 * for the neural network.
 */
class Input : public DeadlineItem {
 public:
  /*!
   * \brief Construct an Input.
   * \param deadline Deadline of the corresponding task.
   * \param tid Task id of the corresponding task.
   * \param idx Index in the inputs of the task.
   * \param arr Input array that contains the input data.
   */
  Input(TimePoint deadline, uint64_t tid, int idx, ArrayPtr arr);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Index in the input vector of the task. */
  int index;
  /*! \brief Input array that contains the data. */
  std::shared_ptr<Array> array;
};

/*!
 * \brief Output contains the data of a single output.
 */
class Output {
 public:
  /*!
   * \brief Construct an Output.
   * \param tid Task id of the corresponding task.
   * \param idx Index in the outputs of the task.
   * \param arrs Map from name to output arrays.
   */
  Output(uint64_t tid, int idx,
         const std::unordered_map<std::string, ArrayPtr>& arrs);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Index in the output vector of the task. */
  int index;
  /*! \brief Map from array name to array. */
  std::unordered_map<std::string, ArrayPtr> arrays;
};

/*! \brief Stage indicates the task processing stage */
enum Stage {
  /*! \brief Task at the pre-processing stage */
  kPreprocess = 0,
  /*! \brief Task at the model forwarding stage */
  kForward,
  /*! \brief Task at the post-processing stage */
  kPostprocess,
};

class Task : public DeadlineItem, public std::enable_shared_from_this<Task> {
 public:
  /*! \brief Construct a task without connection. */
  Task();
  /*!
   * \brief Construct a task with connection to frontend.
   * \param conn Connection to frontend server.
   */
  Task(std::shared_ptr<Connection> conn);
  /*!
   * \brief Decode query from message.
   * \param message Message received from frontend.
   */
  void DecodeQuery(std::shared_ptr<Message> message);
  /*!
   * \brief Append preprocessed input array to task.
   * \param arr Input array.
   */
  void AppendInput(ArrayPtr arr);
  /*!
   * \brief Add output at index location.
   * \param output Output content.
   * \return Whether all outputs have been filled in.
   */
  bool AddOutput(std::shared_ptr<Output> output);
  /*!
   * \brief Add virtual output at index location due to error such as timeout.
   * \param index Index of the virtual output.
   * \return Whether all outputs have been filled in.
   */
  bool AddVirtualOutput(int index);

  /*! \brief Task id */
  uint64_t task_id;
  /*! \brief Connection to frontend. */
  std::shared_ptr<Connection> connection;
  /*! \brief Message type */
  MessageType msg_type;
  /*! \brief Query to process */
  QueryProto query;
  /*! \brief Query result */
  QueryResultProto result;
  /*! \brief Model instance to execute for the task */
  std::shared_ptr<ModelExecutor> model;
  /*!
   * \brief Suffix model for postprocessing, only used in the share prefix
   * model.
   */
  std::shared_ptr<ModelInstance> suffix_model;
  /*! \brief Current task processing stage */
  volatile Stage stage;
  /*! \brief Individual inputs of the task */
  std::vector<std::shared_ptr<Input> > inputs;
  /*! \brief Outputs of the task */
  std::vector<std::shared_ptr<Output> > outputs;
  /*! \brief Number of outputs that have been filled in */
  std::atomic<uint32_t> filled_outputs;
  /*! \brief Attributes that need to be kept during the task */
  YAML::Node attrs;
  /*! \brief Timer that counts the time spent in each stage */
  Timer timer;

 private:
  /*! \brief Global task ID */
  static std::atomic<uint64_t> global_task_id_;
};

} // namespace backend
} // namespace nexus

#endif // NEXUS_BACKEND_TASK_H_
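
// Task lifecycle through the Stage enum above: a Worker handles
// kPreprocess, the GPU executor consumes the Task's Input items during
// kForward, and once every Output is filled the Task re-enters the queue at
// kPostprocess. A sketch of the bookkeeping, with `conn`, `msg`, `arr`,
// `output`, and `task_queue` as placeholders:
auto task = std::make_shared<Task>(conn);
task->DecodeQuery(msg);        // fills task->query from the wire message
task->AppendInput(arr);        // one Input per image/crop at kPreprocess
task->stage = kForward;
// ... batch execution happens elsewhere; when a result arrives:
if (task->AddOutput(output)) { // true once all outputs are filled in
  task->stage = kPostprocess;
  task_queue.push(task);
}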
--------------------------------------------------------------------------------
/src/nexus/proto/nnquery.proto:
--------------------------------------------------------------------------------
syntax = "proto3";

package nexus;

message RectProto {
  uint32 left = 1;
  uint32 top = 2;
  uint32 right = 3;
  uint32 bottom = 4;
}

message ImageProto {
  enum ImageFormat {
    JPEG = 0;
    PNG = 1;
    GIF = 2;
  }
  bytes data = 1;
  ImageFormat format = 2;
  bool color = 3;

  // This is a hack. Provide the filename of the image instead of transferring
  // the real bytes. Hopefully this should save a lot of bandwidth so that a
  // single frontend server could handle all requests, thus we don't need to
  // deal with the problem of imbalanced load at backends.
  string hack_filename = 4;
}

enum DataType {
  DT_UNKNOWN = 0;
  DT_BOOL = 1;
  DT_INT8 = 2;
  DT_UINT8 = 3;
  DT_INT32 = 4;
  DT_UINT32 = 5;
  DT_FLOAT = 6;
  DT_DOUBLE = 7;
  DT_STRING = 8;
  DT_TENSOR = 50;
  DT_IMAGE = 51;
  DT_RECT = 52;
}

message TensorProto {
  // Shape of tensor
  repeated uint32 shape = 1;
  // Data type can only be int8, int32, float, double, or string
  DataType data_type = 2;
  // Exactly ONE of the following fields must be present
  repeated bool bools = 10;     // bool tensor
  repeated int32 ints = 11;     // int tensor
  repeated float floats = 12;   // float tensor
  repeated double doubles = 13; // double tensor
  repeated bytes strings = 14;  // string tensor
}

message ValueProto {
  // Name of value
  string name = 1;
  DataType data_type = 2;

  // Exactly ONE of the following fields must be present.
  bool b = 10;    // bool
  int32 i = 11;   // int
  float f = 13;   // float
  double d = 14;  // double
  bytes s = 15;   // string

  TensorProto tensor = 20; // tensor
  ImageProto image = 21;   // image
  RectProto rect = 22;     // bbox
}

message RecordProto {
  repeated ValueProto named_value = 1;
}

message RequestProto {
  // User ID
  uint32 user_id = 1;
  // Request ID
  uint32 req_id = 2;
  // Input
  ValueProto input = 3;
}

message ReplyProto {
  // User ID
  uint32 user_id = 1;
  // Request ID
  uint32 req_id = 2;
  // Status
  int32 status = 3;
  // Error message
  string error_message = 4;
  // Output
  repeated RecordProto output = 5;
  // Latency
  uint64 latency_us = 100;
  // Breakdown latency for each query
  repeated QueryLatency query_latency = 101;
}

message ModelSession {
  // Framework
  string framework = 1;
  // Model name
  string model_name = 2;
  // Model version
  uint32 version = 3;
  // Latency SLA in milliseconds
  uint32 latency_sla = 4;
  // Specify image height and width for models whose input is resizable,
  // otherwise ignored
  uint32 image_height = 10;
  uint32 image_width = 11;
}

message QueryProto {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Input of query
  ValueProto input = 3;
  // Include top k records
  uint32 topk = 10;
  // Cropped windows in the image
  repeated RectProto window = 11;
  // Output fields
  repeated string output_field = 12;
  // Threshold for confidence, default is 0
  repeated ValueProto filter = 13;
  // Latency slack in milliseconds
  int32 slack_ms = 40;
  // Show breakdown latency in the result
  bool debug = 100;
}

message QueryResultProto {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Status
  int32 status = 3;
  // Error message
  string error_message = 4;
  // Output
  repeated RecordProto output = 5;
  // Latency
  uint64 latency_us = 20;
  uint64 queuing_us = 21;

  bool use_backup = 22;
}

message QueryLatency {
  // Query ID
  uint64 query_id = 1;
  // Model session ID
  string model_session_id = 2;
  // Timestamp of sending the query, relative to the time the user request
  // was received
  uint64 frontend_send_timestamp_us = 3;
  // Timestamp of receiving the query result, relative to the time the user
  // request was received
  uint64 frontend_recv_timestamp_us = 4;
  // Backend processing latency
  uint64 backend_latency_us = 5;
  // Backend queuing latency
  uint64 backend_queuing_us = 6;

  bool use_backup = 7;
}
--------------------------------------------------------------------------------
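
// A hedged end-to-end sketch of the wire format above, using the standard
// protobuf-generated C++ API (nnquery.pb.h): the client fills a
// RequestProto whose input is a JPEG image; records come back in the
// ReplyProto's repeated output field.
#include <string>

#include "nexus/proto/nnquery.pb.h"

nexus::RequestProto MakeRequest(uint32_t user_id, uint32_t req_id,
                                const std::string& jpeg_bytes) {
  nexus::RequestProto request;
  request.set_user_id(user_id);
  request.set_req_id(req_id);
  auto* input = request.mutable_input();
  input->set_data_type(nexus::DT_IMAGE);
  auto* image = input->mutable_image();
  image->set_data(jpeg_bytes);                      // raw JPEG bytes
  image->set_format(nexus::ImageProto::JPEG);
  image->set_color(true);
  return request;
}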