├── LICENSE ├── PubSubQueue.h ├── README.md └── test ├── .gitignore ├── build.sh ├── common.h ├── cpupin.h ├── multhread_test.cc ├── pub.cc ├── shmmap.h ├── sub.cc └── timestamp.h /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Meng Rao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/*
MIT License

Copyright (c) 2018 Meng Rao

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#pragma once

#include <algorithm> // std::min
#include <cstdint>   // uint32_t, uint64_t
#include <cstring>   // memcpy

// PubSubQueue is a single publisher(source) multiple subscriber(receiver) message queue.
// Publisher is not affected(e.g. blocked) by or even aware of subscribers.
// Subscriber is a pure reader; if it's not reading fast enough and falls far behind the publisher it'll lose messages.
// PubSubQueue can be zero initialized without calling constructor, which facilitates allocating in shared memory.
// It is also "crash safe" which means crash of either publisher or subscribers will not corrupt the queue.
//
// Indices (written_idx, writing_idx, last_key_idx and the idx handed to subscribers) count 64-byte blocks and only
// ever grow; the slot in the ring is always `index % BLK_CNT`.
//
// The `asm volatile("" ...)` statements below are compiler-only barriers: they emit no instructions and merely stop
// the compiler from caching or reordering the listed objects across them.
// NOTE(review): ordering between cores therefore presumably relies on a strongly ordered CPU (e.g. x86) - confirm
// before porting to a weakly ordered architecture.
//
// Bytes should be at least twice the size of the largest msg, otherwise alloc() could return nullptr
template<uint32_t Bytes>
class PubSubQueue
{
public:
  struct MsgHeader
  {
    // size of this msg, including header itself
    uint32_t size;
    // userdata is used by user, e.g. save the msg_type
    // we assume user msg is 8 bytes aligned so there should be 4 bytes padding anyway if we don't define userdata
    uint32_t userdata;
  };

  // Publisher only: reserve space for a message with `size` payload bytes.
  // Returns the header of the reserved message; the payload starts right after it (header + 1) and header->size is
  // already set to size + sizeof(MsgHeader). The message becomes visible to subscribers only after pub().
  // Returns nullptr if the message cannot be placed (too large for the ring, or too large for the space left once
  // the tail padding that must be skipped is accounted for).
  MsgHeader* alloc(uint32_t size) {
    // Reject anything that cannot fit even in an empty ring *before* adding the header size: otherwise a size close
    // to UINT32_MAX would wrap around to a tiny value and be accepted.
    if(size > BLK_CNT * sizeof(Block) - sizeof(MsgHeader)) return nullptr;
    size += sizeof(MsgHeader);
    uint32_t blk_sz = toBlk(size);
    // blocks left between the current write position and the physical end of the ring
    uint32_t padding_sz = BLK_CNT - (written_idx % BLK_CNT);
    // a message is never split across the end of the ring: skip the tail and start again at slot 0
    bool rewind = blk_sz > padding_sz;
    uint32_t advance_sz = blk_sz + (rewind ? padding_sz : 0);
    if(advance_sz > BLK_CNT) { // msg size too large
      return nullptr;
    }
    // announce the region about to be overwritten first, so readers can detect that their data got clobbered
    writing_idx = written_idx + advance_sz;
    asm volatile("" : : "m"(writing_idx) :);
    if(rewind) {
      // size == 0 is the marker telling readers to jump to the start of the ring
      blk[written_idx % BLK_CNT].header.size = 0;
      asm volatile("" : : "m"(blk), "m"(written_idx) :);
      written_idx += padding_sz;
    }
    MsgHeader& header = blk[written_idx % BLK_CNT].header;
    header.size = size;
    return &header;
  }

  // Publisher only: publish the message allocated by the preceding alloc().
  // Set key to mark it as a key message so newly joined subscribers can start reading from this message.
  void pub(bool key = false) {
    asm volatile("" : : "m"(blk), "m"(written_idx) :);
    uint32_t blk_sz = toBlk(blk[written_idx % BLK_CNT].header.size);
    if(key) last_key_idx = written_idx + 1; // +1 to allow last_key_idx to be zero initialized
    // it's OK that last_key_idx got changed and then process crashed/hang before written_idx can be updated
    // in this case sub(true) is effectively sub(false), no big deal
    written_idx += blk_sz;
    asm volatile("" : : "m"(written_idx), "m"(last_key_idx) :);
  }

  // Newly joined subscriber calls sub() to get a message index to start reading at.
  // Set key to true to start from the last key message, if there is one and it has not been overwritten yet;
  // otherwise the index of the next message the publisher is going to write is returned (can't read it immediately).
  uint64_t sub(bool key = false) {
    // force read memory; written_idx is included so the fallback below never returns a value cached by the caller
    asm volatile("" : "=m"(last_key_idx), "=m"(writing_idx), "=m"(written_idx) : :);
    if(key && last_key_idx > 0 && last_key_idx + BLK_CNT > writing_idx) return last_key_idx - 1;
    return written_idx;
  }

  enum ReadRes
  {
    ReadOK,
    ReadAgain,
    ReadBuffTooShort,
    ReadNeedReSub
  };

  // Subscriber provides a message index to read at and a buffer to copy the message(including MsgHeader) to.
  // return ReadOK: successfully read a message and set idx to the next message
  // return ReadAgain: no message to read now, user should try again later
  //                   (idx may have been moved past a rewind marker in the meantime)
  // return ReadBuffTooShort: bufsize is too small to hold the message; the first bufsize bytes were copied, so the
  //                          MsgHeader can be inspected provided bufsize >= sizeof(MsgHeader); idx is not advanced
  // return ReadNeedReSub: idx is obsolete and the message is lost, user has to subscribe again to get a new index
  ReadRes read(uint64_t& __restrict__ idx, void* __restrict__ buf, uint32_t bufsize) {
    asm volatile("" : "=m"(written_idx), "=m"(blk) : :); // force read memory
    if(idx >= written_idx) return ReadAgain;
    // size may have been overridden/corrupted by the publisher, we'll check it later...
    uint32_t size = blk[idx % BLK_CNT].header.size;
    uint32_t padding_sz = BLK_CNT - (idx % BLK_CNT);
    if(size == 0) { // rewind
      asm volatile("" : "=m"(writing_idx) : :);
      // the marker itself may be stale data if the publisher has lapped us
      if(idx + BLK_CNT < writing_idx) return ReadNeedReSub;
      idx += padding_sz;
      if(idx >= written_idx) return ReadAgain;
      size = blk[idx % BLK_CNT].header.size;
      padding_sz = BLK_CNT;
    }
    // never copy beyond the caller's buffer nor beyond the physical end of the ring, whatever `size` claims
    uint32_t copy_size = std::min(std::min(bufsize, size), padding_sz * static_cast<uint32_t>(sizeof(Block)));
    memcpy(buf, &blk[idx % BLK_CNT], copy_size);
    // validate after copying: if the publisher started overwriting this region the copy is garbage
    asm volatile("" : "=m"(writing_idx), "=m"(blk) : :);
    if(idx + BLK_CNT < writing_idx) return ReadNeedReSub;
    if(copy_size < size) return ReadBuffTooShort;
    idx += toBlk(size);
    return ReadOK;
  }

private:
  // number of blocks needed to hold `bytes` bytes (rounded up)
  static inline uint32_t toBlk(uint32_t bytes) {
    return (bytes + sizeof(Block) - 1) / sizeof(Block);
  }

  struct Block
  {
    alignas(64) MsgHeader header; // make it 64 bytes aligned, same as cache line size
  };

  static constexpr uint32_t BLK_CNT = Bytes / sizeof(Block);
  static_assert(BLK_CNT && !(BLK_CNT & (BLK_CNT - 1)), "BLK_CNT must be a power of 2");

  Block blk[BLK_CNT];

  // index of the next block to be published; everything below it has been published
  alignas(64) uint64_t written_idx = 0;
  // index of the last key message + 1, 0 means there is none
  uint64_t last_key_idx = 0;
  // end index of the region the publisher may currently be writing into (set in alloc() before any write)
  uint64_t writing_idx = 0;
};
148 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## PubSubQueue 2 | PubSubQueue is a pub-sub model message queue on localhost, similar to IP multicast on the network, and it's highly optimized for low latency. 3 | 4 | It's a simple C++ class suitable for residing in shared memory for IPC, representing a topic on the host. 5 | 6 | It supports a single publisher(source) and multiple subscribers(receivers), where the publisher is not affected(e.g. blocked) by or even aware of subscribers. A subscriber is a pure reader of the queue; if it's not reading fast enough and falls far behind the publisher, it'll lose messages. 7 | 8 | Msg type in PubSubQueue is blob and each msg is guaranteed to be allocated at an 8-byte-aligned address by the queue, so a C/C++ struct can simply be used without performance penalty. 9 | 10 | ## Example 11 | Usage for publisher: 12 | ```c++ 13 | #include "PubSubQueue.h" 14 | 15 | using MsgQ = PubSubQueue<1024>; 16 | MsgQ q; 17 | 18 | // simply publish an int value of 123 19 | MsgQ::MsgHeader* header = q.alloc(sizeof(int)); 20 | *(int*)(header + 1) = 123; 21 | q.pub(); 22 | ``` 23 | 24 | Usage for Subscriber: 25 | ```c++ 26 | 27 | char buf[100]; 28 | auto idx = q.sub(); 29 | while(true) { 30 | auto res = q.read(idx, buf, sizeof(buf)); 31 | if(res == MsgQ::ReadOK) { 32 | MsgQ::MsgHeader* header = (MsgQ::MsgHeader*)buf; 33 | assert(header->size == sizeof(MsgQ::MsgHeader) + sizeof(int)); // size includes the header itself 34 | int msg = *(int*)(header + 1); 35 | // handle msg... 36 | } 37 | // handle other res... 38 | } 39 | ``` 40 | For more examples, see [test](https://github.com/MengRao/IPC_PubSub/tree/master/test). 
41 | 42 | ## Key msg 43 | One useful feature PubSubQueue supports is "key msg"(it's like a key frame in an audio/video stream), where the publisher can mark the msg it publishes as a key msg, and the queue will save the index of the last key msg so any newly joined subscribers can start reading from this message. 44 | 45 | Last value caching ([LVC](https://www.safaribooksonline.com/library/view/zeromq/9781449334437/ch05s03.html)) can be easily implemented by marking every msg as a key msg. 46 | 47 | ## Performance 48 | The number of subscribers does not affect the performance. 49 | 50 | The latency of transmitting a msg from publisher to subscriber is consistently lower than **1 us** if the subscriber is busy polling on the queue. If process scheduling optimization is applied(such as SCHED_FIFO and sched_setaffinity), the latency can be lower than **200 ns**. 51 | 52 | ## How to support multiple publishers? 53 | There are two solutions: 54 | * Use multiple topics(thus multiple queues) and have subscribers subscribe to all these topics. 55 | * Add a [multiple producer single consumer queue](https://github.com/MengRao/MPSC_Queue) and have a dedicated worker consume and publish msgs. 56 | 57 | ## An Implementation for Fixed-Sized Msg 58 | If you are using fixed-size msgs, then take a look at [SPMC_Queue](https://github.com/MengRao/SPMC_Queue) as it's more efficient and easier to use. 
59 | 60 | -------------------------------------------------------------------------------- /test/.gitignore: -------------------------------------------------------------------------------- 1 | multhread_test 2 | pub 3 | sub 4 | -------------------------------------------------------------------------------- /test/build.sh: -------------------------------------------------------------------------------- 1 | g++ -std=c++11 -O3 -o multhread_test multhread_test.cc -pthread 2 | g++ -std=c++11 -O3 -o pub pub.cc -lrt 3 | g++ -std=c++11 -O3 -o sub sub.cc -lrt 4 | -------------------------------------------------------------------------------- /test/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "shmmap.h" 4 | #include "../PubSubQueue.h" 5 | 6 | template 7 | struct Msg 8 | { 9 | static constexpr uint16_t msg_type = MSGTYPE; 10 | uint64_t ts; 11 | int tid; 12 | int val[N]; 13 | }; 14 | 15 | typedef Msg<1, 1> Msg1; 16 | typedef Msg<3, 2> Msg2; 17 | typedef Msg<8, 3> Msg3; 18 | typedef Msg<11, 4> Msg4; 19 | 20 | typedef PubSubQueue<4096> MsgQ; 21 | 22 | MsgQ* getMsgQ(const char* topic) { 23 | std::string path = "/"; 24 | path += topic; 25 | return shmmap(path.c_str()); 26 | } 27 | -------------------------------------------------------------------------------- /test/cpupin.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | bool cpupin(int cpuid, int prio = 99) { 4 | if(prio > 0) { 5 | sched_param param; 6 | param.sched_priority = prio; 7 | if(sched_setscheduler(0, SCHED_FIFO, ¶m)) { 8 | std::cout << "sched_setscheduler error: " << strerror(errno) << std::endl; 9 | return false; 10 | } 11 | } 12 | 13 | cpu_set_t my_set; 14 | CPU_ZERO(&my_set); 15 | CPU_SET(cpuid, &my_set); 16 | if(sched_setaffinity(0, sizeof(cpu_set_t), &my_set)) { 17 | std::cout << "sched_setaffinity error: " << strerror(errno) << std::endl; 18 | return false; 19 | } 20 
| 21 | return true; 22 | } 23 | -------------------------------------------------------------------------------- /test/multhread_test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "timestamp.h" 5 | #include "cpupin.h" 6 | #include "../PubSubQueue.h" 7 | 8 | using namespace std; 9 | 10 | template 11 | struct Msg 12 | { 13 | static constexpr uint16_t msg_type = MSGTYPE; 14 | long val[N]; 15 | }; 16 | 17 | typedef Msg<3, 1> Msg1; 18 | typedef Msg<8, 2> Msg2; 19 | typedef Msg<17, 3> Msg3; 20 | typedef Msg<45, 4> Msg4; 21 | 22 | typedef PubSubQueue<4096> MsgQ; 23 | 24 | MsgQ _q; 25 | const int MaxNum = 10000000; 26 | const int NumThr = 4; 27 | 28 | template 29 | void sendMsg(MsgQ* q, int& val) { 30 | MsgQ::MsgHeader* header = q->alloc(sizeof(T)); 31 | assert(header != nullptr); 32 | header->userdata = T::msg_type; 33 | T* msg = (T*)(header + 1); 34 | for(auto& v : msg->val) v = val++; 35 | q->pub(false); 36 | } 37 | 38 | void pubthread(int tid) { 39 | 40 | MsgQ* q = &_q; 41 | int val = 1; 42 | while(val <= MaxNum) { 43 | int tp = rand() % 4 + 1; 44 | switch(tp) { 45 | case 1: sendMsg(q, val); break; 46 | case 2: sendMsg(q, val); break; 47 | case 3: sendMsg(q, val); break; 48 | case 4: sendMsg(q, val); break; 49 | default: assert(false); 50 | } 51 | std::this_thread::yield(); 52 | } 53 | } 54 | 55 | template 56 | void handleMsg(MsgQ::MsgHeader* header, int& val, int tid) { 57 | T* msg = (T*)(header + 1); 58 | for(auto v : msg->val) { 59 | if(v <= val) { 60 | cout << tid << ": bad: got: " << v << " latest: " << val << endl; 61 | exit(1); 62 | } 63 | if(v > val + 1) { 64 | cout << tid << ": missing data, expect: " << (val + 1) << " got: " << v << endl; 65 | } 66 | val = v; 67 | } 68 | } 69 | 70 | void subthread(int tid) { 71 | 72 | MsgQ* q = &_q; 73 | uint64_t idx = q->sub(true); 74 | char buf[1024]; 75 | 76 | // int tid = (pid_t)::syscall(SYS_gettid); 77 | int i = 0; 78 | while(i < 
MaxNum) { 79 | auto res = q->read(idx, buf, sizeof(buf)); 80 | if(res == MsgQ::ReadOK) { 81 | MsgQ::MsgHeader* header = (MsgQ::MsgHeader*)buf; 82 | auto msg_type = header->userdata; 83 | switch(msg_type) { 84 | case 1: handleMsg(header, i, tid); break; 85 | case 2: handleMsg(header, i, tid); break; 86 | case 3: handleMsg(header, i, tid); break; 87 | case 4: handleMsg(header, i, tid); break; 88 | default: assert(false); 89 | } 90 | continue; 91 | } 92 | assert(res != MsgQ::ReadBuffTooShort); 93 | if(res == MsgQ::ReadNeedReSub) { 94 | cout << tid << ": need resub" << endl; 95 | idx = q->sub(true); 96 | } 97 | } 98 | } 99 | 100 | 101 | int main() { 102 | vector threads; 103 | for(int i = 0; i < NumThr; i++) { 104 | threads.emplace_back(subthread, i); 105 | } 106 | std::this_thread::yield(); 107 | threads.emplace_back(pubthread, NumThr); 108 | 109 | for(auto& thr : threads) { 110 | thr.join(); 111 | } 112 | 113 | return 0; 114 | } 115 | 116 | 117 | 118 | -------------------------------------------------------------------------------- /test/pub.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "timestamp.h" 5 | #include "cpupin.h" 6 | #include "common.h" 7 | 8 | using namespace std; 9 | 10 | MsgQ* q; 11 | int tid; 12 | const int MaxNum = 10000000; 13 | 14 | template 15 | void sendMsg(int& val) { 16 | MsgQ::MsgHeader* header = q->alloc(sizeof(T)); 17 | assert(header != nullptr); 18 | header->userdata = T::msg_type; 19 | T* msg = (T*)(header + 1); 20 | for(auto& v : msg->val) v = val++; 21 | msg->tid = tid; 22 | msg->ts = rdtsc(); 23 | q->pub(true); 24 | } 25 | 26 | 27 | int main(int argc, const char** argv) { 28 | // cpupin(2); 29 | if(argc < 2) { 30 | cout << "usage: " << argv[0] << " TOPIC" << endl; 31 | exit(1); 32 | } 33 | q = getMsgQ(argv[1]); 34 | if(!q) exit(1); 35 | tid = (pid_t)::syscall(SYS_gettid); 36 | 37 | int val = 1; 38 | while(val <= MaxNum) { 39 | int tp = rand() % 4 + 
1; 40 | switch(tp) { 41 | case 1: sendMsg(val); break; 42 | case 2: sendMsg(val); break; 43 | case 3: sendMsg(val); break; 44 | case 4: sendMsg(val); break; 45 | default: assert(false); 46 | } 47 | this_thread::sleep_for(chrono::seconds(1)); 48 | } 49 | 50 | return 0; 51 | } 52 | 53 | 54 | 55 | 56 | -------------------------------------------------------------------------------- /test/shmmap.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | T* shmmap(const char * filename) { 11 | int fd = shm_open(filename, O_CREAT | O_RDWR, 0666); 12 | if(fd == -1) { 13 | std::cerr << "shm_open failed: " << strerror(errno) << std::endl; 14 | return nullptr; 15 | } 16 | if(ftruncate(fd, sizeof(T))) { 17 | std::cerr << "ftruncate failed: " << strerror(errno) << std::endl; 18 | close(fd); 19 | return nullptr; 20 | } 21 | T* ret = (T*)mmap(0, sizeof(T), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); 22 | close(fd); 23 | if(ret == MAP_FAILED) { 24 | std::cerr << "mmap failed: " << strerror(errno) << std::endl; 25 | return nullptr; 26 | } 27 | return ret; 28 | } 29 | -------------------------------------------------------------------------------- /test/sub.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "timestamp.h" 3 | #include "cpupin.h" 4 | #include "common.h" 5 | 6 | using namespace std; 7 | 8 | template 9 | void handleMsg(MsgQ::MsgHeader* header, const char* topic, uint64_t now) { 10 | T* msg = (T*)(header + 1); 11 | auto latency = now - msg->ts; 12 | cout << "topic: " << topic << " pubtid: " << msg->tid << " latency: " << (now - msg->ts) << " val:"; 13 | for(auto v : msg->val) { 14 | cout << " " << v; 15 | } 16 | cout << endl; 17 | } 18 | 19 | int main(int argc, const char** argv) { 20 | // cpupin(3); 21 | if(argc < 2) { 22 | cout << "usage: " << argv[0] << " TOPIC1 
[TOPIC2]..." << endl; 23 | exit(1); 24 | } 25 | vector> qs(argc); 26 | char buf[1024]; 27 | 28 | for(int i = 1; i < argc; i++) { 29 | if(!(qs[i].first = getMsgQ(argv[i]))) exit(1); 30 | qs[i].second = qs[i].first->sub(true); 31 | } 32 | 33 | while(true) { 34 | for(int i = 1; i < argc; i++) { 35 | auto q = qs[i].first; 36 | auto& idx = qs[i].second; 37 | auto res = q->read(idx, buf, sizeof(buf)); 38 | if(res == MsgQ::ReadNeedReSub) { 39 | cout << "topic: " << argv[i] << " need resub" << endl; 40 | idx = q->sub(true); 41 | res = q->read(idx, buf, sizeof(buf)); 42 | } 43 | if(res == MsgQ::ReadOK) { 44 | auto now = rdtsc(); 45 | MsgQ::MsgHeader* header = (MsgQ::MsgHeader*)buf; 46 | auto msg_type = header->userdata; 47 | switch(msg_type) { 48 | case 1: handleMsg(header, argv[i], now); break; 49 | case 2: handleMsg(header, argv[i], now); break; 50 | case 3: handleMsg(header, argv[i], now); break; 51 | case 4: handleMsg(header, argv[i], now); break; 52 | default: assert(false); 53 | } 54 | continue; 55 | } 56 | assert(res != MsgQ::ReadBuffTooShort); 57 | // res == ReadAgain 58 | } 59 | } 60 | 61 | 62 | return 0; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /test/timestamp.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | static const int kMicroSecondsPerSecond = 1000 * 1000; 4 | 5 | inline int64_t now() { 6 | struct timeval tv; 7 | gettimeofday(&tv, NULL); 8 | int64_t seconds = tv.tv_sec; 9 | return seconds * kMicroSecondsPerSecond + tv.tv_usec; 10 | } 11 | 12 | inline unsigned long long rdtsc() { 13 | return __builtin_ia32_rdtsc(); 14 | } 15 | 16 | inline unsigned long long rdtscp() { 17 | unsigned int dummy; 18 | return __builtin_ia32_rdtscp(&dummy); 19 | } 20 | 21 | --------------------------------------------------------------------------------