├── .gitignore ├── .travis.yml ├── LICENSE ├── LightCTR ├── common │ ├── avx.h │ ├── barrier.h │ ├── buffer.h │ ├── buffer_fusion.h │ ├── float16.h │ ├── hash.h │ ├── lock.h │ ├── memory_pool.h │ ├── message.h │ ├── message_queue.h │ ├── network.h │ ├── persistent_buffer.h │ ├── system.h │ ├── thread_pool.h │ └── time.h ├── dag │ ├── aggregate_node.h │ ├── dag_pipeline.h │ ├── node_abst.h │ ├── operator │ │ ├── activations_op.h │ │ ├── add_op.h │ │ ├── loss_op.h │ │ ├── matmul_op.h │ │ └── multiply_op.h │ ├── source_node.h │ └── terminus_node.h ├── distribut │ ├── consistent_hash.h │ ├── dist_machine_abst.h │ ├── master.h │ ├── paramserver.h │ ├── pull.h │ ├── push.h │ ├── ring_collect.h │ └── worker.h ├── distributed_algo_abst.h ├── dl_algo_abst.h ├── em_algo_abst.h ├── fm_algo_abst.h ├── gbm_algo_abst.h ├── predict │ ├── ann_index.h │ ├── fm_predict.cpp │ ├── fm_predict.h │ ├── gbm_predict.cpp │ └── gbm_predict.h ├── third │ ├── install_third.sh │ └── zeromq │ │ ├── include │ │ ├── zmq.h │ │ └── zmq_utils.h │ │ └── lib │ │ └── libzmq.a ├── train │ ├── layer │ │ ├── adapterLayer.h │ │ ├── convLayer.h │ │ ├── fullyconnLayer.h │ │ ├── layer_abst.h │ │ ├── poolingLayer.h │ │ └── sampleLayer.h │ ├── train_cnn_algo.h │ ├── train_embed_algo.cpp │ ├── train_embed_algo.h │ ├── train_ffm_algo.cpp │ ├── train_ffm_algo.h │ ├── train_fm_algo.cpp │ ├── train_fm_algo.h │ ├── train_gbm_algo.cpp │ ├── train_gbm_algo.h │ ├── train_gmm_algo.cpp │ ├── train_gmm_algo.h │ ├── train_nfm_algo.cpp │ ├── train_nfm_algo.h │ ├── train_rnn_algo.h │ ├── train_tm_algo.cpp │ ├── train_tm_algo.h │ ├── train_vae_algo.h │ └── unit │ │ ├── attention_unit.h │ │ └── lstm_unit.h └── util │ ├── activations.h │ ├── ensembling.h │ ├── evaluator.h │ ├── gradientUpdater.h │ ├── loss.h │ ├── matrix.h │ ├── momentumUpdater.h │ ├── pca.h │ ├── product_quantizer.h │ ├── quantile_compress.h │ ├── random.h │ ├── shm_hashtable.h │ └── significance.h ├── LightCTR_LOGO.png ├── Makefile ├── README.md ├── benchmark ├── 4_node_ps.png ├── 4_node_ring.png ├── vs_libffm.png ├── vs_libfm.png └── vs_tf_cpu.png ├── build.sh ├── build_ring.sh ├── data ├── proc_file_split.py ├── proc_text_topic.py ├── test_sparse.csv ├── train_cluster.csv ├── train_dense.csv ├── train_sparse.csv ├── train_text.txt └── vocab.txt └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.out 3 | *.dylib 4 | LightCTR.xcodeproj 5 | LightCTR/third/zeromq-4.2.2 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - osx 3 | - linux 4 | 5 | language: cpp 6 | 7 | script: 8 | - make 9 | - ./build.sh 3 3 127.0.0.1:17832 10 | - ./build_ring.sh 3 127.0.0.1:17832 11 | 12 | notifications: 13 | email: 14 | on_success: change 15 | on_failure: always 16 | -------------------------------------------------------------------------------- /LightCTR/common/barrier.h: -------------------------------------------------------------------------------- 1 | // 2 | // barrier.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
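// Usage sketch (illustrative only, based on the Barrier class below): a
// coordinator thread waits until three hypothetical worker threads have each
// called unblock() once.
//
//   Barrier barrier(3);                 // expect three unblock() calls
//   for (int i = 0; i < 3; ++i) {
//       std::thread([&barrier] {
//           /* ... do some work ... */
//           barrier.unblock();          // decrements the counter and notifies the waiter
//       }).detach();
//   }
//   barrier.block();                    // returns once the counter reaches zero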
7 | // 8 | 9 | #ifndef barrier_h 10 | #define barrier_h 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | // fence in write 18 | #define wmb() __asm__ __volatile__("sfence":::"memory") 19 | // fence in read 20 | #define rmb() __asm__ __volatile__("lfence":::"memory") 21 | // fence in write and read 22 | #define rwmb() __asm__ __volatile__("mfence":::"memory") 23 | 24 | 25 | class Barrier { 26 | public: 27 | Barrier() { 28 | 29 | } 30 | explicit Barrier(size_t count) { 31 | flag_ = (int)count; 32 | } 33 | 34 | inline void reset(size_t count = 1) { 35 | std::unique_lock glock(lock_); 36 | flag_ = (int)count; 37 | } 38 | 39 | inline void block() { 40 | std::unique_lock glock(lock_); 41 | cond_.wait(glock, [this] { 42 | return flag_ <= 0; 43 | }); 44 | } 45 | 46 | inline bool block(time_t timeout_ms, std::function timeout_callback) { 47 | std::unique_lock glock(lock_); 48 | auto status = cond_.wait_for(glock, std::chrono::milliseconds(timeout_ms), [this] { 49 | return flag_ <= 0; 50 | }); 51 | if (!status && timeout_callback) { 52 | timeout_callback(); 53 | } 54 | // false if the predicate pred still evaluates to false 55 | // after the rel_time timeout expired, otherwise true 56 | return status; 57 | } 58 | 59 | inline void unblock() { 60 | std::unique_lock glock(lock_); 61 | flag_--; 62 | assert(flag_ >= 0); 63 | cond_.notify_one(); 64 | } 65 | 66 | private: 67 | int flag_{1}; 68 | std::condition_variable cond_; 69 | std::mutex lock_; 70 | }; 71 | 72 | #endif /* barrier_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/common/buffer_fusion.h: -------------------------------------------------------------------------------- 1 | // 2 | // buffer_fusion.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/1/1. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
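// Usage sketch (illustrative only, based on the BufferFusion template below):
// fuse two separately owned float chunks into one logical address space so a
// single write can span the chunk boundary. `weights`, `bias` and `grad` are
// hypothetical pointers owned by the caller.
//
//   BufferFusion<float> fusion(/*autoRelease=*/false, /*lazyMode=*/false);
//   fusion.registMemChunk(weights, 256);   // first chunk: 256 floats
//   fusion.registMemChunk(bias, 16);       // second chunk: 16 floats
//   fusion.memset_c(0.0f);                 // zero both chunks
//   fusion.memcpy_in(250, grad, 10);       // copy crosses the 256-float boundary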
7 | // 8 | 9 | #ifndef buffer_fusion_h 10 | #define buffer_fusion_h 11 | 12 | #include "buffer.h" 13 | #include 14 | #include 15 | 16 | template 17 | class BufferFusion { 18 | public: 19 | BufferFusion() = delete; 20 | BufferFusion(bool _autoRelease, bool _lazyMode): 21 | autoRelease(_autoRelease), lazyMode(_lazyMode) { 22 | 23 | } 24 | 25 | ~BufferFusion() { 26 | if (autoRelease && lazyMode && lazyModeMemory) { 27 | delete lazyModeMemory; 28 | lazyModeMemory = nullptr; 29 | bufs_ptr_arr.clear(); 30 | bufs_size_arr.clear(); 31 | return; 32 | } 33 | if (!autoRelease) { 34 | bufs_ptr_arr.clear(); 35 | bufs_size_arr.clear(); 36 | return; 37 | } 38 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 39 | if (bufs_ptr_arr[i]) { 40 | delete[] bufs_ptr_arr[i]; 41 | bufs_ptr_arr[i] = NULL; 42 | } 43 | } 44 | bufs_ptr_arr.clear(); 45 | bufs_size_arr.clear(); 46 | } 47 | 48 | std::pair getMemory(size_t index) const { 49 | assert(index < bufs_size_arr.size()); 50 | return std::make_pair(bufs_ptr_arr[index], bufs_size_arr[index]); 51 | } 52 | 53 | void registMemChunk(T* ptr, size_t size) { 54 | assert(size > 0); 55 | if (ptr != nullptr) { 56 | bufs_ptr_arr.push_back(ptr); 57 | bufs_size_arr.push_back(size); 58 | total_size += size; 59 | } else { 60 | assert(lazyMode); 61 | // lazy mode 62 | bufs_size_arr.push_back(size); 63 | total_size += size; 64 | } 65 | } 66 | 67 | void lazyAllocate(float* allocatedMem = nullptr) { 68 | if (allocatedMem) { 69 | lazyModeMemory = allocatedMem; 70 | } else { 71 | lazyModeMemory = new T[total_size]; 72 | memset(lazyModeMemory, 0, total_size * sizeof(float)); 73 | } 74 | size_t inc_mem = 0; 75 | for (size_t i = 0; i < bufs_size_arr.size(); i++) { 76 | bufs_ptr_arr.push_back(lazyModeMemory + inc_mem); 77 | inc_mem += bufs_size_arr[i]; 78 | } 79 | assert(inc_mem == total_size); 80 | } 81 | 82 | size_t size() const { 83 | return total_size; 84 | } 85 | 86 | void memset_c(T __c) { 87 | if (likely(__c == 0)) { 88 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 89 | memset(bufs_ptr_arr[i], 0, bufs_size_arr[i] * sizeof(T)); 90 | } 91 | } else { 92 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 93 | for (size_t j = 0; j < bufs_size_arr[i]; j++) { 94 | *(bufs_ptr_arr[i] + j) = __c; 95 | } 96 | } 97 | } 98 | } 99 | 100 | void memcpy_out(Buffer** __dst, size_t __offset, size_t __n) const { 101 | assert(__offset + __n <= total_size); 102 | *__dst = new Buffer(__n); 103 | 104 | size_t which_one = 0; 105 | while (__offset >= bufs_size_arr[which_one]) { 106 | __offset -= bufs_size_arr[which_one]; 107 | which_one++; 108 | } 109 | const T* __src = bufs_ptr_arr[which_one] + __offset; 110 | if (__n <= bufs_size_arr[which_one] - __offset) { 111 | (*__dst)->append(__src, __n * sizeof(T)); 112 | return; 113 | } 114 | size_t offset = bufs_size_arr[which_one] - __offset; 115 | (*__dst)->append(__src, offset * sizeof(T)); 116 | __n -= offset; 117 | 118 | size_t tmp = bufs_size_arr[++which_one]; 119 | while (__n > tmp) { 120 | (*__dst)->append(bufs_ptr_arr[which_one], tmp * sizeof(T)); 121 | __n -= tmp; 122 | tmp = bufs_size_arr[++which_one]; 123 | } 124 | if (__n > 0) { 125 | (*__dst)->append(bufs_ptr_arr[which_one], __n * sizeof(T)); 126 | } 127 | } 128 | 129 | void memcpy_in(size_t __offset, const T* __src, size_t __n) { 130 | assert(__offset + __n <= total_size); 131 | size_t which_one = 0; 132 | while (__offset >= bufs_size_arr[which_one]) { 133 | __offset -= bufs_size_arr[which_one]; 134 | which_one++; 135 | } 136 | T* __dst = bufs_ptr_arr[which_one] + __offset; 137 | if (__n 
<= bufs_size_arr[which_one] - __offset) { 138 | memcpy(__dst, __src, __n * sizeof(T)); 139 | return; 140 | } 141 | size_t offset = bufs_size_arr[which_one] - __offset; 142 | memcpy(__dst, __src, offset * sizeof(T)); 143 | __n -= offset; 144 | 145 | size_t tmp = bufs_size_arr[++which_one]; 146 | while (__n > tmp) { 147 | memcpy(bufs_ptr_arr[which_one], __src + offset, tmp * sizeof(T)); 148 | __n -= tmp; 149 | offset += tmp; 150 | tmp = bufs_size_arr[++which_one]; 151 | } 152 | if (__n > 0) { 153 | memcpy(bufs_ptr_arr[which_one], __src + offset, __n * sizeof(T)); 154 | } 155 | } 156 | 157 | typedef std::function transform_callback_t; 158 | 159 | void transform(size_t __offset, size_t __n, transform_callback_t cb) const { 160 | assert(__offset + __n <= total_size); 161 | size_t which_one = 0; 162 | while (__offset >= bufs_size_arr[which_one]) { 163 | __offset -= bufs_size_arr[which_one]; 164 | which_one++; 165 | } 166 | T* __dst = bufs_ptr_arr[which_one] + __offset; 167 | if (__n <= bufs_size_arr[which_one] - __offset) { 168 | cb(__dst, __dst + __n); 169 | return; 170 | } 171 | size_t offset = bufs_size_arr[which_one] - __offset; 172 | cb(__dst, __dst + offset); 173 | __n -= offset; 174 | 175 | size_t tmp = bufs_size_arr[++which_one]; 176 | while (__n > tmp) { 177 | cb(bufs_ptr_arr[which_one], bufs_ptr_arr[which_one] + tmp); 178 | __n -= tmp; 179 | tmp = bufs_size_arr[++which_one]; 180 | } 181 | if (__n > 0) { 182 | cb(bufs_ptr_arr[which_one], bufs_ptr_arr[which_one] + __n); 183 | } 184 | } 185 | 186 | void flatten(Buffer** __dst) const { 187 | assert(total_size > 0); 188 | memcpy_out(__dst, 0, total_size); 189 | } 190 | 191 | private: 192 | bool autoRelease{false}; 193 | bool lazyMode{false}; 194 | T* lazyModeMemory = nullptr; 195 | 196 | std::vector bufs_ptr_arr; 197 | std::vector bufs_size_arr; 198 | size_t total_size = 0; 199 | }; 200 | 201 | #endif /* buffer_fusion_h */ 202 | -------------------------------------------------------------------------------- /LightCTR/common/float16.h: -------------------------------------------------------------------------------- 1 | // 2 | // float16.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/3. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 
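// Usage sketch (illustrative only, based on the Float16 class below):
// round-trip one value and batch-convert an array; `src_arr`, `dst_arr` and
// `len` are hypothetical.
//
//   float v = 3.14159f;
//   Float16 h(&v);                               // compress to an IEEE754 half
//   float16_t bits = h.float16_value();          // 16-bit storage form
//   float back = Float16(bits).float32_value();  // recover, with precision loss
//
//   Float16 codec;
//   codec.convert2Float16(src_arr, dst_arr, len);   // float*     -> float16_t*
//   codec.recover2Float32(dst_arr, src_arr, len);   // float16_t* -> float*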
7 | // 8 | 9 | #ifndef float16_h 10 | #define float16_h 11 | 12 | #define float16_t unsigned short 13 | 14 | #include 15 | #include 16 | #include "assert.h" 17 | 18 | class Float16 { 19 | public: 20 | Float16() { 21 | assert(sizeof(float) * 8 == 32); 22 | assert(sizeof(float16_t) * 8 == 16); 23 | } 24 | 25 | explicit Float16(const void* src32) { 26 | _float32_value = *static_cast(src32); 27 | _float16_value = convert(_float32_value); 28 | } 29 | 30 | explicit Float16(float16_t src16) { 31 | _float16_value = src16; 32 | _float32_value = toFloat32(src16); 33 | } 34 | 35 | inline float16_t float16_value() { 36 | return _float16_value; 37 | } 38 | inline float float32_value() { 39 | return _float32_value; 40 | } 41 | 42 | void convert2Float16(const float* input, float16_t* output, int len) { 43 | std::transform(input, input + len, 44 | output, 45 | std::bind( 46 | &Float16::convert, 47 | this, 48 | std::placeholders::_1 49 | ) 50 | ); 51 | } 52 | 53 | void recover2Float32(const float16_t* input, float* output, int len) { 54 | std::transform(input, input + len, 55 | output, 56 | std::bind( 57 | &Float16::toFloat32, 58 | this, 59 | std::placeholders::_1 60 | ) 61 | ); 62 | } 63 | 64 | private: 65 | inline float toFloat32(float16_t h) { 66 | int sign = ((h >> 15) & 1); // 1 67 | int exp = ((h >> 10) & 0x1f); // 5 68 | int mantissa = (h & 0x3ff); // 10 69 | unsigned f = 0; 70 | 71 | if (exp > 0 && exp < 31) { 72 | // normal 73 | exp += 112; // 127 - 15 74 | f = (sign << 31) | (exp << 23) | (mantissa << 13); 75 | } else if (exp == 0) { 76 | if (mantissa) { 77 | // subnormal 78 | exp += 113; // 127 - 15 + 1 79 | while ((mantissa & (1 << 10)) == 0) { 80 | mantissa <<= 1; 81 | exp--; 82 | } 83 | mantissa &= 0x3ff; 84 | f = (sign << 31) | (exp << 23) | (mantissa << 13); 85 | } else { 86 | f = (sign << 31); // ±0.0 87 | } 88 | } else if (exp == 31) { 89 | if (mantissa) { 90 | f = 0x7fffffff; // NAN 91 | } else { 92 | f = (0xff << 23) | (sign << 31); // INF 93 | } 94 | } 95 | return *reinterpret_cast(&f); 96 | } 97 | 98 | inline float16_t convert(const float& src) { 99 | // convert Float32 into Binary float16 (unsigned short) based IEEE754 standard 100 | unsigned const& s = *reinterpret_cast(&src); 101 | 102 | uint16_t sign = uint16_t((s >> 16) & 0x8000); // 1 103 | int16_t exp = uint16_t(((s >> 23) & 0xff) - 127); // 8 104 | int mantissa = s & 0x7fffff; // 23 105 | 106 | if ((s & 0x7fffffff) == 0) { // ±0.0 107 | return 0; 108 | } 109 | // special number 110 | if (exp > 15) { // bias changes from 127 to 15 111 | if (exp == 128 && mantissa) { 112 | // still NAN 113 | return 0x7fff; 114 | } else { 115 | // exp > 15 causes upper overflow, INF 116 | return sign | 0x7c00; 117 | } 118 | } 119 | 120 | uint16_t u = 0; 121 | int sticky_bit = 0; 122 | 123 | if (exp >= -14) { 124 | // normal fp32 to normal fp16 125 | exp = uint16_t(exp + uint16_t(15)); 126 | u = uint16_t(((exp & 0x1f) << 10)); 127 | u = uint16_t(u | (mantissa >> 13)); 128 | } else { 129 | // normal float to subnormal (exp=0) 130 | int rshift = - (exp + 14); 131 | if (rshift < 32) { 132 | mantissa |= (1 << 23); 133 | sticky_bit = ((mantissa & ((1 << rshift) - 1)) != 0); 134 | 135 | mantissa = (mantissa >> rshift); 136 | u = (uint16_t(mantissa >> 13) & 0x3ff); 137 | } else { 138 | // drop precision 139 | mantissa = 0; 140 | u = 0; 141 | } 142 | } 143 | 144 | // round to nearest even 145 | int round_bit = ((mantissa >> 12) & 1); 146 | sticky_bit |= ((mantissa & ((1 << 12) - 1)) != 0); 147 | 148 | if ((round_bit && sticky_bit) || (round_bit && (u & 
1))) { 149 | u = uint16_t(u + 1); 150 | } 151 | 152 | u |= sign; 153 | return *reinterpret_cast(&u); 154 | } 155 | 156 | void print_bin(float num) { 157 | printf("32: "); 158 | unsigned const& s = *reinterpret_cast(&num); 159 | for(size_t i = 1; i <= sizeof(num) * 8; i++) { 160 | printf("%d", (s >> (sizeof(num) * 8 - i)) & 1); 161 | if (i == 1 || i == 9 || i == 32) { 162 | printf("\t"); 163 | } 164 | } 165 | puts(""); 166 | } 167 | void print_bin16(float16_t num) { 168 | printf("16: "); 169 | unsigned const& s = *reinterpret_cast(&num); 170 | for(size_t i = 1; i <= sizeof(num) * 8; i++) { 171 | printf("%d", (s >> (sizeof(num) * 8 - i)) & 1); 172 | if (i == 1 || i == 6 || i == 16) { 173 | printf("\t"); 174 | } 175 | } 176 | puts(""); 177 | } 178 | 179 | float16_t _float16_value; 180 | float _float32_value; 181 | }; 182 | 183 | #endif /* float16_h */ 184 | -------------------------------------------------------------------------------- /LightCTR/common/hash.h: -------------------------------------------------------------------------------- 1 | // 2 | // hash.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef hash_h 10 | #define hash_h 11 | 12 | #include 13 | 14 | #define BIG_CONSTANT(x) (x##LLU) 15 | 16 | inline unsigned int murMurHash(const std::string& key) { 17 | int len = (int)key.length(); 18 | const unsigned int m = 0x5bd1e995; 19 | const int r = 24; 20 | const int seed = 97; 21 | unsigned int h = seed ^ len; 22 | // Mix 4 bytes at a time into the hash 23 | const unsigned char *data = (const unsigned char *)key.c_str(); 24 | while(len >= 4) 25 | { 26 | unsigned int k = *(unsigned int *)data; 27 | k *= m; 28 | k ^= k >> r; 29 | k *= m; 30 | h *= m; 31 | h ^= k; 32 | data += 4; 33 | len -= 4; 34 | } 35 | // Handle the last few bytes of the input array 36 | switch(len) 37 | { 38 | case 3: h ^= data[2] << 16; 39 | case 2: h ^= data[1] << 8; 40 | case 1: h ^= data[0]; 41 | h *= m; 42 | }; 43 | // Do a few final mixes of the hash to ensure the last few 44 | // bytes are well-incorporated. 45 | h ^= h >> 13; 46 | h *= m; 47 | h ^= h >> 15; 48 | return h; 49 | } 50 | 51 | inline unsigned int murMurHash(uint64_t k) { 52 | k ^= k >> 33; 53 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 54 | k ^= k >> 33; 55 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 56 | k ^= k >> 33; 57 | return (unsigned int)k; 58 | } 59 | 60 | #endif /* hash_h */ 61 | -------------------------------------------------------------------------------- /LightCTR/common/lock.h: -------------------------------------------------------------------------------- 1 | // 2 | // lock.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
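// Usage sketch (illustrative only, based on the classes below): SpinLock for
// very short critical sections, RWLock when readers dominate writers.
//
//   SpinLock spin;
//   spin.lock();                 // busy-waits until acquired
//   /* ... short critical section ... */
//   spin.unlock();
//
//   RWLock rw;
//   rw.rlock();                  // shared: many readers may hold it at once
//   /* ... read shared state ... */
//   rw.unlock();
//   rw.wlock();                  // exclusive: a single writer
//   /* ... mutate shared state ... */
//   rw.unlock();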
7 | // 8 | 9 | #ifndef lock_h 10 | #define lock_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "assert.h" 16 | 17 | #define CAS32(ptr, val_old, val_new)({ char ret; __asm__ __volatile__("lock; cmpxchgl %2,%0; setz %1": "+m"(*ptr), "=q"(ret): "r"(val_new),"a"(val_old): "memory"); ret;}) 18 | 19 | inline bool atomic_compare_and_swap(float* ptr, const float &oldval, const float &newval) { 20 | return __sync_bool_compare_and_swap(reinterpret_cast(ptr), 21 | *reinterpret_cast(&oldval), 22 | *reinterpret_cast(&newval)); 23 | }; 24 | 25 | 26 | class SpinLock { 27 | public: 28 | SpinLock() : flag_{false} { 29 | } 30 | 31 | void lock() { 32 | while (flag_.test_and_set(std::memory_order_acquire)); 33 | } 34 | 35 | void unlock() { 36 | flag_.clear(std::memory_order_release); 37 | } 38 | protected: 39 | std::atomic_flag flag_; 40 | }; 41 | 42 | class RWLock { 43 | public: 44 | RWLock() { 45 | assert((pthread_rwlock_init(&lock_, NULL) == 0)); 46 | } 47 | ~RWLock() { 48 | assert((pthread_rwlock_destroy(&lock_) == 0)); 49 | } 50 | void rlock() { 51 | assert((pthread_rwlock_rdlock(&lock_) == 0)); 52 | } 53 | void wlock() { 54 | assert((pthread_rwlock_wrlock(&lock_) == 0)); 55 | } 56 | void unlock() { 57 | assert((pthread_rwlock_unlock(&lock_) == 0)); 58 | } 59 | private: 60 | pthread_rwlock_t lock_; 61 | }; 62 | 63 | #endif /* lock_h */ 64 | -------------------------------------------------------------------------------- /LightCTR/common/memory_pool.h: -------------------------------------------------------------------------------- 1 | // 2 | // memory_pool.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/26. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef memory_pool_h 10 | #define memory_pool_h 11 | 12 | #include 13 | #include 14 | #include "lock.h" 15 | #include 16 | 17 | // Memory Pool for managing vector allocation and deallocation 18 | // Meanwhile, it can monitored memory leak and wild pointer 19 | class MemoryPool { 20 | public: 21 | static MemoryPool& Instance() { // singleton 22 | static MemoryPool pool; 23 | return pool; 24 | } 25 | 26 | ~MemoryPool() { 27 | leak_checkpoint(); 28 | std::unique_lock f_lock(freePtr_lock); 29 | for (auto& pair : freePtr_list) { 30 | free(pair.second); 31 | } 32 | } 33 | 34 | inline void leak_checkpoint() { 35 | assert(allocPtr_list.empty()); // memory leaks 36 | } 37 | 38 | inline void* allocate(size_t size) { 39 | { 40 | freePtr_lock.lock(); 41 | for (auto it = freePtr_list.begin(); it != freePtr_list.end(); it++) { 42 | const size_t tmp_size = it->first; 43 | if (tmp_size >= size && tmp_size <= (size * 3) >> 1) { 44 | void* tmp_ptr = it->second; 45 | freePtr_list.erase(it); 46 | freePtr_lock.unlock(); 47 | 48 | std::unique_lock a_lock(allocPtr_lock); 49 | allocPtr_list.push_back(std::make_pair(tmp_size, tmp_ptr)); 50 | return tmp_ptr; 51 | } 52 | } 53 | freePtr_lock.unlock(); 54 | } 55 | std::unique_lock a_lock(allocPtr_lock); 56 | size = _alignedMemSize(size); 57 | void* tmp_ptr = malloc(size); 58 | assert(tmp_ptr); // out of memory 59 | allocPtr_list.push_back(std::make_pair(size, tmp_ptr)); 60 | return tmp_ptr; 61 | } 62 | 63 | inline void deallocate(void* ptr) { 64 | assert(ptr); 65 | allocPtr_lock.lock(); 66 | for (auto it = allocPtr_list.begin(); it != allocPtr_list.end(); it++) { 67 | if (it->second == ptr) { 68 | const size_t tmp_size = it->first; 69 | allocPtr_list.erase(it); 70 | allocPtr_lock.unlock(); 71 | 72 | std::unique_lock f_lock(freePtr_lock); 73 | 
freePtr_list.push_back(std::make_pair(tmp_size, ptr)); 74 | return; 75 | } 76 | } 77 | allocPtr_lock.unlock(); 78 | assert(false); // wild pointer 79 | } 80 | 81 | private: 82 | static const int MemAlignment = 16; 83 | inline size_t _alignedMemSize(size_t size) const { 84 | return (size + MemAlignment - 1) & -MemAlignment; 85 | } 86 | 87 | 88 | std::list > freePtr_list, allocPtr_list; 89 | std::mutex freePtr_lock, allocPtr_lock; 90 | }; 91 | 92 | 93 | template 94 | class ArrayAllocator { 95 | public: 96 | typedef T value_type; 97 | typedef T* pointer; 98 | typedef const T* const_pointer; 99 | typedef T& reference; 100 | typedef const T& const_reference; 101 | typedef size_t size_type; 102 | typedef ptrdiff_t difference_type; 103 | 104 | template 105 | struct rebind { 106 | typedef std::allocator other; 107 | }; 108 | 109 | pointer allocate(size_type n, const void* hint=0) { 110 | return (T*)MemoryPool::Instance().allocate((difference_type)n * sizeof(T)); 111 | } 112 | 113 | void deallocate(pointer p, size_type n) { 114 | MemoryPool::Instance().deallocate(p); 115 | } 116 | 117 | void destroy(pointer p) { 118 | p->~T(); 119 | } 120 | 121 | pointer address(reference x) { 122 | return (pointer)&x; 123 | } 124 | 125 | const_pointer address(const_reference x) { 126 | return (const_pointer)&x; 127 | } 128 | 129 | size_type max_size() const { 130 | return size_type(UINTMAX_MAX / sizeof(T)); 131 | } 132 | }; 133 | 134 | #endif /* memory_pool_h */ 135 | -------------------------------------------------------------------------------- /LightCTR/common/message.h: -------------------------------------------------------------------------------- 1 | // 2 | // message.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
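// Usage sketch (illustrative only, based on the types below): build a PUSH
// request, attach a payload and a completion callback, then wrap it into a
// Package. `epoch`, `grad_ptr` and `grad_bytes` are hypothetical, and the
// callback assumes response_callback_t is
// std::function<void(std::shared_ptr<PackageDescript>)>.
//
//   PackageDescript desc(REQUEST_PUSH, /*epoch_version=*/epoch);  // epoch must be > 0
//   desc.content.append(grad_ptr, grad_bytes);   // payload bytes go into the Buffer
//   desc.callback = [](std::shared_ptr<PackageDescript> response) {
//       /* ... consume the pulled-back response ... */
//   };
//   Package pkg(desc);   // splits into a fixed-size head and a content message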
7 | // 8 | 9 | #ifndef message_h 10 | #define message_h 11 | 12 | #include "../third/zeromq/include/zmq.h" 13 | #include "assert.h" 14 | #include "buffer.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | enum MsgType { 21 | RESPONSE = 0, 22 | REQUEST_HANDSHAKE, 23 | REQUEST_ACK, 24 | REQUEST_FIN, 25 | REQUEST_PUSH, 26 | REQUEST_PULL, 27 | REQUEST_INFER, 28 | HEARTBEAT, 29 | BREAKER, 30 | RESERVED, 31 | UNKNOWN 32 | }; 33 | 34 | class Package; 35 | class Buffer; 36 | class PackageDescript; 37 | 38 | typedef std::function)> response_callback_t; 39 | typedef std::function)> sync_barrier_callback_t; 40 | 41 | class ZMQ_Message { 42 | public: 43 | ZMQ_Message() { 44 | assert(0 == zmq_msg_init(&_zmg)); 45 | } 46 | ZMQ_Message(char* buf, size_t size) { 47 | assert(0 == zmq_msg_init_size(&_zmg, size)); 48 | memcpy((void *)buffer(), buf, size); 49 | } 50 | 51 | ZMQ_Message(const ZMQ_Message &) = delete; 52 | ZMQ_Message(const Buffer& buf) { 53 | assert(0 == zmq_msg_init_size(&_zmg, buf.size())); 54 | memcpy((void *)buffer(), buf.buffer(), buf.size()); 55 | } 56 | 57 | ~ZMQ_Message() { 58 | assert(0 == zmq_msg_close(&_zmg)); 59 | } 60 | 61 | ZMQ_Message &operator=(const ZMQ_Message &) = delete; 62 | ZMQ_Message &operator=(ZMQ_Message &&other) { 63 | if (this != &other) { 64 | assert(0 == zmq_msg_move(&_zmg, &other.zmg())); 65 | } 66 | return *this; 67 | } 68 | 69 | size_t size() { 70 | return zmq_msg_size(&_zmg); 71 | } 72 | 73 | const char* buffer() { 74 | return (char *)zmq_msg_data(&_zmg); 75 | } 76 | 77 | zmq_msg_t& zmg() { 78 | return _zmg; 79 | } 80 | 81 | private: 82 | zmq_msg_t _zmg; 83 | }; 84 | 85 | 86 | class PackageDescript { 87 | public: 88 | // fill by handler 89 | MsgType msgType; 90 | size_t epoch_version; 91 | 92 | // fill when send 93 | size_t node_id; 94 | size_t message_id; 95 | 96 | response_callback_t callback; 97 | sync_barrier_callback_t sync_callback = NULL; 98 | 99 | Buffer content; 100 | 101 | time_t send_time; // record for timeout monitor 102 | size_t to_node_id; 103 | 104 | ~PackageDescript() { 105 | 106 | } 107 | explicit PackageDescript(MsgType _msgType, size_t _epoch_version = 0) 108 | : msgType(_msgType), epoch_version(_epoch_version) { 109 | message_id = 0; 110 | send_time = 0; 111 | node_id = to_node_id = -1; 112 | if (msgType == REQUEST_PUSH) { 113 | assert(epoch_version > 0); 114 | } 115 | } 116 | PackageDescript &operator=(const PackageDescript &) = delete; 117 | PackageDescript &operator=(PackageDescript&& other) { 118 | if (this != &other) { 119 | msgType = other.msgType; 120 | epoch_version = other.epoch_version; 121 | node_id = other.node_id; 122 | message_id = other.message_id; 123 | send_time = other.send_time; 124 | to_node_id = other.to_node_id; 125 | callback = std::move(other.callback); 126 | sync_callback = std::move(other.sync_callback); 127 | other.callback = NULL; 128 | other.sync_callback = NULL; 129 | content = std::move(other.content); 130 | } 131 | return *this; 132 | } 133 | PackageDescript(const PackageDescript& other) { // copy only by constructor 134 | msgType = other.msgType; 135 | epoch_version = other.epoch_version; 136 | node_id = other.node_id; 137 | message_id = other.message_id; 138 | send_time = other.send_time; 139 | to_node_id = other.to_node_id; 140 | callback = other.callback; 141 | sync_callback = other.sync_callback; 142 | content = Buffer(other.content.buffer(), other.content.size()); 143 | } 144 | PackageDescript(PackageDescript&& other) { 145 | msgType = other.msgType; 146 | epoch_version = 
other.epoch_version; 147 | node_id = other.node_id; 148 | message_id = other.message_id; 149 | send_time = other.send_time; 150 | to_node_id = other.to_node_id; 151 | callback = std::move(other.callback); 152 | sync_callback = std::move(other.sync_callback); 153 | other.callback = NULL; 154 | other.sync_callback = NULL; 155 | content = std::move(other.content); 156 | } 157 | 158 | bool operator==(const PackageDescript& other) const { 159 | if (message_id == other.message_id) { 160 | return true; 161 | } 162 | return false; 163 | } 164 | }; 165 | 166 | const size_t _Head_size = sizeof(MsgType) + 3 * sizeof(size_t); 167 | 168 | class Package { 169 | public: 170 | Package() { 171 | } 172 | Package(const PackageDescript& pDesc) { 173 | head = ZMQ_Message((char *)&pDesc, _Head_size); 174 | content = ZMQ_Message(pDesc.content); 175 | } 176 | 177 | void Descript(std::shared_ptr& pDesc) { 178 | pDesc = std::make_shared(PackageDescript(UNKNOWN)); 179 | assert(pDesc); 180 | assert(head.size() == _Head_size); 181 | memcpy(pDesc.get(), head.buffer(), _Head_size); 182 | pDesc->content = Buffer(content.buffer(), content.size()); 183 | } 184 | 185 | Package &operator=(const Package &) = delete; 186 | Package(const Package &) = delete; 187 | Package(Package &&other) { 188 | head = std::move(other.head); 189 | content = std::move(other.content); 190 | } 191 | 192 | ZMQ_Message head; 193 | ZMQ_Message content; 194 | }; 195 | 196 | #endif /* message_h */ 197 | -------------------------------------------------------------------------------- /LightCTR/common/message_queue.h: -------------------------------------------------------------------------------- 1 | // 2 | // message_queue.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/14. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
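// Usage sketch (illustrative only, based on the MessageQueue template below):
// one producer pushes while a consumer blocks on front()/pop().
//
//   MessageQueue<int> mq;
//   std::thread producer([&mq] {
//       mq.push(42);             // copies the value in and notifies waiters
//   });
//   int head = mq.front();       // blocks until the queue is non-empty
//   mq.pop();                    // removes the element just observed
//   producer.join();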
7 | // 8 | 9 | #ifndef message_queue_h 10 | #define message_queue_h 11 | 12 | #include 13 | #include 14 | #include "lock.h" 15 | #include "time.h" 16 | 17 | enum SendType { 18 | Immediately = 0, 19 | After, 20 | Period, 21 | Invalid 22 | }; 23 | 24 | struct MessageEventWrapper { 25 | SendType send_type; 26 | time_t after_or_period_time_ms; 27 | time_t time_record; 28 | std::function handler; 29 | 30 | MessageEventWrapper(SendType _send_type, 31 | time_t _time, 32 | std::function _handler) : 33 | send_type(_send_type), after_or_period_time_ms(_time), handler(_handler) { 34 | updateTime(); 35 | } 36 | 37 | void updateTime() { 38 | update_tv(); 39 | time_record = get_now_ms(); 40 | } 41 | }; 42 | 43 | template 44 | class MessageQueue { 45 | public: 46 | MessageQueue() { 47 | } 48 | 49 | inline const T& front() { 50 | std::unique_lock lk(mu_); 51 | cond_.wait(lk, [this]{ 52 | return !queue_.empty(); 53 | }); 54 | return queue_.front(); 55 | } 56 | 57 | inline void push(const T& new_value) { 58 | { 59 | std::unique_lock lk(mu_); 60 | queue_.emplace_back(T(new_value)); // do copy 61 | element_cnt++; 62 | } 63 | cond_.notify_all(); 64 | } 65 | 66 | inline void emplace(T&& new_value) { 67 | { 68 | std::unique_lock lk(mu_); 69 | queue_.emplace_back(std::forward(new_value)); 70 | element_cnt++; 71 | } 72 | cond_.notify_all(); 73 | } 74 | 75 | inline void pop() { 76 | std::unique_lock lk(mu_); 77 | cond_.wait(lk, [this]{ 78 | return !queue_.empty(); 79 | }); 80 | queue_.pop_front(); 81 | element_cnt--; 82 | } 83 | 84 | inline bool pop_if(const T& compare, T* value) { 85 | std::unique_lock lk(mu_); 86 | cond_.wait(lk, [this]{ 87 | return !queue_.empty(); 88 | }); 89 | if (compare == queue_.front()) { 90 | *value = std::move(queue_.front()); 91 | queue_.pop_front(); 92 | element_cnt--; 93 | return 1; 94 | } 95 | return 0; 96 | } 97 | 98 | inline typename std::list::iterator mutable_element(size_t index) { 99 | std::unique_lock lk(mu_); 100 | assert(index < element_cnt); 101 | auto it = queue_.begin(); 102 | while (index--) { 103 | it++; 104 | } 105 | return it; 106 | } 107 | 108 | inline int modify(const T& value, T* addr) { 109 | std::unique_lock lk(mu_); 110 | if (queue_.empty()) { 111 | return 0; 112 | } 113 | auto it = find(queue_.begin(), queue_.end(), value); 114 | if (it == queue_.end()) { 115 | return -1; 116 | } 117 | addr = &(*it); 118 | return 1; 119 | } 120 | 121 | inline int erase(const T& value) { 122 | std::unique_lock lk(mu_); 123 | if (queue_.empty()) { 124 | return 0; 125 | } 126 | auto it = find(queue_.begin(), queue_.end(), value); 127 | if (it == queue_.end()) { 128 | return -1; 129 | } 130 | queue_.erase(it); 131 | element_cnt--; 132 | return 1; 133 | } 134 | 135 | inline size_t size() { 136 | std::unique_lock lk(mu_); 137 | return element_cnt; 138 | } 139 | 140 | inline bool empty() { 141 | std::unique_lock lk(mu_); 142 | return element_cnt == 0; 143 | } 144 | 145 | protected: 146 | std::mutex mu_; 147 | size_t element_cnt = 0; 148 | std::list queue_; 149 | std::condition_variable cond_; 150 | }; 151 | 152 | class MessageQueueRunloop : public MessageQueue { 153 | public: 154 | MessageQueueRunloop() : runloop_thread(std::thread(&MessageQueueRunloop::runloop, this)){ 155 | } 156 | 157 | ~MessageQueueRunloop() { 158 | breakflag = true; 159 | wait_cond_.notify_all(); 160 | 161 | runloop_thread.join(); 162 | } 163 | 164 | private: 165 | void runloop() { 166 | for(;;) { 167 | std::unique_lock lk(mu_); 168 | if (breakflag) { 169 | return; 170 | } 171 | // in this case MessageQueue can't 
be added, so No need copy the queue 172 | 173 | time_t wait_time = 10 * 1000; 174 | 175 | for (auto it = queue_.begin(); it != queue_.end(); it++) { 176 | if (it->send_type == SendType::Invalid) { 177 | queue_.erase(it); 178 | wait_time = 0; 179 | break; 180 | } else if (it->send_type == SendType::Immediately) { 181 | it->handler(*it); 182 | queue_.erase(it); 183 | wait_time = 0; 184 | break; 185 | } else if (it->send_type == SendType::After) { 186 | time_t cost = gettickspan(it->time_record); 187 | if (cost >= it->after_or_period_time_ms) { 188 | it->handler(*it); 189 | queue_.erase(it); 190 | wait_time = 0; 191 | break; 192 | } else { 193 | wait_time = std::min(wait_time, it->after_or_period_time_ms - cost); 194 | } 195 | } else if (it->send_type == SendType::Period) { 196 | time_t cost = gettickspan(it->time_record); 197 | if (cost >= it->after_or_period_time_ms) { 198 | it->handler(*it); 199 | it->updateTime(); 200 | wait_time = 0; 201 | break; 202 | } else { 203 | wait_time = std::min(wait_time, it->after_or_period_time_ms - cost); 204 | } 205 | } 206 | } 207 | assert(wait_time >= 0); 208 | if (wait_time > 0) { 209 | wait_cond_.wait_for(lk, std::chrono::milliseconds(wait_time)); 210 | } 211 | } 212 | } 213 | private: 214 | std::thread runloop_thread; 215 | bool breakflag{false}; 216 | std::condition_variable wait_cond_; 217 | }; 218 | 219 | #endif /* message_queue_h */ 220 | -------------------------------------------------------------------------------- /LightCTR/common/persistent_buffer.h: -------------------------------------------------------------------------------- 1 | // 2 | // persistent_buffer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/21. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef persistent_buffer_h 10 | #define persistent_buffer_h 11 | 12 | #include "buffer.h" 13 | #include "system.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #ifdef __APPLE__ 21 | #include 22 | #else 23 | #include 24 | #endif 25 | 26 | class PersistentBuffer { 27 | public: 28 | PersistentBuffer(const char* path, size_t size, bool alarm_when_exist) { 29 | int flag = O_CREAT | O_RDWR; 30 | if (alarm_when_exist) { 31 | flag |= O_EXCL; 32 | } 33 | int _fd = open(path, flag, 0666); 34 | if (_fd < 0) { 35 | printf("open file errno = %d %s\n", errno, strerror(errno)); 36 | } 37 | assert(_fd >= 0); 38 | 39 | _capacity = lseek(_fd, 0, SEEK_END); 40 | if (_capacity < size) { 41 | assert(ftruncate(_fd, size) == 0); 42 | lseek(_fd, 0, SEEK_END); 43 | _capacity = size; 44 | } 45 | assert(size <= _capacity); 46 | 47 | assert(close(_fd) == 0); 48 | 49 | _buffer = nullptr; 50 | assert(mmapLoad(path, (void**)&_buffer, true)); 51 | 52 | assert(_buffer); 53 | memset(_buffer, 0, _capacity); 54 | 55 | _cursor = _end = _buffer; 56 | } 57 | 58 | ~PersistentBuffer() { 59 | if (_buffer) { 60 | munmap(_buffer, _capacity); 61 | } 62 | } 63 | 64 | inline size_t size() const { 65 | return _end - _buffer; 66 | } 67 | 68 | template 69 | inline void write(T *x, size_t len) { 70 | assert(size() + len <= _capacity); // check address sanitizer 71 | memcpy(_end, x, len); 72 | _end += len; 73 | } 74 | 75 | template 76 | inline void read(T *x, size_t len = 0) { 77 | if (len == 0) { 78 | len = size(); // read all 79 | } 80 | memcpy(x, _cursor, len); 81 | _cursor += len; 82 | assert(_cursor <= _end); 83 | } 84 | 85 | private: 86 | char *_buffer = nullptr; 87 | char *_cursor = nullptr; 88 | char *_end = nullptr; 89 | size_t _capacity; 90 | }; 91 | 
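// Usage sketch (illustrative only): stage a block of parameters in the
// mmap-backed file and copy it back through the same object; `weights`,
// `copy` and `n` are hypothetical.
//
//   PersistentBuffer buf("/tmp/lightctr.bin", 4096, /*alarm_when_exist=*/false);
//   buf.write(weights, n * sizeof(float));   // appends raw bytes, advances _end
//   buf.read(copy, n * sizeof(float));       // copies bytes out from _cursor
//
// Note that the constructor zero-fills the mapping, so re-opening an existing
// file through this class clears whatever was written to it before.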
92 | #endif /* persistent_buffer_h */ 93 | -------------------------------------------------------------------------------- /LightCTR/common/system.h: -------------------------------------------------------------------------------- 1 | // 2 | // system.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/3. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef system_h 10 | #define system_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "assert.h" 25 | #include "lock.h" 26 | 27 | #ifndef likely 28 | #define likely(x) __builtin_expect(!!(x), 1) 29 | #endif 30 | #ifndef unlikely 31 | #define unlikely(x) __builtin_expect(!!(x), 0) 32 | #endif 33 | 34 | inline int getEnv(const char *env_var, int defalt) { 35 | auto p = std::getenv(env_var); 36 | if (!p) { 37 | return defalt; 38 | } 39 | return atoi(p); 40 | } 41 | 42 | inline const char * getEnv(const char *env_var, const char *defalt) { 43 | auto p = std::getenv(env_var); 44 | if (!p) { 45 | return defalt; 46 | } 47 | return p; 48 | } 49 | 50 | template 51 | auto ignore_signal_call(FUNC func, ARGS &&... args) -> 52 | typename std::result_of::type { 53 | for (;;) { 54 | auto err = func(args...); 55 | if (err < 0 && errno == EINTR) { 56 | puts("Ignored EINTR Signal, retry"); 57 | continue; 58 | } 59 | return err; 60 | } 61 | } 62 | 63 | double SystemMemoryUsage() { 64 | FILE* fp = fopen("/proc/meminfo", "r"); 65 | assert(fp); 66 | size_t bufsize = 256 * sizeof(char); 67 | char* buf = new (std::nothrow) char[bufsize]; 68 | assert(buf); 69 | int totalMem = -1, freeMem = -1, bufMem = -1, cacheMem = -1; 70 | 71 | while (getline(&buf, &bufsize, fp) >= 0) { 72 | if (0 == strncmp(buf, "MemTotal", 8)) { 73 | if (1 != sscanf(buf, "%*s%d", &totalMem)) { 74 | std::cout << "failed to get MemTotal from string: [" << buf << "]"; 75 | } 76 | } else if (0 == strncmp(buf, "MemFree", 7)) { 77 | if (1 != sscanf(buf, "%*s%d", &freeMem)) { 78 | std::cout << "failed to get MemFree from string: [" << buf << "]"; 79 | } 80 | } else if (0 == strncmp(buf, "Buffers", 7)) { 81 | if (1 != sscanf(buf, "%*s%d", &bufMem)) { 82 | std::cout << "failed to get Buffers from string: [" << buf << "]"; 83 | } 84 | } else if (0 == strncmp(buf, "Cached", 6)) { 85 | if (1 != sscanf(buf, "%*s%d", &cacheMem)) { 86 | std::cout << "failed to get Cached from string: [" << buf << "]"; 87 | } 88 | } 89 | if (totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1) { 90 | break; 91 | } 92 | } 93 | assert(totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1); 94 | fclose(fp); 95 | delete[] buf; 96 | double usedMem = 1.0 - 1.0 * (freeMem + bufMem + cacheMem) / totalMem; 97 | return usedMem; 98 | } 99 | 100 | bool mmapLoad(const char* filename, void** mmapPtr, bool writable) { 101 | int flag = O_RDONLY; 102 | if (writable) 103 | flag = O_RDWR; 104 | int _fd = open(filename, flag); 105 | if (_fd == -1) { 106 | printf("open file errno = %d %s\n", errno, strerror(errno)); 107 | return false; 108 | } 109 | off_t size = lseek(_fd, 0, SEEK_END); 110 | 111 | flag = PROT_READ; 112 | if (writable) 113 | flag |= PROT_WRITE; 114 | #ifdef MAP_POPULATE 115 | *mmapPtr = mmap( 116 | 0, size, flag, MAP_SHARED | MAP_POPULATE, _fd, 0); 117 | #else 118 | *mmapPtr = mmap( 119 | 0, size, flag, MAP_SHARED, _fd, 0); 120 | #endif 121 | close(_fd); 122 | if (*mmapPtr == MAP_FAILED) { 123 | return false; 124 | } 125 | return 
true; 126 | } 127 | 128 | char* getShmAddr(int key, size_t size, int flag = 0666|IPC_CREAT) { 129 | assert(key != 0); 130 | 131 | int shmId = shmget(key, size, flag); 132 | if (shmId < 0) { 133 | // ipcs -m 134 | // sysctl -w kern.sysv.shmmax to adjust shm max memory size 135 | printf("%d %s\n", errno, strerror(errno)); 136 | } 137 | assert(shmId >= 0); 138 | 139 | char* shmAddr = (char *)shmat(shmId, NULL, 0); 140 | assert(shmAddr != (char *)-1); 141 | 142 | return shmAddr; 143 | } 144 | 145 | #endif /* system_h */ 146 | -------------------------------------------------------------------------------- /LightCTR/common/thread_pool.h: -------------------------------------------------------------------------------- 1 | // 2 | // thread_pool.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef thread_pool_h 10 | #define thread_pool_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "assert.h" 21 | 22 | static std::atomic isSynchronized(true); 23 | 24 | inline void setNotSynchronized() { 25 | isSynchronized.store(false, std::memory_order_release); 26 | } 27 | inline void synchronize() { 28 | if(isSynchronized.load(std::memory_order_acquire)) { 29 | return; 30 | } 31 | isSynchronized.store(true, std::memory_order_release); 32 | } 33 | 34 | class ThreadPool { 35 | public: 36 | explicit ThreadPool(size_t); 37 | ThreadPool() = delete; 38 | ~ThreadPool(); 39 | 40 | static ThreadPool& Instance() { // singleton 41 | static ThreadPool threadpool(std::thread::hardware_concurrency()); 42 | return threadpool; 43 | } 44 | 45 | template 46 | auto addTask(F&& f, Args&&... args) 47 | -> std::future::type>; 48 | 49 | void wait(); 50 | 51 | private: 52 | void init(); 53 | 54 | size_t threads; 55 | std::vector workers; 56 | std::queue > tasks; 57 | 58 | std::mutex queue_mutex; 59 | std::condition_variable condition; 60 | std::atomic stop{false}; 61 | }; 62 | 63 | inline ThreadPool::ThreadPool(size_t _threads): threads(_threads) { 64 | init(); 65 | } 66 | 67 | inline void ThreadPool::init() { 68 | if (!workers.empty()) { 69 | return; 70 | } 71 | stop = false; 72 | for(size_t i = 0;i < threads; i++) { 73 | workers.emplace_back([this] { 74 | for(;;) { 75 | std::function task; 76 | { 77 | std::unique_lock lock(this->queue_mutex); 78 | this->condition.wait(lock, [this] { 79 | return this->stop || !this->tasks.empty(); 80 | }); 81 | if(this->stop && this->tasks.empty()) 82 | return; 83 | task = std::move(this->tasks.front()); 84 | this->tasks.pop(); 85 | } 86 | task(); 87 | } 88 | }); 89 | } 90 | } 91 | 92 | template 93 | auto ThreadPool::addTask(F&& f, Args&&... args) 94 | -> std::future::type> { 95 | if (workers.empty()) { 96 | init(); 97 | } 98 | using return_type = typename std::result_of::type; 99 | 100 | auto task = std::make_shared< std::packaged_task >( 101 | std::bind(std::forward(f), std::forward(args)...) 
102 | ); 103 | 104 | std::future ret = task->get_future(); 105 | { 106 | std::unique_lock lock(queue_mutex); 107 | tasks.emplace([task](){ 108 | (*task)(); 109 | }); 110 | } 111 | condition.notify_one(); 112 | return ret; 113 | } 114 | 115 | inline void ThreadPool::wait() { 116 | { 117 | std::unique_lock lock(queue_mutex); 118 | stop = true; 119 | } 120 | condition.notify_all(); // notify to stop 121 | for (auto &worker : workers) { 122 | worker.join(); 123 | } 124 | workers.clear(); 125 | } 126 | 127 | // destruct after join all threads 128 | inline ThreadPool::~ThreadPool() { 129 | wait(); 130 | } 131 | 132 | template 133 | class ThreadLocal { 134 | public: 135 | ThreadLocal() { 136 | assert(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); 137 | } 138 | ~ThreadLocal() { 139 | pthread_key_delete(threadSpecificKey_); 140 | } 141 | 142 | // get thread local object 143 | inline T* get(bool createLocal = true) { 144 | T* p = (T*)pthread_getspecific(threadSpecificKey_); 145 | if (!p && createLocal) { 146 | p = new T(); 147 | assert(pthread_setspecific(threadSpecificKey_, p) == 0); 148 | } 149 | return p; 150 | } 151 | 152 | // overwrite threadlocal object and destructed last one 153 | inline void set(T* p) { 154 | if (T* q = get(false)) { 155 | dataDestructor(q); 156 | } 157 | assert(pthread_setspecific(threadSpecificKey_, p) == 0); 158 | } 159 | 160 | T& operator*() { return *get(); } 161 | 162 | operator T*() { 163 | return get(); 164 | } 165 | 166 | private: 167 | static void dataDestructor(void* p) { 168 | delete (T*)p; 169 | } 170 | pthread_key_t threadSpecificKey_; 171 | }; 172 | 173 | #endif /* thread_pool_h */ 174 | -------------------------------------------------------------------------------- /LightCTR/common/time.h: -------------------------------------------------------------------------------- 1 | // 2 | // time.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/3. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
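// Usage sketch (illustrative only, based on the helpers below): time a code
// region with the timestamp() counter.
//
//   clock_start();
//   /* ... code under measurement ... */
//   clock_stop();
//   Cycle elapsed = clock_cycles();   // ending_ - beginning_ once stopped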
7 | // 8 | 9 | #ifndef time_h 10 | #define time_h 11 | 12 | #include 13 | #include 14 | 15 | #ifdef _WIN32 16 | #include 17 | #elif __APPLE__ 18 | #include 19 | #endif 20 | 21 | #define __must_inline__ __attribute__((always_inline)) 22 | 23 | typedef uint64_t Cycle; 24 | typedef double Second; 25 | 26 | struct timeval __g_now_tv; 27 | Cycle beginning_, ending_; 28 | Second beginning_seconds_, ending_seconds_; 29 | bool running_; 30 | 31 | inline void __must_inline__ update_tv() { 32 | gettimeofday(&__g_now_tv, NULL); 33 | } 34 | 35 | inline int64_t __must_inline__ get_now_ms() { 36 | return (int64_t)__g_now_tv.tv_sec * 1000 + __g_now_tv.tv_usec / 1000; 37 | } 38 | 39 | inline time_t __must_inline__ get_now_s(void) { 40 | return __g_now_tv.tv_sec; 41 | } 42 | 43 | inline time_t __must_inline__ gettickspan(uint64_t old_tick = get_now_ms()) { 44 | update_tv(); 45 | uint64_t cur_tick = get_now_ms(); 46 | if (old_tick > cur_tick) { 47 | return 0; 48 | } 49 | return cur_tick - old_tick; 50 | } 51 | 52 | inline uint64_t timestamp() { 53 | 54 | #ifdef _WIN32 55 | uint64_t cycles = 0; 56 | uint64_t frequency = 0; 57 | 58 | QueryPerformanceFrequency((LARGE_INTEGER*) &frequency); 59 | QueryPerformanceCounter((LARGE_INTEGER*) &cycles); 60 | 61 | return cycles / frequency; 62 | #elif __APPLE__ 63 | uint64_t absolute_time = mach_absolute_time(); 64 | mach_timebase_info_data_t info = {0,0}; 65 | 66 | if (info.denom == 0) mach_timebase_info(&info); 67 | uint64_t elapsednano = absolute_time * (info.numer / info.denom); 68 | 69 | timespec spec; 70 | spec.tv_sec = elapsednano * 1e-9; 71 | spec.tv_nsec = elapsednano - (spec.tv_sec * 1e9); 72 | 73 | return spec.tv_nsec + (uint64_t)spec.tv_sec * 1e9; 74 | #else 75 | timespec spec; 76 | clock_gettime(CLOCK_REALTIME, &spec); 77 | return spec.tv_nsec + (uint64_t)spec.tv_sec * 1e9; 78 | #endif 79 | } 80 | 81 | inline void clock_start() { 82 | beginning_ = timestamp(); 83 | beginning_seconds_ = (beginning_ + 0.0) * 1.0e-9; 84 | running_ = true; 85 | } 86 | 87 | inline void clock_stop() { 88 | ending_ = timestamp(); 89 | ending_seconds_ = (ending_ + 0.0) * 1.0e-9; 90 | running_ = false; 91 | } 92 | 93 | inline Cycle clock_cycles() { 94 | if(running_) { 95 | return (timestamp() - beginning_); 96 | } else { 97 | return (ending_ - beginning_); 98 | } 99 | } 100 | 101 | #endif /* time_h */ 102 | -------------------------------------------------------------------------------- /LightCTR/dag/aggregate_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // aggregate_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
7 | // 8 | 9 | #ifndef aggregate_node_h 10 | #define aggregate_node_h 11 | 12 | #include 13 | #include "node_abst.h" 14 | 15 | // Aggregate or Scatter Flow 16 | class AggregateNode : public Autograd_Node_Abst { 17 | public: 18 | AggregateNode() = delete; 19 | AggregateNode(size_t _in_cnt, size_t _out_cnt = 1) : Autograd_Node_Abst(_in_cnt, _out_cnt) { 20 | assert(_in_cnt > 0 && _out_cnt > 0); 21 | } 22 | 23 | protected: 24 | virtual void forward_compute(const std::vector& in_outputs) = 0; 25 | 26 | virtual void backward_compute(const std::vector& out_deltas) = 0; 27 | }; 28 | 29 | #endif /* aggregate_node_h */ 30 | -------------------------------------------------------------------------------- /LightCTR/dag/dag_pipeline.h: -------------------------------------------------------------------------------- 1 | // 2 | // dag_pipeline.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/5. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef dag_pipeline_h 10 | #define dag_pipeline_h 11 | 12 | #include "aggregate_node.h" 13 | #include "source_node.h" 14 | #include "terminus_node.h" 15 | 16 | #include "operator/add_op.h" 17 | #include "operator/multiply_op.h" 18 | #include "operator/matmul_op.h" 19 | #include "operator/activations_op.h" 20 | #include "operator/loss_op.h" 21 | 22 | // build up pipelines of computation 23 | // or directed acyclic graphs (DAGs) of computation 24 | 25 | class DAG_Pipeline { 26 | public: 27 | 28 | static void addDirectedFlow(std::shared_ptr source_ptr, 29 | std::shared_ptr terminus_ptr) { 30 | terminus_ptr->regist_in_node(source_ptr); 31 | } 32 | 33 | static void addAutogradFlow(std::shared_ptr source_ptr, 34 | std::shared_ptr terminus_ptr) { 35 | terminus_ptr->regist_in_node(source_ptr); 36 | source_ptr->regist_out_node(terminus_ptr); 37 | } 38 | 39 | }; 40 | 41 | 42 | #endif /* dag_pipeline_h */ 43 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/activations_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // activations_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/23. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
7 | // 8 | 9 | #ifndef activations_op_h 10 | #define activations_op_h 11 | 12 | #include "../../util/activations.h" 13 | #include "string.h" 14 | 15 | template 16 | class ActivationsOp : public AggregateNode { 17 | public: 18 | ActivationsOp() = delete; 19 | ActivationsOp(size_t _out_cnt) : AggregateNode(1, _out_cnt) { 20 | } 21 | 22 | protected: 23 | void forward_compute(const std::vector& in_outputs) { 24 | assert(in_outputs[0].data); 25 | const size_t len = in_outputs[0].data->size(); 26 | if (node_output.data == nullptr) { 27 | node_output.data = std::make_shared >(len); 28 | } 29 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 30 | activFun.forward(node_output.data->data(), len); 31 | } 32 | 33 | void backward_compute(const std::vector& out_deltas) { 34 | const size_t len = out_deltas[0].data->size(); 35 | if (node_delta.data == nullptr) { 36 | node_delta.data = std::make_shared >(len); 37 | } 38 | std::memcpy(node_delta.data->data(), out_deltas[0].data->data(), len * sizeof(float)); 39 | 40 | activFun.backward(node_delta.data->data(), node_output.data->data(), 41 | node_delta.data->data(), len); 42 | } 43 | 44 | private: 45 | ActivationFunction activFun; 46 | }; 47 | 48 | #endif /* activations_op_h */ 49 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/add_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // add_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/20. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef add_op_h 10 | #define add_op_h 11 | 12 | #include "../aggregate_node.h" 13 | #include "string.h" 14 | #include "../../common/avx.h" 15 | 16 | class AddOp : public AggregateNode { 17 | public: 18 | AddOp() = delete; 19 | AddOp(size_t _in_cnt, size_t _out_cnt = 1) : AggregateNode(_in_cnt, _out_cnt) { 20 | assert(_in_cnt > 0 && _out_cnt > 0); 21 | } 22 | 23 | protected: 24 | void forward_compute(const std::vector& in_outputs) { 25 | const size_t len = in_outputs[0].data->size(); 26 | assert(len == in_outputs[1].data->size()); 27 | if (node_output.data == nullptr) { 28 | node_output.data = std::make_shared >(len); 29 | } 30 | 31 | std::memset(node_output.data->data(), 0, len * sizeof(float)); 32 | for(auto& in_output : in_outputs) { 33 | avx_vecAdd(node_output.data->data(), in_output.data->data(), 34 | node_output.data->data(), len); 35 | } 36 | } 37 | 38 | void backward_compute(const std::vector& out_deltas) { 39 | const size_t len = out_deltas[0].data->size(); 40 | if (node_delta.data == nullptr) { 41 | node_delta.data = std::make_shared >(len); 42 | } 43 | 44 | std::memset(node_delta.data->data(), 0, len * sizeof(float)); 45 | for(auto& out_delta : out_deltas) { 46 | avx_vecAdd(node_delta.data->data(), out_delta.data->data(), 47 | node_delta.data->data(), len); 48 | } 49 | } 50 | }; 51 | 52 | #endif /* add_op_h */ 53 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/loss_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // loss_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/23. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
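// Usage sketch (illustrative only, based on the template below): the template
// argument is whichever loss functor from util/loss.h gets plugged in, named
// SomeLoss here purely as a placeholder; `label_ptr` is a hypothetical
// shared_ptr to the label vector.
//
//   LossOp<SomeLoss> loss_node;
//   loss_node.setLable(label_ptr);       // attach the labels before the forward pass
//   /* ... run the forward flow through the DAG ... */
//   float loss = loss_node.getLoss();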
7 | // 8 | 9 | #ifndef loss_op_h 10 | #define loss_op_h 11 | 12 | #include "../terminus_node.h" 13 | 14 | template 15 | class LossOp : public TerminusNode { 16 | public: 17 | LossOp() : TerminusNode(1) { 18 | } 19 | 20 | float getLoss() const { 21 | return _loss; 22 | } 23 | 24 | void setLable(std::shared_ptr > label) { 25 | _label = label; 26 | } 27 | 28 | protected: 29 | void forward_compute(const std::vector& in_outputs) { 30 | // compute delta via loss function 31 | assert(_label && in_outputs.size() == 1); 32 | const size_t len = in_outputs[0].data->size(); 33 | if (node_output.data == nullptr) { 34 | node_output.data = std::make_shared >(len); 35 | } 36 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 37 | _loss = lossFun.loss(in_outputs[0].data->data(), _label->data(), len); 38 | } 39 | 40 | void backward_compute(const std::vector& out_deltas) { 41 | // back propagate delta 42 | assert(_label); 43 | const size_t len = node_output.data->size(); 44 | assert(_label->size() == len); 45 | if (node_delta.data == nullptr) { 46 | node_delta.data = std::make_shared >(len); 47 | } 48 | lossFun.gradient(node_output.data->data(), _label->data(), 49 | node_delta.data->data(), len); 50 | } 51 | 52 | private: 53 | float _loss; 54 | std::shared_ptr > _label; 55 | LossFunction lossFun; 56 | }; 57 | 58 | #endif /* loss_op_h */ 59 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/matmul_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // matmul_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/24. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef matmul_op_h 10 | #define matmul_op_h 11 | 12 | #include "../../common/avx.h" 13 | 14 | class MatmulOp : public AggregateNode { 15 | public: 16 | MatmulOp() = delete; 17 | MatmulOp(size_t _out_cnt) : AggregateNode(2, _out_cnt) { 18 | assert(_out_cnt > 0); 19 | } 20 | 21 | protected: 22 | void forward_compute(const std::vector& in_outputs) { 23 | assert(in_outputs.size() == 2); 24 | if (node_output.data == nullptr) { 25 | node_output.data = std::make_shared >(1); 26 | } 27 | compute_records.push_back(in_outputs[0]); 28 | compute_records.push_back(in_outputs[1]); 29 | node_output.data->at(0) = avx_dotProduct(in_outputs[0].data->data(), 30 | in_outputs[1].data->data(), 31 | in_outputs[0].data->size()); 32 | } 33 | 34 | void backward_compute(const std::vector& out_deltas) { 35 | float cur_delta = 0; 36 | for(auto& out_delta : out_deltas) { 37 | cur_delta += out_delta.data->at(0); 38 | } 39 | 40 | assert(compute_records.size() == 2); 41 | const size_t len = compute_records[0].data->size(); 42 | if (node_delta.data == nullptr) { 43 | node_delta.data = std::make_shared >(len); 44 | } 45 | 46 | auto& order_promises = get_in_complete_promises(); 47 | assert(order_promises.size() == 1); 48 | 49 | size_t index = 0; 50 | if (compute_records[1].node_id == get_first_target_id()) { 51 | index = 1; 52 | } 53 | 54 | avx_vecScale(compute_records[index].data->data(), 55 | node_delta.data->data(), 56 | len, cur_delta); 57 | order_promises[0].set_value(node_delta); 58 | 59 | // Notice to remove targeted promise for repeating set promise value 60 | // otherwise, it will be "terminating with uncaught exception of type 61 | // std::__1::future_error: The state of the promise has already been set" 62 | order_promises.clear(); 63 | 64 | avx_vecScale(compute_records[1 - index].data->data(), 65 
| node_delta.data->data(), 66 | len, cur_delta); 67 | } 68 | }; 69 | 70 | #endif /* matmul_op_h */ 71 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/multiply_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // multiply_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/20. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef multiply_op_h 10 | #define multiply_op_h 11 | 12 | #include "string.h" 13 | #include "../../common/avx.h" 14 | 15 | class MultiplyOp : public AggregateNode { 16 | public: 17 | MultiplyOp() = delete; 18 | MultiplyOp(size_t _in_cnt, size_t _out_cnt = 1) : AggregateNode(_in_cnt, _out_cnt) { 19 | assert(_in_cnt > 0 && _out_cnt > 0); 20 | } 21 | 22 | protected: 23 | void forward_compute(const std::vector& in_outputs) { 24 | const size_t len = in_outputs[0].data->size(); 25 | if (node_output.data == nullptr) { 26 | node_output.data = std::make_shared >(len); 27 | } 28 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 29 | for(size_t i = 1; i < in_outputs.size(); i++) { 30 | compute_records.push_back(in_outputs[i]); 31 | avx_vecScale(node_output.data->data(), node_output.data->data(), 32 | len, in_outputs[i].data->data()); 33 | } 34 | } 35 | 36 | void backward_compute(const std::vector& out_deltas) { 37 | const size_t len = out_deltas[0].data->size(); 38 | std::vector delta_arr(len, 0); 39 | 40 | for(auto& out_delta : out_deltas) { 41 | avx_vecAdd(out_delta.data->data(), delta_arr.data(), 42 | delta_arr.data(), len); 43 | } 44 | avx_vecScale(delta_arr.data(), node_delta.data->data(), 45 | len, node_output.data->data()); 46 | 47 | auto& order_ids = get_in_promises_ids(); 48 | auto& order_promises = get_in_complete_promises(); 49 | 50 | for (size_t i = 0; i < order_ids.size(); i++) { 51 | const size_t target_id = order_ids[i]; 52 | for (auto& record : compute_records) { 53 | if (record.node_id == target_id) { 54 | avx_vecDiv(node_delta.data->data(), record.data->data(), 55 | node_delta.data->data(), len); 56 | order_promises[i].set_value(node_delta); 57 | break; 58 | } 59 | } 60 | } 61 | for (auto& record : compute_records) { 62 | if (record.node_id == get_first_target_id()) { 63 | avx_vecDiv(node_delta.data->data(), record.data->data(), 64 | node_delta.data->data(), len); 65 | return; 66 | } 67 | } 68 | assert(false); 69 | } 70 | }; 71 | 72 | #endif /* multiply_op_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/dag/source_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // source_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
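// Usage sketch (illustrative only, based on the classes below): feed a value
// into a SourceNode and, after the terminus has run forward, pull gradients
// back through runFlow(). It assumes node values are std::vector<float>,
// matching the float buffers used by the operator nodes.
//
//   auto x = std::make_shared<SourceNode>(/*out_cnt=*/1);
//   x->setValue(std::make_shared<std::vector<float>>(16, 0.5f));
//   /* ... wire x into a DAG and run the terminus node's forward flow ... */
//   DAG_Output grads = x->runFlow();   // triggers the backward flow to this source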
7 | // 8 | 9 | #ifndef source_node_h 10 | #define source_node_h 11 | 12 | #include 13 | #include "string.h" 14 | #include "node_abst.h" 15 | #include "../common/avx.h" 16 | 17 | class SourceNode : public Autograd_Node_Abst { 18 | public: 19 | SourceNode() = delete; 20 | explicit SourceNode(size_t _out_cnt) : Autograd_Node_Abst(0, _out_cnt) { 21 | assert(_out_cnt > 0); 22 | } 23 | 24 | DAG_Output runFlow(bool keep_intermediate = false) { 25 | init_backward_Flow(keep_intermediate); 26 | return backward_run().get(); 27 | } 28 | 29 | void setValue(std::shared_ptr > data) { 30 | node_output.data = data; 31 | } 32 | 33 | protected: 34 | virtual void forward_compute(const std::vector&) { 35 | // provide value of data source 36 | } 37 | 38 | virtual void backward_compute(const std::vector& out_deltas) { 39 | // apply delta as gradient on the value 40 | } 41 | }; 42 | 43 | 44 | template 45 | class TrainableNode : public SourceNode { 46 | public: 47 | TrainableNode() = delete; 48 | explicit TrainableNode(size_t _out_cnt) : SourceNode(_out_cnt) { 49 | assert(_out_cnt > 0); 50 | } 51 | 52 | void setValue(std::shared_ptr > data) { 53 | node_output.data = data; 54 | updater.learnable_params_cnt(data->size()); 55 | } 56 | 57 | protected: 58 | void forward_compute(const std::vector&) { 59 | // provide value of data source 60 | assert(node_output.data); 61 | } 62 | 63 | void backward_compute(const std::vector& out_deltas) { 64 | // apply delta as gradient on the value 65 | const size_t len = out_deltas[0].data->size(); 66 | assert(len == node_output.data->size()); 67 | 68 | if (node_delta.data == nullptr) { 69 | node_delta.data = std::make_shared >(len); 70 | } 71 | std::memset(node_delta.data->data(), 0, len * sizeof(float)); 72 | for(auto& out_delta : out_deltas) { 73 | avx_vecAdd(node_delta.data->data(), out_delta.data->data(), 74 | node_delta.data->data(), len); 75 | } 76 | updater.update(0, len, node_output.data->data(), node_delta.data->data()); 77 | } 78 | private: 79 | UpdaterFunc updater; 80 | }; 81 | 82 | #endif /* source_node_h */ 83 | -------------------------------------------------------------------------------- /LightCTR/dag/terminus_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // terminus_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef terminus_node_h 10 | #define terminus_node_h 11 | 12 | #include 13 | #include "node_abst.h" 14 | #include "../common/avx.h" 15 | 16 | class TerminusNode : public Autograd_Node_Abst { 17 | public: 18 | TerminusNode() = delete; 19 | explicit TerminusNode(size_t _in_cnt) : Autograd_Node_Abst(_in_cnt, 0) { 20 | assert(_in_cnt > 0); 21 | } 22 | 23 | DAG_Output runFlow(bool keep_intermediate = false) { 24 | init_forward_Flow(keep_intermediate); 25 | return forward_run().get(); 26 | } 27 | 28 | protected: 29 | void forward_compute(const std::vector& in_outputs) { 30 | // compute delta via loss function 31 | } 32 | 33 | void backward_compute(const std::vector&) { 34 | // back propagate delta 35 | } 36 | }; 37 | 38 | #endif /* terminus_node_h */ 39 | -------------------------------------------------------------------------------- /LightCTR/distribut/consistent_hash.h: -------------------------------------------------------------------------------- 1 | // 2 | // consistent_hash.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
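// A minimal illustrative sketch of the contract used by
// TrainableNode::backward_compute above: sum the deltas arriving from every
// consumer of the node, then hand the accumulated gradient to an updater. The
// helper below stands in for that with a plain SGD step; the real updaters
// live in util/gradientUpdater.h and util/momentumUpdater.h, so the
// learning-rate handling here is an assumption, not the project's behaviour.
#include <vector>
#include <cstddef>

inline void accumulate_and_sgd(std::vector<float>& params,
                               const std::vector<std::vector<float>>& out_deltas,
                               float lr) {
    std::vector<float> grad(params.size(), 0.0f);
    for (const auto& d : out_deltas)               // sum the delta from every downstream node
        for (std::size_t i = 0; i < grad.size(); ++i)
            grad[i] += d[i];
    for (std::size_t i = 0; i < params.size(); ++i)
        params[i] -= lr * grad[i];                 // stands in for updater.update(0, len, params, grad)
}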
7 | // 8 | 9 | #ifndef consistent_hash_h 10 | #define consistent_hash_h 11 | 12 | #include "../common/hash.h" 13 | #include 14 | #include 15 | #include 16 | 17 | // Make data shardings ditributed in PS clusters by DHT 18 | class ConsistentHash { 19 | public: 20 | static ConsistentHash& Instance() { // singleton 21 | static std::once_flag once; 22 | static ConsistentHash consist; 23 | std::call_once(once, [] { 24 | assert(__global_cluster_ps_cnt > 0); 25 | consist.init(__global_cluster_ps_cnt); 26 | }); 27 | return consist; 28 | } 29 | 30 | template 31 | inline uint32_t getNode(TKey key) { 32 | uint32_t partition = murMurHash(key); 33 | std::map::iterator it = 34 | server_nodes.lower_bound(partition); 35 | 36 | if(it == server_nodes.end()) { 37 | return server_nodes.begin()->second; 38 | } 39 | return it->second; 40 | } 41 | 42 | private: 43 | ConsistentHash() { 44 | 45 | } 46 | ConsistentHash(const ConsistentHash&) = delete; 47 | ConsistentHash(ConsistentHash&&) = delete; 48 | ConsistentHash &operator=(const ConsistentHash &) = delete; 49 | ConsistentHash &operator=(ConsistentHash &&) = delete; 50 | 51 | void init(uint32_t _node_cnt) { 52 | node_cnt = _node_cnt; 53 | for (uint32_t i = 0; i < node_cnt; i++) { 54 | for (uint32_t j = 0; j < virtual_node_cnt; j++) { 55 | std::stringstream node_key; 56 | node_key << i << "-" << j; 57 | uint32_t partition = murMurHash(node_key.str()); 58 | server_nodes[partition] = i; 59 | } 60 | } 61 | } 62 | 63 | uint32_t node_cnt; 64 | const uint32_t virtual_node_cnt{5}; // num of Replicas 65 | 66 | std::map server_nodes; 67 | }; 68 | 69 | #endif /* consistent_hash_h */ 70 | -------------------------------------------------------------------------------- /LightCTR/distribut/dist_machine_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // dist_machine_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017 SongKuangshi. All rights reserved. 
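// A minimal illustrative sketch of the routing done by the ConsistentHash
// singleton above: five virtual replicas of every PS node are hashed onto a
// ring, and a key is routed to the first virtual node at or after the key's
// hash, wrapping to the smallest ring position when lower_bound runs off the
// end. The self-contained version below uses std::hash as a stand-in for
// murMurHash; the type and method names are hypothetical, not the project API.
#include <map>
#include <string>
#include <cstdint>
#include <functional>

struct TinyHashRing {
    std::map<std::uint32_t, std::uint32_t> ring;   // ring position -> server id
    void add_server(std::uint32_t id, std::uint32_t replicas = 5) {
        for (std::uint32_t j = 0; j < replicas; ++j) {
            const std::string vnode = std::to_string(id) + "-" + std::to_string(j);
            ring[static_cast<std::uint32_t>(std::hash<std::string>{}(vnode))] = id;
        }
    }
    std::uint32_t route(const std::string& key) const {
        const auto pos = static_cast<std::uint32_t>(std::hash<std::string>{}(key));
        auto it = ring.lower_bound(pos);           // first virtual node clockwise from the key
        return it == ring.end() ? ring.begin()->second : it->second;
    }
};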
7 | // 8 | 9 | #ifndef dist_machine_abst_h 10 | #define dist_machine_abst_h 11 | 12 | #include "../common/network.h" 13 | #include "../common/barrier.h" 14 | #include "../common/lock.h" 15 | 16 | enum Run_Mode { 17 | PS_Mode = 0, 18 | Ring_Mode 19 | }; 20 | 21 | class Dist_Machine_Abst { 22 | public: 23 | Dist_Machine_Abst() : gDelivery(Delivery::Instance()) { 24 | gDelivery.set_node_id(BEGIN_ID_OF_WORKER); 25 | regist_curNode_toMaster(); 26 | regist_master_ack_handler(); 27 | 28 | serving_barrier.block(); 29 | status_serving = true; 30 | } 31 | 32 | virtual ~Dist_Machine_Abst() { 33 | shutdown(NULL); 34 | } 35 | 36 | virtual inline size_t Rank() const { // Worker Rank begin from 1 37 | assert(status_serving); 38 | return gDelivery.node_id() - BEGIN_ID_OF_WORKER; 39 | } 40 | 41 | virtual inline bool status() const { 42 | return status_serving; 43 | } 44 | 45 | virtual inline void shutdown(std::function terminate_callback) { 46 | if (!status_serving) { 47 | return; 48 | } 49 | send_FIN_toMaster(terminate_callback); 50 | } 51 | 52 | private: 53 | void regist_curNode_toMaster() { 54 | PackageDescript desc(REQUEST_HANDSHAKE); 55 | const Addr& local_addr = gDelivery.local_addr(); 56 | std::string local_addr_str = local_addr.toString(); 57 | desc.content.append(local_addr_str.c_str(), local_addr_str.length()); 58 | 59 | desc.callback = [this](std::shared_ptr resp_package) { 60 | size_t node_id; 61 | resp_package->content >> node_id; 62 | printf("[Worker] Complete Register cur_node_id = %zu\n", node_id); 63 | gDelivery.set_node_id(node_id); 64 | assert(gDelivery.node_id() >= BEGIN_ID_OF_WORKER); 65 | serving_barrier.unblock(); 66 | }; 67 | gDelivery.send_async(desc, 0); 68 | } 69 | 70 | void regist_master_ack_handler() { 71 | request_handler_t ack_handler = [this]( 72 | std::shared_ptr request, 73 | PackageDescript& response) { 74 | #ifdef WORKER_RING 75 | size_t ps_id = BEGIN_ID_OF_WORKER + 1; 76 | #else 77 | size_t ps_id = BEGIN_ID_OF_PS; 78 | #endif 79 | while (!request->content.readEOF()) { // read keys needed by worker 80 | Addr ps_addr(request->content); 81 | printf("[Worker] Add ps_id = %zu router\n", ps_id); 82 | gDelivery.regist_router(ps_id++, std::move(ps_addr)); 83 | } 84 | serving_barrier.unblock(); 85 | }; 86 | gDelivery.regist_handler(REQUEST_ACK, std::move(ack_handler)); 87 | } 88 | 89 | void send_FIN_toMaster(std::function terminate_callback) { 90 | PackageDescript desc(REQUEST_FIN); 91 | desc.callback = [this, terminate_callback]( 92 | std::shared_ptr resp_package) { 93 | puts("[Worker] Fin is accepted"); 94 | gDelivery.shutdown(); 95 | if (terminate_callback) { 96 | terminate_callback(); 97 | } 98 | }; 99 | gDelivery.send_async(desc, 0); 100 | } 101 | 102 | bool status_serving{false}; 103 | Barrier serving_barrier{2}; 104 | protected: 105 | Delivery& gDelivery; 106 | }; 107 | 108 | #endif /* dist_machine_abst_h */ 109 | -------------------------------------------------------------------------------- /LightCTR/distribut/push.h: -------------------------------------------------------------------------------- 1 | // 2 | // push.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
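// A minimal illustrative sketch of the startup synchronisation used above:
// the constructor sends the handshake asynchronously and blocks on
// serving_barrier{2}, where one unblock() comes from the handshake callback
// that assigns the node id and the other from the REQUEST_ACK handler that
// registers the PS routers, so serving starts only after both replies arrive.
// Below is the same "wait for two async completions" shape expressed with
// std::future instead of the project's Barrier -- an analogy, not project code.
#include <future>
#include <thread>
#include <cstdio>

inline void wait_for_two_replies() {
    std::promise<void> handshake_done, ack_done;
    auto f1 = handshake_done.get_future();
    auto f2 = ack_done.get_future();
    std::thread t1([&] { handshake_done.set_value(); });   // e.g. node id assigned
    std::thread t2([&] { ack_done.set_value(); });         // e.g. PS routers registered
    f1.wait();
    f2.wait();                                             // analogue of serving_barrier.block()
    std::puts("both replies received; serving can start");
    t1.join();
    t2.join();
}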
7 | // 8 | 9 | #ifndef push_h 10 | #define push_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../common/barrier.h" 16 | #include "../common/network.h" 17 | #include "../common/buffer_fusion.h" 18 | 19 | // Push Grads to PS 20 | class Push { 21 | 22 | public: 23 | Push() = delete; 24 | explicit Push(char _headByte) : 25 | headByte(_headByte), 26 | gDelivery(Delivery::Instance()), 27 | gConsistentHash(ConsistentHash::Instance()) { 28 | } 29 | 30 | void registTensorFusion(std::shared_ptr > _buf_fusion) { 31 | assert(headByte == 'T'); 32 | buf_fusion = _buf_fusion; 33 | } 34 | 35 | template 36 | void sync(const std::unordered_map &grads, size_t epoch) { 37 | if (headByte == 'T') 38 | assert(buf_fusion); 39 | assert(epoch > 0); 40 | Barrier barrier; 41 | int candidate_ps = 0; 42 | sendToPS(grads, candidate_ps, epoch, 43 | [&barrier, &candidate_ps]() { 44 | candidate_ps--; 45 | if (candidate_ps <= 0) { 46 | barrier.unblock(); 47 | } 48 | }); 49 | barrier.block(); 50 | } 51 | 52 | private: 53 | template 54 | void sendToPS(const std::unordered_map &grads, 55 | int& candidate_ps, 56 | size_t epoch, 57 | std::function callback) { 58 | std::map > > push_map; 59 | 60 | for (auto it = grads.begin(); it != grads.end(); it++) { 61 | assert(it->second.checkValid()); 62 | if (!it->second.checkPreferredValue()) { 63 | continue; 64 | } 65 | const size_t to_id = BEGIN_ID_OF_PS + 66 | gConsistentHash.getNode(it->first); 67 | if (push_map.count(to_id) == 0) { 68 | push_map[to_id] = std::vector >(); 69 | candidate_ps++; 70 | } 71 | push_map[to_id].emplace_back(std::move(*it)); 72 | } 73 | 74 | if (push_map.size() == 0) { 75 | if (callback) { 76 | callback(); 77 | } 78 | } 79 | 80 | for (auto &item : push_map) { 81 | const size_t to_id = item.first; 82 | PackageDescript desc(REQUEST_PUSH, epoch); 83 | desc.content << headByte; 84 | for (auto &grad_pair : item.second) { 85 | // push data pair by VarUint & float16_t 86 | desc.content.appendVarUint(grad_pair.first); 87 | desc.content << Float16(&grad_pair.second).float16_value(); 88 | } 89 | desc.callback = [callback](std::shared_ptr resp_package) { 90 | // response without content 91 | if (callback) { 92 | callback(); 93 | } 94 | }; 95 | gDelivery.send_async(desc, to_id); 96 | } 97 | #ifdef DEBUG 98 | printf("[WORKER Push] %zu %c Grad-pairs Sended\n", grads.size(), headByte); 99 | #endif 100 | } 101 | 102 | template 103 | void sendToPS(const std::unordered_map &grads, 104 | int& candidate_ps, 105 | size_t epoch, 106 | std::function callback) { 107 | std::map > > push_map; 108 | 109 | for (auto it = grads.begin(); it != grads.end(); it++) { 110 | const size_t to_id = BEGIN_ID_OF_PS + 111 | gConsistentHash.getNode(it->first); 112 | if (push_map.count(to_id) == 0) { 113 | push_map[to_id] = std::vector >(); 114 | candidate_ps++; 115 | } 116 | push_map[to_id].emplace_back(std::move(*it)); 117 | } 118 | 119 | for (auto &item : push_map) { 120 | const size_t to_id = item.first; 121 | PackageDescript desc(REQUEST_PUSH, epoch); 122 | desc.content << headByte; 123 | for (auto &grad_pair : item.second) { 124 | desc.content.appendVarUint(grad_pair.first); 125 | auto memAddr = buf_fusion->getMemory(grad_pair.second); 126 | desc.content.appendVarUint(memAddr.second); 127 | 128 | for (size_t i = 0; i < memAddr.second; i++) { 129 | desc.content << Float16(memAddr.first + i).float16_value(); 130 | } 131 | } 132 | desc.callback = [callback](std::shared_ptr resp_package) { 133 | // response without content 134 | if (callback) { 135 | 
callback(); 136 | } 137 | }; 138 | gDelivery.send_async(desc, to_id); 139 | } 140 | #ifdef DEBUG 141 | printf("[WORKER Push] %zu %c Grad-Tensors Sended\n", grads.size(), headByte); 142 | #endif 143 | } 144 | 145 | char headByte = 'N'; 146 | std::shared_ptr > buf_fusion = nullptr; 147 | 148 | Delivery& gDelivery; 149 | ConsistentHash& gConsistentHash; 150 | }; 151 | 152 | #endif /* push_h */ 153 | -------------------------------------------------------------------------------- /LightCTR/distribut/worker.h: -------------------------------------------------------------------------------- 1 | // 2 | // worker.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef worker_h 10 | #define worker_h 11 | 12 | #include "consistent_hash.h" 13 | #include "dist_machine_abst.h" 14 | #include "push.h" 15 | #include "pull.h" 16 | 17 | template 18 | class Worker : public Dist_Machine_Abst { 19 | public: 20 | Worker() : gConsistentHash(ConsistentHash::Instance()) { 21 | } 22 | 23 | ~Worker() { 24 | 25 | } 26 | // for sparse model 27 | Push push_op = Push('N'); 28 | Pull pull_op = Pull('N'); 29 | // for dense model 30 | Push push_tensor_op = Push('T'); 31 | Pull pull_tensor_op = Pull('T'); 32 | 33 | private: 34 | ConsistentHash& gConsistentHash; 35 | }; 36 | 37 | #endif /* worker_h */ 38 | -------------------------------------------------------------------------------- /LightCTR/em_algo_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // em_algo_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef em_algo_abst_h 10 | #define em_algo_abst_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "assert.h" 20 | using namespace std; 21 | 22 | template 23 | class EM_Algo_Abst { 24 | public: 25 | EM_Algo_Abst(string _dataFile, size_t _epoch, size_t _feature_cnt): 26 | epoch(_epoch), feature_cnt(_feature_cnt) { 27 | loadDataRow(_dataFile); 28 | } 29 | virtual ~EM_Algo_Abst() { 30 | dataSet.clear(); 31 | } 32 | 33 | void Train() { 34 | float lastLE = 0; 35 | for (size_t i = 0; i < this->epoch; i++) { 36 | T* latentVar = Train_EStep(); 37 | float likelihood = Train_MStep(latentVar); 38 | assert(!isnan(likelihood)); 39 | printf("Epoch %zu log likelihood ELOB = %.3f\n", i, likelihood); 40 | if (i == 0 || fabs(likelihood - lastLE) > 1e-3) { 41 | lastLE = likelihood; 42 | } else { 43 | puts("have been converge"); 44 | break; 45 | } 46 | } 47 | printArguments(); 48 | } 49 | void saveModel(size_t epoch) { 50 | 51 | } 52 | 53 | virtual void init() = 0; 54 | virtual T* Train_EStep() = 0; 55 | virtual float Train_MStep(const T*) = 0; 56 | virtual void printArguments() = 0; 57 | virtual vector Predict() = 0; 58 | 59 | void loadDataRow(string dataPath) { 60 | dataSet.clear(); 61 | 62 | ifstream fin_; 63 | string line; 64 | int nchar; 65 | float val; 66 | fin_.open(dataPath, ios::in); 67 | if(!fin_.is_open()){ 68 | cout << "open file error!" 
<< endl; 69 | exit(1); 70 | } 71 | vector tmp; 72 | tmp.reserve(feature_cnt); 73 | while(!fin_.eof()){ 74 | getline(fin_, line); 75 | tmp.clear(); 76 | const char *pline = line.c_str(); 77 | while(pline < line.c_str() + (int)line.length() && 78 | sscanf(pline, "%f%n", &val, &nchar) >= 1){ 79 | pline += nchar + 1; 80 | assert(!isnan(val)); 81 | tmp.emplace_back(val); 82 | if (tmp.size() == feature_cnt) { 83 | assert(tmp.size() == feature_cnt); 84 | this->dataSet.emplace_back(move(tmp)); 85 | tmp.clear(); 86 | } 87 | } 88 | } 89 | this->dataRow_cnt = this->dataSet.size(); 90 | assert(this->dataRow_cnt > 0); 91 | } 92 | 93 | size_t epoch; 94 | size_t feature_cnt, dataRow_cnt; 95 | vector > dataSet; 96 | }; 97 | 98 | #endif /* em_algo_abst_h */ 99 | -------------------------------------------------------------------------------- /LightCTR/fm_algo_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // fm_algo_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef fm_algo_abst_h 10 | #define fm_algo_abst_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "assert.h" 21 | #include "util/random.h" 22 | #include "util/gradientUpdater.h" 23 | #include "util/momentumUpdater.h" 24 | 25 | #define FM 26 | 27 | using namespace std; 28 | 29 | struct FMFeature { 30 | size_t first; // feature id 31 | float second; // value 32 | size_t field; 33 | FMFeature(size_t _first, float _second, size_t _field): 34 | first(_first), second(_second), field(_field) {} 35 | }; 36 | 37 | class FM_Algo_Abst { 38 | public: 39 | FM_Algo_Abst(string _dataPath, size_t _factor_cnt, 40 | size_t _field_cnt = 0, size_t _feature_cnt = 0): 41 | feature_cnt(_feature_cnt), field_cnt(_field_cnt), factor_cnt(_factor_cnt) { 42 | proc_cnt = thread::hardware_concurrency(); 43 | loadDataRow(_dataPath); 44 | init(); 45 | } 46 | virtual ~FM_Algo_Abst() { 47 | delete [] W; 48 | #ifdef FM 49 | delete [] V; 50 | delete [] sumVX; 51 | #endif 52 | } 53 | void init() { 54 | W = new float[this->feature_cnt]; 55 | memset(W, 0, sizeof(float) * this->feature_cnt); 56 | #ifdef FM 57 | size_t memsize = this->feature_cnt * this->factor_cnt; 58 | if (this->field_cnt > 0) { 59 | memsize = this->feature_cnt * this->field_cnt * this->factor_cnt; 60 | } 61 | V = new float[memsize]; 62 | const float scale = 1.0 / sqrt(this->factor_cnt); 63 | for (size_t i = 0; i < memsize; i++) { 64 | V[i] = GaussRand() * scale; 65 | } 66 | sumVX = NULL; 67 | #endif 68 | } 69 | 70 | void loadDataRow(string dataPath) { 71 | dataSet.clear(); 72 | 73 | ifstream fin_; 74 | string line; 75 | int nchar, y; 76 | size_t fid, fieldid; 77 | float val; 78 | fin_.open(dataPath, ios::in); 79 | if(!fin_.is_open()){ 80 | cout << "open file error!" 
<< endl; 81 | exit(1); 82 | } 83 | vector tmp; 84 | while(!fin_.eof()){ 85 | getline(fin_, line); 86 | tmp.clear(); 87 | const char *pline = line.c_str(); 88 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 89 | pline += nchar + 1; 90 | label.emplace_back(y); 91 | while(pline < line.c_str() + (int)line.length() && 92 | sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 93 | pline += nchar + 1; 94 | tmp.emplace_back(*new FMFeature(fid, val, fieldid)); 95 | this->feature_cnt = max(this->feature_cnt, fid + 1); 96 | if (this->field_cnt > 0) { 97 | this->field_cnt = max(this->field_cnt, fieldid + 1); 98 | } 99 | } 100 | } 101 | if (tmp.empty()) { 102 | continue; 103 | } 104 | this->dataSet.emplace_back(move(tmp)); 105 | } 106 | this->dataRow_cnt = this->dataSet.size(); 107 | } 108 | 109 | void saveModel(size_t epoch) { 110 | char buffer[1024]; 111 | snprintf(buffer, 1024, "%d", (int)epoch); 112 | string filename = buffer; 113 | ofstream md("./output/model_epoch_" + filename + ".txt"); 114 | if(!md.is_open()){ 115 | cout<<"save model open file error" << endl; 116 | exit(1); 117 | } 118 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 119 | if (W[fid] != 0) { 120 | md << fid << ":" << W[fid] << " "; 121 | } 122 | } 123 | md << endl; 124 | #ifdef FM 125 | // print all factor V 126 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 127 | md << fid << ":"; 128 | for (size_t fac_itr = 0; fac_itr < this->factor_cnt; fac_itr++) { 129 | md << *getV(fid, fac_itr) << " "; 130 | } 131 | md << endl; 132 | } 133 | #endif 134 | md.close(); 135 | } 136 | 137 | virtual void Train() = 0; 138 | 139 | float L2Reg_ratio; 140 | 141 | float *W; 142 | size_t feature_cnt, proc_cnt, field_cnt, factor_cnt; 143 | size_t dataRow_cnt; 144 | 145 | float *V, *sumVX; 146 | inline float* getV(size_t fid, size_t facid) const { 147 | return &V[fid * this->factor_cnt + facid]; 148 | } 149 | inline float* getV_field(size_t fid, size_t fieldid, size_t facid) const { 150 | return &V[fid * this->field_cnt * this->factor_cnt + fieldid * this->factor_cnt + facid]; 151 | } 152 | inline float* getSumVX(size_t rid, size_t facid) const { 153 | return &sumVX[rid * this->factor_cnt + facid]; 154 | } 155 | 156 | vector > dataSet; 157 | 158 | protected: 159 | inline float LogisticGradW(float pred, float label, float x) { 160 | return (pred - label) * x; 161 | } 162 | inline float LogisticGradV(float gradW, float sum, float v, float x) { 163 | return gradW * (sum - v * x); 164 | } 165 | 166 | AdagradUpdater_Num updater; 167 | float __loss; 168 | float __accuracy; 169 | 170 | vector label; 171 | vector > cross_field; 172 | }; 173 | 174 | #endif /* fm_algo_abst_h */ 175 | -------------------------------------------------------------------------------- /LightCTR/predict/fm_predict.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // fm_predict.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
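// A worked illustration of the identity behind the sumVX bookkeeping above and
// the scoring code that follows: the FM second-order term is computed with the
// O(k*n) factorisation
//   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * ( ||sum_i v_i x_i||^2 - sum_i ||v_i x_i||^2 ),
// which is why the predictor subtracts 0.5 * dot(v_i x_i, v_i x_i) per feature
// and then adds 0.5 * dot(sumVX, sumVX) per row. The two functions below
// compare the naive and factored forms on plain vectors; they are a sketch,
// not project code.
#include <vector>
#include <cstddef>

// vx[i] holds the k-dimensional vector v_i * x_i for one data row.
inline float fm_pairwise_naive(const std::vector<std::vector<float>>& vx) {
    float s = 0.0f;
    for (std::size_t i = 0; i < vx.size(); ++i)
        for (std::size_t j = i + 1; j < vx.size(); ++j)
            for (std::size_t f = 0; f < vx[i].size(); ++f)
                s += vx[i][f] * vx[j][f];
    return s;
}

inline float fm_pairwise_factored(const std::vector<std::vector<float>>& vx) {
    const std::size_t k = vx.empty() ? 0 : vx[0].size();
    std::vector<float> sum(k, 0.0f);
    float sum_of_squares = 0.0f;
    for (const auto& v : vx)
        for (std::size_t f = 0; f < k; ++f) {
            sum[f] += v[f];                        // builds the sumVX analogue
            sum_of_squares += v[f] * v[f];
        }
    float norm = 0.0f;
    for (float s : sum) norm += s * s;
    return 0.5f * (norm - sum_of_squares);         // matches fm_pairwise_naive up to rounding
}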
7 | // 8 | 9 | #include "fm_predict.h" 10 | #include 11 | 12 | void FM_Predict::Predict(string savePath) { 13 | vector ans; 14 | 15 | vector tmp_vec; 16 | tmp_vec.resize(fm->factor_cnt); 17 | 18 | for (size_t rid = 0; rid < this->test_dataRow_cnt; rid++) { // data row 19 | float fm_pred = 0.0f; 20 | if (fm->sumVX != NULL) { 21 | for (size_t i = 0; i < test_dataSet[rid].size(); i++) { // feature 22 | const size_t fid = test_dataSet[rid][i].first; 23 | assert(fid < fm->feature_cnt); 24 | const float X = test_dataSet[rid][i].second; 25 | fm_pred += fm->W[fid] * X; 26 | #ifdef FM 27 | avx_vecScale(fm->getV(fid, 0), tmp_vec.data(), fm->factor_cnt, X); 28 | fm_pred -= 0.5 * avx_dotProduct(tmp_vec.data(), tmp_vec.data(), fm->factor_cnt); 29 | #endif 30 | } 31 | #ifdef FM 32 | fm_pred += 0.5 * avx_dotProduct(fm->getSumVX(rid, 0), fm->getSumVX(rid, 0), fm->factor_cnt); 33 | #endif 34 | } else { 35 | // Field-aware FM 36 | for (size_t i = 0; i < test_dataSet[rid].size(); i++) { 37 | const size_t fid = test_dataSet[rid][i].first; 38 | const float X = test_dataSet[rid][i].second; 39 | const size_t field = test_dataSet[rid][i].field; 40 | 41 | fm_pred += fm->W[fid] * X; 42 | 43 | for (size_t j = i + 1; j < test_dataSet[rid].size(); j++) { 44 | const size_t fid2 = test_dataSet[rid][j].first; 45 | const float X2 = test_dataSet[rid][j].second; 46 | const size_t field2 = test_dataSet[rid][j].field; 47 | 48 | float field_w = avx_dotProduct(fm->getV_field(fid, field2, 0), 49 | fm->getV_field(fid2, field, 0), fm->factor_cnt); 50 | fm_pred += field_w * X * X2; 51 | } 52 | } 53 | } 54 | 55 | float pCTR = sigmoid.forward(fm_pred); 56 | 57 | ans.emplace_back(pCTR); 58 | } 59 | 60 | if (!test_label.empty()) { 61 | assert(ans.size() == test_label.size()); 62 | 63 | float loss = 0; 64 | int correct = 0; 65 | for (size_t i = 0; i < test_label.size(); i++) { 66 | loss += (int)this->test_label[i] == 1 ? -log(ans[i]) : -log(1.0 - ans[i]); 67 | if (ans[i] > 0.5 && this->test_label[i] == 1) { 68 | correct++; 69 | } else if (ans[i] < 0.5 && this->test_label[i] == 0) { 70 | correct++; 71 | } 72 | } 73 | cout << "total log likelihood = " << loss << " correct = " << setprecision(5) << 74 | (float)correct / test_dataRow_cnt; 75 | 76 | auc->init(&ans, &test_label); 77 | printf(" auc = %.4f\n", auc->Auc()); 78 | } 79 | if (savePath != "") { 80 | ofstream md(savePath); 81 | if(!md.is_open()){ 82 | cout << "save model open file error" << endl; 83 | exit(0); 84 | } 85 | for (auto val : ans) { 86 | md << val << endl; 87 | } 88 | md.close(); 89 | } 90 | } 91 | 92 | void FM_Predict::loadDataRow(string dataPath, bool with_valid_label) { 93 | test_dataSet.clear(); 94 | test_label.clear(); 95 | 96 | ifstream fin_; 97 | string line; 98 | int nchar, y; 99 | size_t fid, fieldid; 100 | float val; 101 | fin_.open(dataPath, ios::in); 102 | if(!fin_.is_open()){ 103 | cout << "open file error!" 
<< endl; 104 | exit(1); 105 | } 106 | vector tmp; 107 | while(!fin_.eof()){ 108 | getline(fin_, line); 109 | tmp.clear(); 110 | const char *pline = line.c_str(); 111 | if (with_valid_label) { 112 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 113 | this->test_label.emplace_back(y); 114 | pline += nchar + 1; 115 | } 116 | } 117 | if(sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 118 | pline += nchar + 1; 119 | while(pline < line.c_str() + (int)line.length() && 120 | sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 121 | pline += nchar + 1; 122 | if (fid < fm->feature_cnt) { 123 | assert(!isnan(fid)); 124 | assert(!isnan(val)); 125 | tmp.emplace_back(FMFeature(fid, val, fieldid)); 126 | } 127 | } 128 | } 129 | if (tmp.empty()) { 130 | continue; 131 | } 132 | this->test_dataSet.emplace_back(move(tmp)); 133 | } 134 | this->test_dataRow_cnt = this->test_dataSet.size(); 135 | assert(test_dataRow_cnt > 0); 136 | } 137 | 138 | -------------------------------------------------------------------------------- /LightCTR/predict/fm_predict.h: -------------------------------------------------------------------------------- 1 | // 2 | // fm_predict.hpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef fm_predict_h 10 | #define fm_predict_h 11 | 12 | #include 13 | #include "../fm_algo_abst.h" 14 | #include "../util/evaluator.h" 15 | #include "../util/activations.h" 16 | 17 | class FM_Predict { 18 | public: 19 | FM_Predict(FM_Algo_Abst* p, string _testDataPath, bool with_valid_label) { 20 | this->fm = p; 21 | loadDataRow(_testDataPath, with_valid_label); 22 | auc = new AucEvaluator(); 23 | } 24 | ~FM_Predict() { 25 | delete auc; 26 | } 27 | void Predict(string); 28 | void loadDataRow(string, bool); 29 | 30 | private: 31 | FM_Algo_Abst* fm; 32 | size_t test_dataRow_cnt; 33 | vector > test_dataSet; 34 | vector test_label; 35 | 36 | AucEvaluator* auc; 37 | Sigmoid sigmoid; 38 | }; 39 | 40 | #endif /* fm_predict_h */ 41 | -------------------------------------------------------------------------------- /LightCTR/predict/gbm_predict.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // gbm_predict.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "gbm_predict.h" 10 | #include 11 | 12 | void GBM_Predict::Predict(string savePath) { 13 | static vector ans, tmp; 14 | static vector pLabel; 15 | 16 | tmp.resize(gbm->multiclass); 17 | ans.clear(); 18 | pLabel.clear(); 19 | 20 | assert(gbm->RegTreeRootArr.size() % gbm->multiclass == 0); 21 | 22 | for (size_t rid = 0; rid < this->test_dataRow_cnt; rid++) { // data row 23 | fill(tmp.begin(), tmp.end(), 0); 24 | for (size_t tid = 0; tid < gbm->RegTreeRootArr.size(); tid+=gbm->multiclass) { 25 | for (size_t c = 0; c < gbm->multiclass; c++) { 26 | tmp[c] += gbm->locAtLeafWeight(gbm->RegTreeRootArr[tid + c], 27 | test_dataSet[rid]); 28 | } 29 | } 30 | 31 | float pCTR; 32 | if (gbm->multiclass == 1) { 33 | pCTR = sigmoid.forward(tmp[0]); 34 | pLabel.emplace_back(pCTR > 0.5 ? 
1 : 0); 35 | } else { 36 | softmax.forward(tmp.data(), tmp.size()); 37 | size_t idx = softmax.forward_max(tmp.data(), tmp.size()); 38 | pCTR = tmp[idx]; 39 | pLabel.emplace_back(idx); 40 | } 41 | 42 | assert(!isnan(pCTR)); 43 | ans.emplace_back(pCTR); 44 | } 45 | 46 | if (!test_label.empty()) { 47 | assert(ans.size() == test_label.size()); 48 | float loss = 0; 49 | int correct = 0; 50 | for (size_t i = 0; i < test_label.size(); i++) { 51 | if (gbm->multiclass == 1) { 52 | assert(ans[i] > 0 && ans[i] < 1); 53 | loss += (int)this->test_label[i] == 1 ? log(ans[i]) : log(1.0 - ans[i]); 54 | } else { 55 | assert(ans[i] > 0 && ans[i] <= 1); 56 | loss += log(ans[i]); 57 | } 58 | 59 | assert(!isnan(loss)); 60 | if (this->test_label[i] == pLabel[i]) { 61 | correct++; 62 | } 63 | } 64 | cout << "total log likelihood = " << -loss << " correct = " << setprecision(5) << 65 | (float)correct / test_dataRow_cnt; 66 | 67 | if (gbm->multiclass == 1) { 68 | auc->init(&ans, &test_label); 69 | printf(" auc = %.4f", auc->Auc()); 70 | } 71 | printf("\n"); 72 | } 73 | } 74 | 75 | void GBM_Predict::loadDataRow(string dataPath, bool with_valid_label) { 76 | test_dataSet.clear(); 77 | test_label.clear(); 78 | 79 | ifstream fin_; 80 | string line; 81 | int nchar, y; 82 | size_t fid, rid = 0; 83 | int val; 84 | fin_.open(dataPath, ios::in); 85 | if(!fin_.is_open()){ 86 | cout << "open file error!" << endl; 87 | exit(1); 88 | } 89 | map tmp; 90 | while(!fin_.eof()){ 91 | getline(fin_, line); 92 | tmp.clear(); 93 | const char *pline = line.c_str(); 94 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 95 | pline += nchar + 1; 96 | if (gbm->multiclass > 1) { 97 | assert(y < gbm->multiclass); 98 | } else { 99 | y = y < 5 ? 0 : 1; 100 | } 101 | test_label.emplace_back(y); 102 | fid = 0; 103 | while(pline < line.c_str() + (int)line.length() && 104 | sscanf(pline, "%d%n", &val, &nchar) >= 1){ 105 | pline += nchar + 1; 106 | if (*pline == ',') 107 | pline += 1; 108 | fid++; 109 | if (val == 0) { 110 | continue; 111 | } 112 | tmp[fid] = val; 113 | } 114 | assert(!tmp.empty()); 115 | } 116 | if (tmp.empty()) { 117 | continue; 118 | } 119 | this->test_dataSet.emplace_back(move(tmp)); 120 | rid++; 121 | } 122 | this->test_dataRow_cnt = this->test_dataSet.size(); 123 | assert(test_dataRow_cnt > 0 && test_label.size() == test_dataRow_cnt); 124 | } 125 | 126 | -------------------------------------------------------------------------------- /LightCTR/predict/gbm_predict.h: -------------------------------------------------------------------------------- 1 | // 2 | // gbm_predict.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
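// A minimal illustrative sketch of the scoring loop in GBM_Predict::Predict
// above: RegTreeRootArr is walked in blocks of `multiclass` trees, so each
// boosting round contributes one leaf weight per class; the per-class sums go
// through sigmoid (binary) or softmax plus argmax (multiclass). The helper
// below restates that on a plain scores[round][class] table, with the usual
// max-shift for a stable softmax; it is a sketch, not the project's tree code.
#include <vector>
#include <cmath>
#include <cstddef>
#include <algorithm>

inline std::size_t gbm_argmax_class(const std::vector<std::vector<float>>& scores,
                                    float* prob_out = nullptr) {
    const std::size_t n_class = scores.empty() ? 0 : scores[0].size();
    if (n_class == 0) return 0;
    std::vector<float> total(n_class, 0.0f);
    for (const auto& round : scores)               // one leaf weight per class per round
        for (std::size_t c = 0; c < n_class; ++c)
            total[c] += round[c];
    const float shift = *std::max_element(total.begin(), total.end());
    float denom = 0.0f;
    for (float& t : total) { t = std::exp(t - shift); denom += t; }
    std::size_t best = 0;
    for (std::size_t c = 1; c < n_class; ++c)
        if (total[c] > total[best]) best = c;
    if (prob_out) *prob_out = total[best] / denom; // pCTR analogue for the winning class
    return best;
}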
7 | // 8 | 9 | #ifndef gbm_predict_h 10 | #define gbm_predict_h 11 | 12 | #include 13 | #include "../gbm_algo_abst.h" 14 | #include 15 | #include "../util/evaluator.h" 16 | #include "../util/activations.h" 17 | 18 | class GBM_Predict { 19 | public: 20 | GBM_Predict(GBM_Algo_Abst* p, string _testDataPath, bool with_valid_label) { 21 | this->gbm = p; 22 | loadDataRow(_testDataPath, with_valid_label); 23 | auc = new AucEvaluator(); 24 | } 25 | ~GBM_Predict() { 26 | delete auc; 27 | } 28 | void Predict(string); 29 | void loadDataRow(string, bool); 30 | 31 | private: 32 | GBM_Algo_Abst* gbm; 33 | size_t test_dataRow_cnt; 34 | vector > test_dataSet; 35 | vector test_label; 36 | 37 | AucEvaluator* auc; 38 | Sigmoid sigmoid; 39 | Softmax softmax; 40 | }; 41 | 42 | #endif /* gbm_predict_h */ 43 | -------------------------------------------------------------------------------- /LightCTR/third/install_third.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -x -e 3 | 4 | git clone git://github.com/zeromq/libzmq.git || true 5 | cd libzmq 6 | ./autogen.sh 7 | ./configure --prefix=`pwd`/../zeromq 8 | make && make install -------------------------------------------------------------------------------- /LightCTR/third/zeromq/include/zmq_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2007-2016 Contributors as noted in the AUTHORS file 3 | 4 | This file is part of libzmq, the ZeroMQ core engine in C++. 5 | 6 | libzmq is free software; you can redistribute it and/or modify it under 7 | the terms of the GNU Lesser General Public License (LGPL) as published 8 | by the Free Software Foundation; either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | As a special exception, the Contributors give you permission to link 12 | this library with independent modules to produce an executable, 13 | regardless of the license terms of these independent modules, and to 14 | copy and distribute the resulting executable under terms of your choice, 15 | provided that you also meet, for each linked independent module, the 16 | terms and conditions of the license of that module. An independent 17 | module is a module which is not derived from or based on this library. 18 | If you modify this library, you must extend this exception to your 19 | version of the library. 20 | 21 | libzmq is distributed in the hope that it will be useful, but WITHOUT 22 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 23 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 24 | License for more details. 25 | 26 | You should have received a copy of the GNU Lesser General Public License 27 | along with this program. If not, see . 28 | */ 29 | 30 | /* This file is deprecated, and all its functionality provided by zmq.h */ 31 | /* Note that -Wpedantic compilation requires GCC to avoid using its custom 32 | extensions such as #warning, hence the trick below. Also, pragmas for 33 | warnings or other messages are not standard, not portable, and not all 34 | compilers even have an equivalent concept. 35 | So in the worst case, this include file is treated as silently empty. 
*/ 36 | 37 | #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || defined(_MSC_VER) 38 | #if defined(__GNUC__) || defined(__GNUG__) 39 | #pragma GCC diagnostic push 40 | #pragma GCC diagnostic warning "-Wcpp" 41 | #pragma GCC diagnostic ignored "-Werror" 42 | #pragma GCC diagnostic ignored "-Wall" 43 | #endif 44 | #pragma message("Warning: zmq_utils.h is deprecated. All its functionality is provided by zmq.h.") 45 | #if defined(__GNUC__) || defined(__GNUG__) 46 | #pragma GCC diagnostic pop 47 | #endif 48 | #endif 49 | -------------------------------------------------------------------------------- /LightCTR/third/zeromq/lib/libzmq.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/LightCTR/third/zeromq/lib/libzmq.a -------------------------------------------------------------------------------- /LightCTR/train/layer/adapterLayer.h: -------------------------------------------------------------------------------- 1 | // 2 | // adapterLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/25. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef adapterLayer_h 10 | #define adapterLayer_h 11 | 12 | #include "layer_abst.h" 13 | #include 14 | 15 | // Flatten and concat Matrixs into dataRow adapting CNN to FC or LSTM sequences to Attention input 16 | template 17 | class Adapter_Layer : public Layer_Base { 18 | public: 19 | Adapter_Layer(Layer_Base* _prevLayer, size_t flatten_cnt): 20 | Layer_Base(_prevLayer, _prevLayer->output_dimension, _prevLayer->output_dimension) { 21 | this->activeFun = new ActivationFunction(); 22 | this->output_dimension *= flatten_cnt * flatten_cnt; 23 | 24 | printf("Adapter Layer\n"); 25 | } 26 | Adapter_Layer() = delete; 27 | 28 | ~Adapter_Layer() { 29 | } 30 | 31 | vector& forward(const vector& prevLOutput) { 32 | // init ThreadLocal var 33 | Matrix& output_act = *tl_output_act; 34 | MatrixArr& input_delta = *tl_input_delta; 35 | // indicate lazy init once 36 | assert(this->output_dimension == prevLOutput.size() * prevLOutput[0]->size()); 37 | output_act.reset(1, this->output_dimension); 38 | input_delta.arr.resize(this->input_dimension); 39 | FOR(i, this->input_dimension) { 40 | if (!input_delta.arr[i]) { 41 | input_delta.arr[i] = 42 | new Matrix(prevLOutput[0]->x_len, prevLOutput[0]->y_len); 43 | } 44 | } 45 | 46 | const size_t prevLOutput_size = prevLOutput[0]->size(); 47 | FOR(i, prevLOutput.size()) { 48 | const size_t offset = i * prevLOutput_size; 49 | // Flatten data row 50 | memcpy(output_act.getEle(0, offset), prevLOutput[i]->getEle(0, 0), 51 | prevLOutput_size * sizeof(float)); 52 | } 53 | 54 | // init threadlocal wrapper 55 | vector& wrapper = *tl_wrapper; 56 | wrapper.resize(1); 57 | wrapper[0] = &output_act; 58 | return this->nextLayer->forward(wrapper); 59 | } 60 | 61 | void backward(const vector& outputDeltaMatrix) { 62 | auto outputDelta = outputDeltaMatrix[0]->pointer(); 63 | assert(outputDelta->size() == this->output_dimension); 64 | 65 | MatrixArr& input_delta = *tl_input_delta; 66 | 67 | const size_t input_delta_size = input_delta.arr[0]->size(); 68 | FOR(i, this->input_dimension) { 69 | const size_t offset = i * input_delta_size; 70 | memcpy(input_delta.arr[i]->getEle(0, 0), 71 | outputDelta->data() + offset, input_delta_size * sizeof(float)); 72 | } 73 | this->prevLayer->backward(input_delta.arr); 74 | } 75 | 76 | const vector& output() { 77 | Matrix& 
output_act = *tl_output_act; 78 | vector& wrapper = *tl_wrapper; 79 | wrapper[0] = &output_act; 80 | return wrapper; 81 | } 82 | 83 | private: 84 | ThreadLocal tl_output_act; // wx + b with activation 85 | ThreadLocal tl_input_delta; // delta of prevLayer wx+b Z_(L-1) 86 | 87 | ThreadLocal > tl_wrapper; 88 | }; 89 | 90 | #endif /* adapterLayer_h */ 91 | -------------------------------------------------------------------------------- /LightCTR/train/layer/layer_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // layer_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef layer_abst_h 10 | #define layer_abst_h 11 | 12 | #include 13 | #include 14 | #include "../../common/thread_pool.h" 15 | #include "../../common/lock.h" 16 | #include "../../util/activations.h" 17 | #include "../../util/matrix.h" 18 | #include "../../util/gradientUpdater.h" 19 | #include "../../util/momentumUpdater.h" 20 | #include "../../common/buffer_fusion.h" 21 | #include "assert.h" 22 | 23 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 24 | 25 | class Layer_Base { 26 | public: 27 | Layer_Base(Layer_Base* _prevLayer, size_t _input_dimension, size_t _output_dimension): 28 | input_dimension(_input_dimension), output_dimension(_output_dimension) { 29 | nextLayer = prevLayer = NULL; 30 | if (_prevLayer != NULL) { 31 | assert(_prevLayer->output_dimension == this->input_dimension); 32 | this->prevLayer = _prevLayer; 33 | _prevLayer->nextLayer = this; 34 | bInputLayer = false; 35 | printf("Init %zux%zu ", _input_dimension, _output_dimension); 36 | } else { 37 | bInputLayer = true; 38 | printf("Init Input %zux%zu ", _input_dimension, _output_dimension); 39 | } 40 | } 41 | Layer_Base() = delete; 42 | virtual ~Layer_Base() { 43 | } 44 | 45 | virtual vector& forward(const vector& prevLOutputMatrix) = 0; 46 | 47 | virtual void backward(const vector& outputDeltaMatrix) = 0; 48 | 49 | virtual const vector& output() = 0; 50 | 51 | virtual void registerInitializer(std::shared_ptr > _buf_fusion) { 52 | if (this->nextLayer) { 53 | this->nextLayer->registerInitializer(_buf_fusion); 54 | } 55 | } 56 | 57 | virtual void registerGradient(std::shared_ptr > _buf_fusion) { 58 | if (this->nextLayer) { 59 | this->nextLayer->registerGradient(_buf_fusion); 60 | } 61 | } 62 | 63 | virtual void applyBatchGradient() { // for each mini-batch gradient batch update stage 64 | if (nextLayer) { 65 | nextLayer->applyBatchGradient(); 66 | } 67 | } 68 | 69 | Activation& getActiveFun() const { 70 | assert(activeFun); // Notice to init activeFun in instance 71 | return *activeFun; 72 | } 73 | 74 | Activation* activeFun; 75 | 76 | Layer_Base *nextLayer, *prevLayer; 77 | 78 | size_t input_dimension, output_dimension; 79 | 80 | bool bInputLayer; 81 | 82 | SpinLock lock; 83 | }; 84 | 85 | #endif /* layer_abst_h */ 86 | -------------------------------------------------------------------------------- /LightCTR/train/layer/poolingLayer.h: -------------------------------------------------------------------------------- 1 | // 2 | // poolingLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
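// A minimal illustrative sketch of the index arithmetic in Adapter_Layer
// above: the forward pass flattens input_dimension feature maps into one dense
// row by copying map i to offset i * map_size, and the backward pass is the
// mirror-image scatter of the flat delta back into per-map buffers. The pair
// of helpers below shows that on std::vector, without the project's Matrix or
// ThreadLocal plumbing; the names are illustrative only.
#include <vector>
#include <cstring>
#include <cstddef>

inline std::vector<float> flatten_maps(const std::vector<std::vector<float>>& maps) {
    const std::size_t map_size = maps.empty() ? 0 : maps[0].size();
    std::vector<float> row(maps.size() * map_size);
    for (std::size_t i = 0; i < maps.size(); ++i)
        std::memcpy(row.data() + i * map_size, maps[i].data(),
                    map_size * sizeof(float));     // forward: concat into one data row
    return row;
}

inline std::vector<std::vector<float>> scatter_delta(const std::vector<float>& delta,
                                                     std::size_t n_maps) {
    const std::size_t map_size = n_maps ? delta.size() / n_maps : 0;
    std::vector<std::vector<float>> out(n_maps, std::vector<float>(map_size));
    for (std::size_t i = 0; i < n_maps; ++i)
        std::memcpy(out[i].data(), delta.data() + i * map_size,
                    map_size * sizeof(float));     // backward: split the flat delta per map
    return out;
}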
7 | // 8 | 9 | #ifndef poolingLayer_h 10 | #define poolingLayer_h 11 | 12 | #include 13 | #include "../../util/matrix.h" 14 | #include "layer_abst.h" 15 | 16 | struct Pool_Config { 17 | size_t size; 18 | }; 19 | // Pooling or Maxout 20 | // TODO K-Max Pooling 21 | template 22 | class Max_Pooling_Layer : public Layer_Base { 23 | public: 24 | Max_Pooling_Layer(Layer_Base* _prevLayer, size_t _dimension, Pool_Config _config): 25 | Layer_Base(_prevLayer, _dimension, _dimension), config(_config) { 26 | this->activeFun = new ActivationFunction(); 27 | assert(this->input_dimension == this->output_dimension); 28 | 29 | printf("Pooling Layer\n"); 30 | } 31 | Max_Pooling_Layer() = delete; 32 | 33 | ~Max_Pooling_Layer() { 34 | } 35 | 36 | vector& forward(const vector& prevLOutput) { 37 | assert(prevLOutput.size() == this->input_dimension); 38 | 39 | // init ThreadLocal var 40 | MatrixArr& output_act = *tl_output_act; 41 | output_act.arr.resize(this->output_dimension); 42 | MatrixArr& input_delta = *tl_input_delta; 43 | input_delta.arr.resize(this->input_dimension); 44 | 45 | // do Max pooling 46 | FOR(feamid, this->input_dimension) { 47 | Matrix* mat = prevLOutput[feamid]; 48 | 49 | assert(mat->x_len >= config.size && mat->y_len >= config.size); 50 | 51 | if (input_delta.arr[feamid] == NULL) { 52 | output_act.arr[feamid] = new Matrix((mat->x_len - config.size) / config.size + 1, 53 | (mat->y_len - config.size) / config.size + 1); 54 | input_delta.arr[feamid] = new Matrix(mat->x_len, mat->y_len); 55 | } 56 | 57 | auto cur_out = output_act.arr[feamid]; 58 | cur_out->zeroInit(); 59 | auto cur_in = input_delta.arr[feamid]; 60 | cur_in->zeroInit(); 61 | for (size_t i = 0; i < mat->x_len - config.size + 1; i+= config.size) { 62 | for (size_t j = 0; j < mat->y_len - config.size + 1; j+=config.size) { 63 | float MaxV = *mat->getEle(i, j); 64 | size_t mx = i, my = j; 65 | for (size_t x = i; x < i + config.size; x++) { 66 | for (size_t y = j; y < j + config.size; y++) { 67 | if (MaxV < *mat->getEle(x, y)) { 68 | MaxV = *mat->getEle(x, y); 69 | mx = x, my = y; 70 | } 71 | } 72 | } 73 | *cur_out->getEle(i / config.size, j / config.size) = MaxV; 74 | *cur_in->getEle(mx, my) = 1; 75 | } 76 | } 77 | } 78 | return this->nextLayer->forward(output_act.arr); 79 | } 80 | 81 | void backward(const vector& outputDelta) { 82 | assert(outputDelta.size() == this->output_dimension); 83 | 84 | MatrixArr& input_delta = *tl_input_delta; 85 | 86 | // Unpooling 87 | FOR(fid, this->input_dimension) { 88 | Matrix* mat = input_delta.arr[fid]; 89 | for (size_t i = 0; i < mat->x_len - config.size + 1; i+= config.size) { 90 | for (size_t j = 0; j < mat->y_len - config.size + 1; j+=config.size) { 91 | // loop pooling size 92 | for (size_t x = i; x < i + config.size; x++) { 93 | for (size_t y = j; y < j + config.size; y++) { 94 | if (*mat->getEle(x, y) > 0) { 95 | *mat->getEle(x, y) = *outputDelta[fid]->getEle(i / config.size, j / config.size); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | } 102 | return this->prevLayer->backward(input_delta.arr); 103 | } 104 | 105 | const vector& output() { 106 | MatrixArr& output_act = *tl_output_act; 107 | return output_act.arr; 108 | } 109 | 110 | private: 111 | Pool_Config config; 112 | ThreadLocal tl_output_act; 113 | ThreadLocal tl_input_delta; // mask of max position 114 | }; 115 | 116 | #endif /* poolingLayer_h */ 117 | -------------------------------------------------------------------------------- /LightCTR/train/layer/sampleLayer.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // sampleLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/21. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef sampleLayer_h 10 | #define sampleLayer_h 11 | 12 | #include "fullyconnLayer.h" 13 | #include "../../util/random.h" 14 | 15 | template 16 | class Sample_Layer : public Layer_Base { 17 | public: 18 | Sample_Layer(Layer_Base* _prevLayer, size_t _input_dimension): 19 | Layer_Base(_prevLayer, _input_dimension, _input_dimension >> 1) { 20 | assert((_input_dimension & 1) == 0); 21 | gauss_cnt = _input_dimension >> 1; 22 | noise = new float[gauss_cnt]; 23 | FOR(i, gauss_cnt) { 24 | noise[i] = GaussRand(); // only generate noise for sampling init once 25 | } 26 | bEncoding = false; 27 | 28 | this->activeFun = new ActivationFunction(); 29 | 30 | inner_scale = 1.0f; 31 | 32 | printf("Sample Layer\n"); 33 | } 34 | Sample_Layer() = delete; 35 | ~Sample_Layer() { 36 | delete[] noise; 37 | } 38 | 39 | vector& forward(const vector& prevLOutputMatrix) { 40 | auto prevLOutput = prevLOutputMatrix[0]->pointer(); 41 | assert(prevLOutput->size() == this->input_dimension); 42 | 43 | // init ThreadLocal var 44 | Matrix& output_act = *tl_output_act; 45 | output_act.reset(1, this->output_dimension); 46 | 47 | float gaussDelta = 0.0f; 48 | FOR(i, gauss_cnt) { 49 | // prev layer output is mu and log(sigma^2) 50 | float mu = prevLOutput->at(i); 51 | float logSigma2 = prevLOutput->at(i + gauss_cnt); 52 | 53 | // min[ 0.5 * sum( exp(log_Sigma^2) - (1 + log_Sigma^2) + mu^2 ) ] 54 | gaussDelta += exp(inner_scale * logSigma2) - (1 + logSigma2) + mu * mu; 55 | assert(!isinf(gaussDelta)); 56 | 57 | // standard deviation equal to exp(0.5 * logSigma2) 58 | *output_act.getEle(0, i) = exp(inner_scale * 0.5f * logSigma2) * noise[i] + mu; 59 | assert(!isinf(*output_act.getEle(0, i))); 60 | } 61 | gaussDelta *= 0.5f; 62 | // cout << endl << endl << "gaussDelta = " << gaussDelta << endl << endl; 63 | if (bEncoding) { 64 | return output_act.reference(); 65 | } 66 | // init threadlocal wrapper 67 | vector& wrapper = *tl_wrapper; 68 | wrapper.resize(1); 69 | wrapper[0] = &output_act; 70 | return this->nextLayer->forward(wrapper); 71 | } 72 | 73 | void backward(const vector& outputDeltaMatrix) { 74 | assert(this->prevLayer); 75 | auto outputDelta = outputDeltaMatrix[0]->pointer(); 76 | assert(outputDelta->size() == this->output_dimension); 77 | auto prev_output_act = this->prevLayer->output()[0]->pointer(); 78 | assert(prev_output_act->size() == this->input_dimension); 79 | 80 | // init ThreadLocal var 81 | Matrix& input_delta = *tl_input_delta; 82 | input_delta.reset(1, this->input_dimension); 83 | 84 | FOR(i, gauss_cnt) { 85 | assert(!isnan(outputDelta->at(i))); 86 | auto muPtr = input_delta.getEle(0, i); 87 | auto sigmaPtr = input_delta.getEle(0, i + gauss_cnt); 88 | 89 | // Target Loss about mu and log(sigma^2) 90 | auto sigmaGrad = 0.5f * exp(inner_scale * 0.5f * prev_output_act->at(i + gauss_cnt)) * noise[i]; 91 | *muPtr = outputDelta->at(i); 92 | *sigmaPtr = outputDelta->at(i) * sigmaGrad; 93 | assert(!isinf(*sigmaPtr)); 94 | 95 | // update Gauss Parameters Loss close to Normal distribution 96 | *muPtr += GradientUpdater::__global_learning_rate * prev_output_act->at(i); 97 | *sigmaPtr += GradientUpdater::__global_learning_rate * 98 | (exp(inner_scale * prev_output_act->at(i + gauss_cnt)) - 1.0f); 99 | 100 | assert(!isinf(*sigmaPtr)); 101 | } 102 | 
this->prevLayer->getActiveFun().backward(input_delta.pointer()->data(), 103 | prev_output_act->data(), 104 | input_delta.pointer()->data(), 105 | input_delta.size()); 106 | 107 | vector& wrapper = *tl_wrapper; 108 | wrapper[0] = &input_delta; 109 | this->prevLayer->backward(wrapper); 110 | } 111 | 112 | const vector& output() { 113 | Matrix& output_act = *tl_output_act; 114 | 115 | vector& wrapper = *tl_wrapper; 116 | wrapper[0] = &output_act; 117 | return wrapper; 118 | } 119 | 120 | bool bEncoding; // mark for forward encode 121 | 122 | private: 123 | ThreadLocal > tl_wrapper; 124 | 125 | ThreadLocal tl_output_act; // wx + b with activation 126 | ThreadLocal tl_input_delta; // delta of prevLayer wx+b Z_(L-1) 127 | 128 | float inner_scale; 129 | 130 | float* noise; 131 | size_t gauss_cnt; 132 | }; 133 | 134 | #endif /* sampleLayer_h */ 135 | -------------------------------------------------------------------------------- /LightCTR/train/train_cnn_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_cnn_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/9. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef train_cnn_algo_h 10 | #define train_cnn_algo_h 11 | 12 | #include "../dl_algo_abst.h" 13 | #include "layer/poolingLayer.h" 14 | #include "layer/adapterLayer.h" 15 | #include "layer/convLayer.h" 16 | #include "../distribut/ring_collect.h" 17 | using namespace std; 18 | 19 | template 20 | class Train_CNN_Algo : public DL_Algo_Abst { 22 | public: 23 | Train_CNN_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 24 | size_t _hidden_size, size_t _multiclass_output_cnt = 1): 25 | DL_Algo_Abst( 26 | dataPath, _epoch, _feature_cnt, _hidden_size, _multiclass_output_cnt) { 27 | this->dl_algo = CNN; 28 | initNetwork(_hidden_size); 29 | } 30 | Train_CNN_Algo() = delete; 31 | ~Train_CNN_Algo() { 32 | #ifdef WORKER_RING 33 | delete syncer; 34 | #endif 35 | } 36 | 37 | void initNetwork(size_t hidden_size) { 38 | // Net structure of 28x28: 5x5 12 pool 6 3x3 4 3x3 2 flatten fc-100 39 | this->inputLayer = new Conv_Layer(NULL, 1, 6, CNN_Config{5, 0, 2}); 40 | this->appendNNLayer(this->inputLayer); 41 | 42 | Layer_Base* poolLayer = 43 | new Max_Pooling_Layer(this->inputLayer, 6, Pool_Config{2}); 44 | this->appendNNLayer(poolLayer); 45 | 46 | Layer_Base* hidden1 = 47 | new Conv_Layer(poolLayer, 6, 16, CNN_Config{3, 0, 1}); 48 | this->appendNNLayer(hidden1); 49 | 50 | Layer_Base* hidden2 = 51 | new Conv_Layer(hidden1, 16, 20, CNN_Config{3, 0, 1}); 52 | this->appendNNLayer(hidden2); 53 | 54 | Layer_Base* adapter = new Adapter_Layer(hidden2, 2); 55 | this->appendNNLayer(adapter); 56 | 57 | Layer_Base* fcLayer = 58 | new Fully_Conn_Layer(adapter, 20 * 2 * 2, hidden_size); 59 | this->appendNNLayer(fcLayer); 60 | 61 | this->outputLayer = new Fully_Conn_Layer(fcLayer, hidden_size, 62 | this->multiclass_output_cnt); 63 | this->appendNNLayer(this->outputLayer); 64 | #ifdef WORKER_RING 65 | syncer = new Worker_RingReduce(__global_cluster_worker_cnt); 66 | auto buf_fusion = std::make_shared >(false, false); 67 | this->inputLayer->registerInitializer(buf_fusion); 68 | syncer->syncInitializer(buf_fusion); 69 | puts("[RING] Sync initializer complete"); 70 | #endif 71 | } 72 | 73 | const vector& Predict(size_t rid, vector >& dataRow) { 74 | Matrix*& dataRow_Matrix = *tl_dataRow_Matrix; 75 | if (dataRow_Matrix == NULL) { 76 | dataRow_Matrix = new Matrix(sqrt((float)this->feature_cnt), 77 | 
sqrt((float)this->feature_cnt)); 78 | } 79 | dataRow_Matrix->pointer()->assign(dataRow[rid].begin(), dataRow[rid].end()); 80 | 81 | vector wrapper; 82 | wrapper.resize(1); 83 | wrapper[0] = dataRow_Matrix; 84 | return this->inputLayer->forward(wrapper); 85 | } 86 | 87 | void BP(size_t rid, const vector& grad) { 88 | this->outputLayer->backward(grad); 89 | } 90 | 91 | void applyBP(size_t epoch) const { 92 | #ifdef WORKER_RING 93 | auto buf_fusion = std::make_shared >(false, false); 94 | this->inputLayer->registerGradient(buf_fusion); 95 | syncer->syncGradient(buf_fusion, epoch); 96 | #endif 97 | this->inputLayer->applyBatchGradient(); 98 | } 99 | private: 100 | Worker_RingReduce* syncer; 101 | ThreadLocal tl_dataRow_Matrix; 102 | }; 103 | 104 | #endif /* train_cnn_algo_h */ 105 | -------------------------------------------------------------------------------- /LightCTR/train/train_ffm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_ffm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/19. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "train_ffm_algo.h" 10 | #include "../common/avx.h" 11 | 12 | void Train_FFM_Algo::init() { 13 | L2Reg_ratio = 0.001f; 14 | 15 | learnable_params_cnt = this->feature_cnt * this->field_cnt * this->factor_cnt 16 | + this->feature_cnt; 17 | update_g = new float[learnable_params_cnt]; 18 | updater.learnable_params_cnt(learnable_params_cnt); 19 | 20 | printf("Training FFM\n"); 21 | } 22 | 23 | void Train_FFM_Algo::Train() { 24 | 25 | GradientUpdater::__global_bTraining = true; 26 | GradientUpdater::__global_minibatch_size = dataRow_cnt; 27 | 28 | for (size_t i = 0; i < this->epoch; i++) { 29 | __loss = 0; 30 | __accuracy = 0; 31 | 32 | this->proc_data_left = (int)this->dataRow_cnt; 33 | 34 | size_t thread_hold_dataRow_cnt = (this->dataRow_cnt + this->proc_cnt - 1) / this->proc_cnt; 35 | 36 | for (size_t pid = 0; pid < this->proc_cnt; pid++) { 37 | size_t start_pos = pid * thread_hold_dataRow_cnt; 38 | threadpool->addTask(bind(&Train_FFM_Algo::batchGradCompute, this, start_pos, 39 | min(start_pos + thread_hold_dataRow_cnt, this->dataRow_cnt))); 40 | } 41 | threadpool->wait(); 42 | 43 | printf("Epoch %zu Train Loss = %f Accuracy = %f\n", i, __loss, __accuracy / dataRow_cnt); 44 | // apply gradient 45 | ApplyGrad(); 46 | } 47 | 48 | GradientUpdater::__global_bTraining = false; 49 | } 50 | 51 | void Train_FFM_Algo::batchGradCompute(size_t rbegin, size_t rend) { 52 | for (size_t rid = rbegin; rid < rend; rid++) { // data row 53 | float fm_pred = 0.0f; 54 | 55 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 56 | const size_t fid = dataSet[rid][i].first; 57 | const float X = dataSet[rid][i].second; 58 | const size_t field = dataSet[rid][i].field; 59 | 60 | fm_pred += W[fid] * X; 61 | 62 | for (size_t j = i + 1; j < dataSet[rid].size(); j++) { 63 | const size_t fid2 = dataSet[rid][j].first; 64 | const float X2 = dataSet[rid][j].second; 65 | const size_t field2 = dataSet[rid][j].field; 66 | 67 | float field_w = avx_dotProduct(getV_field(fid, field2, 0), 68 | getV_field(fid2, field, 0), factor_cnt); 69 | fm_pred += field_w * X * X2; 70 | } 71 | } 72 | accumWVGrad(rid, sigmoid.forward(fm_pred)); 73 | } 74 | assert(this->proc_data_left > 0); 75 | this->proc_data_left -= rend - rbegin; 76 | } 77 | 78 | void Train_FFM_Algo::accumWVGrad(size_t rid, float pred) { 79 | const float target = label[rid]; 80 | const float loss = pred - target; 81 | if (loss == 
0) { 82 | return; 83 | } 84 | __loss += target == 1 ? -log(pred) : -log(1.0 - pred); 85 | if (pred > 0.5 && target == 1) { 86 | __accuracy++; 87 | } else if (pred < 0.5 && target == 0) { 88 | __accuracy++; 89 | } 90 | 91 | size_t fid, fid2, field, field2; 92 | float x, x2; 93 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 94 | fid = dataSet[rid][i].first; 95 | x = dataSet[rid][i].second; 96 | field = dataSet[rid][i].field; 97 | 98 | *update_W(fid) += loss * x + L2Reg_ratio * W[fid]; 99 | 100 | for (size_t j = i + 1; j < dataSet[rid].size(); j++) { 101 | fid2 = dataSet[rid][j].first; 102 | x2 = dataSet[rid][j].second; 103 | field2 = dataSet[rid][j].field; 104 | 105 | const float scaler = x * x2 * loss; 106 | const float* v1 = getV_field(fid, field2, 0); 107 | const float* v2 = getV_field(fid2, field, 0); 108 | float* update_v1 = update_V(fid, field2, 0); 109 | float* update_v2 = update_V(fid2, field, 0); 110 | 111 | avx_vecScalerAdd(update_v1, v2, update_v1, scaler, factor_cnt); 112 | avx_vecScalerAdd(update_v1, v1, update_v1, L2Reg_ratio, factor_cnt); 113 | 114 | avx_vecScalerAdd(update_v2, v1, update_v2, scaler, factor_cnt); 115 | avx_vecScalerAdd(update_v2, v2, update_v2, L2Reg_ratio, factor_cnt); 116 | } 117 | } 118 | } 119 | 120 | void Train_FFM_Algo::ApplyGrad() { 121 | updater.update(0, this->feature_cnt, W, update_g); 122 | 123 | float *gradV = update_g + this->feature_cnt; 124 | updater.update(this->feature_cnt, this->feature_cnt * 125 | this->field_cnt * this->factor_cnt, V, gradV); 126 | } 127 | -------------------------------------------------------------------------------- /LightCTR/train/train_ffm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_ffm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/19. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
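// A minimal illustrative sketch of the field-aware update in accumWVGrad
// above: the pairwise term is phi_ij = <v_{i,field(j)}, v_{j,field(i)}> * x_i * x_j,
// so its gradient w.r.t. v_{i,field(j)} is (pred - label) * x_i * x_j * v_{j,field(i)}
// plus the L2 term -- exactly what the paired avx_vecScalerAdd calls
// accumulate. The scalar version below spells out one feature pair; the
// function and variable names are illustrative, not the project's.
#include <vector>
#include <cstddef>

inline void ffm_pair_grad(const std::vector<float>& v_i_fj,   // v_{i, field(j)}
                          const std::vector<float>& v_j_fi,   // v_{j, field(i)}
                          float x_i, float x_j, float loss_grad, float l2,
                          std::vector<float>& g_i_fj,
                          std::vector<float>& g_j_fi) {
    const float scaler = x_i * x_j * loss_grad;    // loss_grad = pred - label
    for (std::size_t f = 0; f < v_i_fj.size(); ++f) {
        g_i_fj[f] += scaler * v_j_fi[f] + l2 * v_i_fj[f];
        g_j_fi[f] += scaler * v_i_fj[f] + l2 * v_j_fi[f];
    }
}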
7 | // 8 | 9 | #ifndef train_ffm_algo_h 10 | #define train_ffm_algo_h 11 | 12 | #include "../fm_algo_abst.h" 13 | #include 14 | #include 15 | #include "../util/activations.h" 16 | #include "../util/gradientUpdater.h" 17 | #include "../common/thread_pool.h" 18 | #include "../common/lock.h" 19 | using namespace std; 20 | 21 | // Field-aware FM 22 | class Train_FFM_Algo : public FM_Algo_Abst { 23 | 24 | public: 25 | Train_FFM_Algo(string _dataPath, size_t _epoch_cnt, 26 | size_t _factor_cnt, size_t _field_cnt): 27 | FM_Algo_Abst(_dataPath, _factor_cnt, _field_cnt), epoch(_epoch_cnt) { 28 | assert(this->feature_cnt != 0); 29 | threadpool = new ThreadPool(this->proc_cnt); 30 | init(); 31 | } 32 | Train_FFM_Algo() = delete; 33 | 34 | ~Train_FFM_Algo() { 35 | delete threadpool; 36 | threadpool = NULL; 37 | } 38 | 39 | void init(); 40 | void Train(); 41 | 42 | private: 43 | size_t epoch; 44 | int proc_data_left; 45 | 46 | Sigmoid sigmoid; 47 | 48 | size_t learnable_params_cnt; 49 | 50 | void batchGradCompute(size_t, size_t); 51 | void accumWVGrad(size_t rid, float pred); 52 | 53 | float *update_g; 54 | inline float* update_W(size_t fid) { 55 | return &update_g[fid]; 56 | } 57 | inline float* update_V(size_t fid, size_t fieldid, size_t facid) { 58 | return &update_g[this->feature_cnt + fid * this->field_cnt * this->factor_cnt 59 | + fieldid * this->factor_cnt + facid]; 60 | } 61 | void ApplyGrad(); 62 | 63 | AdagradUpdater_Num updater; 64 | 65 | ThreadPool *threadpool; 66 | }; 67 | 68 | #endif /* train_ffm_algo_h */ 69 | -------------------------------------------------------------------------------- /LightCTR/train/train_fm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_fm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
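// A worked example of the flat buffer layout used by the trainers above: the
// single gradient buffer update_g holds the W block followed by the V block,
// so update_V(fid, fieldid, facid) resolves to
//   feature_cnt + fid * field_cnt * factor_cnt + fieldid * factor_cnt + facid
// (the plain FM trainer drops the field dimension). The helper below simply
// re-derives that offset; it is an illustration, not project code.
#include <cstddef>
#include <cassert>

inline std::size_t ffm_v_offset(std::size_t feature_cnt, std::size_t field_cnt,
                                std::size_t factor_cnt, std::size_t fid,
                                std::size_t fieldid, std::size_t facid) {
    assert(fid < feature_cnt && fieldid < field_cnt && facid < factor_cnt);
    return feature_cnt                             // skip the W block
         + fid * field_cnt * factor_cnt            // per-feature table of field vectors
         + fieldid * factor_cnt                    // the vector for this field
         + facid;                                  // the factor inside it
}
// e.g. feature_cnt = 10, field_cnt = 3, factor_cnt = 4:
//   ffm_v_offset(10, 3, 4, 2, 1, 3) == 10 + 24 + 4 + 3 == 41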
7 | // 8 | 9 | #include "train_fm_algo.h" 10 | #include "../common/avx.h" 11 | 12 | void Train_FM_Algo::init() { 13 | L2Reg_ratio = 0.001f; 14 | #ifdef FM 15 | learnable_params_cnt = this->feature_cnt * (this->factor_cnt + 1); 16 | #else 17 | learnable_params_cnt = this->feature_cnt; 18 | #endif 19 | sumVX = new float[this->dataRow_cnt * this->factor_cnt]; 20 | assert(sumVX); 21 | memset(sumVX, 0, sizeof(float) * this->dataRow_cnt * this->factor_cnt); 22 | 23 | update_g = new float[learnable_params_cnt]; 24 | assert(update_g); 25 | updater.learnable_params_cnt(learnable_params_cnt); 26 | } 27 | 28 | void Train_FM_Algo::flash() { 29 | memset(update_g, 0, sizeof(float) * learnable_params_cnt); 30 | #ifdef FM 31 | memset(sumVX, 0, sizeof(float) * dataRow_cnt * factor_cnt); 32 | #endif 33 | } 34 | 35 | void Train_FM_Algo::Train() { 36 | 37 | GradientUpdater::__global_bTraining = true; 38 | GradientUpdater::__global_minibatch_size = dataRow_cnt; 39 | 40 | for (size_t i = 0; i < this->epoch_cnt; i++) { 41 | __loss = 0; 42 | __accuracy = 0; 43 | 44 | flash(); 45 | this->proc_data_left = (int)this->dataRow_cnt; 46 | 47 | size_t thread_hold_dataRow_cnt = (this->dataRow_cnt + this->proc_cnt - 1) / this->proc_cnt; 48 | 49 | for (size_t pid = 0; pid < this->proc_cnt; pid++) { 50 | size_t start_pos = pid * thread_hold_dataRow_cnt; 51 | threadpool->addTask(bind(&Train_FM_Algo::batchGradCompute, this, start_pos, 52 | min(start_pos + thread_hold_dataRow_cnt, this->dataRow_cnt))); 53 | } 54 | threadpool->wait(); 55 | 56 | printf("Epoch %zu Train Loss = %f Accuracy = %f\n", i, __loss, __accuracy / dataRow_cnt); 57 | ApplyGrad(); 58 | } 59 | 60 | GradientUpdater::__global_bTraining = false; 61 | } 62 | 63 | void Train_FM_Algo::batchGradCompute(size_t rbegin, size_t rend) { 64 | 65 | vector tmp_vec; 66 | tmp_vec.resize(factor_cnt); 67 | 68 | for (size_t rid = rbegin; rid < rend; rid++) { // data row 69 | float fm_pred = 0.0f; 70 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 71 | const size_t fid = dataSet[rid][i].first; 72 | 73 | const float X = dataSet[rid][i].second; 74 | fm_pred += W[fid] * X; 75 | #ifdef FM 76 | avx_vecScale(getV(fid, 0), tmp_vec.data(), factor_cnt, X); 77 | avx_vecAdd(getSumVX(rid, 0), tmp_vec.data(), getSumVX(rid, 0), factor_cnt); 78 | fm_pred -= 0.5 * avx_dotProduct(tmp_vec.data(), tmp_vec.data(), factor_cnt); 79 | #endif 80 | } 81 | #ifdef FM 82 | fm_pred += 0.5 * avx_dotProduct(getSumVX(rid, 0), getSumVX(rid, 0), factor_cnt); 83 | #endif 84 | accumWVGrad(rid, sigmoid.forward(fm_pred)); 85 | } 86 | 87 | this->proc_data_left -= rend - rbegin; 88 | } 89 | 90 | void Train_FM_Algo::accumWVGrad(size_t rid, float pred) { 91 | const float target = label[rid]; 92 | 93 | __loss += target == 1 ? 
-log(pred) : -log(1.0 - pred); 94 | if (pred > 0.5 && target == 1) { 95 | __accuracy++; 96 | } else if (pred < 0.5 && target == 0) { 97 | __accuracy++; 98 | } 99 | 100 | size_t fid; 101 | float x; 102 | vector tmp_vec; 103 | tmp_vec.resize(factor_cnt); 104 | 105 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 106 | fid = dataSet[rid][i].first; 107 | x = dataSet[rid][i].second; 108 | const float gradW = LogisticGradW(pred, target, x) + L2Reg_ratio * W[fid]; 109 | *update_W(fid) += gradW; 110 | #ifdef FM 111 | float* ptr = update_V(fid, 0); 112 | avx_vecScalerAdd(getSumVX(rid, 0), getV(fid, 0), 113 | tmp_vec.data(), -x, factor_cnt); 114 | avx_vecScalerAdd(ptr, tmp_vec.data(), ptr, gradW, factor_cnt); 115 | avx_vecScalerAdd(ptr, getV(fid, 0), ptr, L2Reg_ratio, factor_cnt); 116 | #endif 117 | } 118 | } 119 | 120 | void Train_FM_Algo::ApplyGrad() { 121 | 122 | updater.update(0, this->feature_cnt, W, update_g); 123 | #ifdef FM 124 | float *gradV = update_g + this->feature_cnt; 125 | updater.update(this->feature_cnt, this->feature_cnt * this->factor_cnt, V, gradV); 126 | #endif 127 | } 128 | -------------------------------------------------------------------------------- /LightCTR/train/train_fm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_fm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef train_fm_algo_h 10 | #define train_fm_algo_h 11 | 12 | #include "../fm_algo_abst.h" 13 | #include 14 | #include 15 | #include "../util/activations.h" 16 | #include "../util/gradientUpdater.h" 17 | #include "../common/thread_pool.h" 18 | #include "../common/lock.h" 19 | using namespace std; 20 | 21 | class Train_FM_Algo : public FM_Algo_Abst { 22 | public: 23 | Train_FM_Algo(string _dataPath, size_t _epoch_cnt, 24 | size_t _factor_cnt): 25 | FM_Algo_Abst(_dataPath, _factor_cnt), epoch_cnt(_epoch_cnt) { 26 | assert(this->feature_cnt != 0); 27 | init(); 28 | threadpool = new ThreadPool(this->proc_cnt); 29 | } 30 | Train_FM_Algo() = delete; 31 | 32 | ~Train_FM_Algo() { 33 | delete [] update_g; 34 | delete threadpool; 35 | threadpool = NULL; 36 | } 37 | 38 | void init(); 39 | void Train(); 40 | 41 | private: 42 | ThreadPool *threadpool; 43 | int proc_data_left; 44 | size_t epoch_cnt; 45 | 46 | size_t learnable_params_cnt; 47 | 48 | void flash(); 49 | 50 | Sigmoid sigmoid; 51 | 52 | void batchGradCompute(size_t, size_t); 53 | void accumWVGrad(size_t, float); 54 | 55 | float *update_g; 56 | inline float* update_W(size_t fid) const { 57 | return &update_g[fid]; 58 | } 59 | inline float* update_V(size_t fid, size_t facid) const { 60 | return &update_g[this->feature_cnt + fid * this->factor_cnt + facid]; 61 | } 62 | void ApplyGrad(); 63 | }; 64 | 65 | #endif /* train_fm_algo_h */ 66 | -------------------------------------------------------------------------------- /LightCTR/train/train_gbm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_gbm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
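// [Editorial note, not part of the original source] The inline helpers further down
// (grad/hess/weight/gain) follow the second-order boosting formulation: for a node with
// gradient sum G and hessian sum H, the optimal leaf weight is -T(G) / (H + lambda) and
// its structure score is T(G)^2 / (H + lambda), where T is soft-thresholding by lambda
// (ThresholdL1). A split is typically scored as score(L) + score(R) - score(parent); the
// selection logic used here lives in train_gbm_algo.cpp, which is not reproduced in this
// excerpt. A minimal sketch of the split-gain arithmetic (illustrative only):
//
//   inline float soft_threshold(float g, float lambda) {
//       if (g > +lambda) return g - lambda;
//       if (g < -lambda) return g + lambda;
//       return 0.0f;
//   }
//   inline float leaf_score(float G, float H, float lambda) {
//       const float t = soft_threshold(G, lambda);
//       return t * t / (H + lambda);
//   }
//   inline float split_gain(float GL, float HL, float GR, float HR, float lambda) {
//       return leaf_score(GL, HL, lambda) + leaf_score(GR, HR, lambda)
//            - leaf_score(GL + GR, HL + HR, lambda);
//   }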
7 | // 8 | 9 | #ifndef train_gbm_algo_h 10 | #define train_gbm_algo_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "../common/thread_pool.h" 16 | #include "../common/lock.h" 17 | #include "../util/random.h" 18 | #include "../util/activations.h" 19 | #include "../gbm_algo_abst.h" 20 | 21 | class Train_GBM_Algo : public GBM_Algo_Abst { 22 | struct SplitNodeStat_Thread { 23 | float sumGrad, sumHess; 24 | float gain; 25 | float split_threshold; 26 | float last_value_toCheck; 27 | bool dataNAN_go_Right; 28 | int split_feature_index; 29 | SplitNodeStat_Thread() { 30 | gain = 0, split_feature_index = -1, split_threshold = 0; 31 | dataNAN_go_Right = 0; 32 | clear(); 33 | } 34 | inline void clear() { 35 | sumGrad = 0.0f; 36 | sumHess = 0.0f; 37 | last_value_toCheck = 1e-8; 38 | } 39 | inline bool needUpdate(float splitGain, size_t split_index) { 40 | assert(!isnan(splitGain)); 41 | assert(split_index >= 0); 42 | if (split_feature_index <= split_index) { 43 | return splitGain > this->gain; 44 | } else { 45 | return !(this->gain > splitGain); 46 | } 47 | } 48 | }; 49 | public: 50 | Train_GBM_Algo(string _dataPath, size_t _epoch_cnt, size_t _maxDepth, 51 | size_t _minLeafW, size_t _multiclass): 52 | GBM_Algo_Abst(_dataPath, _maxDepth, _minLeafW, _multiclass), epoch_cnt(_epoch_cnt) { 53 | proc_cnt = thread::hardware_concurrency(); 54 | init(); 55 | threadpool = new ThreadPool(this->proc_cnt); 56 | } 57 | Train_GBM_Algo() = delete; 58 | 59 | ~Train_GBM_Algo() { 60 | delete [] sampleDataSetIndex; 61 | delete [] sampleFeatureSetIndex; 62 | delete [] dataRow_LocAtTree; 63 | delete [] splitNodeStat_thread; 64 | } 65 | 66 | void init(); 67 | void Train(); 68 | void flash(RegTreeNode *, size_t); 69 | void findSplitFeature(size_t, size_t, size_t, bool, size_t); 70 | void findSplitFeature_Wrapper(size_t, size_t, size_t, size_t); 71 | 72 | inline void sample() { 73 | memset(sampleDataSetIndex, 0, sizeof(bool) * this->dataRow_cnt); 74 | memset(dataRow_LocAtTree, NULL, sizeof(RegTreeNode*) * this->dataRow_cnt); 75 | for (size_t rid = 0; rid < this->dataRow_cnt; rid++) { 76 | if(SampleBinary(0.7)) 77 | sampleDataSetIndex[rid] = 1; 78 | } 79 | memset(sampleFeatureSetIndex, 0, sizeof(bool) * this->feature_cnt); 80 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 81 | if(dataSet_feature[fid].size() == 0) 82 | continue; 83 | if(SampleBinary(0.7)) 84 | sampleFeatureSetIndex[fid] = 1; 85 | } 86 | } 87 | 88 | inline float grad(float pred, float label) { 89 | return pred - label; 90 | } 91 | inline float hess(float pred) { 92 | return pred * (1 - pred); 93 | } 94 | inline float weight(float sumGrad, float sumHess) { 95 | return - ThresholdL1(sumGrad, lambda) / (sumHess + lambda); 96 | } 97 | inline float gain(float sumGrad, float sumHess) { 98 | return ThresholdL1(sumGrad, lambda) * ThresholdL1(sumGrad, lambda) / (sumHess + lambda); 99 | } 100 | inline float ThresholdL1(float w, float lambda) { 101 | if (w > +lambda) return w - lambda; 102 | if (w < -lambda) return w + lambda; 103 | return 0.0; 104 | } 105 | 106 | private: 107 | ThreadPool *threadpool; 108 | SpinLock lock; 109 | size_t proc_cnt; 110 | int proc_left; 111 | SplitNodeStat_Thread* splitNodeStat_thread; 112 | 113 | bool* sampleDataSetIndex; 114 | bool* sampleFeatureSetIndex; 115 | RegTreeNode** dataRow_LocAtTree; 116 | size_t epoch_cnt; 117 | 118 | Softmax softmax; 119 | Sigmoid sigmoid; 120 | 121 | float eps_feature_value, lambda, learning_rate; 122 | }; 123 | 124 | #endif /* train_gbm_algo_h */ 125 | 
-------------------------------------------------------------------------------- /LightCTR/train/train_gmm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_gmm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "train_gmm_algo.h" 10 | #include 11 | #include "../util/random.h" 12 | #include "../common/avx.h" 13 | 14 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 15 | const float PI = acos(-1); 16 | const float Log2PI = log(2 * PI); 17 | 18 | // log(exp(a) + exp(b)) 19 | inline float log_sum(float a, float b) { 20 | const float vmin = std::min(a, b); 21 | const float vmax = std::max(a, b); 22 | if (vmax > vmin + 30) { 23 | return vmax; 24 | } else { 25 | return vmax + std::log(1.0 + std::exp(vmin - vmax)); 26 | } 27 | } 28 | 29 | void Train_GMM_Algo::init() { 30 | gaussModels = new Gauss[cluster_cnt]; 31 | latentVar.resize(dataRow_cnt * cluster_cnt); 32 | FOR(i,cluster_cnt) { 33 | gaussModels[i].mu = new float[feature_cnt]; 34 | gaussModels[i].sigma = new float[feature_cnt]; 35 | memset(gaussModels[i].mu, 0, sizeof(float) * feature_cnt); 36 | FOR(fid, feature_cnt) { 37 | gaussModels[i].mu[fid] = UniformNumRand() - 0.5f; 38 | gaussModels[i].sigma[fid] = 5.0f; 39 | } 40 | gaussModels[i].weight = 1.0f / cluster_cnt; 41 | } 42 | } 43 | 44 | // Log Probability Density Function of Multivariate Gauss Distribution 45 | float Train_GMM_Algo::GaussianLPDF(size_t gasid, size_t rid) { 46 | float expN = 0, LogDetSigma = 0.0f, tmp = 0; 47 | FOR(fid, feature_cnt) { 48 | tmp = dataSet[rid][fid] * scale - gaussModels[gasid].mu[fid]; 49 | expN += tmp * tmp / gaussModels[gasid].sigma[fid]; 50 | LogDetSigma += log(gaussModels[gasid].sigma[fid]); 51 | } 52 | assert(!isnan(expN) && !isinf(expN) && !isnan(LogDetSigma) && !isinf(LogDetSigma)); 53 | tmp = log(gaussModels[gasid].weight) - 0.5 * (expN + LogDetSigma + feature_cnt * Log2PI); 54 | // assert(tmp < 0); 55 | return tmp; 56 | } 57 | 58 | vector* Train_GMM_Algo::Train_EStep() { 59 | FOR(rid,dataRow_cnt) { 60 | float LogSumPDF = 0; 61 | FOR(gasid,cluster_cnt) { 62 | gaussModels[gasid].pdf_tmp = GaussianLPDF(gasid, rid); 63 | if (gasid == 0) { 64 | LogSumPDF = gaussModels[gasid].pdf_tmp; 65 | } else { 66 | LogSumPDF = log_sum(LogSumPDF, gaussModels[gasid].pdf_tmp); 67 | } 68 | } 69 | // Normalization 70 | float expSum = 0; 71 | FOR(gasid,cluster_cnt) { 72 | float tmp = exp(gaussModels[gasid].pdf_tmp - LogSumPDF); 73 | assert(tmp <= 1); 74 | latentVar[rid * cluster_cnt + gasid] = tmp; 75 | expSum += tmp; 76 | } 77 | float* ptr = latentVar.data() + rid * cluster_cnt; 78 | avx_vecScale(ptr, ptr, cluster_cnt, 1.0 / expSum); 79 | } 80 | return &latentVar; 81 | } 82 | 83 | float Train_GMM_Algo::Train_MStep(const vector* latentVar) { 84 | FOR(gasid, cluster_cnt) { 85 | threadpool->addTask([&, gasid]() { 86 | float sumWeight = 0; 87 | FOR(rid,dataRow_cnt) { 88 | sumWeight += latentVar->at(rid * cluster_cnt + gasid); 89 | } 90 | assert(sumWeight > 0 && sumWeight < dataRow_cnt); 91 | gaussModels[gasid].sumRid_tmp = sumWeight; 92 | // update new gauss weight 93 | gaussModels[gasid].weight = sumWeight / dataRow_cnt; 94 | }); 95 | } 96 | threadpool->wait(); 97 | 98 | FOR(gasid, cluster_cnt) { 99 | threadpool->addTask([&, gasid]() { 100 | auto model = gaussModels[gasid]; 101 | // update new gauss mu and sigma 102 | FOR(fid, feature_cnt) { 103 | float sum_mu = 0.0f, sum_sigma = 0.0f; 104 | FOR(rid, 
dataRow_cnt) { 105 | sum_mu += latentVar->at(rid * cluster_cnt + gasid) * dataSet[rid][fid] * scale; 106 | const float t = dataSet[rid][fid] * scale - model.mu[fid]; 107 | sum_sigma += latentVar->at(rid * cluster_cnt + gasid) * t * t; 108 | } 109 | model.mu[fid] = sum_mu / model.sumRid_tmp; 110 | model.sigma[fid] = sum_sigma / model.sumRid_tmp; 111 | if (model.sigma[fid] < 0.01) { 112 | model.sigma[fid] = 0.01; // avoid detSigma beyand precision 113 | } 114 | } 115 | }); 116 | } 117 | threadpool->wait(); 118 | 119 | // compute log likelihood ELOB 120 | float likelihood = 0.0f; 121 | FOR(rid,dataRow_cnt) { 122 | float tmp = 0.0, raw_log_sum = 0.0; 123 | FOR(gasid,cluster_cnt) { 124 | tmp = GaussianLPDF(gasid, rid); 125 | if (gasid == 0) { 126 | raw_log_sum = tmp; 127 | } else { 128 | raw_log_sum = log_sum(raw_log_sum, tmp); 129 | } 130 | } 131 | likelihood += raw_log_sum; 132 | } 133 | return likelihood; 134 | } 135 | 136 | vector Train_GMM_Algo::Predict() { 137 | vector ans = vector(); 138 | ans.reserve(dataRow_cnt); 139 | FOR(rid,dataRow_cnt) { 140 | int whichTopic = -1; 141 | float maxP = 0.0f, tmp; 142 | FOR(gasid,cluster_cnt) { 143 | tmp = GaussianLPDF(gasid, rid); 144 | if (whichTopic == -1 || tmp > maxP) { 145 | maxP = tmp, whichTopic = (int)gasid; 146 | } 147 | } 148 | ans.emplace_back(whichTopic); 149 | } 150 | return ans; 151 | } 152 | 153 | void Train_GMM_Algo::printArguments() { 154 | ofstream md("./output/gmm_cluster.txt"); 155 | if(!md.is_open()){ 156 | cout<<"save model open file error" << endl; 157 | exit(1); 158 | } 159 | FOR(gasid, cluster_cnt) { 160 | md << "cluster " << gasid << " weight ="; 161 | md << " " << gaussModels[gasid].weight << endl; 162 | md << "cluster " << gasid << " mu ="; 163 | FOR(fid, feature_cnt) { 164 | md << " " << gaussModels[gasid].mu[fid]; 165 | } 166 | md << endl; 167 | md << "cluster " << gasid << " sigma ="; 168 | FOR(fid, feature_cnt) { 169 | md << " " << gaussModels[gasid].sigma[fid]; 170 | } 171 | md << endl; 172 | } 173 | md.close(); 174 | } 175 | 176 | -------------------------------------------------------------------------------- /LightCTR/train/train_gmm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_gmm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
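// [Editorial note, not part of the original source] GaussianLPDF in train_gmm_algo.cpp
// above evaluates a diagonal-covariance Gaussian in log space,
//   log p(x | k) = log w_k - 0.5 * ( sum_d (x_d - mu_d)^2 / var_d
//                                    + sum_d log var_d + D * log(2 * pi) ),
// and the E-step normalizes the per-cluster responsibilities with a log-sum-exp helper to
// avoid underflow. A minimal self-contained sketch of both pieces (illustrative names):
//
//   #include <algorithm>
//   #include <cmath>
//   #include <cstddef>
//   inline float log_sum_exp(float a, float b) {
//       const float hi = std::max(a, b), lo = std::min(a, b);
//       return (hi > lo + 30.f) ? hi : hi + std::log(1.f + std::exp(lo - hi));
//   }
//   inline float diag_gauss_logpdf(const float* x, const float* mu, const float* var,
//                                  std::size_t dim, float log_weight) {
//       float quad = 0.f, log_det = 0.f;
//       for (std::size_t d = 0; d < dim; ++d) {
//           const float t = x[d] - mu[d];
//           quad += t * t / var[d];
//           log_det += std::log(var[d]);
//       }
//       return log_weight - 0.5f * (quad + log_det + dim * std::log(2.f * std::acos(-1.f)));
//   }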
7 | // 8 | 9 | #ifndef train_gmm_algo_h 10 | #define train_gmm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../em_algo_abst.h" 16 | using namespace std; 17 | 18 | class Train_GMM_Algo : public EM_Algo_Abst > { 19 | 20 | struct Point { 21 | float* data; 22 | ~Point() { 23 | delete [] data; 24 | } 25 | }; 26 | struct Gauss { 27 | float* mu; 28 | float* sigma; // simple covariance to diagonal matrix 29 | float weight; 30 | float pdf_tmp; 31 | float sumRid_tmp; 32 | Gauss() { 33 | pdf_tmp = sumRid_tmp = 0; 34 | } 35 | }; 36 | public: 37 | Train_GMM_Algo(string _dataFile, size_t _epoch, size_t _cluster_cnt, 38 | size_t _feature_cnt, float _scale = 1.0f): 39 | EM_Algo_Abst(_dataFile, _epoch, _feature_cnt), cluster_cnt(_cluster_cnt), scale(_scale) { 40 | threadpool = new ThreadPool(thread::hardware_concurrency()); 41 | init(); 42 | } 43 | Train_GMM_Algo() = delete; 44 | 45 | ~Train_GMM_Algo() { 46 | for (size_t i = 0; i < cluster_cnt; i++) { 47 | delete [] gaussModels[i].mu; 48 | } 49 | delete [] gaussModels; 50 | delete threadpool; 51 | threadpool = NULL; 52 | } 53 | 54 | void init(); 55 | vector* Train_EStep(); 56 | float Train_MStep(const vector*); 57 | vector Predict(); 58 | 59 | float GaussianLPDF(size_t gasid, size_t rid); 60 | void printArguments(); 61 | 62 | size_t cluster_cnt; 63 | 64 | private: 65 | float scale; 66 | Gauss *gaussModels; 67 | vector latentVar; 68 | 69 | ThreadPool *threadpool; 70 | }; 71 | 72 | #endif /* train_gmm_algo_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/train/train_nfm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_nfm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
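// [Editorial note, not part of the original source] The class below combines a "wide"
// linear/FM part (update_W / update_V, accumWideGrad) with a "deep" stack of fully
// connected layers (inputLayer/outputLayer, accumDeepGrad); both write into the single
// gradient buffer update_g laid out as [ W (feature_cnt) | V (feature_cnt * factor_cnt) ].
// For orientation only: a standard NFM feeds the deep part with bi-interaction pooling of
// the embedded features, the element-wise analogue of the FM identity sketched in
// train_fm_algo.cpp above; the exact wiring used here is in train_nfm_algo.cpp, which is
// not reproduced in this excerpt. Element-wise pooling sketch (hypothetical names):
//
//   #include <cstddef>
//   #include <vector>
//   inline std::vector<float> bi_interaction(const std::vector<float>& sum_vx,      // sum_i v_i * x_i
//                                            const std::vector<float>& sum_vx_sq) { // sum_i (v_i * x_i)^2
//       std::vector<float> out(sum_vx.size());
//       for (std::size_t f = 0; f < out.size(); ++f)
//           out[f] = 0.5f * (sum_vx[f] * sum_vx[f] - sum_vx_sq[f]);
//       return out;
//   }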
7 | // 8 | 9 | #ifndef train_nfm_algo_h 10 | #define train_nfm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../fm_algo_abst.h" 15 | #include "layer/fullyconnLayer.h" 16 | 17 | // Wide-Deep Neural Factorization Machine 18 | class Train_NFM_Algo : public FM_Algo_Abst { 19 | 20 | public: 21 | Train_NFM_Algo(string _dataPath, size_t _epoch_cnt, size_t _factor_cnt, 22 | size_t _hidden_layer_size): 23 | FM_Algo_Abst(_dataPath, _factor_cnt), epoch(_epoch_cnt), hidden_layer_size(_hidden_layer_size) { 24 | assert(this->feature_cnt != 0); 25 | threadpool = new ThreadPool(1); 26 | init(); 27 | } 28 | 29 | ~Train_NFM_Algo() { 30 | delete [] update_g; 31 | delete threadpool; 32 | threadpool = NULL; 33 | } 34 | 35 | void init(); 36 | void Train(); 37 | 38 | private: 39 | Train_NFM_Algo() = delete; 40 | 41 | size_t epoch; 42 | size_t batch_size; 43 | 44 | size_t hidden_layer_size; 45 | Fully_Conn_Layer *inputLayer, *outputLayer; 46 | Sigmoid sigmoid; 47 | 48 | size_t learnable_params_cnt; 49 | 50 | void batchGradCompute(size_t, size_t); 51 | void accumWideGrad(size_t, float); 52 | void accumDeepGrad(size_t, const vector&); 53 | 54 | float *update_g; 55 | inline float* update_W(size_t fid) { 56 | return &update_g[fid]; 57 | } 58 | inline float* update_V(size_t fid, size_t facid) { 59 | return &update_g[this->feature_cnt + fid * this->factor_cnt + facid]; 60 | } 61 | void ApplyGrad(); 62 | 63 | float loss; 64 | size_t accuracy; 65 | AdagradUpdater_Num updater; 66 | 67 | ThreadLocal tl_fc_input_Matrix, tl_fc_bp_Matrix; 68 | ThreadLocal > tl_wrapper; 69 | 70 | ThreadPool *threadpool; 71 | }; 72 | 73 | #endif /* train_nfm_algo_h */ 74 | -------------------------------------------------------------------------------- /LightCTR/train/train_rnn_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_rnn_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/9. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
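// [Editorial note, not part of the original source] initNetwork below wires the model as
//   LSTM_Unit(28 -> hidden_size) -> Attention_Unit(hidden_size) -> FC(hidden_size -> 72)
//   -> FC(72 -> multiclass_output_cnt),
// and Predict feeds each data row to the LSTM as `batch_size` consecutive 28-value slices
// (one image row per recurrent step). A minimal sketch of that slicing, with hypothetical
// names (FeedFn stands in for the layer's forward call):
//
//   #include <cassert>
//   #include <cstddef>
//   #include <vector>
//   template <typename FeedFn>   // FeedFn: void(const float*, std::size_t)
//   void feed_row_as_steps(const std::vector<float>& row, std::size_t steps,
//                          std::size_t step_dim, FeedFn feed) {
//       assert(row.size() == steps * step_dim);
//       for (std::size_t t = 0; t < steps; ++t)
//           feed(row.data() + t * step_dim, step_dim);   // one recurrent step per slice
//   }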
7 | // 8 | 9 | #ifndef train_rnn_algo_h 10 | #define train_rnn_algo_h 11 | 12 | #include "../dl_algo_abst.h" 13 | #include "unit/lstm_unit.h" 14 | #include "unit/attention_unit.h" 15 | using namespace std; 16 | 17 | template 18 | class Train_RNN_Algo : public DL_Algo_Abst { 20 | public: 21 | Train_RNN_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 22 | size_t _hidden_size, size_t _recurrent_cnt, size_t _multiclass_output_cnt = 1): 23 | DL_Algo_Abst( 24 | dataPath, _epoch, _feature_cnt, _hidden_size, _multiclass_output_cnt), 25 | batch_size(_recurrent_cnt), hidden_size(_hidden_size) { 26 | this->dl_algo = RNN; 27 | initNetwork(hidden_size); 28 | } 29 | Train_RNN_Algo() = delete; 30 | 31 | ~Train_RNN_Algo() { 32 | } 33 | 34 | void initNetwork(size_t hidden_size) { 35 | inputLSTM = new LSTM_Unit(28, hidden_size, batch_size); 36 | this->appendNNLayer(inputLSTM); 37 | attentionLayer = 38 | new Attention_Unit(hidden_size, /*fc_hidden*/20, batch_size); 39 | this->appendNNLayer(attentionLayer); 40 | fcLayer = new Fully_Conn_Layer(attentionLayer, hidden_size, 72); 41 | this->appendNNLayer(fcLayer); 42 | this->outputLayer = 43 | new Fully_Conn_Layer(fcLayer, 72, this->multiclass_output_cnt); 44 | this->appendNNLayer(this->outputLayer); 45 | } 46 | 47 | vector& Predict(size_t rid, vector >& dataRow) { 48 | static Matrix* dataRow_Matrix = new Matrix(1, 28); 49 | static Matrix* dataRow_Matrix_fc = new Matrix(1, hidden_size); 50 | static vector tmp; 51 | tmp.resize(1); 52 | tmp[0] = dataRow_Matrix; 53 | 54 | auto begin = dataRow[rid].begin(); 55 | auto end = begin; 56 | FOR(i, batch_size) { 57 | begin = dataRow[rid].begin() + i * 28; 58 | end = dataRow[rid].begin() + (i + 1) * 28; 59 | dataRow_Matrix->pointer()->assign(begin, end); 60 | inputLSTM->forward(tmp); 61 | } 62 | assert(end == dataRow[rid].end()); 63 | 64 | // Attention Unit 65 | auto pred = attentionLayer->forward(inputLSTM->seq_output()); 66 | 67 | assert(pred.size() == hidden_size); 68 | dataRow_Matrix_fc->pointer()->assign(pred.begin(), pred.end()); 69 | tmp[0] = dataRow_Matrix_fc; 70 | return this->fcLayer->forward(tmp); 71 | } 72 | 73 | void BP(size_t rid, const vector& grad) { 74 | assert(GradientUpdater::__global_bTraining); 75 | this->outputLayer->backward(grad); 76 | inputLSTM->backward(attentionLayer->inputDelta()); 77 | } 78 | 79 | void applyBP(size_t epoch) const { 80 | inputLSTM->applyBatchGradient(); 81 | attentionLayer->applyBatchGradient(); 82 | } 83 | 84 | private: 85 | size_t batch_size, hidden_size; 86 | LSTM_Unit* inputLSTM; 87 | Attention_Unit* attentionLayer; 88 | Layer_Base* fcLayer; 89 | }; 90 | 91 | #endif /* train_rnn_algo_h */ 92 | -------------------------------------------------------------------------------- /LightCTR/train/train_tm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_tm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/15. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
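// [Editorial note, not part of the original source] For orientation, the caches declared
// below (latentVar, topics_of_docs, words_of_topics, latent_*_sum) correspond to the
// quantities the standard PLSA EM updates need:
//   E-step:  p(z | d, w)  proportional to  p(w | z) * p(z | d)
//   M-step:  p(w | z)  proportional to  sum_d n(d, w) * p(z | d, w)
//            p(z | d)  proportional to  sum_w n(d, w) * p(z | d, w)
// The concrete implementation is in train_tm_algo.cpp, which is not reproduced in this
// excerpt. Minimal sketch of one E-step posterior (hypothetical names):
//
//   #include <cstddef>
//   #include <vector>
//   inline std::vector<float> plsa_posterior(const std::vector<float>& p_w_given_z,   // [topics]
//                                            const std::vector<float>& p_z_given_d) { // [topics]
//       std::vector<float> post(p_w_given_z.size());
//       float norm = 0.f;
//       for (std::size_t z = 0; z < post.size(); ++z)
//           norm += post[z] = p_w_given_z[z] * p_z_given_d[z];
//       for (float& v : post) v /= (norm > 0.f ? norm : 1.f);
//       return post;
//   }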
7 | // 8 | 9 | #ifndef train_tm_algo_h 10 | #define train_tm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../em_algo_abst.h" 16 | using namespace std; 17 | 18 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 19 | 20 | #define PLSA 21 | 22 | // Topic Model impl by PLSA and Latent Dirichlet Allocation Algorithm 23 | class Train_TM_Algo : public EM_Algo_Abst > { 24 | public: 25 | Train_TM_Algo(string _dataFile, string _vocabFile, size_t _epoch, 26 | size_t _topic, size_t _words): 27 | EM_Algo_Abst(_dataFile, _epoch, _words), word_cnt(_words), topic_cnt(_topic) { 28 | doc_cnt = this->dataRow_cnt; 29 | threadpool = new ThreadPool(thread::hardware_concurrency()); 30 | init(); 31 | loadVocab(_vocabFile); 32 | } 33 | Train_TM_Algo() = delete; 34 | 35 | ~Train_TM_Algo() { 36 | delete threadpool; 37 | threadpool = NULL; 38 | } 39 | 40 | void init(); 41 | vector* Train_EStep(); 42 | float Train_MStep(const vector*); 43 | 44 | void printArguments(); 45 | vector Predict(); 46 | 47 | size_t word_cnt, topic_cnt, doc_cnt; 48 | vector vocab; 49 | 50 | void loadVocab(string dataPath) { 51 | ifstream fin_; 52 | string line; 53 | char str[128]; 54 | int val, fre; 55 | fin_.open(dataPath, ios::in); 56 | if(!fin_.is_open()){ 57 | cout << "open file error, please run data/proc_text_topic.py first." << endl; 58 | exit(1); 59 | } 60 | while(!fin_.eof()){ 61 | getline(fin_, line); 62 | const char *pline = line.c_str(); 63 | if(sscanf(pline, "%d %s %d", &val, str, &fre) >= 1){ 64 | assert(!isnan(val)); 65 | vocab.emplace_back(string(str)); 66 | } 67 | } 68 | assert(vocab.size() == word_cnt); 69 | } 70 | 71 | ThreadPool *threadpool; 72 | 73 | #ifdef PLSA 74 | vector latentVar; 75 | vector topics_of_docs; 76 | vector words_of_topics; 77 | vector wordCnt_of_doc; 78 | // cache for algorithm 79 | vector latent_word_sum; // word_sum[docid][tid] sum of all words 80 | vector latent_doc_sum; // doc_sum[wid][tid] sum of all docs 81 | vector latent_word_doc_sum; // word_doc_sum[tid] sum of all docs and words 82 | #endif 83 | }; 84 | 85 | #endif /* train_tm_algo_h */ 86 | -------------------------------------------------------------------------------- /LightCTR/train/train_vae_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_vae_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/21. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
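// [Editorial note, not part of the original source] init() below builds the autoencoder as
//   FC(feature_cnt -> hidden) -> FC(hidden -> 2 * gauss_cnt) -> Sample_Layer
//   -> FC(gauss_cnt -> hidden) -> FC(hidden -> feature_cnt),
// i.e. the encoder emits gauss_cnt (mu, sigma) pairs and the sample layer draws the latent
// code. The usual VAE reparameterization (presumably what Sample_Layer in
// layer/sampleLayer.h implements; that file is not reproduced here) is
//   z = mu + sigma * eps,  eps ~ N(0, 1),
// which keeps sampling differentiable with respect to mu and sigma. Minimal sketch:
//
//   #include <cstddef>
//   #include <random>
//   #include <vector>
//   inline std::vector<float> reparameterize(const std::vector<float>& mu,
//                                            const std::vector<float>& sigma,
//                                            std::mt19937& rng) {
//       std::normal_distribution<float> eps(0.f, 1.f);
//       std::vector<float> z(mu.size());
//       for (std::size_t i = 0; i < z.size(); ++i)
//           z[i] = mu[i] + sigma[i] * eps(rng);   // gradient flows through mu and sigma
//       return z;
//   }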
7 | // 8 | 9 | #ifndef train_vae_algo_h 10 | #define train_vae_algo_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "../util/loss.h" 16 | #include "layer/layer_abst.h" 17 | #include "layer/convLayer.h" 18 | #include "layer/poolingLayer.h" 19 | #include "layer/sampleLayer.h" 20 | using namespace std; 21 | 22 | // Generative-Models Variational-Autoencoder 23 | template 24 | class Train_VAE_Algo { 25 | 26 | public: 27 | Train_VAE_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 28 | size_t hidden_size, size_t _gauss_cnt): 29 | epoch(_epoch), feature_cnt(_feature_cnt), gauss_cnt(_gauss_cnt) { 30 | loadDenseDataRow(dataPath); 31 | init(hidden_size, _gauss_cnt); 32 | } 33 | Train_VAE_Algo() = delete; 34 | 35 | ~Train_VAE_Algo() { 36 | delete encodeLayer; 37 | delete decodeLayer; 38 | delete outputLayer; 39 | delete sampleLayer; 40 | } 41 | 42 | void init(size_t hidden_size, size_t gauss_cnt) { 43 | // Find expectation nomal distribution 44 | encodeLayer = new Fully_Conn_Layer(NULL, feature_cnt, hidden_size); 45 | Fully_Conn_Layer* hidden1 = 46 | new Fully_Conn_Layer(encodeLayer, hidden_size, gauss_cnt * 2); 47 | // sample 48 | sampleLayer = new Sample_Layer(hidden1, gauss_cnt * 2); 49 | decodeLayer = new Fully_Conn_Layer(sampleLayer, gauss_cnt, hidden_size); 50 | // tuning parameters to Maximize Likelihood 51 | outputLayer = 52 | new Fully_Conn_Layer(decodeLayer, hidden_size, feature_cnt); 53 | } 54 | 55 | void Train() { 56 | static Matrix* dataRow_Matrix = new Matrix(1, feature_cnt); 57 | static Matrix* grad_Matrix = new Matrix(1, feature_cnt); 58 | static vector tmp(1); 59 | 60 | for (size_t p = 0; p < epoch; p++) { 61 | 62 | GradientUpdater::__global_bTraining = true; 63 | 64 | // Mini-Batch SGD 65 | for (size_t rid = 0; rid < dataRow_cnt; rid++) { 66 | dataRow_Matrix->pointer()->assign(dataSet[rid].begin(), dataSet[rid].end()); 67 | tmp[0] = dataRow_Matrix; 68 | vector& pred = this->encodeLayer->forward(tmp); 69 | outputActivFun.forward(pred.data(), pred.size()); 70 | assert(pred.size() == feature_cnt); 71 | 72 | lossFun.gradient(pred.data(), dataSet[rid].data(), 73 | grad_Matrix->reference().data(), grad_Matrix->size()); 74 | outputActivFun.backward(grad_Matrix->reference().data(), pred.data(), 75 | grad_Matrix->reference().data(), grad_Matrix->size()); 76 | 77 | // if LossFunction is Logistic, annotation last line 78 | tmp[0] = grad_Matrix; 79 | this->outputLayer->backward(tmp); 80 | if ((rid + 1) % GradientUpdater::__global_minibatch_size == 0) { 81 | this->encodeLayer->applyBatchGradient(); 82 | } 83 | } 84 | if (p % 2 == 0) { 85 | 86 | GradientUpdater::__global_bTraining = false; 87 | 88 | // Validate Loss 89 | float loss = 0.0f; 90 | for (size_t rid = 0; rid < dataRow_cnt; rid+=2) { 91 | dataRow_Matrix->pointer()->assign(dataSet[rid].begin(), dataSet[rid].end()); 92 | tmp[0] = dataRow_Matrix; 93 | vector pred = this->encodeLayer->forward(tmp); 94 | outputActivFun.forward(pred.data(), pred.size()); 95 | loss += lossFun.loss(pred.data(), dataSet[rid].data(), pred.size()); 96 | } 97 | printf("Epoch %zu Loss = %f\n", p, loss); 98 | } 99 | } 100 | } 101 | 102 | vector* encode(vector* input) { 103 | assert(input->size() == feature_cnt); 104 | sampleLayer->bEncoding = true; 105 | vector *encode = this->encodeLayer->forward(input); 106 | sampleLayer->bEncoding = false; 107 | assert(encode->size() == gauss_cnt); 108 | return encode; 109 | } 110 | 111 | void saveModel(size_t epoch) { 112 | 113 | } 114 | 115 | void loadDenseDataRow(string dataPath) { 116 | dataSet.clear(); 
117 | 118 | ifstream fin_; 119 | string line; 120 | int nchar, y; 121 | int val, fid = 0; 122 | fin_.open(dataPath, ios::in); 123 | if(!fin_.is_open()){ 124 | cout << "open file error!" << endl; 125 | exit(1); 126 | } 127 | 128 | while(!fin_.eof()){ 129 | vector tmp; 130 | tmp.resize(feature_cnt); 131 | getline(fin_, line); 132 | fill(tmp.begin(), tmp.end(), 0); 133 | const char *pline = line.c_str(); 134 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 135 | pline += nchar + 1; 136 | fid = 0; 137 | while(pline < line.c_str() + (int)line.length() && 138 | sscanf(pline, "%d%n", &val, &nchar) >= 1){ 139 | pline += nchar + 1; 140 | if (*pline == ',') 141 | pline += 1; 142 | if (val != 0) { 143 | tmp[fid] = val / 255.0; 144 | } 145 | fid++; 146 | if (fid >= feature_cnt) { 147 | break; 148 | } 149 | } 150 | dataSet.emplace_back(tmp); 151 | if (dataSet.size() > 200) { 152 | break; 153 | } 154 | } 155 | } 156 | this->dataRow_cnt = this->dataSet.size(); 157 | assert(this->dataRow_cnt > 0); 158 | } 159 | 160 | private: 161 | LossFunction lossFun; 162 | Sigmoid outputActivFun; 163 | 164 | size_t epoch; 165 | Fully_Conn_Layer *encodeLayer, *decodeLayer, *outputLayer; 166 | Sample_Layer *sampleLayer; 167 | 168 | size_t dataRow_cnt, feature_cnt, gauss_cnt; 169 | vector > dataSet; 170 | }; 171 | 172 | #endif /* train_vae_algo_h */ 173 | -------------------------------------------------------------------------------- /LightCTR/train/unit/attention_unit.h: -------------------------------------------------------------------------------- 1 | // 2 | // attention_unit.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/2. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef attention_unit_h 10 | #define attention_unit_h 11 | 12 | #include 13 | #include "../../util/matrix.h" 14 | #include "../layer/fullyconnLayer.h" 15 | 16 | // Attention-based Encoder-Decoder build a RNN that has alignment attention 17 | template 18 | class Attention_Unit : public Layer_Base { 19 | public: 20 | Attention_Unit(size_t _dimension, size_t _hidden_size, size_t _recurrent_cnt): 21 | Layer_Base(NULL, _recurrent_cnt, _dimension), dimension(_dimension), batch_size(_recurrent_cnt) { 22 | this->activeFun = new ActivationFunction(); 23 | 24 | printf("Attention-based Unit\n"); 25 | // alpha transform is computed by DxH and Hx1 fc Layer 26 | printf("-- Attention Inner FC-1 "); 27 | transformFunc = new Fully_Conn_Layer(NULL, _dimension, _hidden_size); 28 | transformFunc->needInputDelta = true; 29 | printf("-- Attention Inner FC-2 "); 30 | transformFunc_bp = new Fully_Conn_Layer(transformFunc, _hidden_size, 1); 31 | } 32 | Attention_Unit() = delete; 33 | 34 | ~Attention_Unit() { 35 | delete transformFunc_bp; 36 | delete transformFunc; 37 | } 38 | 39 | // Attention input data should be data concating rnn encoder output sequence, rather than one cell's output 40 | vector& forward(const vector& prevLOutputMatrix) { 41 | assert(prevLOutputMatrix.size() == batch_size); 42 | 43 | // init threadlocal var 44 | MatrixArr& input = *tl_input; 45 | input.arr.resize(batch_size); 46 | Matrix& attentionOutput = *tl_attentionOutput; 47 | attentionOutput.reset(1, dimension); 48 | 49 | Matrix& fc_output_act = *tl_fc_output_act; 50 | fc_output_act.reset(1, batch_size); 51 | 52 | Matrix* cache = NULL; 53 | 54 | vector& wrapper = *tl_wrapper; 55 | wrapper.resize(1); 56 | 57 | FOR(idx, prevLOutputMatrix.size()) { 58 | input.arr[idx] = prevLOutputMatrix[idx]->copy(input.arr[idx]); // 1xD 59 | assert(input.arr[idx]->size() 
== dimension); 60 | wrapper[0] = input.arr[idx]; 61 | auto res = transformFunc->forward(wrapper); 62 | assert(res.size() == 1); 63 | *fc_output_act.getEle(0, idx) = res[0]; 64 | } 65 | // Softmax normalization 66 | softmax.forward(fc_output_act.pointer()->data(), fc_output_act.size()); 67 | 68 | attentionOutput.zeroInit(); 69 | FOR(idx, prevLOutputMatrix.size()) { 70 | cache = input.arr[idx]->copy(cache)->scale(*fc_output_act.getEle(0, idx)); 71 | attentionOutput.add(cache); 72 | } 73 | delete cache; 74 | return attentionOutput.reference(); 75 | } 76 | 77 | void backward(const vector& outputDeltaMatrix) { 78 | Matrix* outputDelta = outputDeltaMatrix[0]; 79 | assert(outputDelta->size() == this->output_dimension); 80 | 81 | // init threadlocal var 82 | MatrixArr& input = *tl_input; 83 | Matrix& fc_output_act = *tl_fc_output_act; 84 | 85 | vector& wrapper = *tl_wrapper; 86 | vector& scaleDelta = *tl_scaleDelta; 87 | scaleDelta.resize(batch_size); 88 | MatrixArr& input_delta = *tl_input_delta; 89 | input_delta.arr.resize(batch_size); 90 | Matrix* cache_bp = new Matrix(1, 1); 91 | Matrix* cache = NULL; 92 | 93 | FOR(idx, input.arr.size()) { 94 | // update softmax_fc by delta of softmax_fc(X) 95 | auto res = input.arr[idx]->Multiply(cache_bp, outputDelta->transpose()); 96 | outputDelta->transpose(); // recover 97 | assert(res->size() == 1); 98 | scaleDelta[idx] = *cache_bp->getEle(0, 0); 99 | } 100 | softmax.backward(scaleDelta.data(), fc_output_act.pointer()->data(), 101 | scaleDelta.data(), scaleDelta.size()); 102 | // update transformFunc 103 | FOR(idx, input.arr.size()) { 104 | *cache_bp->getEle(0, 0) = scaleDelta[idx]; 105 | wrapper[0] = cache_bp; 106 | transformFunc_bp->backward(wrapper); 107 | // input delta of transformFunc 108 | const Matrix& delta = transformFunc->inputDelta(); 109 | input_delta.arr[idx] = delta.copy(input_delta.arr[idx]); 110 | } 111 | // pass back delta of X 112 | FOR(idx, input.arr.size()) { 113 | cache = outputDelta->copy(cache)->scale(*fc_output_act.getEle(0, idx)); 114 | input_delta.arr[idx]->add(cache); 115 | } 116 | delete cache_bp; 117 | delete cache; 118 | } 119 | 120 | const vector& output() { 121 | Matrix& attentionOutput = *tl_attentionOutput; 122 | vector& wrapper = *tl_wrapper; 123 | wrapper[0] = &attentionOutput; 124 | return wrapper; 125 | } 126 | const vector& inputDelta() { 127 | MatrixArr& input_delta = *tl_input_delta; 128 | return input_delta.arr; 129 | } 130 | 131 | void applyBatchGradient() { 132 | transformFunc->applyBatchGradient(); 133 | if (nextLayer) { 134 | nextLayer->applyBatchGradient(); 135 | } 136 | } 137 | 138 | private: 139 | Fully_Conn_Layer *transformFunc, *transformFunc_bp; 140 | Softmax softmax; 141 | size_t batch_size, dimension; 142 | 143 | ThreadLocal tl_input; 144 | ThreadLocal tl_attentionOutput; 145 | ThreadLocal tl_fc_output_act; 146 | 147 | ThreadLocal > tl_scaleDelta; 148 | ThreadLocal tl_input_delta; 149 | 150 | ThreadLocal > tl_wrapper; 151 | }; 152 | 153 | #endif /* attention_unit_h */ 154 | -------------------------------------------------------------------------------- /LightCTR/util/activations.h: -------------------------------------------------------------------------------- 1 | // 2 | // activation.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
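// [Editorial note, not part of the original source] Two numerical conventions used by the
// activations below: Sigmoid clamps its output to [1e-7, 1 - 1e-7] by short-circuiting
// logits beyond +/-16, and Softmax subtracts the row maximum before exponentiating. The
// AVX-based Softmax::backward computes the Jacobian contraction
//   dL/dz_i = ( delta_i - sum_j delta_j * p_j ) * p_i / T,
// where p is the forward softmax output and T the soft-target temperature. A scalar
// reference of the same contraction (illustrative only):
//
//   #include <cstddef>
//   inline void softmax_backward_ref(const float* delta, const float* p, float* out,
//                                    std::size_t n, float temperature = 1.f) {
//       float dot = 0.f;
//       for (std::size_t j = 0; j < n; ++j) dot += delta[j] * p[j];
//       for (std::size_t i = 0; i < n; ++i)
//           out[i] = (delta[i] - dot) * p[i] / temperature;
//   }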
7 | // 8 | 9 | #ifndef activation_h 10 | #define activation_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "assert.h" 16 | #include "../common/avx.h" 17 | using namespace std; 18 | 19 | class Activation { 20 | public: 21 | virtual void forward(float* input, size_t len) = 0; 22 | virtual void backward(const float* delta, const float* forward_output, float* to, size_t len) = 0; 23 | }; 24 | 25 | class Identity : public Activation { 26 | public: 27 | inline void forward(float* input, size_t len) { 28 | return; 29 | } 30 | inline void backward(const float* delta, const float* forward_output, float* to, size_t len) { 31 | for (size_t i = 0; i < len; i++) { 32 | to[i] = delta[i]; 33 | } 34 | } 35 | }; 36 | 37 | class Binary_Sigmoid : public Activation { 38 | // used in forward process of Binary Neural Network 39 | public: 40 | inline float forward(float input) { 41 | const float res = (input + 1.0f) / 2.0f; 42 | return fmax(0.0f, fmin(1.0f, res)); // clip to [0, 1] 43 | } 44 | inline void forward(float* input, size_t len) { 45 | float scaler = 0.0f; 46 | for (size_t i = 0; i < len; i++) { 47 | scaler += fabs(input[i]); // accumulate of L1-norm 48 | } 49 | scaler /= len; 50 | for (size_t i = 0; i < len; i++) { 51 | const float sign = input[i] > 0 ? 1 : -1; 52 | input[i] *= scaler * sign; 53 | } 54 | } 55 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 56 | // standard backward propagation except binary weight 57 | for (size_t i = 0; i < len; i++) { 58 | to[i] = delta[i]; 59 | } 60 | } 61 | }; 62 | 63 | class Sigmoid : public Activation { 64 | public: 65 | inline float forward(float input) const { 66 | if(input < -16){ 67 | return 1e-7; 68 | } else if(input > 16) { 69 | return 1.0 - 1e-7; 70 | } 71 | return 1.0f / (1.0f + exp(-input)); 72 | } 73 | inline void forward(float* input, size_t len) { 74 | for (size_t i = 0; i < len; i++) { 75 | if(input[i] < -16){ 76 | input[i] = 1e-7; 77 | } else if(input[i] > 16) { 78 | input[i] = 1.0 - 1e-7; 79 | } else { 80 | input[i] = 1.0f / (1.0f + exp(- input[i])); 81 | } 82 | assert(!isnan(input[i])); 83 | } 84 | } 85 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 86 | for (size_t i = 0; i < len; i++) { 87 | to[i] = delta[i] * foutput[i] * (1.0f - foutput[i]); 88 | assert(!isnan(to[i])); 89 | } 90 | } 91 | }; 92 | 93 | class Softmax : public Activation { 94 | public: 95 | Softmax(float _softTargetRate = 1.0f) : softTargetRate(_softTargetRate) { 96 | } 97 | inline size_t forward_max(const float* input, size_t len) const { 98 | return std::max_element(input, input + len) - input; 99 | } 100 | inline void forward(float* input, size_t len) { 101 | float sum = 0.0f; 102 | auto maxV = *max_element(input, input + len); 103 | // for numerical stability overflow 104 | for (size_t i = 0; i < len; i++) { 105 | sum += exp((input[i] - maxV) / softTargetRate); 106 | } 107 | for (size_t i = 0; i < len; i++) { 108 | input[i] = exp((input[i] - maxV) / softTargetRate) / sum; 109 | if (input[i] == 0) { 110 | input[i] = 1e-7; 111 | } else if (input[i] == 1) { 112 | input[i] = 1.0 - 1e-7; 113 | } 114 | } 115 | } 116 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 117 | // softmax Derivative (whether i == j) * softmax(input[i]) - softmax(input[i]) * softmax(input[j]) 118 | // each derivative of Z_(L) = sum_i( delta_(i) * -forward_output_(i) ) * forward_output_(L) 119 | // + delta_(L) * forward_output_(L) 120 | float sum = 
avx_dotProduct(delta, foutput, len); 121 | avx_vecAdd(delta, -sum, to, len); 122 | avx_vecScale(to, to, len, foutput); 123 | avx_vecScale(to, to, len, 1.0 / softTargetRate); 124 | } 125 | private: 126 | // used in distillation soft target softmax, when larger than 1 makes smooth classification 127 | float softTargetRate; 128 | }; 129 | 130 | class Tanh : public Activation { 131 | public: 132 | inline void forward(float* input, size_t len) { 133 | float t1, t2; 134 | for (size_t i = 0; i < len; i++) { 135 | t1 = exp(input[i]), t2 = exp(- input[i]); 136 | input[i] = (t1 - t2) / (t1 + t2); 137 | } 138 | } 139 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 140 | for (size_t i = 0; i < len; i++) { 141 | to[i] = delta[i] * (1.0f - foutput[i] * foutput[i]); 142 | } 143 | } 144 | }; 145 | 146 | class ReLU : public Activation { // Local Response Normalization 147 | public: 148 | inline void forward(float* input, size_t len) { 149 | for (size_t i = 0; i < len; i++) { 150 | if (input[i] < 0.0f) { 151 | input[i] = 0.0f; // negative slope is 0 152 | } 153 | } 154 | } 155 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 156 | for (size_t i = 0; i < len; i++) { 157 | if (foutput[i] == 0.0f) { 158 | to[i] = 0.0f; 159 | } else { 160 | to[i] = delta[i]; 161 | } 162 | } 163 | } 164 | }; 165 | 166 | class SoftPlus : public Activation { 167 | public: 168 | inline void forward(float* input, size_t len) { 169 | for (size_t i = 0; i < len; i++) { 170 | input[i] = log(1 + exp(input[i])); 171 | } 172 | } 173 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 174 | float t; 175 | for (size_t i = 0; i < len; i++) { 176 | t = exp(foutput[i]); 177 | to[i] = delta[i] * (t - 1) / t; 178 | } 179 | } 180 | }; 181 | 182 | #endif /* activation_h */ 183 | -------------------------------------------------------------------------------- /LightCTR/util/ensembling.h: -------------------------------------------------------------------------------- 1 | // 2 | // ensembling.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/3. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 
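// [Editorial note, not part of the original source] AdaBoost below weights each weak model
// by alpha = 0.5 * ln((1 - err) / err), returns a large constant when err is near zero, and
// rescales sample weights by e^{+alpha} for misclassified rows and e^{-alpha} for correct
// ones (note it uses the plain misclassification rate and does not renormalize the weight
// vector). A textbook-style round with weighted error, for comparison (illustrative only):
//
//   #include <cmath>
//   #include <cstddef>
//   #include <vector>
//   inline float adaboost_round(std::vector<float>& w, const std::vector<bool>& correct) {
//       float err = 0.f, total = 0.f;
//       for (std::size_t i = 0; i < w.size(); ++i) {
//           total += w[i];
//           if (!correct[i]) err += w[i];
//       }
//       err /= total;
//       const float alpha = 0.5f * std::log((1.f - err) / err);
//       for (std::size_t i = 0; i < w.size(); ++i)
//           w[i] *= std::exp(correct[i] ? -alpha : +alpha);
//       return alpha;   // ensemble weight of this round's weak model
//   }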
7 | // 8 | 9 | #ifndef ensembling_h 10 | #define ensembling_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | // Hard majority voting 18 | // Weighted Average Probabilities 19 | class Voting { 20 | public: 21 | Voting(bool _is_prob_avg_voting) { 22 | is_prob_avg_voting = _is_prob_avg_voting; 23 | } 24 | 25 | std::shared_ptr > final_result(vector >& sub_results) { 26 | assert(sub_results.size() > 0 && sub_results[0].size() > 0); 27 | vector res; 28 | res.resize(sub_results[0].size()); 29 | 30 | if (is_prob_avg_voting) { 31 | for (size_t i = 0; i < sub_results[0].size(); i++) { 32 | res[i] = 0; 33 | for (size_t j = 0; j < sub_results.size(); j++) { 34 | res[i] += sub_results[j][i]; 35 | } 36 | res[i] /= sub_results.size(); 37 | } 38 | } else { 39 | for (size_t i = 0; i < sub_results.size(); i++) { 40 | const size_t index = std::distance(sub_results[i].begin(), 41 | std::max_element(sub_results[i].begin(), sub_results[i].end()) 42 | ); 43 | assert(index < sub_results[0].size()); 44 | res[index]++; 45 | } 46 | } 47 | return std::make_shared >(res); 48 | } 49 | 50 | private: 51 | bool is_prob_avg_voting; 52 | }; 53 | 54 | // AdaBoost 55 | class AdaBoost { 56 | public: 57 | explicit AdaBoost(size_t _sample_cnt): sample_cnt(_sample_cnt) { 58 | weights = new float[_sample_cnt]; 59 | const float init_w = 1.0 / _sample_cnt; 60 | for (size_t i = 0; i < _sample_cnt; i++) { 61 | *(weights + i) = init_w; 62 | } 63 | } 64 | 65 | ~AdaBoost() { 66 | delete[] weights; 67 | _model_weights.clear(); 68 | } 69 | 70 | std::shared_ptr ensembling_weak_model(std::vector& pred_correct_mask) { 71 | float err_rate = 0.; 72 | for (size_t i = 0; i < sample_cnt; i++) { 73 | if (pred_correct_mask[i] == false) 74 | err_rate += 1.; 75 | } 76 | err_rate /= sample_cnt; 77 | 78 | float alpha = model_weighting(err_rate); 79 | _model_weights.emplace_back(alpha); 80 | 81 | float reweighting = std::exp(alpha); 82 | for (size_t i = 0; i < sample_cnt; i++) { 83 | if (pred_correct_mask[i] == false) { 84 | *(weights + i) *= reweighting; 85 | } else { 86 | *(weights + i) /= reweighting; 87 | } 88 | } 89 | return std::make_shared(*weights); 90 | } 91 | 92 | const vector& model_weights() { 93 | return _model_weights; 94 | } 95 | 96 | private: 97 | inline float model_weighting(float err_rate){ 98 | if (err_rate < 1e-4) { 99 | return 1000; // strongly outstanding 100 | } 101 | // calculate new weight 102 | return 0.5 * std::log((1 - err_rate) / err_rate); 103 | } 104 | 105 | size_t sample_cnt; 106 | float* weights = NULL; 107 | std::vector _model_weights; 108 | }; 109 | 110 | 111 | #endif /* ensembling_h */ 112 | -------------------------------------------------------------------------------- /LightCTR/util/evaluator.h: -------------------------------------------------------------------------------- 1 | // 2 | // evaluator.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/10. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
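// [Editorial note, not part of the original source] AucEvaluator below buckets predictions
// by score (kHashLen bins, split by label) and then sweeps from the highest bucket down,
// accumulating trapezoids over the implied ROC curve and normalizing by totPos * totNeg.
// Equivalently, AUC is the probability that a random positive is ranked above a random
// negative, which gives this exact O(n log n) reference (ties ignored, illustrative only):
//
//   #include <algorithm>
//   #include <cstddef>
//   #include <utility>
//   #include <vector>
//   inline float auc_exact(std::vector<std::pair<float, int>> scored) { // (score, label in {0,1})
//       std::sort(scored.begin(), scored.end());
//       double rank_sum = 0, pos = 0, neg = 0;
//       for (std::size_t i = 0; i < scored.size(); ++i) {
//           if (scored[i].second == 1) { rank_sum += double(i) + 1; pos += 1; }
//           else                       { neg += 1; }
//       }
//       if (pos == 0 || neg == 0) return 0.f;
//       return float((rank_sum - pos * (pos + 1) / 2) / (pos * neg));
//   }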
7 | // 8 | 9 | #ifndef evaluator_h 10 | #define evaluator_h 11 | 12 | #include 13 | 14 | struct EvalInfo { 15 | // true positives, judge label=1 rightly 16 | float TP; 17 | // true negatives, judge label=0 rightly 18 | float TN; 19 | // false positives 20 | float FP; 21 | // false negatives 22 | float FN; 23 | 24 | EvalInfo() : TP(0.0), TN(0.0), FP(0.0), FN(0.0) {} 25 | }; 26 | 27 | inline static float Precision(float TP, float FP) { 28 | if (TP > 0.0 || FP > 0.0) { 29 | return TP / (TP + FP); 30 | } else { 31 | return 1.0; 32 | } 33 | } 34 | 35 | inline static float Recall(float TP, float FN) { 36 | if (TP > 0.0 || FN > 0.0) { 37 | return TP / (TP + FN); 38 | } else { 39 | return 1.0; 40 | } 41 | } 42 | 43 | inline static float F1Score(float precision, float recall) { 44 | if (precision > 0.0 || recall > 0.0) { 45 | return 2.0f * precision * recall / (precision + recall); 46 | } else { 47 | return 0; 48 | } 49 | } 50 | 51 | class AucEvaluator { 52 | public: 53 | AucEvaluator() { 54 | PosNum = new int[kHashLen + 1]; 55 | NegNum = new int[kHashLen + 1]; 56 | } 57 | ~AucEvaluator() { 58 | delete [] PosNum; 59 | delete [] NegNum; 60 | } 61 | void init(const vector* pCTR, const vector* label) { 62 | assert(pCTR->size() == label->size()); 63 | memset(PosNum, 0, sizeof(int) * (kHashLen + 1)); 64 | memset(NegNum, 0, sizeof(int) * (kHashLen + 1)); 65 | 66 | for (size_t i = 0; i < pCTR->size(); i++) { 67 | size_t index = pCTR->at(i) * kHashLen; 68 | if (label->at(i) == 1) { // Positive 69 | PosNum[index]++; 70 | } else { 71 | NegNum[index]++; 72 | } 73 | } 74 | } 75 | float Auc() { 76 | float totPos = 0.0, totNeg = 0.0; 77 | float totPosPrev = 0.0, totNegPrev = 0.0; 78 | float auc = 0.0; 79 | 80 | int64_t idx = kHashLen; 81 | while (idx >= 0) { 82 | totPosPrev = totPos; 83 | totNegPrev = totNeg; 84 | totPos += PosNum[idx]; 85 | totNeg += NegNum[idx]; 86 | auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev); 87 | --idx; 88 | } 89 | if (totPos > 0.0 && totNeg > 0.0) { 90 | return auc / totPos / totNeg; 91 | } else { 92 | return 0.0; 93 | } 94 | } 95 | private: 96 | inline float trapezoidArea(float X1, float X2, 97 | float Y1, float Y2) { 98 | return (X1 > X2 ? (X1 - X2) : (X2 - X1)) * (Y1 + Y2) / 2.0; 99 | } 100 | 101 | const size_t kHashLen = (1 << 24) - 1; 102 | int *PosNum, *NegNum; 103 | }; 104 | 105 | #endif /* evaluator_h */ 106 | -------------------------------------------------------------------------------- /LightCTR/util/loss.h: -------------------------------------------------------------------------------- 1 | // 2 | // loss.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
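// [Editorial note, not part of the original source] Logistic::loss below takes raw logits p
// (not probabilities) and evaluates the log-likelihood in the overflow-safe form
//   (l - [p >= 0]) * p - log(1 + exp(p - 2 * [p >= 0] * p)),
// which equals l * p - log(1 + exp(p)) for either sign of p, i.e. the usual
// l * log(sigmoid(p)) + (1 - l) * log(1 - sigmoid(p)). The gradient routine, by contrast,
// assumes `pred` has already passed through a sigmoid output layer (hence pred - label).
// Scalar sketch of the stable form (illustrative only):
//
//   #include <cmath>
//   inline float stable_log_likelihood(float logit, float label) {   // label in {0, 1}
//       const float shift = (logit >= 0.f) ? logit : 0.f;
//       return (label * logit - shift) - std::log(1.f + std::exp(logit - 2.f * shift));
//   }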
7 | // 8 | 9 | #ifndef loss_h 10 | #define loss_h 11 | 12 | #include 13 | #include 14 | #include "assert.h" 15 | using namespace std; 16 | 17 | template 18 | class Loss { 19 | public: 20 | virtual T loss(const T* pred, const L* label, size_t len) const = 0; 21 | virtual void gradient(const T* pred, const L* label, T* gradient, size_t len) = 0; 22 | }; 23 | 24 | template 25 | class Square : public Loss { // Mean Squared Error 26 | public: 27 | T loss(const T* pred, const L* label, size_t len) const { 28 | T sum = 0.0f, tmp; 29 | for (size_t i = 0; i < len; i++) { 30 | tmp = pred[i] - label[i]; 31 | sum += tmp / 2 * tmp; 32 | } 33 | return sum; 34 | } 35 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 36 | for (size_t i = 0; i < len; i++) { 37 | gradient[i] = pred[i] - label[i]; 38 | } 39 | } 40 | }; 41 | 42 | template 43 | class Logistic : public Loss { 44 | public: 45 | T loss(const T* pred, const L* label, size_t len) const { 46 | T sum = 0.0f, p, l; 47 | for (size_t i = 0; i < len; i++) { 48 | p = pred[i]; 49 | l = label[i]; 50 | sum += (l - (p >= 0)) * p - log(1.0f + exp(p - 2.0f * (p >= 0) * p)); 51 | // sum += label->at(i) * log(pred->at(i)) + (1.0f - label->at(i)) * log(1.0f - pred->at(i)); 52 | } 53 | assert(!isnan(sum)); 54 | return sum; 55 | } 56 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 57 | // Notice output activator must be sigmoid 58 | for (size_t i = 0; i < len; i++) { 59 | gradient[i] = pred[i] - label[i]; 60 | } 61 | } 62 | }; 63 | 64 | template 65 | class Logistic_Softmax : public Loss { 66 | public: 67 | T loss(const T* pred, const L* label, size_t len) const { 68 | T sum = 0.0f; 69 | for (size_t i = 0; i < len; i++) { 70 | if (label[i] == 1) { 71 | sum += log(pred[i]); 72 | } 73 | } 74 | assert(!isnan(sum)); 75 | return sum; 76 | } 77 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 78 | for (size_t i = 0; i < len; i++) { 79 | if (label[i] == 1) { 80 | gradient[i] = 1.0f - pred[i]; 81 | } else { 82 | gradient[i] = - pred[i]; 83 | } 84 | } 85 | } 86 | }; 87 | 88 | #endif /* loss_h */ 89 | -------------------------------------------------------------------------------- /LightCTR/util/pca.h: -------------------------------------------------------------------------------- 1 | // 2 | // pca.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 
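// [Editorial note, not part of the original source] Train() below implements the
// Generalized Hebbian Algorithm (Sanger's rule): with projections y = x W, each weight
// moves by
//   delta w_{f,j} = eta * y_j * ( x_f - sum_{k <= j} y_k * w_{f,k} ),
// where the inner sum (getSum, computed from the previous iteration's weightsTmp snapshot)
// deflates the input by the components already captured, so successive neurons converge to
// successive principal components. Per-sample sketch using the current weights
// (illustrative only):
//
//   #include <cstddef>
//   #include <vector>
//   inline void sanger_update(std::vector<float>& W,         // featureSize x neurons, row-major
//                             const std::vector<float>& x,   // one sample
//                             const std::vector<float>& y,   // its projection, size = neurons
//                             std::size_t neurons, float eta) {
//       const std::size_t features = x.size();
//       for (std::size_t j = 0; j < neurons; ++j)
//           for (std::size_t f = 0; f < features; ++f) {
//               float deflate = 0.f;
//               for (std::size_t k = 0; k <= j; ++k) deflate += y[k] * W[f * neurons + k];
//               W[f * neurons + j] += eta * y[j] * (x[f] - deflate);
//           }
//   }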
7 | // 8 | 9 | #ifndef pca_h 10 | #define pca_h 11 | 12 | #include "matrix.h" 13 | 14 | // Functions for principal component analysis 15 | class PCA { 16 | public: 17 | PCA(float _learning_rate, int _maxIters, int _neuronsNum, int _featureSize) { 18 | trainingData = NULL; 19 | learning_rate = _learning_rate; 20 | maxIters = _maxIters; 21 | neuronsNum = _neuronsNum; 22 | featureSize = _featureSize; 23 | 24 | weightsTmp = new Matrix(featureSize, neuronsNum); 25 | // Initializing Random weights for the first iteration 26 | weights = new Matrix(featureSize, neuronsNum); 27 | weights->randomInit(); 28 | } 29 | 30 | void loadMatrix(Matrix* _trainingData) { 31 | trainingData = _trainingData; 32 | } 33 | 34 | void Train() { 35 | assert(trainingData != NULL); 36 | // PCA trained by Generalized Hebbian Neuron 37 | for (int epoch = 0; epoch < maxIters; epoch++) 38 | { 39 | output = trainingData->Multiply(output, weights); 40 | weights->copy(weightsTmp); 41 | 42 | for (int row = 0; row < output->x_len; row++) { 43 | // each sample data 44 | for (int nid = 0; nid < neuronsNum; nid++) { 45 | for (int fid = 0; fid < featureSize; fid++) { 46 | // update each weight 47 | float sumTerm = getSum(row, nid, fid); 48 | *weights->getEle(fid, nid) += learning_rate * *output->getEle(row, nid) 49 | * (*trainingData->getEle(row, fid) - sumTerm); 50 | } 51 | } 52 | } 53 | 54 | if (weights->checkConvergence(weightsTmp)) { 55 | // if convergence then stop training 56 | printf("convergence in %d epoch", epoch); 57 | return; 58 | } 59 | } 60 | printf("[WARNING] stop training in %d epoch", maxIters); 61 | } 62 | 63 | Matrix* reduceDimension(Matrix* input, size_t reserve_pc_cnt = 1) { 64 | size_t orig = weights->y_len; 65 | weights->y_len = reserve_pc_cnt; 66 | output = input->Multiply(output, weights); 67 | weights->y_len = orig; 68 | return output; 69 | } 70 | 71 | Matrix* remove_pc(Matrix* input, size_t remove_pc_cnt = 1) { 72 | // V = V - (V * U) * U^T 73 | size_t orig = weights->y_len; 74 | weights->y_len = remove_pc_cnt; 75 | Matrix* lowDimentionM = NULL; 76 | lowDimentionM = input->Multiply(lowDimentionM, weights); 77 | output = lowDimentionM->Multiply(output, weights->transpose()); 78 | output->add(input, 1, -1); 79 | weights->y_len = orig; 80 | 81 | return output; 82 | } 83 | 84 | void saveModel(size_t epoch) { 85 | } 86 | 87 | private: 88 | float getSum(int row, int nid, int fid) { 89 | float sum = 0; 90 | for (int i = 0; i <= nid; i++) { 91 | assert(!isnan(*output->getEle(row, i))); 92 | sum += *output->getEle(row, i) * *weightsTmp->getEle(fid, i); 93 | } 94 | return sum; 95 | } 96 | 97 | float learning_rate; 98 | int maxIters; 99 | int neuronsNum, featureSize; 100 | 101 | Matrix* trainingData; 102 | 103 | Matrix* weights = NULL; 104 | Matrix* weightsTmp = NULL; 105 | Matrix* output = NULL; 106 | }; 107 | 108 | #endif /* pca_h */ 109 | -------------------------------------------------------------------------------- /LightCTR/util/quantile_compress.h: -------------------------------------------------------------------------------- 1 | // 2 | // quantile_compress.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 
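// [Editorial note, not part of the original source] QuantileCompress maps each real value
// to one of 2^(8 * sizeof(CompressT)) buckets whose boundaries are evenly spaced in CDF
// space (uniform, log, normal or a custom distribution), and decodes a bucket back to a
// representative value from the _real_value table, trading precision for size. Hypothetical
// usage, assuming RealT = float and CompressT = int8_t (the template arguments are stripped
// in this excerpt, so the exact instantiation is an assumption):
//
//   #include <cstdint>
//   QuantileCompress<float, int8_t> qc(QuantileType::NORMAL_DISTRIBUT, -1.0f, 1.0f);
//   float src[128] = { /* ... */ };
//   int8_t packed[128];
//   float restored[128];
//   qc.compress(src, 128, packed);     // 4 bytes -> 1 byte per value
//   qc.extract(packed, 128, restored); // restored[i] is the bucket's representative value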
7 | // 8 | 9 | #ifndef quantile_compress_h 10 | #define quantile_compress_h 11 | 12 | #include 13 | #include 14 | #include "significance.h" 15 | 16 | enum QuantileType { 17 | UNIFORM = 0, 18 | LOG, 19 | NORMAL_DISTRIBUT, // parameters usually obey the normal law 20 | CUSTOM_DISTRIBUT 21 | }; 22 | 23 | template 24 | class QuantileCompress { 25 | public: 26 | QuantileCompress(QuantileType _quantileType, RealT _min, RealT _max, 27 | RealT _mu = 0, RealT _sigma = 1) : 28 | quantileType(_quantileType), min(_min), max(_max), mu(_mu), sigma(_sigma) { 29 | assert(_min < _max); 30 | init(); 31 | } 32 | // Disable the copy and assignment operator 33 | QuantileCompress(const QuantileCompress &) = delete; 34 | QuantileCompress(QuantileCompress &&) = delete; 35 | QuantileCompress &operator=(const QuantileCompress &) = delete; 36 | QuantileCompress &operator=(QuantileCompress &&) = delete; 37 | 38 | void compress(const RealT *input, const int len, CompressT *output) { 39 | std::transform(input, input + len, 40 | output, 41 | std::bind( 42 | &QuantileCompress::encoding, 43 | this, 44 | std::placeholders::_1 45 | ) 46 | ); 47 | } 48 | void extract(const CompressT *input, const int len, RealT *output) { 49 | std::transform(input, input + len, 50 | output, 51 | std::bind( 52 | &QuantileCompress::decoding, 53 | this, 54 | std::placeholders::_1 55 | ) 56 | ); 57 | } 58 | 59 | private: 60 | RealT convert(RealT x) { 61 | if (quantileType == QuantileType::LOG) { 62 | x = log(x); 63 | } else if (quantileType == QuantileType::NORMAL_DISTRIBUT) { 64 | x = StandardCDF(x); 65 | } else if (quantileType == QuantileType::CUSTOM_DISTRIBUT) { 66 | x = CustomCDF(x, mu, sigma); 67 | } 68 | return x; 69 | } 70 | 71 | void init() { 72 | if (quantileType == QuantileType::LOG) { 73 | assert(-min == max); 74 | minCDF = convert(1e-4), maxCDF = convert(max); // fix min if quantile by log 75 | } else { 76 | minCDF = convert(min), maxCDF = convert(max); 77 | } 78 | assert(maxCDF > minCDF); 79 | 80 | _delta = (maxCDF - minCDF) / static_cast(N_INTERVALS); 81 | if (quantileType == QuantileType::LOG) { 82 | _delta *= 2.0f; // divided by positive and negative parts 83 | } 84 | 85 | if (quantileType == QuantileType::UNIFORM) { 86 | _real_value[0] = min; 87 | for (int i = 1; i < N_INTERVALS; i++) { 88 | _real_value[i] = _real_value[i - 1] + _delta; 89 | } 90 | } else if (quantileType == QuantileType::LOG) { 91 | const size_t half_size = N_INTERVALS >> 1; 92 | for (int i = 0; i < half_size; i++) { 93 | _real_value[half_size + i] = exp(minCDF + i * _delta); 94 | _real_value[half_size - i - 1] = - _real_value[half_size + i]; 95 | } 96 | } else if (quantileType == QuantileType::NORMAL_DISTRIBUT) { 97 | _real_value[0] = min; 98 | for (int i = 1; i < N_INTERVALS; i++) { 99 | _real_value[i] = ReverseCDF(minCDF + i * _delta, 0, 1); 100 | } 101 | } else if (quantileType == QuantileType::CUSTOM_DISTRIBUT) { 102 | _real_value[0] = min; 103 | for (int i = 1; i < N_INTERVALS; i++) { 104 | _real_value[i] = ReverseCDF(minCDF + i * _delta, mu, sigma); 105 | } 106 | } 107 | } 108 | 109 | CompressT encoding(RealT real) const { 110 | CompressT ret = CompressT(); 111 | if (real <= min) { 112 | ret = static_cast(0); 113 | } else if (real >= max) { 114 | ret = static_cast(N_INTERVALS - 1); 115 | } else { 116 | if (quantileType == QuantileType::UNIFORM) { 117 | real -= min; 118 | ret = static_cast(real / _delta); 119 | } else if (quantileType == QuantileType::LOG || 120 | quantileType == QuantileType::NORMAL_DISTRIBUT || 121 | quantileType == 
QuantileType::CUSTOM_DISTRIBUT) { 122 | ret = static_cast(_binary_search(real)); 123 | } 124 | } 125 | return ret; 126 | } 127 | 128 | RealT decoding(CompressT comp) const { 129 | int index = static_cast(comp); 130 | if (index < 0) { // deal with big-endian number 131 | index = N_INTERVALS + index; 132 | } 133 | assert(index >= 0 && index < N_INTERVALS); 134 | return _real_value[static_cast(index)]; 135 | } 136 | 137 | int _binary_search(RealT value) const { 138 | int lower = 0, upper = N_INTERVALS - 1, mid; 139 | while (lower <= upper) { 140 | mid = (lower + upper) >> 1; 141 | if (_real_value[mid] > value) { 142 | upper = mid - 1; 143 | } else { 144 | lower = mid + 1; 145 | } 146 | } 147 | return upper; 148 | } 149 | 150 | QuantileType quantileType; 151 | 152 | static const size_t N_INTERVALS = 1 << (sizeof(CompressT) * 8); 153 | RealT min, max; 154 | RealT minCDF, maxCDF; 155 | RealT mu, sigma; 156 | RealT _delta; 157 | RealT _real_value[N_INTERVALS]; 158 | }; 159 | 160 | #endif /* quantile_compress_h */ 161 | -------------------------------------------------------------------------------- /LightCTR/util/random.h: -------------------------------------------------------------------------------- 1 | // 2 | // random.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef random_h 10 | #define random_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "significance.h" 16 | 17 | inline void Seed(uint32_t seed) { 18 | srand(seed); 19 | } 20 | 21 | inline double UniformNumRand() { // [0, 1) 22 | return static_cast(rand()) / (static_cast(RAND_MAX) + 1.0); 23 | } 24 | 25 | inline double UniformNumRand2() { // (0, 1) 26 | return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX) + 2.0); 27 | } 28 | 29 | inline size_t Random_index(size_t n) { 30 | return rand() % n; 31 | } 32 | 33 | template 34 | inline void Shuffle(T *vec, size_t sz) { 35 | if (sz == 0) 36 | return; 37 | for (uint32_t i = (uint32_t)sz - 1; i > 0; i--) { 38 | std::swap(vec[i], vec[(uint32_t)(UniformNumRand() * (i + 1))]); 39 | } 40 | } 41 | 42 | inline double GaussRand() { // ~N(0, 1) 43 | static double V1, V2, S; 44 | static int phase = 0; 45 | double X; 46 | if(phase == 0){ 47 | do { 48 | V1 = 2.0 * UniformNumRand2() - 1.0; 49 | V2 = 2.0 * UniformNumRand2() - 1.0; 50 | S = V1 * V1 + V2 * V2; 51 | } while(S >= 1.0 || S == 0.0); 52 | X = V1 * sqrt(-2.0 * log(S) / S); 53 | } else { 54 | X = V2 * sqrt(-2.0 * log(S) / S); 55 | } 56 | phase = 1 - phase; 57 | return X; 58 | } 59 | 60 | inline double GaussRand(double mu, double sigma) { 61 | return GaussRand() * sigma + mu; 62 | } 63 | 64 | inline std::pair GaussRand2D() { 65 | static double V1, V2, S; 66 | static int phase = 0; 67 | double X; 68 | if(phase == 0){ 69 | do { 70 | V1 = 2.0 * UniformNumRand2() - 1.0; 71 | V2 = 2.0 * UniformNumRand2() - 1.0; 72 | S = V1 * V1 + V2 * V2; 73 | } while(S >= 1.0 || S == 0.0); 74 | X = V1 * sqrt(-2.0 * log(S) / S); 75 | } else { 76 | X = V2 * sqrt(-2.0 * log(S) / S); 77 | } 78 | phase = 1 - phase; 79 | return std::make_pair(V1 * X, V2 * X); 80 | } 81 | 82 | inline bool SampleBinary(double p) { 83 | return UniformNumRand() < p; 84 | } 85 | 86 | inline size_t subSampleSize(double sampleAlpha = 0.05, double sampleErrorBound = 0.05) { 87 | // indicate confidence level and error bound to determine a suitable sample size 88 | double z = ReverseAlpha(sampleAlpha / 2); 89 | size_t sampleSize = (size_t)((z * z / 4.0f) / (sampleErrorBound * 
-------------------------------------------------------------------------------- /LightCTR/util/shm_hashtable.h: -------------------------------------------------------------------------------- 1 | // 2 | // shm_hashtable.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/7. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef shm_hashtable_h 10 | #define shm_hashtable_h 11 | 12 | #include "../common/system.h" 13 | #include "../common/lock.h" 14 | #include "../common/hash.h" 15 | #include <string> 16 | #include <vector> 17 | #include <bitset> 18 | #include <algorithm> 19 | template <typename T> 20 | class ShmHashTable { 21 | public: 22 | struct ShmHashNode { 23 | size_t key; // zero is reserved to mark an empty slot 24 | T value; 25 | ShmHashNode() : key(0), value(0.0) {} 26 | }; 27 | 28 | static ShmHashTable& Instance(size_t hash_times) { 29 | static ShmHashTable _instance(hash_times); 30 | return _instance; 31 | } 32 | 33 | bool insert(const std::string& key, const T& value) { 34 | return insert(static_cast<size_t>(murMurHash(key)), value); 35 | } 36 | 37 | bool update(const std::string& key, const T& value) { 38 | return update(static_cast<size_t>(murMurHash(key)), value); 39 | } 40 | 41 | const T& getValue(const std::string& key) const { 42 | return getValue(static_cast<size_t>(murMurHash(key))); 43 | } 44 | 45 | bool update(size_t key, const T& value) { 46 | return insert(key, value); 47 | } 48 | 49 | bool insert(size_t key, const T& value) { 50 | assert(g_pShmAddr); 51 | assert(key > 0); 52 | int res = insertOrUpdate(key, value, 0); 53 | return (res == 0 ? true : false); 54 | }
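A usage sketch of the shared-memory table above, not from the repository: the value type, hash_times and key name are arbitrary, and it assumes the process can attach to the shared-memory segment obtained through getShmAddr() (declared in one of the common headers).

```cpp
// Sketch: write one parameter into the shared-memory hash table and read it
// back by its string key. hash_times = 4 and the key name are illustrative only.
#include <cstdio>
#include "LightCTR/util/shm_hashtable.h"

int main() {
    auto& table = ShmHashTable<float>::Instance(/*hash_times=*/4);
    if (table.insert("fm_embedding_42", 0.125f)) {
        printf("stored value = %f\n", table.getValue("fm_embedding_42"));
    }
    return 0;
}
```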
55 | 56 | const T& getValue(size_t key) const { 57 | for (int i = 0; i < hash_times; i++) { 58 | size_t inner_offset = key % primes[i]; 59 | ShmHashNode* addr = (ShmHashNode*)g_pShmAddr + prime_offset[i] + inner_offset; 60 | if (addr->key == key) { 61 | return addr->value; 62 | } 63 | } 64 | static const T empty_value = T(); return empty_value; // not found: avoid returning a dangling reference 65 | } 66 | 67 | private: 68 | ShmHashTable() { 69 | 70 | } 71 | ~ShmHashTable() { 72 | if (g_pShmAddr) { 73 | shmdt(g_pShmAddr); 74 | g_pShmAddr = NULL; 75 | } 76 | } 77 | explicit ShmHashTable(size_t _hash_times) { 78 | hash_times = _hash_times; 79 | hashtable_reserve_size = hashspace * _hash_times * sizeof(ShmHashNode); 80 | 81 | initPrime(primes); 82 | 83 | g_pShmAddr = getShmAddr(0x5fef, hashtable_reserve_size); 84 | memset(g_pShmAddr, 0, hashtable_reserve_size); 85 | } 86 | ShmHashTable(const ShmHashTable&) = delete; 87 | ShmHashTable(ShmHashTable&&) = delete; 88 | ShmHashTable& operator=(const ShmHashTable&) = delete; 89 | ShmHashTable& operator=(ShmHashTable&&) = delete; 90 | 91 | int insertOrUpdate(size_t key, T value, size_t depth) { 92 | if (depth > 5) 93 | return -1; 94 | 95 | vector<ShmHashNode*> candidate_position; 96 | candidate_position.reserve(hash_times); 97 | 98 | for (int i = 0; i < hash_times; i++) { 99 | size_t inner_offset = key % primes[i]; 100 | ShmHashNode* addr = (ShmHashNode*)g_pShmAddr + prime_offset[i] + inner_offset; 101 | if (addr->key == 0) { 102 | candidate_position.emplace_back(addr); 103 | } else if (addr->key == key) { 104 | // update in place 105 | if(!atomic_compare_and_swap(&addr->value, addr->value, value)) { 106 | return insertOrUpdate(key, value, depth + 1); 107 | } 108 | } 109 | } 110 | 111 | // select one empty slot to insert 112 | if (likely(!candidate_position.empty())) { 113 | for (int i = 0; i < candidate_position.size(); i++) { 114 | ShmHashNode* addr = candidate_position[i]; 115 | if (addr->key == 0) { 116 | unique_lock<SpinLock> glock(lock); 117 | if (addr->key == 0) { 118 | addr->key = key; 119 | addr->value = value; 120 | 121 | return 0; 122 | } 123 | } 124 | } 125 | } 126 | // a conflict happened, retry 127 | return insertOrUpdate(key, value, depth + 1); 128 | } 129 | 130 | void initPrime(std::vector<size_t>& primes) { 131 | static const size_t MAX = (hashspace >> 1) + 1; 132 | bitset<MAX> flag(0); 133 | 134 | primes.emplace_back(2); 135 | 136 | size_t i, j; 137 | for (i = 3; i < MAX; i += 2) { 138 | if (!(flag.test(i / 2))) 139 | primes.emplace_back(i); 140 | for (j = 1; j < primes.size() && i * primes[j] < MAX; j++) { 141 | flag.set(i * primes[j] / 2); 142 | if (i % primes[j] == 0) 143 | break; 144 | } 145 | } 146 | std::reverse(primes.begin(), primes.end()); 147 | primes.resize(hash_times); 148 | assert(primes.size() == hash_times); 149 | 150 | prime_offset.emplace_back(0); 151 | for (i = 0; i < hash_times; i++) { 152 | prime_offset.emplace_back(prime_offset.back() + primes[i]); 153 | } 154 | } 155 | 156 | void* g_pShmAddr = NULL; 157 | size_t hash_times; 158 | size_t hashtable_reserve_size = 0; 159 | 160 | static const size_t hashspace = 1 << 20; 161 | std::vector<size_t> primes; 162 | std::vector<size_t> prime_offset; 163 | 164 | SpinLock lock; 165 | }; 166 | 167 | #endif /* shm_hashtable_h */ 168 | -------------------------------------------------------------------------------- /LightCTR/util/significance.h: -------------------------------------------------------------------------------- 1 | // 2 | // significance.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef significance_h 10 | #define significance_h 11 | 12 | #include <cmath> 13 | #include "assert.h" 14 | 15 | // error function 16 | inline double Erf(double x) { 17 | // works for both positive and negative x, because the error function is odd: Erf(-x) = -Erf(x) 18 | double a = 0.140012; 19 | double b = x * x; 20 | double item = -b * (4 / M_PI + a * b) / (1 + a * b); 21 | double result = sqrt(1 - exp(item)); 22 | if (x >= 0) 23 | return result; 24 | return -result; 25 | } 26 | 27 | inline double LogCDF(double x, double alpha = 10) { 28 | const double scaler = (alpha == 10) ? 1 : log(alpha); 29 | return (x * log(fabs(x)) - x) / scaler; 30 | } 31 | 32 | // calculate the standard cumulative distribution function F(x) = P(Z <= x), 33 | // where Z follows a standard normal distribution 34 | inline double StandardCDF(double x) { 35 | const double SquareRootOfTwo = 1.414213562373095; 36 | return (1.0 + Erf(x / SquareRootOfTwo)) / 2; 37 | } 38 | 39 | inline double CustomCDF(double x, double u, double sigma) { 40 | x = x - u; 41 | return 0.5 + 0.5 * Erf(x / sigma / 1.414213562373095); 42 | } 43 | 44 | inline double ReverseCDF(double p, double mu, double sigma) { 45 | double lower = -5.0, upper = 5.0, middle; 46 | while(1) { 47 | middle = (lower + upper) / 2; 48 | double estimate = CustomCDF(middle, mu, sigma); 49 | if (fabs(estimate - p) < 1e-7) 50 | break; 51 | // the CDF is monotonic, so binary search applies 52 | if (estimate > p) { 53 | upper = middle; 54 | } else { 55 | lower = middle; 56 | } 57 | } 58 | return middle; 59 | } 60 | 61 | // given a tail probability alpha, calculate z such that P(Z > z) = alpha 62 | inline double ReverseAlpha(double alpha) { 63 | assert(alpha > 0 && alpha < 1); 64 | return ReverseCDF(1.0f - alpha, 0, 1); 65 | } 66 | 67 | // calculate the statistical significance of an observed value x under a Gaussian 68 | // distribution with mean u and standard deviation sigma 69 | inline double GaussianSignificance(double x, double u, double sigma) { 70 | double cdf = CustomCDF(x, u, sigma); 71 | return 2 * cdf - 1; 72 | } 73 | 74 | #endif /* significance_h */ 75 |
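Two spot checks (illustrative, not from the repository) of the functions above: the two-sided 95% quantile from ReverseAlpha() and the significance of a one-sigma observation.

```cpp
// Sketch: ReverseAlpha(0.025) should come out close to 1.96, and a one-sigma
// observation has a significance near 0.68 under GaussianSignificance().
#include <cstdio>
#include "LightCTR/util/significance.h"

int main() {
    double z   = ReverseAlpha(0.025);                 // approx. 1.96
    double sig = GaussianSignificance(1.0, 0.0, 1.0); // approx. 0.68
    printf("z = %f  significance = %f\n", z, sig);
    return 0;
}
```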
-------------------------------------------------------------------------------- /LightCTR_LOGO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/LightCTR_LOGO.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export CC = gcc 2 | export CXX = g++ 3 | export CFLAGS = -std=c++11 -Wall -O3 -D__AVX__ -mavx -mssse3 -Wno-unknown-pragmas -Wno-reorder -Wno-conversion-null -Wno-strict-aliasing -Wno-sign-compare 4 | 5 | BIN = LightCTR_BIN 6 | ZMQ_INC = ./LightCTR/third/zeromq/include 7 | ZMQ_LIB = ./LightCTR/third/zeromq/lib/libzmq.a 8 | OBJ = 9 | .PHONY: clean all 10 | 11 | all: $(BIN) $(OBJ) 12 | export LDFLAGS= -pthread -lm -ldl 13 | 14 | STANDALONE = *.cpp LightCTR/*.h LightCTR/common/*.h LightCTR/predict/*.h LightCTR/predict/*.cpp LightCTR/util/*.h LightCTR/dag/*.h LightCTR/dag/operator/*.h LightCTR/train/*.h LightCTR/train/*.cpp LightCTR/train/layer/*.h LightCTR/train/unit/*.h 15 | DISTRIBUT = $(STANDALONE) $(ZMQ_INC) LightCTR/distribut/*.h 16 | 17 | LightCTR_BIN : $(STANDALONE) 18 | master : $(DISTRIBUT) 19 | ps : $(DISTRIBUT) 20 | worker : $(DISTRIBUT) 21 | ring_master : $(DISTRIBUT) 22 | ring_worker : $(DISTRIBUT) 23 | 24 | $(BIN) : 25 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS) 26 | 27 | master : 28 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Master $(filter %.cpp %.o %.c, $^) -D MASTER -Xlinker $(ZMQ_LIB) $(LDFLAGS) 29 | 30 | ps : 31 | $(CXX) $(CFLAGS) -o LightCTR_BIN_PS $(filter %.cpp %.o %.c, $^) -D PS -Xlinker $(ZMQ_LIB) $(LDFLAGS) 32 | 33 | worker : 34 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Worker $(filter %.cpp %.o %.c, $^) -D WORKER -Xlinker $(ZMQ_LIB) $(LDFLAGS) 35 | 36 | ring_master : 37 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Ring_Master $(filter %.cpp %.o %.c, $^) -D MASTER_RING -Xlinker $(ZMQ_LIB) $(LDFLAGS) 38 | 39 | ring_worker : 40 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Ring_Worker $(filter %.cpp %.o %.c, $^) -DWORKER_RING -DTEST_CNN -Xlinker $(ZMQ_LIB) $(LDFLAGS) 41 | 42 | $(OBJ) : 43 | $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) 44 | 45 | install: 46 | cp -f -r $(BIN) $(INSTALL_PATH) 47 | 48 | clean: 49 | $(RM) $(OBJ) $(BIN) *~ 50 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Alt text -w135](./LightCTR_LOGO.png) 2 | ## LightCTR Overview 3 | LightCTR is a lightweight and scalable framework that combines mainstream Click-Through-Rate prediction algorithms based on a **computational DAG, the philosophy of Parameter Server, and Ring-AllReduce collective communication**. The library is suitable for sparse data and designed for large-scale distributed model training. 4 | 5 | Meanwhile, LightCTR is also an **ongoing experimental study** and an open-source project oriented to code readers. Its clear execution logic will be of value to learners in the machine-learning field. 6 | 7 | ## Features 8 | * Distributed training based on Parameter Server and Ring-AllReduce collective communication 9 | * Directed Acyclic Graph (DAG) of autograd computation 10 | * Gradient clipping, Stale Synchronous Parallel (SSP) and Asynchronous SGD with delay compensation 11 | * Neural network compression with half precision and quantization (PQ or Int8) 12 | * Shared parameter key-value pairs stored across physical nodes by DHT in shared memory 13 | * Lock-free multi-threaded training and SIMD operations 14 | * Optimizers implemented: Mini-Batch GD, Adagrad, FTRL, Adam, etc. 15 | 16 | ## List of Implemented Algorithms 17 | 18 | * Wide & Deep Model 19 | * Factorization Machine, Field-aware Factorization Machine, Neural Factorization Machine 20 | * Gradient Boosting Tree Model 21 | * Gaussian Mixture Clustering Model 22 | * Topic Model PLSA, Embedding Model 23 | * Ngram Convolutional Neural Network, Self-Attention Recurrent Neural Network 24 | * Variational AutoEncoder 25 | * Approximate Nearest Neighbor Retrieval 26 | 27 | ## Benchmark 28 | #### High performance 29 |
30 | (benchmark images: benchmark/vs_libfm.png, benchmark/vs_libffm.png, benchmark/vs_tf_cpu.png) 31 | 32 | 33 |
34 | 35 | #### Scalable 36 |
37 | (benchmark images: benchmark/4_node_ps.png, benchmark/4_node_ring.png) 38 | 39 |
40 | 41 | ## Introduction 42 | #### Finding user groups 43 | Click-through-rate prediction means delivering the right content to the right user group, so as to increase advertising revenue or transaction conversion rate. Concretely, the collected user click and behavior data are described as structured features with discrete and continuous values, normalized and de-biased, and then a suitable model is chosen to fit the data distribution of the traffic, estimating the probability that a user will be interested in a piece of content and bring a commercial conversion. Typically, all feature combinations can first be fed into the ensemble tree model `LightCTR::GBM` to discover groups, where each trained leaf node represents a user group; `LightCTR::LR` or `LightCTR::MLP` then further classifies the low-dimensional 0/1 group features built by the tree model. 44 | When there are too many categorical discrete features and the input becomes high-dimensional and sparse, tree models and neural networks may hit a processing bottleneck; `LightCTR::FM` or `LightCTR::FFM` can be used to train feature crosses over the discrete features, improving feature utilization and reducing the risk of overfitting under sparse data. Each feature dimension is mapped into a low-dimensional space, which also makes it convenient to feed the features into other models as continuous inputs. 45 | Compared with two-stage training that first pre-trains low-dimensional feature mappings with FM and then feeds them into a DNN, training low-dimensional feature embeddings end-to-end directly at the DNN input layer, with locally connected units within each field, better guarantees model freshness; alternatively, `LightCTR::NFM` and related variants, which embed a DNN inside the FM model, perform nonlinear high-order feature combinations, improving the model's representation power and yielding better AUC. 46 | 47 | #### Modeling behavior sequences 48 | Sequences of user-clicked content often carry local correlation information between items, such as clicking the same kind of product or viewing the same kind of page in succession. These local relations within behavior sequences can be captured by `LightCTR::Embedding`, yielding low-dimensional latent vector representations of the clicked content or behavior; alternatively, the variational autoencoder `LightCTR::VAE` can derive feature combinations and strengthen the expressive power of the low-dimensional features. The latent vectors can be used to judge relevance or fed directly into other models. Further, after the sequence data is smoothed, the trained behavior latent vectors are fed in temporal order into the recurrent network `LightCTR::LSTMUnit`, and the feature representation output by the RNN is passed to a `LightCTR::Softmax` classifier, which, using preset supervised labels, trains an evaluation or discrimination model of the user. 49 | When the coverage of preset labels is insufficient, the high-dimensional feature representations can be fed into `LightCTR::GMM` for unsupervised clustering; the clusters are summarized as intent clusters, serving as a basis for matching user intent and supplementing the crowd categories of user profiles. 50 | 51 | #### Content analysis 52 | Text such as user comments and the page context of search ads also contains a lot of minable user-interest information. In the search-ads scenario, for example, a user's keywords need to be matched to the auction terms with the highest semantic relevance in order to serve high-conversion ads, so analyzing text is an important basis for CTR prediction. To extract the semantics of a whole passage, one can first pre-train a word-vector table with `LightCTR::Embedding`, weight the word vectors of the words appearing in the text by term frequency, and remove the principal component; or, following the Skip-thought approach combined with negative sampling, feed each word vector of the text in order into `LightCTR::LSTMUnit` to train a semantic feature representation of the text, where the inner product of vectors corresponds to text relevance. In addition, following DSSM, the matrix formed by the text's word vectors can be fed into the Ngram convolutional network `LightCTR::CNN` to extract local semantic-correlation features of sentences, and a pairwise discrimination model against preset labels can be trained with positive and negative samples. 53 | When text lacks classification labels, `LightCTR::PLSA` can obtain document topic distributions in an unsupervised way, which can be used to distinguish content categories by topic distribution and to compute semantic similarity between long corpora and short queries; the posterior can also be used to compute the importance of each word in context, serving as a keyword summary of long text. 54 | 55 | #### Layered model fusion 56 | More complex models bring better representation power but also increase computation time, and response time is strongly negatively correlated with click-through rate. To balance the performance and effectiveness of online CTR prediction, different models can predict layer by layer: for example, the first layer uses `LightCTR::FTRL_LR`, a simple online-learning model that induces sparse solutions, while the second layer uses a complex model such as the locally connected `LightCTR::MLP` mentioned above or `LightCTR::NFM` for fine-grained prediction. At the system level, the weights or outputs of the last fully connected layer of the DNN can be extracted and cached as fixed representations of users or items, and the TopN results of `LightCTR::ANN` nearest-neighbor vector retrieval serve as recommendation recall, maximizing CTR/ROI while reducing the average response time of online inference. In addition, LightCTR is exploring methods such as quantile compression of model parameters and binary networks to greatly improve computational efficiency without losing prediction accuracy. 57 | 58 | #### Multi-machine, multi-threaded parallel computing 59 | LightCTR achieves high-performance single-machine numerical computation through SIMD vectorized instructions, pipeline parallelism, multi-core multi-threaded computing, cache-aware optimization and other techniques. When the number of model parameters exceeds single-machine memory, or single-machine training cannot meet freshness requirements, LightCTR further provides scalable cluster training based on the Parameter Server and Ring-AllReduce. 60 | * In Parameter Server mode, the cluster has three roles: Master, ParamServer and Worker. One Master per cluster is responsible for cluster startup and maintenance of running state; the sparse and dense tensor parameters of a large model are scattered across multiple ParamServers by DHT and cooperate with multiple Workers performing data-parallel gradient computation. In each training round, the parameters of a sample batch are first pulled from the ParamServers, and the computed parameter gradients are pushed back to the ParamServers for aggregation. The ParamServers update parameters asynchronously, lock-free and semi-synchronously, using gradient TopK truncation, delayed-gradient compensation and other techniques. Parameters are stored compactly on the ParamServers and are selected or evicted by feature hit rate; gradient traffic is compressed with variable-length encoding / half precision / Int8, and network synchronization efficiency is improved by batched parameter requests and read/write separation. 61 | * In ring-topology Ring-AllReduce mode, LightCTR dynamically self-balances the training progress of the cluster without introducing a coordinator node and, combined with a gradient-fusion mechanism, achieves efficient and highly stable decentralized gradient synchronization, which suits scalable training of dense-parameter models. In this mode every node stores the full model and can serve inference on its own; during training, each node obtains the gradient results of the other nodes within a bounded number of iterations, and with a certain cluster scale and suitable batch size and learning rate, a linear speedup of the training task can be achieved. 62 | * The LightCTR distributed cluster adopts failover mechanisms such as heartbeat monitoring and message retransmission. In addition, LightCTR is exploring network optimizations such as RDMA, DPDK and multiple NICs to reduce communication latency. 63 | 64 | ## Quick Start 65 | * LightCTR depends only on C++11 and ZeroMQ, with a lightweight and modular design 66 | * Easy to use: just change the configuration (e.g. learning rate, data source) in `main.cpp` 67 | * Run `./build.sh` to start a training task in Parameter Server mode, or `./build_ring.sh` to start in Ring-AllReduce mode 68 | * Current CI Status: [![Build Status](https://travis-ci.org/cnkuangshi/LightCTR.svg?branch=master)](https://travis-ci.org/cnkuangshi/LightCTR) on Ubuntu and MacOS 69 | 70 | ## Welcome to Contribute 71 | * Everyone interested in the intersection of machine learning and scalable systems is welcome to contribute code, create issues or open pull requests. 72 | * LightCTR is released under the Apache License, Version 2.0.
73 | 74 | ## Disclaimer 75 | * Please note that LightCTR is still undergoing and it does not give any warranties, as to the suitability or usability. 76 | 77 | -------------------------------------------------------------------------------- /benchmark/4_node_ps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/4_node_ps.png -------------------------------------------------------------------------------- /benchmark/4_node_ring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/4_node_ring.png -------------------------------------------------------------------------------- /benchmark/vs_libffm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_libffm.png -------------------------------------------------------------------------------- /benchmark/vs_libfm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_libfm.png -------------------------------------------------------------------------------- /benchmark/vs_tf_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_tf_cpu.png -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# -lt 3 ]; then 3 | echo "usage: $0 [ps_num] [worker_num] [master_ip_port like 127.0.0.1:17832]" 4 | exit -1; 5 | fi 6 | 7 | cd ./LightCTR/third 8 | sh ./install_third.sh 9 | cd ../../ 10 | 11 | export LightCTR_PS_NUM=$1 12 | shift 13 | export LightCTR_WORKER_NUM=$1 14 | shift 15 | export LightCTR_MASTER_ADDR=$1 16 | 17 | make master & 18 | make ps & 19 | make worker & 20 | 21 | wait 22 | echo 23 | echo 24 | echo "[Build Success]" 25 | echo "Please copy different BIN file to corresponding machine, DON'T forget export LightCTR_PS_NUM, LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first" 26 | -------------------------------------------------------------------------------- /build_ring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# -lt 2 ]; then 3 | echo "usage: $0 [worker_num] [master_ip_port like 127.0.0.1:17832]" 4 | exit -1; 5 | fi 6 | 7 | cd ./LightCTR/third 8 | sh ./install_third.sh 9 | cd ../../ 10 | 11 | export LightCTR_PS_NUM=0 12 | export LightCTR_WORKER_NUM=$1 13 | shift 14 | export LightCTR_MASTER_ADDR=$1 15 | 16 | make ring_master & 17 | make ring_worker & 18 | 19 | wait 20 | echo 21 | echo 22 | echo "[Build Success]" 23 | echo "Please copy different BIN file to corresponding machine, DON'T forget export LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first" 24 | -------------------------------------------------------------------------------- /data/proc_file_split.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | file_origin = open(sys.argv[1], 'r') 5 | shardings = 
int(sys.argv[2]) 6 | 7 | names = [] 8 | for i in xrange(shardings): 9 | name = sys.argv[1] + '_' + str(i + 1) # start from 1 10 | names.append(name) 11 | 12 | file_io_handle = [] 13 | for i in xrange(shardings): 14 | file_io_handle.append(open(names[i], 'w')) 15 | 16 | rand_stand = 1.0 / shardings 17 | 18 | for line in file_origin: 19 | v = random.random() 20 | part = int(v / rand_stand) 21 | assert part < shardings 22 | 23 | file_io_handle[part].write(line.strip()) 24 | file_io_handle[part].write('\n') 25 | 26 | for file in file_io_handle: 27 | file.close() 28 | 29 | file_origin.close() 30 | -------------------------------------------------------------------------------- /data/proc_text_topic.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 3 | # proc_text_topic.py 4 | # LightCTR 5 | # 6 | # Created by SongKuangshi on 2017/10/15. 7 | # Copyright © 2017年 SongKuangshi. All rights reserved. 8 | 9 | import os 10 | import sys 11 | 12 | stopset = {'a','the','of','to','an','but','or','its','about','would','and','in','that','is','are','be','been','will','this','was','for','on','as','from','at','by','with','have','which','has','had','were','it','not'} 13 | 14 | def generate(infile,word_id_file_path,training_file_path,vocab_size): 15 | term_dict = {} 16 | 17 | infp = open(infile,'r') 18 | for line in infp: 19 | line = line.rstrip() 20 | if line.find('<') != -1 and line.find('>') != -1: 21 | continue 22 | info = line.split(' ') 23 | for term in info: 24 | term = term.lower() 25 | if term == '' or not term.isalpha() or term in stopset: 26 | continue 27 | if term.isspace() or term.find(".") != -1 or term.find(" ") != -1: 28 | continue 29 | if term in term_dict: 30 | term_dict[term] += 1 31 | else: 32 | term_dict[term] = 1 33 | term_list = sorted(term_dict.items(),key=lambda x : x[1],reverse=True) 34 | print len(term_list) 35 | term_list = term_list[:int(vocab_size)] 36 | 37 | termid_dict = {} 38 | term_id = 0 39 | for term in term_list: 40 | termid_dict[term[0]] = term_id 41 | term_id += 1 42 | orderitems=[[v[1],v[0]] for v in termid_dict.items()] 43 | orderitems.sort() 44 | 45 | outfp = open(word_id_file_path,'w') 46 | for i in range(0, len(orderitems)): 47 | outfp.write('%d %s %d\n'%(orderitems[i][0], orderitems[i][1], term_dict[orderitems[i][1]])) 48 | outfp.close() 49 | 50 | print("Vocab file generating complete") 51 | # exit() 52 | 53 | infp.seek(0,0) 54 | outfp = open(training_file_path,'w') 55 | 56 | for line in infp: 57 | if line.find('<') != -1 and line.find('>') != -1: 58 | continue 59 | term_tf = {} 60 | info = line.rstrip().split(' ') 61 | flag = 1; 62 | for term in info: 63 | term = term.lower() 64 | if term not in termid_dict: 65 | continue 66 | if term in term_tf: 67 | term_tf[term] += 1 68 | else: 69 | term_tf[term] = 1 70 | flag = 0 71 | if flag == 1: 72 | continue 73 | out_line = '' 74 | for i in range(0, len(orderitems)): 75 | if out_line != '': 76 | out_line += ' ' 77 | term = orderitems[i][1] 78 | if term not in term_tf: 79 | out_line += '0' 80 | else: 81 | out_line += '%d'%(term_tf.get(term)) 82 | outfp.write(out_line+'\n') 83 | 84 | infp.close() 85 | outfp.close() 86 | 87 | if __name__ == '__main__': 88 | if len(sys.argv) != 3: 89 | print >> sys.stderr,'Usage : [%s] [input data file] [vocab size]'%(sys.argv[0]) 90 | sys.exit(0) 91 | generate(sys.argv[1],"./vocab.txt","./train_topic.csv",sys.argv[2]) 92 | --------------------------------------------------------------------------------