├── .gitignore ├── .travis.yml ├── LICENSE ├── LightCTR ├── common │ ├── avx.h │ ├── barrier.h │ ├── buffer.h │ ├── buffer_fusion.h │ ├── float16.h │ ├── hash.h │ ├── lock.h │ ├── memory_pool.h │ ├── message.h │ ├── message_queue.h │ ├── network.h │ ├── persistent_buffer.h │ ├── system.h │ ├── thread_pool.h │ └── time.h ├── dag │ ├── aggregate_node.h │ ├── dag_pipeline.h │ ├── node_abst.h │ ├── operator │ │ ├── activations_op.h │ │ ├── add_op.h │ │ ├── loss_op.h │ │ ├── matmul_op.h │ │ └── multiply_op.h │ ├── source_node.h │ └── terminus_node.h ├── distribut │ ├── consistent_hash.h │ ├── dist_machine_abst.h │ ├── master.h │ ├── paramserver.h │ ├── pull.h │ ├── push.h │ ├── ring_collect.h │ └── worker.h ├── distributed_algo_abst.h ├── dl_algo_abst.h ├── em_algo_abst.h ├── fm_algo_abst.h ├── gbm_algo_abst.h ├── predict │ ├── ann_index.h │ ├── fm_predict.cpp │ ├── fm_predict.h │ ├── gbm_predict.cpp │ └── gbm_predict.h ├── third │ ├── install_third.sh │ └── zeromq │ │ ├── include │ │ ├── zmq.h │ │ └── zmq_utils.h │ │ └── lib │ │ └── libzmq.a ├── train │ ├── layer │ │ ├── adapterLayer.h │ │ ├── convLayer.h │ │ ├── fullyconnLayer.h │ │ ├── layer_abst.h │ │ ├── poolingLayer.h │ │ └── sampleLayer.h │ ├── train_cnn_algo.h │ ├── train_embed_algo.cpp │ ├── train_embed_algo.h │ ├── train_ffm_algo.cpp │ ├── train_ffm_algo.h │ ├── train_fm_algo.cpp │ ├── train_fm_algo.h │ ├── train_gbm_algo.cpp │ ├── train_gbm_algo.h │ ├── train_gmm_algo.cpp │ ├── train_gmm_algo.h │ ├── train_nfm_algo.cpp │ ├── train_nfm_algo.h │ ├── train_rnn_algo.h │ ├── train_tm_algo.cpp │ ├── train_tm_algo.h │ ├── train_vae_algo.h │ └── unit │ │ ├── attention_unit.h │ │ └── lstm_unit.h └── util │ ├── activations.h │ ├── ensembling.h │ ├── evaluator.h │ ├── gradientUpdater.h │ ├── loss.h │ ├── matrix.h │ ├── momentumUpdater.h │ ├── pca.h │ ├── product_quantizer.h │ ├── quantile_compress.h │ ├── random.h │ ├── shm_hashtable.h │ └── significance.h ├── LightCTR_LOGO.png ├── Makefile ├── README.md ├── benchmark ├── 4_node_ps.png ├── 4_node_ring.png ├── vs_libffm.png ├── vs_libfm.png └── vs_tf_cpu.png ├── build.sh ├── build_ring.sh ├── data ├── proc_file_split.py ├── proc_text_topic.py ├── test_sparse.csv ├── train_cluster.csv ├── train_dense.csv ├── train_sparse.csv ├── train_text.txt └── vocab.txt └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.out 3 | *.dylib 4 | LightCTR.xcodeproj 5 | LightCTR/third/zeromq-4.2.2 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - osx 3 | - linux 4 | 5 | language: cpp 6 | 7 | script: 8 | - make 9 | - ./build.sh 3 3 127.0.0.1:17832 10 | - ./build_ring.sh 3 127.0.0.1:17832 11 | 12 | notifications: 13 | email: 14 | on_success: change 15 | on_failure: always 16 | -------------------------------------------------------------------------------- /LightCTR/common/barrier.h: -------------------------------------------------------------------------------- 1 | // 2 | // barrier.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
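// Usage sketch (illustrative only, based on the Barrier class below): a
// coordinator thread waits until three hypothetical worker threads have each
// called unblock() once.
//
//   Barrier barrier(3);                 // expect three unblock() calls
//   for (int i = 0; i < 3; ++i) {
//       std::thread([&barrier] {
//           /* ... do some work ... */
//           barrier.unblock();          // decrements the counter and notifies the waiter
//       }).detach();
//   }
//   barrier.block();                    // returns once the counter reaches zero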
7 | // 8 | 9 | #ifndef barrier_h 10 | #define barrier_h 11 | 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | // fence in write 18 | #define wmb() __asm__ __volatile__("sfence":::"memory") 19 | // fence in read 20 | #define rmb() __asm__ __volatile__("lfence":::"memory") 21 | // fence in write and read 22 | #define rwmb() __asm__ __volatile__("mfence":::"memory") 23 | 24 | 25 | class Barrier { 26 | public: 27 | Barrier() { 28 | 29 | } 30 | explicit Barrier(size_t count) { 31 | flag_ = (int)count; 32 | } 33 | 34 | inline void reset(size_t count = 1) { 35 | std::unique_lock glock(lock_); 36 | flag_ = (int)count; 37 | } 38 | 39 | inline void block() { 40 | std::unique_lock glock(lock_); 41 | cond_.wait(glock, [this] { 42 | return flag_ <= 0; 43 | }); 44 | } 45 | 46 | inline bool block(time_t timeout_ms, std::function timeout_callback) { 47 | std::unique_lock glock(lock_); 48 | auto status = cond_.wait_for(glock, std::chrono::milliseconds(timeout_ms), [this] { 49 | return flag_ <= 0; 50 | }); 51 | if (!status && timeout_callback) { 52 | timeout_callback(); 53 | } 54 | // false if the predicate pred still evaluates to false 55 | // after the rel_time timeout expired, otherwise true 56 | return status; 57 | } 58 | 59 | inline void unblock() { 60 | std::unique_lock glock(lock_); 61 | flag_--; 62 | assert(flag_ >= 0); 63 | cond_.notify_one(); 64 | } 65 | 66 | private: 67 | int flag_{1}; 68 | std::condition_variable cond_; 69 | std::mutex lock_; 70 | }; 71 | 72 | #endif /* barrier_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/common/buffer_fusion.h: -------------------------------------------------------------------------------- 1 | // 2 | // buffer_fusion.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/1/1. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
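// Usage sketch (illustrative only, based on the BufferFusion template below):
// fuse two separately owned float chunks into one logical address space so a
// single write can span the chunk boundary. `weights`, `bias` and `grad` are
// hypothetical pointers owned by the caller.
//
//   BufferFusion<float> fusion(/*autoRelease=*/false, /*lazyMode=*/false);
//   fusion.registMemChunk(weights, 256);   // first chunk: 256 floats
//   fusion.registMemChunk(bias, 16);       // second chunk: 16 floats
//   fusion.memset_c(0.0f);                 // zero both chunks
//   fusion.memcpy_in(250, grad, 10);       // copy crosses the 256-float boundary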
7 | // 8 | 9 | #ifndef buffer_fusion_h 10 | #define buffer_fusion_h 11 | 12 | #include "buffer.h" 13 | #include 14 | #include 15 | 16 | template 17 | class BufferFusion { 18 | public: 19 | BufferFusion() = delete; 20 | BufferFusion(bool _autoRelease, bool _lazyMode): 21 | autoRelease(_autoRelease), lazyMode(_lazyMode) { 22 | 23 | } 24 | 25 | ~BufferFusion() { 26 | if (autoRelease && lazyMode && lazyModeMemory) { 27 | delete lazyModeMemory; 28 | lazyModeMemory = nullptr; 29 | bufs_ptr_arr.clear(); 30 | bufs_size_arr.clear(); 31 | return; 32 | } 33 | if (!autoRelease) { 34 | bufs_ptr_arr.clear(); 35 | bufs_size_arr.clear(); 36 | return; 37 | } 38 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 39 | if (bufs_ptr_arr[i]) { 40 | delete[] bufs_ptr_arr[i]; 41 | bufs_ptr_arr[i] = NULL; 42 | } 43 | } 44 | bufs_ptr_arr.clear(); 45 | bufs_size_arr.clear(); 46 | } 47 | 48 | std::pair getMemory(size_t index) const { 49 | assert(index < bufs_size_arr.size()); 50 | return std::make_pair(bufs_ptr_arr[index], bufs_size_arr[index]); 51 | } 52 | 53 | void registMemChunk(T* ptr, size_t size) { 54 | assert(size > 0); 55 | if (ptr != nullptr) { 56 | bufs_ptr_arr.push_back(ptr); 57 | bufs_size_arr.push_back(size); 58 | total_size += size; 59 | } else { 60 | assert(lazyMode); 61 | // lazy mode 62 | bufs_size_arr.push_back(size); 63 | total_size += size; 64 | } 65 | } 66 | 67 | void lazyAllocate(float* allocatedMem = nullptr) { 68 | if (allocatedMem) { 69 | lazyModeMemory = allocatedMem; 70 | } else { 71 | lazyModeMemory = new T[total_size]; 72 | memset(lazyModeMemory, 0, total_size * sizeof(float)); 73 | } 74 | size_t inc_mem = 0; 75 | for (size_t i = 0; i < bufs_size_arr.size(); i++) { 76 | bufs_ptr_arr.push_back(lazyModeMemory + inc_mem); 77 | inc_mem += bufs_size_arr[i]; 78 | } 79 | assert(inc_mem == total_size); 80 | } 81 | 82 | size_t size() const { 83 | return total_size; 84 | } 85 | 86 | void memset_c(T __c) { 87 | if (likely(__c == 0)) { 88 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 89 | memset(bufs_ptr_arr[i], 0, bufs_size_arr[i] * sizeof(T)); 90 | } 91 | } else { 92 | for (size_t i = 0; i < bufs_ptr_arr.size(); i++) { 93 | for (size_t j = 0; j < bufs_size_arr[i]; j++) { 94 | *(bufs_ptr_arr[i] + j) = __c; 95 | } 96 | } 97 | } 98 | } 99 | 100 | void memcpy_out(Buffer** __dst, size_t __offset, size_t __n) const { 101 | assert(__offset + __n <= total_size); 102 | *__dst = new Buffer(__n); 103 | 104 | size_t which_one = 0; 105 | while (__offset >= bufs_size_arr[which_one]) { 106 | __offset -= bufs_size_arr[which_one]; 107 | which_one++; 108 | } 109 | const T* __src = bufs_ptr_arr[which_one] + __offset; 110 | if (__n <= bufs_size_arr[which_one] - __offset) { 111 | (*__dst)->append(__src, __n * sizeof(T)); 112 | return; 113 | } 114 | size_t offset = bufs_size_arr[which_one] - __offset; 115 | (*__dst)->append(__src, offset * sizeof(T)); 116 | __n -= offset; 117 | 118 | size_t tmp = bufs_size_arr[++which_one]; 119 | while (__n > tmp) { 120 | (*__dst)->append(bufs_ptr_arr[which_one], tmp * sizeof(T)); 121 | __n -= tmp; 122 | tmp = bufs_size_arr[++which_one]; 123 | } 124 | if (__n > 0) { 125 | (*__dst)->append(bufs_ptr_arr[which_one], __n * sizeof(T)); 126 | } 127 | } 128 | 129 | void memcpy_in(size_t __offset, const T* __src, size_t __n) { 130 | assert(__offset + __n <= total_size); 131 | size_t which_one = 0; 132 | while (__offset >= bufs_size_arr[which_one]) { 133 | __offset -= bufs_size_arr[which_one]; 134 | which_one++; 135 | } 136 | T* __dst = bufs_ptr_arr[which_one] + __offset; 137 | if (__n 
<= bufs_size_arr[which_one] - __offset) { 138 | memcpy(__dst, __src, __n * sizeof(T)); 139 | return; 140 | } 141 | size_t offset = bufs_size_arr[which_one] - __offset; 142 | memcpy(__dst, __src, offset * sizeof(T)); 143 | __n -= offset; 144 | 145 | size_t tmp = bufs_size_arr[++which_one]; 146 | while (__n > tmp) { 147 | memcpy(bufs_ptr_arr[which_one], __src + offset, tmp * sizeof(T)); 148 | __n -= tmp; 149 | offset += tmp; 150 | tmp = bufs_size_arr[++which_one]; 151 | } 152 | if (__n > 0) { 153 | memcpy(bufs_ptr_arr[which_one], __src + offset, __n * sizeof(T)); 154 | } 155 | } 156 | 157 | typedef std::function transform_callback_t; 158 | 159 | void transform(size_t __offset, size_t __n, transform_callback_t cb) const { 160 | assert(__offset + __n <= total_size); 161 | size_t which_one = 0; 162 | while (__offset >= bufs_size_arr[which_one]) { 163 | __offset -= bufs_size_arr[which_one]; 164 | which_one++; 165 | } 166 | T* __dst = bufs_ptr_arr[which_one] + __offset; 167 | if (__n <= bufs_size_arr[which_one] - __offset) { 168 | cb(__dst, __dst + __n); 169 | return; 170 | } 171 | size_t offset = bufs_size_arr[which_one] - __offset; 172 | cb(__dst, __dst + offset); 173 | __n -= offset; 174 | 175 | size_t tmp = bufs_size_arr[++which_one]; 176 | while (__n > tmp) { 177 | cb(bufs_ptr_arr[which_one], bufs_ptr_arr[which_one] + tmp); 178 | __n -= tmp; 179 | tmp = bufs_size_arr[++which_one]; 180 | } 181 | if (__n > 0) { 182 | cb(bufs_ptr_arr[which_one], bufs_ptr_arr[which_one] + __n); 183 | } 184 | } 185 | 186 | void flatten(Buffer** __dst) const { 187 | assert(total_size > 0); 188 | memcpy_out(__dst, 0, total_size); 189 | } 190 | 191 | private: 192 | bool autoRelease{false}; 193 | bool lazyMode{false}; 194 | T* lazyModeMemory = nullptr; 195 | 196 | std::vector bufs_ptr_arr; 197 | std::vector bufs_size_arr; 198 | size_t total_size = 0; 199 | }; 200 | 201 | #endif /* buffer_fusion_h */ 202 | -------------------------------------------------------------------------------- /LightCTR/common/float16.h: -------------------------------------------------------------------------------- 1 | // 2 | // float16.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/3. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 
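// Usage sketch (illustrative only, based on the Float16 class below):
// round-trip one value and batch-convert an array; `src_arr`, `dst_arr` and
// `len` are hypothetical.
//
//   float v = 3.14159f;
//   Float16 h(&v);                               // compress to an IEEE754 half
//   float16_t bits = h.float16_value();          // 16-bit storage form
//   float back = Float16(bits).float32_value();  // recover, with precision loss
//
//   Float16 codec;
//   codec.convert2Float16(src_arr, dst_arr, len);   // float*     -> float16_t*
//   codec.recover2Float32(dst_arr, src_arr, len);   // float16_t* -> float*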
7 | // 8 | 9 | #ifndef float16_h 10 | #define float16_h 11 | 12 | #define float16_t unsigned short 13 | 14 | #include 15 | #include 16 | #include "assert.h" 17 | 18 | class Float16 { 19 | public: 20 | Float16() { 21 | assert(sizeof(float) * 8 == 32); 22 | assert(sizeof(float16_t) * 8 == 16); 23 | } 24 | 25 | explicit Float16(const void* src32) { 26 | _float32_value = *static_cast(src32); 27 | _float16_value = convert(_float32_value); 28 | } 29 | 30 | explicit Float16(float16_t src16) { 31 | _float16_value = src16; 32 | _float32_value = toFloat32(src16); 33 | } 34 | 35 | inline float16_t float16_value() { 36 | return _float16_value; 37 | } 38 | inline float float32_value() { 39 | return _float32_value; 40 | } 41 | 42 | void convert2Float16(const float* input, float16_t* output, int len) { 43 | std::transform(input, input + len, 44 | output, 45 | std::bind( 46 | &Float16::convert, 47 | this, 48 | std::placeholders::_1 49 | ) 50 | ); 51 | } 52 | 53 | void recover2Float32(const float16_t* input, float* output, int len) { 54 | std::transform(input, input + len, 55 | output, 56 | std::bind( 57 | &Float16::toFloat32, 58 | this, 59 | std::placeholders::_1 60 | ) 61 | ); 62 | } 63 | 64 | private: 65 | inline float toFloat32(float16_t h) { 66 | int sign = ((h >> 15) & 1); // 1 67 | int exp = ((h >> 10) & 0x1f); // 5 68 | int mantissa = (h & 0x3ff); // 10 69 | unsigned f = 0; 70 | 71 | if (exp > 0 && exp < 31) { 72 | // normal 73 | exp += 112; // 127 - 15 74 | f = (sign << 31) | (exp << 23) | (mantissa << 13); 75 | } else if (exp == 0) { 76 | if (mantissa) { 77 | // subnormal 78 | exp += 113; // 127 - 15 + 1 79 | while ((mantissa & (1 << 10)) == 0) { 80 | mantissa <<= 1; 81 | exp--; 82 | } 83 | mantissa &= 0x3ff; 84 | f = (sign << 31) | (exp << 23) | (mantissa << 13); 85 | } else { 86 | f = (sign << 31); // ±0.0 87 | } 88 | } else if (exp == 31) { 89 | if (mantissa) { 90 | f = 0x7fffffff; // NAN 91 | } else { 92 | f = (0xff << 23) | (sign << 31); // INF 93 | } 94 | } 95 | return *reinterpret_cast(&f); 96 | } 97 | 98 | inline float16_t convert(const float& src) { 99 | // convert Float32 into Binary float16 (unsigned short) based IEEE754 standard 100 | unsigned const& s = *reinterpret_cast(&src); 101 | 102 | uint16_t sign = uint16_t((s >> 16) & 0x8000); // 1 103 | int16_t exp = uint16_t(((s >> 23) & 0xff) - 127); // 8 104 | int mantissa = s & 0x7fffff; // 23 105 | 106 | if ((s & 0x7fffffff) == 0) { // ±0.0 107 | return 0; 108 | } 109 | // special number 110 | if (exp > 15) { // bias changes from 127 to 15 111 | if (exp == 128 && mantissa) { 112 | // still NAN 113 | return 0x7fff; 114 | } else { 115 | // exp > 15 causes upper overflow, INF 116 | return sign | 0x7c00; 117 | } 118 | } 119 | 120 | uint16_t u = 0; 121 | int sticky_bit = 0; 122 | 123 | if (exp >= -14) { 124 | // normal fp32 to normal fp16 125 | exp = uint16_t(exp + uint16_t(15)); 126 | u = uint16_t(((exp & 0x1f) << 10)); 127 | u = uint16_t(u | (mantissa >> 13)); 128 | } else { 129 | // normal float to subnormal (exp=0) 130 | int rshift = - (exp + 14); 131 | if (rshift < 32) { 132 | mantissa |= (1 << 23); 133 | sticky_bit = ((mantissa & ((1 << rshift) - 1)) != 0); 134 | 135 | mantissa = (mantissa >> rshift); 136 | u = (uint16_t(mantissa >> 13) & 0x3ff); 137 | } else { 138 | // drop precision 139 | mantissa = 0; 140 | u = 0; 141 | } 142 | } 143 | 144 | // round to nearest even 145 | int round_bit = ((mantissa >> 12) & 1); 146 | sticky_bit |= ((mantissa & ((1 << 12) - 1)) != 0); 147 | 148 | if ((round_bit && sticky_bit) || (round_bit && (u & 
1))) { 149 | u = uint16_t(u + 1); 150 | } 151 | 152 | u |= sign; 153 | return *reinterpret_cast(&u); 154 | } 155 | 156 | void print_bin(float num) { 157 | printf("32: "); 158 | unsigned const& s = *reinterpret_cast(&num); 159 | for(size_t i = 1; i <= sizeof(num) * 8; i++) { 160 | printf("%d", (s >> (sizeof(num) * 8 - i)) & 1); 161 | if (i == 1 || i == 9 || i == 32) { 162 | printf("\t"); 163 | } 164 | } 165 | puts(""); 166 | } 167 | void print_bin16(float16_t num) { 168 | printf("16: "); 169 | unsigned const& s = *reinterpret_cast(&num); 170 | for(size_t i = 1; i <= sizeof(num) * 8; i++) { 171 | printf("%d", (s >> (sizeof(num) * 8 - i)) & 1); 172 | if (i == 1 || i == 6 || i == 16) { 173 | printf("\t"); 174 | } 175 | } 176 | puts(""); 177 | } 178 | 179 | float16_t _float16_value; 180 | float _float32_value; 181 | }; 182 | 183 | #endif /* float16_h */ 184 | -------------------------------------------------------------------------------- /LightCTR/common/hash.h: -------------------------------------------------------------------------------- 1 | // 2 | // hash.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef hash_h 10 | #define hash_h 11 | 12 | #include 13 | 14 | #define BIG_CONSTANT(x) (x##LLU) 15 | 16 | inline unsigned int murMurHash(const std::string& key) { 17 | int len = (int)key.length(); 18 | const unsigned int m = 0x5bd1e995; 19 | const int r = 24; 20 | const int seed = 97; 21 | unsigned int h = seed ^ len; 22 | // Mix 4 bytes at a time into the hash 23 | const unsigned char *data = (const unsigned char *)key.c_str(); 24 | while(len >= 4) 25 | { 26 | unsigned int k = *(unsigned int *)data; 27 | k *= m; 28 | k ^= k >> r; 29 | k *= m; 30 | h *= m; 31 | h ^= k; 32 | data += 4; 33 | len -= 4; 34 | } 35 | // Handle the last few bytes of the input array 36 | switch(len) 37 | { 38 | case 3: h ^= data[2] << 16; 39 | case 2: h ^= data[1] << 8; 40 | case 1: h ^= data[0]; 41 | h *= m; 42 | }; 43 | // Do a few final mixes of the hash to ensure the last few 44 | // bytes are well-incorporated. 45 | h ^= h >> 13; 46 | h *= m; 47 | h ^= h >> 15; 48 | return h; 49 | } 50 | 51 | inline unsigned int murMurHash(uint64_t k) { 52 | k ^= k >> 33; 53 | k *= BIG_CONSTANT(0xff51afd7ed558ccd); 54 | k ^= k >> 33; 55 | k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); 56 | k ^= k >> 33; 57 | return (unsigned int)k; 58 | } 59 | 60 | #endif /* hash_h */ 61 | -------------------------------------------------------------------------------- /LightCTR/common/lock.h: -------------------------------------------------------------------------------- 1 | // 2 | // lock.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
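// Usage sketch (illustrative only, based on the classes below): SpinLock for
// very short critical sections, RWLock when readers dominate writers.
//
//   SpinLock spin;
//   spin.lock();                 // busy-waits until acquired
//   /* ... short critical section ... */
//   spin.unlock();
//
//   RWLock rw;
//   rw.rlock();                  // shared: many readers may hold it at once
//   /* ... read shared state ... */
//   rw.unlock();
//   rw.wlock();                  // exclusive: a single writer
//   /* ... mutate shared state ... */
//   rw.unlock();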
7 | // 8 | 9 | #ifndef lock_h 10 | #define lock_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "assert.h" 16 | 17 | #define CAS32(ptr, val_old, val_new)({ char ret; __asm__ __volatile__("lock; cmpxchgl %2,%0; setz %1": "+m"(*ptr), "=q"(ret): "r"(val_new),"a"(val_old): "memory"); ret;}) 18 | 19 | inline bool atomic_compare_and_swap(float* ptr, const float &oldval, const float &newval) { 20 | return __sync_bool_compare_and_swap(reinterpret_cast(ptr), 21 | *reinterpret_cast(&oldval), 22 | *reinterpret_cast(&newval)); 23 | }; 24 | 25 | 26 | class SpinLock { 27 | public: 28 | SpinLock() : flag_{false} { 29 | } 30 | 31 | void lock() { 32 | while (flag_.test_and_set(std::memory_order_acquire)); 33 | } 34 | 35 | void unlock() { 36 | flag_.clear(std::memory_order_release); 37 | } 38 | protected: 39 | std::atomic_flag flag_; 40 | }; 41 | 42 | class RWLock { 43 | public: 44 | RWLock() { 45 | assert((pthread_rwlock_init(&lock_, NULL) == 0)); 46 | } 47 | ~RWLock() { 48 | assert((pthread_rwlock_destroy(&lock_) == 0)); 49 | } 50 | void rlock() { 51 | assert((pthread_rwlock_rdlock(&lock_) == 0)); 52 | } 53 | void wlock() { 54 | assert((pthread_rwlock_wrlock(&lock_) == 0)); 55 | } 56 | void unlock() { 57 | assert((pthread_rwlock_unlock(&lock_) == 0)); 58 | } 59 | private: 60 | pthread_rwlock_t lock_; 61 | }; 62 | 63 | #endif /* lock_h */ 64 | -------------------------------------------------------------------------------- /LightCTR/common/memory_pool.h: -------------------------------------------------------------------------------- 1 | // 2 | // memory_pool.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/26. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef memory_pool_h 10 | #define memory_pool_h 11 | 12 | #include 13 | #include 14 | #include "lock.h" 15 | #include 16 | 17 | // Memory Pool for managing vector allocation and deallocation 18 | // Meanwhile, it can monitored memory leak and wild pointer 19 | class MemoryPool { 20 | public: 21 | static MemoryPool& Instance() { // singleton 22 | static MemoryPool pool; 23 | return pool; 24 | } 25 | 26 | ~MemoryPool() { 27 | leak_checkpoint(); 28 | std::unique_lock f_lock(freePtr_lock); 29 | for (auto& pair : freePtr_list) { 30 | free(pair.second); 31 | } 32 | } 33 | 34 | inline void leak_checkpoint() { 35 | assert(allocPtr_list.empty()); // memory leaks 36 | } 37 | 38 | inline void* allocate(size_t size) { 39 | { 40 | freePtr_lock.lock(); 41 | for (auto it = freePtr_list.begin(); it != freePtr_list.end(); it++) { 42 | const size_t tmp_size = it->first; 43 | if (tmp_size >= size && tmp_size <= (size * 3) >> 1) { 44 | void* tmp_ptr = it->second; 45 | freePtr_list.erase(it); 46 | freePtr_lock.unlock(); 47 | 48 | std::unique_lock a_lock(allocPtr_lock); 49 | allocPtr_list.push_back(std::make_pair(tmp_size, tmp_ptr)); 50 | return tmp_ptr; 51 | } 52 | } 53 | freePtr_lock.unlock(); 54 | } 55 | std::unique_lock a_lock(allocPtr_lock); 56 | size = _alignedMemSize(size); 57 | void* tmp_ptr = malloc(size); 58 | assert(tmp_ptr); // out of memory 59 | allocPtr_list.push_back(std::make_pair(size, tmp_ptr)); 60 | return tmp_ptr; 61 | } 62 | 63 | inline void deallocate(void* ptr) { 64 | assert(ptr); 65 | allocPtr_lock.lock(); 66 | for (auto it = allocPtr_list.begin(); it != allocPtr_list.end(); it++) { 67 | if (it->second == ptr) { 68 | const size_t tmp_size = it->first; 69 | allocPtr_list.erase(it); 70 | allocPtr_lock.unlock(); 71 | 72 | std::unique_lock f_lock(freePtr_lock); 73 | 
freePtr_list.push_back(std::make_pair(tmp_size, ptr)); 74 | return; 75 | } 76 | } 77 | allocPtr_lock.unlock(); 78 | assert(false); // wild pointer 79 | } 80 | 81 | private: 82 | static const int MemAlignment = 16; 83 | inline size_t _alignedMemSize(size_t size) const { 84 | return (size + MemAlignment - 1) & -MemAlignment; 85 | } 86 | 87 | 88 | std::list > freePtr_list, allocPtr_list; 89 | std::mutex freePtr_lock, allocPtr_lock; 90 | }; 91 | 92 | 93 | template 94 | class ArrayAllocator { 95 | public: 96 | typedef T value_type; 97 | typedef T* pointer; 98 | typedef const T* const_pointer; 99 | typedef T& reference; 100 | typedef const T& const_reference; 101 | typedef size_t size_type; 102 | typedef ptrdiff_t difference_type; 103 | 104 | template 105 | struct rebind { 106 | typedef std::allocator other; 107 | }; 108 | 109 | pointer allocate(size_type n, const void* hint=0) { 110 | return (T*)MemoryPool::Instance().allocate((difference_type)n * sizeof(T)); 111 | } 112 | 113 | void deallocate(pointer p, size_type n) { 114 | MemoryPool::Instance().deallocate(p); 115 | } 116 | 117 | void destroy(pointer p) { 118 | p->~T(); 119 | } 120 | 121 | pointer address(reference x) { 122 | return (pointer)&x; 123 | } 124 | 125 | const_pointer address(const_reference x) { 126 | return (const_pointer)&x; 127 | } 128 | 129 | size_type max_size() const { 130 | return size_type(UINTMAX_MAX / sizeof(T)); 131 | } 132 | }; 133 | 134 | #endif /* memory_pool_h */ 135 | -------------------------------------------------------------------------------- /LightCTR/common/message.h: -------------------------------------------------------------------------------- 1 | // 2 | // message.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
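// Usage sketch (illustrative only, based on the types below): build a PUSH
// request, attach a payload and a completion callback, then wrap it into a
// Package. `epoch`, `grad_ptr` and `grad_bytes` are hypothetical, and the
// callback assumes response_callback_t is
// std::function<void(std::shared_ptr<PackageDescript>)>.
//
//   PackageDescript desc(REQUEST_PUSH, /*epoch_version=*/epoch);  // epoch must be > 0
//   desc.content.append(grad_ptr, grad_bytes);   // payload bytes go into the Buffer
//   desc.callback = [](std::shared_ptr<PackageDescript> response) {
//       /* ... consume the pulled-back response ... */
//   };
//   Package pkg(desc);   // splits into a fixed-size head and a content message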
7 | // 8 | 9 | #ifndef message_h 10 | #define message_h 11 | 12 | #include "../third/zeromq/include/zmq.h" 13 | #include "assert.h" 14 | #include "buffer.h" 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | enum MsgType { 21 | RESPONSE = 0, 22 | REQUEST_HANDSHAKE, 23 | REQUEST_ACK, 24 | REQUEST_FIN, 25 | REQUEST_PUSH, 26 | REQUEST_PULL, 27 | REQUEST_INFER, 28 | HEARTBEAT, 29 | BREAKER, 30 | RESERVED, 31 | UNKNOWN 32 | }; 33 | 34 | class Package; 35 | class Buffer; 36 | class PackageDescript; 37 | 38 | typedef std::function)> response_callback_t; 39 | typedef std::function)> sync_barrier_callback_t; 40 | 41 | class ZMQ_Message { 42 | public: 43 | ZMQ_Message() { 44 | assert(0 == zmq_msg_init(&_zmg)); 45 | } 46 | ZMQ_Message(char* buf, size_t size) { 47 | assert(0 == zmq_msg_init_size(&_zmg, size)); 48 | memcpy((void *)buffer(), buf, size); 49 | } 50 | 51 | ZMQ_Message(const ZMQ_Message &) = delete; 52 | ZMQ_Message(const Buffer& buf) { 53 | assert(0 == zmq_msg_init_size(&_zmg, buf.size())); 54 | memcpy((void *)buffer(), buf.buffer(), buf.size()); 55 | } 56 | 57 | ~ZMQ_Message() { 58 | assert(0 == zmq_msg_close(&_zmg)); 59 | } 60 | 61 | ZMQ_Message &operator=(const ZMQ_Message &) = delete; 62 | ZMQ_Message &operator=(ZMQ_Message &&other) { 63 | if (this != &other) { 64 | assert(0 == zmq_msg_move(&_zmg, &other.zmg())); 65 | } 66 | return *this; 67 | } 68 | 69 | size_t size() { 70 | return zmq_msg_size(&_zmg); 71 | } 72 | 73 | const char* buffer() { 74 | return (char *)zmq_msg_data(&_zmg); 75 | } 76 | 77 | zmq_msg_t& zmg() { 78 | return _zmg; 79 | } 80 | 81 | private: 82 | zmq_msg_t _zmg; 83 | }; 84 | 85 | 86 | class PackageDescript { 87 | public: 88 | // fill by handler 89 | MsgType msgType; 90 | size_t epoch_version; 91 | 92 | // fill when send 93 | size_t node_id; 94 | size_t message_id; 95 | 96 | response_callback_t callback; 97 | sync_barrier_callback_t sync_callback = NULL; 98 | 99 | Buffer content; 100 | 101 | time_t send_time; // record for timeout monitor 102 | size_t to_node_id; 103 | 104 | ~PackageDescript() { 105 | 106 | } 107 | explicit PackageDescript(MsgType _msgType, size_t _epoch_version = 0) 108 | : msgType(_msgType), epoch_version(_epoch_version) { 109 | message_id = 0; 110 | send_time = 0; 111 | node_id = to_node_id = -1; 112 | if (msgType == REQUEST_PUSH) { 113 | assert(epoch_version > 0); 114 | } 115 | } 116 | PackageDescript &operator=(const PackageDescript &) = delete; 117 | PackageDescript &operator=(PackageDescript&& other) { 118 | if (this != &other) { 119 | msgType = other.msgType; 120 | epoch_version = other.epoch_version; 121 | node_id = other.node_id; 122 | message_id = other.message_id; 123 | send_time = other.send_time; 124 | to_node_id = other.to_node_id; 125 | callback = std::move(other.callback); 126 | sync_callback = std::move(other.sync_callback); 127 | other.callback = NULL; 128 | other.sync_callback = NULL; 129 | content = std::move(other.content); 130 | } 131 | return *this; 132 | } 133 | PackageDescript(const PackageDescript& other) { // copy only by constructor 134 | msgType = other.msgType; 135 | epoch_version = other.epoch_version; 136 | node_id = other.node_id; 137 | message_id = other.message_id; 138 | send_time = other.send_time; 139 | to_node_id = other.to_node_id; 140 | callback = other.callback; 141 | sync_callback = other.sync_callback; 142 | content = Buffer(other.content.buffer(), other.content.size()); 143 | } 144 | PackageDescript(PackageDescript&& other) { 145 | msgType = other.msgType; 146 | epoch_version = 
other.epoch_version; 147 | node_id = other.node_id; 148 | message_id = other.message_id; 149 | send_time = other.send_time; 150 | to_node_id = other.to_node_id; 151 | callback = std::move(other.callback); 152 | sync_callback = std::move(other.sync_callback); 153 | other.callback = NULL; 154 | other.sync_callback = NULL; 155 | content = std::move(other.content); 156 | } 157 | 158 | bool operator==(const PackageDescript& other) const { 159 | if (message_id == other.message_id) { 160 | return true; 161 | } 162 | return false; 163 | } 164 | }; 165 | 166 | const size_t _Head_size = sizeof(MsgType) + 3 * sizeof(size_t); 167 | 168 | class Package { 169 | public: 170 | Package() { 171 | } 172 | Package(const PackageDescript& pDesc) { 173 | head = ZMQ_Message((char *)&pDesc, _Head_size); 174 | content = ZMQ_Message(pDesc.content); 175 | } 176 | 177 | void Descript(std::shared_ptr& pDesc) { 178 | pDesc = std::make_shared(PackageDescript(UNKNOWN)); 179 | assert(pDesc); 180 | assert(head.size() == _Head_size); 181 | memcpy(pDesc.get(), head.buffer(), _Head_size); 182 | pDesc->content = Buffer(content.buffer(), content.size()); 183 | } 184 | 185 | Package &operator=(const Package &) = delete; 186 | Package(const Package &) = delete; 187 | Package(Package &&other) { 188 | head = std::move(other.head); 189 | content = std::move(other.content); 190 | } 191 | 192 | ZMQ_Message head; 193 | ZMQ_Message content; 194 | }; 195 | 196 | #endif /* message_h */ 197 | -------------------------------------------------------------------------------- /LightCTR/common/message_queue.h: -------------------------------------------------------------------------------- 1 | // 2 | // message_queue.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/14. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
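// Usage sketch (illustrative only, based on the MessageQueue template below):
// one producer pushes while a consumer blocks on front()/pop().
//
//   MessageQueue<int> mq;
//   std::thread producer([&mq] {
//       mq.push(42);             // copies the value in and notifies waiters
//   });
//   int head = mq.front();       // blocks until the queue is non-empty
//   mq.pop();                    // removes the element just observed
//   producer.join();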
7 | // 8 | 9 | #ifndef message_queue_h 10 | #define message_queue_h 11 | 12 | #include 13 | #include 14 | #include "lock.h" 15 | #include "time.h" 16 | 17 | enum SendType { 18 | Immediately = 0, 19 | After, 20 | Period, 21 | Invalid 22 | }; 23 | 24 | struct MessageEventWrapper { 25 | SendType send_type; 26 | time_t after_or_period_time_ms; 27 | time_t time_record; 28 | std::function handler; 29 | 30 | MessageEventWrapper(SendType _send_type, 31 | time_t _time, 32 | std::function _handler) : 33 | send_type(_send_type), after_or_period_time_ms(_time), handler(_handler) { 34 | updateTime(); 35 | } 36 | 37 | void updateTime() { 38 | update_tv(); 39 | time_record = get_now_ms(); 40 | } 41 | }; 42 | 43 | template 44 | class MessageQueue { 45 | public: 46 | MessageQueue() { 47 | } 48 | 49 | inline const T& front() { 50 | std::unique_lock lk(mu_); 51 | cond_.wait(lk, [this]{ 52 | return !queue_.empty(); 53 | }); 54 | return queue_.front(); 55 | } 56 | 57 | inline void push(const T& new_value) { 58 | { 59 | std::unique_lock lk(mu_); 60 | queue_.emplace_back(T(new_value)); // do copy 61 | element_cnt++; 62 | } 63 | cond_.notify_all(); 64 | } 65 | 66 | inline void emplace(T&& new_value) { 67 | { 68 | std::unique_lock lk(mu_); 69 | queue_.emplace_back(std::forward(new_value)); 70 | element_cnt++; 71 | } 72 | cond_.notify_all(); 73 | } 74 | 75 | inline void pop() { 76 | std::unique_lock lk(mu_); 77 | cond_.wait(lk, [this]{ 78 | return !queue_.empty(); 79 | }); 80 | queue_.pop_front(); 81 | element_cnt--; 82 | } 83 | 84 | inline bool pop_if(const T& compare, T* value) { 85 | std::unique_lock lk(mu_); 86 | cond_.wait(lk, [this]{ 87 | return !queue_.empty(); 88 | }); 89 | if (compare == queue_.front()) { 90 | *value = std::move(queue_.front()); 91 | queue_.pop_front(); 92 | element_cnt--; 93 | return 1; 94 | } 95 | return 0; 96 | } 97 | 98 | inline typename std::list::iterator mutable_element(size_t index) { 99 | std::unique_lock lk(mu_); 100 | assert(index < element_cnt); 101 | auto it = queue_.begin(); 102 | while (index--) { 103 | it++; 104 | } 105 | return it; 106 | } 107 | 108 | inline int modify(const T& value, T* addr) { 109 | std::unique_lock lk(mu_); 110 | if (queue_.empty()) { 111 | return 0; 112 | } 113 | auto it = find(queue_.begin(), queue_.end(), value); 114 | if (it == queue_.end()) { 115 | return -1; 116 | } 117 | addr = &(*it); 118 | return 1; 119 | } 120 | 121 | inline int erase(const T& value) { 122 | std::unique_lock lk(mu_); 123 | if (queue_.empty()) { 124 | return 0; 125 | } 126 | auto it = find(queue_.begin(), queue_.end(), value); 127 | if (it == queue_.end()) { 128 | return -1; 129 | } 130 | queue_.erase(it); 131 | element_cnt--; 132 | return 1; 133 | } 134 | 135 | inline size_t size() { 136 | std::unique_lock lk(mu_); 137 | return element_cnt; 138 | } 139 | 140 | inline bool empty() { 141 | std::unique_lock lk(mu_); 142 | return element_cnt == 0; 143 | } 144 | 145 | protected: 146 | std::mutex mu_; 147 | size_t element_cnt = 0; 148 | std::list queue_; 149 | std::condition_variable cond_; 150 | }; 151 | 152 | class MessageQueueRunloop : public MessageQueue { 153 | public: 154 | MessageQueueRunloop() : runloop_thread(std::thread(&MessageQueueRunloop::runloop, this)){ 155 | } 156 | 157 | ~MessageQueueRunloop() { 158 | breakflag = true; 159 | wait_cond_.notify_all(); 160 | 161 | runloop_thread.join(); 162 | } 163 | 164 | private: 165 | void runloop() { 166 | for(;;) { 167 | std::unique_lock lk(mu_); 168 | if (breakflag) { 169 | return; 170 | } 171 | // in this case MessageQueue can't 
be added, so No need copy the queue 172 | 173 | time_t wait_time = 10 * 1000; 174 | 175 | for (auto it = queue_.begin(); it != queue_.end(); it++) { 176 | if (it->send_type == SendType::Invalid) { 177 | queue_.erase(it); 178 | wait_time = 0; 179 | break; 180 | } else if (it->send_type == SendType::Immediately) { 181 | it->handler(*it); 182 | queue_.erase(it); 183 | wait_time = 0; 184 | break; 185 | } else if (it->send_type == SendType::After) { 186 | time_t cost = gettickspan(it->time_record); 187 | if (cost >= it->after_or_period_time_ms) { 188 | it->handler(*it); 189 | queue_.erase(it); 190 | wait_time = 0; 191 | break; 192 | } else { 193 | wait_time = std::min(wait_time, it->after_or_period_time_ms - cost); 194 | } 195 | } else if (it->send_type == SendType::Period) { 196 | time_t cost = gettickspan(it->time_record); 197 | if (cost >= it->after_or_period_time_ms) { 198 | it->handler(*it); 199 | it->updateTime(); 200 | wait_time = 0; 201 | break; 202 | } else { 203 | wait_time = std::min(wait_time, it->after_or_period_time_ms - cost); 204 | } 205 | } 206 | } 207 | assert(wait_time >= 0); 208 | if (wait_time > 0) { 209 | wait_cond_.wait_for(lk, std::chrono::milliseconds(wait_time)); 210 | } 211 | } 212 | } 213 | private: 214 | std::thread runloop_thread; 215 | bool breakflag{false}; 216 | std::condition_variable wait_cond_; 217 | }; 218 | 219 | #endif /* message_queue_h */ 220 | -------------------------------------------------------------------------------- /LightCTR/common/persistent_buffer.h: -------------------------------------------------------------------------------- 1 | // 2 | // persistent_buffer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/21. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef persistent_buffer_h 10 | #define persistent_buffer_h 11 | 12 | #include "buffer.h" 13 | #include "system.h" 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #ifdef __APPLE__ 21 | #include 22 | #else 23 | #include 24 | #endif 25 | 26 | class PersistentBuffer { 27 | public: 28 | PersistentBuffer(const char* path, size_t size, bool alarm_when_exist) { 29 | int flag = O_CREAT | O_RDWR; 30 | if (alarm_when_exist) { 31 | flag |= O_EXCL; 32 | } 33 | int _fd = open(path, flag, 0666); 34 | if (_fd < 0) { 35 | printf("open file errno = %d %s\n", errno, strerror(errno)); 36 | } 37 | assert(_fd >= 0); 38 | 39 | _capacity = lseek(_fd, 0, SEEK_END); 40 | if (_capacity < size) { 41 | assert(ftruncate(_fd, size) == 0); 42 | lseek(_fd, 0, SEEK_END); 43 | _capacity = size; 44 | } 45 | assert(size <= _capacity); 46 | 47 | assert(close(_fd) == 0); 48 | 49 | _buffer = nullptr; 50 | assert(mmapLoad(path, (void**)&_buffer, true)); 51 | 52 | assert(_buffer); 53 | memset(_buffer, 0, _capacity); 54 | 55 | _cursor = _end = _buffer; 56 | } 57 | 58 | ~PersistentBuffer() { 59 | if (_buffer) { 60 | munmap(_buffer, _capacity); 61 | } 62 | } 63 | 64 | inline size_t size() const { 65 | return _end - _buffer; 66 | } 67 | 68 | template 69 | inline void write(T *x, size_t len) { 70 | assert(size() + len <= _capacity); // check address sanitizer 71 | memcpy(_end, x, len); 72 | _end += len; 73 | } 74 | 75 | template 76 | inline void read(T *x, size_t len = 0) { 77 | if (len == 0) { 78 | len = size(); // read all 79 | } 80 | memcpy(x, _cursor, len); 81 | _cursor += len; 82 | assert(_cursor <= _end); 83 | } 84 | 85 | private: 86 | char *_buffer = nullptr; 87 | char *_cursor = nullptr; 88 | char *_end = nullptr; 89 | size_t _capacity; 90 | }; 91 | 
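// Usage sketch (illustrative only): stage a block of parameters in the
// mmap-backed file and copy it back through the same object; `weights`,
// `copy` and `n` are hypothetical.
//
//   PersistentBuffer buf("/tmp/lightctr.bin", 4096, /*alarm_when_exist=*/false);
//   buf.write(weights, n * sizeof(float));   // appends raw bytes, advances _end
//   buf.read(copy, n * sizeof(float));       // copies bytes out from _cursor
//
// Note that the constructor zero-fills the mapping, so re-opening an existing
// file through this class clears whatever was written to it before.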
92 | #endif /* persistent_buffer_h */ 93 | -------------------------------------------------------------------------------- /LightCTR/common/system.h: -------------------------------------------------------------------------------- 1 | // 2 | // system.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/3. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef system_h 10 | #define system_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #include "assert.h" 25 | #include "lock.h" 26 | 27 | #ifndef likely 28 | #define likely(x) __builtin_expect(!!(x), 1) 29 | #endif 30 | #ifndef unlikely 31 | #define unlikely(x) __builtin_expect(!!(x), 0) 32 | #endif 33 | 34 | inline int getEnv(const char *env_var, int defalt) { 35 | auto p = std::getenv(env_var); 36 | if (!p) { 37 | return defalt; 38 | } 39 | return atoi(p); 40 | } 41 | 42 | inline const char * getEnv(const char *env_var, const char *defalt) { 43 | auto p = std::getenv(env_var); 44 | if (!p) { 45 | return defalt; 46 | } 47 | return p; 48 | } 49 | 50 | template 51 | auto ignore_signal_call(FUNC func, ARGS &&... args) -> 52 | typename std::result_of::type { 53 | for (;;) { 54 | auto err = func(args...); 55 | if (err < 0 && errno == EINTR) { 56 | puts("Ignored EINTR Signal, retry"); 57 | continue; 58 | } 59 | return err; 60 | } 61 | } 62 | 63 | double SystemMemoryUsage() { 64 | FILE* fp = fopen("/proc/meminfo", "r"); 65 | assert(fp); 66 | size_t bufsize = 256 * sizeof(char); 67 | char* buf = new (std::nothrow) char[bufsize]; 68 | assert(buf); 69 | int totalMem = -1, freeMem = -1, bufMem = -1, cacheMem = -1; 70 | 71 | while (getline(&buf, &bufsize, fp) >= 0) { 72 | if (0 == strncmp(buf, "MemTotal", 8)) { 73 | if (1 != sscanf(buf, "%*s%d", &totalMem)) { 74 | std::cout << "failed to get MemTotal from string: [" << buf << "]"; 75 | } 76 | } else if (0 == strncmp(buf, "MemFree", 7)) { 77 | if (1 != sscanf(buf, "%*s%d", &freeMem)) { 78 | std::cout << "failed to get MemFree from string: [" << buf << "]"; 79 | } 80 | } else if (0 == strncmp(buf, "Buffers", 7)) { 81 | if (1 != sscanf(buf, "%*s%d", &bufMem)) { 82 | std::cout << "failed to get Buffers from string: [" << buf << "]"; 83 | } 84 | } else if (0 == strncmp(buf, "Cached", 6)) { 85 | if (1 != sscanf(buf, "%*s%d", &cacheMem)) { 86 | std::cout << "failed to get Cached from string: [" << buf << "]"; 87 | } 88 | } 89 | if (totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1) { 90 | break; 91 | } 92 | } 93 | assert(totalMem != -1 && freeMem != -1 && bufMem != -1 && cacheMem != -1); 94 | fclose(fp); 95 | delete[] buf; 96 | double usedMem = 1.0 - 1.0 * (freeMem + bufMem + cacheMem) / totalMem; 97 | return usedMem; 98 | } 99 | 100 | bool mmapLoad(const char* filename, void** mmapPtr, bool writable) { 101 | int flag = O_RDONLY; 102 | if (writable) 103 | flag = O_RDWR; 104 | int _fd = open(filename, flag); 105 | if (_fd == -1) { 106 | printf("open file errno = %d %s\n", errno, strerror(errno)); 107 | return false; 108 | } 109 | off_t size = lseek(_fd, 0, SEEK_END); 110 | 111 | flag = PROT_READ; 112 | if (writable) 113 | flag |= PROT_WRITE; 114 | #ifdef MAP_POPULATE 115 | *mmapPtr = mmap( 116 | 0, size, flag, MAP_SHARED | MAP_POPULATE, _fd, 0); 117 | #else 118 | *mmapPtr = mmap( 119 | 0, size, flag, MAP_SHARED, _fd, 0); 120 | #endif 121 | close(_fd); 122 | if (*mmapPtr == MAP_FAILED) { 123 | return false; 124 | } 125 | return 
true; 126 | } 127 | 128 | char* getShmAddr(int key, size_t size, int flag = 0666|IPC_CREAT) { 129 | assert(key != 0); 130 | 131 | int shmId = shmget(key, size, flag); 132 | if (shmId < 0) { 133 | // ipcs -m 134 | // sysctl -w kern.sysv.shmmax to adjust shm max memory size 135 | printf("%d %s\n", errno, strerror(errno)); 136 | } 137 | assert(shmId >= 0); 138 | 139 | char* shmAddr = (char *)shmat(shmId, NULL, 0); 140 | assert(shmAddr != (char *)-1); 141 | 142 | return shmAddr; 143 | } 144 | 145 | #endif /* system_h */ 146 | -------------------------------------------------------------------------------- /LightCTR/common/thread_pool.h: -------------------------------------------------------------------------------- 1 | // 2 | // thread_pool.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef thread_pool_h 10 | #define thread_pool_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "assert.h" 21 | 22 | static std::atomic isSynchronized(true); 23 | 24 | inline void setNotSynchronized() { 25 | isSynchronized.store(false, std::memory_order_release); 26 | } 27 | inline void synchronize() { 28 | if(isSynchronized.load(std::memory_order_acquire)) { 29 | return; 30 | } 31 | isSynchronized.store(true, std::memory_order_release); 32 | } 33 | 34 | class ThreadPool { 35 | public: 36 | explicit ThreadPool(size_t); 37 | ThreadPool() = delete; 38 | ~ThreadPool(); 39 | 40 | static ThreadPool& Instance() { // singleton 41 | static ThreadPool threadpool(std::thread::hardware_concurrency()); 42 | return threadpool; 43 | } 44 | 45 | template 46 | auto addTask(F&& f, Args&&... args) 47 | -> std::future::type>; 48 | 49 | void wait(); 50 | 51 | private: 52 | void init(); 53 | 54 | size_t threads; 55 | std::vector workers; 56 | std::queue > tasks; 57 | 58 | std::mutex queue_mutex; 59 | std::condition_variable condition; 60 | std::atomic stop{false}; 61 | }; 62 | 63 | inline ThreadPool::ThreadPool(size_t _threads): threads(_threads) { 64 | init(); 65 | } 66 | 67 | inline void ThreadPool::init() { 68 | if (!workers.empty()) { 69 | return; 70 | } 71 | stop = false; 72 | for(size_t i = 0;i < threads; i++) { 73 | workers.emplace_back([this] { 74 | for(;;) { 75 | std::function task; 76 | { 77 | std::unique_lock lock(this->queue_mutex); 78 | this->condition.wait(lock, [this] { 79 | return this->stop || !this->tasks.empty(); 80 | }); 81 | if(this->stop && this->tasks.empty()) 82 | return; 83 | task = std::move(this->tasks.front()); 84 | this->tasks.pop(); 85 | } 86 | task(); 87 | } 88 | }); 89 | } 90 | } 91 | 92 | template 93 | auto ThreadPool::addTask(F&& f, Args&&... args) 94 | -> std::future::type> { 95 | if (workers.empty()) { 96 | init(); 97 | } 98 | using return_type = typename std::result_of::type; 99 | 100 | auto task = std::make_shared< std::packaged_task >( 101 | std::bind(std::forward(f), std::forward(args)...) 
102 | ); 103 | 104 | std::future ret = task->get_future(); 105 | { 106 | std::unique_lock lock(queue_mutex); 107 | tasks.emplace([task](){ 108 | (*task)(); 109 | }); 110 | } 111 | condition.notify_one(); 112 | return ret; 113 | } 114 | 115 | inline void ThreadPool::wait() { 116 | { 117 | std::unique_lock lock(queue_mutex); 118 | stop = true; 119 | } 120 | condition.notify_all(); // notify to stop 121 | for (auto &worker : workers) { 122 | worker.join(); 123 | } 124 | workers.clear(); 125 | } 126 | 127 | // destruct after join all threads 128 | inline ThreadPool::~ThreadPool() { 129 | wait(); 130 | } 131 | 132 | template 133 | class ThreadLocal { 134 | public: 135 | ThreadLocal() { 136 | assert(pthread_key_create(&threadSpecificKey_, dataDestructor) == 0); 137 | } 138 | ~ThreadLocal() { 139 | pthread_key_delete(threadSpecificKey_); 140 | } 141 | 142 | // get thread local object 143 | inline T* get(bool createLocal = true) { 144 | T* p = (T*)pthread_getspecific(threadSpecificKey_); 145 | if (!p && createLocal) { 146 | p = new T(); 147 | assert(pthread_setspecific(threadSpecificKey_, p) == 0); 148 | } 149 | return p; 150 | } 151 | 152 | // overwrite threadlocal object and destructed last one 153 | inline void set(T* p) { 154 | if (T* q = get(false)) { 155 | dataDestructor(q); 156 | } 157 | assert(pthread_setspecific(threadSpecificKey_, p) == 0); 158 | } 159 | 160 | T& operator*() { return *get(); } 161 | 162 | operator T*() { 163 | return get(); 164 | } 165 | 166 | private: 167 | static void dataDestructor(void* p) { 168 | delete (T*)p; 169 | } 170 | pthread_key_t threadSpecificKey_; 171 | }; 172 | 173 | #endif /* thread_pool_h */ 174 | -------------------------------------------------------------------------------- /LightCTR/common/time.h: -------------------------------------------------------------------------------- 1 | // 2 | // time.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/3. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
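// Usage sketch (illustrative only, based on the helpers below): time a code
// region with the timestamp() counter.
//
//   clock_start();
//   /* ... code under measurement ... */
//   clock_stop();
//   Cycle elapsed = clock_cycles();   // ending_ - beginning_ once stopped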
7 | // 8 | 9 | #ifndef time_h 10 | #define time_h 11 | 12 | #include 13 | #include 14 | 15 | #ifdef _WIN32 16 | #include 17 | #elif __APPLE__ 18 | #include 19 | #endif 20 | 21 | #define __must_inline__ __attribute__((always_inline)) 22 | 23 | typedef uint64_t Cycle; 24 | typedef double Second; 25 | 26 | struct timeval __g_now_tv; 27 | Cycle beginning_, ending_; 28 | Second beginning_seconds_, ending_seconds_; 29 | bool running_; 30 | 31 | inline void __must_inline__ update_tv() { 32 | gettimeofday(&__g_now_tv, NULL); 33 | } 34 | 35 | inline int64_t __must_inline__ get_now_ms() { 36 | return (int64_t)__g_now_tv.tv_sec * 1000 + __g_now_tv.tv_usec / 1000; 37 | } 38 | 39 | inline time_t __must_inline__ get_now_s(void) { 40 | return __g_now_tv.tv_sec; 41 | } 42 | 43 | inline time_t __must_inline__ gettickspan(uint64_t old_tick = get_now_ms()) { 44 | update_tv(); 45 | uint64_t cur_tick = get_now_ms(); 46 | if (old_tick > cur_tick) { 47 | return 0; 48 | } 49 | return cur_tick - old_tick; 50 | } 51 | 52 | inline uint64_t timestamp() { 53 | 54 | #ifdef _WIN32 55 | uint64_t cycles = 0; 56 | uint64_t frequency = 0; 57 | 58 | QueryPerformanceFrequency((LARGE_INTEGER*) &frequency); 59 | QueryPerformanceCounter((LARGE_INTEGER*) &cycles); 60 | 61 | return cycles / frequency; 62 | #elif __APPLE__ 63 | uint64_t absolute_time = mach_absolute_time(); 64 | mach_timebase_info_data_t info = {0,0}; 65 | 66 | if (info.denom == 0) mach_timebase_info(&info); 67 | uint64_t elapsednano = absolute_time * (info.numer / info.denom); 68 | 69 | timespec spec; 70 | spec.tv_sec = elapsednano * 1e-9; 71 | spec.tv_nsec = elapsednano - (spec.tv_sec * 1e9); 72 | 73 | return spec.tv_nsec + (uint64_t)spec.tv_sec * 1e9; 74 | #else 75 | timespec spec; 76 | clock_gettime(CLOCK_REALTIME, &spec); 77 | return spec.tv_nsec + (uint64_t)spec.tv_sec * 1e9; 78 | #endif 79 | } 80 | 81 | inline void clock_start() { 82 | beginning_ = timestamp(); 83 | beginning_seconds_ = (beginning_ + 0.0) * 1.0e-9; 84 | running_ = true; 85 | } 86 | 87 | inline void clock_stop() { 88 | ending_ = timestamp(); 89 | ending_seconds_ = (ending_ + 0.0) * 1.0e-9; 90 | running_ = false; 91 | } 92 | 93 | inline Cycle clock_cycles() { 94 | if(running_) { 95 | return (timestamp() - beginning_); 96 | } else { 97 | return (ending_ - beginning_); 98 | } 99 | } 100 | 101 | #endif /* time_h */ 102 | -------------------------------------------------------------------------------- /LightCTR/dag/aggregate_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // aggregate_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
7 | // 8 | 9 | #ifndef aggregate_node_h 10 | #define aggregate_node_h 11 | 12 | #include 13 | #include "node_abst.h" 14 | 15 | // Aggregate or Scatter Flow 16 | class AggregateNode : public Autograd_Node_Abst { 17 | public: 18 | AggregateNode() = delete; 19 | AggregateNode(size_t _in_cnt, size_t _out_cnt = 1) : Autograd_Node_Abst(_in_cnt, _out_cnt) { 20 | assert(_in_cnt > 0 && _out_cnt > 0); 21 | } 22 | 23 | protected: 24 | virtual void forward_compute(const std::vector& in_outputs) = 0; 25 | 26 | virtual void backward_compute(const std::vector& out_deltas) = 0; 27 | }; 28 | 29 | #endif /* aggregate_node_h */ 30 | -------------------------------------------------------------------------------- /LightCTR/dag/dag_pipeline.h: -------------------------------------------------------------------------------- 1 | // 2 | // dag_pipeline.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/5. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef dag_pipeline_h 10 | #define dag_pipeline_h 11 | 12 | #include "aggregate_node.h" 13 | #include "source_node.h" 14 | #include "terminus_node.h" 15 | 16 | #include "operator/add_op.h" 17 | #include "operator/multiply_op.h" 18 | #include "operator/matmul_op.h" 19 | #include "operator/activations_op.h" 20 | #include "operator/loss_op.h" 21 | 22 | // build up pipelines of computation 23 | // or directed acyclic graphs (DAGs) of computation 24 | 25 | class DAG_Pipeline { 26 | public: 27 | 28 | static void addDirectedFlow(std::shared_ptr source_ptr, 29 | std::shared_ptr terminus_ptr) { 30 | terminus_ptr->regist_in_node(source_ptr); 31 | } 32 | 33 | static void addAutogradFlow(std::shared_ptr source_ptr, 34 | std::shared_ptr terminus_ptr) { 35 | terminus_ptr->regist_in_node(source_ptr); 36 | source_ptr->regist_out_node(terminus_ptr); 37 | } 38 | 39 | }; 40 | 41 | 42 | #endif /* dag_pipeline_h */ 43 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/activations_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // activations_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/23. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
7 | // 8 | 9 | #ifndef activations_op_h 10 | #define activations_op_h 11 | 12 | #include "../../util/activations.h" 13 | #include "string.h" 14 | 15 | template 16 | class ActivationsOp : public AggregateNode { 17 | public: 18 | ActivationsOp() = delete; 19 | ActivationsOp(size_t _out_cnt) : AggregateNode(1, _out_cnt) { 20 | } 21 | 22 | protected: 23 | void forward_compute(const std::vector& in_outputs) { 24 | assert(in_outputs[0].data); 25 | const size_t len = in_outputs[0].data->size(); 26 | if (node_output.data == nullptr) { 27 | node_output.data = std::make_shared >(len); 28 | } 29 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 30 | activFun.forward(node_output.data->data(), len); 31 | } 32 | 33 | void backward_compute(const std::vector& out_deltas) { 34 | const size_t len = out_deltas[0].data->size(); 35 | if (node_delta.data == nullptr) { 36 | node_delta.data = std::make_shared >(len); 37 | } 38 | std::memcpy(node_delta.data->data(), out_deltas[0].data->data(), len * sizeof(float)); 39 | 40 | activFun.backward(node_delta.data->data(), node_output.data->data(), 41 | node_delta.data->data(), len); 42 | } 43 | 44 | private: 45 | ActivationFunction activFun; 46 | }; 47 | 48 | #endif /* activations_op_h */ 49 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/add_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // add_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/20. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef add_op_h 10 | #define add_op_h 11 | 12 | #include "../aggregate_node.h" 13 | #include "string.h" 14 | #include "../../common/avx.h" 15 | 16 | class AddOp : public AggregateNode { 17 | public: 18 | AddOp() = delete; 19 | AddOp(size_t _in_cnt, size_t _out_cnt = 1) : AggregateNode(_in_cnt, _out_cnt) { 20 | assert(_in_cnt > 0 && _out_cnt > 0); 21 | } 22 | 23 | protected: 24 | void forward_compute(const std::vector& in_outputs) { 25 | const size_t len = in_outputs[0].data->size(); 26 | assert(len == in_outputs[1].data->size()); 27 | if (node_output.data == nullptr) { 28 | node_output.data = std::make_shared >(len); 29 | } 30 | 31 | std::memset(node_output.data->data(), 0, len * sizeof(float)); 32 | for(auto& in_output : in_outputs) { 33 | avx_vecAdd(node_output.data->data(), in_output.data->data(), 34 | node_output.data->data(), len); 35 | } 36 | } 37 | 38 | void backward_compute(const std::vector& out_deltas) { 39 | const size_t len = out_deltas[0].data->size(); 40 | if (node_delta.data == nullptr) { 41 | node_delta.data = std::make_shared >(len); 42 | } 43 | 44 | std::memset(node_delta.data->data(), 0, len * sizeof(float)); 45 | for(auto& out_delta : out_deltas) { 46 | avx_vecAdd(node_delta.data->data(), out_delta.data->data(), 47 | node_delta.data->data(), len); 48 | } 49 | } 50 | }; 51 | 52 | #endif /* add_op_h */ 53 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/loss_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // loss_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/23. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
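// Usage sketch (illustrative only, based on the template below): the template
// argument is whichever loss functor from util/loss.h gets plugged in, named
// SomeLoss here purely as a placeholder; `label_ptr` is a hypothetical
// shared_ptr to the label vector.
//
//   LossOp<SomeLoss> loss_node;
//   loss_node.setLable(label_ptr);       // attach the labels before the forward pass
//   /* ... run the forward flow through the DAG ... */
//   float loss = loss_node.getLoss();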
7 | // 8 | 9 | #ifndef loss_op_h 10 | #define loss_op_h 11 | 12 | #include "../terminus_node.h" 13 | 14 | template 15 | class LossOp : public TerminusNode { 16 | public: 17 | LossOp() : TerminusNode(1) { 18 | } 19 | 20 | float getLoss() const { 21 | return _loss; 22 | } 23 | 24 | void setLable(std::shared_ptr > label) { 25 | _label = label; 26 | } 27 | 28 | protected: 29 | void forward_compute(const std::vector& in_outputs) { 30 | // compute delta via loss function 31 | assert(_label && in_outputs.size() == 1); 32 | const size_t len = in_outputs[0].data->size(); 33 | if (node_output.data == nullptr) { 34 | node_output.data = std::make_shared >(len); 35 | } 36 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 37 | _loss = lossFun.loss(in_outputs[0].data->data(), _label->data(), len); 38 | } 39 | 40 | void backward_compute(const std::vector& out_deltas) { 41 | // back propagate delta 42 | assert(_label); 43 | const size_t len = node_output.data->size(); 44 | assert(_label->size() == len); 45 | if (node_delta.data == nullptr) { 46 | node_delta.data = std::make_shared >(len); 47 | } 48 | lossFun.gradient(node_output.data->data(), _label->data(), 49 | node_delta.data->data(), len); 50 | } 51 | 52 | private: 53 | float _loss; 54 | std::shared_ptr > _label; 55 | LossFunction lossFun; 56 | }; 57 | 58 | #endif /* loss_op_h */ 59 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/matmul_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // matmul_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/24. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef matmul_op_h 10 | #define matmul_op_h 11 | 12 | #include "../../common/avx.h" 13 | 14 | class MatmulOp : public AggregateNode { 15 | public: 16 | MatmulOp() = delete; 17 | MatmulOp(size_t _out_cnt) : AggregateNode(2, _out_cnt) { 18 | assert(_out_cnt > 0); 19 | } 20 | 21 | protected: 22 | void forward_compute(const std::vector& in_outputs) { 23 | assert(in_outputs.size() == 2); 24 | if (node_output.data == nullptr) { 25 | node_output.data = std::make_shared >(1); 26 | } 27 | compute_records.push_back(in_outputs[0]); 28 | compute_records.push_back(in_outputs[1]); 29 | node_output.data->at(0) = avx_dotProduct(in_outputs[0].data->data(), 30 | in_outputs[1].data->data(), 31 | in_outputs[0].data->size()); 32 | } 33 | 34 | void backward_compute(const std::vector& out_deltas) { 35 | float cur_delta = 0; 36 | for(auto& out_delta : out_deltas) { 37 | cur_delta += out_delta.data->at(0); 38 | } 39 | 40 | assert(compute_records.size() == 2); 41 | const size_t len = compute_records[0].data->size(); 42 | if (node_delta.data == nullptr) { 43 | node_delta.data = std::make_shared >(len); 44 | } 45 | 46 | auto& order_promises = get_in_complete_promises(); 47 | assert(order_promises.size() == 1); 48 | 49 | size_t index = 0; 50 | if (compute_records[1].node_id == get_first_target_id()) { 51 | index = 1; 52 | } 53 | 54 | avx_vecScale(compute_records[index].data->data(), 55 | node_delta.data->data(), 56 | len, cur_delta); 57 | order_promises[0].set_value(node_delta); 58 | 59 | // Notice to remove targeted promise for repeating set promise value 60 | // otherwise, it will be "terminating with uncaught exception of type 61 | // std::__1::future_error: The state of the promise has already been set" 62 | order_promises.clear(); 63 | 64 | avx_vecScale(compute_records[1 - index].data->data(), 65 
| node_delta.data->data(), 66 | len, cur_delta); 67 | } 68 | }; 69 | 70 | #endif /* matmul_op_h */ 71 | -------------------------------------------------------------------------------- /LightCTR/dag/operator/multiply_op.h: -------------------------------------------------------------------------------- 1 | // 2 | // multiply_op.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/20. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef multiply_op_h 10 | #define multiply_op_h 11 | 12 | #include "string.h" 13 | #include "../../common/avx.h" 14 | 15 | class MultiplyOp : public AggregateNode { 16 | public: 17 | MultiplyOp() = delete; 18 | MultiplyOp(size_t _in_cnt, size_t _out_cnt = 1) : AggregateNode(_in_cnt, _out_cnt) { 19 | assert(_in_cnt > 0 && _out_cnt > 0); 20 | } 21 | 22 | protected: 23 | void forward_compute(const std::vector& in_outputs) { 24 | const size_t len = in_outputs[0].data->size(); 25 | if (node_output.data == nullptr) { 26 | node_output.data = std::make_shared >(len); 27 | } 28 | std::memcpy(node_output.data->data(), in_outputs[0].data->data(), len * sizeof(float)); 29 | for(size_t i = 1; i < in_outputs.size(); i++) { 30 | compute_records.push_back(in_outputs[i]); 31 | avx_vecScale(node_output.data->data(), node_output.data->data(), 32 | len, in_outputs[i].data->data()); 33 | } 34 | } 35 | 36 | void backward_compute(const std::vector& out_deltas) { 37 | const size_t len = out_deltas[0].data->size(); 38 | std::vector delta_arr(len, 0); 39 | 40 | for(auto& out_delta : out_deltas) { 41 | avx_vecAdd(out_delta.data->data(), delta_arr.data(), 42 | delta_arr.data(), len); 43 | } 44 | avx_vecScale(delta_arr.data(), node_delta.data->data(), 45 | len, node_output.data->data()); 46 | 47 | auto& order_ids = get_in_promises_ids(); 48 | auto& order_promises = get_in_complete_promises(); 49 | 50 | for (size_t i = 0; i < order_ids.size(); i++) { 51 | const size_t target_id = order_ids[i]; 52 | for (auto& record : compute_records) { 53 | if (record.node_id == target_id) { 54 | avx_vecDiv(node_delta.data->data(), record.data->data(), 55 | node_delta.data->data(), len); 56 | order_promises[i].set_value(node_delta); 57 | break; 58 | } 59 | } 60 | } 61 | for (auto& record : compute_records) { 62 | if (record.node_id == get_first_target_id()) { 63 | avx_vecDiv(node_delta.data->data(), record.data->data(), 64 | node_delta.data->data(), len); 65 | return; 66 | } 67 | } 68 | assert(false); 69 | } 70 | }; 71 | 72 | #endif /* multiply_op_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/dag/source_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // source_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 
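// Usage sketch (illustrative only, based on the classes below): feed a value
// into a SourceNode and, after the terminus has run forward, pull gradients
// back through runFlow(). It assumes node values are std::vector<float>,
// matching the float buffers used by the operator nodes.
//
//   auto x = std::make_shared<SourceNode>(/*out_cnt=*/1);
//   x->setValue(std::make_shared<std::vector<float>>(16, 0.5f));
//   /* ... wire x into a DAG and run the terminus node's forward flow ... */
//   DAG_Output grads = x->runFlow();   // triggers the backward flow to this source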
7 | // 8 | 9 | #ifndef source_node_h 10 | #define source_node_h 11 | 12 | #include 13 | #include "string.h" 14 | #include "node_abst.h" 15 | #include "../common/avx.h" 16 | 17 | class SourceNode : public Autograd_Node_Abst { 18 | public: 19 | SourceNode() = delete; 20 | explicit SourceNode(size_t _out_cnt) : Autograd_Node_Abst(0, _out_cnt) { 21 | assert(_out_cnt > 0); 22 | } 23 | 24 | DAG_Output runFlow(bool keep_intermediate = false) { 25 | init_backward_Flow(keep_intermediate); 26 | return backward_run().get(); 27 | } 28 | 29 | void setValue(std::shared_ptr > data) { 30 | node_output.data = data; 31 | } 32 | 33 | protected: 34 | virtual void forward_compute(const std::vector&) { 35 | // provide value of data source 36 | } 37 | 38 | virtual void backward_compute(const std::vector& out_deltas) { 39 | // apply delta as gradient on the value 40 | } 41 | }; 42 | 43 | 44 | template 45 | class TrainableNode : public SourceNode { 46 | public: 47 | TrainableNode() = delete; 48 | explicit TrainableNode(size_t _out_cnt) : SourceNode(_out_cnt) { 49 | assert(_out_cnt > 0); 50 | } 51 | 52 | void setValue(std::shared_ptr > data) { 53 | node_output.data = data; 54 | updater.learnable_params_cnt(data->size()); 55 | } 56 | 57 | protected: 58 | void forward_compute(const std::vector&) { 59 | // provide value of data source 60 | assert(node_output.data); 61 | } 62 | 63 | void backward_compute(const std::vector& out_deltas) { 64 | // apply delta as gradient on the value 65 | const size_t len = out_deltas[0].data->size(); 66 | assert(len == node_output.data->size()); 67 | 68 | if (node_delta.data == nullptr) { 69 | node_delta.data = std::make_shared >(len); 70 | } 71 | std::memset(node_delta.data->data(), 0, len * sizeof(float)); 72 | for(auto& out_delta : out_deltas) { 73 | avx_vecAdd(node_delta.data->data(), out_delta.data->data(), 74 | node_delta.data->data(), len); 75 | } 76 | updater.update(0, len, node_output.data->data(), node_delta.data->data()); 77 | } 78 | private: 79 | UpdaterFunc updater; 80 | }; 81 | 82 | #endif /* source_node_h */ 83 | -------------------------------------------------------------------------------- /LightCTR/dag/terminus_node.h: -------------------------------------------------------------------------------- 1 | // 2 | // terminus_node.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2019/5/19. 6 | // Copyright © 2019 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef terminus_node_h 10 | #define terminus_node_h 11 | 12 | #include 13 | #include "node_abst.h" 14 | #include "../common/avx.h" 15 | 16 | class TerminusNode : public Autograd_Node_Abst { 17 | public: 18 | TerminusNode() = delete; 19 | explicit TerminusNode(size_t _in_cnt) : Autograd_Node_Abst(_in_cnt, 0) { 20 | assert(_in_cnt > 0); 21 | } 22 | 23 | DAG_Output runFlow(bool keep_intermediate = false) { 24 | init_forward_Flow(keep_intermediate); 25 | return forward_run().get(); 26 | } 27 | 28 | protected: 29 | void forward_compute(const std::vector& in_outputs) { 30 | // compute delta via loss function 31 | } 32 | 33 | void backward_compute(const std::vector&) { 34 | // back propagate delta 35 | } 36 | }; 37 | 38 | #endif /* terminus_node_h */ 39 | -------------------------------------------------------------------------------- /LightCTR/distribut/consistent_hash.h: -------------------------------------------------------------------------------- 1 | // 2 | // consistent_hash.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
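// A minimal illustrative sketch of the contract used by
// TrainableNode::backward_compute above: sum the deltas arriving from every
// consumer of the node, then hand the accumulated gradient to an updater. The
// helper below stands in for that with a plain SGD step; the real updaters
// live in util/gradientUpdater.h and util/momentumUpdater.h, so the
// learning-rate handling here is an assumption, not the project's behaviour.
#include <vector>
#include <cstddef>

inline void accumulate_and_sgd(std::vector<float>& params,
                               const std::vector<std::vector<float>>& out_deltas,
                               float lr) {
    std::vector<float> grad(params.size(), 0.0f);
    for (const auto& d : out_deltas)               // sum the delta from every downstream node
        for (std::size_t i = 0; i < grad.size(); ++i)
            grad[i] += d[i];
    for (std::size_t i = 0; i < params.size(); ++i)
        params[i] -= lr * grad[i];                 // stands in for updater.update(0, len, params, grad)
}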
7 | // 8 | 9 | #ifndef consistent_hash_h 10 | #define consistent_hash_h 11 | 12 | #include "../common/hash.h" 13 | #include 14 | #include 15 | #include 16 | 17 | // Make data shardings ditributed in PS clusters by DHT 18 | class ConsistentHash { 19 | public: 20 | static ConsistentHash& Instance() { // singleton 21 | static std::once_flag once; 22 | static ConsistentHash consist; 23 | std::call_once(once, [] { 24 | assert(__global_cluster_ps_cnt > 0); 25 | consist.init(__global_cluster_ps_cnt); 26 | }); 27 | return consist; 28 | } 29 | 30 | template 31 | inline uint32_t getNode(TKey key) { 32 | uint32_t partition = murMurHash(key); 33 | std::map::iterator it = 34 | server_nodes.lower_bound(partition); 35 | 36 | if(it == server_nodes.end()) { 37 | return server_nodes.begin()->second; 38 | } 39 | return it->second; 40 | } 41 | 42 | private: 43 | ConsistentHash() { 44 | 45 | } 46 | ConsistentHash(const ConsistentHash&) = delete; 47 | ConsistentHash(ConsistentHash&&) = delete; 48 | ConsistentHash &operator=(const ConsistentHash &) = delete; 49 | ConsistentHash &operator=(ConsistentHash &&) = delete; 50 | 51 | void init(uint32_t _node_cnt) { 52 | node_cnt = _node_cnt; 53 | for (uint32_t i = 0; i < node_cnt; i++) { 54 | for (uint32_t j = 0; j < virtual_node_cnt; j++) { 55 | std::stringstream node_key; 56 | node_key << i << "-" << j; 57 | uint32_t partition = murMurHash(node_key.str()); 58 | server_nodes[partition] = i; 59 | } 60 | } 61 | } 62 | 63 | uint32_t node_cnt; 64 | const uint32_t virtual_node_cnt{5}; // num of Replicas 65 | 66 | std::map server_nodes; 67 | }; 68 | 69 | #endif /* consistent_hash_h */ 70 | -------------------------------------------------------------------------------- /LightCTR/distribut/dist_machine_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // dist_machine_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017 SongKuangshi. All rights reserved. 
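// A minimal illustrative sketch of the routing done by the ConsistentHash
// singleton above: five virtual replicas of every PS node are hashed onto a
// ring, and a key is routed to the first virtual node at or after the key's
// hash, wrapping to the smallest ring position when lower_bound runs off the
// end. The self-contained version below uses std::hash as a stand-in for
// murMurHash; the type and method names are hypothetical, not the project API.
#include <map>
#include <string>
#include <cstdint>
#include <functional>

struct TinyHashRing {
    std::map<std::uint32_t, std::uint32_t> ring;   // ring position -> server id
    void add_server(std::uint32_t id, std::uint32_t replicas = 5) {
        for (std::uint32_t j = 0; j < replicas; ++j) {
            const std::string vnode = std::to_string(id) + "-" + std::to_string(j);
            ring[static_cast<std::uint32_t>(std::hash<std::string>{}(vnode))] = id;
        }
    }
    std::uint32_t route(const std::string& key) const {
        const auto pos = static_cast<std::uint32_t>(std::hash<std::string>{}(key));
        auto it = ring.lower_bound(pos);           // first virtual node clockwise from the key
        return it == ring.end() ? ring.begin()->second : it->second;
    }
};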
7 | // 8 | 9 | #ifndef dist_machine_abst_h 10 | #define dist_machine_abst_h 11 | 12 | #include "../common/network.h" 13 | #include "../common/barrier.h" 14 | #include "../common/lock.h" 15 | 16 | enum Run_Mode { 17 | PS_Mode = 0, 18 | Ring_Mode 19 | }; 20 | 21 | class Dist_Machine_Abst { 22 | public: 23 | Dist_Machine_Abst() : gDelivery(Delivery::Instance()) { 24 | gDelivery.set_node_id(BEGIN_ID_OF_WORKER); 25 | regist_curNode_toMaster(); 26 | regist_master_ack_handler(); 27 | 28 | serving_barrier.block(); 29 | status_serving = true; 30 | } 31 | 32 | virtual ~Dist_Machine_Abst() { 33 | shutdown(NULL); 34 | } 35 | 36 | virtual inline size_t Rank() const { // Worker Rank begin from 1 37 | assert(status_serving); 38 | return gDelivery.node_id() - BEGIN_ID_OF_WORKER; 39 | } 40 | 41 | virtual inline bool status() const { 42 | return status_serving; 43 | } 44 | 45 | virtual inline void shutdown(std::function terminate_callback) { 46 | if (!status_serving) { 47 | return; 48 | } 49 | send_FIN_toMaster(terminate_callback); 50 | } 51 | 52 | private: 53 | void regist_curNode_toMaster() { 54 | PackageDescript desc(REQUEST_HANDSHAKE); 55 | const Addr& local_addr = gDelivery.local_addr(); 56 | std::string local_addr_str = local_addr.toString(); 57 | desc.content.append(local_addr_str.c_str(), local_addr_str.length()); 58 | 59 | desc.callback = [this](std::shared_ptr resp_package) { 60 | size_t node_id; 61 | resp_package->content >> node_id; 62 | printf("[Worker] Complete Register cur_node_id = %zu\n", node_id); 63 | gDelivery.set_node_id(node_id); 64 | assert(gDelivery.node_id() >= BEGIN_ID_OF_WORKER); 65 | serving_barrier.unblock(); 66 | }; 67 | gDelivery.send_async(desc, 0); 68 | } 69 | 70 | void regist_master_ack_handler() { 71 | request_handler_t ack_handler = [this]( 72 | std::shared_ptr request, 73 | PackageDescript& response) { 74 | #ifdef WORKER_RING 75 | size_t ps_id = BEGIN_ID_OF_WORKER + 1; 76 | #else 77 | size_t ps_id = BEGIN_ID_OF_PS; 78 | #endif 79 | while (!request->content.readEOF()) { // read keys needed by worker 80 | Addr ps_addr(request->content); 81 | printf("[Worker] Add ps_id = %zu router\n", ps_id); 82 | gDelivery.regist_router(ps_id++, std::move(ps_addr)); 83 | } 84 | serving_barrier.unblock(); 85 | }; 86 | gDelivery.regist_handler(REQUEST_ACK, std::move(ack_handler)); 87 | } 88 | 89 | void send_FIN_toMaster(std::function terminate_callback) { 90 | PackageDescript desc(REQUEST_FIN); 91 | desc.callback = [this, terminate_callback]( 92 | std::shared_ptr resp_package) { 93 | puts("[Worker] Fin is accepted"); 94 | gDelivery.shutdown(); 95 | if (terminate_callback) { 96 | terminate_callback(); 97 | } 98 | }; 99 | gDelivery.send_async(desc, 0); 100 | } 101 | 102 | bool status_serving{false}; 103 | Barrier serving_barrier{2}; 104 | protected: 105 | Delivery& gDelivery; 106 | }; 107 | 108 | #endif /* dist_machine_abst_h */ 109 | -------------------------------------------------------------------------------- /LightCTR/distribut/push.h: -------------------------------------------------------------------------------- 1 | // 2 | // push.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
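// A minimal illustrative sketch of the startup synchronisation used above:
// the constructor sends the handshake asynchronously and blocks on
// serving_barrier{2}, where one unblock() comes from the handshake callback
// that assigns the node id and the other from the REQUEST_ACK handler that
// registers the PS routers, so serving starts only after both replies arrive.
// Below is the same "wait for two async completions" shape expressed with
// std::future instead of the project's Barrier -- an analogy, not project code.
#include <future>
#include <thread>
#include <cstdio>

inline void wait_for_two_replies() {
    std::promise<void> handshake_done, ack_done;
    auto f1 = handshake_done.get_future();
    auto f2 = ack_done.get_future();
    std::thread t1([&] { handshake_done.set_value(); });   // e.g. node id assigned
    std::thread t2([&] { ack_done.set_value(); });         // e.g. PS routers registered
    f1.wait();
    f2.wait();                                             // analogue of serving_barrier.block()
    std::puts("both replies received; serving can start");
    t1.join();
    t2.join();
}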
7 | // 8 | 9 | #ifndef push_h 10 | #define push_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../common/barrier.h" 16 | #include "../common/network.h" 17 | #include "../common/buffer_fusion.h" 18 | 19 | // Push Grads to PS 20 | class Push { 21 | 22 | public: 23 | Push() = delete; 24 | explicit Push(char _headByte) : 25 | headByte(_headByte), 26 | gDelivery(Delivery::Instance()), 27 | gConsistentHash(ConsistentHash::Instance()) { 28 | } 29 | 30 | void registTensorFusion(std::shared_ptr > _buf_fusion) { 31 | assert(headByte == 'T'); 32 | buf_fusion = _buf_fusion; 33 | } 34 | 35 | template 36 | void sync(const std::unordered_map &grads, size_t epoch) { 37 | if (headByte == 'T') 38 | assert(buf_fusion); 39 | assert(epoch > 0); 40 | Barrier barrier; 41 | int candidate_ps = 0; 42 | sendToPS(grads, candidate_ps, epoch, 43 | [&barrier, &candidate_ps]() { 44 | candidate_ps--; 45 | if (candidate_ps <= 0) { 46 | barrier.unblock(); 47 | } 48 | }); 49 | barrier.block(); 50 | } 51 | 52 | private: 53 | template 54 | void sendToPS(const std::unordered_map &grads, 55 | int& candidate_ps, 56 | size_t epoch, 57 | std::function callback) { 58 | std::map > > push_map; 59 | 60 | for (auto it = grads.begin(); it != grads.end(); it++) { 61 | assert(it->second.checkValid()); 62 | if (!it->second.checkPreferredValue()) { 63 | continue; 64 | } 65 | const size_t to_id = BEGIN_ID_OF_PS + 66 | gConsistentHash.getNode(it->first); 67 | if (push_map.count(to_id) == 0) { 68 | push_map[to_id] = std::vector >(); 69 | candidate_ps++; 70 | } 71 | push_map[to_id].emplace_back(std::move(*it)); 72 | } 73 | 74 | if (push_map.size() == 0) { 75 | if (callback) { 76 | callback(); 77 | } 78 | } 79 | 80 | for (auto &item : push_map) { 81 | const size_t to_id = item.first; 82 | PackageDescript desc(REQUEST_PUSH, epoch); 83 | desc.content << headByte; 84 | for (auto &grad_pair : item.second) { 85 | // push data pair by VarUint & float16_t 86 | desc.content.appendVarUint(grad_pair.first); 87 | desc.content << Float16(&grad_pair.second).float16_value(); 88 | } 89 | desc.callback = [callback](std::shared_ptr resp_package) { 90 | // response without content 91 | if (callback) { 92 | callback(); 93 | } 94 | }; 95 | gDelivery.send_async(desc, to_id); 96 | } 97 | #ifdef DEBUG 98 | printf("[WORKER Push] %zu %c Grad-pairs Sended\n", grads.size(), headByte); 99 | #endif 100 | } 101 | 102 | template 103 | void sendToPS(const std::unordered_map &grads, 104 | int& candidate_ps, 105 | size_t epoch, 106 | std::function callback) { 107 | std::map > > push_map; 108 | 109 | for (auto it = grads.begin(); it != grads.end(); it++) { 110 | const size_t to_id = BEGIN_ID_OF_PS + 111 | gConsistentHash.getNode(it->first); 112 | if (push_map.count(to_id) == 0) { 113 | push_map[to_id] = std::vector >(); 114 | candidate_ps++; 115 | } 116 | push_map[to_id].emplace_back(std::move(*it)); 117 | } 118 | 119 | for (auto &item : push_map) { 120 | const size_t to_id = item.first; 121 | PackageDescript desc(REQUEST_PUSH, epoch); 122 | desc.content << headByte; 123 | for (auto &grad_pair : item.second) { 124 | desc.content.appendVarUint(grad_pair.first); 125 | auto memAddr = buf_fusion->getMemory(grad_pair.second); 126 | desc.content.appendVarUint(memAddr.second); 127 | 128 | for (size_t i = 0; i < memAddr.second; i++) { 129 | desc.content << Float16(memAddr.first + i).float16_value(); 130 | } 131 | } 132 | desc.callback = [callback](std::shared_ptr resp_package) { 133 | // response without content 134 | if (callback) { 135 | 
callback(); 136 | } 137 | }; 138 | gDelivery.send_async(desc, to_id); 139 | } 140 | #ifdef DEBUG 141 | printf("[WORKER Push] %zu %c Grad-Tensors Sended\n", grads.size(), headByte); 142 | #endif 143 | } 144 | 145 | char headByte = 'N'; 146 | std::shared_ptr > buf_fusion = nullptr; 147 | 148 | Delivery& gDelivery; 149 | ConsistentHash& gConsistentHash; 150 | }; 151 | 152 | #endif /* push_h */ 153 | -------------------------------------------------------------------------------- /LightCTR/distribut/worker.h: -------------------------------------------------------------------------------- 1 | // 2 | // worker.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/12/5. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef worker_h 10 | #define worker_h 11 | 12 | #include "consistent_hash.h" 13 | #include "dist_machine_abst.h" 14 | #include "push.h" 15 | #include "pull.h" 16 | 17 | template 18 | class Worker : public Dist_Machine_Abst { 19 | public: 20 | Worker() : gConsistentHash(ConsistentHash::Instance()) { 21 | } 22 | 23 | ~Worker() { 24 | 25 | } 26 | // for sparse model 27 | Push push_op = Push('N'); 28 | Pull pull_op = Pull('N'); 29 | // for dense model 30 | Push push_tensor_op = Push('T'); 31 | Pull pull_tensor_op = Pull('T'); 32 | 33 | private: 34 | ConsistentHash& gConsistentHash; 35 | }; 36 | 37 | #endif /* worker_h */ 38 | -------------------------------------------------------------------------------- /LightCTR/em_algo_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // em_algo_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef em_algo_abst_h 10 | #define em_algo_abst_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "assert.h" 20 | using namespace std; 21 | 22 | template 23 | class EM_Algo_Abst { 24 | public: 25 | EM_Algo_Abst(string _dataFile, size_t _epoch, size_t _feature_cnt): 26 | epoch(_epoch), feature_cnt(_feature_cnt) { 27 | loadDataRow(_dataFile); 28 | } 29 | virtual ~EM_Algo_Abst() { 30 | dataSet.clear(); 31 | } 32 | 33 | void Train() { 34 | float lastLE = 0; 35 | for (size_t i = 0; i < this->epoch; i++) { 36 | T* latentVar = Train_EStep(); 37 | float likelihood = Train_MStep(latentVar); 38 | assert(!isnan(likelihood)); 39 | printf("Epoch %zu log likelihood ELOB = %.3f\n", i, likelihood); 40 | if (i == 0 || fabs(likelihood - lastLE) > 1e-3) { 41 | lastLE = likelihood; 42 | } else { 43 | puts("have been converge"); 44 | break; 45 | } 46 | } 47 | printArguments(); 48 | } 49 | void saveModel(size_t epoch) { 50 | 51 | } 52 | 53 | virtual void init() = 0; 54 | virtual T* Train_EStep() = 0; 55 | virtual float Train_MStep(const T*) = 0; 56 | virtual void printArguments() = 0; 57 | virtual vector Predict() = 0; 58 | 59 | void loadDataRow(string dataPath) { 60 | dataSet.clear(); 61 | 62 | ifstream fin_; 63 | string line; 64 | int nchar; 65 | float val; 66 | fin_.open(dataPath, ios::in); 67 | if(!fin_.is_open()){ 68 | cout << "open file error!" 
<< endl; 69 | exit(1); 70 | } 71 | vector tmp; 72 | tmp.reserve(feature_cnt); 73 | while(!fin_.eof()){ 74 | getline(fin_, line); 75 | tmp.clear(); 76 | const char *pline = line.c_str(); 77 | while(pline < line.c_str() + (int)line.length() && 78 | sscanf(pline, "%f%n", &val, &nchar) >= 1){ 79 | pline += nchar + 1; 80 | assert(!isnan(val)); 81 | tmp.emplace_back(val); 82 | if (tmp.size() == feature_cnt) { 83 | assert(tmp.size() == feature_cnt); 84 | this->dataSet.emplace_back(move(tmp)); 85 | tmp.clear(); 86 | } 87 | } 88 | } 89 | this->dataRow_cnt = this->dataSet.size(); 90 | assert(this->dataRow_cnt > 0); 91 | } 92 | 93 | size_t epoch; 94 | size_t feature_cnt, dataRow_cnt; 95 | vector > dataSet; 96 | }; 97 | 98 | #endif /* em_algo_abst_h */ 99 | -------------------------------------------------------------------------------- /LightCTR/fm_algo_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // fm_algo_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef fm_algo_abst_h 10 | #define fm_algo_abst_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "assert.h" 21 | #include "util/random.h" 22 | #include "util/gradientUpdater.h" 23 | #include "util/momentumUpdater.h" 24 | 25 | #define FM 26 | 27 | using namespace std; 28 | 29 | struct FMFeature { 30 | size_t first; // feature id 31 | float second; // value 32 | size_t field; 33 | FMFeature(size_t _first, float _second, size_t _field): 34 | first(_first), second(_second), field(_field) {} 35 | }; 36 | 37 | class FM_Algo_Abst { 38 | public: 39 | FM_Algo_Abst(string _dataPath, size_t _factor_cnt, 40 | size_t _field_cnt = 0, size_t _feature_cnt = 0): 41 | feature_cnt(_feature_cnt), field_cnt(_field_cnt), factor_cnt(_factor_cnt) { 42 | proc_cnt = thread::hardware_concurrency(); 43 | loadDataRow(_dataPath); 44 | init(); 45 | } 46 | virtual ~FM_Algo_Abst() { 47 | delete [] W; 48 | #ifdef FM 49 | delete [] V; 50 | delete [] sumVX; 51 | #endif 52 | } 53 | void init() { 54 | W = new float[this->feature_cnt]; 55 | memset(W, 0, sizeof(float) * this->feature_cnt); 56 | #ifdef FM 57 | size_t memsize = this->feature_cnt * this->factor_cnt; 58 | if (this->field_cnt > 0) { 59 | memsize = this->feature_cnt * this->field_cnt * this->factor_cnt; 60 | } 61 | V = new float[memsize]; 62 | const float scale = 1.0 / sqrt(this->factor_cnt); 63 | for (size_t i = 0; i < memsize; i++) { 64 | V[i] = GaussRand() * scale; 65 | } 66 | sumVX = NULL; 67 | #endif 68 | } 69 | 70 | void loadDataRow(string dataPath) { 71 | dataSet.clear(); 72 | 73 | ifstream fin_; 74 | string line; 75 | int nchar, y; 76 | size_t fid, fieldid; 77 | float val; 78 | fin_.open(dataPath, ios::in); 79 | if(!fin_.is_open()){ 80 | cout << "open file error!" 
<< endl; 81 | exit(1); 82 | } 83 | vector tmp; 84 | while(!fin_.eof()){ 85 | getline(fin_, line); 86 | tmp.clear(); 87 | const char *pline = line.c_str(); 88 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 89 | pline += nchar + 1; 90 | label.emplace_back(y); 91 | while(pline < line.c_str() + (int)line.length() && 92 | sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 93 | pline += nchar + 1; 94 | tmp.emplace_back(*new FMFeature(fid, val, fieldid)); 95 | this->feature_cnt = max(this->feature_cnt, fid + 1); 96 | if (this->field_cnt > 0) { 97 | this->field_cnt = max(this->field_cnt, fieldid + 1); 98 | } 99 | } 100 | } 101 | if (tmp.empty()) { 102 | continue; 103 | } 104 | this->dataSet.emplace_back(move(tmp)); 105 | } 106 | this->dataRow_cnt = this->dataSet.size(); 107 | } 108 | 109 | void saveModel(size_t epoch) { 110 | char buffer[1024]; 111 | snprintf(buffer, 1024, "%d", (int)epoch); 112 | string filename = buffer; 113 | ofstream md("./output/model_epoch_" + filename + ".txt"); 114 | if(!md.is_open()){ 115 | cout<<"save model open file error" << endl; 116 | exit(1); 117 | } 118 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 119 | if (W[fid] != 0) { 120 | md << fid << ":" << W[fid] << " "; 121 | } 122 | } 123 | md << endl; 124 | #ifdef FM 125 | // print all factor V 126 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 127 | md << fid << ":"; 128 | for (size_t fac_itr = 0; fac_itr < this->factor_cnt; fac_itr++) { 129 | md << *getV(fid, fac_itr) << " "; 130 | } 131 | md << endl; 132 | } 133 | #endif 134 | md.close(); 135 | } 136 | 137 | virtual void Train() = 0; 138 | 139 | float L2Reg_ratio; 140 | 141 | float *W; 142 | size_t feature_cnt, proc_cnt, field_cnt, factor_cnt; 143 | size_t dataRow_cnt; 144 | 145 | float *V, *sumVX; 146 | inline float* getV(size_t fid, size_t facid) const { 147 | return &V[fid * this->factor_cnt + facid]; 148 | } 149 | inline float* getV_field(size_t fid, size_t fieldid, size_t facid) const { 150 | return &V[fid * this->field_cnt * this->factor_cnt + fieldid * this->factor_cnt + facid]; 151 | } 152 | inline float* getSumVX(size_t rid, size_t facid) const { 153 | return &sumVX[rid * this->factor_cnt + facid]; 154 | } 155 | 156 | vector > dataSet; 157 | 158 | protected: 159 | inline float LogisticGradW(float pred, float label, float x) { 160 | return (pred - label) * x; 161 | } 162 | inline float LogisticGradV(float gradW, float sum, float v, float x) { 163 | return gradW * (sum - v * x); 164 | } 165 | 166 | AdagradUpdater_Num updater; 167 | float __loss; 168 | float __accuracy; 169 | 170 | vector label; 171 | vector > cross_field; 172 | }; 173 | 174 | #endif /* fm_algo_abst_h */ 175 | -------------------------------------------------------------------------------- /LightCTR/predict/fm_predict.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // fm_predict.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
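// A worked illustration of the identity behind the sumVX bookkeeping above and
// the scoring code that follows: the FM second-order term is computed with the
// O(k*n) factorisation
//   sum_{i<j} <v_i, v_j> x_i x_j = 0.5 * ( ||sum_i v_i x_i||^2 - sum_i ||v_i x_i||^2 ),
// which is why the predictor subtracts 0.5 * dot(v_i x_i, v_i x_i) per feature
// and then adds 0.5 * dot(sumVX, sumVX) per row. The two functions below
// compare the naive and factored forms on plain vectors; they are a sketch,
// not project code.
#include <vector>
#include <cstddef>

// vx[i] holds the k-dimensional vector v_i * x_i for one data row.
inline float fm_pairwise_naive(const std::vector<std::vector<float>>& vx) {
    float s = 0.0f;
    for (std::size_t i = 0; i < vx.size(); ++i)
        for (std::size_t j = i + 1; j < vx.size(); ++j)
            for (std::size_t f = 0; f < vx[i].size(); ++f)
                s += vx[i][f] * vx[j][f];
    return s;
}

inline float fm_pairwise_factored(const std::vector<std::vector<float>>& vx) {
    const std::size_t k = vx.empty() ? 0 : vx[0].size();
    std::vector<float> sum(k, 0.0f);
    float sum_of_squares = 0.0f;
    for (const auto& v : vx)
        for (std::size_t f = 0; f < k; ++f) {
            sum[f] += v[f];                        // builds the sumVX analogue
            sum_of_squares += v[f] * v[f];
        }
    float norm = 0.0f;
    for (float s : sum) norm += s * s;
    return 0.5f * (norm - sum_of_squares);         // matches fm_pairwise_naive up to rounding
}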
7 | // 8 | 9 | #include "fm_predict.h" 10 | #include 11 | 12 | void FM_Predict::Predict(string savePath) { 13 | vector ans; 14 | 15 | vector tmp_vec; 16 | tmp_vec.resize(fm->factor_cnt); 17 | 18 | for (size_t rid = 0; rid < this->test_dataRow_cnt; rid++) { // data row 19 | float fm_pred = 0.0f; 20 | if (fm->sumVX != NULL) { 21 | for (size_t i = 0; i < test_dataSet[rid].size(); i++) { // feature 22 | const size_t fid = test_dataSet[rid][i].first; 23 | assert(fid < fm->feature_cnt); 24 | const float X = test_dataSet[rid][i].second; 25 | fm_pred += fm->W[fid] * X; 26 | #ifdef FM 27 | avx_vecScale(fm->getV(fid, 0), tmp_vec.data(), fm->factor_cnt, X); 28 | fm_pred -= 0.5 * avx_dotProduct(tmp_vec.data(), tmp_vec.data(), fm->factor_cnt); 29 | #endif 30 | } 31 | #ifdef FM 32 | fm_pred += 0.5 * avx_dotProduct(fm->getSumVX(rid, 0), fm->getSumVX(rid, 0), fm->factor_cnt); 33 | #endif 34 | } else { 35 | // Field-aware FM 36 | for (size_t i = 0; i < test_dataSet[rid].size(); i++) { 37 | const size_t fid = test_dataSet[rid][i].first; 38 | const float X = test_dataSet[rid][i].second; 39 | const size_t field = test_dataSet[rid][i].field; 40 | 41 | fm_pred += fm->W[fid] * X; 42 | 43 | for (size_t j = i + 1; j < test_dataSet[rid].size(); j++) { 44 | const size_t fid2 = test_dataSet[rid][j].first; 45 | const float X2 = test_dataSet[rid][j].second; 46 | const size_t field2 = test_dataSet[rid][j].field; 47 | 48 | float field_w = avx_dotProduct(fm->getV_field(fid, field2, 0), 49 | fm->getV_field(fid2, field, 0), fm->factor_cnt); 50 | fm_pred += field_w * X * X2; 51 | } 52 | } 53 | } 54 | 55 | float pCTR = sigmoid.forward(fm_pred); 56 | 57 | ans.emplace_back(pCTR); 58 | } 59 | 60 | if (!test_label.empty()) { 61 | assert(ans.size() == test_label.size()); 62 | 63 | float loss = 0; 64 | int correct = 0; 65 | for (size_t i = 0; i < test_label.size(); i++) { 66 | loss += (int)this->test_label[i] == 1 ? -log(ans[i]) : -log(1.0 - ans[i]); 67 | if (ans[i] > 0.5 && this->test_label[i] == 1) { 68 | correct++; 69 | } else if (ans[i] < 0.5 && this->test_label[i] == 0) { 70 | correct++; 71 | } 72 | } 73 | cout << "total log likelihood = " << loss << " correct = " << setprecision(5) << 74 | (float)correct / test_dataRow_cnt; 75 | 76 | auc->init(&ans, &test_label); 77 | printf(" auc = %.4f\n", auc->Auc()); 78 | } 79 | if (savePath != "") { 80 | ofstream md(savePath); 81 | if(!md.is_open()){ 82 | cout << "save model open file error" << endl; 83 | exit(0); 84 | } 85 | for (auto val : ans) { 86 | md << val << endl; 87 | } 88 | md.close(); 89 | } 90 | } 91 | 92 | void FM_Predict::loadDataRow(string dataPath, bool with_valid_label) { 93 | test_dataSet.clear(); 94 | test_label.clear(); 95 | 96 | ifstream fin_; 97 | string line; 98 | int nchar, y; 99 | size_t fid, fieldid; 100 | float val; 101 | fin_.open(dataPath, ios::in); 102 | if(!fin_.is_open()){ 103 | cout << "open file error!" 
<< endl; 104 | exit(1); 105 | } 106 | vector tmp; 107 | while(!fin_.eof()){ 108 | getline(fin_, line); 109 | tmp.clear(); 110 | const char *pline = line.c_str(); 111 | if (with_valid_label) { 112 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 113 | this->test_label.emplace_back(y); 114 | pline += nchar + 1; 115 | } 116 | } 117 | if(sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 118 | pline += nchar + 1; 119 | while(pline < line.c_str() + (int)line.length() && 120 | sscanf(pline, "%zu:%zu:%f%n", &fieldid, &fid, &val, &nchar) >= 2){ 121 | pline += nchar + 1; 122 | if (fid < fm->feature_cnt) { 123 | assert(!isnan(fid)); 124 | assert(!isnan(val)); 125 | tmp.emplace_back(FMFeature(fid, val, fieldid)); 126 | } 127 | } 128 | } 129 | if (tmp.empty()) { 130 | continue; 131 | } 132 | this->test_dataSet.emplace_back(move(tmp)); 133 | } 134 | this->test_dataRow_cnt = this->test_dataSet.size(); 135 | assert(test_dataRow_cnt > 0); 136 | } 137 | 138 | -------------------------------------------------------------------------------- /LightCTR/predict/fm_predict.h: -------------------------------------------------------------------------------- 1 | // 2 | // fm_predict.hpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef fm_predict_h 10 | #define fm_predict_h 11 | 12 | #include 13 | #include "../fm_algo_abst.h" 14 | #include "../util/evaluator.h" 15 | #include "../util/activations.h" 16 | 17 | class FM_Predict { 18 | public: 19 | FM_Predict(FM_Algo_Abst* p, string _testDataPath, bool with_valid_label) { 20 | this->fm = p; 21 | loadDataRow(_testDataPath, with_valid_label); 22 | auc = new AucEvaluator(); 23 | } 24 | ~FM_Predict() { 25 | delete auc; 26 | } 27 | void Predict(string); 28 | void loadDataRow(string, bool); 29 | 30 | private: 31 | FM_Algo_Abst* fm; 32 | size_t test_dataRow_cnt; 33 | vector > test_dataSet; 34 | vector test_label; 35 | 36 | AucEvaluator* auc; 37 | Sigmoid sigmoid; 38 | }; 39 | 40 | #endif /* fm_predict_h */ 41 | -------------------------------------------------------------------------------- /LightCTR/predict/gbm_predict.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // gbm_predict.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "gbm_predict.h" 10 | #include 11 | 12 | void GBM_Predict::Predict(string savePath) { 13 | static vector ans, tmp; 14 | static vector pLabel; 15 | 16 | tmp.resize(gbm->multiclass); 17 | ans.clear(); 18 | pLabel.clear(); 19 | 20 | assert(gbm->RegTreeRootArr.size() % gbm->multiclass == 0); 21 | 22 | for (size_t rid = 0; rid < this->test_dataRow_cnt; rid++) { // data row 23 | fill(tmp.begin(), tmp.end(), 0); 24 | for (size_t tid = 0; tid < gbm->RegTreeRootArr.size(); tid+=gbm->multiclass) { 25 | for (size_t c = 0; c < gbm->multiclass; c++) { 26 | tmp[c] += gbm->locAtLeafWeight(gbm->RegTreeRootArr[tid + c], 27 | test_dataSet[rid]); 28 | } 29 | } 30 | 31 | float pCTR; 32 | if (gbm->multiclass == 1) { 33 | pCTR = sigmoid.forward(tmp[0]); 34 | pLabel.emplace_back(pCTR > 0.5 ? 
1 : 0); 35 | } else { 36 | softmax.forward(tmp.data(), tmp.size()); 37 | size_t idx = softmax.forward_max(tmp.data(), tmp.size()); 38 | pCTR = tmp[idx]; 39 | pLabel.emplace_back(idx); 40 | } 41 | 42 | assert(!isnan(pCTR)); 43 | ans.emplace_back(pCTR); 44 | } 45 | 46 | if (!test_label.empty()) { 47 | assert(ans.size() == test_label.size()); 48 | float loss = 0; 49 | int correct = 0; 50 | for (size_t i = 0; i < test_label.size(); i++) { 51 | if (gbm->multiclass == 1) { 52 | assert(ans[i] > 0 && ans[i] < 1); 53 | loss += (int)this->test_label[i] == 1 ? log(ans[i]) : log(1.0 - ans[i]); 54 | } else { 55 | assert(ans[i] > 0 && ans[i] <= 1); 56 | loss += log(ans[i]); 57 | } 58 | 59 | assert(!isnan(loss)); 60 | if (this->test_label[i] == pLabel[i]) { 61 | correct++; 62 | } 63 | } 64 | cout << "total log likelihood = " << -loss << " correct = " << setprecision(5) << 65 | (float)correct / test_dataRow_cnt; 66 | 67 | if (gbm->multiclass == 1) { 68 | auc->init(&ans, &test_label); 69 | printf(" auc = %.4f", auc->Auc()); 70 | } 71 | printf("\n"); 72 | } 73 | } 74 | 75 | void GBM_Predict::loadDataRow(string dataPath, bool with_valid_label) { 76 | test_dataSet.clear(); 77 | test_label.clear(); 78 | 79 | ifstream fin_; 80 | string line; 81 | int nchar, y; 82 | size_t fid, rid = 0; 83 | int val; 84 | fin_.open(dataPath, ios::in); 85 | if(!fin_.is_open()){ 86 | cout << "open file error!" << endl; 87 | exit(1); 88 | } 89 | map tmp; 90 | while(!fin_.eof()){ 91 | getline(fin_, line); 92 | tmp.clear(); 93 | const char *pline = line.c_str(); 94 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 95 | pline += nchar + 1; 96 | if (gbm->multiclass > 1) { 97 | assert(y < gbm->multiclass); 98 | } else { 99 | y = y < 5 ? 0 : 1; 100 | } 101 | test_label.emplace_back(y); 102 | fid = 0; 103 | while(pline < line.c_str() + (int)line.length() && 104 | sscanf(pline, "%d%n", &val, &nchar) >= 1){ 105 | pline += nchar + 1; 106 | if (*pline == ',') 107 | pline += 1; 108 | fid++; 109 | if (val == 0) { 110 | continue; 111 | } 112 | tmp[fid] = val; 113 | } 114 | assert(!tmp.empty()); 115 | } 116 | if (tmp.empty()) { 117 | continue; 118 | } 119 | this->test_dataSet.emplace_back(move(tmp)); 120 | rid++; 121 | } 122 | this->test_dataRow_cnt = this->test_dataSet.size(); 123 | assert(test_dataRow_cnt > 0 && test_label.size() == test_dataRow_cnt); 124 | } 125 | 126 | -------------------------------------------------------------------------------- /LightCTR/predict/gbm_predict.h: -------------------------------------------------------------------------------- 1 | // 2 | // gbm_predict.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
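// A minimal illustrative sketch of the scoring loop in GBM_Predict::Predict
// above: RegTreeRootArr is walked in blocks of `multiclass` trees, so each
// boosting round contributes one leaf weight per class; the per-class sums go
// through sigmoid (binary) or softmax plus argmax (multiclass). The helper
// below restates that on a plain scores[round][class] table, with the usual
// max-shift for a stable softmax; it is a sketch, not the project's tree code.
#include <vector>
#include <cmath>
#include <cstddef>
#include <algorithm>

inline std::size_t gbm_argmax_class(const std::vector<std::vector<float>>& scores,
                                    float* prob_out = nullptr) {
    const std::size_t n_class = scores.empty() ? 0 : scores[0].size();
    if (n_class == 0) return 0;
    std::vector<float> total(n_class, 0.0f);
    for (const auto& round : scores)               // one leaf weight per class per round
        for (std::size_t c = 0; c < n_class; ++c)
            total[c] += round[c];
    const float shift = *std::max_element(total.begin(), total.end());
    float denom = 0.0f;
    for (float& t : total) { t = std::exp(t - shift); denom += t; }
    std::size_t best = 0;
    for (std::size_t c = 1; c < n_class; ++c)
        if (total[c] > total[best]) best = c;
    if (prob_out) *prob_out = total[best] / denom; // pCTR analogue for the winning class
    return best;
}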
7 | // 8 | 9 | #ifndef gbm_predict_h 10 | #define gbm_predict_h 11 | 12 | #include 13 | #include "../gbm_algo_abst.h" 14 | #include 15 | #include "../util/evaluator.h" 16 | #include "../util/activations.h" 17 | 18 | class GBM_Predict { 19 | public: 20 | GBM_Predict(GBM_Algo_Abst* p, string _testDataPath, bool with_valid_label) { 21 | this->gbm = p; 22 | loadDataRow(_testDataPath, with_valid_label); 23 | auc = new AucEvaluator(); 24 | } 25 | ~GBM_Predict() { 26 | delete auc; 27 | } 28 | void Predict(string); 29 | void loadDataRow(string, bool); 30 | 31 | private: 32 | GBM_Algo_Abst* gbm; 33 | size_t test_dataRow_cnt; 34 | vector > test_dataSet; 35 | vector test_label; 36 | 37 | AucEvaluator* auc; 38 | Sigmoid sigmoid; 39 | Softmax softmax; 40 | }; 41 | 42 | #endif /* gbm_predict_h */ 43 | -------------------------------------------------------------------------------- /LightCTR/third/install_third.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | set -x -e 3 | 4 | git clone git://github.com/zeromq/libzmq.git || true 5 | cd libzmq 6 | ./autogen.sh 7 | ./configure --prefix=`pwd`/../zeromq 8 | make && make install -------------------------------------------------------------------------------- /LightCTR/third/zeromq/include/zmq_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2007-2016 Contributors as noted in the AUTHORS file 3 | 4 | This file is part of libzmq, the ZeroMQ core engine in C++. 5 | 6 | libzmq is free software; you can redistribute it and/or modify it under 7 | the terms of the GNU Lesser General Public License (LGPL) as published 8 | by the Free Software Foundation; either version 3 of the License, or 9 | (at your option) any later version. 10 | 11 | As a special exception, the Contributors give you permission to link 12 | this library with independent modules to produce an executable, 13 | regardless of the license terms of these independent modules, and to 14 | copy and distribute the resulting executable under terms of your choice, 15 | provided that you also meet, for each linked independent module, the 16 | terms and conditions of the license of that module. An independent 17 | module is a module which is not derived from or based on this library. 18 | If you modify this library, you must extend this exception to your 19 | version of the library. 20 | 21 | libzmq is distributed in the hope that it will be useful, but WITHOUT 22 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 23 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 24 | License for more details. 25 | 26 | You should have received a copy of the GNU Lesser General Public License 27 | along with this program. If not, see . 28 | */ 29 | 30 | /* This file is deprecated, and all its functionality provided by zmq.h */ 31 | /* Note that -Wpedantic compilation requires GCC to avoid using its custom 32 | extensions such as #warning, hence the trick below. Also, pragmas for 33 | warnings or other messages are not standard, not portable, and not all 34 | compilers even have an equivalent concept. 35 | So in the worst case, this include file is treated as silently empty. 
*/ 36 | 37 | #if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) || defined(_MSC_VER) 38 | #if defined(__GNUC__) || defined(__GNUG__) 39 | #pragma GCC diagnostic push 40 | #pragma GCC diagnostic warning "-Wcpp" 41 | #pragma GCC diagnostic ignored "-Werror" 42 | #pragma GCC diagnostic ignored "-Wall" 43 | #endif 44 | #pragma message("Warning: zmq_utils.h is deprecated. All its functionality is provided by zmq.h.") 45 | #if defined(__GNUC__) || defined(__GNUG__) 46 | #pragma GCC diagnostic pop 47 | #endif 48 | #endif 49 | -------------------------------------------------------------------------------- /LightCTR/third/zeromq/lib/libzmq.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/LightCTR/third/zeromq/lib/libzmq.a -------------------------------------------------------------------------------- /LightCTR/train/layer/adapterLayer.h: -------------------------------------------------------------------------------- 1 | // 2 | // adapterLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/25. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef adapterLayer_h 10 | #define adapterLayer_h 11 | 12 | #include "layer_abst.h" 13 | #include 14 | 15 | // Flatten and concat Matrixs into dataRow adapting CNN to FC or LSTM sequences to Attention input 16 | template 17 | class Adapter_Layer : public Layer_Base { 18 | public: 19 | Adapter_Layer(Layer_Base* _prevLayer, size_t flatten_cnt): 20 | Layer_Base(_prevLayer, _prevLayer->output_dimension, _prevLayer->output_dimension) { 21 | this->activeFun = new ActivationFunction(); 22 | this->output_dimension *= flatten_cnt * flatten_cnt; 23 | 24 | printf("Adapter Layer\n"); 25 | } 26 | Adapter_Layer() = delete; 27 | 28 | ~Adapter_Layer() { 29 | } 30 | 31 | vector& forward(const vector& prevLOutput) { 32 | // init ThreadLocal var 33 | Matrix& output_act = *tl_output_act; 34 | MatrixArr& input_delta = *tl_input_delta; 35 | // indicate lazy init once 36 | assert(this->output_dimension == prevLOutput.size() * prevLOutput[0]->size()); 37 | output_act.reset(1, this->output_dimension); 38 | input_delta.arr.resize(this->input_dimension); 39 | FOR(i, this->input_dimension) { 40 | if (!input_delta.arr[i]) { 41 | input_delta.arr[i] = 42 | new Matrix(prevLOutput[0]->x_len, prevLOutput[0]->y_len); 43 | } 44 | } 45 | 46 | const size_t prevLOutput_size = prevLOutput[0]->size(); 47 | FOR(i, prevLOutput.size()) { 48 | const size_t offset = i * prevLOutput_size; 49 | // Flatten data row 50 | memcpy(output_act.getEle(0, offset), prevLOutput[i]->getEle(0, 0), 51 | prevLOutput_size * sizeof(float)); 52 | } 53 | 54 | // init threadlocal wrapper 55 | vector& wrapper = *tl_wrapper; 56 | wrapper.resize(1); 57 | wrapper[0] = &output_act; 58 | return this->nextLayer->forward(wrapper); 59 | } 60 | 61 | void backward(const vector& outputDeltaMatrix) { 62 | auto outputDelta = outputDeltaMatrix[0]->pointer(); 63 | assert(outputDelta->size() == this->output_dimension); 64 | 65 | MatrixArr& input_delta = *tl_input_delta; 66 | 67 | const size_t input_delta_size = input_delta.arr[0]->size(); 68 | FOR(i, this->input_dimension) { 69 | const size_t offset = i * input_delta_size; 70 | memcpy(input_delta.arr[i]->getEle(0, 0), 71 | outputDelta->data() + offset, input_delta_size * sizeof(float)); 72 | } 73 | this->prevLayer->backward(input_delta.arr); 74 | } 75 | 76 | const vector& output() { 77 | Matrix& 
output_act = *tl_output_act; 78 | vector& wrapper = *tl_wrapper; 79 | wrapper[0] = &output_act; 80 | return wrapper; 81 | } 82 | 83 | private: 84 | ThreadLocal tl_output_act; // wx + b with activation 85 | ThreadLocal tl_input_delta; // delta of prevLayer wx+b Z_(L-1) 86 | 87 | ThreadLocal > tl_wrapper; 88 | }; 89 | 90 | #endif /* adapterLayer_h */ 91 | -------------------------------------------------------------------------------- /LightCTR/train/layer/layer_abst.h: -------------------------------------------------------------------------------- 1 | // 2 | // layer_abst.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef layer_abst_h 10 | #define layer_abst_h 11 | 12 | #include 13 | #include 14 | #include "../../common/thread_pool.h" 15 | #include "../../common/lock.h" 16 | #include "../../util/activations.h" 17 | #include "../../util/matrix.h" 18 | #include "../../util/gradientUpdater.h" 19 | #include "../../util/momentumUpdater.h" 20 | #include "../../common/buffer_fusion.h" 21 | #include "assert.h" 22 | 23 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 24 | 25 | class Layer_Base { 26 | public: 27 | Layer_Base(Layer_Base* _prevLayer, size_t _input_dimension, size_t _output_dimension): 28 | input_dimension(_input_dimension), output_dimension(_output_dimension) { 29 | nextLayer = prevLayer = NULL; 30 | if (_prevLayer != NULL) { 31 | assert(_prevLayer->output_dimension == this->input_dimension); 32 | this->prevLayer = _prevLayer; 33 | _prevLayer->nextLayer = this; 34 | bInputLayer = false; 35 | printf("Init %zux%zu ", _input_dimension, _output_dimension); 36 | } else { 37 | bInputLayer = true; 38 | printf("Init Input %zux%zu ", _input_dimension, _output_dimension); 39 | } 40 | } 41 | Layer_Base() = delete; 42 | virtual ~Layer_Base() { 43 | } 44 | 45 | virtual vector& forward(const vector& prevLOutputMatrix) = 0; 46 | 47 | virtual void backward(const vector& outputDeltaMatrix) = 0; 48 | 49 | virtual const vector& output() = 0; 50 | 51 | virtual void registerInitializer(std::shared_ptr > _buf_fusion) { 52 | if (this->nextLayer) { 53 | this->nextLayer->registerInitializer(_buf_fusion); 54 | } 55 | } 56 | 57 | virtual void registerGradient(std::shared_ptr > _buf_fusion) { 58 | if (this->nextLayer) { 59 | this->nextLayer->registerGradient(_buf_fusion); 60 | } 61 | } 62 | 63 | virtual void applyBatchGradient() { // for each mini-batch gradient batch update stage 64 | if (nextLayer) { 65 | nextLayer->applyBatchGradient(); 66 | } 67 | } 68 | 69 | Activation& getActiveFun() const { 70 | assert(activeFun); // Notice to init activeFun in instance 71 | return *activeFun; 72 | } 73 | 74 | Activation* activeFun; 75 | 76 | Layer_Base *nextLayer, *prevLayer; 77 | 78 | size_t input_dimension, output_dimension; 79 | 80 | bool bInputLayer; 81 | 82 | SpinLock lock; 83 | }; 84 | 85 | #endif /* layer_abst_h */ 86 | -------------------------------------------------------------------------------- /LightCTR/train/layer/poolingLayer.h: -------------------------------------------------------------------------------- 1 | // 2 | // poolingLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
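// A minimal illustrative sketch of the index arithmetic in Adapter_Layer
// above: the forward pass flattens input_dimension feature maps into one dense
// row by copying map i to offset i * map_size, and the backward pass is the
// mirror-image scatter of the flat delta back into per-map buffers. The pair
// of helpers below shows that on std::vector, without the project's Matrix or
// ThreadLocal plumbing; the names are illustrative only.
#include <vector>
#include <cstring>
#include <cstddef>

inline std::vector<float> flatten_maps(const std::vector<std::vector<float>>& maps) {
    const std::size_t map_size = maps.empty() ? 0 : maps[0].size();
    std::vector<float> row(maps.size() * map_size);
    for (std::size_t i = 0; i < maps.size(); ++i)
        std::memcpy(row.data() + i * map_size, maps[i].data(),
                    map_size * sizeof(float));     // forward: concat into one data row
    return row;
}

inline std::vector<std::vector<float>> scatter_delta(const std::vector<float>& delta,
                                                     std::size_t n_maps) {
    const std::size_t map_size = n_maps ? delta.size() / n_maps : 0;
    std::vector<std::vector<float>> out(n_maps, std::vector<float>(map_size));
    for (std::size_t i = 0; i < n_maps; ++i)
        std::memcpy(out[i].data(), delta.data() + i * map_size,
                    map_size * sizeof(float));     // backward: split the flat delta per map
    return out;
}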
7 | // 8 | 9 | #ifndef poolingLayer_h 10 | #define poolingLayer_h 11 | 12 | #include 13 | #include "../../util/matrix.h" 14 | #include "layer_abst.h" 15 | 16 | struct Pool_Config { 17 | size_t size; 18 | }; 19 | // Pooling or Maxout 20 | // TODO K-Max Pooling 21 | template 22 | class Max_Pooling_Layer : public Layer_Base { 23 | public: 24 | Max_Pooling_Layer(Layer_Base* _prevLayer, size_t _dimension, Pool_Config _config): 25 | Layer_Base(_prevLayer, _dimension, _dimension), config(_config) { 26 | this->activeFun = new ActivationFunction(); 27 | assert(this->input_dimension == this->output_dimension); 28 | 29 | printf("Pooling Layer\n"); 30 | } 31 | Max_Pooling_Layer() = delete; 32 | 33 | ~Max_Pooling_Layer() { 34 | } 35 | 36 | vector& forward(const vector& prevLOutput) { 37 | assert(prevLOutput.size() == this->input_dimension); 38 | 39 | // init ThreadLocal var 40 | MatrixArr& output_act = *tl_output_act; 41 | output_act.arr.resize(this->output_dimension); 42 | MatrixArr& input_delta = *tl_input_delta; 43 | input_delta.arr.resize(this->input_dimension); 44 | 45 | // do Max pooling 46 | FOR(feamid, this->input_dimension) { 47 | Matrix* mat = prevLOutput[feamid]; 48 | 49 | assert(mat->x_len >= config.size && mat->y_len >= config.size); 50 | 51 | if (input_delta.arr[feamid] == NULL) { 52 | output_act.arr[feamid] = new Matrix((mat->x_len - config.size) / config.size + 1, 53 | (mat->y_len - config.size) / config.size + 1); 54 | input_delta.arr[feamid] = new Matrix(mat->x_len, mat->y_len); 55 | } 56 | 57 | auto cur_out = output_act.arr[feamid]; 58 | cur_out->zeroInit(); 59 | auto cur_in = input_delta.arr[feamid]; 60 | cur_in->zeroInit(); 61 | for (size_t i = 0; i < mat->x_len - config.size + 1; i+= config.size) { 62 | for (size_t j = 0; j < mat->y_len - config.size + 1; j+=config.size) { 63 | float MaxV = *mat->getEle(i, j); 64 | size_t mx = i, my = j; 65 | for (size_t x = i; x < i + config.size; x++) { 66 | for (size_t y = j; y < j + config.size; y++) { 67 | if (MaxV < *mat->getEle(x, y)) { 68 | MaxV = *mat->getEle(x, y); 69 | mx = x, my = y; 70 | } 71 | } 72 | } 73 | *cur_out->getEle(i / config.size, j / config.size) = MaxV; 74 | *cur_in->getEle(mx, my) = 1; 75 | } 76 | } 77 | } 78 | return this->nextLayer->forward(output_act.arr); 79 | } 80 | 81 | void backward(const vector& outputDelta) { 82 | assert(outputDelta.size() == this->output_dimension); 83 | 84 | MatrixArr& input_delta = *tl_input_delta; 85 | 86 | // Unpooling 87 | FOR(fid, this->input_dimension) { 88 | Matrix* mat = input_delta.arr[fid]; 89 | for (size_t i = 0; i < mat->x_len - config.size + 1; i+= config.size) { 90 | for (size_t j = 0; j < mat->y_len - config.size + 1; j+=config.size) { 91 | // loop pooling size 92 | for (size_t x = i; x < i + config.size; x++) { 93 | for (size_t y = j; y < j + config.size; y++) { 94 | if (*mat->getEle(x, y) > 0) { 95 | *mat->getEle(x, y) = *outputDelta[fid]->getEle(i / config.size, j / config.size); 96 | } 97 | } 98 | } 99 | } 100 | } 101 | } 102 | return this->prevLayer->backward(input_delta.arr); 103 | } 104 | 105 | const vector& output() { 106 | MatrixArr& output_act = *tl_output_act; 107 | return output_act.arr; 108 | } 109 | 110 | private: 111 | Pool_Config config; 112 | ThreadLocal tl_output_act; 113 | ThreadLocal tl_input_delta; // mask of max position 114 | }; 115 | 116 | #endif /* poolingLayer_h */ 117 | -------------------------------------------------------------------------------- /LightCTR/train/layer/sampleLayer.h: 
-------------------------------------------------------------------------------- 1 | // 2 | // sampleLayer.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/21. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef sampleLayer_h 10 | #define sampleLayer_h 11 | 12 | #include "fullyconnLayer.h" 13 | #include "../../util/random.h" 14 | 15 | template 16 | class Sample_Layer : public Layer_Base { 17 | public: 18 | Sample_Layer(Layer_Base* _prevLayer, size_t _input_dimension): 19 | Layer_Base(_prevLayer, _input_dimension, _input_dimension >> 1) { 20 | assert((_input_dimension & 1) == 0); 21 | gauss_cnt = _input_dimension >> 1; 22 | noise = new float[gauss_cnt]; 23 | FOR(i, gauss_cnt) { 24 | noise[i] = GaussRand(); // only generate noise for sampling init once 25 | } 26 | bEncoding = false; 27 | 28 | this->activeFun = new ActivationFunction(); 29 | 30 | inner_scale = 1.0f; 31 | 32 | printf("Sample Layer\n"); 33 | } 34 | Sample_Layer() = delete; 35 | ~Sample_Layer() { 36 | delete[] noise; 37 | } 38 | 39 | vector& forward(const vector& prevLOutputMatrix) { 40 | auto prevLOutput = prevLOutputMatrix[0]->pointer(); 41 | assert(prevLOutput->size() == this->input_dimension); 42 | 43 | // init ThreadLocal var 44 | Matrix& output_act = *tl_output_act; 45 | output_act.reset(1, this->output_dimension); 46 | 47 | float gaussDelta = 0.0f; 48 | FOR(i, gauss_cnt) { 49 | // prev layer output is mu and log(sigma^2) 50 | float mu = prevLOutput->at(i); 51 | float logSigma2 = prevLOutput->at(i + gauss_cnt); 52 | 53 | // min[ 0.5 * sum( exp(log_Sigma^2) - (1 + log_Sigma^2) + mu^2 ) ] 54 | gaussDelta += exp(inner_scale * logSigma2) - (1 + logSigma2) + mu * mu; 55 | assert(!isinf(gaussDelta)); 56 | 57 | // standard deviation equal to exp(0.5 * logSigma2) 58 | *output_act.getEle(0, i) = exp(inner_scale * 0.5f * logSigma2) * noise[i] + mu; 59 | assert(!isinf(*output_act.getEle(0, i))); 60 | } 61 | gaussDelta *= 0.5f; 62 | // cout << endl << endl << "gaussDelta = " << gaussDelta << endl << endl; 63 | if (bEncoding) { 64 | return output_act.reference(); 65 | } 66 | // init threadlocal wrapper 67 | vector& wrapper = *tl_wrapper; 68 | wrapper.resize(1); 69 | wrapper[0] = &output_act; 70 | return this->nextLayer->forward(wrapper); 71 | } 72 | 73 | void backward(const vector& outputDeltaMatrix) { 74 | assert(this->prevLayer); 75 | auto outputDelta = outputDeltaMatrix[0]->pointer(); 76 | assert(outputDelta->size() == this->output_dimension); 77 | auto prev_output_act = this->prevLayer->output()[0]->pointer(); 78 | assert(prev_output_act->size() == this->input_dimension); 79 | 80 | // init ThreadLocal var 81 | Matrix& input_delta = *tl_input_delta; 82 | input_delta.reset(1, this->input_dimension); 83 | 84 | FOR(i, gauss_cnt) { 85 | assert(!isnan(outputDelta->at(i))); 86 | auto muPtr = input_delta.getEle(0, i); 87 | auto sigmaPtr = input_delta.getEle(0, i + gauss_cnt); 88 | 89 | // Target Loss about mu and log(sigma^2) 90 | auto sigmaGrad = 0.5f * exp(inner_scale * 0.5f * prev_output_act->at(i + gauss_cnt)) * noise[i]; 91 | *muPtr = outputDelta->at(i); 92 | *sigmaPtr = outputDelta->at(i) * sigmaGrad; 93 | assert(!isinf(*sigmaPtr)); 94 | 95 | // update Gauss Parameters Loss close to Normal distribution 96 | *muPtr += GradientUpdater::__global_learning_rate * prev_output_act->at(i); 97 | *sigmaPtr += GradientUpdater::__global_learning_rate * 98 | (exp(inner_scale * prev_output_act->at(i + gauss_cnt)) - 1.0f); 99 | 100 | assert(!isinf(*sigmaPtr)); 101 | } 102 | 
this->prevLayer->getActiveFun().backward(input_delta.pointer()->data(), 103 | prev_output_act->data(), 104 | input_delta.pointer()->data(), 105 | input_delta.size()); 106 | 107 | vector& wrapper = *tl_wrapper; 108 | wrapper[0] = &input_delta; 109 | this->prevLayer->backward(wrapper); 110 | } 111 | 112 | const vector& output() { 113 | Matrix& output_act = *tl_output_act; 114 | 115 | vector& wrapper = *tl_wrapper; 116 | wrapper[0] = &output_act; 117 | return wrapper; 118 | } 119 | 120 | bool bEncoding; // mark for forward encode 121 | 122 | private: 123 | ThreadLocal > tl_wrapper; 124 | 125 | ThreadLocal tl_output_act; // wx + b with activation 126 | ThreadLocal tl_input_delta; // delta of prevLayer wx+b Z_(L-1) 127 | 128 | float inner_scale; 129 | 130 | float* noise; 131 | size_t gauss_cnt; 132 | }; 133 | 134 | #endif /* sampleLayer_h */ 135 | -------------------------------------------------------------------------------- /LightCTR/train/train_cnn_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_cnn_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/9. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef train_cnn_algo_h 10 | #define train_cnn_algo_h 11 | 12 | #include "../dl_algo_abst.h" 13 | #include "layer/poolingLayer.h" 14 | #include "layer/adapterLayer.h" 15 | #include "layer/convLayer.h" 16 | #include "../distribut/ring_collect.h" 17 | using namespace std; 18 | 19 | template 20 | class Train_CNN_Algo : public DL_Algo_Abst { 22 | public: 23 | Train_CNN_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 24 | size_t _hidden_size, size_t _multiclass_output_cnt = 1): 25 | DL_Algo_Abst( 26 | dataPath, _epoch, _feature_cnt, _hidden_size, _multiclass_output_cnt) { 27 | this->dl_algo = CNN; 28 | initNetwork(_hidden_size); 29 | } 30 | Train_CNN_Algo() = delete; 31 | ~Train_CNN_Algo() { 32 | #ifdef WORKER_RING 33 | delete syncer; 34 | #endif 35 | } 36 | 37 | void initNetwork(size_t hidden_size) { 38 | // Net structure of 28x28: 5x5 12 pool 6 3x3 4 3x3 2 flatten fc-100 39 | this->inputLayer = new Conv_Layer(NULL, 1, 6, CNN_Config{5, 0, 2}); 40 | this->appendNNLayer(this->inputLayer); 41 | 42 | Layer_Base* poolLayer = 43 | new Max_Pooling_Layer(this->inputLayer, 6, Pool_Config{2}); 44 | this->appendNNLayer(poolLayer); 45 | 46 | Layer_Base* hidden1 = 47 | new Conv_Layer(poolLayer, 6, 16, CNN_Config{3, 0, 1}); 48 | this->appendNNLayer(hidden1); 49 | 50 | Layer_Base* hidden2 = 51 | new Conv_Layer(hidden1, 16, 20, CNN_Config{3, 0, 1}); 52 | this->appendNNLayer(hidden2); 53 | 54 | Layer_Base* adapter = new Adapter_Layer(hidden2, 2); 55 | this->appendNNLayer(adapter); 56 | 57 | Layer_Base* fcLayer = 58 | new Fully_Conn_Layer(adapter, 20 * 2 * 2, hidden_size); 59 | this->appendNNLayer(fcLayer); 60 | 61 | this->outputLayer = new Fully_Conn_Layer(fcLayer, hidden_size, 62 | this->multiclass_output_cnt); 63 | this->appendNNLayer(this->outputLayer); 64 | #ifdef WORKER_RING 65 | syncer = new Worker_RingReduce(__global_cluster_worker_cnt); 66 | auto buf_fusion = std::make_shared >(false, false); 67 | this->inputLayer->registerInitializer(buf_fusion); 68 | syncer->syncInitializer(buf_fusion); 69 | puts("[RING] Sync initializer complete"); 70 | #endif 71 | } 72 | 73 | const vector& Predict(size_t rid, vector >& dataRow) { 74 | Matrix*& dataRow_Matrix = *tl_dataRow_Matrix; 75 | if (dataRow_Matrix == NULL) { 76 | dataRow_Matrix = new Matrix(sqrt((float)this->feature_cnt), 77 | 
sqrt((float)this->feature_cnt)); 78 | } 79 | dataRow_Matrix->pointer()->assign(dataRow[rid].begin(), dataRow[rid].end()); 80 | 81 | vector wrapper; 82 | wrapper.resize(1); 83 | wrapper[0] = dataRow_Matrix; 84 | return this->inputLayer->forward(wrapper); 85 | } 86 | 87 | void BP(size_t rid, const vector& grad) { 88 | this->outputLayer->backward(grad); 89 | } 90 | 91 | void applyBP(size_t epoch) const { 92 | #ifdef WORKER_RING 93 | auto buf_fusion = std::make_shared >(false, false); 94 | this->inputLayer->registerGradient(buf_fusion); 95 | syncer->syncGradient(buf_fusion, epoch); 96 | #endif 97 | this->inputLayer->applyBatchGradient(); 98 | } 99 | private: 100 | Worker_RingReduce* syncer; 101 | ThreadLocal tl_dataRow_Matrix; 102 | }; 103 | 104 | #endif /* train_cnn_algo_h */ 105 | -------------------------------------------------------------------------------- /LightCTR/train/train_ffm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_ffm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/19. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "train_ffm_algo.h" 10 | #include "../common/avx.h" 11 | 12 | void Train_FFM_Algo::init() { 13 | L2Reg_ratio = 0.001f; 14 | 15 | learnable_params_cnt = this->feature_cnt * this->field_cnt * this->factor_cnt 16 | + this->feature_cnt; 17 | update_g = new float[learnable_params_cnt]; 18 | updater.learnable_params_cnt(learnable_params_cnt); 19 | 20 | printf("Training FFM\n"); 21 | } 22 | 23 | void Train_FFM_Algo::Train() { 24 | 25 | GradientUpdater::__global_bTraining = true; 26 | GradientUpdater::__global_minibatch_size = dataRow_cnt; 27 | 28 | for (size_t i = 0; i < this->epoch; i++) { 29 | __loss = 0; 30 | __accuracy = 0; 31 | 32 | this->proc_data_left = (int)this->dataRow_cnt; 33 | 34 | size_t thread_hold_dataRow_cnt = (this->dataRow_cnt + this->proc_cnt - 1) / this->proc_cnt; 35 | 36 | for (size_t pid = 0; pid < this->proc_cnt; pid++) { 37 | size_t start_pos = pid * thread_hold_dataRow_cnt; 38 | threadpool->addTask(bind(&Train_FFM_Algo::batchGradCompute, this, start_pos, 39 | min(start_pos + thread_hold_dataRow_cnt, this->dataRow_cnt))); 40 | } 41 | threadpool->wait(); 42 | 43 | printf("Epoch %zu Train Loss = %f Accuracy = %f\n", i, __loss, __accuracy / dataRow_cnt); 44 | // apply gradient 45 | ApplyGrad(); 46 | } 47 | 48 | GradientUpdater::__global_bTraining = false; 49 | } 50 | 51 | void Train_FFM_Algo::batchGradCompute(size_t rbegin, size_t rend) { 52 | for (size_t rid = rbegin; rid < rend; rid++) { // data row 53 | float fm_pred = 0.0f; 54 | 55 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 56 | const size_t fid = dataSet[rid][i].first; 57 | const float X = dataSet[rid][i].second; 58 | const size_t field = dataSet[rid][i].field; 59 | 60 | fm_pred += W[fid] * X; 61 | 62 | for (size_t j = i + 1; j < dataSet[rid].size(); j++) { 63 | const size_t fid2 = dataSet[rid][j].first; 64 | const float X2 = dataSet[rid][j].second; 65 | const size_t field2 = dataSet[rid][j].field; 66 | 67 | float field_w = avx_dotProduct(getV_field(fid, field2, 0), 68 | getV_field(fid2, field, 0), factor_cnt); 69 | fm_pred += field_w * X * X2; 70 | } 71 | } 72 | accumWVGrad(rid, sigmoid.forward(fm_pred)); 73 | } 74 | assert(this->proc_data_left > 0); 75 | this->proc_data_left -= rend - rbegin; 76 | } 77 | 78 | void Train_FFM_Algo::accumWVGrad(size_t rid, float pred) { 79 | const float target = label[rid]; 80 | const float loss = pred - target; 81 | if (loss == 
0) { 82 | return; 83 | } 84 | __loss += target == 1 ? -log(pred) : -log(1.0 - pred); 85 | if (pred > 0.5 && target == 1) { 86 | __accuracy++; 87 | } else if (pred < 0.5 && target == 0) { 88 | __accuracy++; 89 | } 90 | 91 | size_t fid, fid2, field, field2; 92 | float x, x2; 93 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 94 | fid = dataSet[rid][i].first; 95 | x = dataSet[rid][i].second; 96 | field = dataSet[rid][i].field; 97 | 98 | *update_W(fid) += loss * x + L2Reg_ratio * W[fid]; 99 | 100 | for (size_t j = i + 1; j < dataSet[rid].size(); j++) { 101 | fid2 = dataSet[rid][j].first; 102 | x2 = dataSet[rid][j].second; 103 | field2 = dataSet[rid][j].field; 104 | 105 | const float scaler = x * x2 * loss; 106 | const float* v1 = getV_field(fid, field2, 0); 107 | const float* v2 = getV_field(fid2, field, 0); 108 | float* update_v1 = update_V(fid, field2, 0); 109 | float* update_v2 = update_V(fid2, field, 0); 110 | 111 | avx_vecScalerAdd(update_v1, v2, update_v1, scaler, factor_cnt); 112 | avx_vecScalerAdd(update_v1, v1, update_v1, L2Reg_ratio, factor_cnt); 113 | 114 | avx_vecScalerAdd(update_v2, v1, update_v2, scaler, factor_cnt); 115 | avx_vecScalerAdd(update_v2, v2, update_v2, L2Reg_ratio, factor_cnt); 116 | } 117 | } 118 | } 119 | 120 | void Train_FFM_Algo::ApplyGrad() { 121 | updater.update(0, this->feature_cnt, W, update_g); 122 | 123 | float *gradV = update_g + this->feature_cnt; 124 | updater.update(this->feature_cnt, this->feature_cnt * 125 | this->field_cnt * this->factor_cnt, V, gradV); 126 | } 127 | -------------------------------------------------------------------------------- /LightCTR/train/train_ffm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_ffm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/19. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
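// A minimal illustrative sketch of the field-aware update in accumWVGrad
// above: the pairwise term is phi_ij = <v_{i,field(j)}, v_{j,field(i)}> * x_i * x_j,
// so its gradient w.r.t. v_{i,field(j)} is (pred - label) * x_i * x_j * v_{j,field(i)}
// plus the L2 term -- exactly what the paired avx_vecScalerAdd calls
// accumulate. The scalar version below spells out one feature pair; the
// function and variable names are illustrative, not the project's.
#include <vector>
#include <cstddef>

inline void ffm_pair_grad(const std::vector<float>& v_i_fj,   // v_{i, field(j)}
                          const std::vector<float>& v_j_fi,   // v_{j, field(i)}
                          float x_i, float x_j, float loss_grad, float l2,
                          std::vector<float>& g_i_fj,
                          std::vector<float>& g_j_fi) {
    const float scaler = x_i * x_j * loss_grad;    // loss_grad = pred - label
    for (std::size_t f = 0; f < v_i_fj.size(); ++f) {
        g_i_fj[f] += scaler * v_j_fi[f] + l2 * v_i_fj[f];
        g_j_fi[f] += scaler * v_i_fj[f] + l2 * v_j_fi[f];
    }
}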
7 | // 8 | 9 | #ifndef train_ffm_algo_h 10 | #define train_ffm_algo_h 11 | 12 | #include "../fm_algo_abst.h" 13 | #include 14 | #include 15 | #include "../util/activations.h" 16 | #include "../util/gradientUpdater.h" 17 | #include "../common/thread_pool.h" 18 | #include "../common/lock.h" 19 | using namespace std; 20 | 21 | // Field-aware FM 22 | class Train_FFM_Algo : public FM_Algo_Abst { 23 | 24 | public: 25 | Train_FFM_Algo(string _dataPath, size_t _epoch_cnt, 26 | size_t _factor_cnt, size_t _field_cnt): 27 | FM_Algo_Abst(_dataPath, _factor_cnt, _field_cnt), epoch(_epoch_cnt) { 28 | assert(this->feature_cnt != 0); 29 | threadpool = new ThreadPool(this->proc_cnt); 30 | init(); 31 | } 32 | Train_FFM_Algo() = delete; 33 | 34 | ~Train_FFM_Algo() { 35 | delete threadpool; 36 | threadpool = NULL; 37 | } 38 | 39 | void init(); 40 | void Train(); 41 | 42 | private: 43 | size_t epoch; 44 | int proc_data_left; 45 | 46 | Sigmoid sigmoid; 47 | 48 | size_t learnable_params_cnt; 49 | 50 | void batchGradCompute(size_t, size_t); 51 | void accumWVGrad(size_t rid, float pred); 52 | 53 | float *update_g; 54 | inline float* update_W(size_t fid) { 55 | return &update_g[fid]; 56 | } 57 | inline float* update_V(size_t fid, size_t fieldid, size_t facid) { 58 | return &update_g[this->feature_cnt + fid * this->field_cnt * this->factor_cnt 59 | + fieldid * this->factor_cnt + facid]; 60 | } 61 | void ApplyGrad(); 62 | 63 | AdagradUpdater_Num updater; 64 | 65 | ThreadPool *threadpool; 66 | }; 67 | 68 | #endif /* train_ffm_algo_h */ 69 | -------------------------------------------------------------------------------- /LightCTR/train/train_fm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_fm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
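// A worked example of the flat buffer layout used by the trainers above: the
// single gradient buffer update_g holds the W block followed by the V block,
// so update_V(fid, fieldid, facid) resolves to
//   feature_cnt + fid * field_cnt * factor_cnt + fieldid * factor_cnt + facid
// (the plain FM trainer drops the field dimension). The helper below simply
// re-derives that offset; it is an illustration, not project code.
#include <cstddef>
#include <cassert>

inline std::size_t ffm_v_offset(std::size_t feature_cnt, std::size_t field_cnt,
                                std::size_t factor_cnt, std::size_t fid,
                                std::size_t fieldid, std::size_t facid) {
    assert(fid < feature_cnt && fieldid < field_cnt && facid < factor_cnt);
    return feature_cnt                             // skip the W block
         + fid * field_cnt * factor_cnt            // per-feature table of field vectors
         + fieldid * factor_cnt                    // the vector for this field
         + facid;                                  // the factor inside it
}
// e.g. feature_cnt = 10, field_cnt = 3, factor_cnt = 4:
//   ffm_v_offset(10, 3, 4, 2, 1, 3) == 10 + 24 + 4 + 3 == 41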
7 | // 8 | 9 | #include "train_fm_algo.h" 10 | #include "../common/avx.h" 11 | 12 | void Train_FM_Algo::init() { 13 | L2Reg_ratio = 0.001f; 14 | #ifdef FM 15 | learnable_params_cnt = this->feature_cnt * (this->factor_cnt + 1); 16 | #else 17 | learnable_params_cnt = this->feature_cnt; 18 | #endif 19 | sumVX = new float[this->dataRow_cnt * this->factor_cnt]; 20 | assert(sumVX); 21 | memset(sumVX, 0, sizeof(float) * this->dataRow_cnt * this->factor_cnt); 22 | 23 | update_g = new float[learnable_params_cnt]; 24 | assert(update_g); 25 | updater.learnable_params_cnt(learnable_params_cnt); 26 | } 27 | 28 | void Train_FM_Algo::flash() { 29 | memset(update_g, 0, sizeof(float) * learnable_params_cnt); 30 | #ifdef FM 31 | memset(sumVX, 0, sizeof(float) * dataRow_cnt * factor_cnt); 32 | #endif 33 | } 34 | 35 | void Train_FM_Algo::Train() { 36 | 37 | GradientUpdater::__global_bTraining = true; 38 | GradientUpdater::__global_minibatch_size = dataRow_cnt; 39 | 40 | for (size_t i = 0; i < this->epoch_cnt; i++) { 41 | __loss = 0; 42 | __accuracy = 0; 43 | 44 | flash(); 45 | this->proc_data_left = (int)this->dataRow_cnt; 46 | 47 | size_t thread_hold_dataRow_cnt = (this->dataRow_cnt + this->proc_cnt - 1) / this->proc_cnt; 48 | 49 | for (size_t pid = 0; pid < this->proc_cnt; pid++) { 50 | size_t start_pos = pid * thread_hold_dataRow_cnt; 51 | threadpool->addTask(bind(&Train_FM_Algo::batchGradCompute, this, start_pos, 52 | min(start_pos + thread_hold_dataRow_cnt, this->dataRow_cnt))); 53 | } 54 | threadpool->wait(); 55 | 56 | printf("Epoch %zu Train Loss = %f Accuracy = %f\n", i, __loss, __accuracy / dataRow_cnt); 57 | ApplyGrad(); 58 | } 59 | 60 | GradientUpdater::__global_bTraining = false; 61 | } 62 | 63 | void Train_FM_Algo::batchGradCompute(size_t rbegin, size_t rend) { 64 | 65 | vector tmp_vec; 66 | tmp_vec.resize(factor_cnt); 67 | 68 | for (size_t rid = rbegin; rid < rend; rid++) { // data row 69 | float fm_pred = 0.0f; 70 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 71 | const size_t fid = dataSet[rid][i].first; 72 | 73 | const float X = dataSet[rid][i].second; 74 | fm_pred += W[fid] * X; 75 | #ifdef FM 76 | avx_vecScale(getV(fid, 0), tmp_vec.data(), factor_cnt, X); 77 | avx_vecAdd(getSumVX(rid, 0), tmp_vec.data(), getSumVX(rid, 0), factor_cnt); 78 | fm_pred -= 0.5 * avx_dotProduct(tmp_vec.data(), tmp_vec.data(), factor_cnt); 79 | #endif 80 | } 81 | #ifdef FM 82 | fm_pred += 0.5 * avx_dotProduct(getSumVX(rid, 0), getSumVX(rid, 0), factor_cnt); 83 | #endif 84 | accumWVGrad(rid, sigmoid.forward(fm_pred)); 85 | } 86 | 87 | this->proc_data_left -= rend - rbegin; 88 | } 89 | 90 | void Train_FM_Algo::accumWVGrad(size_t rid, float pred) { 91 | const float target = label[rid]; 92 | 93 | __loss += target == 1 ? 
-log(pred) : -log(1.0 - pred); 94 | if (pred > 0.5 && target == 1) { 95 | __accuracy++; 96 | } else if (pred < 0.5 && target == 0) { 97 | __accuracy++; 98 | } 99 | 100 | size_t fid; 101 | float x; 102 | vector tmp_vec; 103 | tmp_vec.resize(factor_cnt); 104 | 105 | for (size_t i = 0; i < dataSet[rid].size(); i++) { 106 | fid = dataSet[rid][i].first; 107 | x = dataSet[rid][i].second; 108 | const float gradW = LogisticGradW(pred, target, x) + L2Reg_ratio * W[fid]; 109 | *update_W(fid) += gradW; 110 | #ifdef FM 111 | float* ptr = update_V(fid, 0); 112 | avx_vecScalerAdd(getSumVX(rid, 0), getV(fid, 0), 113 | tmp_vec.data(), -x, factor_cnt); 114 | avx_vecScalerAdd(ptr, tmp_vec.data(), ptr, gradW, factor_cnt); 115 | avx_vecScalerAdd(ptr, getV(fid, 0), ptr, L2Reg_ratio, factor_cnt); 116 | #endif 117 | } 118 | } 119 | 120 | void Train_FM_Algo::ApplyGrad() { 121 | 122 | updater.update(0, this->feature_cnt, W, update_g); 123 | #ifdef FM 124 | float *gradV = update_g + this->feature_cnt; 125 | updater.update(this->feature_cnt, this->feature_cnt * this->factor_cnt, V, gradV); 126 | #endif 127 | } 128 | -------------------------------------------------------------------------------- /LightCTR/train/train_fm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_fm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/23. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef train_fm_algo_h 10 | #define train_fm_algo_h 11 | 12 | #include "../fm_algo_abst.h" 13 | #include 14 | #include 15 | #include "../util/activations.h" 16 | #include "../util/gradientUpdater.h" 17 | #include "../common/thread_pool.h" 18 | #include "../common/lock.h" 19 | using namespace std; 20 | 21 | class Train_FM_Algo : public FM_Algo_Abst { 22 | public: 23 | Train_FM_Algo(string _dataPath, size_t _epoch_cnt, 24 | size_t _factor_cnt): 25 | FM_Algo_Abst(_dataPath, _factor_cnt), epoch_cnt(_epoch_cnt) { 26 | assert(this->feature_cnt != 0); 27 | init(); 28 | threadpool = new ThreadPool(this->proc_cnt); 29 | } 30 | Train_FM_Algo() = delete; 31 | 32 | ~Train_FM_Algo() { 33 | delete [] update_g; 34 | delete threadpool; 35 | threadpool = NULL; 36 | } 37 | 38 | void init(); 39 | void Train(); 40 | 41 | private: 42 | ThreadPool *threadpool; 43 | int proc_data_left; 44 | size_t epoch_cnt; 45 | 46 | size_t learnable_params_cnt; 47 | 48 | void flash(); 49 | 50 | Sigmoid sigmoid; 51 | 52 | void batchGradCompute(size_t, size_t); 53 | void accumWVGrad(size_t, float); 54 | 55 | float *update_g; 56 | inline float* update_W(size_t fid) const { 57 | return &update_g[fid]; 58 | } 59 | inline float* update_V(size_t fid, size_t facid) const { 60 | return &update_g[this->feature_cnt + fid * this->factor_cnt + facid]; 61 | } 62 | void ApplyGrad(); 63 | }; 64 | 65 | #endif /* train_fm_algo_h */ 66 | -------------------------------------------------------------------------------- /LightCTR/train/train_gbm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_gbm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/9/26. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
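// [Editorial note, not part of the original source] The inline helpers further down
// (grad/hess/weight/gain) follow the second-order boosting formulation: for a node with
// gradient sum G and hessian sum H, the optimal leaf weight is -T(G) / (H + lambda) and
// its structure score is T(G)^2 / (H + lambda), where T is soft-thresholding by lambda
// (ThresholdL1). A split is typically scored as score(L) + score(R) - score(parent); the
// selection logic used here lives in train_gbm_algo.cpp, which is not reproduced in this
// excerpt. A minimal sketch of the split-gain arithmetic (illustrative only):
//
//   inline float soft_threshold(float g, float lambda) {
//       if (g > +lambda) return g - lambda;
//       if (g < -lambda) return g + lambda;
//       return 0.0f;
//   }
//   inline float leaf_score(float G, float H, float lambda) {
//       const float t = soft_threshold(G, lambda);
//       return t * t / (H + lambda);
//   }
//   inline float split_gain(float GL, float HL, float GR, float HR, float lambda) {
//       return leaf_score(GL, HL, lambda) + leaf_score(GR, HR, lambda)
//            - leaf_score(GL + GR, HL + HR, lambda);
//   }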
7 | // 8 | 9 | #ifndef train_gbm_algo_h 10 | #define train_gbm_algo_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "../common/thread_pool.h" 16 | #include "../common/lock.h" 17 | #include "../util/random.h" 18 | #include "../util/activations.h" 19 | #include "../gbm_algo_abst.h" 20 | 21 | class Train_GBM_Algo : public GBM_Algo_Abst { 22 | struct SplitNodeStat_Thread { 23 | float sumGrad, sumHess; 24 | float gain; 25 | float split_threshold; 26 | float last_value_toCheck; 27 | bool dataNAN_go_Right; 28 | int split_feature_index; 29 | SplitNodeStat_Thread() { 30 | gain = 0, split_feature_index = -1, split_threshold = 0; 31 | dataNAN_go_Right = 0; 32 | clear(); 33 | } 34 | inline void clear() { 35 | sumGrad = 0.0f; 36 | sumHess = 0.0f; 37 | last_value_toCheck = 1e-8; 38 | } 39 | inline bool needUpdate(float splitGain, size_t split_index) { 40 | assert(!isnan(splitGain)); 41 | assert(split_index >= 0); 42 | if (split_feature_index <= split_index) { 43 | return splitGain > this->gain; 44 | } else { 45 | return !(this->gain > splitGain); 46 | } 47 | } 48 | }; 49 | public: 50 | Train_GBM_Algo(string _dataPath, size_t _epoch_cnt, size_t _maxDepth, 51 | size_t _minLeafW, size_t _multiclass): 52 | GBM_Algo_Abst(_dataPath, _maxDepth, _minLeafW, _multiclass), epoch_cnt(_epoch_cnt) { 53 | proc_cnt = thread::hardware_concurrency(); 54 | init(); 55 | threadpool = new ThreadPool(this->proc_cnt); 56 | } 57 | Train_GBM_Algo() = delete; 58 | 59 | ~Train_GBM_Algo() { 60 | delete [] sampleDataSetIndex; 61 | delete [] sampleFeatureSetIndex; 62 | delete [] dataRow_LocAtTree; 63 | delete [] splitNodeStat_thread; 64 | } 65 | 66 | void init(); 67 | void Train(); 68 | void flash(RegTreeNode *, size_t); 69 | void findSplitFeature(size_t, size_t, size_t, bool, size_t); 70 | void findSplitFeature_Wrapper(size_t, size_t, size_t, size_t); 71 | 72 | inline void sample() { 73 | memset(sampleDataSetIndex, 0, sizeof(bool) * this->dataRow_cnt); 74 | memset(dataRow_LocAtTree, NULL, sizeof(RegTreeNode*) * this->dataRow_cnt); 75 | for (size_t rid = 0; rid < this->dataRow_cnt; rid++) { 76 | if(SampleBinary(0.7)) 77 | sampleDataSetIndex[rid] = 1; 78 | } 79 | memset(sampleFeatureSetIndex, 0, sizeof(bool) * this->feature_cnt); 80 | for (size_t fid = 0; fid < this->feature_cnt; fid++) { 81 | if(dataSet_feature[fid].size() == 0) 82 | continue; 83 | if(SampleBinary(0.7)) 84 | sampleFeatureSetIndex[fid] = 1; 85 | } 86 | } 87 | 88 | inline float grad(float pred, float label) { 89 | return pred - label; 90 | } 91 | inline float hess(float pred) { 92 | return pred * (1 - pred); 93 | } 94 | inline float weight(float sumGrad, float sumHess) { 95 | return - ThresholdL1(sumGrad, lambda) / (sumHess + lambda); 96 | } 97 | inline float gain(float sumGrad, float sumHess) { 98 | return ThresholdL1(sumGrad, lambda) * ThresholdL1(sumGrad, lambda) / (sumHess + lambda); 99 | } 100 | inline float ThresholdL1(float w, float lambda) { 101 | if (w > +lambda) return w - lambda; 102 | if (w < -lambda) return w + lambda; 103 | return 0.0; 104 | } 105 | 106 | private: 107 | ThreadPool *threadpool; 108 | SpinLock lock; 109 | size_t proc_cnt; 110 | int proc_left; 111 | SplitNodeStat_Thread* splitNodeStat_thread; 112 | 113 | bool* sampleDataSetIndex; 114 | bool* sampleFeatureSetIndex; 115 | RegTreeNode** dataRow_LocAtTree; 116 | size_t epoch_cnt; 117 | 118 | Softmax softmax; 119 | Sigmoid sigmoid; 120 | 121 | float eps_feature_value, lambda, learning_rate; 122 | }; 123 | 124 | #endif /* train_gbm_algo_h */ 125 | 
-------------------------------------------------------------------------------- /LightCTR/train/train_gmm_algo.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // train_gmm_algo.cpp 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #include "train_gmm_algo.h" 10 | #include 11 | #include "../util/random.h" 12 | #include "../common/avx.h" 13 | 14 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 15 | const float PI = acos(-1); 16 | const float Log2PI = log(2 * PI); 17 | 18 | // log(exp(a) + exp(b)) 19 | inline float log_sum(float a, float b) { 20 | const float vmin = std::min(a, b); 21 | const float vmax = std::max(a, b); 22 | if (vmax > vmin + 30) { 23 | return vmax; 24 | } else { 25 | return vmax + std::log(1.0 + std::exp(vmin - vmax)); 26 | } 27 | } 28 | 29 | void Train_GMM_Algo::init() { 30 | gaussModels = new Gauss[cluster_cnt]; 31 | latentVar.resize(dataRow_cnt * cluster_cnt); 32 | FOR(i,cluster_cnt) { 33 | gaussModels[i].mu = new float[feature_cnt]; 34 | gaussModels[i].sigma = new float[feature_cnt]; 35 | memset(gaussModels[i].mu, 0, sizeof(float) * feature_cnt); 36 | FOR(fid, feature_cnt) { 37 | gaussModels[i].mu[fid] = UniformNumRand() - 0.5f; 38 | gaussModels[i].sigma[fid] = 5.0f; 39 | } 40 | gaussModels[i].weight = 1.0f / cluster_cnt; 41 | } 42 | } 43 | 44 | // Log Probability Density Function of Multivariate Gauss Distribution 45 | float Train_GMM_Algo::GaussianLPDF(size_t gasid, size_t rid) { 46 | float expN = 0, LogDetSigma = 0.0f, tmp = 0; 47 | FOR(fid, feature_cnt) { 48 | tmp = dataSet[rid][fid] * scale - gaussModels[gasid].mu[fid]; 49 | expN += tmp * tmp / gaussModels[gasid].sigma[fid]; 50 | LogDetSigma += log(gaussModels[gasid].sigma[fid]); 51 | } 52 | assert(!isnan(expN) && !isinf(expN) && !isnan(LogDetSigma) && !isinf(LogDetSigma)); 53 | tmp = log(gaussModels[gasid].weight) - 0.5 * (expN + LogDetSigma + feature_cnt * Log2PI); 54 | // assert(tmp < 0); 55 | return tmp; 56 | } 57 | 58 | vector* Train_GMM_Algo::Train_EStep() { 59 | FOR(rid,dataRow_cnt) { 60 | float LogSumPDF = 0; 61 | FOR(gasid,cluster_cnt) { 62 | gaussModels[gasid].pdf_tmp = GaussianLPDF(gasid, rid); 63 | if (gasid == 0) { 64 | LogSumPDF = gaussModels[gasid].pdf_tmp; 65 | } else { 66 | LogSumPDF = log_sum(LogSumPDF, gaussModels[gasid].pdf_tmp); 67 | } 68 | } 69 | // Normalization 70 | float expSum = 0; 71 | FOR(gasid,cluster_cnt) { 72 | float tmp = exp(gaussModels[gasid].pdf_tmp - LogSumPDF); 73 | assert(tmp <= 1); 74 | latentVar[rid * cluster_cnt + gasid] = tmp; 75 | expSum += tmp; 76 | } 77 | float* ptr = latentVar.data() + rid * cluster_cnt; 78 | avx_vecScale(ptr, ptr, cluster_cnt, 1.0 / expSum); 79 | } 80 | return &latentVar; 81 | } 82 | 83 | float Train_GMM_Algo::Train_MStep(const vector* latentVar) { 84 | FOR(gasid, cluster_cnt) { 85 | threadpool->addTask([&, gasid]() { 86 | float sumWeight = 0; 87 | FOR(rid,dataRow_cnt) { 88 | sumWeight += latentVar->at(rid * cluster_cnt + gasid); 89 | } 90 | assert(sumWeight > 0 && sumWeight < dataRow_cnt); 91 | gaussModels[gasid].sumRid_tmp = sumWeight; 92 | // update new gauss weight 93 | gaussModels[gasid].weight = sumWeight / dataRow_cnt; 94 | }); 95 | } 96 | threadpool->wait(); 97 | 98 | FOR(gasid, cluster_cnt) { 99 | threadpool->addTask([&, gasid]() { 100 | auto model = gaussModels[gasid]; 101 | // update new gauss mu and sigma 102 | FOR(fid, feature_cnt) { 103 | float sum_mu = 0.0f, sum_sigma = 0.0f; 104 | FOR(rid, 
dataRow_cnt) { 105 | sum_mu += latentVar->at(rid * cluster_cnt + gasid) * dataSet[rid][fid] * scale; 106 | const float t = dataSet[rid][fid] * scale - model.mu[fid]; 107 | sum_sigma += latentVar->at(rid * cluster_cnt + gasid) * t * t; 108 | } 109 | model.mu[fid] = sum_mu / model.sumRid_tmp; 110 | model.sigma[fid] = sum_sigma / model.sumRid_tmp; 111 | if (model.sigma[fid] < 0.01) { 112 | model.sigma[fid] = 0.01; // avoid detSigma beyand precision 113 | } 114 | } 115 | }); 116 | } 117 | threadpool->wait(); 118 | 119 | // compute log likelihood ELOB 120 | float likelihood = 0.0f; 121 | FOR(rid,dataRow_cnt) { 122 | float tmp = 0.0, raw_log_sum = 0.0; 123 | FOR(gasid,cluster_cnt) { 124 | tmp = GaussianLPDF(gasid, rid); 125 | if (gasid == 0) { 126 | raw_log_sum = tmp; 127 | } else { 128 | raw_log_sum = log_sum(raw_log_sum, tmp); 129 | } 130 | } 131 | likelihood += raw_log_sum; 132 | } 133 | return likelihood; 134 | } 135 | 136 | vector Train_GMM_Algo::Predict() { 137 | vector ans = vector(); 138 | ans.reserve(dataRow_cnt); 139 | FOR(rid,dataRow_cnt) { 140 | int whichTopic = -1; 141 | float maxP = 0.0f, tmp; 142 | FOR(gasid,cluster_cnt) { 143 | tmp = GaussianLPDF(gasid, rid); 144 | if (whichTopic == -1 || tmp > maxP) { 145 | maxP = tmp, whichTopic = (int)gasid; 146 | } 147 | } 148 | ans.emplace_back(whichTopic); 149 | } 150 | return ans; 151 | } 152 | 153 | void Train_GMM_Algo::printArguments() { 154 | ofstream md("./output/gmm_cluster.txt"); 155 | if(!md.is_open()){ 156 | cout<<"save model open file error" << endl; 157 | exit(1); 158 | } 159 | FOR(gasid, cluster_cnt) { 160 | md << "cluster " << gasid << " weight ="; 161 | md << " " << gaussModels[gasid].weight << endl; 162 | md << "cluster " << gasid << " mu ="; 163 | FOR(fid, feature_cnt) { 164 | md << " " << gaussModels[gasid].mu[fid]; 165 | } 166 | md << endl; 167 | md << "cluster " << gasid << " sigma ="; 168 | FOR(fid, feature_cnt) { 169 | md << " " << gaussModels[gasid].sigma[fid]; 170 | } 171 | md << endl; 172 | } 173 | md.close(); 174 | } 175 | 176 | -------------------------------------------------------------------------------- /LightCTR/train/train_gmm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_gmm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/13. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
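// [Editorial note, not part of the original source] GaussianLPDF in train_gmm_algo.cpp
// above evaluates a diagonal-covariance Gaussian in log space,
//   log p(x | k) = log w_k - 0.5 * ( sum_d (x_d - mu_d)^2 / var_d
//                                    + sum_d log var_d + D * log(2 * pi) ),
// and the E-step normalizes the per-cluster responsibilities with a log-sum-exp helper to
// avoid underflow. A minimal self-contained sketch of both pieces (illustrative names):
//
//   #include <algorithm>
//   #include <cmath>
//   #include <cstddef>
//   inline float log_sum_exp(float a, float b) {
//       const float hi = std::max(a, b), lo = std::min(a, b);
//       return (hi > lo + 30.f) ? hi : hi + std::log(1.f + std::exp(lo - hi));
//   }
//   inline float diag_gauss_logpdf(const float* x, const float* mu, const float* var,
//                                  std::size_t dim, float log_weight) {
//       float quad = 0.f, log_det = 0.f;
//       for (std::size_t d = 0; d < dim; ++d) {
//           const float t = x[d] - mu[d];
//           quad += t * t / var[d];
//           log_det += std::log(var[d]);
//       }
//       return log_weight - 0.5f * (quad + log_det + dim * std::log(2.f * std::acos(-1.f)));
//   }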
7 | // 8 | 9 | #ifndef train_gmm_algo_h 10 | #define train_gmm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../em_algo_abst.h" 16 | using namespace std; 17 | 18 | class Train_GMM_Algo : public EM_Algo_Abst > { 19 | 20 | struct Point { 21 | float* data; 22 | ~Point() { 23 | delete [] data; 24 | } 25 | }; 26 | struct Gauss { 27 | float* mu; 28 | float* sigma; // simple covariance to diagonal matrix 29 | float weight; 30 | float pdf_tmp; 31 | float sumRid_tmp; 32 | Gauss() { 33 | pdf_tmp = sumRid_tmp = 0; 34 | } 35 | }; 36 | public: 37 | Train_GMM_Algo(string _dataFile, size_t _epoch, size_t _cluster_cnt, 38 | size_t _feature_cnt, float _scale = 1.0f): 39 | EM_Algo_Abst(_dataFile, _epoch, _feature_cnt), cluster_cnt(_cluster_cnt), scale(_scale) { 40 | threadpool = new ThreadPool(thread::hardware_concurrency()); 41 | init(); 42 | } 43 | Train_GMM_Algo() = delete; 44 | 45 | ~Train_GMM_Algo() { 46 | for (size_t i = 0; i < cluster_cnt; i++) { 47 | delete [] gaussModels[i].mu; 48 | } 49 | delete [] gaussModels; 50 | delete threadpool; 51 | threadpool = NULL; 52 | } 53 | 54 | void init(); 55 | vector* Train_EStep(); 56 | float Train_MStep(const vector*); 57 | vector Predict(); 58 | 59 | float GaussianLPDF(size_t gasid, size_t rid); 60 | void printArguments(); 61 | 62 | size_t cluster_cnt; 63 | 64 | private: 65 | float scale; 66 | Gauss *gaussModels; 67 | vector latentVar; 68 | 69 | ThreadPool *threadpool; 70 | }; 71 | 72 | #endif /* train_gmm_algo_h */ 73 | -------------------------------------------------------------------------------- /LightCTR/train/train_nfm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_nfm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/6. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
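// [Editorial note, not part of the original source] The class below combines a "wide"
// linear/FM part (update_W / update_V, accumWideGrad) with a "deep" stack of fully
// connected layers (inputLayer/outputLayer, accumDeepGrad); both write into the single
// gradient buffer update_g laid out as [ W (feature_cnt) | V (feature_cnt * factor_cnt) ].
// For orientation only: a standard NFM feeds the deep part with bi-interaction pooling of
// the embedded features, the element-wise analogue of the FM identity sketched in
// train_fm_algo.cpp above; the exact wiring used here is in train_nfm_algo.cpp, which is
// not reproduced in this excerpt. Element-wise pooling sketch (hypothetical names):
//
//   #include <cstddef>
//   #include <vector>
//   inline std::vector<float> bi_interaction(const std::vector<float>& sum_vx,      // sum_i v_i * x_i
//                                            const std::vector<float>& sum_vx_sq) { // sum_i (v_i * x_i)^2
//       std::vector<float> out(sum_vx.size());
//       for (std::size_t f = 0; f < out.size(); ++f)
//           out[f] = 0.5f * (sum_vx[f] * sum_vx[f] - sum_vx_sq[f]);
//       return out;
//   }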
7 | // 8 | 9 | #ifndef train_nfm_algo_h 10 | #define train_nfm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../fm_algo_abst.h" 15 | #include "layer/fullyconnLayer.h" 16 | 17 | // Wide-Deep Neural Factorization Machine 18 | class Train_NFM_Algo : public FM_Algo_Abst { 19 | 20 | public: 21 | Train_NFM_Algo(string _dataPath, size_t _epoch_cnt, size_t _factor_cnt, 22 | size_t _hidden_layer_size): 23 | FM_Algo_Abst(_dataPath, _factor_cnt), epoch(_epoch_cnt), hidden_layer_size(_hidden_layer_size) { 24 | assert(this->feature_cnt != 0); 25 | threadpool = new ThreadPool(1); 26 | init(); 27 | } 28 | 29 | ~Train_NFM_Algo() { 30 | delete [] update_g; 31 | delete threadpool; 32 | threadpool = NULL; 33 | } 34 | 35 | void init(); 36 | void Train(); 37 | 38 | private: 39 | Train_NFM_Algo() = delete; 40 | 41 | size_t epoch; 42 | size_t batch_size; 43 | 44 | size_t hidden_layer_size; 45 | Fully_Conn_Layer *inputLayer, *outputLayer; 46 | Sigmoid sigmoid; 47 | 48 | size_t learnable_params_cnt; 49 | 50 | void batchGradCompute(size_t, size_t); 51 | void accumWideGrad(size_t, float); 52 | void accumDeepGrad(size_t, const vector&); 53 | 54 | float *update_g; 55 | inline float* update_W(size_t fid) { 56 | return &update_g[fid]; 57 | } 58 | inline float* update_V(size_t fid, size_t facid) { 59 | return &update_g[this->feature_cnt + fid * this->factor_cnt + facid]; 60 | } 61 | void ApplyGrad(); 62 | 63 | float loss; 64 | size_t accuracy; 65 | AdagradUpdater_Num updater; 66 | 67 | ThreadLocal tl_fc_input_Matrix, tl_fc_bp_Matrix; 68 | ThreadLocal > tl_wrapper; 69 | 70 | ThreadPool *threadpool; 71 | }; 72 | 73 | #endif /* train_nfm_algo_h */ 74 | -------------------------------------------------------------------------------- /LightCTR/train/train_rnn_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_rnn_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/9. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
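// [Editorial note, not part of the original source] initNetwork below wires the model as
//   LSTM_Unit(28 -> hidden_size) -> Attention_Unit(hidden_size) -> FC(hidden_size -> 72)
//   -> FC(72 -> multiclass_output_cnt),
// and Predict feeds each data row to the LSTM as `batch_size` consecutive 28-value slices
// (one image row per recurrent step). A minimal sketch of that slicing, with hypothetical
// names (FeedFn stands in for the layer's forward call):
//
//   #include <cassert>
//   #include <cstddef>
//   #include <vector>
//   template <typename FeedFn>   // FeedFn: void(const float*, std::size_t)
//   void feed_row_as_steps(const std::vector<float>& row, std::size_t steps,
//                          std::size_t step_dim, FeedFn feed) {
//       assert(row.size() == steps * step_dim);
//       for (std::size_t t = 0; t < steps; ++t)
//           feed(row.data() + t * step_dim, step_dim);   // one recurrent step per slice
//   }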
7 | // 8 | 9 | #ifndef train_rnn_algo_h 10 | #define train_rnn_algo_h 11 | 12 | #include "../dl_algo_abst.h" 13 | #include "unit/lstm_unit.h" 14 | #include "unit/attention_unit.h" 15 | using namespace std; 16 | 17 | template 18 | class Train_RNN_Algo : public DL_Algo_Abst { 20 | public: 21 | Train_RNN_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 22 | size_t _hidden_size, size_t _recurrent_cnt, size_t _multiclass_output_cnt = 1): 23 | DL_Algo_Abst( 24 | dataPath, _epoch, _feature_cnt, _hidden_size, _multiclass_output_cnt), 25 | batch_size(_recurrent_cnt), hidden_size(_hidden_size) { 26 | this->dl_algo = RNN; 27 | initNetwork(hidden_size); 28 | } 29 | Train_RNN_Algo() = delete; 30 | 31 | ~Train_RNN_Algo() { 32 | } 33 | 34 | void initNetwork(size_t hidden_size) { 35 | inputLSTM = new LSTM_Unit(28, hidden_size, batch_size); 36 | this->appendNNLayer(inputLSTM); 37 | attentionLayer = 38 | new Attention_Unit(hidden_size, /*fc_hidden*/20, batch_size); 39 | this->appendNNLayer(attentionLayer); 40 | fcLayer = new Fully_Conn_Layer(attentionLayer, hidden_size, 72); 41 | this->appendNNLayer(fcLayer); 42 | this->outputLayer = 43 | new Fully_Conn_Layer(fcLayer, 72, this->multiclass_output_cnt); 44 | this->appendNNLayer(this->outputLayer); 45 | } 46 | 47 | vector& Predict(size_t rid, vector >& dataRow) { 48 | static Matrix* dataRow_Matrix = new Matrix(1, 28); 49 | static Matrix* dataRow_Matrix_fc = new Matrix(1, hidden_size); 50 | static vector tmp; 51 | tmp.resize(1); 52 | tmp[0] = dataRow_Matrix; 53 | 54 | auto begin = dataRow[rid].begin(); 55 | auto end = begin; 56 | FOR(i, batch_size) { 57 | begin = dataRow[rid].begin() + i * 28; 58 | end = dataRow[rid].begin() + (i + 1) * 28; 59 | dataRow_Matrix->pointer()->assign(begin, end); 60 | inputLSTM->forward(tmp); 61 | } 62 | assert(end == dataRow[rid].end()); 63 | 64 | // Attention Unit 65 | auto pred = attentionLayer->forward(inputLSTM->seq_output()); 66 | 67 | assert(pred.size() == hidden_size); 68 | dataRow_Matrix_fc->pointer()->assign(pred.begin(), pred.end()); 69 | tmp[0] = dataRow_Matrix_fc; 70 | return this->fcLayer->forward(tmp); 71 | } 72 | 73 | void BP(size_t rid, const vector& grad) { 74 | assert(GradientUpdater::__global_bTraining); 75 | this->outputLayer->backward(grad); 76 | inputLSTM->backward(attentionLayer->inputDelta()); 77 | } 78 | 79 | void applyBP(size_t epoch) const { 80 | inputLSTM->applyBatchGradient(); 81 | attentionLayer->applyBatchGradient(); 82 | } 83 | 84 | private: 85 | size_t batch_size, hidden_size; 86 | LSTM_Unit* inputLSTM; 87 | Attention_Unit* attentionLayer; 88 | Layer_Base* fcLayer; 89 | }; 90 | 91 | #endif /* train_rnn_algo_h */ 92 | -------------------------------------------------------------------------------- /LightCTR/train/train_tm_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_tm_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/15. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
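// [Editorial note, not part of the original source] For orientation, the caches declared
// below (latentVar, topics_of_docs, words_of_topics, latent_*_sum) correspond to the
// quantities the standard PLSA EM updates need:
//   E-step:  p(z | d, w)  proportional to  p(w | z) * p(z | d)
//   M-step:  p(w | z)  proportional to  sum_d n(d, w) * p(z | d, w)
//            p(z | d)  proportional to  sum_w n(d, w) * p(z | d, w)
// The concrete implementation is in train_tm_algo.cpp, which is not reproduced in this
// excerpt. Minimal sketch of one E-step posterior (hypothetical names):
//
//   #include <cstddef>
//   #include <vector>
//   inline std::vector<float> plsa_posterior(const std::vector<float>& p_w_given_z,   // [topics]
//                                            const std::vector<float>& p_z_given_d) { // [topics]
//       std::vector<float> post(p_w_given_z.size());
//       float norm = 0.f;
//       for (std::size_t z = 0; z < post.size(); ++z)
//           norm += post[z] = p_w_given_z[z] * p_z_given_d[z];
//       for (float& v : post) v /= (norm > 0.f ? norm : 1.f);
//       return post;
//   }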
7 | // 8 | 9 | #ifndef train_tm_algo_h 10 | #define train_tm_algo_h 11 | 12 | #include 13 | #include 14 | #include "../common/thread_pool.h" 15 | #include "../em_algo_abst.h" 16 | using namespace std; 17 | 18 | #define FOR(i,n) for(size_t i = 0;i < n;i++) 19 | 20 | #define PLSA 21 | 22 | // Topic Model impl by PLSA and Latent Dirichlet Allocation Algorithm 23 | class Train_TM_Algo : public EM_Algo_Abst > { 24 | public: 25 | Train_TM_Algo(string _dataFile, string _vocabFile, size_t _epoch, 26 | size_t _topic, size_t _words): 27 | EM_Algo_Abst(_dataFile, _epoch, _words), word_cnt(_words), topic_cnt(_topic) { 28 | doc_cnt = this->dataRow_cnt; 29 | threadpool = new ThreadPool(thread::hardware_concurrency()); 30 | init(); 31 | loadVocab(_vocabFile); 32 | } 33 | Train_TM_Algo() = delete; 34 | 35 | ~Train_TM_Algo() { 36 | delete threadpool; 37 | threadpool = NULL; 38 | } 39 | 40 | void init(); 41 | vector* Train_EStep(); 42 | float Train_MStep(const vector*); 43 | 44 | void printArguments(); 45 | vector Predict(); 46 | 47 | size_t word_cnt, topic_cnt, doc_cnt; 48 | vector vocab; 49 | 50 | void loadVocab(string dataPath) { 51 | ifstream fin_; 52 | string line; 53 | char str[128]; 54 | int val, fre; 55 | fin_.open(dataPath, ios::in); 56 | if(!fin_.is_open()){ 57 | cout << "open file error, please run data/proc_text_topic.py first." << endl; 58 | exit(1); 59 | } 60 | while(!fin_.eof()){ 61 | getline(fin_, line); 62 | const char *pline = line.c_str(); 63 | if(sscanf(pline, "%d %s %d", &val, str, &fre) >= 1){ 64 | assert(!isnan(val)); 65 | vocab.emplace_back(string(str)); 66 | } 67 | } 68 | assert(vocab.size() == word_cnt); 69 | } 70 | 71 | ThreadPool *threadpool; 72 | 73 | #ifdef PLSA 74 | vector latentVar; 75 | vector topics_of_docs; 76 | vector words_of_topics; 77 | vector wordCnt_of_doc; 78 | // cache for algorithm 79 | vector latent_word_sum; // word_sum[docid][tid] sum of all words 80 | vector latent_doc_sum; // doc_sum[wid][tid] sum of all docs 81 | vector latent_word_doc_sum; // word_doc_sum[tid] sum of all docs and words 82 | #endif 83 | }; 84 | 85 | #endif /* train_tm_algo_h */ 86 | -------------------------------------------------------------------------------- /LightCTR/train/train_vae_algo.h: -------------------------------------------------------------------------------- 1 | // 2 | // train_vae_algo.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/21. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
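// [Editorial note, not part of the original source] init() below builds the autoencoder as
//   FC(feature_cnt -> hidden) -> FC(hidden -> 2 * gauss_cnt) -> Sample_Layer
//   -> FC(gauss_cnt -> hidden) -> FC(hidden -> feature_cnt),
// i.e. the encoder emits gauss_cnt (mu, sigma) pairs and the sample layer draws the latent
// code. The usual VAE reparameterization (presumably what Sample_Layer in
// layer/sampleLayer.h implements; that file is not reproduced here) is
//   z = mu + sigma * eps,  eps ~ N(0, 1),
// which keeps sampling differentiable with respect to mu and sigma. Minimal sketch:
//
//   #include <cstddef>
//   #include <random>
//   #include <vector>
//   inline std::vector<float> reparameterize(const std::vector<float>& mu,
//                                            const std::vector<float>& sigma,
//                                            std::mt19937& rng) {
//       std::normal_distribution<float> eps(0.f, 1.f);
//       std::vector<float> z(mu.size());
//       for (std::size_t i = 0; i < z.size(); ++i)
//           z[i] = mu[i] + sigma[i] * eps(rng);   // gradient flows through mu and sigma
//       return z;
//   }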
7 | // 8 | 9 | #ifndef train_vae_algo_h 10 | #define train_vae_algo_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "../util/loss.h" 16 | #include "layer/layer_abst.h" 17 | #include "layer/convLayer.h" 18 | #include "layer/poolingLayer.h" 19 | #include "layer/sampleLayer.h" 20 | using namespace std; 21 | 22 | // Generative-Models Variational-Autoencoder 23 | template 24 | class Train_VAE_Algo { 25 | 26 | public: 27 | Train_VAE_Algo(string dataPath, size_t _epoch, size_t _feature_cnt, 28 | size_t hidden_size, size_t _gauss_cnt): 29 | epoch(_epoch), feature_cnt(_feature_cnt), gauss_cnt(_gauss_cnt) { 30 | loadDenseDataRow(dataPath); 31 | init(hidden_size, _gauss_cnt); 32 | } 33 | Train_VAE_Algo() = delete; 34 | 35 | ~Train_VAE_Algo() { 36 | delete encodeLayer; 37 | delete decodeLayer; 38 | delete outputLayer; 39 | delete sampleLayer; 40 | } 41 | 42 | void init(size_t hidden_size, size_t gauss_cnt) { 43 | // Find expectation nomal distribution 44 | encodeLayer = new Fully_Conn_Layer(NULL, feature_cnt, hidden_size); 45 | Fully_Conn_Layer* hidden1 = 46 | new Fully_Conn_Layer(encodeLayer, hidden_size, gauss_cnt * 2); 47 | // sample 48 | sampleLayer = new Sample_Layer(hidden1, gauss_cnt * 2); 49 | decodeLayer = new Fully_Conn_Layer(sampleLayer, gauss_cnt, hidden_size); 50 | // tuning parameters to Maximize Likelihood 51 | outputLayer = 52 | new Fully_Conn_Layer(decodeLayer, hidden_size, feature_cnt); 53 | } 54 | 55 | void Train() { 56 | static Matrix* dataRow_Matrix = new Matrix(1, feature_cnt); 57 | static Matrix* grad_Matrix = new Matrix(1, feature_cnt); 58 | static vector tmp(1); 59 | 60 | for (size_t p = 0; p < epoch; p++) { 61 | 62 | GradientUpdater::__global_bTraining = true; 63 | 64 | // Mini-Batch SGD 65 | for (size_t rid = 0; rid < dataRow_cnt; rid++) { 66 | dataRow_Matrix->pointer()->assign(dataSet[rid].begin(), dataSet[rid].end()); 67 | tmp[0] = dataRow_Matrix; 68 | vector& pred = this->encodeLayer->forward(tmp); 69 | outputActivFun.forward(pred.data(), pred.size()); 70 | assert(pred.size() == feature_cnt); 71 | 72 | lossFun.gradient(pred.data(), dataSet[rid].data(), 73 | grad_Matrix->reference().data(), grad_Matrix->size()); 74 | outputActivFun.backward(grad_Matrix->reference().data(), pred.data(), 75 | grad_Matrix->reference().data(), grad_Matrix->size()); 76 | 77 | // if LossFunction is Logistic, annotation last line 78 | tmp[0] = grad_Matrix; 79 | this->outputLayer->backward(tmp); 80 | if ((rid + 1) % GradientUpdater::__global_minibatch_size == 0) { 81 | this->encodeLayer->applyBatchGradient(); 82 | } 83 | } 84 | if (p % 2 == 0) { 85 | 86 | GradientUpdater::__global_bTraining = false; 87 | 88 | // Validate Loss 89 | float loss = 0.0f; 90 | for (size_t rid = 0; rid < dataRow_cnt; rid+=2) { 91 | dataRow_Matrix->pointer()->assign(dataSet[rid].begin(), dataSet[rid].end()); 92 | tmp[0] = dataRow_Matrix; 93 | vector pred = this->encodeLayer->forward(tmp); 94 | outputActivFun.forward(pred.data(), pred.size()); 95 | loss += lossFun.loss(pred.data(), dataSet[rid].data(), pred.size()); 96 | } 97 | printf("Epoch %zu Loss = %f\n", p, loss); 98 | } 99 | } 100 | } 101 | 102 | vector* encode(vector* input) { 103 | assert(input->size() == feature_cnt); 104 | sampleLayer->bEncoding = true; 105 | vector *encode = this->encodeLayer->forward(input); 106 | sampleLayer->bEncoding = false; 107 | assert(encode->size() == gauss_cnt); 108 | return encode; 109 | } 110 | 111 | void saveModel(size_t epoch) { 112 | 113 | } 114 | 115 | void loadDenseDataRow(string dataPath) { 116 | dataSet.clear(); 
117 | 118 | ifstream fin_; 119 | string line; 120 | int nchar, y; 121 | int val, fid = 0; 122 | fin_.open(dataPath, ios::in); 123 | if(!fin_.is_open()){ 124 | cout << "open file error!" << endl; 125 | exit(1); 126 | } 127 | 128 | while(!fin_.eof()){ 129 | vector tmp; 130 | tmp.resize(feature_cnt); 131 | getline(fin_, line); 132 | fill(tmp.begin(), tmp.end(), 0); 133 | const char *pline = line.c_str(); 134 | if(sscanf(pline, "%d%n", &y, &nchar) >= 1){ 135 | pline += nchar + 1; 136 | fid = 0; 137 | while(pline < line.c_str() + (int)line.length() && 138 | sscanf(pline, "%d%n", &val, &nchar) >= 1){ 139 | pline += nchar + 1; 140 | if (*pline == ',') 141 | pline += 1; 142 | if (val != 0) { 143 | tmp[fid] = val / 255.0; 144 | } 145 | fid++; 146 | if (fid >= feature_cnt) { 147 | break; 148 | } 149 | } 150 | dataSet.emplace_back(tmp); 151 | if (dataSet.size() > 200) { 152 | break; 153 | } 154 | } 155 | } 156 | this->dataRow_cnt = this->dataSet.size(); 157 | assert(this->dataRow_cnt > 0); 158 | } 159 | 160 | private: 161 | LossFunction lossFun; 162 | Sigmoid outputActivFun; 163 | 164 | size_t epoch; 165 | Fully_Conn_Layer *encodeLayer, *decodeLayer, *outputLayer; 166 | Sample_Layer *sampleLayer; 167 | 168 | size_t dataRow_cnt, feature_cnt, gauss_cnt; 169 | vector > dataSet; 170 | }; 171 | 172 | #endif /* train_vae_algo_h */ 173 | -------------------------------------------------------------------------------- /LightCTR/train/unit/attention_unit.h: -------------------------------------------------------------------------------- 1 | // 2 | // attention_unit.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/2. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef attention_unit_h 10 | #define attention_unit_h 11 | 12 | #include 13 | #include "../../util/matrix.h" 14 | #include "../layer/fullyconnLayer.h" 15 | 16 | // Attention-based Encoder-Decoder build a RNN that has alignment attention 17 | template 18 | class Attention_Unit : public Layer_Base { 19 | public: 20 | Attention_Unit(size_t _dimension, size_t _hidden_size, size_t _recurrent_cnt): 21 | Layer_Base(NULL, _recurrent_cnt, _dimension), dimension(_dimension), batch_size(_recurrent_cnt) { 22 | this->activeFun = new ActivationFunction(); 23 | 24 | printf("Attention-based Unit\n"); 25 | // alpha transform is computed by DxH and Hx1 fc Layer 26 | printf("-- Attention Inner FC-1 "); 27 | transformFunc = new Fully_Conn_Layer(NULL, _dimension, _hidden_size); 28 | transformFunc->needInputDelta = true; 29 | printf("-- Attention Inner FC-2 "); 30 | transformFunc_bp = new Fully_Conn_Layer(transformFunc, _hidden_size, 1); 31 | } 32 | Attention_Unit() = delete; 33 | 34 | ~Attention_Unit() { 35 | delete transformFunc_bp; 36 | delete transformFunc; 37 | } 38 | 39 | // Attention input data should be data concating rnn encoder output sequence, rather than one cell's output 40 | vector& forward(const vector& prevLOutputMatrix) { 41 | assert(prevLOutputMatrix.size() == batch_size); 42 | 43 | // init threadlocal var 44 | MatrixArr& input = *tl_input; 45 | input.arr.resize(batch_size); 46 | Matrix& attentionOutput = *tl_attentionOutput; 47 | attentionOutput.reset(1, dimension); 48 | 49 | Matrix& fc_output_act = *tl_fc_output_act; 50 | fc_output_act.reset(1, batch_size); 51 | 52 | Matrix* cache = NULL; 53 | 54 | vector& wrapper = *tl_wrapper; 55 | wrapper.resize(1); 56 | 57 | FOR(idx, prevLOutputMatrix.size()) { 58 | input.arr[idx] = prevLOutputMatrix[idx]->copy(input.arr[idx]); // 1xD 59 | assert(input.arr[idx]->size() 
== dimension); 60 | wrapper[0] = input.arr[idx]; 61 | auto res = transformFunc->forward(wrapper); 62 | assert(res.size() == 1); 63 | *fc_output_act.getEle(0, idx) = res[0]; 64 | } 65 | // Softmax normalization 66 | softmax.forward(fc_output_act.pointer()->data(), fc_output_act.size()); 67 | 68 | attentionOutput.zeroInit(); 69 | FOR(idx, prevLOutputMatrix.size()) { 70 | cache = input.arr[idx]->copy(cache)->scale(*fc_output_act.getEle(0, idx)); 71 | attentionOutput.add(cache); 72 | } 73 | delete cache; 74 | return attentionOutput.reference(); 75 | } 76 | 77 | void backward(const vector& outputDeltaMatrix) { 78 | Matrix* outputDelta = outputDeltaMatrix[0]; 79 | assert(outputDelta->size() == this->output_dimension); 80 | 81 | // init threadlocal var 82 | MatrixArr& input = *tl_input; 83 | Matrix& fc_output_act = *tl_fc_output_act; 84 | 85 | vector& wrapper = *tl_wrapper; 86 | vector& scaleDelta = *tl_scaleDelta; 87 | scaleDelta.resize(batch_size); 88 | MatrixArr& input_delta = *tl_input_delta; 89 | input_delta.arr.resize(batch_size); 90 | Matrix* cache_bp = new Matrix(1, 1); 91 | Matrix* cache = NULL; 92 | 93 | FOR(idx, input.arr.size()) { 94 | // update softmax_fc by delta of softmax_fc(X) 95 | auto res = input.arr[idx]->Multiply(cache_bp, outputDelta->transpose()); 96 | outputDelta->transpose(); // recover 97 | assert(res->size() == 1); 98 | scaleDelta[idx] = *cache_bp->getEle(0, 0); 99 | } 100 | softmax.backward(scaleDelta.data(), fc_output_act.pointer()->data(), 101 | scaleDelta.data(), scaleDelta.size()); 102 | // update transformFunc 103 | FOR(idx, input.arr.size()) { 104 | *cache_bp->getEle(0, 0) = scaleDelta[idx]; 105 | wrapper[0] = cache_bp; 106 | transformFunc_bp->backward(wrapper); 107 | // input delta of transformFunc 108 | const Matrix& delta = transformFunc->inputDelta(); 109 | input_delta.arr[idx] = delta.copy(input_delta.arr[idx]); 110 | } 111 | // pass back delta of X 112 | FOR(idx, input.arr.size()) { 113 | cache = outputDelta->copy(cache)->scale(*fc_output_act.getEle(0, idx)); 114 | input_delta.arr[idx]->add(cache); 115 | } 116 | delete cache_bp; 117 | delete cache; 118 | } 119 | 120 | const vector& output() { 121 | Matrix& attentionOutput = *tl_attentionOutput; 122 | vector& wrapper = *tl_wrapper; 123 | wrapper[0] = &attentionOutput; 124 | return wrapper; 125 | } 126 | const vector& inputDelta() { 127 | MatrixArr& input_delta = *tl_input_delta; 128 | return input_delta.arr; 129 | } 130 | 131 | void applyBatchGradient() { 132 | transformFunc->applyBatchGradient(); 133 | if (nextLayer) { 134 | nextLayer->applyBatchGradient(); 135 | } 136 | } 137 | 138 | private: 139 | Fully_Conn_Layer *transformFunc, *transformFunc_bp; 140 | Softmax softmax; 141 | size_t batch_size, dimension; 142 | 143 | ThreadLocal tl_input; 144 | ThreadLocal tl_attentionOutput; 145 | ThreadLocal tl_fc_output_act; 146 | 147 | ThreadLocal > tl_scaleDelta; 148 | ThreadLocal tl_input_delta; 149 | 150 | ThreadLocal > tl_wrapper; 151 | }; 152 | 153 | #endif /* attention_unit_h */ 154 | -------------------------------------------------------------------------------- /LightCTR/util/activations.h: -------------------------------------------------------------------------------- 1 | // 2 | // activation.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
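// [Editorial note, not part of the original source] Two numerical conventions used by the
// activations below: Sigmoid clamps its output to [1e-7, 1 - 1e-7] by short-circuiting
// logits beyond +/-16, and Softmax subtracts the row maximum before exponentiating. The
// AVX-based Softmax::backward computes the Jacobian contraction
//   dL/dz_i = ( delta_i - sum_j delta_j * p_j ) * p_i / T,
// where p is the forward softmax output and T the soft-target temperature. A scalar
// reference of the same contraction (illustrative only):
//
//   #include <cstddef>
//   inline void softmax_backward_ref(const float* delta, const float* p, float* out,
//                                    std::size_t n, float temperature = 1.f) {
//       float dot = 0.f;
//       for (std::size_t j = 0; j < n; ++j) dot += delta[j] * p[j];
//       for (std::size_t i = 0; i < n; ++i)
//           out[i] = (delta[i] - dot) * p[i] / temperature;
//   }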
7 | // 8 | 9 | #ifndef activation_h 10 | #define activation_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "assert.h" 16 | #include "../common/avx.h" 17 | using namespace std; 18 | 19 | class Activation { 20 | public: 21 | virtual void forward(float* input, size_t len) = 0; 22 | virtual void backward(const float* delta, const float* forward_output, float* to, size_t len) = 0; 23 | }; 24 | 25 | class Identity : public Activation { 26 | public: 27 | inline void forward(float* input, size_t len) { 28 | return; 29 | } 30 | inline void backward(const float* delta, const float* forward_output, float* to, size_t len) { 31 | for (size_t i = 0; i < len; i++) { 32 | to[i] = delta[i]; 33 | } 34 | } 35 | }; 36 | 37 | class Binary_Sigmoid : public Activation { 38 | // used in forward process of Binary Neural Network 39 | public: 40 | inline float forward(float input) { 41 | const float res = (input + 1.0f) / 2.0f; 42 | return fmax(0.0f, fmin(1.0f, res)); // clip to [0, 1] 43 | } 44 | inline void forward(float* input, size_t len) { 45 | float scaler = 0.0f; 46 | for (size_t i = 0; i < len; i++) { 47 | scaler += fabs(input[i]); // accumulate of L1-norm 48 | } 49 | scaler /= len; 50 | for (size_t i = 0; i < len; i++) { 51 | const float sign = input[i] > 0 ? 1 : -1; 52 | input[i] *= scaler * sign; 53 | } 54 | } 55 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 56 | // standard backward propagation except binary weight 57 | for (size_t i = 0; i < len; i++) { 58 | to[i] = delta[i]; 59 | } 60 | } 61 | }; 62 | 63 | class Sigmoid : public Activation { 64 | public: 65 | inline float forward(float input) const { 66 | if(input < -16){ 67 | return 1e-7; 68 | } else if(input > 16) { 69 | return 1.0 - 1e-7; 70 | } 71 | return 1.0f / (1.0f + exp(-input)); 72 | } 73 | inline void forward(float* input, size_t len) { 74 | for (size_t i = 0; i < len; i++) { 75 | if(input[i] < -16){ 76 | input[i] = 1e-7; 77 | } else if(input[i] > 16) { 78 | input[i] = 1.0 - 1e-7; 79 | } else { 80 | input[i] = 1.0f / (1.0f + exp(- input[i])); 81 | } 82 | assert(!isnan(input[i])); 83 | } 84 | } 85 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 86 | for (size_t i = 0; i < len; i++) { 87 | to[i] = delta[i] * foutput[i] * (1.0f - foutput[i]); 88 | assert(!isnan(to[i])); 89 | } 90 | } 91 | }; 92 | 93 | class Softmax : public Activation { 94 | public: 95 | Softmax(float _softTargetRate = 1.0f) : softTargetRate(_softTargetRate) { 96 | } 97 | inline size_t forward_max(const float* input, size_t len) const { 98 | return std::max_element(input, input + len) - input; 99 | } 100 | inline void forward(float* input, size_t len) { 101 | float sum = 0.0f; 102 | auto maxV = *max_element(input, input + len); 103 | // for numerical stability overflow 104 | for (size_t i = 0; i < len; i++) { 105 | sum += exp((input[i] - maxV) / softTargetRate); 106 | } 107 | for (size_t i = 0; i < len; i++) { 108 | input[i] = exp((input[i] - maxV) / softTargetRate) / sum; 109 | if (input[i] == 0) { 110 | input[i] = 1e-7; 111 | } else if (input[i] == 1) { 112 | input[i] = 1.0 - 1e-7; 113 | } 114 | } 115 | } 116 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 117 | // softmax Derivative (whether i == j) * softmax(input[i]) - softmax(input[i]) * softmax(input[j]) 118 | // each derivative of Z_(L) = sum_i( delta_(i) * -forward_output_(i) ) * forward_output_(L) 119 | // + delta_(L) * forward_output_(L) 120 | float sum = 
avx_dotProduct(delta, foutput, len); 121 | avx_vecAdd(delta, -sum, to, len); 122 | avx_vecScale(to, to, len, foutput); 123 | avx_vecScale(to, to, len, 1.0 / softTargetRate); 124 | } 125 | private: 126 | // used in distillation soft target softmax, when larger than 1 makes smooth classification 127 | float softTargetRate; 128 | }; 129 | 130 | class Tanh : public Activation { 131 | public: 132 | inline void forward(float* input, size_t len) { 133 | float t1, t2; 134 | for (size_t i = 0; i < len; i++) { 135 | t1 = exp(input[i]), t2 = exp(- input[i]); 136 | input[i] = (t1 - t2) / (t1 + t2); 137 | } 138 | } 139 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 140 | for (size_t i = 0; i < len; i++) { 141 | to[i] = delta[i] * (1.0f - foutput[i] * foutput[i]); 142 | } 143 | } 144 | }; 145 | 146 | class ReLU : public Activation { // Local Response Normalization 147 | public: 148 | inline void forward(float* input, size_t len) { 149 | for (size_t i = 0; i < len; i++) { 150 | if (input[i] < 0.0f) { 151 | input[i] = 0.0f; // negative slope is 0 152 | } 153 | } 154 | } 155 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 156 | for (size_t i = 0; i < len; i++) { 157 | if (foutput[i] == 0.0f) { 158 | to[i] = 0.0f; 159 | } else { 160 | to[i] = delta[i]; 161 | } 162 | } 163 | } 164 | }; 165 | 166 | class SoftPlus : public Activation { 167 | public: 168 | inline void forward(float* input, size_t len) { 169 | for (size_t i = 0; i < len; i++) { 170 | input[i] = log(1 + exp(input[i])); 171 | } 172 | } 173 | inline void backward(const float* delta, const float* foutput, float* to, size_t len) { 174 | float t; 175 | for (size_t i = 0; i < len; i++) { 176 | t = exp(foutput[i]); 177 | to[i] = delta[i] * (t - 1) / t; 178 | } 179 | } 180 | }; 181 | 182 | #endif /* activation_h */ 183 | -------------------------------------------------------------------------------- /LightCTR/util/ensembling.h: -------------------------------------------------------------------------------- 1 | // 2 | // ensembling.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/3. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 
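// [Editorial note, not part of the original source] AdaBoost below weights each weak model
// by alpha = 0.5 * ln((1 - err) / err), returns a large constant when err is near zero, and
// rescales sample weights by e^{+alpha} for misclassified rows and e^{-alpha} for correct
// ones (note it uses the plain misclassification rate and does not renormalize the weight
// vector). A textbook-style round with weighted error, for comparison (illustrative only):
//
//   #include <cmath>
//   #include <cstddef>
//   #include <vector>
//   inline float adaboost_round(std::vector<float>& w, const std::vector<bool>& correct) {
//       float err = 0.f, total = 0.f;
//       for (std::size_t i = 0; i < w.size(); ++i) {
//           total += w[i];
//           if (!correct[i]) err += w[i];
//       }
//       err /= total;
//       const float alpha = 0.5f * std::log((1.f - err) / err);
//       for (std::size_t i = 0; i < w.size(); ++i)
//           w[i] *= std::exp(correct[i] ? -alpha : +alpha);
//       return alpha;   // ensemble weight of this round's weak model
//   }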
7 | // 8 | 9 | #ifndef ensembling_h 10 | #define ensembling_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | // Hard majority voting 18 | // Weighted Average Probabilities 19 | class Voting { 20 | public: 21 | Voting(bool _is_prob_avg_voting) { 22 | is_prob_avg_voting = _is_prob_avg_voting; 23 | } 24 | 25 | std::shared_ptr > final_result(vector >& sub_results) { 26 | assert(sub_results.size() > 0 && sub_results[0].size() > 0); 27 | vector res; 28 | res.resize(sub_results[0].size()); 29 | 30 | if (is_prob_avg_voting) { 31 | for (size_t i = 0; i < sub_results[0].size(); i++) { 32 | res[i] = 0; 33 | for (size_t j = 0; j < sub_results.size(); j++) { 34 | res[i] += sub_results[j][i]; 35 | } 36 | res[i] /= sub_results.size(); 37 | } 38 | } else { 39 | for (size_t i = 0; i < sub_results.size(); i++) { 40 | const size_t index = std::distance(sub_results[i].begin(), 41 | std::max_element(sub_results[i].begin(), sub_results[i].end()) 42 | ); 43 | assert(index < sub_results[0].size()); 44 | res[index]++; 45 | } 46 | } 47 | return std::make_shared >(res); 48 | } 49 | 50 | private: 51 | bool is_prob_avg_voting; 52 | }; 53 | 54 | // AdaBoost 55 | class AdaBoost { 56 | public: 57 | explicit AdaBoost(size_t _sample_cnt): sample_cnt(_sample_cnt) { 58 | weights = new float[_sample_cnt]; 59 | const float init_w = 1.0 / _sample_cnt; 60 | for (size_t i = 0; i < _sample_cnt; i++) { 61 | *(weights + i) = init_w; 62 | } 63 | } 64 | 65 | ~AdaBoost() { 66 | delete[] weights; 67 | _model_weights.clear(); 68 | } 69 | 70 | std::shared_ptr ensembling_weak_model(std::vector& pred_correct_mask) { 71 | float err_rate = 0.; 72 | for (size_t i = 0; i < sample_cnt; i++) { 73 | if (pred_correct_mask[i] == false) 74 | err_rate += 1.; 75 | } 76 | err_rate /= sample_cnt; 77 | 78 | float alpha = model_weighting(err_rate); 79 | _model_weights.emplace_back(alpha); 80 | 81 | float reweighting = std::exp(alpha); 82 | for (size_t i = 0; i < sample_cnt; i++) { 83 | if (pred_correct_mask[i] == false) { 84 | *(weights + i) *= reweighting; 85 | } else { 86 | *(weights + i) /= reweighting; 87 | } 88 | } 89 | return std::make_shared(*weights); 90 | } 91 | 92 | const vector& model_weights() { 93 | return _model_weights; 94 | } 95 | 96 | private: 97 | inline float model_weighting(float err_rate){ 98 | if (err_rate < 1e-4) { 99 | return 1000; // strongly outstanding 100 | } 101 | // calculate new weight 102 | return 0.5 * std::log((1 - err_rate) / err_rate); 103 | } 104 | 105 | size_t sample_cnt; 106 | float* weights = NULL; 107 | std::vector _model_weights; 108 | }; 109 | 110 | 111 | #endif /* ensembling_h */ 112 | -------------------------------------------------------------------------------- /LightCTR/util/evaluator.h: -------------------------------------------------------------------------------- 1 | // 2 | // evaluator.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/11/10. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
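// [Editorial note, not part of the original source] AucEvaluator below buckets predictions
// by score (kHashLen bins, split by label) and then sweeps from the highest bucket down,
// accumulating trapezoids over the implied ROC curve and normalizing by totPos * totNeg.
// Equivalently, AUC is the probability that a random positive is ranked above a random
// negative, which gives this exact O(n log n) reference (ties ignored, illustrative only):
//
//   #include <algorithm>
//   #include <cstddef>
//   #include <utility>
//   #include <vector>
//   inline float auc_exact(std::vector<std::pair<float, int>> scored) { // (score, label in {0,1})
//       std::sort(scored.begin(), scored.end());
//       double rank_sum = 0, pos = 0, neg = 0;
//       for (std::size_t i = 0; i < scored.size(); ++i) {
//           if (scored[i].second == 1) { rank_sum += double(i) + 1; pos += 1; }
//           else                       { neg += 1; }
//       }
//       if (pos == 0 || neg == 0) return 0.f;
//       return float((rank_sum - pos * (pos + 1) / 2) / (pos * neg));
//   }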
7 | // 8 | 9 | #ifndef evaluator_h 10 | #define evaluator_h 11 | 12 | #include 13 | 14 | struct EvalInfo { 15 | // true positives, judge label=1 rightly 16 | float TP; 17 | // true negatives, judge label=0 rightly 18 | float TN; 19 | // false positives 20 | float FP; 21 | // false negatives 22 | float FN; 23 | 24 | EvalInfo() : TP(0.0), TN(0.0), FP(0.0), FN(0.0) {} 25 | }; 26 | 27 | inline static float Precision(float TP, float FP) { 28 | if (TP > 0.0 || FP > 0.0) { 29 | return TP / (TP + FP); 30 | } else { 31 | return 1.0; 32 | } 33 | } 34 | 35 | inline static float Recall(float TP, float FN) { 36 | if (TP > 0.0 || FN > 0.0) { 37 | return TP / (TP + FN); 38 | } else { 39 | return 1.0; 40 | } 41 | } 42 | 43 | inline static float F1Score(float precision, float recall) { 44 | if (precision > 0.0 || recall > 0.0) { 45 | return 2.0f * precision * recall / (precision + recall); 46 | } else { 47 | return 0; 48 | } 49 | } 50 | 51 | class AucEvaluator { 52 | public: 53 | AucEvaluator() { 54 | PosNum = new int[kHashLen + 1]; 55 | NegNum = new int[kHashLen + 1]; 56 | } 57 | ~AucEvaluator() { 58 | delete [] PosNum; 59 | delete [] NegNum; 60 | } 61 | void init(const vector* pCTR, const vector* label) { 62 | assert(pCTR->size() == label->size()); 63 | memset(PosNum, 0, sizeof(int) * (kHashLen + 1)); 64 | memset(NegNum, 0, sizeof(int) * (kHashLen + 1)); 65 | 66 | for (size_t i = 0; i < pCTR->size(); i++) { 67 | size_t index = pCTR->at(i) * kHashLen; 68 | if (label->at(i) == 1) { // Positive 69 | PosNum[index]++; 70 | } else { 71 | NegNum[index]++; 72 | } 73 | } 74 | } 75 | float Auc() { 76 | float totPos = 0.0, totNeg = 0.0; 77 | float totPosPrev = 0.0, totNegPrev = 0.0; 78 | float auc = 0.0; 79 | 80 | int64_t idx = kHashLen; 81 | while (idx >= 0) { 82 | totPosPrev = totPos; 83 | totNegPrev = totNeg; 84 | totPos += PosNum[idx]; 85 | totNeg += NegNum[idx]; 86 | auc += trapezoidArea(totNeg, totNegPrev, totPos, totPosPrev); 87 | --idx; 88 | } 89 | if (totPos > 0.0 && totNeg > 0.0) { 90 | return auc / totPos / totNeg; 91 | } else { 92 | return 0.0; 93 | } 94 | } 95 | private: 96 | inline float trapezoidArea(float X1, float X2, 97 | float Y1, float Y2) { 98 | return (X1 > X2 ? (X1 - X2) : (X2 - X1)) * (Y1 + Y2) / 2.0; 99 | } 100 | 101 | const size_t kHashLen = (1 << 24) - 1; 102 | int *PosNum, *NegNum; 103 | }; 104 | 105 | #endif /* evaluator_h */ 106 | -------------------------------------------------------------------------------- /LightCTR/util/loss.h: -------------------------------------------------------------------------------- 1 | // 2 | // loss.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/20. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 
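// [Editorial note, not part of the original source] Logistic::loss below takes raw logits p
// (not probabilities) and evaluates the log-likelihood in the overflow-safe form
//   (l - [p >= 0]) * p - log(1 + exp(p - 2 * [p >= 0] * p)),
// which equals l * p - log(1 + exp(p)) for either sign of p, i.e. the usual
// l * log(sigmoid(p)) + (1 - l) * log(1 - sigmoid(p)). The gradient routine, by contrast,
// assumes `pred` has already passed through a sigmoid output layer (hence pred - label).
// Scalar sketch of the stable form (illustrative only):
//
//   #include <cmath>
//   inline float stable_log_likelihood(float logit, float label) {   // label in {0, 1}
//       const float shift = (logit >= 0.f) ? logit : 0.f;
//       return (label * logit - shift) - std::log(1.f + std::exp(logit - 2.f * shift));
//   }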
7 | // 8 | 9 | #ifndef loss_h 10 | #define loss_h 11 | 12 | #include 13 | #include 14 | #include "assert.h" 15 | using namespace std; 16 | 17 | template 18 | class Loss { 19 | public: 20 | virtual T loss(const T* pred, const L* label, size_t len) const = 0; 21 | virtual void gradient(const T* pred, const L* label, T* gradient, size_t len) = 0; 22 | }; 23 | 24 | template 25 | class Square : public Loss { // Mean Squared Error 26 | public: 27 | T loss(const T* pred, const L* label, size_t len) const { 28 | T sum = 0.0f, tmp; 29 | for (size_t i = 0; i < len; i++) { 30 | tmp = pred[i] - label[i]; 31 | sum += tmp / 2 * tmp; 32 | } 33 | return sum; 34 | } 35 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 36 | for (size_t i = 0; i < len; i++) { 37 | gradient[i] = pred[i] - label[i]; 38 | } 39 | } 40 | }; 41 | 42 | template 43 | class Logistic : public Loss { 44 | public: 45 | T loss(const T* pred, const L* label, size_t len) const { 46 | T sum = 0.0f, p, l; 47 | for (size_t i = 0; i < len; i++) { 48 | p = pred[i]; 49 | l = label[i]; 50 | sum += (l - (p >= 0)) * p - log(1.0f + exp(p - 2.0f * (p >= 0) * p)); 51 | // sum += label->at(i) * log(pred->at(i)) + (1.0f - label->at(i)) * log(1.0f - pred->at(i)); 52 | } 53 | assert(!isnan(sum)); 54 | return sum; 55 | } 56 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 57 | // Notice output activator must be sigmoid 58 | for (size_t i = 0; i < len; i++) { 59 | gradient[i] = pred[i] - label[i]; 60 | } 61 | } 62 | }; 63 | 64 | template 65 | class Logistic_Softmax : public Loss { 66 | public: 67 | T loss(const T* pred, const L* label, size_t len) const { 68 | T sum = 0.0f; 69 | for (size_t i = 0; i < len; i++) { 70 | if (label[i] == 1) { 71 | sum += log(pred[i]); 72 | } 73 | } 74 | assert(!isnan(sum)); 75 | return sum; 76 | } 77 | void gradient(const T* pred, const L* label, T* gradient, size_t len) { 78 | for (size_t i = 0; i < len; i++) { 79 | if (label[i] == 1) { 80 | gradient[i] = 1.0f - pred[i]; 81 | } else { 82 | gradient[i] = - pred[i]; 83 | } 84 | } 85 | } 86 | }; 87 | 88 | #endif /* loss_h */ 89 | -------------------------------------------------------------------------------- /LightCTR/util/pca.h: -------------------------------------------------------------------------------- 1 | // 2 | // pca.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 
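// [Editorial note, not part of the original source] Train() below implements the
// Generalized Hebbian Algorithm (Sanger's rule): with projections y = x W, each weight
// moves by
//   delta w_{f,j} = eta * y_j * ( x_f - sum_{k <= j} y_k * w_{f,k} ),
// where the inner sum (getSum, computed from the previous iteration's weightsTmp snapshot)
// deflates the input by the components already captured, so successive neurons converge to
// successive principal components. Per-sample sketch using the current weights
// (illustrative only):
//
//   #include <cstddef>
//   #include <vector>
//   inline void sanger_update(std::vector<float>& W,         // featureSize x neurons, row-major
//                             const std::vector<float>& x,   // one sample
//                             const std::vector<float>& y,   // its projection, size = neurons
//                             std::size_t neurons, float eta) {
//       const std::size_t features = x.size();
//       for (std::size_t j = 0; j < neurons; ++j)
//           for (std::size_t f = 0; f < features; ++f) {
//               float deflate = 0.f;
//               for (std::size_t k = 0; k <= j; ++k) deflate += y[k] * W[f * neurons + k];
//               W[f * neurons + j] += eta * y[j] * (x[f] - deflate);
//           }
//   }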
7 | // 8 | 9 | #ifndef pca_h 10 | #define pca_h 11 | 12 | #include "matrix.h" 13 | 14 | // Functions for principal component analysis 15 | class PCA { 16 | public: 17 | PCA(float _learning_rate, int _maxIters, int _neuronsNum, int _featureSize) { 18 | trainingData = NULL; 19 | learning_rate = _learning_rate; 20 | maxIters = _maxIters; 21 | neuronsNum = _neuronsNum; 22 | featureSize = _featureSize; 23 | 24 | weightsTmp = new Matrix(featureSize, neuronsNum); 25 | // Initializing Random weights for the first iteration 26 | weights = new Matrix(featureSize, neuronsNum); 27 | weights->randomInit(); 28 | } 29 | 30 | void loadMatrix(Matrix* _trainingData) { 31 | trainingData = _trainingData; 32 | } 33 | 34 | void Train() { 35 | assert(trainingData != NULL); 36 | // PCA trained by Generalized Hebbian Neuron 37 | for (int epoch = 0; epoch < maxIters; epoch++) 38 | { 39 | output = trainingData->Multiply(output, weights); 40 | weights->copy(weightsTmp); 41 | 42 | for (int row = 0; row < output->x_len; row++) { 43 | // each sample data 44 | for (int nid = 0; nid < neuronsNum; nid++) { 45 | for (int fid = 0; fid < featureSize; fid++) { 46 | // update each weight 47 | float sumTerm = getSum(row, nid, fid); 48 | *weights->getEle(fid, nid) += learning_rate * *output->getEle(row, nid) 49 | * (*trainingData->getEle(row, fid) - sumTerm); 50 | } 51 | } 52 | } 53 | 54 | if (weights->checkConvergence(weightsTmp)) { 55 | // if convergence then stop training 56 | printf("convergence in %d epoch", epoch); 57 | return; 58 | } 59 | } 60 | printf("[WARNING] stop training in %d epoch", maxIters); 61 | } 62 | 63 | Matrix* reduceDimension(Matrix* input, size_t reserve_pc_cnt = 1) { 64 | size_t orig = weights->y_len; 65 | weights->y_len = reserve_pc_cnt; 66 | output = input->Multiply(output, weights); 67 | weights->y_len = orig; 68 | return output; 69 | } 70 | 71 | Matrix* remove_pc(Matrix* input, size_t remove_pc_cnt = 1) { 72 | // V = V - (V * U) * U^T 73 | size_t orig = weights->y_len; 74 | weights->y_len = remove_pc_cnt; 75 | Matrix* lowDimentionM = NULL; 76 | lowDimentionM = input->Multiply(lowDimentionM, weights); 77 | output = lowDimentionM->Multiply(output, weights->transpose()); 78 | output->add(input, 1, -1); 79 | weights->y_len = orig; 80 | 81 | return output; 82 | } 83 | 84 | void saveModel(size_t epoch) { 85 | } 86 | 87 | private: 88 | float getSum(int row, int nid, int fid) { 89 | float sum = 0; 90 | for (int i = 0; i <= nid; i++) { 91 | assert(!isnan(*output->getEle(row, i))); 92 | sum += *output->getEle(row, i) * *weightsTmp->getEle(fid, i); 93 | } 94 | return sum; 95 | } 96 | 97 | float learning_rate; 98 | int maxIters; 99 | int neuronsNum, featureSize; 100 | 101 | Matrix* trainingData; 102 | 103 | Matrix* weights = NULL; 104 | Matrix* weightsTmp = NULL; 105 | Matrix* output = NULL; 106 | }; 107 | 108 | #endif /* pca_h */ 109 | -------------------------------------------------------------------------------- /LightCTR/util/quantile_compress.h: -------------------------------------------------------------------------------- 1 | // 2 | // quantile_compress.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 
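// [Editorial note, not part of the original source] QuantileCompress maps each real value
// to one of 2^(8 * sizeof(CompressT)) buckets whose boundaries are evenly spaced in CDF
// space (uniform, log, normal or a custom distribution), and decodes a bucket back to a
// representative value from the _real_value table, trading precision for size. Hypothetical
// usage, assuming RealT = float and CompressT = int8_t (the template arguments are stripped
// in this excerpt, so the exact instantiation is an assumption):
//
//   #include <cstdint>
//   QuantileCompress<float, int8_t> qc(QuantileType::NORMAL_DISTRIBUT, -1.0f, 1.0f);
//   float src[128] = { /* ... */ };
//   int8_t packed[128];
//   float restored[128];
//   qc.compress(src, 128, packed);     // 4 bytes -> 1 byte per value
//   qc.extract(packed, 128, restored); // restored[i] is the bucket's representative value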
7 | // 8 | 9 | #ifndef quantile_compress_h 10 | #define quantile_compress_h 11 | 12 | #include 13 | #include 14 | #include "significance.h" 15 | 16 | enum QuantileType { 17 | UNIFORM = 0, 18 | LOG, 19 | NORMAL_DISTRIBUT, // parameters usually obey the normal law 20 | CUSTOM_DISTRIBUT 21 | }; 22 | 23 | template 24 | class QuantileCompress { 25 | public: 26 | QuantileCompress(QuantileType _quantileType, RealT _min, RealT _max, 27 | RealT _mu = 0, RealT _sigma = 1) : 28 | quantileType(_quantileType), min(_min), max(_max), mu(_mu), sigma(_sigma) { 29 | assert(_min < _max); 30 | init(); 31 | } 32 | // Disable the copy and assignment operator 33 | QuantileCompress(const QuantileCompress &) = delete; 34 | QuantileCompress(QuantileCompress &&) = delete; 35 | QuantileCompress &operator=(const QuantileCompress &) = delete; 36 | QuantileCompress &operator=(QuantileCompress &&) = delete; 37 | 38 | void compress(const RealT *input, const int len, CompressT *output) { 39 | std::transform(input, input + len, 40 | output, 41 | std::bind( 42 | &QuantileCompress::encoding, 43 | this, 44 | std::placeholders::_1 45 | ) 46 | ); 47 | } 48 | void extract(const CompressT *input, const int len, RealT *output) { 49 | std::transform(input, input + len, 50 | output, 51 | std::bind( 52 | &QuantileCompress::decoding, 53 | this, 54 | std::placeholders::_1 55 | ) 56 | ); 57 | } 58 | 59 | private: 60 | RealT convert(RealT x) { 61 | if (quantileType == QuantileType::LOG) { 62 | x = log(x); 63 | } else if (quantileType == QuantileType::NORMAL_DISTRIBUT) { 64 | x = StandardCDF(x); 65 | } else if (quantileType == QuantileType::CUSTOM_DISTRIBUT) { 66 | x = CustomCDF(x, mu, sigma); 67 | } 68 | return x; 69 | } 70 | 71 | void init() { 72 | if (quantileType == QuantileType::LOG) { 73 | assert(-min == max); 74 | minCDF = convert(1e-4), maxCDF = convert(max); // fix min if quantile by log 75 | } else { 76 | minCDF = convert(min), maxCDF = convert(max); 77 | } 78 | assert(maxCDF > minCDF); 79 | 80 | _delta = (maxCDF - minCDF) / static_cast(N_INTERVALS); 81 | if (quantileType == QuantileType::LOG) { 82 | _delta *= 2.0f; // divided by positive and negative parts 83 | } 84 | 85 | if (quantileType == QuantileType::UNIFORM) { 86 | _real_value[0] = min; 87 | for (int i = 1; i < N_INTERVALS; i++) { 88 | _real_value[i] = _real_value[i - 1] + _delta; 89 | } 90 | } else if (quantileType == QuantileType::LOG) { 91 | const size_t half_size = N_INTERVALS >> 1; 92 | for (int i = 0; i < half_size; i++) { 93 | _real_value[half_size + i] = exp(minCDF + i * _delta); 94 | _real_value[half_size - i - 1] = - _real_value[half_size + i]; 95 | } 96 | } else if (quantileType == QuantileType::NORMAL_DISTRIBUT) { 97 | _real_value[0] = min; 98 | for (int i = 1; i < N_INTERVALS; i++) { 99 | _real_value[i] = ReverseCDF(minCDF + i * _delta, 0, 1); 100 | } 101 | } else if (quantileType == QuantileType::CUSTOM_DISTRIBUT) { 102 | _real_value[0] = min; 103 | for (int i = 1; i < N_INTERVALS; i++) { 104 | _real_value[i] = ReverseCDF(minCDF + i * _delta, mu, sigma); 105 | } 106 | } 107 | } 108 | 109 | CompressT encoding(RealT real) const { 110 | CompressT ret = CompressT(); 111 | if (real <= min) { 112 | ret = static_cast(0); 113 | } else if (real >= max) { 114 | ret = static_cast(N_INTERVALS - 1); 115 | } else { 116 | if (quantileType == QuantileType::UNIFORM) { 117 | real -= min; 118 | ret = static_cast(real / _delta); 119 | } else if (quantileType == QuantileType::LOG || 120 | quantileType == QuantileType::NORMAL_DISTRIBUT || 121 | quantileType == 
QuantileType::CUSTOM_DISTRIBUT) { 122 | ret = static_cast(_binary_search(real)); 123 | } 124 | } 125 | return ret; 126 | } 127 | 128 | RealT decoding(CompressT comp) const { 129 | int index = static_cast(comp); 130 | if (index < 0) { // deal with big-endian number 131 | index = N_INTERVALS + index; 132 | } 133 | assert(index >= 0 && index < N_INTERVALS); 134 | return _real_value[static_cast(index)]; 135 | } 136 | 137 | int _binary_search(RealT value) const { 138 | int lower = 0, upper = N_INTERVALS - 1, mid; 139 | while (lower <= upper) { 140 | mid = (lower + upper) >> 1; 141 | if (_real_value[mid] > value) { 142 | upper = mid - 1; 143 | } else { 144 | lower = mid + 1; 145 | } 146 | } 147 | return upper; 148 | } 149 | 150 | QuantileType quantileType; 151 | 152 | static const size_t N_INTERVALS = 1 << (sizeof(CompressT) * 8); 153 | RealT min, max; 154 | RealT minCDF, maxCDF; 155 | RealT mu, sigma; 156 | RealT _delta; 157 | RealT _real_value[N_INTERVALS]; 158 | }; 159 | 160 | #endif /* quantile_compress_h */ 161 | -------------------------------------------------------------------------------- /LightCTR/util/random.h: -------------------------------------------------------------------------------- 1 | // 2 | // random.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2017/10/24. 6 | // Copyright © 2017年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef random_h 10 | #define random_h 11 | 12 | #include 13 | #include 14 | #include 15 | #include "significance.h" 16 | 17 | inline void Seed(uint32_t seed) { 18 | srand(seed); 19 | } 20 | 21 | inline double UniformNumRand() { // [0, 1) 22 | return static_cast(rand()) / (static_cast(RAND_MAX) + 1.0); 23 | } 24 | 25 | inline double UniformNumRand2() { // (0, 1) 26 | return (static_cast(rand()) + 1.0) / (static_cast(RAND_MAX) + 2.0); 27 | } 28 | 29 | inline size_t Random_index(size_t n) { 30 | return rand() % n; 31 | } 32 | 33 | template 34 | inline void Shuffle(T *vec, size_t sz) { 35 | if (sz == 0) 36 | return; 37 | for (uint32_t i = (uint32_t)sz - 1; i > 0; i--) { 38 | std::swap(vec[i], vec[(uint32_t)(UniformNumRand() * (i + 1))]); 39 | } 40 | } 41 | 42 | inline double GaussRand() { // ~N(0, 1) 43 | static double V1, V2, S; 44 | static int phase = 0; 45 | double X; 46 | if(phase == 0){ 47 | do { 48 | V1 = 2.0 * UniformNumRand2() - 1.0; 49 | V2 = 2.0 * UniformNumRand2() - 1.0; 50 | S = V1 * V1 + V2 * V2; 51 | } while(S >= 1.0 || S == 0.0); 52 | X = V1 * sqrt(-2.0 * log(S) / S); 53 | } else { 54 | X = V2 * sqrt(-2.0 * log(S) / S); 55 | } 56 | phase = 1 - phase; 57 | return X; 58 | } 59 | 60 | inline double GaussRand(double mu, double sigma) { 61 | return GaussRand() * sigma + mu; 62 | } 63 | 64 | inline std::pair GaussRand2D() { 65 | static double V1, V2, S; 66 | static int phase = 0; 67 | double X; 68 | if(phase == 0){ 69 | do { 70 | V1 = 2.0 * UniformNumRand2() - 1.0; 71 | V2 = 2.0 * UniformNumRand2() - 1.0; 72 | S = V1 * V1 + V2 * V2; 73 | } while(S >= 1.0 || S == 0.0); 74 | X = V1 * sqrt(-2.0 * log(S) / S); 75 | } else { 76 | X = V2 * sqrt(-2.0 * log(S) / S); 77 | } 78 | phase = 1 - phase; 79 | return std::make_pair(V1 * X, V2 * X); 80 | } 81 | 82 | inline bool SampleBinary(double p) { 83 | return UniformNumRand() < p; 84 | } 85 | 86 | inline size_t subSampleSize(double sampleAlpha = 0.05, double sampleErrorBound = 0.05) { 87 | // indicate confidence level and error bound to determine a suitable sample size 88 | double z = ReverseAlpha(sampleAlpha / 2); 89 | size_t sampleSize = (size_t)((z * z / 4.0f) / (sampleErrorBound * 
-------------------------------------------------------------------------------- /LightCTR/util/shm_hashtable.h: -------------------------------------------------------------------------------- 1 | // 2 | // shm_hashtable.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/12/7. 6 | // Copyright © 2018 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef shm_hashtable_h 10 | #define shm_hashtable_h 11 | 12 | #include "../common/system.h" 13 | #include "../common/lock.h" 14 | #include "../common/hash.h" 15 | #include <string> 16 | #include <vector> 17 | #include <bitset> 18 | #include <algorithm> 19 | template <typename T> 20 | class ShmHashTable { 21 | public: 22 | struct ShmHashNode { 23 | size_t key; // zero is reserved to mark an empty slot 24 | T value; 25 | ShmHashNode() : key(0), value(0.0) {} 26 | }; 27 | 28 | static ShmHashTable& Instance(size_t hash_times) { 29 | static ShmHashTable _instance(hash_times); 30 | return _instance; 31 | } 32 | 33 | bool insert(const std::string& key, const T& value) { 34 | return insert(static_cast<size_t>(murMurHash(key)), value); 35 | } 36 | 37 | bool update(const std::string& key, const T& value) { 38 | return update(static_cast<size_t>(murMurHash(key)), value); 39 | } 40 | 41 | const T& getValue(const std::string& key) const { 42 | return getValue(static_cast<size_t>(murMurHash(key))); 43 | } 44 | 45 | bool update(size_t key, const T& value) { 46 | return insert(key, value); 47 | } 48 | 49 | bool insert(size_t key, const T& value) { 50 | assert(g_pShmAddr); 51 | assert(key > 0); 52 | int res = insertOrUpdate(key, value, 0); 53 | return (res == 0 ? true : false); 54 | }
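A usage sketch of the shared-memory table above, not from the repository: the value type, hash_times and key name are arbitrary, and it assumes the process can attach to the shared-memory segment obtained through getShmAddr() (declared in one of the common headers).

```cpp
// Sketch: write one parameter into the shared-memory hash table and read it
// back by its string key. hash_times = 4 and the key name are illustrative only.
#include <cstdio>
#include "LightCTR/util/shm_hashtable.h"

int main() {
    auto& table = ShmHashTable<float>::Instance(/*hash_times=*/4);
    if (table.insert("fm_embedding_42", 0.125f)) {
        printf("stored value = %f\n", table.getValue("fm_embedding_42"));
    }
    return 0;
}
```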
55 | 56 | const T& getValue(size_t key) const { 57 | for (int i = 0; i < hash_times; i++) { 58 | size_t inner_offset = key % primes[i]; 59 | ShmHashNode* addr = (ShmHashNode*)g_pShmAddr + prime_offset[i] + inner_offset; 60 | if (addr->key == key) { 61 | return addr->value; 62 | } 63 | } 64 | static const T empty_value = T(); return empty_value; // not found: avoid returning a dangling reference 65 | } 66 | 67 | private: 68 | ShmHashTable() { 69 | 70 | } 71 | ~ShmHashTable() { 72 | if (g_pShmAddr) { 73 | shmdt(g_pShmAddr); 74 | g_pShmAddr = NULL; 75 | } 76 | } 77 | explicit ShmHashTable(size_t _hash_times) { 78 | hash_times = _hash_times; 79 | hashtable_reserve_size = hashspace * _hash_times * sizeof(ShmHashNode); 80 | 81 | initPrime(primes); 82 | 83 | g_pShmAddr = getShmAddr(0x5fef, hashtable_reserve_size); 84 | memset(g_pShmAddr, 0, hashtable_reserve_size); 85 | } 86 | ShmHashTable(const ShmHashTable&) = delete; 87 | ShmHashTable(ShmHashTable&&) = delete; 88 | ShmHashTable& operator=(const ShmHashTable&) = delete; 89 | ShmHashTable& operator=(ShmHashTable&&) = delete; 90 | 91 | int insertOrUpdate(size_t key, T value, size_t depth) { 92 | if (depth > 5) 93 | return -1; 94 | 95 | vector<ShmHashNode*> candidate_position; 96 | candidate_position.reserve(hash_times); 97 | 98 | for (int i = 0; i < hash_times; i++) { 99 | size_t inner_offset = key % primes[i]; 100 | ShmHashNode* addr = (ShmHashNode*)g_pShmAddr + prime_offset[i] + inner_offset; 101 | if (addr->key == 0) { 102 | candidate_position.emplace_back(addr); 103 | } else if (addr->key == key) { 104 | // update in place 105 | if(!atomic_compare_and_swap(&addr->value, addr->value, value)) { 106 | return insertOrUpdate(key, value, depth + 1); 107 | } 108 | } 109 | } 110 | 111 | // select one empty slot to insert 112 | if (likely(!candidate_position.empty())) { 113 | for (int i = 0; i < candidate_position.size(); i++) { 114 | ShmHashNode* addr = candidate_position[i]; 115 | if (addr->key == 0) { 116 | unique_lock<SpinLock> glock(lock); 117 | if (addr->key == 0) { 118 | addr->key = key; 119 | addr->value = value; 120 | 121 | return 0; 122 | } 123 | } 124 | } 125 | } 126 | // a conflict happened, retry 127 | return insertOrUpdate(key, value, depth + 1); 128 | } 129 | 130 | void initPrime(std::vector<size_t>& primes) { 131 | static const size_t MAX = (hashspace >> 1) + 1; 132 | bitset<MAX> flag(0); 133 | 134 | primes.emplace_back(2); 135 | 136 | size_t i, j; 137 | for (i = 3; i < MAX; i += 2) { 138 | if (!(flag.test(i / 2))) 139 | primes.emplace_back(i); 140 | for (j = 1; j < primes.size() && i * primes[j] < MAX; j++) { 141 | flag.set(i * primes[j] / 2); 142 | if (i % primes[j] == 0) 143 | break; 144 | } 145 | } 146 | std::reverse(primes.begin(), primes.end()); 147 | primes.resize(hash_times); 148 | assert(primes.size() == hash_times); 149 | 150 | prime_offset.emplace_back(0); 151 | for (i = 0; i < hash_times; i++) { 152 | prime_offset.emplace_back(prime_offset.back() + primes[i]); 153 | } 154 | } 155 | 156 | void* g_pShmAddr = NULL; 157 | size_t hash_times; 158 | size_t hashtable_reserve_size = 0; 159 | 160 | static const size_t hashspace = 1 << 20; 161 | std::vector<size_t> primes; 162 | std::vector<size_t> prime_offset; 163 | 164 | SpinLock lock; 165 | }; 166 | 167 | #endif /* shm_hashtable_h */ 168 | -------------------------------------------------------------------------------- /LightCTR/util/significance.h: -------------------------------------------------------------------------------- 1 | // 2 | // significance.h 3 | // LightCTR 4 | // 5 | // Created by SongKuangshi on 2018/5/4. 6 | // Copyright © 2018年 SongKuangshi. All rights reserved. 7 | // 8 | 9 | #ifndef significance_h 10 | #define significance_h 11 | 12 | #include <cmath> 13 | #include "assert.h" 14 | 15 | // error function 16 | inline double Erf(double x) { 17 | // works for both positive and negative x, because the error function is odd: Erf(-x) = -Erf(x) 18 | double a = 0.140012; 19 | double b = x * x; 20 | double item = -b * (4 / M_PI + a * b) / (1 + a * b); 21 | double result = sqrt(1 - exp(item)); 22 | if (x >= 0) 23 | return result; 24 | return -result; 25 | } 26 | 27 | inline double LogCDF(double x, double alpha = 10) { 28 | const double scaler = (alpha == 10) ? 1 : log(alpha); 29 | return (x * log(fabs(x)) - x) / scaler; 30 | } 31 | 32 | // calculate the standard cumulative distribution function F(x) = P(Z <= x), 33 | // where Z follows a standard normal distribution 34 | inline double StandardCDF(double x) { 35 | const double SquareRootOfTwo = 1.414213562373095; 36 | return (1.0 + Erf(x / SquareRootOfTwo)) / 2; 37 | } 38 | 39 | inline double CustomCDF(double x, double u, double sigma) { 40 | x = x - u; 41 | return 0.5 + 0.5 * Erf(x / sigma / 1.414213562373095); 42 | } 43 | 44 | inline double ReverseCDF(double p, double mu, double sigma) { 45 | double lower = -5.0, upper = 5.0, middle; 46 | while(1) { 47 | middle = (lower + upper) / 2; 48 | double estimate = CustomCDF(middle, mu, sigma); 49 | if (fabs(estimate - p) < 1e-7) 50 | break; 51 | // the CDF is monotonic, so binary search applies 52 | if (estimate > p) { 53 | upper = middle; 54 | } else { 55 | lower = middle; 56 | } 57 | } 58 | return middle; 59 | } 60 | 61 | // given a tail probability alpha, calculate z such that P(Z > z) = alpha 62 | inline double ReverseAlpha(double alpha) { 63 | assert(alpha > 0 && alpha < 1); 64 | return ReverseCDF(1.0f - alpha, 0, 1); 65 | } 66 | 67 | // calculate the statistical significance of an observed value x under a Gaussian 68 | // distribution with mean u and standard deviation sigma 69 | inline double GaussianSignificance(double x, double u, double sigma) { 70 | double cdf = CustomCDF(x, u, sigma); 71 | return 2 * cdf - 1; 72 | } 73 | 74 | #endif /* significance_h */ 75 |
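Two spot checks (illustrative, not from the repository) of the functions above: the two-sided 95% quantile from ReverseAlpha() and the significance of a one-sigma observation.

```cpp
// Sketch: ReverseAlpha(0.025) should come out close to 1.96, and a one-sigma
// observation has a significance near 0.68 under GaussianSignificance().
#include <cstdio>
#include "LightCTR/util/significance.h"

int main() {
    double z   = ReverseAlpha(0.025);                 // approx. 1.96
    double sig = GaussianSignificance(1.0, 0.0, 1.0); // approx. 0.68
    printf("z = %f  significance = %f\n", z, sig);
    return 0;
}
```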
-------------------------------------------------------------------------------- /LightCTR_LOGO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/LightCTR_LOGO.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export CC = gcc 2 | export CXX = g++ 3 | export CFLAGS = -std=c++11 -Wall -O3 -D__AVX__ -mavx -mssse3 -Wno-unknown-pragmas -Wno-reorder -Wno-conversion-null -Wno-strict-aliasing -Wno-sign-compare 4 | 5 | BIN = LightCTR_BIN 6 | ZMQ_INC = ./LightCTR/third/zeromq/include 7 | ZMQ_LIB = ./LightCTR/third/zeromq/lib/libzmq.a 8 | OBJ = 9 | .PHONY: clean all 10 | 11 | all: $(BIN) $(OBJ) 12 | export LDFLAGS= -pthread -lm -ldl 13 | 14 | STANDALONE = *.cpp LightCTR/*.h LightCTR/common/*.h LightCTR/predict/*.h LightCTR/predict/*.cpp LightCTR/util/*.h LightCTR/dag/*.h LightCTR/dag/operator/*.h LightCTR/train/*.h LightCTR/train/*.cpp LightCTR/train/layer/*.h LightCTR/train/unit/*.h 15 | DISTRIBUT = $(STANDALONE) $(ZMQ_INC) LightCTR/distribut/*.h 16 | 17 | LightCTR_BIN : $(STANDALONE) 18 | master : $(DISTRIBUT) 19 | ps : $(DISTRIBUT) 20 | worker : $(DISTRIBUT) 21 | ring_master : $(DISTRIBUT) 22 | ring_worker : $(DISTRIBUT) 23 | 24 | $(BIN) : 25 | $(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c, $^) $(LDFLAGS) 26 | 27 | master : 28 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Master $(filter %.cpp %.o %.c, $^) -D MASTER -Xlinker $(ZMQ_LIB) $(LDFLAGS) 29 | 30 | ps : 31 | $(CXX) $(CFLAGS) -o LightCTR_BIN_PS $(filter %.cpp %.o %.c, $^) -D PS -Xlinker $(ZMQ_LIB) $(LDFLAGS) 32 | 33 | worker : 34 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Worker $(filter %.cpp %.o %.c, $^) -D WORKER -Xlinker $(ZMQ_LIB) $(LDFLAGS) 35 | 36 | ring_master : 37 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Ring_Master $(filter %.cpp %.o %.c, $^) -D MASTER_RING -Xlinker $(ZMQ_LIB) $(LDFLAGS) 38 | 39 | ring_worker : 40 | $(CXX) $(CFLAGS) -o LightCTR_BIN_Ring_Worker $(filter %.cpp %.o %.c, $^) -DWORKER_RING -DTEST_CNN -Xlinker $(ZMQ_LIB) $(LDFLAGS) 41 | 42 | $(OBJ) : 43 | $(CXX) -c $(CFLAGS) -o $@ $(firstword $(filter %.cpp %.c, $^) ) 44 | 45 | install: 46 | cp -f -r $(BIN) $(INSTALL_PATH) 47 | 48 | clean: 49 | $(RM) $(OBJ) $(BIN) *~ 50 |
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Alt text -w135](./LightCTR_LOGO.png) 2 | ## LightCTR Overview 3 | LightCTR is a lightweight and scalable framework that combines mainstream Click-Through-Rate prediction algorithms based on a **computational DAG, the philosophy of Parameter Server, and Ring-AllReduce collective communication**. The library is suitable for sparse data and designed for large-scale distributed model training. 4 | 5 | Meanwhile, LightCTR is also an **ongoing experimental study** and an open-source project oriented to code readers. Its clear execution logic will be of value to learners in the machine-learning field. 6 | 7 | ## Features 8 | * Distributed training based on Parameter Server and Ring-AllReduce collective communication 9 | * Directed Acyclic Graph (DAG) of autograd computation 10 | * Gradient clipping, Stale Synchronous Parallel (SSP) and Asynchronous SGD with delay compensation 11 | * Neural network compression with half precision and quantization (PQ or Int8) 12 | * Shared parameter key-value pairs stored across physical nodes by DHT in shared memory 13 | * Lock-free multi-threaded training and SIMD operations 14 | * Optimizers implemented: Mini-Batch GD, Adagrad, FTRL, Adam, etc. 15 | 16 | ## List of Implemented Algorithms 17 | 18 | * Wide & Deep Model 19 | * Factorization Machine, Field-aware Factorization Machine, Neural Factorization Machine 20 | * Gradient Boosting Tree Model 21 | * Gaussian Mixture Clustering Model 22 | * Topic Model PLSA, Embedding Model 23 | * Ngram Convolutional Neural Network, Self-Attention Recurrent Neural Network 24 | * Variational AutoEncoder 25 | * Approximate Nearest Neighbor Retrieval 26 | 27 | ## Benchmark 28 | #### High performance 29 |
30 | (benchmark images: benchmark/vs_libfm.png, benchmark/vs_libffm.png, benchmark/vs_tf_cpu.png) 31 | 32 | 33 |
34 | 35 | #### Scalable 36 |
37 | (benchmark images: benchmark/4_node_ps.png, benchmark/4_node_ring.png) 38 | 39 |
40 | 41 | ## Introduction 42 | #### Finding user groups 43 | Click-through-rate prediction means delivering the right content to the right user group, so as to increase advertising revenue or transaction conversion rate. Concretely, the collected user click and behavior data are described as structured features with discrete and continuous values, normalized and de-biased, and then a suitable model is chosen to fit the data distribution of the traffic, estimating the probability that a user will be interested in a piece of content and bring a commercial conversion. Typically, all feature combinations can first be fed into the ensemble tree model `LightCTR::GBM` to discover groups, where each trained leaf node represents a user group; `LightCTR::LR` or `LightCTR::MLP` then further classifies the low-dimensional 0/1 group features built by the tree model. 44 | When there are too many categorical discrete features and the input becomes high-dimensional and sparse, tree models and neural networks may hit a processing bottleneck; `LightCTR::FM` or `LightCTR::FFM` can be used to train feature crosses over the discrete features, improving feature utilization and reducing the risk of overfitting under sparse data. Each feature dimension is mapped into a low-dimensional space, which also makes it convenient to feed the features into other models as continuous inputs. 45 | Compared with two-stage training that first pre-trains low-dimensional feature mappings with FM and then feeds them into a DNN, training low-dimensional feature embeddings end-to-end directly at the DNN input layer, with locally connected units within each field, better guarantees model freshness; alternatively, `LightCTR::NFM` and related variants, which embed a DNN inside the FM model, perform nonlinear high-order feature combinations, improving the model's representation power and yielding better AUC. 46 | 47 | #### Modeling behavior sequences 48 | Sequences of user-clicked content often carry local correlation information between items, such as clicking the same kind of product or viewing the same kind of page in succession. These local relations within behavior sequences can be captured by `LightCTR::Embedding`, yielding low-dimensional latent vector representations of the clicked content or behavior; alternatively, the variational autoencoder `LightCTR::VAE` can derive feature combinations and strengthen the expressive power of the low-dimensional features. The latent vectors can be used to judge relevance or fed directly into other models. Further, after the sequence data is smoothed, the trained behavior latent vectors are fed in temporal order into the recurrent network `LightCTR::LSTMUnit`, and the feature representation output by the RNN is passed to a `LightCTR::Softmax` classifier, which, using preset supervised labels, trains an evaluation or discrimination model of the user. 49 | When the coverage of preset labels is insufficient, the high-dimensional feature representations can be fed into `LightCTR::GMM` for unsupervised clustering; the clusters are summarized as intent clusters, serving as a basis for matching user intent and supplementing the crowd categories of user profiles. 50 | 51 | #### Content analysis 52 | Text such as user comments and the page context of search ads also contains a lot of minable user-interest information. In the search-ads scenario, for example, a user's keywords need to be matched to the auction terms with the highest semantic relevance in order to serve high-conversion ads, so analyzing text is an important basis for CTR prediction. To extract the semantics of a whole passage, one can first pre-train a word-vector table with `LightCTR::Embedding`, weight the word vectors of the words appearing in the text by term frequency, and remove the principal component; or, following the Skip-thought approach combined with negative sampling, feed each word vector of the text in order into `LightCTR::LSTMUnit` to train a semantic feature representation of the text, where the inner product of vectors corresponds to text relevance. In addition, following DSSM, the matrix formed by the text's word vectors can be fed into the Ngram convolutional network `LightCTR::CNN` to extract local semantic-correlation features of sentences, and a pairwise discrimination model against preset labels can be trained with positive and negative samples. 53 | When text lacks classification labels, `LightCTR::PLSA` can obtain document topic distributions in an unsupervised way, which can be used to distinguish content categories by topic distribution and to compute semantic similarity between long corpora and short queries; the posterior can also be used to compute the importance of each word in context, serving as a keyword summary of long text. 54 | 55 | #### Layered model fusion 56 | More complex models bring better representation power but also increase computation time, and response time is strongly negatively correlated with click-through rate. To balance the performance and effectiveness of online CTR prediction, different models can predict layer by layer: for example, the first layer uses `LightCTR::FTRL_LR`, a simple online-learning model that induces sparse solutions, while the second layer uses a complex model such as the locally connected `LightCTR::MLP` mentioned above or `LightCTR::NFM` for fine-grained prediction. At the system level, the weights or outputs of the last fully connected layer of the DNN can be extracted and cached as fixed representations of users or items, and the TopN results of `LightCTR::ANN` nearest-neighbor vector retrieval serve as recommendation recall, maximizing CTR/ROI while reducing the average response time of online inference. In addition, LightCTR is exploring methods such as quantile compression of model parameters and binary networks to greatly improve computational efficiency without losing prediction accuracy. 57 | 58 | #### Multi-machine, multi-threaded parallel computing 59 | LightCTR achieves high-performance single-machine numerical computation through SIMD vectorized instructions, pipeline parallelism, multi-core multi-threaded computing, cache-aware optimization and other techniques. When the number of model parameters exceeds single-machine memory, or single-machine training cannot meet freshness requirements, LightCTR further provides scalable cluster training based on the Parameter Server and Ring-AllReduce. 60 | * In Parameter Server mode, the cluster has three roles: Master, ParamServer and Worker. One Master per cluster is responsible for cluster startup and maintenance of running state; the sparse and dense tensor parameters of a large model are scattered across multiple ParamServers by DHT and cooperate with multiple Workers performing data-parallel gradient computation. In each training round, the parameters of a sample batch are first pulled from the ParamServers, and the computed parameter gradients are pushed back to the ParamServers for aggregation. The ParamServers update parameters asynchronously, lock-free and semi-synchronously, using gradient TopK truncation, delayed-gradient compensation and other techniques. Parameters are stored compactly on the ParamServers and are selected or evicted by feature hit rate; gradient traffic is compressed with variable-length encoding / half precision / Int8, and network synchronization efficiency is improved by batched parameter requests and read/write separation. 61 | * In ring-topology Ring-AllReduce mode, LightCTR dynamically self-balances the training progress of the cluster without introducing a coordinator node and, combined with a gradient-fusion mechanism, achieves efficient and highly stable decentralized gradient synchronization, which suits scalable training of dense-parameter models. In this mode every node stores the full model and can serve inference on its own; during training, each node obtains the gradient results of the other nodes within a bounded number of iterations, and with a certain cluster scale and suitable batch size and learning rate, a linear speedup of the training task can be achieved. 62 | * The LightCTR distributed cluster adopts failover mechanisms such as heartbeat monitoring and message retransmission. In addition, LightCTR is exploring network optimizations such as RDMA, DPDK and multiple NICs to reduce communication latency. 63 | 64 | ## Quick Start 65 | * LightCTR depends only on C++11 and ZeroMQ, with a lightweight and modular design 66 | * Easy to use: just change the configuration (e.g. learning rate, data source) in `main.cpp` 67 | * Run `./build.sh` to start a training task in Parameter Server mode, or `./build_ring.sh` to start in Ring-AllReduce mode 68 | * Current CI Status: [![Build Status](https://travis-ci.org/cnkuangshi/LightCTR.svg?branch=master)](https://travis-ci.org/cnkuangshi/LightCTR) on Ubuntu and MacOS 69 | 70 | ## Welcome to Contribute 71 | * Everyone interested in the intersection of machine learning and scalable systems is welcome to contribute code, create issues or open pull requests. 72 | * LightCTR is released under the Apache License, Version 2.0.
73 | 74 | ## Disclaimer 75 | * Please note that LightCTR is still undergoing and it does not give any warranties, as to the suitability or usability. 76 | 77 | -------------------------------------------------------------------------------- /benchmark/4_node_ps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/4_node_ps.png -------------------------------------------------------------------------------- /benchmark/4_node_ring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/4_node_ring.png -------------------------------------------------------------------------------- /benchmark/vs_libffm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_libffm.png -------------------------------------------------------------------------------- /benchmark/vs_libfm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_libfm.png -------------------------------------------------------------------------------- /benchmark/vs_tf_cpu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cnkuangshi/LightCTR/620437720e683a1d8f554f8cdb7421e051843616/benchmark/vs_tf_cpu.png -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# -lt 3 ]; then 3 | echo "usage: $0 [ps_num] [worker_num] [master_ip_port like 127.0.0.1:17832]" 4 | exit -1; 5 | fi 6 | 7 | cd ./LightCTR/third 8 | sh ./install_third.sh 9 | cd ../../ 10 | 11 | export LightCTR_PS_NUM=$1 12 | shift 13 | export LightCTR_WORKER_NUM=$1 14 | shift 15 | export LightCTR_MASTER_ADDR=$1 16 | 17 | make master & 18 | make ps & 19 | make worker & 20 | 21 | wait 22 | echo 23 | echo 24 | echo "[Build Success]" 25 | echo "Please copy different BIN file to corresponding machine, DON'T forget export LightCTR_PS_NUM, LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first" 26 | -------------------------------------------------------------------------------- /build_ring.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if [ $# -lt 2 ]; then 3 | echo "usage: $0 [worker_num] [master_ip_port like 127.0.0.1:17832]" 4 | exit -1; 5 | fi 6 | 7 | cd ./LightCTR/third 8 | sh ./install_third.sh 9 | cd ../../ 10 | 11 | export LightCTR_PS_NUM=0 12 | export LightCTR_WORKER_NUM=$1 13 | shift 14 | export LightCTR_MASTER_ADDR=$1 15 | 16 | make ring_master & 17 | make ring_worker & 18 | 19 | wait 20 | echo 21 | echo 22 | echo "[Build Success]" 23 | echo "Please copy different BIN file to corresponding machine, DON'T forget export LightCTR_WORKER_NUM and LightCTR_MASTER_ADDR, run Master first" 24 | -------------------------------------------------------------------------------- /data/proc_file_split.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | 4 | file_origin = open(sys.argv[1], 'r') 5 | shardings = 
int(sys.argv[2]) 6 | 7 | names = [] 8 | for i in xrange(shardings): 9 | name = sys.argv[1] + '_' + str(i + 1) # start from 1 10 | names.append(name) 11 | 12 | file_io_handle = [] 13 | for i in xrange(shardings): 14 | file_io_handle.append(open(names[i], 'w')) 15 | 16 | rand_stand = 1.0 / shardings 17 | 18 | for line in file_origin: 19 | v = random.random() 20 | part = int(v / rand_stand) 21 | assert part < shardings 22 | 23 | file_io_handle[part].write(line.strip()) 24 | file_io_handle[part].write('\n') 25 | 26 | for file in file_io_handle: 27 | file.close() 28 | 29 | file_origin.close() 30 | -------------------------------------------------------------------------------- /data/proc_text_topic.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # 3 | # proc_text_topic.py 4 | # LightCTR 5 | # 6 | # Created by SongKuangshi on 2017/10/15. 7 | # Copyright © 2017年 SongKuangshi. All rights reserved. 8 | 9 | import os 10 | import sys 11 | 12 | stopset = {'a','the','of','to','an','but','or','its','about','would','and','in','that','is','are','be','been','will','this','was','for','on','as','from','at','by','with','have','which','has','had','were','it','not'} 13 | 14 | def generate(infile,word_id_file_path,training_file_path,vocab_size): 15 | term_dict = {} 16 | 17 | infp = open(infile,'r') 18 | for line in infp: 19 | line = line.rstrip() 20 | if line.find('<') != -1 and line.find('>') != -1: 21 | continue 22 | info = line.split(' ') 23 | for term in info: 24 | term = term.lower() 25 | if term == '' or not term.isalpha() or term in stopset: 26 | continue 27 | if term.isspace() or term.find(".") != -1 or term.find(" ") != -1: 28 | continue 29 | if term in term_dict: 30 | term_dict[term] += 1 31 | else: 32 | term_dict[term] = 1 33 | term_list = sorted(term_dict.items(),key=lambda x : x[1],reverse=True) 34 | print len(term_list) 35 | term_list = term_list[:int(vocab_size)] 36 | 37 | termid_dict = {} 38 | term_id = 0 39 | for term in term_list: 40 | termid_dict[term[0]] = term_id 41 | term_id += 1 42 | orderitems=[[v[1],v[0]] for v in termid_dict.items()] 43 | orderitems.sort() 44 | 45 | outfp = open(word_id_file_path,'w') 46 | for i in range(0, len(orderitems)): 47 | outfp.write('%d %s %d\n'%(orderitems[i][0], orderitems[i][1], term_dict[orderitems[i][1]])) 48 | outfp.close() 49 | 50 | print("Vocab file generating complete") 51 | # exit() 52 | 53 | infp.seek(0,0) 54 | outfp = open(training_file_path,'w') 55 | 56 | for line in infp: 57 | if line.find('<') != -1 and line.find('>') != -1: 58 | continue 59 | term_tf = {} 60 | info = line.rstrip().split(' ') 61 | flag = 1; 62 | for term in info: 63 | term = term.lower() 64 | if term not in termid_dict: 65 | continue 66 | if term in term_tf: 67 | term_tf[term] += 1 68 | else: 69 | term_tf[term] = 1 70 | flag = 0 71 | if flag == 1: 72 | continue 73 | out_line = '' 74 | for i in range(0, len(orderitems)): 75 | if out_line != '': 76 | out_line += ' ' 77 | term = orderitems[i][1] 78 | if term not in term_tf: 79 | out_line += '0' 80 | else: 81 | out_line += '%d'%(term_tf.get(term)) 82 | outfp.write(out_line+'\n') 83 | 84 | infp.close() 85 | outfp.close() 86 | 87 | if __name__ == '__main__': 88 | if len(sys.argv) != 3: 89 | print >> sys.stderr,'Usage : [%s] [input data file] [vocab size]'%(sys.argv[0]) 90 | sys.exit(0) 91 | generate(sys.argv[1],"./vocab.txt","./train_topic.csv",sys.argv[2]) 92 | --------------------------------------------------------------------------------