├── GPUConfig.cuh ├── IMDBDemoCUDABits.cu ├── LICENSE ├── MultiClassTsetlinMachine.cuh ├── README.md ├── TsetlinMachine.cu ├── TsetlinMachine.cuh ├── TsetlinMachineConfig.cuh ├── TsetlinMachineKernels.cu ├── TsetlinMachineKernels.cuh ├── makefile └── produce_dataset.py /GPUConfig.cuh: -------------------------------------------------------------------------------- 1 | #define GRID_SIZE (16*13) 2 | #define BLOCK_SIZE 256 -------------------------------------------------------------------------------- /IMDBDemoCUDABits.cu: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2019 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 

This code implements a multiclass version of the Tsetlin Machine from paper arXiv:1804.01508
https://arxiv.org/abs/1804.01508

*/

/* NOTE(review): the header names on the system #include lines were missing
   from this copy of the file; the list below is reconstructed from the
   symbols this file uses -- confirm against the upstream source. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>

#include <cuda.h>
#include <curand.h>
#include <curand_kernel.h>

#include "TsetlinMachineConfig.cuh"
#include "MultiClassTsetlinMachine.cuh"
#include "GPUConfig.cuh"

#define NUMBER_OF_TRAINING_EXAMPLES 25000
#define NUMBER_OF_TEST_EXAMPLES 25000

#define EXPERIMENTS 100
#define EPOCHS 200

#define DEVICE 0

/* Abort with a readable message when a CUDA runtime call fails, instead of
   letting a sticky error surface later as a mysterious failure. */
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
	do {                                                                   \
		cudaError_t cuda_check_err_ = (call);                              \
		if (cuda_check_err_ != cudaSuccess) {                              \
			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
			        cudaGetErrorString(cuda_check_err_));                  \
			exit(EXIT_FAILURE);                                            \
		}                                                                  \
	} while (0)
#endif

int y_train[NUMBER_OF_TRAINING_EXAMPLES], y_test[NUMBER_OF_TEST_EXAMPLES];
int *X_train;
int *X_test;

/*
 * Load one dataset file into a bit-packed literal matrix.
 *
 * Each text line holds FEATURES space-separated values followed by the class
 * label. Feature j sets bit j of the example's row when its value is 1, and
 * bit (j + FEATURES) -- the negated literal -- otherwise. A row occupies
 * LA_CHUNKS ints in X.
 *
 * path               file to read
 * X                  destination, number_of_examples * LA_CHUNKS ints
 * y                  destination for the labels, number_of_examples ints
 * number_of_examples number of lines expected in the file
 *
 * Exits the process if the file cannot be opened or is shorter / narrower
 * than expected.
 */
static void load_dataset(const char *path, int *X, int *y, int number_of_examples)
{
	const char *delimiter = " ";
	char *line = NULL;
	size_t line_capacity = 0;

	memset(X, 0, (size_t)number_of_examples * LA_CHUNKS * sizeof(int));

	FILE *fp = fopen(path, "r");
	if (fp == NULL) {
		printf("Error opening\n");
		exit(EXIT_FAILURE);
	}

	for (int i = 0; i < number_of_examples; i++) {
		if (getline(&line, &line_capacity, fp) == -1) {
			fprintf(stderr, "%s: expected %d examples, file ended at example %d\n",
			        path, number_of_examples, i);
			exit(EXIT_FAILURE);
		}

		/* Work on an unsigned view so that setting bit 31 is well defined. */
		unsigned int *row = (unsigned int *)&X[(size_t)i * LA_CHUNKS];

		char *token = strtok(line, delimiter);
		for (int j = 0; j < FEATURES; j++) {
			if (token == NULL) {
				fprintf(stderr, "%s: example %d has fewer than %d features\n",
				        path, i, FEATURES);
				exit(EXIT_FAILURE);
			}

			/* Plain literal for a 1, negated literal otherwise. */
			int literal = (atoi(token) == 1) ? j : j + FEATURES;
			row[literal / INT_SIZE] |= 1u << (literal % INT_SIZE);

			token = strtok(NULL, delimiter);
		}

		if (token == NULL) {
			fprintf(stderr, "%s: example %d has no class label\n", path, i);
			exit(EXIT_FAILURE);
		}
		y[i] = atoi(token);
	}

	free(line); /* buffer allocated by getline */
	fclose(fp);
}

/*
 * Read the training and test datasets into the global X_train/y_train and
 * X_test/y_test buffers. X_train and X_test must already be allocated.
 */
void read_file(void)
{
	load_dataset("IMDBTrainingData.txt", X_train, y_train, NUMBER_OF_TRAINING_EXAMPLES);
	load_dataset("IMDBTestData.txt", X_test, y_test, NUMBER_OF_TEST_EXAMPLES);
}

/*
 * Initialise one curandState per launched thread.
 * `state` must hold at least gridDim.x * blockDim.x entries.
 */
__global__ void setup_kernel(curandState *state)
{
	int id = threadIdx.x + blockIdx.x * blockDim.x;

	/* Each thread gets same seed, a different sequence
	   number, no offset */
	curand_init(1234, id, 0, &state[id]);
}

/*
 * Run EXPERIMENTS independent experiments of EPOCHS epochs each; after every
 * epoch print per-class precision/recall/F-score and append a line per class
 * to ./statistics.txt.
 */
int main(void)
{
	FILE *fp;

	curandState *devStates;

	CUDA_CHECK(cudaSetDevice(DEVICE));

	int numSMs;
	CUDA_CHECK(cudaDeviceGetAttribute(&numSMs, cudaDevAttrMultiProcessorCount, DEVICE));

	printf("Num SMS: %d\n", numSMs);

	// Allocate Unified Memory - accessible from CPU or GPU

	CUDA_CHECK(cudaMallocManaged(&X_train, (size_t)NUMBER_OF_TRAINING_EXAMPLES * LA_CHUNKS * sizeof(int)));
	CUDA_CHECK(cudaMallocManaged(&X_test, (size_t)NUMBER_OF_TEST_EXAMPLES * LA_CHUNKS * sizeof(int)));

	read_file();

	CUDA_CHECK(cudaMallocManaged((void **)&devStates, (size_t)GRID_SIZE * BLOCK_SIZE *
	                             sizeof(curandState)));

	/* NOTE(review): the launch configuration was missing from this copy of the
	   file; GRID_SIZE x BLOCK_SIZE matches the size of devStates allocated
	   just above -- confirm against the upstream source. */
	setup_kernel<<<GRID_SIZE, BLOCK_SIZE>>>(devStates);
	CUDA_CHECK(cudaGetLastError());
	CUDA_CHECK(cudaDeviceSynchronize());

	fp = fopen("./statistics.txt","w");
	if (fp == NULL) {
		printf("Error opening\n");
		exit(EXIT_FAILURE);
	}

	{
		/* Scoped so the machine is destroyed (releasing its GPU buffers) before
		   the remaining buffers are freed below. It has automatic storage and
		   must never be passed to `delete`. */
		MultiClassTsetlinMachine<2> mc_tm;

		for (int e = 0; e < EXPERIMENTS; ++e) {
			printf("\nEXPERIMENT %d\n", e+1);
			mc_tm.initialize();
			for (int i = 0; i < EPOCHS; ++i) {
				printf("\n##### EPOCH %d #####\n", i+1);

				clock_t start, end;
				double gpu_time_testing, gpu_time_training;

				start = clock();
				mc_tm.fit(devStates, X_train, y_train, NUMBER_OF_TRAINING_EXAMPLES, S, 1);
				end = clock();
				gpu_time_training = ((double) (end - start)) / CLOCKS_PER_SEC;

				start = clock();
				mc_tm.evaluate(X_test, y_test, NUMBER_OF_TEST_EXAMPLES);
				end = clock();
				gpu_time_testing = ((double) (end - start)) / CLOCKS_PER_SEC;

				for (int n = 0; n < 2; ++n) {
					printf("\n-- CLASS %d --\n\n", n+1);

					float precision = 1.0 * mc_tm.true_positive[n] / (mc_tm.true_positive[n] + mc_tm.false_positive[n]);
					printf("PRECISION: %.3f\n", precision);
					float recall = 1.0 * mc_tm.true_positive[n] / (mc_tm.true_positive[n] + mc_tm.false_negative[n]);
					printf("RECALL: %.3f\n", recall);
					float fscore = 2 * precision * recall / (precision + recall);
					printf("F-SCORE: %.3f\n", fscore);

					fprintf(fp, "%d %d %d %d %d %d %d %.4f %.4f %.4f %f %f\n", e, i, n, mc_tm.true_positive[n], mc_tm.false_positive[n],
						mc_tm.true_negative[n], mc_tm.false_negative[n], precision, recall, fscore, gpu_time_training, gpu_time_testing);
					fflush(fp);
				}
				printf("\n");
				printf("TRAINING TIME: %f\n", gpu_time_training);
				printf("TESTING TIME: %f\n", gpu_time_testing);
			}
		}
	}

	fclose(fp);

	CUDA_CHECK(cudaFree(devStates));
	CUDA_CHECK(cudaFree(X_train));
	CUDA_CHECK(cudaFree(X_test));

	return 0;
}
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this
software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MultiClassTsetlinMachine.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2019 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

This code implements a multiclass version of the Tsetlin Machine from paper arXiv:1804.01508
https://arxiv.org/abs/1804.01508

*/

#include <stdio.h>
#include <stdlib.h>

#include "TsetlinMachine.cuh"

/*
 * Shuffle the first n entries of `array` in place, drawing from rand().
 * Arrays with fewer than two entries are left untouched.
 */
static void shuffle(int *array, size_t n)
{
	if (n < 2) {
		return;
	}

	for (size_t i = 0; i < n - 1; i++) {
		/* Pick a position in [i, n) and swap it into slot i. */
		size_t j = i + rand() / (RAND_MAX / (n - i) + 1);
		int swapped = array[j];
		array[j] = array[i];
		array[i] = swapped;
	}
}

/*
 * N-class classifier built from one TsetlinMachine per class; the class
 * whose machine gives the highest score wins.
 *
 * NOTE(review): the template parameter list was missing from this copy of the
 * file; `<int N>` is reconstructed from the `tsetlin_machines[N]` member and
 * the `MultiClassTsetlinMachine<2>` instantiation in the demo -- confirm
 * against the upstream source.
 */
template <int N> class MultiClassTsetlinMachine {
	TsetlinMachine tsetlin_machines[N];

public:
	/* Per-class confusion counts, filled in by evaluate(). */
	int false_positive[N], false_negative[N], true_positive[N], true_negative[N];

	MultiClassTsetlinMachine(void)
	{
	}

	~MultiClassTsetlinMachine(void)
	{
		/* The machines are array members, so their destructors run
		   automatically; they must not be passed to `delete`. */
	}

	/* Reset every per-class machine to its initial state. */
	void initialize()
	{
		for (int tm = 0; tm < N; tm++) {
			tsetlin_machines[tm].initialize();
		}
	}

	/*
	 * Train on the dataset for `epochs` passes, visiting the examples in a
	 * freshly shuffled order on every pass.
	 *
	 * devStates          RNG states forwarded to the per-class machines
	 * X                  number_of_examples rows of LA_CHUNKS ints
	 * y                  class label per example, each in [0, N)
	 * number_of_examples number of rows in X / entries in y
	 * s                  forwarded unchanged to TsetlinMachine::update
	 *
	 * Exits the process if the index buffer cannot be allocated.
	 */
	void fit(curandState *devStates, int *X, int *y, int number_of_examples, float s, int epochs)
	{
		if (number_of_examples <= 0) {
			return;
		}

		int *index = (int *)malloc(sizeof(int)*number_of_examples);
		if (index == NULL) {
			fprintf(stderr, "MultiClassTsetlinMachine::fit: out of memory\n");
			exit(EXIT_FAILURE);
		}

		for (int i = 0; i < number_of_examples; i++) {
			index[i] = i;
		}

		for (int epoch = 0; epoch < epochs; epoch++) {
			shuffle(index, number_of_examples);

			for (int i = 0; i < number_of_examples; i++) {
				update(devStates, &X[index[i]*LA_CHUNKS], y[index[i]], s);
			}
		}

		free(index);
	}

	/*
	 * Train on one example: the machine of class y is updated with target 1
	 * and one other, randomly chosen, machine with target 0.
	 * y must be in [0, N).
	 */
	void update(curandState *devStates, int *Xi, int y, float s)
	{
		tsetlin_machines[y].update(devStates, Xi, 1, s);

		if (N < 2) {
			/* No other class exists to serve as the negative example. */
			return;
		}

		/* Dividing by RAND_MAX + 1.0 keeps the draw strictly below N, so
		   rand() == RAND_MAX can no longer index one past the array. */
		int neg_y = (int)(N * (rand() / (RAND_MAX + 1.0)));
		while (neg_y == y) {
			neg_y = (int)(N * (rand() / (RAND_MAX + 1.0)));
		}

		tsetlin_machines[neg_y].update(devStates, Xi, 0, s);
	}

	/*
	 * Classify every example and rebuild the per-class confusion counts
	 * (true/false positive/negative) from scratch.
	 * Every y[i] must be in [0, N).
	 */
	void evaluate(int *X, int *y, int number_of_examples)
	{
		for (int n = 0; n < N; n++) {
			true_negative[n] = 0;
			true_positive[n] = 0;
			false_negative[n] = 0;
			false_positive[n] = 0;
		}

		for (int i = 0; i < number_of_examples; i++) {
			int predicted = predict(&X[i*LA_CHUNKS]);
			int actual = y[i];

			if (predicted == actual) {
				true_positive[actual]++;
			} else {
				false_negative[actual]++;
				false_positive[predicted]++;
			}

			/* Every class that was neither predicted nor the true label
			   was correctly rejected. */
			for (int n = 0; n < N; n++) {
				if (n != predicted && n != actual) {
					true_negative[n]++;
				}
			}
		}
	}

	/*
	 * Return the class whose machine scores highest on Xi; ties go to the
	 * lowest class index.
	 */
	int predict(int *Xi)
	{
		int max_category = 0;
		int max_score = tsetlin_machines[0].score(Xi);

		for (int i = 1; i < N; i++) {
			int score = tsetlin_machines[i].score(Xi);
			if (score > max_score) {
				max_score = score;
				max_category = i;
			}
		}

		return max_category;
	}

};
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tsetlin Machine with Bitwise Operators Implemented in CUDA 2 | 3 | A CUDA implementation of the
Tsetlin Machine (https://arxiv.org/abs/1804.01508) using bitwise operations for increased learning- and classification speed. On the IMDB dataset, parallel bit manipulation with CUDA leads to approx. 50 times faster learning compared to the vanilla Cython (https://github.com/cair/TsetlinMachine) and C (https://github.com/cair/TsetlinMachineC) implementations. 4 | 5 | ## Bit-Based Representation and Manipulation of Patterns 6 | 7 | The Tsetlin Machine solves complex pattern recognition problems with propositional formulas, composed by a collective of Tsetlin Automata. In this implementation, we express both inputs, patterns, and outputs as bits, while recognition and learning rely on bit manipulation. Briefly stated, the states of the Tsetlin Automata are jointly represented using multiple sequences of bits (e.g., 8 sequences to represent an 8 bit state index). Sequence 1 contains the first bit of each state index. Sequence 2 contains the second bit, and so on, as exemplified below for 24 Tsetlin Automata: 8 | 9 | ![Figure 4](https://github.com/olegranmo/blob/blob/master/Bit_Manipulation_3.png) 10 | 11 | The benefit of this representation is that the action of each Tsetlin Automaton is readily available from the most significant bit (sequence 8 in the figure). Thus, the output (recognized or not recognized pattern) can be obtained from the input based on fast bitwise operators (NOT, AND, and CMP - comparison). When deployed after training, only the sequence containing the most significant bit is required. The other sequences can be discarded because these bits are only used to keep track of the learning. This provides a further reduction in memory usage. 
12 | 13 | ## IMDB Demo 14 | ```bash 15 | python ./produce_dataset.py 16 | make 17 | ./IMDBDemoCUDABits 18 | 19 | Num SMS: 80 20 | 21 | EXPERIMENT 1 22 | 23 | ##### EPOCH 1 ##### 24 | 25 | -- CLASS 1 -- 26 | 27 | PRECISION: 0.876 28 | RECALL: 0.856 29 | F-SCORE: 0.866 30 | 31 | -- CLASS 2 -- 32 | 33 | PRECISION: 0.859 34 | RECALL: 0.878 35 | F-SCORE: 0.869 36 | 37 | TRAINING TIME: 28.366195 38 | TESTING TIME: 19.398722 39 | 40 | ##### EPOCH 2 ##### 41 | 42 | -- CLASS 1 -- 43 | 44 | PRECISION: 0.874 45 | RECALL: 0.869 46 | F-SCORE: 0.872 47 | 48 | -- CLASS 2 -- 49 | 50 | PRECISION: 0.870 51 | RECALL: 0.875 52 | F-SCORE: 0.872 53 | 54 | TRAINING TIME: 29.422352 55 | TESTING TIME: 19.346641 56 | 57 | ##### EPOCH 3 ##### 58 | 59 | -- CLASS 1 -- 60 | 61 | PRECISION: 0.877 62 | RECALL: 0.877 63 | F-SCORE: 0.877 64 | 65 | -- CLASS 2 -- 66 | 67 | PRECISION: 0.877 68 | RECALL: 0.877 69 | F-SCORE: 0.877 70 | 71 | TRAINING TIME: 29.098980 72 | TESTING TIME: 19.351734 73 | ... 74 | 75 | ##### EPOCH 98 ##### 76 | 77 | -- CLASS 1 -- 78 | 79 | PRECISION: 0.903 80 | RECALL: 0.888 81 | F-SCORE: 0.895 82 | 83 | -- CLASS 2 -- 84 | 85 | PRECISION: 0.890 86 | RECALL: 0.905 87 | F-SCORE: 0.897 88 | 89 | TRAINING TIME: 26.510541 90 | TESTING TIME: 19.653829 91 | 92 | ##### EPOCH 99 ##### 93 | 94 | -- CLASS 1 -- 95 | 96 | PRECISION: 0.909 97 | RECALL: 0.883 98 | F-SCORE: 0.896 99 | 100 | -- CLASS 2 -- 101 | 102 | PRECISION: 0.886 103 | RECALL: 0.911 104 | F-SCORE: 0.899 105 | 106 | TRAINING TIME: 26.439740 107 | TESTING TIME: 19.726412 108 | 109 | ##### EPOCH 100 ##### 110 | 111 | -- CLASS 1 -- 112 | 113 | PRECISION: 0.916 114 | RECALL: 0.873 115 | F-SCORE: 0.894 116 | 117 | -- CLASS 2 -- 118 | 119 | PRECISION: 0.879 120 | RECALL: 0.920 121 | F-SCORE: 0.899 122 | 123 | TRAINING TIME: 25.989568 124 | TESTING TIME: 19.775639 125 | 126 | ``` 127 | ## Further Work 128 | 129 | * Perform a more extensive hyperparameter search (manipulating THRESHOLD, CLAUSES, STATE_BITS, and S in 
TsetlinMachineConfig.cuh). 130 | * Convolutional approach for more fine-grained modelling of semantics. 131 | -------------------------------------------------------------------------------- /TsetlinMachine.cu: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2019 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 

This code implements the Tsetlin Machine from paper arXiv:1804.01508
https://arxiv.org/abs/1804.01508

*/

/* The configuration comes first so that the preprocessor conditionals in
   TsetlinMachine.cuh see FEATURES already defined. */
#include "TsetlinMachineConfig.cuh"
#include "TsetlinMachineKernels.cuh"
#include "TsetlinMachine.cuh"
#include "GPUConfig.cuh"

/* NOTE(review): the header names on the system #include lines were missing
   from this copy of the file; the list below is reconstructed from the
   symbols this file uses -- confirm against the upstream source. */
#include <stdio.h>
#include <stdlib.h>
#include <curand.h>
#include <curand_kernel.h>

/* Abort with a readable message when a CUDA runtime call fails. */
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
	do {                                                                   \
		cudaError_t cuda_check_err_ = (call);                              \
		if (cuda_check_err_ != cudaSuccess) {                              \
			fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
			        cudaGetErrorString(cuda_check_err_));                  \
			exit(EXIT_FAILURE);                                            \
		}                                                                  \
	} while (0)
#endif

/*
 * Sum the clause votes on the host -- even-indexed clauses count +1,
 * odd-indexed clauses count -1 -- and clamp the result to
 * [-THRESHOLD, THRESHOLD]. The GPU must have finished writing
 * clause_output before this is called.
 */
static int clamped_class_sum(const int *clause_output)
{
	int sum = 0;
	for (int clause = 0; clause < CLAUSES; ++clause) {
		int sign = 1 - 2 * (clause & 1);
		sum += sign * clause_output[clause];
	}

	if (sum > THRESHOLD) {
		return THRESHOLD;
	}
	if (sum < -THRESHOLD) {
		return -THRESHOLD;
	}
	return sum;
}

/**********************************/
/***** Constructor/Destructor *****/
/**********************************/

/* Allocate the machine's buffers in managed memory and reset the automata. */
TsetlinMachine::TsetlinMachine()
{
	CUDA_CHECK(cudaMallocManaged(&class_sum, sizeof(int)));
	CUDA_CHECK(cudaMallocManaged(&ta_state, CLAUSES*LA_CHUNKS*STATE_BITS*sizeof(int)));
	CUDA_CHECK(cudaMallocManaged(&clause_output, CLAUSES*sizeof(int)));
	CUDA_CHECK(cudaMallocManaged(&clause_feedback, CLAUSES*sizeof(int)));
	CUDA_CHECK(cudaMallocManaged(&all_exclude, CLAUSES*sizeof(int)));
	//cudaMallocManaged(&la_feedback, LA_CHUNKS*sizeof(int));

	initialize();
}

/* Release the buffers; errors are ignored because a destructor cannot
   report them. */
TsetlinMachine::~TsetlinMachine()
{
	cudaFree(class_sum);
	cudaFree(ta_state);
	cudaFree(clause_output);
	cudaFree(clause_feedback);
	cudaFree(all_exclude);
}

/*
 * Reset every Tsetlin Automaton: all state bits below the most significant
 * one are set and the most significant bit is cleared.
 */
void TsetlinMachine::initialize()
{
	// Initialize Tsetlin Automata states
	for (int j = 0; j < CLAUSES; ++j) {
		for (int k = 0; k < LA_CHUNKS; ++k) {
			int *bits = &ta_state[j*LA_CHUNKS*STATE_BITS + k*STATE_BITS];

			for (int b = 0; b < STATE_BITS-1; ++b) {
				bits[b] = ~0;
			}
			bits[STATE_BITS-1] = 0;
		}
	}
}

/****************************/
/***** Public Functions *****/
/****************************/

/*
 * Train on one example.
 *
 * devStates  one curandState per launched thread (GRID_SIZE * BLOCK_SIZE)
 * Xi         LA_CHUNKS ints of packed literals, readable from the GPU
 * target     1 when the example belongs to this machine's class, else 0
 * s          forwarded to the type I feedback kernel
 *
 * NOTE(review): the launch configurations were missing from this copy of the
 * file (`<<>>`); GRID_SIZE x BLOCK_SIZE is used because GPUConfig.cuh defines
 * exactly those two values -- confirm against the upstream source.
 */
void TsetlinMachine::update(curandState *devStates, int *Xi, int target, float s)
{
	/* Kernels launched on the same stream run in order, so a host-side
	   synchronisation is only needed before the host reads clause_output. */
	initialize_clause_output<<<GRID_SIZE, BLOCK_SIZE>>>(clause_output);
	CUDA_CHECK(cudaGetLastError());

	calculate_clause_output<<<GRID_SIZE, BLOCK_SIZE>>>(ta_state, clause_output, Xi);
	CUDA_CHECK(cudaGetLastError());
	CUDA_CHECK(cudaDeviceSynchronize());

	int sum = clamped_class_sum(clause_output);

	generate_clause_feedback<<<GRID_SIZE, BLOCK_SIZE>>>(devStates, clause_feedback, sum, target);
	CUDA_CHECK(cudaGetLastError());

	type_i_feedback<<<GRID_SIZE, BLOCK_SIZE>>>(devStates, ta_state, clause_feedback, clause_output, Xi, s);
	CUDA_CHECK(cudaGetLastError());

	type_ii_feedback<<<GRID_SIZE, BLOCK_SIZE>>>(ta_state, clause_feedback, clause_output, Xi);
	CUDA_CHECK(cudaGetLastError());

	/* Leave the GPU idle on return so the host may touch managed memory. */
	CUDA_CHECK(cudaDeviceSynchronize());
}

/* Return the raw state word stored at index `id` of the automata array. */
int TsetlinMachine::get_state(int id)
{
	return ta_state[id];
}

/*
 * Score one example: the clamped sum of clause votes, where clauses flagged
 * in all_exclude by the prediction kernels contribute nothing.
 */
int TsetlinMachine::score(int *Xi)
{
	initialize_clause_output_predict<<<GRID_SIZE, BLOCK_SIZE>>>(clause_output, all_exclude);
	CUDA_CHECK(cudaGetLastError());

	calculate_clause_output_predict<<<GRID_SIZE, BLOCK_SIZE>>>(ta_state, clause_output, all_exclude, Xi);
	CUDA_CHECK(cudaGetLastError());

	update_with_all_exclude<<<GRID_SIZE, BLOCK_SIZE>>>(clause_output, all_exclude);
	CUDA_CHECK(cudaGetLastError());

	/* The host reads clause_output next. */
	CUDA_CHECK(cudaDeviceSynchronize());

	return clamped_class_sum(clause_output);
}

-------------------------------------------------------------------------------- /TsetlinMachine.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2018 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the
Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

This code implements the Tsetlin Machine from paper arXiv:1804.01508
https://arxiv.org/abs/1804.01508

*/

#ifndef TSETLIN_MACHINE_CUH
#define TSETLIN_MACHINE_CUH

/* NOTE(review): the header names on the two system #include lines were
   missing from this copy of the file; these are reconstructed from the use of
   curandState below -- confirm against the upstream source. */
#include <curand.h>
#include <curand_kernel.h>

/* FEATURES and CLAUSES must be defined before the conditional below is
   evaluated; an undefined FEATURES would silently be treated as 0 there. */
#include "TsetlinMachineConfig.cuh"

/* Number of literal bits packed into one int. */
#define INT_SIZE 32

/* ints needed for the 2*FEATURES literals of one clause / one example. */
#define LA_CHUNKS (((2*FEATURES-1)/INT_SIZE + 1))
#define CLAUSE_CHUNKS ((CLAUSES-1)/INT_SIZE + 1)

/* Mask selecting the bits of the last chunk that correspond to real literals. */
#if ((FEATURES*2) % INT_SIZE != 0)
#define FILTER (~(0xffffffff << ((FEATURES*2) % INT_SIZE)))
#else
#define FILTER 0xffffffff
#endif

/*
 * A single Tsetlin Machine. Member functions are defined in
 * TsetlinMachine.cu.
 */
class TsetlinMachine {
	/* Tsetlin Machine data structures */

	int *class_sum;

	int *ta_state;         /* automata state bits, CLAUSES*LA_CHUNKS*STATE_BITS ints */
	int *clause_output;    /* one int per clause */
	int *clause_feedback;  /* one int per clause */
	int *all_exclude;      /* one int per clause */

	/* The object owns the buffers above and frees them in its destructor, so
	   a copy would free them twice. Copying is disabled: declared private and
	   intentionally left undefined. */
	TsetlinMachine(const TsetlinMachine &);
	TsetlinMachine &operator=(const TsetlinMachine &);

public:
	TsetlinMachine();

	~TsetlinMachine();

	/* Train on one example; target is 1 for this machine's class, else 0. */
	void update(curandState *devStates, int *Xi, int target, float s);

	/* Clause vote sum for one example. */
	int score(int *Xi);

	/* Raw state word at index id of ta_state. */
	int get_state(int id);

	/* Reset all automata to their initial state. */
	void initialize();
};

#endif /* TSETLIN_MACHINE_CUH */

-------------------------------------------------------------------------------- /TsetlinMachineConfig.cuh: -------------------------------------------------------------------------------- 1 | #define THRESHOLD 40 2 | #define FEATURES 5000 3 | #define CLAUSES 10000 4 | #define
BOOST_TRUE_POSITIVE_FEEDBACK 1 5 | #define STATE_BITS 8 6 | #define S 27.0 7 | -------------------------------------------------------------------------------- /TsetlinMachineKernels.cu: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Copyright (c) 2019 Ole-Christoffer Granmo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | This code implements a multiclass version of the Tsetlin Machine from paper arXiv:1804.01508 24 | https://arxiv.org/abs/1804.01508 25 | 26 | */ 27 | 28 | 29 | #include "TsetlinMachineConfig.cuh" 30 | #include "TsetlinMachine.cuh" 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | // Increment the states of each of those 32 Tsetlin Automata flagged in the active bit vector. 
/*
 * Increment the state of every automaton flagged in `active` (one bit per
 * automaton) within chunk `chunk` of clause `clause`. The state is stored as
 * STATE_BITS bit planes, so this is a ripple-carry add of 1 across the
 * planes. Automata that overflow are saturated at the all-ones state.
 */
__device__ inline void inc(int *ta_state, int clause, int chunk, unsigned int active)
{
	int *bits = &ta_state[clause*LA_CHUNKS*STATE_BITS + chunk*STATE_BITS];

	unsigned int carry = active;
	for (int b = 0; b < STATE_BITS && carry != 0; ++b) {
		unsigned int carry_next = bits[b] & carry; // Bits that overflow into the next plane
		bits[b] = bits[b] ^ carry;                 // Add without carry
		carry = carry_next;
	}

	if (carry != 0) {
		/* Overflow past the top plane: pin those automata at the maximum. */
		for (int b = 0; b < STATE_BITS; ++b) {
			bits[b] |= carry;
		}
	}
}

/*
 * Decrement the state of every automaton flagged in `active`; the mirror
 * image of inc(). Automata that underflow are saturated at the all-zeros
 * state.
 */
__device__ inline void dec(int *ta_state, int clause, int chunk, unsigned int active)
{
	int *bits = &ta_state[clause*LA_CHUNKS*STATE_BITS + chunk*STATE_BITS];

	unsigned int borrow = active;
	for (int b = 0; b < STATE_BITS && borrow != 0; ++b) {
		unsigned int borrow_next = (~bits[b]) & borrow; // Bits that must borrow from the next plane
		bits[b] = bits[b] ^ borrow;                     // Subtract without borrow
		borrow = borrow_next;
	}

	if (borrow != 0) {
		/* Underflow past the top plane: pin those automata at the minimum. */
		for (int b = 0; b < STATE_BITS; ++b) {
			bits[b] &= ~borrow;
		}
	}
}

/*
 * Mask of the literal bits that are meaningful in chunk la_chunk: FILTER for
 * the last chunk, all bits otherwise.
 */
__device__ inline unsigned int valid_literal_mask(int la_chunk)
{
	return (la_chunk == LA_CHUNKS - 1) ? (unsigned int)FILTER : 0xffffffffu;
}

/*
 * Type I feedback for every clause whose clause_feedback is 1.
 *
 * Each (clause, chunk) pair is handled by exactly one thread. `state` must
 * hold one curandState per launched thread (gridDim.x * blockDim.x entries).
 * Each automaton in the chunk is selected with probability 1/s.
 */
__global__ void type_i_feedback(curandState *state, int *ta_state, int *clause_feedback, int *clause_output, int *Xi, float s)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	/* Copy state to local memory for efficiency */
	curandState localState = state[index];

	for (int i = index; i < CLAUSES*LA_CHUNKS; i += stride) {
		int clause = i / LA_CHUNKS;

		if (clause_feedback[clause] != 1) {
			continue;
		}

		int la_chunk = i % LA_CHUNKS;

		// Generate random bit values: each bit is set with probability 1/s.
		// Built from zero on an unsigned value so that no indeterminate value
		// is read and setting bit 31 is well defined.
		unsigned int la_feedback = 0;
		for (int b = 0; b < INT_SIZE; ++b) {
			if (curand_uniform(&localState) <= 1.0/s) {
				la_feedback |= (1u << b);
			}
		}

		if (clause_output[clause]) {
			// NOTE(review): this tests whether the macro is defined, not its
			// value; defining it as 0 still enables boosting.
			#ifdef BOOST_TRUE_POSITIVE_FEEDBACK
				inc(ta_state, clause, la_chunk, Xi[la_chunk]);
			#else
				inc(ta_state, clause, la_chunk, Xi[la_chunk] & (~la_feedback));
			#endif

			dec(ta_state, clause, la_chunk, (~Xi[la_chunk]) & la_feedback);
		} else {
			dec(ta_state, clause, la_chunk, la_feedback);
		}
	}

	state[index] = localState;
}

/*
 * Type II feedback for every clause whose clause_feedback is -1 and whose
 * output is non-zero: increments the automata of literals that are 0 in Xi
 * and whose most significant state bit is clear.
 */
__global__ void type_ii_feedback(int *ta_state, int *clause_feedback, int *clause_output, int *Xi)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	for (int i = index; i < CLAUSES*LA_CHUNKS; i += stride) {
		int clause = i / LA_CHUNKS;

		if (clause_feedback[clause] != -1 || clause_output[clause] == 0) {
			continue;
		}

		int la_chunk = i % LA_CHUNKS;

		// Most significant state bit plane of this chunk
		int id = clause*LA_CHUNKS*STATE_BITS + la_chunk*STATE_BITS + STATE_BITS - 1;

		inc(ta_state, clause, la_chunk, (~Xi[la_chunk]) & (~ta_state[id]));
	}
}

/*
 * Decide, per clause, whether it receives feedback for this example.
 *
 * clause_feedback[j] becomes 0 (no feedback) or +/-1; the sign depends on the
 * clause's polarity (even index positive, odd index negative) and on target.
 * The probability of feedback is (THRESHOLD -/+ class_sum) / (2*THRESHOLD).
 * `state` must hold one curandState per launched thread.
 */
__global__ void generate_clause_feedback(curandState *state, int *clause_feedback, int class_sum, int target)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	/* Copy state to local memory for efficiency */
	curandState localState = state[index];

	for (int j = index; j < CLAUSES; j += stride) {
		int sign = 1 - 2 * (j & 1);

		int margin = target ? (THRESHOLD - class_sum) : (THRESHOLD + class_sum);
		int feedback = target ? sign : -1*sign;

		if (curand_uniform(&localState) > (1.0/(THRESHOLD*2))*margin) {
			clause_feedback[j] = 0;
		} else {
			clause_feedback[j] = feedback;
		}
	}

	state[index] = localState;
}

/* Set every clause output to 1 before clauses are evaluated. */
__global__ void initialize_clause_output(int *clause_output)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	// Initialize clause output
	for (int j = index; j < CLAUSES; j += stride) {
		clause_output[j] = 1;
	}
}

/*
 * Evaluate every clause on Xi: a clause's output is cleared as soon as one
 * chunk contains a literal whose most significant state bit is set but whose
 * bit in Xi is 0. clause_output must have been set to 1 beforehand. Several
 * threads may clear the same entry; they all write the same value.
 */
__global__ void calculate_clause_output(int *ta_state, int *clause_output, int *Xi)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	for (int i = index; i < CLAUSES*LA_CHUNKS; i += stride) {
		int clause = i / LA_CHUNKS;
		int la_chunk = i % LA_CHUNKS;

		int id = clause*LA_CHUNKS*STATE_BITS + la_chunk*STATE_BITS + STATE_BITS - 1;

		unsigned int included = (unsigned int)ta_state[id] & valid_literal_mask(la_chunk);
		if ((included & (unsigned int)Xi[la_chunk]) != included) {
			clause_output[clause] = 0;
		}
	}
}

/* Set every clause output and every all_exclude flag to 1 before prediction. */
__global__ void initialize_clause_output_predict(int *clause_output, int *all_exclude)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	// Initialize clause output
	for (int j = index; j < CLAUSES; j += stride) {
		clause_output[j] = 1;
		all_exclude[j] = 1;
	}
}

/*
 * Prediction-time clause evaluation. Works like calculate_clause_output and
 * additionally clears all_exclude[clause] when some chunk has a literal whose
 * most significant state bit is set and whose bit in Xi is 1.
 */
__global__ void calculate_clause_output_predict(int *ta_state, int *clause_output, int *all_exclude, int *Xi)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	for (int i = index; i < CLAUSES*LA_CHUNKS; i += stride) {
		int clause = i / LA_CHUNKS;
		int la_chunk = i % LA_CHUNKS;

		int id = clause*LA_CHUNKS*STATE_BITS + la_chunk*STATE_BITS + STATE_BITS - 1;

		unsigned int included = (unsigned int)ta_state[id] & valid_literal_mask(la_chunk);
		unsigned int satisfied = included & (unsigned int)Xi[la_chunk];

		if (satisfied != included) {
			clause_output[clause] = 0;
		}

		// Compared as unsigned against zero: a signed "> 0" test would miss a
		// chunk whose only set bit is bit 31.
		if (satisfied != 0) {
			all_exclude[clause] = 0;
		}
	}
}

/* Force the output of every clause still flagged in all_exclude to 0. */
__global__ void update_with_all_exclude(int *clause_output, int *all_exclude)
{
	int index = blockIdx.x * blockDim.x + threadIdx.x;
	int stride = blockDim.x * gridDim.x;

	for (int j = index; j < CLAUSES; j += stride) {
		if (all_exclude[j] == 1) {
			clause_output[j] = 0;
		}
	}
}
-------------------------------------------------------------------------------- /TsetlinMachineKernels.cuh: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | __global__ void setup_kernel(curandState *state); 6 | 7 | 8 | __global__ void type_i_feedback(curandState *state, int *ta_state, int *clause_feedback, int *clause_output, int *Xi, float s); 9 | 10 | 11 | __global__ void type_ii_feedback(int *ta_state, int *clause_feedback, int *clause_output, int *Xi); 12 | 13 | 14 | /* Sum up the votes for each class (this is the multiclass version of the Tsetlin Machine) */ 15 | __global__ void sum_up_class_votes(int *clause_output, int *sum); 16 | 17 | 18 | /* Sum up the votes for each class (this is the multiclass version of the Tsetlin Machine) */ 19 | __global__ void generate_clause_feedback(curandState *state, int *clause_feedback, int class_sum, int target); 20 | 21 | __global__ void
initialize_clause_output(int *clause_output); 22 | 23 | __global__ void calculate_clause_output(int *ta_state, int *clause_output, int *Xi); 24 | 25 | 26 | __global__ void initialize_clause_output_predict(int *clause_output, int *all_exclude); 27 | 28 | 29 | __global__ void calculate_clause_output_predict(int *ta_state, int *clause_output, int *all_exclude, int *Xi); 30 | 31 | 32 | __global__ void update_with_all_exclude(int *clause_output, int *all_exclude); -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | IMDBDemoCUDABits: IMDBDemoCUDABits.cu MultiClassTsetlinMachine.cuh TsetlinMachineKernels.cu TsetlinMachineKernels.cuh TsetlinMachine.cuh TsetlinMachine.cu TsetlinMachineConfig.cuh 2 | nvcc -o IMDBDemoCUDABits TsetlinMachine.cu IMDBDemoCUDABits.cu TsetlinMachineKernels.cu 3 | 4 | clean: 5 | rm *.o IMDBDemoCUDABits 6 | -------------------------------------------------------------------------------- /produce_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | import numpy as np 5 | import re 6 | import keras 7 | from sklearn.feature_selection import SelectKBest 8 | from sklearn.feature_selection import chi2 9 | from sklearn.feature_extraction.text import TfidfVectorizer 10 | from sklearn.feature_selection import mutual_info_classif 11 | from keras.datasets import imdb 12 | from sklearn.datasets import fetch_20newsgroups 13 | 14 | MAX_NGRAM = 2 15 | 16 | NUM_WORDS=5000 17 | INDEX_FROM=2 18 | 19 | FEATURES=5000 20 | 21 | # Save np.load 22 | np_load_old = np.load 23 | # Modify the default parameters of np.load 24 | np.load = lambda *a,**k: np_load_old(*a, allow_pickle=True, **k) 25 | train,test = keras.datasets.imdb.load_data(num_words=NUM_WORDS, index_from=INDEX_FROM) 26 | # Restore np.load for future normal usage 27 | np.load = np_load_old 28 
| 29 | train_x,train_y = train 30 | test_x,test_y = test 31 | 32 | word_to_id = keras.datasets.imdb.get_word_index() 33 | word_to_id = {k:(v+INDEX_FROM) for k,v in word_to_id.items()} 34 | word_to_id[""] = 0 35 | word_to_id[""] = 1 36 | word_to_id[""] = 2 37 | 38 | id_to_word = {value:key for key,value in word_to_id.items()} 39 | 40 | vocabulary = {} 41 | for i in range(train_y.shape[0]): 42 | terms = [] 43 | for word_id in train_x[i]: 44 | terms.append(id_to_word[word_id]) 45 | 46 | for N in range(1,MAX_NGRAM+1): 47 | grams = [terms[j:j+N] for j in range(len(terms)-N+1)] 48 | for gram in grams: 49 | phrase = " ".join(gram) 50 | 51 | if phrase in vocabulary: 52 | vocabulary[phrase] += 1 53 | else: 54 | vocabulary[phrase] = 1 55 | 56 | phrase_bit_nr = {} 57 | bit_nr_phrase = {} 58 | bit_nr = 0 59 | for phrase in vocabulary.keys(): 60 | if vocabulary[phrase] < 10: 61 | continue 62 | 63 | phrase_bit_nr[phrase] = bit_nr 64 | bit_nr_phrase[bit_nr] = phrase 65 | bit_nr += 1 66 | 67 | # Create bit representation 68 | 69 | X_train = np.zeros((train_y.shape[0], len(phrase_bit_nr)), dtype=np.int32) 70 | y_train = np.zeros(train_y.shape[0], dtype=np.int32) 71 | for i in range(train_y.shape[0]): 72 | terms = [] 73 | for word_id in train_x[i]: 74 | terms.append(id_to_word[word_id]) 75 | 76 | for N in range(1,MAX_NGRAM+1): 77 | grams = [terms[j:j+N] for j in range(len(terms)-N+1)] 78 | for gram in grams: 79 | phrase = " ".join(gram) 80 | if phrase in phrase_bit_nr: 81 | X_train[i,phrase_bit_nr[phrase]] = 1 82 | 83 | y_train[i] = train_y[i] 84 | 85 | X_test = np.zeros((test_y.shape[0], len(phrase_bit_nr)), dtype=np.int32) 86 | y_test = np.zeros(test_y.shape[0], dtype=np.int32) 87 | 88 | for i in range(test_y.shape[0]): 89 | terms = [] 90 | for word_id in test_x[i]: 91 | terms.append(id_to_word[word_id]) 92 | 93 | for N in range(1,MAX_NGRAM+1): 94 | grams = [terms[j:j+N] for j in range(len(terms)-N+1)] 95 | for gram in grams: 96 | phrase = " ".join(gram) 97 | if phrase in 
phrase_bit_nr: 98 | X_test[i,phrase_bit_nr[phrase]] = 1 99 | 100 | y_test[i] = test_y[i] 101 | 102 | 103 | print("SELECTING FEATURES") 104 | SKB = SelectKBest(chi2, k=FEATURES) 105 | SKB.fit(X_train, y_train) 106 | 107 | selected_features = SKB.get_support(indices=True) 108 | X_train = SKB.transform(X_train) 109 | X_test = SKB.transform(X_test) 110 | 111 | output_test = np.c_[X_test, y_test] 112 | np.savetxt("IMDBTestData.txt", output_test, fmt="%d") 113 | 114 | output_train = np.c_[X_train, y_train] 115 | np.savetxt("IMDBTrainingData.txt", output_train, fmt="%d") 116 | 117 | 118 | --------------------------------------------------------------------------------