├── .cproject ├── .project ├── LICENSE.txt ├── Makefile ├── README.md ├── ac ├── ac.c └── list.h ├── bm └── bm.c ├── cuda ├── cuPrintf.cu ├── cuPrintf.cuh ├── cuda.h ├── cuda_ac.cu ├── cuda_sbom.cu ├── cuda_sh.cu ├── cuda_sog.cu └── cuda_wm.cu ├── deviceQuery.txt ├── execute.sh ├── kmp └── kmp.c ├── main.c ├── profile.sh ├── sbom └── sbom.c ├── sh ├── sh.c └── support │ ├── atomic.h │ ├── bitmap.h │ ├── dict.c │ ├── dict.h │ ├── endian.h │ ├── filedes.c │ ├── filedes.h │ ├── hashtable.c │ ├── hashtable.h │ ├── list.c │ ├── list.h │ ├── lock.h │ ├── log.c │ ├── log.h │ ├── macros.h │ ├── math.h │ ├── md5.h │ ├── md5c.c │ ├── multihash.c │ ├── multihash.h │ ├── prettyprint.c │ ├── prettyprint.h │ ├── profiler.c │ ├── profiler.h │ ├── radix.c │ ├── radix.h │ ├── serialize.c │ ├── serialize.h │ ├── slist.c │ ├── slist.h │ ├── stack.h │ ├── string.c │ ├── string.h │ ├── timer.c │ ├── timer.h │ ├── timestamp.h │ └── ue_space │ ├── ixa_sdk │ ├── README │ ├── ixa_sdk_4.1_LinuxPatched.tgz │ └── ixa_sdk_4.2_LinuxPatched.tgz │ └── uengine │ ├── Makefile │ ├── compat.h │ ├── ixp2000-lib-userspace.c │ ├── ixp2000-msf.h │ ├── ixp2000-ue-disas.c │ ├── ixp2000-ue-disas.h │ ├── ixp2000-uengine.c │ ├── ixp2000-uengine.h │ ├── ixp2400-msf.c │ ├── ixp2400-msf.h │ └── version ├── smatcher.h ├── sog ├── sog16.c ├── sog32.c └── sog8.c └── wu └── wu.c /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | cudaCharis 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | ?name? 
14 | 15 | 16 | 17 | org.eclipse.cdt.make.core.append_environment 18 | true 19 | 20 | 21 | org.eclipse.cdt.make.core.autoBuildTarget 22 | all 23 | 24 | 25 | org.eclipse.cdt.make.core.buildArguments 26 | 27 | 28 | 29 | org.eclipse.cdt.make.core.buildCommand 30 | make 31 | 32 | 33 | org.eclipse.cdt.make.core.buildLocation 34 | ${workspace_loc:/cudaCharis/Debug} 35 | 36 | 37 | org.eclipse.cdt.make.core.cleanBuildTarget 38 | clean 39 | 40 | 41 | org.eclipse.cdt.make.core.contents 42 | org.eclipse.cdt.make.core.activeConfigSettings 43 | 44 | 45 | org.eclipse.cdt.make.core.enableAutoBuild 46 | false 47 | 48 | 49 | org.eclipse.cdt.make.core.enableCleanBuild 50 | true 51 | 52 | 53 | org.eclipse.cdt.make.core.enableFullBuild 54 | true 55 | 56 | 57 | org.eclipse.cdt.make.core.fullBuildTarget 58 | all 59 | 60 | 61 | org.eclipse.cdt.make.core.stopOnError 62 | true 63 | 64 | 65 | org.eclipse.cdt.make.core.useDefaultBuildCmd 66 | true 67 | 68 | 69 | 70 | 71 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 72 | full,incremental, 73 | 74 | 75 | 76 | 77 | 78 | org.eclipse.cdt.core.cnature 79 | org.eclipse.cdt.core.ccnature 80 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 81 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 82 | 83 | 84 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = mpicc 2 | NVCC = nvcc 3 | 4 | SDKPATH := ~/NVIDIA_GPU_Computing_SDK 5 | CUDAPATH := /usr/local/cuda 6 | 7 | TARGET = smatcher 8 | 9 | OBJS = kmp.o bm.o ac.o sh.o sbom.o wu.o sog8.o main.o helper.o cuda_ac.o cuda_sh.o cuda_sbom.o cuda_wm.o cuda_sog.o 10 | 11 | #CPPFLAGS=-Wall -Wno-pointer-sign -O0 -g -funroll-loops -pg 12 | CPPFLAGS=-Wall -Wno-pointer-sign -O2 -funroll-loops 13 | NVCCFLAGS= -O2 -I$(CUDAPATH)/include -I$(SDKPATH)/shared/inc -I$(SDKPATH)/C/common/inc --ptxas-options=-v -arch=compute_12 -code=sm_12,compute_12 14 | 15 | 
#LDFLAGS=-L$(CUDAPATH)/lib -L$(SDKPATH)/shared/lib/linux -L$(SDKPATH)/C/lib -lcuda -lcudart -lmpich 16 | LDFLAGS=-L$(CUDAPATH)/lib -lcuda -lcudart -lmpich 17 | 18 | all: $(TARGET) 19 | 20 | $(TARGET): $(OBJS) $(SEQUENTIAL-OBJS) 21 | $(CC) $(CPPFLAGS) $(OBJS) -o $(TARGET) $(LDFLAGS) 22 | 23 | main.o: main.c 24 | $(CC) $(CPPFLAGS) -c main.c 25 | 26 | kmp.o: kmp/kmp.c 27 | $(CC) $(CPPFLAGS) -c kmp/kmp.c 28 | 29 | bm.o: bm/bm.c 30 | $(CC) $(CPPFLAGS) -c bm/bm.c 31 | 32 | ac.o: ac/ac.c 33 | $(CC) $(CPPFLAGS) -c ac/ac.c 34 | 35 | sh.o: sh/sh.c 36 | $(CC) $(CPPFLAGS) -c sh/sh.c 37 | 38 | sbom.o: sbom/sbom.c 39 | $(CC) $(CPPFLAGS) -c sbom/sbom.c 40 | 41 | wu.o: wu/wu.c 42 | $(CC) $(CPPFLAGS) -c wu/wu.c 43 | 44 | sog8.o: sog/sog8.c 45 | $(CC) $(CPPFLAGS) -c sog/sog8.c 46 | 47 | helper.o: ../helper.c 48 | $(CC) $(CPPFLAGS) -c ../helper.c 49 | 50 | cuda_ac.o: cuda/cuda_ac.cu 51 | $(NVCC) $(NVCCFLAGS) -c cuda/cuda_ac.cu 52 | 53 | cuda_sh.o: cuda/cuda_sh.cu 54 | $(NVCC) $(NVCCFLAGS) -c cuda/cuda_sh.cu 55 | 56 | cuda_sbom.o: cuda/cuda_sbom.cu 57 | $(NVCC) $(NVCCFLAGS) -c cuda/cuda_sbom.cu 58 | 59 | cuda_wm.o: cuda/cuda_wm.cu 60 | $(NVCC) $(NVCCFLAGS) -c cuda/cuda_wm.cu 61 | 62 | cuda_sog.o: cuda/cuda_sog.cu 63 | $(NVCC) $(NVCCFLAGS) -c cuda/cuda_sog.cu 64 | 65 | clean: 66 | rm -f *.o *.d $(TARGET) core 67 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database. 2 | 3 | Charalampos S. Kouzinopoulos, Yannis M. Assael, Themistoklis K. Pyrgiotis, Konstantinos G. Margaritis 4 | 5 | 6 | Multiple matching algorithms are used to locate the occurrences of patterns from a finite pattern set in a large input string. 
Aho-Corasick and Wu-Manber, two of the most well-known algorithms for multiple matching, require increased computing power, particularly in cases where large-size datasets must be processed, as is common in computational biology applications. Over the past years, Graphics Processing Units (GPUs) have evolved into powerful parallel processors outperforming Central Processing Units (CPUs) in scientific calculations. Moreover, multiple GPUs can be used in parallel, forming hybrid computer cluster configurations to achieve an even higher processing throughput. This paper evaluates the speedup of the parallel implementation of the Aho-Corasick and Wu-Manber algorithms on a hybrid GPU cluster, when used to process a snapshot of the Expressed Sequence Tags of the human genome and for different problem parameters. 7 | 8 | 9 | ### Links 10 | [arXiv pre-print](http://arxiv.org/abs/1407.2889) 11 | 12 | ### Bibtex 13 | ``` 14 | @article{kouzinopoulos2015hybrid, 15 | title={A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database}, 16 | author={Kouzinopoulos, Charalampos S. and Assael, Yannis M. and Pyrgiotis, Themistoklis K. and Margaritis, Konstantinos G.}, 17 | journal={International Journal on Artificial Intelligence Tools}, 18 | volume={24}, 19 | number={1}, 20 | pages={1540001}, 21 | year={2015}, 22 | publisher={World Scientific} 23 | } 24 | ``` 25 | 26 | 27 | ### License 28 | Code licensed under the GNU General Public License v3.0. -------------------------------------------------------------------------------- /ac/ac.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | #include "list.h" 19 | 20 | /// free an AC table from a given startnode (recursively) 21 | void ac_free ( struct ac_state *state, int alphabet ) { 22 | 23 | int i; 24 | 25 | for ( i = 0; i < alphabet; i++ ) 26 | if ( state->next[i] ) 27 | ac_free ( state->next[i], alphabet ); 28 | 29 | if ( state->output ) 30 | free ( state->output ); 31 | 32 | free ( state->next ); 33 | free ( state ); 34 | } 35 | 36 | /// initialize the empty-table 37 | void ac_init ( struct ac_table *g, int alphabet, int *state_transition ) { 38 | 39 | g->zerostate = NULL; 40 | g->patterncounter = 0; 41 | 42 | //Create the root note 43 | g->zerostate = malloc ( sizeof ( struct ac_state ) ); 44 | 45 | if ( !g->zerostate ) 46 | fail ( "Could not allocate memory\n" ); 47 | 48 | g->idcounter = 1; 49 | g->zerostate->id = 0; 50 | 51 | g->zerostate->output = NULL; 52 | 53 | g->zerostate->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) ); 54 | 55 | //Set all alphabet bytes of root node->next to 0 56 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct ac_state * ) ); 57 | 58 | //Set all cells of transition table for state 0 to 0 59 | int i; 60 | 61 | for ( i = 0; i < alphabet; i++ ) 62 | state_transition[i] = 0; 63 | } 64 | 65 | /// free an entire AC table 66 | void ac_destroy ( struct ac_table *in, int alphabet ) { 67 | 68 | int i; 69 | 70 | for ( i = 0; i < alphabet; i++ ) 71 | if ( in->zerostate->next[i] && in->zerostate->next[i]->id > 0 ) { 72 | ac_free ( in->zerostate->next[i], alphabet ); 73 | in->zerostate->next[i] = NULL; 74 | } 75 | free ( in->zerostate->next ); 76 | free ( in->zerostate ); 77 | } 78 | 79 | void ac_maketree ( struct ac_table *g, int alphabet, unsigned int *state_supply ) { 80 | 81 | struct list *list = NULL; 82 | struct ac_state *state, *s, *cur; 83 | int i/*, j*/; 84 | 85 | // Set all NULL transitions of 0 state to point to itself 86 | for ( i = 0; i < alphabet; i++ ) { 87 | if 
( !g->zerostate->next[i] ) 88 | g->zerostate->next[i] = g->zerostate; 89 | else { 90 | list = list_append ( list, g->zerostate->next[i] ); 91 | g->zerostate->next[i]->fail = g->zerostate; 92 | } 93 | } 94 | 95 | // Set fail() for depth > 0 96 | while ( list ) { 97 | 98 | cur = ( struct ac_state * )list->id; 99 | 100 | for ( i = 0; i < alphabet; i++ ) { 101 | 102 | s = cur->next[i]; 103 | 104 | if ( s ) { 105 | 106 | list = list_append ( list, s ); 107 | state = cur->fail; 108 | 109 | while ( !state->next[i] ) 110 | state = state->fail; 111 | 112 | s->fail = state->next[i]; 113 | 114 | state_supply[s->id] = s->fail->id; 115 | 116 | //printf("Created additional link from state %i to state %i\n", s->id, s->fail->id); 117 | } 118 | // Join outputs missing 119 | } 120 | list = list_pop ( list ); 121 | } 122 | 123 | list_destroy ( list ); 124 | } 125 | 126 | // Insert a string to the tree 127 | void ac_addstring ( struct ac_table *g, unsigned int i, unsigned char *string, int m, int alphabet, int *state_transition, unsigned int *state_final ) { 128 | 129 | struct ac_state *state, *next = NULL; 130 | int j, done = 0; 131 | 132 | // as long as next already exists follow them 133 | j = 0; 134 | state = g->zerostate; 135 | 136 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) { 137 | 138 | state = next; 139 | 140 | if ( j == m ) 141 | done = 1; 142 | 143 | j++; 144 | 145 | //printf("character %c state: %i\n", *( string + j ), state->id); 146 | } 147 | 148 | // not done yet 149 | if ( !done ) { 150 | while ( j < m ) { 151 | // Create new state 152 | next = malloc ( sizeof ( struct ac_state ) ); 153 | 154 | if ( !next ) 155 | fail ( "Could not allocate memory\n" ); 156 | 157 | next->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) ); 158 | 159 | next->id = g->idcounter++; 160 | next->output = NULL; 161 | 162 | state_transition[state->id * alphabet + *( string + j )] = next->id; 163 | //printf("setting %i to %i\n", state->id * 
alphabet + *( string + j ), next->id); 164 | 165 | //printf("Created link from state %i to %i for character %i (j = %i)\n", state->id, next->id, *( string + j ), j ); 166 | 167 | //Set all alphabet bytes of the next node's->next to 0 168 | //This is the _extended_ Aho-Corasick algorithm. A complete automaton is used where all states 169 | //have an outgoing transition for every alphabet character of the alphabet 170 | memset ( next->next, 0, alphabet * sizeof ( struct ac_state * ) ); 171 | 172 | state->next[*( string + j )] = next; 173 | state = next; 174 | 175 | //printf("character %c state: %i\n", *( string + j ), state->id); 176 | j++; 177 | } 178 | } 179 | 180 | //printf(" Currently at state %i\n", state->id); 181 | 182 | //After finishing with the previous characters of the keyword, add the terminal state if it does not exist 183 | if ( !state->output ) { 184 | 185 | //printf(" For pattern %i added the terminal state %i of %i\n", i, state->id, g->patterncounter); 186 | state_final[state->id] = 1; 187 | 188 | //allocate memory and copy *string to state->output 189 | state->output = ( unsigned char * ) malloc ( sizeof ( unsigned char ) * m ); 190 | memcpy ( state->output, string, m ); 191 | 192 | state->keywordline = g->patterncounter; 193 | 194 | g->patterncounter++; 195 | } 196 | } 197 | 198 | unsigned int search_ac ( unsigned char *text, int n, struct ac_table *table ) { 199 | 200 | struct ac_state *head = table->zerostate; 201 | struct ac_state *r, *s; 202 | 203 | int column, matches = 0; 204 | 205 | r = head; 206 | 207 | for ( column = 0; column < n; column++ ) { 208 | 209 | while ( ( s = r->next[*( text + column ) ] ) == NULL ) 210 | r = r->fail; 211 | r = s; 212 | 213 | //printf("column %i r->id = %i\n", column, r->id); 214 | 215 | if ( r->output != NULL ) { 216 | matches++; 217 | //printf("match at %i for r %i\n", column, r->id); 218 | } 219 | } 220 | 221 | return matches; 222 | } 223 | 224 | struct ac_table *preproc_ac ( unsigned char **pattern, int m, 
// list.[ch]
// a doubly linked list
//
// (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
// email at willem -_at_- computer.org
//
// BSD license applies
//
// A list is referred to by its first node; NULL is the empty list.
// The list never dereferences or frees the `id` payload pointers.

#ifndef WDB_SLIST_H
#define WDB_SLIST_H

// NOTE(review): the include targets were missing from the copy of this header
// that was reviewed. The userspace set is what the code below needs (malloc,
// free, NULL); the kernel set is a best guess - confirm against the repository.
#ifdef __KERNEL__
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>
#else
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#endif


struct list {
	void *id;
	struct list *next;
	struct list *prev;
};

/** allocate a detached node; returns NULL when out of memory */
static inline struct list * list_create(void *id)
{
	struct list *node = malloc(sizeof *node);

	if (!node)
		return NULL;

	node->id = id;
	node->next = NULL;
	node->prev = NULL;

	return node;
}

/** put id in front of 'start' (which may be NULL).
 *  returns the new startnode, or NULL when out of memory
 *  (the existing list is left untouched in that case) */
static inline struct list * list_insert(struct list *start, void *id)
{
	struct list *node = list_create(id);

	if (!node)
		return NULL;

	if (start) {
		node->next = start;
		start->prev = node;
	}
	return node;
}

/** put id behind the last item. walks the whole list.
 *  returns the startnode, or NULL when out of memory; the existing list
 *  is left untouched then, so keep a copy of 'start' to avoid losing it */
static inline struct list * list_append(struct list *start, void *id)
{
	struct list *node = list_create(id);
	struct list *last;

	if (!node)
		return NULL;

	if (!start)
		return node;

	for (last = start; last->next; last = last->next)
		;

	last->next = node;
	node->prev = last;
	return start;
}

/** free the startnode and return its successor
 *
 *  note that the function returns NULL in two
 *  distinct cases: no 'start', or 'start' is the only item
 */
static inline struct list * list_pop(struct list *start)
{
	struct list *rest;

	if (!start)
		return NULL;

	rest = start->next;
	free(start);

	// the successor is the startnode now; do not leave it pointing at freed memory
	if (rest)
		rest->prev = NULL;

	return rest;
}

/** reverse the list in place. returns the new startnode */
static inline struct list * list_invert(struct list *start)
{
	struct list *cur, *last = NULL;

	// nothing to do for an empty list or a single item
	if (!start || !start->next)
		return start;

	// swap {prev,next} pointers
	for (cur = start; cur; cur = cur->prev) {
		struct list *next = cur->next;

		cur->next = cur->prev;
		cur->prev = next;
		last = cur;
	}

	return last;
}

// return the item in the list that matches the id, or NULL
static inline struct list * list_exists(struct list *start, void * id)
{
	struct list *cur;

	for (cur = start; cur; cur = cur->next)
		if (cur->id == id)
			return cur;

	return NULL;
}

/** unlink an item. can be used together with list_foreach:
 *  the pointers of 'cur' itself are deliberately left alone so that
 *  an iteration can still step to cur->next.
 *  returns the new startnode, or NULL if 'cur' was the only item */
static inline struct list * list_unlink(struct list *cur)
{
	struct list *tmp = NULL;

	if (cur->next) {
		cur->next->prev = cur->prev;
		tmp = cur->next;
	}
	if (cur->prev) {
		cur->prev->next = cur->next;
		tmp = cur->prev;
	}

	if (!tmp)
		return NULL; // no cur->next && no cur->prev ? then it's an empty list

	while (tmp->prev)
		tmp = tmp->prev;
	return tmp; // return the new startnode
}

/** unlink and free an item. returns start of the list */
static inline struct list * list_remove(struct list *cur)
{
	struct list *start = list_unlink(cur);

	free(cur);
	return start;
}

/** remove id if it exists. returns start of the list */
static inline struct list * list_remove_id(struct list * list, void * id)
{
	struct list *elem = list_exists(list, id);

	return elem ? list_remove(elem) : list;
}

// declared here, defined outside this header
struct list * list_insert_sorted(struct list *start, void *id);

/** number of items from 'list' to the end */
static inline int list_len(struct list *list)
{
	int count = 0;

	for (; list; list = list->next)
		count++;

	return count;
}

#define list_foreach(list, cur) \
	for ((cur) = (list); (cur); (cur) = (cur)->next)

// frees every node and leaves 'deadlist' NULL; a single statement, so it is safe in if/else
#define list_destroy(deadlist) \
	do { while (deadlist) (deadlist) = list_pop(deadlist); } while (0)

#endif /* WDB_SLIST_H */
2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
//Bad character shift
// Fills bmBc[0 .. alphabet-1] with one shift table shared by all p_size
// patterns of length m: the entry for character c is the smallest distance
// from an occurrence of c to the end of a pattern, looking at every pattern
// position except the last one, or m when c does not occur there.
// Pattern characters are used as indices into bmBc without a range check.
// NOTE(review): this presumably relies on every pattern character being
// smaller than alphabet - confirm against the callers.
void preBmBc ( unsigned char **pattern, int m, int p_size, int alphabet, int *bmBc ) {

	// signed on purpose: with m <= 0 the bound m - 1 must stay negative instead
	// of wrapping around to a huge unsigned value
	int i, j;

	for ( i = 0; i < alphabet; ++i )
		bmBc[i] = m;

	for ( j = 0; j < p_size; j++ )
		for ( i = 0; i < m - 1; ++i ) {

			int shift = m - i - 1;
			int *entry = &bmBc[pattern[j][i]];

			if ( shift < *entry )
				*entry = shift;
		}
}
-------------------------------------------------------------------------------- /cuda/cuPrintf.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. 3 | * 4 | * Please refer to the NVIDIA end user license agreement (EULA) associated 5 | * with this source code for terms and conditions that govern your use of 6 | * this software. Any use, reproduction, disclosure, or distribution of 7 | * this software and related documentation outside the terms of the EULA 8 | * is strictly prohibited. 9 | * 10 | */ 11 | 12 | #ifndef CUPRINTF_H 13 | #define CUPRINTF_H 14 | 15 | /* 16 | * This is the header file supporting cuPrintf.cu and defining both 17 | * the host and device-side interfaces. See that file for some more 18 | * explanation and sample use code. See also below for details of the 19 | * host-side interfaces. 20 | * 21 | * Quick sample code: 22 | * 23 | #include "cuPrintf.cu" 24 | 25 | __global__ void testKernel(int val) 26 | { 27 | cuPrintf("Value is: %d\n", val); 28 | } 29 | 30 | int main() 31 | { 32 | cudaPrintfInit(); 33 | testKernel<<< 2, 3 >>>(10); 34 | cudaPrintfDisplay(stdout, true); 35 | cudaPrintfEnd(); 36 | return 0; 37 | } 38 | */ 39 | 40 | /////////////////////////////////////////////////////////////////////////////// 41 | // DEVICE SIDE 42 | // External function definitions for device-side code 43 | 44 | // Abuse of templates to simulate varargs 45 | __device__ int cuPrintf(const char *fmt); 46 | template __device__ int cuPrintf(const char *fmt, T1 arg1); 47 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2); 48 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3); 49 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4); 50 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5); 51 | template __device__ int cuPrintf(const char 
*fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6); 52 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7); 53 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8); 54 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9); 55 | template __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10); 56 | 57 | 58 | // 59 | // cuPrintfRestrict 60 | // 61 | // Called to restrict output to a given thread/block. Pass 62 | // the constant CUPRINTF_UNRESTRICTED to unrestrict output 63 | // for thread/block IDs. Note you can therefore allow 64 | // "all printfs from block 3" or "printfs from thread 2 65 | // on all blocks", or "printfs only from block 1, thread 5". 66 | // 67 | // Arguments: 68 | // threadid - Thread ID to allow printfs from 69 | // blockid - Block ID to allow printfs from 70 | // 71 | // NOTE: Restrictions last between invocations of 72 | // kernels unless cudaPrintfInit() is called again. 73 | // 74 | #define CUPRINTF_UNRESTRICTED -1 75 | __device__ void cuPrintfRestrict(int threadid, int blockid); 76 | 77 | 78 | 79 | /////////////////////////////////////////////////////////////////////////////// 80 | // HOST SIDE 81 | // External function definitions for host-side code 82 | 83 | // 84 | // cudaPrintfInit 85 | // 86 | // Call this once to initialise the printf system. If the output 87 | // file or buffer size needs to be changed, call cudaPrintfEnd() 88 | // before re-calling cudaPrintfInit(). 89 | // 90 | // The default size for the buffer is 1 megabyte. 
For CUDA 91 | // architecture 1.1 and above, the buffer is filled linearly and 92 | // is completely used; however for architecture 1.0, the buffer 93 | // is divided into as many segments are there are threads, even 94 | // if some threads do not call cuPrintf(). 95 | // 96 | // Arguments: 97 | // bufferLen - Length, in bytes, of total space to reserve 98 | // (in device global memory) for output. 99 | // 100 | // Returns: 101 | // cudaSuccess if all is well. 102 | // 103 | extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together 104 | 105 | // 106 | // cudaPrintfEnd 107 | // 108 | // Cleans up all memories allocated by cudaPrintfInit(). 109 | // Call this at exit, or before calling cudaPrintfInit() again. 110 | // 111 | extern "C" void cudaPrintfEnd(); 112 | 113 | // 114 | // cudaPrintfDisplay 115 | // 116 | // Dumps the contents of the output buffer to the specified 117 | // file pointer. If the output pointer is not specified, 118 | // the default "stdout" is used. 119 | // 120 | // Arguments: 121 | // outputFP - A file pointer to an output stream. 122 | // showThreadID - If "true", output strings are prefixed 123 | // by "[blockid, threadid] " at output. 124 | // 125 | // Returns: 126 | // cudaSuccess if all is well. 127 | // 128 | extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false); 129 | 130 | #endif // CUPRINTF_H 131 | -------------------------------------------------------------------------------- /cuda/cuda.h: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
If not, see .*/ 15 | 16 | #ifndef CUDA_H 17 | #define CUDA_H 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | #define MAX(a,b) (a>b)?a:b 25 | 26 | static void checkCUDAError(const char *msg) { 27 | 28 | cudaError_t err = cudaGetLastError(); 29 | 30 | if (cudaSuccess != err) { 31 | fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err)); 32 | exit(EXIT_FAILURE); 33 | } 34 | } 35 | 36 | // This will output the proper CUDA error strings in the event that a CUDA host call returns an error 37 | #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__) 38 | 39 | inline static void __checkCudaErrors(cudaError err, const char *file, 40 | const int line) { 41 | 42 | if (cudaSuccess != err) { 43 | fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line, 44 | (int) err, cudaGetErrorString(err)); 45 | exit(-1); 46 | } 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /deviceQuery.txt: -------------------------------------------------------------------------------- 1 | /home/cuda/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/deviceQuery Starting... 
2 | 3 | CUDA Device Query (Runtime API) version (CUDART static linking) 4 | 5 | Found 1 CUDA Capable device(s) 6 | 7 | Device 0: "GeForce GTX 280" 8 | CUDA Driver Version / Runtime Version 4.2 / 4.2 9 | CUDA Capability Major/Minor version number: 1.3 10 | Total amount of global memory: 1023 MBytes (1073020928 bytes) 11 | (30) Multiprocessors x ( 8) CUDA Cores/MP: 240 CUDA Cores 12 | GPU Clock rate: 1296 MHz (1.30 GHz) 13 | Memory Clock rate: 1107 Mhz 14 | Memory Bus Width: 512-bit 15 | Max Texture Dimension Size (x,y,z) 1D=(8192), 2D=(65536,32768), 3D=(2048,2048,2048) 16 | Max Layered Texture Size (dim) x layers 1D=(8192) x 512, 2D=(8192,8192) x 512 17 | Total amount of constant memory: 65536 bytes 18 | Total amount of shared memory per block: 16384 bytes 19 | Total number of registers available per block: 16384 20 | Warp size: 32 21 | Maximum number of threads per multiprocessor: 1024 22 | Maximum number of threads per block: 512 23 | Maximum sizes of each dimension of a block: 512 x 512 x 64 24 | Maximum sizes of each dimension of a grid: 65535 x 65535 x 1 25 | Maximum memory pitch: 2147483647 bytes 26 | Texture alignment: 256 bytes 27 | Concurrent copy and execution: Yes with 1 copy engine(s) 28 | Run time limit on kernels: Yes 29 | Integrated GPU sharing Host Memory: No 30 | Support host page-locked memory mapping: Yes 31 | Concurrent kernel execution: No 32 | Alignment requirement for Surfaces: Yes 33 | Device has ECC support enabled: No 34 | Device is using TCC driver mode: No 35 | Device supports Unified Addressing (UVA): No 36 | Device PCI Bus ID / PCI location ID: 1 / 0 37 | Compute Mode: 38 | < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) > 39 | 40 | deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 1, Device = GeForce GTX 280 41 | -------------------------------------------------------------------------------- /execute.sh: 
-------------------------------------------------------------------------------- 1 | make 2 | 3 | if [ $? -ne 0 ] 4 | then 5 | exit 1 6 | fi 7 | 8 | #8000 so that the arrays can fit inside the texture memory ( 8 * 8000 = 64000 < 65000 ) 9 | for j in 1000 8000 10 | do 11 | 12 | echo "$j" 13 | 14 | for i in sog 15 | do 16 | ./smatcher $i -m 8 -p_size $j -n 3999744 -alphabet 2 17 | done 18 | 19 | echo "" 20 | 21 | for i in sog 22 | do 23 | ./smatcher $i -m 8 -p_size $j -n 4628736 -alphabet 4 24 | done 25 | 26 | echo "" 27 | 28 | for i in sog 29 | do 30 | ./smatcher $i -m 8 -p_size $j -n 116234496 -alphabet 4 31 | done 32 | 33 | echo "" 34 | 35 | for i in sog 36 | do 37 | ./smatcher $i -m 8 -p_size $j -n 177649920 -alphabet 20 38 | done 39 | 40 | echo "" 41 | 42 | for i in sog 43 | do 44 | ./smatcher $i -m 8 -p_size $j -n 10821888 -alphabet 20 45 | done 46 | 47 | echo "" 48 | 49 | for i in sog 50 | do 51 | ./smatcher $i -m 8 -p_size $j -n 1903104 -alphabet 128 52 | done 53 | 54 | echo "" 55 | 56 | done 57 | -------------------------------------------------------------------------------- /kmp/kmp.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 
7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | /* 18 | //Create the first node of the list 19 | struct node* setup_head ( char label ) { 20 | 21 | struct node* newState = malloc ( sizeof ( struct node ) ); 22 | 23 | newState->label = label; 24 | newState->id = 0; 25 | newState->supply = NULL; 26 | newState->next = NULL; 27 | 28 | return newState; 29 | } 30 | 31 | //append a node and visit it 32 | void append_node ( struct node** lastState, char label, int id ) { 33 | 34 | struct node* newState = malloc ( sizeof ( struct node ) ); 35 | 36 | newState->label = label; 37 | newState->id = id; 38 | newState->supply = NULL; 39 | newState->next = NULL; 40 | 41 | //the ->next of the last state in the list will now point to this state 42 | (*lastState)->next = newState; 43 | 44 | //The current node in the list will now be the last node we created 45 | *lastState = newState; 46 | } 47 | 48 | void free_kmp ( struct node* state ) { 49 | 50 | struct node* tmp; 51 | 52 | while ( 1 ) { 53 | 54 | tmp = state; 55 | state = state->next; 56 | 57 | free ( tmp ); 58 | 59 | if ( state == NULL ) 60 | break; 61 | } 62 | } 63 | 64 | //create the supply link for a node 65 | void addSupply ( struct node* head, int current, struct node* supply ) { 66 | 67 | int i; 68 | 69 | struct node* currentState = head; 70 | 71 | for ( i = 0; i < current; i++ ) 72 | 
currentState = currentState->next; 73 | 74 | currentState->supply = supply; 75 | 76 | } 77 | 78 | struct node* preKmpList ( struct node* head, unsigned int *pattern, int m ) { 79 | 80 | int i = 0, k; 81 | 82 | struct node* currentState = head; 83 | 84 | struct node* j = NULL; 85 | 86 | for ( k = 1; k <= m; k++ ) 87 | append_node( ¤tState, pattern[k], k ); 88 | 89 | while (i < m) { 90 | 91 | while ( j != NULL && pattern[i] != j->label) 92 | j = j->supply; 93 | 94 | i++; 95 | 96 | if ( j == NULL ) 97 | j = head; 98 | else 99 | j = j->next; 100 | 101 | if ( i < m && pattern[i] == j->label ) 102 | addSupply( head, i, j->supply ); 103 | else 104 | addSupply( head, i, j ); 105 | } 106 | 107 | return head; 108 | } 109 | 110 | unsigned int searchList ( struct node* head, unsigned int *pattern, int m, unsigned int *text, int n ) { 111 | 112 | int i = 0; 113 | 114 | struct node* j = head; 115 | 116 | while (i < n) { 117 | 118 | //mismatch occurs 119 | while ( j != NULL && j->label != text[i] ) 120 | j = j->supply; 121 | 122 | i++; 123 | 124 | if ( j == NULL ) 125 | j = head; 126 | else 127 | j = j->next; 128 | 129 | if ( j->id >= m ) { 130 | return ( i - j->id ); 131 | 132 | printf("->%i\n", i - j->id); 133 | 134 | j = j->supply; 135 | } 136 | } 137 | } 138 | */ 139 | 140 | void preKmp ( int *next, unsigned char *p, int m ) { 141 | 142 | int i=0; 143 | int j=-1; 144 | next[0] = -1; 145 | 146 | while (i < m) { 147 | 148 | while ( j >= 0 && p[i]!=p[j] ) 149 | j = next[j]; 150 | 151 | i++; j++; 152 | 153 | if ( i < m && p[i] == p[j] ) 154 | next[i] = next[j]; 155 | else 156 | next[i] = j; 157 | } 158 | } 159 | /* 160 | void search ( int *next, unsigned char *pattern, int m, unsigned char *text, int n ) { 161 | 162 | int i = 0; 163 | int j = 0; 164 | 165 | while (i < n) { 166 | 167 | //mismatch occurs 168 | while (j >= 0 && pattern[j] != text[i]) 169 | j = next[j]; 170 | 171 | i++; 172 | j++; 173 | 174 | if (j >= m) { 175 | //printf("->%i\n", i - j); 176 | j = next[j]; 177 | } 
178 | } 179 | } 180 | 181 | int main ( void ) { 182 | 183 | int i; 184 | 185 | int m = 8; 186 | unsigned char *pattern = (unsigned char *)"AACGTAAC"; 187 | 188 | int n = 12; 189 | unsigned char *text = (unsigned char *)"TAATAACGTAAC"; 190 | 191 | preKmp( pattern, m ); 192 | 193 | search( pattern, m, text, n ); 194 | 195 | for ( i = 0; i < m; i++ ) 196 | printf("%i\n", next[i]); 197 | 198 | printf("\n"); 199 | 200 | struct node* head = setup_head( pattern[0] ); 201 | 202 | struct node* state = head; 203 | 204 | preKmpList( state, pattern, m ); 205 | 206 | searchList ( state, pattern, m, text, n ); 207 | 208 | while ( 1 ) { 209 | 210 | if ( state->label ) { 211 | if ( state->supply != NULL ) 212 | printf("Node %c points to node %i with a label %c\n", state->label, state->supply->id, state->supply->label); 213 | else 214 | printf("Node %c points to NULL\n", state->label); 215 | } 216 | state = state->next; 217 | 218 | if ( state == NULL ) 219 | break; 220 | } 221 | 222 | free_kmp ( head ); 223 | 224 | return 0; 225 | } 226 | */ 227 | 228 | -------------------------------------------------------------------------------- /profile.sh: -------------------------------------------------------------------------------- 1 | make 2 | 3 | if [ $? -ne 0 ] 4 | then 5 | exit 1 6 | fi 7 | 8 | 9 | #events="gld_32b,gld_64b,gld_128b,gld_incoherent,gld_coherent,branch,warp_serialize" 10 | #events="gld_32b,gld_64b,gld_128b" 11 | 12 | #nvprof --events $events ./smatcher ac -m 8 -p_size 8000 -n 116234496 -alphabet 4 13 | 14 | events="gld_incoherent,gld_coherent,branch,warp_serialize" 15 | 16 | nvprof --events $events ./smatcher sog -m 8 -p_size 1000 -n 116234496 -alphabet 4 17 | 18 | 19 | echo "" 20 | echo "gld_32b: Number of 32 byte global memory load transactions. This increments by 1 for each 32 byte transaction." 21 | echo "" 22 | echo "gld_64b: Number of 64 byte global memory load transactions. This increments by 1 for each 64 byte transaction." 
23 | echo "" 24 | echo "gld_128b: Number of 128 byte global memory load transactions. This increments by 1 for each 128 byte transaction." 25 | echo "" 26 | echo "gld_incoherent: Number of non-coalesced global memory loads." 27 | echo "" 28 | echo "gld_coherent: Number of coalesced global memory loads." 29 | echo "" 30 | echo "branch: Number of branches taken by threads executing a kernel. This counter will be incremented by one if at least one thread in a warp takes the branch." 31 | echo "" 32 | echo "warp_serialize: If two addresses of a memory request fall in the same memory bank, there is a bank conflict and the access has to be serialized. This counter gives the number of thread warps that serialize on address conflicts to either shared or constant memory." 33 | 34 | -------------------------------------------------------------------------------- /sbom/sbom.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | /// initialize the empty-table 19 | void sbom_init ( struct sbom_table *g, int alphabet, int *state_transition ) { 20 | 21 | g->zerostate = NULL; 22 | g->patterncounter = 0; 23 | 24 | //Create the root note 25 | g->zerostate = malloc ( sizeof ( struct sbom_state ) ); 26 | 27 | if ( !g->zerostate ) 28 | fail ( "Could not allocate memory\n" ); 29 | 30 | g->idcounter = 1; 31 | g->zerostate->id = 0; 32 | 33 | g->zerostate->F = NULL; 34 | 35 | //Set Supply(q_0) := fail 36 | g->zerostate->fail = NULL; 37 | 38 | g->zerostate->next = ( struct sbom_state ** ) malloc ( alphabet * sizeof ( struct sbom_state * ) ); 39 | 40 | //Set all alphabet bytes of root node->next to 0 41 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct sbom_state * ) ); 42 | 43 | //Set all cells of transition table for state 0 to 0 44 | int i; 45 | 46 | for ( i = 0; i < alphabet; i++ ) 47 | state_transition[i] = 0; 48 | } 49 | 50 | // Insert a string to the tree 51 | void sbom_addstring ( struct sbom_table *g, unsigned int i, unsigned char *string, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_final_multi ) { 52 | 53 | struct sbom_state *state, *next = NULL, *k; 54 | int j, done = 0; 55 | 56 | // as long as next already exists follow them 57 | j = m - 1; 58 | state = g->zerostate; 59 | 60 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) { 61 | 62 | state = next; 63 | 64 | if ( j <= 0 ) 65 | done = 1; 66 | 67 | j--; 68 | } 69 | 70 | // not done yet 71 | if ( !done ) { 72 | 73 | while ( j >= 0 ) { 74 | // Create new state 75 | next = malloc ( sizeof ( struct sbom_state ) ); 76 | 77 | if ( !next ) 78 | fail ( "Could not allocate memory\n" ); 79 | 80 
| next->next = ( struct sbom_state ** ) malloc ( alphabet * sizeof ( struct sbom_state * ) ); 81 | 82 | next->id = g->idcounter++; 83 | next->F = NULL; 84 | 85 | state_transition[state->id * alphabet + *( string + j )] = next->id; 86 | 87 | //Store the pointer to the new state in an array so it can be free'ed at the end 88 | pointer_array[next->id - 1] = next; 89 | 90 | //printf("Created link from state %i to %i for character %i (j = %i)\n", state->id, next->id, *( string + j ), j ); 91 | 92 | //Set all alphabet bytes of the next node's->next to 0 93 | //This is the _extended_ Aho-Corasick algorithm. A complete automaton is used where all states 94 | //have an outgoing transition for every alphabet character of the alphabet 95 | memset ( next->next, 0, alphabet * sizeof ( struct sbom_state * ) ); 96 | 97 | state->next[*( string + j )] = next; 98 | 99 | k = state->fail; 100 | 101 | while ( k != NULL && k->next[*( string + j )] == NULL ) { 102 | 103 | k->next[*( string + j )] = next; 104 | 105 | state_transition[k->id * alphabet + *( string + j )] = next->id; 106 | 107 | //printf(" Created additional link from state %i to %i for character %i\n", k->id, next->id, *( string + j ) ); 108 | 109 | k = k->fail; 110 | } 111 | 112 | if ( k != NULL ) 113 | next->fail = k->next[*( string + j )]; 114 | else 115 | next->fail = g->zerostate; 116 | 117 | state = next; 118 | 119 | j--; 120 | } 121 | } 122 | 123 | //printf(" Currently at state %i\n", state->id); 124 | 125 | //After finishing with the previous characters of the keyword, add the terminal state to F(q) 126 | if ( !state->F ) { 127 | 128 | //In the worst case, one state can correspond to all p_size patterns, needing p_size * number_of_terminal_states memory. A number of 200 indices should suffice. 
129 | //state->F = ( unsigned int * ) malloc ( sizeof ( unsigned int ) * p_size ); 130 | state->F = ( unsigned int * ) malloc ( sizeof ( unsigned int ) * 200 ); 131 | 132 | if ( !state->F ) 133 | fail ( "Could not allocate memory\n" ); 134 | 135 | state->num = 0; 136 | } 137 | 138 | //Add the row index to the F array 139 | state->F[state->num] = g->patterncounter; 140 | 141 | //printf(" Added pattern %i to F[%i] of state %i\n", g->patterncounter, state->num, state->id); 142 | 143 | //Use state_final_multi[state][0] to store the number of matching patterns, enumerate them in cells state_final_multi[state][1-200] 144 | state_final_multi[state->id * 200] = state->num + 1; 145 | state_final_multi[state->id * 200 + state->num + 1] = g->patterncounter; 146 | 147 | state->num++; 148 | 149 | g->patterncounter++; 150 | } 151 | 152 | unsigned int search_sbom ( unsigned char **pattern, int m, unsigned char *text, int n, struct sbom_table *table ) { 153 | 154 | struct sbom_state *head = table->zerostate; 155 | struct sbom_state *r, *s; 156 | 157 | unsigned int i; 158 | 159 | int column = m - 1, matches = 0, j; 160 | 161 | while ( column < n ) { 162 | 163 | r = head; 164 | j = 0; 165 | 166 | while ( j < m && ( s = r->next[*( text + column - j )] ) != NULL ) { 167 | 168 | //printf("(%i) Going from %i to %i by %i\n", column - j, r->id, s->id, *( text + column - j )); 169 | 170 | r = s; 171 | 172 | j++; 173 | } 174 | 175 | //Verify all patterns in F(q) with the input string 176 | if ( r->F != NULL && r->num > 0 && j == m ) { 177 | 178 | for ( i = 0; i < r->num; i++ ) { 179 | 180 | if ( memcmp ( pattern[r->F[i]], text + column - m + 1, m ) == 0 ) { 181 | matches++; 182 | 183 | //printf("match of %i %i at %i\n", r->id, r->F[i], column); 184 | 185 | break; 186 | } 187 | } 188 | 189 | column++; 190 | } 191 | else 192 | column += MAX ( m - j, 1); 193 | } 194 | 195 | return matches; 196 | } 197 | 198 | struct sbom_table *preproc_sbom ( unsigned char **pattern, int m, int p_size, int 
alphabet, int *state_transition, unsigned int *state_final_multi ) { 199 | 200 | unsigned int i; 201 | 202 | struct sbom_table *table; 203 | 204 | // allocate memory for the table 205 | 206 | table = malloc ( sizeof ( struct sbom_table ) ); 207 | 208 | if ( !table ) 209 | fail ( "Could not initialize table\n" ); 210 | 211 | sbom_init ( table, alphabet, state_transition ); 212 | 213 | for ( i = 0; i < p_size; i++ ) 214 | sbom_addstring ( table, i, pattern[i], m, p_size, alphabet, state_transition, state_final_multi ); 215 | 216 | return table; 217 | } 218 | 219 | void free_sbom ( struct sbom_table *table, int m ) { 220 | 221 | int i; 222 | 223 | //We know exactly how many states we stored in the pointer_array ( table->idcounter - 1 ) 224 | for ( i = 0; i < table->idcounter - 1; i++ ) 225 | if ( pointer_array[i] ) { 226 | 227 | if ( pointer_array[i]->F ) 228 | free ( pointer_array[i]->F ); 229 | 230 | free ( pointer_array[i]->next ); 231 | free ( pointer_array[i] ); 232 | } 233 | 234 | free ( table->zerostate->next ); 235 | free ( table->zerostate ); 236 | free ( table ); 237 | } 238 | 239 | -------------------------------------------------------------------------------- /sh/sh.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 
7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | /// free an AC table from a given startnode (recursively) 19 | void sh_free ( struct ac_state *state, int alphabet ) { 20 | 21 | int i; 22 | 23 | for ( i = 0; i < alphabet; i++ ) 24 | if ( state->next[i] ) 25 | sh_free ( state->next[i], alphabet ); 26 | 27 | if ( state->output ) 28 | free ( state->output ); 29 | 30 | free ( state->next ); 31 | free ( state ); 32 | } 33 | 34 | /// initialize the empty-table 35 | void sh_init ( struct ac_table *g, int alphabet, int *state_transition ) { 36 | 37 | g->zerostate = NULL; 38 | g->patterncounter = 0; 39 | 40 | //Create the root note 41 | g->zerostate = malloc ( sizeof ( struct ac_state ) ); 42 | 43 | if ( !g->zerostate ) 44 | fail ( "Could not allocate memory\n" ); 45 | 46 | g->idcounter = 1; 47 | g->zerostate->id = 0; 48 | 49 | g->zerostate->output = NULL; 50 | 51 | g->zerostate->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) ); 52 | 53 | //Set all alphabet bytes of root node->next to 0 54 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct ac_state * ) ); 55 | 56 | //Set all cells of transition table for state 0 to 0 57 | int i; 58 | 59 | for ( i = 0; i < alphabet; i++ ) 60 | state_transition[i] = 0; 61 | } 62 | 63 | /// free an entire AC table 64 | void sh_destroy ( struct ac_table *in, int 
alphabet ) { 65 | 66 | int i; 67 | 68 | for ( i = 0; i < alphabet; i++ ) 69 | if ( in->zerostate->next[i] && in->zerostate->next[i]->id > 0 ) { 70 | 71 | //printf("id: %i i: %i\n", in->zerostate->next[i]->id, i); 72 | 73 | sh_free ( in->zerostate->next[i], alphabet ); 74 | in->zerostate->next[i] = NULL; 75 | } 76 | free ( in->zerostate->next ); 77 | free ( in->zerostate ); 78 | } 79 | 80 | // Insert a string to the tree 81 | void sh_addstring ( struct ac_table *g, unsigned int i, unsigned char *string, int m, int alphabet, int *state_transition, unsigned int *state_final ) { 82 | 83 | struct ac_state *state, *next = NULL; 84 | int j, done = 0; 85 | 86 | // as long as next already exists follow them 87 | j = m - 1; 88 | state = g->zerostate; 89 | 90 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) { 91 | 92 | //printf("id: %i j: %i\n", state->id, j); 93 | 94 | state = next; 95 | 96 | if ( j <= 0 ) 97 | done = 1; 98 | 99 | j--; 100 | 101 | //printf("character %c state: %i\n", *( string + j ), state->id); 102 | } 103 | 104 | // not done yet 105 | if ( !done ) { 106 | while ( j >= 0 ) { 107 | // Create new state 108 | next = malloc ( sizeof ( struct ac_state ) ); 109 | 110 | if ( !next ) 111 | fail ( "Could not allocate memory\n" ); 112 | 113 | next->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) ); 114 | 115 | next->id = g->idcounter++; 116 | next->output = NULL; 117 | 118 | state_transition[state->id * alphabet + *( string + j )] = next->id; 119 | 120 | //printf("Created link from state %i to %i for character %c (j = %i)\n", state->id, next->id, *( string + j ), j ); 121 | 122 | //Set all alphabet bytes of the next node's->next to 0 123 | //This is the _extended_ Aho-Corasick algorithm. 
A complete automaton is used where all states 124 | //have an outgoing transition for every alphabet character of the alphabet 125 | memset ( next->next, 0, alphabet * sizeof ( struct ac_state * ) ); 126 | 127 | state->next[*( string + j )] = next; 128 | state = next; 129 | 130 | j--; 131 | } 132 | } 133 | 134 | //After finishing with the previous characters of the keyword, add the terminal state if it does not exist 135 | if ( !state->output ) { 136 | 137 | state_final[state->id] = 1; 138 | 139 | //allocate memory and copy *string to state->output 140 | state->output = ( unsigned char * ) malloc ( sizeof ( unsigned char ) * m ); 141 | memcpy ( state->output, string, m ); 142 | 143 | //printf("Adding output %s to state %i\n", state->output, state->id); 144 | 145 | state->keywordline = g->patterncounter; 146 | 147 | g->patterncounter++; 148 | } 149 | } 150 | 151 | unsigned int search_sh ( int m, unsigned char *text, int n, struct ac_table *table, int *bmBc ) { 152 | 153 | struct ac_state *head = table->zerostate; 154 | struct ac_state *r, *s; 155 | 156 | int column = m - 1, matches = 0, j; 157 | 158 | r = head; 159 | 160 | while ( column < n ) { 161 | 162 | r = head; 163 | j = 0; 164 | 165 | while ( j < m && ( s = r->next[*( text + column - j )] ) != NULL ) { 166 | 167 | r = s; 168 | j++; 169 | } 170 | 171 | if ( r->output != NULL ) 172 | matches++; 173 | 174 | column += bmBc[text[column]]; 175 | } 176 | 177 | return matches; 178 | } 179 | 180 | struct ac_table *preproc_sh ( unsigned char **pattern, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_final ) { 181 | 182 | unsigned int i; 183 | 184 | struct ac_table *table; 185 | 186 | // allocate memory for the table 187 | 188 | table = malloc ( sizeof ( struct ac_table ) ); 189 | 190 | if ( !table ) 191 | fail ( "Could not initialize table\n" ); 192 | 193 | sh_init ( table, alphabet, state_transition ); 194 | 195 | for ( i = 0; i < p_size; i++ ) 196 | sh_addstring ( table, i, pattern[i], m, 
alphabet, state_transition, state_final ); 197 | 198 | return table; 199 | } 200 | 201 | void free_sh ( struct ac_table *table, int alphabet ) { 202 | 203 | sh_destroy ( table, alphabet ); 204 | 205 | free ( table ); 206 | } 207 | 208 | -------------------------------------------------------------------------------- /sh/support/atomic.h: -------------------------------------------------------------------------------- 1 | /** 2 | * support/atomic.[ch] 3 | * streamline wrapper around atomic operations 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at wdebruij _-at-_ users DOT sourceforge DOT net 7 | * 8 | * Based on the original SUNRPC implementation as found in GLIBC. 9 | * That version follows an MIT-like license. 10 | * Here LGPL applies. 11 | * */ 12 | 13 | #ifdef __KERNEL__ 14 | #include 15 | #else 16 | 17 | /** this is obviously NOT atomic. 18 | * TODO: fix. at least now we have the calls in place */ 19 | 20 | #define atomic_t int 21 | 22 | #define ATOMIC_INIT(x) (x) 23 | 24 | #define atomic_read(x) (*x) 25 | #define atomic_inc(x) ((*x))++ 26 | #define atomic_dec(x) ((*x)--) 27 | #define atomic_inc_and_test(x) ( ++(*x) ) 28 | 29 | #endif 30 | 31 | -------------------------------------------------------------------------------- /sh/support/bitmap.h: -------------------------------------------------------------------------------- 1 | // bitmap.h 2 | // support for per-bit operations 3 | // 4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | // set a bitmap; counting starts at 0 10 | static inline void bitmap_set(char *bitmap, unsigned int element) 11 | { 12 | bitmap[element >> 3] |= (1 << (element & 0x7)); 13 | } 14 | 15 | static inline int bitmap_isset(const char *bitmap, unsigned int element) 16 | { 17 | return (bitmap[element >> 3] & (1 << (element & 0x7))) ? 
1 : 0; 18 | } 19 | 20 | static inline void bitmap_clear(char *bitmap, unsigned int element) 21 | { 22 | bitmap[element >> 3] &= ~(1 << (element & 0x7)); 23 | } 24 | 25 | -------------------------------------------------------------------------------- /sh/support/dict.c: -------------------------------------------------------------------------------- 1 | // dict.[ch] 2 | // an associative memory 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_AT_- computer.org 6 | // 7 | // BSD License applies 8 | 9 | #ifdef __KERNEL__ 10 | #include 11 | #include 12 | #include 13 | #else 14 | #include 15 | #include 16 | #include 17 | #endif 18 | 19 | #include "log.h" 20 | #include "macros.h" 21 | #include "dict.h" 22 | 23 | #define DICT_BYTELEN (sizeof(struct dictionary) * DICT_TABLE_LEN) 24 | 25 | struct dictionary * 26 | dict_create(void) 27 | { 28 | struct dictionary *dict; 29 | 30 | dict = myalloc(DICT_BYTELEN); 31 | memset(dict, 0, DICT_BYTELEN); 32 | return dict; 33 | } 34 | 35 | static struct dictionary * 36 | __dict_find_ex(struct dictionary *dict, const char *key, int n) 37 | { 38 | int i = -1, occur = -1; 39 | 40 | if (!dict) 41 | return NULL; 42 | 43 | while (occur < n) { 44 | if (++i == DICT_TABLE_LEN) 45 | return NULL; 46 | if (!dict[i].klen) 47 | continue; 48 | if (!memcmp(dict[i].key, key, 49 | max(dict[i].klen, (int) strlen(key)))){ 50 | occur++; 51 | } 52 | } 53 | return &dict[i]; 54 | } 55 | 56 | void * 57 | dict_lookup_ex(struct dictionary *dict, const char *key, int n) 58 | { 59 | struct dictionary * elem = __dict_find_ex(dict, key, n); 60 | if (elem) 61 | return elem->value; 62 | return NULL; 63 | } 64 | 65 | void * 66 | dict_lookup(struct dictionary *dict, const char *key) 67 | { 68 | return dict_lookup_ex(dict, key, 0); 69 | } 70 | 71 | char * 72 | dict_rlookup(struct dictionary *dict, void *data) 73 | { 74 | int i = -1; 75 | 76 | while (++i < DICT_TABLE_LEN) { 77 | if (dict[i].value == data) 78 | return 
dict[i].key; 79 | } 80 | return NULL; 81 | } 82 | 83 | int 84 | dict_insert_dup(struct dictionary *dict, const char *key, void *value) 85 | { 86 | int i = 0; 87 | 88 | // skip used items 89 | while(i < DICT_TABLE_LEN && dict[i].klen) 90 | i++; 91 | if (i == DICT_TABLE_LEN) { 92 | sl_log(LOG_WARN, "exhausted dictionary space"); 93 | return -1; 94 | } 95 | 96 | // fill item 97 | dict[i].klen = strlen(key) + 1; 98 | dict[i].key = myalloc(dict[i].klen); 99 | memcpy(dict[i].key, key, dict[i].klen); 100 | dict[i].value = value; 101 | return i; 102 | } 103 | 104 | int 105 | dict_insert(struct dictionary *dict, const char *key, void *value) 106 | { 107 | if (!key) { 108 | sl_log(LOG_WARN, "dict insert NULL key thwarted"); 109 | return -1; 110 | } 111 | 112 | // there are faster alternatives for duplicate checking 113 | if (dict_lookup(dict, key)){ 114 | sl_log(LOG_WARN, "dictionary collision on %s", key); 115 | sl_log(LOG_MSG, key); 116 | return -1; 117 | } 118 | return dict_insert_dup(dict, key, value); 119 | } 120 | 121 | void 122 | dict_replace(struct dictionary *dict, const char *key, void *value) 123 | { 124 | struct dictionary *item = __dict_find_ex(dict, key, 0); 125 | if (item) 126 | item->value = value; 127 | } 128 | 129 | int 130 | dict_len(struct dictionary *dict) 131 | { 132 | int i, occur = 0; 133 | 134 | for (i = 0; i < DICT_TABLE_LEN; i++) 135 | if (dict[i].klen) 136 | occur++; 137 | 138 | return occur; 139 | } 140 | 141 | void * 142 | dict_getnth(struct dictionary *dict, int n) 143 | { 144 | int i=-1, j=-1; 145 | while (++i < DICT_TABLE_LEN) 146 | if (dict[i].klen && ++j == n) 147 | return dict[i].value; 148 | return NULL; 149 | } 150 | 151 | void 152 | dict_delex(struct dictionary *dict, const char *key, int n) 153 | { 154 | struct dictionary * entry; 155 | 156 | if (!(entry = __dict_find_ex(dict, key, n))) 157 | return; 158 | 159 | myfree(entry->key); 160 | entry->klen = 0; 161 | entry->key = NULL; 162 | entry->value = NULL; 163 | } 164 | 165 | void 166 | 
dict_del(struct dictionary *dict, const char *key) 167 | { 168 | dict_delex(dict, key, 0); 169 | } 170 | 171 | void 172 | dict_clear(struct dictionary *dict, int free_values) 173 | { 174 | struct dictionary *elem; 175 | int i; 176 | 177 | dict_foreach_elem(dict, i, elem) { 178 | myfree(elem->key); 179 | elem->klen = 0; 180 | if (free_values) 181 | myfree(elem->value); 182 | } 183 | } 184 | 185 | void 186 | dict_destroy(struct dictionary *dict, int free_values) 187 | { 188 | dict_clear(dict, free_values); 189 | myfree(dict); 190 | } 191 | 192 | struct dictionary * 193 | dict_copy(struct dictionary *dict) 194 | { 195 | struct dictionary *new; 196 | int i; 197 | char *key; 198 | void *value; 199 | 200 | if ((new = dict_create())) 201 | dict_foreach(dict, i, key, value) 202 | dict_insert(new, key, value); 203 | 204 | return new; 205 | } 206 | 207 | #ifdef __KERNEL__ 208 | EXPORT_SYMBOL(dict_insert); 209 | EXPORT_SYMBOL(dict_replace); 210 | EXPORT_SYMBOL(dict_lookup); 211 | EXPORT_SYMBOL(dict_rlookup); 212 | EXPORT_SYMBOL(dict_del); 213 | EXPORT_SYMBOL(dict_delex); 214 | EXPORT_SYMBOL(dict_destroy); 215 | #endif 216 | 217 | 218 | -------------------------------------------------------------------------------- /sh/support/dict.h: -------------------------------------------------------------------------------- 1 | // dict.[ch] 2 | // an associative memory 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_AT_- computer.org 6 | // 7 | // BSD License applies 8 | 9 | #ifndef WJDB_DICT_H 10 | #define WJDB_DICT_H 11 | 12 | #define DICT_TABLE_LEN 128 ///< #elements 13 | 14 | /** a dictionary maps a key to a value. 
15 | * 16 | * we allocate space for keys, but pass foreign pointers directly 17 | * the caller must take care not to free pointers still in the table 18 | * 19 | * index-based functions start counting at 0 */ 20 | 21 | /// a (key,value) pair as used in the table 22 | struct dictionary { 23 | char *key; 24 | int klen; 25 | void *value; 26 | }; 27 | 28 | #define STATIC_DICT(name) static struct dictionary name[DICT_TABLE_LEN]; 29 | 30 | struct dictionary * dict_create(void); 31 | void dict_clear(struct dictionary *dict, int free_values); 32 | void dict_destroy(struct dictionary *dict, int free_values); 33 | 34 | // add / replace / del 35 | int dict_insert(struct dictionary *dict, const char *key, void *value); 36 | int dict_insert_dup(struct dictionary *dict, const char *key, void *value); 37 | void dict_replace(struct dictionary *dict, const char *key, void *value); 38 | void dict_del(struct dictionary *dict, const char *key); 39 | void dict_delex(struct dictionary *dict, const char *key, int n); 40 | struct dictionary * dict_copy(struct dictionary *dict); 41 | 42 | int dict_len(struct dictionary *dict); 43 | 44 | // lookup by key / value / index 45 | void * dict_lookup(struct dictionary *dict, const char *key); 46 | void * dict_lookup_ex(struct dictionary *dict, const char *key, int n); 47 | char * dict_rlookup(struct dictionary *dict, void *data); 48 | void * dict_getnth(struct dictionary *dict, int n); 49 | 50 | /// get the next used entry (for internal use only) 51 | static inline int 52 | __dict_getnext(struct dictionary *dict, int i) 53 | { 54 | while (++i < DICT_TABLE_LEN) 55 | if (dict[i].klen) 56 | return i; 57 | return -1; 58 | } 59 | 60 | /// do something for each filled entry 61 | #define dict_foreach(dict, i, outkey, outval) \ 62 | for ((i) =__dict_getnext(dict, -1); \ 63 | (i) >= 0 && ((outkey) = dict[i].key) && ((outval) = dict[i].value); \ 64 | (i) = __dict_getnext(dict, i)) 65 | 66 | /// retrieve consecutive elements 67 | #define 
dict_foreach_elem(dict, i, elem) \ 68 | for ((i) = __dict_getnext(dict, -1); \ 69 | (i) >= 0 && ((elem) = &dict[i]) != NULL; \ 70 | (i) = __dict_getnext(dict, i)) 71 | 72 | #endif /* WJDB_DICT_H */ 73 | 74 | -------------------------------------------------------------------------------- /sh/support/endian.h: -------------------------------------------------------------------------------- 1 | // endian.h 2 | // detect and cope with varying endianness 3 | // 4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | #include "macros.h" 10 | 11 | #define ENDIAN_BIG 0x0 12 | #define ENDIAN_LITTLE 0x1 13 | 14 | __attribute__((pure)) static inline int arch_get_endianness(void) 15 | { 16 | #if defined i386 17 | return ENDIAN_LITTLE; 18 | #elif defined sparc 19 | return ENDIAN_BIG; 20 | #elif defined ppc || defined powerpc 21 | return ENDIAN_BIG; 22 | #elif defined armbe 23 | return ENDIAN_BIG; 24 | #else 25 | int16_t one = 1; 26 | char *cp = (char*)&one; 27 | if ( *cp == 0 ) 28 | return ENDIAN_LITTLE; 29 | return ENDIAN_BIG; 30 | #endif 31 | } 32 | 33 | #if defined i386 || defined x86-64 34 | #define SL_BYTEORDER ENDIAN_LITTLE 35 | #elif defined sparc 36 | #define SL_BYTEORDER ENDIAN_BIG 37 | #elif defined __ARMEB__ 38 | #define SL_BYTEORDER ENDIAN_BIG 39 | #elif defined ppc || defined powerpc 40 | #define SL_BYTEORDER ENDIAN_BIG 41 | #else 42 | #warning "cannot predefine endianness" 43 | #endif 44 | 45 | /// some archs (sun) have 8byte pointers but 4 byte ints, then *(int*) will fail 46 | /// use this as alternative 47 | #define swap16(A) ((((uint16_t)(A) & 0xff00) >> 8) | \ 48 | (((uint16_t)(A) & 0x00ff) << 8)) 49 | #define swap32(A) ((((uint32_t)(A) & 0xff000000) >> 24) | \ 50 | (((uint32_t)(A) & 0x00ff0000) >> 8) | \ 51 | (((uint32_t)(A) & 0x0000ff00) << 8) | \ 52 | (((uint32_t)(A) & 0x000000ff) << 24)) 53 | 54 | // swap on not equal: if endian is unequal to local endianness 
then swap 55 | static inline uint16_t swap16_ne(uint16_t var, int endian) 56 | { 57 | if (likely(endian == arch_get_endianness())) 58 | return var; 59 | return swap16(var); 60 | } 61 | 62 | -------------------------------------------------------------------------------- /sh/support/filedes.c: -------------------------------------------------------------------------------- 1 | /** 2 | * support/filedes.[ch] 3 | * support incoming signals (such as SIGIO in POSIX userspace) 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * 10 | * Modified by Tudor Zaharia on Aug. 17 2010 11 | * tudor _at_ microcontroller DOT ro 12 | * - sinchronized access to slrun on each fd 13 | * */ 14 | 15 | #ifdef __KERNEL__ 16 | #else 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #endif 27 | 28 | #include 29 | 30 | #include "../support/macros.h" 31 | #include "../support/log.h" 32 | #include "../support/timer.h" 33 | #include "../wrap/file.h" 34 | #include "../wrap/origsocket.h" 35 | #include "../core/datapath.h" 36 | #include "filedes.h" 37 | 38 | struct sighandler { 39 | enum slsig_action action; 40 | union { 41 | struct instance *instance; 42 | void (*callback)(int fd); 43 | } ptr; 44 | int backtrack_fd; /// used to close clientfds after an acceptfd 45 | }; 46 | 47 | #define MAXFD 512 48 | static struct sighandler * handlerlist[MAXFD]; 49 | 50 | // these semaphores are used for synchronizing access to the slrun_slow() 51 | static sem_t sems[MAXFD]; 52 | 53 | /******** implementation-specific support code **********/ 54 | 55 | int fd_setasync(int fd) 56 | { 57 | #if linux 58 | int flags; 59 | 60 | flags = __orig_fcntl(fd, F_GETFL, 0); 61 | if (flags < 0) 62 | goto err; 63 | if (__orig_fcntl(fd, F_SETFL, flags | O_NONBLOCK | O_ASYNC) == -1) 64 | goto err; 65 | if (__orig_fcntl(fd, F_SETOWN, getpid()) == -1) 
66 | goto err; 67 | return 0; 68 | err: 69 | sl_log(LOG_WARN, "failed to set fd to async"); 70 | return -1; 71 | #else 72 | sl_log(LOG_WARN, "async IO not supported"); 73 | return -1; 74 | #endif 75 | } 76 | 77 | /******** core functions: handlers and callback **********/ 78 | 79 | /** read out a filedescriptor 80 | * 81 | * because most callbacks will read from a buffer we implemented this 82 | * functionality locally */ 83 | static void sigaction_read(unsigned long sigid, struct instance *instance) 84 | { 85 | #define MAX_LINESZ 1500 86 | char data[MAX_LINESZ]; 87 | int size, total = 0; 88 | int fd = (int) sigid; 89 | 90 | // enter critical section 91 | if ( -1 == sem_wait(&sems[fd]) ) perror("semop error"); 92 | 93 | size = __orig_read(fd, data, MAX_LINESZ); 94 | while (size > 0) { 95 | total += size; 96 | slrun_slow(instance, data, size); 97 | size = __orig_read(fd, data, MAX_LINESZ); 98 | }; 99 | 100 | if (size < 0 && errno != EAGAIN) 101 | perror("read()"); 102 | else if (size == 0) { // EOF 103 | slrun_slow(instance, NULL, 0); 104 | } 105 | 106 | // leave critical section 107 | if ( -1 == sem_post(&sems[fd]) ) perror("semop error"); 108 | } 109 | 110 | static void sigaction_accept(unsigned long sigid, struct instance *instance) 111 | { 112 | int fd = (int) sigid; 113 | int client_fd; 114 | 115 | client_fd = __orig_accept(fd, NULL, NULL); 116 | if (client_fd < 0) { 117 | perror("accept()"); 118 | return; 119 | } 120 | filedes_add(client_fd, instance, SIGH_READ); 121 | handlerlist[client_fd]->backtrack_fd = sigid; 122 | } 123 | 124 | /** call a process2() member */ 125 | static void sigaction_process(unsigned long sigid, struct instance *instance) 126 | { 127 | instance->fdata.func->process2(NULL, NULL, &instance->fdata); 128 | } 129 | 130 | /* Handle a SIGIO signal. 131 | * 132 | * On receiving one of these two signals, this function 133 | * executes a non-blocking select() over all file descriptors 134 | * registered to support/filedes. 
On return, it executes all 135 | * registered handlers for the descriptors on which data is 136 | * available. 137 | * */ 138 | static void signal_callback(int signal) 139 | { 140 | struct timeval tv = { .tv_sec = 0, .tv_usec = 0}; 141 | fd_set readfds; 142 | int i, highest_fd = -1, total; 143 | 144 | // only handle registered signals 145 | if (signal != SIGIO) 146 | return; 147 | 148 | // add all file descriptors to the listen set 149 | FD_ZERO(&readfds); 150 | for(i = 0; i < MAXFD; i++) { 151 | if (handlerlist[i]) { 152 | FD_SET(i, &readfds); 153 | highest_fd = i; 154 | } 155 | } 156 | 157 | // listen on the descriptor set 158 | total = __orig_select(++highest_fd, &readfds, NULL, NULL, &tv); 159 | if (total < 0) { 160 | if (errno != EINVAL) 161 | dprintf("error in filedes select\n"); 162 | return; 163 | } 164 | 165 | // trigger actions for all descriptors on which data is waiting 166 | for(i = 0; total && i <= highest_fd; i++) { 167 | if (FD_ISSET(i,&readfds)) { 168 | switch (handlerlist[i]->action) { 169 | case SIGH_PROCESS : 170 | sigaction_process(i, handlerlist[i]->ptr.instance); 171 | break; 172 | case SIGH_READ : 173 | sigaction_read(i, handlerlist[i]->ptr.instance); 174 | break; 175 | case SIGH_ACCEPT : 176 | sigaction_accept(i, handlerlist[i]->ptr.instance); 177 | break; 178 | case SIGH_CALLBACK : 179 | handlerlist[i]->ptr.callback(i); 180 | break; 181 | }; 182 | total--; 183 | } 184 | } 185 | } 186 | 187 | /******** bookkeeping **********/ 188 | 189 | // install the SIGIO handler 190 | int filedes_init(void* unused) 191 | { 192 | signal(SIGIO, signal_callback); 193 | return 0; 194 | } 195 | 196 | int filedes_exit(void *unused) 197 | { 198 | signal(SIGIO, SIG_DFL); 199 | return 0; 200 | } 201 | 202 | int filedes_add(int fd, void *ptr, enum slsig_action action) 203 | { 204 | struct sighandler * sigh; 205 | 206 | assert(fd < MAXFD); 207 | if (handlerlist[fd]) 208 | return -1; 209 | 210 | // create the semaphore 211 | if (sem_init(&sems[fd], 0, 1) == -1) 212 
| return -1; 213 | 214 | // create the structure 215 | sigh = myalloc(sizeof(struct sighandler)); 216 | sigh->ptr.instance = (struct instance *) ptr; 217 | sigh->backtrack_fd = -1; 218 | sigh->action = action; 219 | 220 | // add it to the list 221 | handlerlist[fd] = sigh; 222 | 223 | // ask the OS to signal us when data arrives on this fd. 224 | fd_setasync(fd); 225 | 226 | // bootstrap first read (for files) 227 | if (action == SIGH_READ || action == SIGH_PROCESS) 228 | signal_callback(SIGIO); 229 | 230 | return 0; 231 | } 232 | 233 | /** close all connections that depend on the parameter */ 234 | static void __signalhandler_deldep(int fd) 235 | { 236 | int i; 237 | 238 | for(i=0; ibacktrack_fd == fd) { 241 | __orig_close(i); 242 | myfree(handlerlist[i]); 243 | handlerlist[i] = NULL; 244 | } 245 | } 246 | 247 | /** remove a connection. may also recursively remove dependent connections */ 248 | int filedes_del(int fd) 249 | { 250 | struct sighandler * sigh; 251 | 252 | assert(fd < MAXFD); 253 | if (!handlerlist[fd]) 254 | return -1; 255 | 256 | sigh = handlerlist[fd]; 257 | handlerlist[fd] = NULL; 258 | 259 | // close all client connections if this is an accept descriptor. 
260 | if (sigh->action == SIGH_ACCEPT) 261 | __signalhandler_deldep(fd); 262 | 263 | myfree(sigh); 264 | 265 | // destroy semaphore 266 | sem_destroy(&sems[fd]); 267 | 268 | return 0; 269 | } 270 | 271 | -------------------------------------------------------------------------------- /sh/support/filedes.h: -------------------------------------------------------------------------------- 1 | /** 2 | * support/filedes.[ch] 3 | * support incoming signals (such as SIGIO in POSIX userspace) 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * */ 10 | 11 | #ifndef SL_SUPPORT_FILEDES_H 12 | #define SL_SUPPORT_FILEDES_H 13 | 14 | enum slsig_action {SIGH_PROCESS=1, SIGH_READ, SIGH_ACCEPT, SIGH_CALLBACK}; 15 | 16 | int filedes_add(int fd, void *ptr, enum slsig_action action); 17 | int filedes_del(int fd); 18 | 19 | int filedes_init(void*); 20 | int filedes_exit(void*); 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /sh/support/hashtable.c: -------------------------------------------------------------------------------- 1 | // hashtable.[ch] 2 | // a double-hashed hashtable 3 | // 4 | // (c) 2008, willem de bruijn, vrije universiteit amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // BSD license applies 8 | 9 | #include "hashtable.h" 10 | 11 | /** lookup an element by calling the hashfunction, like in hash_insert. 12 | return the nth match. NB: n starts at 1! 
*/ 13 | int 14 | hash_lookup_by_value(struct hashtable *hash, void * value, int nth) 15 | { 16 | int i=-1, match=0, key=-1; 17 | 18 | check(nth > 0); 19 | while (i < MAX_DOUBLEHASH && match < nth){ 20 | key = hash_calc(value, ++i, HASHTBL_LEN); 21 | if (hash->table[key] == value) 22 | match++; 23 | } 24 | if (match == nth) 25 | return key; 26 | else 27 | return -1; 28 | } 29 | 30 | /** insert an item, we use double hashing for collision resolution */ 31 | int 32 | __hash_insert(struct hashtable *hash, void * value, const char *func) 33 | { 34 | int i=0, index; 35 | 36 | if (!value) 37 | returnbug(-1); 38 | 39 | index = hash_calc(value, i, HASHTBL_LEN); 40 | while (hash->table[index] && i < MAX_DOUBLEHASH) { 41 | index = hash_calc(value, ++i, HASHTBL_LEN); 42 | #ifndef NDEBUG 43 | if (hash->table[index] == value) 44 | dprintf("warning : duplicate hash %d: %p==%ld in %s\n", 45 | index, value, (long) value, func); 46 | #endif 47 | } 48 | 49 | if (i == MAX_DOUBLEHASH) { // give up 50 | dprintf("(BUG) hash full in %s\n", func); 51 | return -1; 52 | } 53 | 54 | hash->table[index] = value; 55 | return index; 56 | } 57 | 58 | -------------------------------------------------------------------------------- /sh/support/hashtable.h: -------------------------------------------------------------------------------- 1 | // hashtable.[ch] 2 | // a double-hashed hashtable 3 | // 4 | // (c) 2005, willem de bruijn, vrije universiteit amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // BSD license applies 8 | 9 | #ifndef WJDB_HASHTABLE 10 | #define WJDB_HASHTABLE 11 | 12 | #include "macros.h" 13 | 14 | #define HASHTBL_LEN 431 // use a prime 15 | #define MAX_DOUBLEHASH 24 // stop searching after so many rounds 16 | struct hashtable { 17 | // TODO: add length field, make default size (much) smaller and enable resizing 18 | void * table[HASHTBL_LEN]; 19 | }; 20 | 21 | #define hash_lookup_fast(htable, key) \ 22 | ((htable)->table[key]) 23 | 24 | // check against overflow. 
constructed in such a way that 25 | // we can see in what function the overflow occurred. 26 | #ifndef NDEBUG 27 | #define hash_lookup(htable, key) \ 28 | (( ((unsigned long) key) < HASHTBL_LEN) ? \ 29 | hash_lookup_fast(htable, key) : \ 30 | ((void*) (dprintf("BUG: key %d out of bounds in %s\n", \ 31 | key, __FUNCTION__) & 0L))) 32 | 33 | #define hash_insert_at_unconditional(htable, value, key) \ 34 | do {(htable)->table[key] = value;} while (0) 35 | 36 | // insert and check against overwriting. also see hash_lookup 37 | // returns the key, or <0 on error 38 | #define hash_insert_at(htable, value, key) \ 39 | ((!(htable)->table[key]) ? \ 40 | (((htable)->table[key] = value) ? key : -1) : \ 41 | ((dprintf("BUG: key %d in use in %s.%d\n", \ 42 | key, __FUNCTION__, __LINE__) & 0L))) 43 | #else 44 | 45 | #define hash_lookup(htable, key) \ 46 | ((((unsigned long) key) < HASHTBL_LEN) ? \ 47 | hash_lookup_fast(htable, key) : 0) 48 | 49 | // insert and check against overwriting. also see hash_lookup 50 | #define hash_insert_at(htable, value, key) \ 51 | ((!(htable)->table[key]) ? \ 52 | (((htable)->table[key] = value) ? 
key : -1) : 0) 53 | #endif 54 | 55 | static inline int 56 | hash_calc(void * value, int runno, int maxhash) 57 | { 58 | int h, k, i; 59 | 60 | i = 0; 61 | h = ((unsigned long) value) % maxhash; // primary hash function 62 | k = ((unsigned long) value) % (maxhash - 2); // secondary hash function 63 | 64 | return (h + runno * k) % maxhash; 65 | } 66 | 67 | int hash_lookup_by_value(struct hashtable *hash, void * value, int nth); 68 | 69 | // the __FUNCTION__ helps me locate collision origins 70 | #define hash_insert(a,b) __hash_insert(a,b,__FUNCTION__) 71 | int __hash_insert(struct hashtable *hash, void * value, const char *func); 72 | 73 | static inline int 74 | hash_del(struct hashtable *hash, int key) 75 | { 76 | #ifndef NDEBUG 77 | check(key >= 0 && key < HASHTBL_LEN); 78 | #endif 79 | hash->table[key] = NULL; 80 | return 0; 81 | } 82 | 83 | /** use the hashtable as a simple list */ 84 | static inline 85 | int hash_getnext(struct hashtable *hash, int key) 86 | { 87 | if (key >= HASHTBL_LEN || key < -1) 88 | return -1; 89 | 90 | while (!hash->table[++key]) 91 | if (key == HASHTBL_LEN-1) 92 | return -1; 93 | 94 | return key; 95 | } 96 | 97 | // use an integer for key 98 | #define hash_foreach(table, key, ptr) \ 99 | for(key = hash_getnext(table, -1);\ 100 | key >= 0 && key < HASHTBL_LEN && \ 101 | (((ptr) = hash_lookup_fast(table,key)) != NULL);\ 102 | key = hash_getnext(table,key)) 103 | 104 | // return 0 if the hashtable contains a value, !0 (i.e., true) otherwise 105 | static inline int 106 | hash_empty(struct hashtable *table) 107 | { 108 | int i; 109 | 110 | i = hash_getnext(table, -1); 111 | return (i < 0); 112 | } 113 | 114 | static inline int 115 | hash_len(struct hashtable *table) 116 | { 117 | int i, count=0; 118 | 119 | i = hash_getnext(table,-1); 120 | while(i >= 0){ 121 | count++; 122 | i = hash_getnext(table,i); 123 | } 124 | 125 | return count; 126 | } 127 | 128 | #endif /* WJDB_HASHTABLE */ 129 | 130 | 
-------------------------------------------------------------------------------- /sh/support/list.c: -------------------------------------------------------------------------------- 1 | // list.[ch] 2 | // a doubly linked list 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | #include "../support/macros.h" 10 | #include "list.h" 11 | 12 | struct list * list_insert_sorted(struct list *start, void *id) 13 | { 14 | struct list *cur, *new; 15 | 16 | new = list_create(id); 17 | if (!new) 18 | return NULL; 19 | 20 | if (!start) // start of list: update global startnode 21 | return new; 22 | 23 | // find our spot in the list 24 | // exception : test cur (we test cur->next in general) 25 | if (start->id > id){ 26 | start->prev = new; 27 | new->next = start; 28 | return new; 29 | } 30 | if (start->id == id){ 31 | // printf("skipping duplicate : cur=%p\n",new->id); 32 | free(new); 33 | return start; 34 | } 35 | 36 | cur = start; 37 | while (cur->next && cur->next->id < id) 38 | cur = cur->next; 39 | 40 | if (!cur->next){ // end of list: append or place just before the end-node 41 | // printf("inserting (%p) : cur=%p new=%p\n",id,cur ? cur->id : "[ ]",new->id); 42 | cur->next = new; 43 | new->prev = cur; 44 | return start; 45 | } 46 | 47 | if (cur->next->id == id){ // exception : found a duplicate. remove 48 | // printf("skipping duplicate : cur=%p\n",new->id); 49 | free(new); 50 | return start; 51 | } 52 | 53 | 54 | // insert into the sorted list 55 | // printf("inserting (%p) : cur=%p new=%p cur->next=%p\n",id,cur ? 
cur->id : "[ ]",new->id, cur->next->id); 56 | new->next = cur->next; 57 | new->prev = cur; 58 | cur->next->prev = new; 59 | cur->next = new; 60 | 61 | return start; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /sh/support/list.h: -------------------------------------------------------------------------------- 1 | // list.[ch] 2 | // a doubly linked list 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // BSD license applies 8 | 9 | #ifndef WDB_SLIST_H 10 | #define WDB_SLIST_H 11 | 12 | #ifdef __KERNEL__ 13 | #include 14 | #include 15 | #include 16 | #else 17 | #include 18 | #include 19 | #include 20 | #endif 21 | 22 | #include "macros.h" 23 | 24 | struct list { 25 | void *id; 26 | struct list *next; 27 | struct list *prev; 28 | }; 29 | 30 | static inline struct list * list_create(void *id) 31 | { 32 | struct list * new; 33 | 34 | new = myalloc (sizeof(struct list)); 35 | if (!new) 36 | return NULL; 37 | new->id = id; 38 | new->next = NULL; 39 | new->prev = NULL; 40 | 41 | return new; 42 | } 43 | 44 | static inline struct list * list_insert(struct list *start, void *id) 45 | { 46 | struct list *new = list_create(id); 47 | 48 | if (!new) 49 | return NULL; 50 | 51 | if (start){ 52 | new->next = start; 53 | start->prev = new; 54 | } 55 | return new; 56 | } 57 | 58 | static inline struct list * list_append(struct list *start, void *id) 59 | { 60 | struct list *new = list_create(id); 61 | struct list* cur; 62 | 63 | if (!new) 64 | return NULL; 65 | 66 | if (!start) 67 | return new; 68 | 69 | cur = start; 70 | while (cur->next) 71 | cur = cur->next; 72 | cur->next = new; 73 | new->prev = cur; 74 | return start; 75 | } 76 | 77 | /** strange function for a list 78 | * used only for duplicate removal 79 | * 80 | * note that the function returns NULL in two 81 | * distinct cases: no 'start', or 'start' is the only item 82 | */ 83 | static inline 
struct list * list_pop(struct list *start) 84 | { 85 | struct list *tmp; 86 | 87 | if (!start) 88 | return NULL; 89 | 90 | tmp = start; 91 | start = start->next; 92 | myfree(tmp); 93 | 94 | return start; 95 | } 96 | 97 | static inline struct list * list_invert(struct list *start) 98 | { 99 | struct list *cur, *tmp=NULL; 100 | 101 | if (!start->next) 102 | return start; 103 | 104 | cur = start; 105 | // swap {prev,next} pointers 106 | while (cur){ 107 | tmp = cur->next; 108 | cur->next = cur->prev; 109 | cur->prev = tmp; 110 | tmp = cur; 111 | cur = cur->prev; 112 | } 113 | 114 | return tmp; 115 | } 116 | 117 | // return the item in the list that matches the id 118 | static inline struct list * list_exists(struct list *start, void * id) 119 | { 120 | struct list *cur; 121 | 122 | if (!start) 123 | return NULL; 124 | 125 | // find our spot in the list 126 | cur = start; 127 | while (cur && cur->id != id) 128 | cur = cur->next; 129 | if (!cur) 130 | return NULL; 131 | else 132 | return cur; 133 | } 134 | 135 | /** unlink an item. can be used together with list_foreach */ 136 | static inline struct list * list_unlink(struct list *cur) 137 | { 138 | struct list *tmp = NULL; 139 | 140 | if (cur->next){ 141 | cur->next->prev = cur->prev; 142 | tmp = cur->next; 143 | } 144 | if (cur->prev){ 145 | cur->prev->next = cur->next; 146 | tmp = cur->prev; 147 | } 148 | 149 | if (!tmp) 150 | return NULL; // no cur->next && no cur->prev ? then it's an empty list 151 | 152 | while (tmp->prev) 153 | tmp = tmp->prev; 154 | return tmp; // return the new startnode 155 | } 156 | 157 | /** remove id if it exists. 
returns start of the list */ 158 | static inline struct list * list_remove(struct list *cur) 159 | { 160 | struct list * elem = list_unlink(cur); 161 | myfree(cur); 162 | return elem; 163 | } 164 | 165 | static inline struct list * list_remove_id(struct list * list, void * id) 166 | { 167 | struct list * elem = list_exists(list, id); 168 | if (elem) 169 | return list_remove(elem); 170 | return list; 171 | } 172 | 173 | struct list * list_insert_sorted(struct list *start, void *id); 174 | 175 | static inline int list_len(struct list *list) 176 | { 177 | int i=0; 178 | while (list){ 179 | i++; 180 | list = list->next; 181 | } 182 | return i; 183 | } 184 | 185 | #define list_foreach(list, cur) \ 186 | for (cur = list; cur; cur = cur->next) 187 | 188 | #define list_destroy(deadlist) \ 189 | while (deadlist) deadlist = list_pop(deadlist) 190 | 191 | #endif /* WDB_SLIST_H */ 192 | 193 | -------------------------------------------------------------------------------- /sh/support/lock.h: -------------------------------------------------------------------------------- 1 | // lock.h 2 | // mutual exclusion and other locking support 3 | // 4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | // this is a wrapper around various locking methods 10 | // note: slmutex_trylock returns !0 if a lock is held, 0 otherwise 11 | 12 | 13 | #ifdef __KERNEL__ 14 | #include 15 | 16 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) 17 | 18 | #include 19 | #define slmutex struct semaphore 20 | #define slmutex_init(my_mutex) init_MUTEX(my_mutex) 21 | #define slmutex_static(my_mutex) DECLARE_MUTEX(my_mutex) 22 | #define slmutex_extern(my_mutex) extern struct semaphore my_mutex 23 | #define slmutex_lock(my_mutex) do {} while (down_interruptible(my_mutex)) 24 | #define slmutex_unlock up 25 | #define slmutex_trylock(my_mutex) (down_trylock(my_mutex) ? 0 : 1) 26 | 27 | #else // newer kernel? 
28 | 29 | #include 30 | #include 31 | #include "macros.h" 32 | #define slmutex struct mutex 33 | #define slmutex_init(my_mutex) mutex_init(my_mutex) 34 | #define slmutex_static DEFINE_MUTEX 35 | #define slmutex_extern(my_mutex) extern struct mutex my_mutex 36 | #define slmutex_trylock mutex_trylock 37 | #if 1 38 | #define slmutex_lock mutex_lock 39 | #define slmutex_unlock mutex_unlock 40 | #else 41 | #define slmutex_lock(my_mutex) \ 42 | do {dprintf("mutex_lock in %s. atomic=%c locked=%c\n", __FUNCTION__, \ 43 | in_atomic()?'y':'n', mutex_is_locked(my_mutex)?'y':'n'); \ 44 | mutex_lock(my_mutex); \ 45 | dprintf("mutex locked\n"); \ 46 | } while(0) 47 | #define slmutex_unlock(my_mutex) \ 48 | do {dprintf("mutex_unlock in %s. atomic=%c locked=%c\n", __FUNCTION__,\ 49 | in_atomic()?'y':'n', mutex_is_locked(my_mutex)?'y':'n'); \ 50 | mutex_unlock(my_mutex); \ 51 | dprintf("mutex_unlocked\n"); \ 52 | } while(0) 53 | #endif 54 | #endif 55 | 56 | #else // userspace? 57 | 58 | #define in_atomic() (0) 59 | 60 | #include 61 | #define slmutex pthread_mutex_t 62 | #define slmutex_init(my_mutex) pthread_mutex_init(my_mutex, NULL); 63 | #define slmutex_static(my_mutex) pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER 64 | #define slmutex_extern(my_mutex) extern pthread_mutex_t my_mutex 65 | #define slmutex_trylock(my_mutex) (pthread_mutex_trylock(my_mutex) ? 
0 : 1) 66 | #if 1 67 | #define slmutex_lock pthread_mutex_lock 68 | #define slmutex_unlock pthread_mutex_unlock 69 | #else 70 | #define slmutex_lock(my_mutex) \ 71 | do {dprintf("mutex_lock %p in %s\n", my_mutex, __FUNCTION__); \ 72 | pthread_mutex_lock(my_mutex); \ 73 | dprintf("mutex locked\n"); \ 74 | } while(0) 75 | #define slmutex_unlock(my_mutex) \ 76 | do {dprintf("mutex_unlock %p in %s\n", my_mutex, __FUNCTION__); \ 77 | pthread_mutex_unlock(my_mutex); \ 78 | } while(0) 79 | #endif 80 | #endif 81 | 82 | -------------------------------------------------------------------------------- /sh/support/log.c: -------------------------------------------------------------------------------- 1 | /** 2 | * support/log.[ch] 3 | * logging, an alternative to d{0,1}printf 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * */ 10 | 11 | #ifdef __KERNEL__ 12 | #include 13 | #include 14 | #include 15 | #else 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "../wrap/file.h" 26 | #endif 27 | 28 | #include "log.h" 29 | 30 | static char whitespace[] = " "; 31 | static int sl_loglevel = LOG_STOP; 32 | 33 | #ifdef __KERNEL__ 34 | #define __print(fd, text) printk("%s", text) // weird format because of GCC 4.2 check 35 | #else 36 | #define __print(fd, text) __orig_write(fd, text, strlen(text)) 37 | #endif 38 | 39 | static void 40 | __write_whitespace(int fd, int len) 41 | { 42 | #ifndef NDEBUG 43 | if (len > 8) { // hardcoded to be below true length 44 | __print(fd, "out of whitespace\n"); 45 | } 46 | #endif 47 | if (len) { 48 | whitespace[len] = '\0'; 49 | __print(fd, whitespace); 50 | whitespace[len] = ' '; 51 | } 52 | } 53 | 54 | static void 55 | __write(int fd, enum logtype level, const char *pre, const char *line) 56 | { 57 | 58 | 59 | if (level <= sl_loglevel) { 60 | // write generic 
header, identifying level, type, etc. 61 | #if !defined __KERNEL__ && !defined NDEBUG 62 | { 63 | char pidbuf[16]; 64 | snprintf(pidbuf, 15, "[%u]", getpid()); 65 | __print(fd, pidbuf); 66 | } 67 | #endif 68 | __print(fd, pre); 69 | 70 | // add whitespace padding depending on level 71 | __write_whitespace(fd, level); 72 | 73 | // write actual message 74 | __print(fd, line); 75 | __print(fd, "\n"); 76 | } 77 | } 78 | 79 | #ifdef __KERNEL__ 80 | 81 | // TODO: move to using our own logging buffer 82 | static void 83 | write_log(enum logtype level, const char *pre, const char *line) 84 | { 85 | __write(0, level, pre, line); 86 | } 87 | 88 | int 89 | log_init(void *unused) 90 | { 91 | return 0; 92 | } 93 | 94 | int 95 | log_exit(void *unused) 96 | { 97 | return 0; 98 | } 99 | 100 | #else 101 | 102 | static int logfd = -1; 103 | 104 | /** write a message to the log */ 105 | static void 106 | write_log(enum logtype level, const char *pre, const char *line) 107 | { 108 | if (logfd >= 0) { 109 | __write(logfd, level, pre, line); 110 | 111 | // when debugging, copy important messages to screen 112 | #if !defined NDEBUG 113 | if (logfd > 2 && level <= LOG_WARN) 114 | __write(2, level, pre, line); 115 | #endif 116 | } 117 | } 118 | 119 | #define MAXNAME 64 120 | static char name[MAXNAME + 1]; 121 | 122 | int 123 | log_init(void* unused) 124 | { 125 | char *tmpdir, *user; 126 | char linkname[MAXNAME + 1]; 127 | int loglevel_set = 0; 128 | 129 | if (getenv("LOGLEVEL")) { 130 | sl_loglevel = strtol(getenv("LOGLEVEL"), NULL, 10); 131 | loglevel_set = 1; 132 | } 133 | 134 | // log to terminal? 135 | if (getenv("LOGTERM")) { 136 | logfd = 1; 137 | sl_log(LOG_LOW, "logging to terminal"); 138 | return 0; 139 | } 140 | 141 | // get some metadata to name the file descriptively 142 | // NB: this is unsafe. check that it is truly a dir? 
143 | tmpdir = getenv("TMPDIR"); 144 | if (!tmpdir) 145 | tmpdir = "/tmp"; 146 | user = getlogin(); 147 | if (!user) 148 | user = getenv("USER"); 149 | 150 | // create and open a new log file 151 | snprintf(name, MAXNAME, "%s/streamline.%s.%lu.log", 152 | tmpdir, user, time(NULL)); 153 | logfd = __orig_open(name, O_WRONLY | O_CREAT, 0644); 154 | if (logfd < 0) { 155 | fprintf(stderr, "error opening log\n"); 156 | return -1; 157 | } 158 | 159 | // set the 'latest' symlink to this file 160 | snprintf(linkname, MAXNAME, "%s/streamline.%s.latest.log", 161 | tmpdir, user); 162 | unlink(linkname); // don't care whether there was a link before 163 | if (link(name, linkname) < 0) 164 | sl_log(LOG_WARN, "error linking log\n"); 165 | 166 | // we set this before, but defer output until initialized 167 | if (loglevel_set) 168 | sl_log(LOG_LOW, "set loglevel to %d", sl_loglevel); 169 | 170 | return 0; 171 | } 172 | 173 | // choose the output file descriptor 174 | void 175 | log_set(int fd) 176 | { 177 | logfd = fd; 178 | } 179 | 180 | // get the output file descriptor 181 | int 182 | log_get(void) 183 | { 184 | return logfd; 185 | } 186 | 187 | // get the output filename (if any) 188 | const char * 189 | log_getname(void) 190 | { 191 | if (logfd > 2) 192 | return name; 193 | else 194 | return NULL; 195 | } 196 | 197 | int 198 | log_exit(void* unused) 199 | { 200 | if (logfd >= 0) 201 | __orig_close(logfd); 202 | return 0; 203 | } 204 | 205 | #endif 206 | 207 | void 208 | sl_log(enum logtype type, const char *format, ...) 
209 | { 210 | #define SLLOGSZ 256 211 | char buf[SLLOGSZ]; 212 | const char *pre; 213 | va_list ap; 214 | 215 | va_start(ap, format); 216 | vsnprintf(buf, SLLOGSZ - 1, format, ap); 217 | va_end(ap); 218 | 219 | switch (type) { 220 | case LOG_BUG : pre = "[BUG ] "; break; 221 | case LOG_ERR : pre = "[ERR ] "; break; 222 | case LOG_WARN : pre = "[WARN ] "; break; 223 | case LOG_MSG : pre = "[Info ] "; break; 224 | case LOG_START : pre = "[Start] "; break; 225 | case LOG_STOP : pre = "[Stop ] "; break; 226 | case LOG_LOW : pre = "[Info ] "; break; 227 | case LOG_ALL : pre = "[Info ] "; break; 228 | default : pre = "[LOGBUG]"; break; 229 | } 230 | write_log(type, pre, buf); 231 | } 232 | 233 | #ifdef __KERNEL__ 234 | EXPORT_SYMBOL(sl_log); 235 | EXPORT_SYMBOL(log_init); 236 | EXPORT_SYMBOL(log_exit); 237 | #endif 238 | 239 | -------------------------------------------------------------------------------- /sh/support/log.h: -------------------------------------------------------------------------------- 1 | /** 2 | * support/log.[ch] 3 | * logging, an alternative to d{0,1}printf 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * */ 10 | 11 | #ifndef SL_SUPPORT_LOG_H 12 | #define SL_SUPPORT_LOG_H 13 | 14 | enum logtype {LOG_BUG = 0, LOG_ERR, LOG_WARN, LOG_MSG, LOG_START, LOG_STOP, LOG_LOW, LOG_ALL}; 15 | 16 | void sl_log(enum logtype type, const char *format, ...); 17 | 18 | int log_init(void* unused); 19 | int log_exit(void* unused); 20 | 21 | void log_set(int fd); 22 | int log_get(void); 23 | const char * log_getname(void); 24 | 25 | #endif 26 | 27 | -------------------------------------------------------------------------------- /sh/support/macros.h: -------------------------------------------------------------------------------- 1 | // macros.[ch] 2 | // simple macros that I reuse often 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // 
email at willem -_AT_- computer.org 6 | // 7 | // BSD License applies 8 | 9 | #ifndef WJDB_SUPPORT_H 10 | #define WJDB_SUPPORT_H 11 | 12 | ///////////// KERNELSPACE/USERSPACE COMPAT 13 | #ifdef __KERNEL__ 14 | #include 15 | #include 16 | #include 17 | #include 18 | #define myalloc(block) kmalloc(block, GFP_KERNEL) 19 | #define myfree(block) kfree(block) 20 | static inline void * mycalloc(size_t nmemb, size_t size) 21 | { 22 | void *data = kmalloc(nmemb * size, GFP_KERNEL); 23 | if (data) 24 | memset(data, 0, nmemb * size); 25 | return data; 26 | } 27 | #define clock() get_cycles() 28 | #define MY_CLOCKRATE 1800000000 29 | #define my_gettimeofday do_gettimeofday 30 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) 31 | #define getpid() (current->pid) 32 | #define getuid() (current_uid()) 33 | #define getgid() (current_gid()) 34 | #else 35 | #define getpid() (current->pid) 36 | #define getuid() (current->uid) 37 | #define getgid() (current->gid) 38 | #endif 39 | #else 40 | #include 41 | #define myalloc malloc 42 | #define mycalloc calloc 43 | #define myfree(a) free(a) 44 | #define MY_CLOCKRATE CLOCKS_PER_SEC 45 | #define my_gettimeofday(a) gettimeofday(a, NULL) 46 | 47 | // PAGE_SIZE is not defined in userspace 48 | #if defined i386 || defined __x86_64__ 49 | #define PAGE_SIZE 4096 50 | #else 51 | #define PAGE_SIZE getpagesize() 52 | #endif 53 | // (un)likely is not defined in userspace 54 | #define likely(x) __builtin_expect(!!(x), 1) 55 | #define unlikely(x) __builtin_expect(!!(x), 0) 56 | #endif /* __KERNEL__ */ 57 | 58 | ///////////// 32/64 bit COMPAT 59 | #ifdef __LP64__ 60 | #define FMT64 "ld" 61 | #define FMT64U "lu" 62 | #else 63 | #define FMT64 "lld" 64 | #define FMT64U "llu" 65 | #endif 66 | 67 | ///////////// Portable ASSERTIONS 68 | // 69 | // an alternative to assert() that can also work in kernelspace 70 | // NB: it sins against the rule that no control-flow should be in macros... 71 | // ... but then, so does assert. 
72 | // Update (17032008): this stuff is outdated, but 73 | // I'm too lazy to clean up all source 74 | #ifdef NDEBUG 75 | #define __check(expression, exec_stmt) \ 76 | do {\ 77 | if (unlikely((expression) == 0)) { \ 78 | exec_stmt; \ 79 | } \ 80 | } while(0) 81 | #else 82 | #define __check(expression,exec_stmt) \ 83 | do { \ 84 | if (unlikely((expression) == 0)) { \ 85 | dprintf("ASSERT FAILED at %s.%d\n",__FUNCTION__,__LINE__); \ 86 | exec_stmt; \ 87 | } \ 88 | } while(0) 89 | #endif 90 | 91 | #define check_noop(expression) __check(expression,) 92 | #define check(expression) __check(expression, return -1) 93 | #define check_ptr(expression) __check(expression, return NULL) 94 | #define check_void(expression) __check(expression, return ) 95 | #define check_goto(expression) __check(expression, goto cleanup) 96 | 97 | #ifdef __KERNEL__ 98 | #define assert(stmt) do {if (!(stmt)) panic("ASSERT FAILED at %s.%d\n", __FILE__, __LINE__); } while (0) 99 | #endif 100 | 101 | ///////////// Portable PRINT 102 | #ifdef __KERNEL__ 103 | #define aprintf printk 104 | #define dprintf printk 105 | #else 106 | #define aprintf printf 107 | #ifdef NDEBUG 108 | #define dprintf(...) 109 | #else 110 | #define dprintf aprintf 111 | #endif /* NDEBUG */ 112 | #endif /* __KERNEL__ */ 113 | 114 | ///////////// Other 115 | #define bug() dprintf("(BUG) at %s:%d\n",__FUNCTION__,__LINE__) 116 | #define returnbug(a) do {bug(); return(a);} while (0) 117 | 118 | #ifndef min 119 | #define min(a,b) ( ((a) < (b)) ? (a) : (b) ) 120 | #define max(a,b) ( ((a) > (b)) ? 
/**
 * Return the smallest order such that (1 << order) >= in,
 * i.e. log2(in) rounded up.
 *
 * @param in value to examine
 * @return ceil(log2(in)); 0 for both in == 0 and in == 1
 */
static inline int
order_log2(unsigned long in)
{
	unsigned long value;
	int order = 0;

	if (in == 0)
		return 0;

	// position of the highest set bit == floor(log2(in))
	for (value = in; value > 1; value >>= 1)
		order++;

	// round up unless 'in' is an exact power of two. The shift must be
	// done in unsigned long: a plain int '1 << order' is undefined (and
	// compares wrong) for order >= 31
	if ((1UL << order) == in)
		return order;
	else
		return order + 1;
}
typedef unsigned char *POINTER;		/* POINTER defines a generic pointer type */
typedef unsigned short int UINT2;	/* UINT2 defines a two byte word */

/* UINT4 defines a four byte word.
 *
 * The MD5 rounds depend on additions and rotations wrapping at 32 bits.
 * 'unsigned long' is 8 bytes on LP64 platforms, which silently produces
 * wrong digests, so use 'unsigned int' (4 bytes on every supported target). */
typedef unsigned int UINT4;

/* refuse to build on a platform where the assumption above does not hold */
typedef char md5_uint4_is_four_bytes[(sizeof(UINT4) == 4) ? 1 : -1];
email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * */ 10 | 11 | #include "multihash.h" 12 | 13 | // mhash_get without optimizations 14 | static inline struct list * __mhash_getelem(struct multihash * mh, 15 | uint16_t key, 16 | uint16_t subkey) 17 | { 18 | struct list * head, * elem; 19 | 20 | head = hash_lookup(&mh->table, key); 21 | list_foreach(head, elem) 22 | if (!subkey--) 23 | return elem; 24 | return NULL; 25 | } 26 | 27 | // optimization: expect this request to be part of a loop 28 | // we cache the pointer and compare {mh, key, subkey} with cached version 29 | // 30 | // nb: this is unsafe in a very specific situation: when the list into 31 | // which the cached pointer points is changed between calls. This is 32 | // highly unlikely. TODO: make certain this cannot occur 33 | void * mhash_get(struct multihash * mh, uint16_t key, uint16_t subkey) 34 | { 35 | static struct multihash * s_mh; 36 | static uint16_t s_key; 37 | static uint16_t s_sub; 38 | static struct list *s_elem; 39 | struct list *elem; 40 | 41 | // next iterator element in the current loop? 42 | if (s_mh == mh && s_key == key && s_sub + 1 == subkey) { 43 | s_sub++; 44 | s_elem = s_elem->next; 45 | elem = s_elem; 46 | } 47 | else if (!subkey) { // start of a new loop? 
48 | s_mh = mh; 49 | s_key = key; 50 | s_sub = 0; 51 | s_elem = __mhash_getelem(mh, key, 0); 52 | elem = s_elem; 53 | } else { 54 | elem = __mhash_getelem(mh, key, subkey); 55 | } 56 | 57 | if (elem) 58 | return elem->id; 59 | else 60 | return NULL; 61 | } 62 | 63 | #ifdef __KERNEL__ 64 | 65 | #include 66 | EXPORT_SYMBOL(mhash_get); 67 | 68 | #endif 69 | 70 | -------------------------------------------------------------------------------- /sh/support/multihash.h: -------------------------------------------------------------------------------- 1 | /** 2 | * support/multihash.[ch] 3 | * a hashtable with lists for each element, to allow unlimited 'siblings' 4 | * 5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | * email at willem _-at-_ computer DOT org 7 | * 8 | * 3-clause BSD applies 9 | * */ 10 | 11 | #ifndef SL_SUPP_MHASH 12 | #define SL_SUPP_MHASH 13 | 14 | #ifdef __KERNEL__ 15 | #include 16 | #else 17 | #include 18 | #endif 19 | 20 | #include "hashtable.h" 21 | #include "list.h" 22 | 23 | struct multihash { 24 | // each element is taken as the head of a list 25 | struct hashtable table; 26 | }; 27 | 28 | static inline void mhash_add(struct multihash * mh, uint16_t key, void *value) 29 | { 30 | struct list * list; 31 | 32 | list = hash_lookup(&mh->table, key); 33 | list = list_append(list, value); 34 | mh->table.table[key] = list; 35 | } 36 | 37 | // remove an entry. 38 | // or remove all entries with key 'key' by passing NULL as value 39 | static inline void mhash_del(struct multihash * mh, uint16_t key, void *value) 40 | { 41 | struct list * head, * elem; 42 | 43 | head = hash_lookup(&mh->table, key); 44 | list_foreach(head, elem) 45 | if (!value || elem->id == value) { 46 | head = list_remove(elem); 47 | mh->table.table[key] = head; 48 | return; 49 | } 50 | } 51 | 52 | // get all matches for mh(key). 
the iterator subkey starts at 0 53 | void * mhash_get(struct multihash * mh, uint16_t key, uint16_t subkey); 54 | 55 | #endif /* SL_SUPP_MHASH */ 56 | 57 | -------------------------------------------------------------------------------- /sh/support/prettyprint.c: -------------------------------------------------------------------------------- 1 | // prettyprint.[ch] 2 | // print non-trivial data to screen 3 | // 4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | #ifdef __KERNEL__ 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #else 19 | #include 20 | #include 21 | #include 22 | #include 23 | #if linux 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #else 30 | #include "../hw/proto.h" 31 | #endif 32 | #endif 33 | 34 | #include "macros.h" 35 | #include "lock.h" 36 | #include "prettyprint.h" 37 | 38 | /** Pretty print data in vertically split [hex | decimal] notation 39 | * 40 | * The passed string must be at least 80 bytes. 
#ifndef HEXWIDTH
#define HEXWIDTH 16	/* bytes per output row; normally set by prettyprint.h */
#endif

/* append one character, silently dropping it once the buffer is full */
static inline int
writedata_put(char *out, int off, int limit, char c)
{
	if (off < limit)
		out[off++] = c;
	return off;
}

/** Pretty print data in vertically split [hex | visible] notation
 *
 *  Every row shows up to HEXWIDTH bytes as two hex digits each (with an
 *  extra gap after the first half), followed by the same bytes as
 *  characters ('.' replaces anything that is not printable ASCII).
 *  The output ends with an empty line and is always '\0' terminated.
 *  Output that does not fit in the buffer is truncated.
 *
 *  @param out  destination buffer
 *  @param olen size of the destination buffer, including the '\0'
 *  @param data bytes to print
 *  @param dlen number of bytes to print
 *  @return the number of characters stored, not counting the '\0'
 * */
int
writedata(char *out, int olen, const char *data, int dlen) {
	static const char hexdigit[] = "0123456789abcdef";
	int i = 0, off = 0;
	int elem, limit;
	unsigned char c;

	if (!out || olen <= 0)
		return 0;
	if (!data)
		dlen = 0;

	limit = olen - 1; // leave room for the terminating '\0'
	do {
		// phase 1: print hex
		for (elem = 0; elem < HEXWIDTH && i + elem < dlen; elem++) {
			// go through unsigned char: plain char may be signed
			c = (unsigned char) data[i + elem];
			off = writedata_put(out, off, limit, hexdigit[c >> 4]);
			off = writedata_put(out, off, limit, hexdigit[c & 0xf]);
			off = writedata_put(out, off, limit, ' ');

			if (elem == (HEXWIDTH / 2) - 1)
				off = writedata_put(out, off, limit, ' ');
		}

		// fill out the last line
		for (; elem < HEXWIDTH; elem++) {
			off = writedata_put(out, off, limit, ' ');
			off = writedata_put(out, off, limit, ' ');
			off = writedata_put(out, off, limit, ' ');
		}

		// insert room
		off = writedata_put(out, off, limit, ' ');
		off = writedata_put(out, off, limit, ' ');
		off = writedata_put(out, off, limit, ' ');

		// phase 2: print visible (printable ASCII is 32..126 inclusive)
		for (elem = 0; elem < HEXWIDTH && i + elem < dlen; elem++) {
			c = (unsigned char) data[i + elem];
			if (c >= 32 && c < 127)
				off = writedata_put(out, off, limit, (char) c);
			else
				off = writedata_put(out, off, limit, '.');
		}
		off = writedata_put(out, off, limit, '\n');
		i += HEXWIDTH;
	} while (i < dlen && off < limit);

	off = writedata_put(out, off, limit, '\n');
	out[off] = '\0'; // off <= limit == olen - 1, so this is in bounds
	return off;
}
/** Prettyprint an IP address, optionally followed by ":port".
 *
 *  The result is always '\0' terminated and truncated to fit the buffer.
 *
 *  @param data destination buffer
 *  @param dlen size of the destination buffer, including the '\0'
 *  @param ip   the four address bytes, most significant first
 *  @param port port in network byte order, or 0 to print no port
 *  @returns the number of bytes written, not counting the '\0' */
int
writeip(char * data, int dlen, const uint8_t* ip, uint16_t port)
{
	int res, more;

	if (!data || dlen <= 0)
		return 0;

#ifdef __KERNEL__
	res = snprintf(data, dlen, "%hu.%hu.%hu.%hu", ip[0], ip[1], ip[2], ip[3]);
#else
	res = snprintf(data, dlen, "%hhu.%hhu.%hhu.%hhu", ip[0], ip[1], ip[2], ip[3]);
#endif
	if (res < 0) {
		data[0] = '\0';
		return 0;
	}
	// snprintf reports the length it *wanted* to write. On truncation
	// there is no room left for the port, and 'dlen - res' would turn
	// into a huge size_t when passed on
	if (res >= dlen)
		return dlen - 1;

	if (port) {
		more = snprintf(data + res, dlen - res, ":%hu", ntohs(port));
		if (more < 0) {
			data[res] = '\0';
			more = 0;
		}
		else if (more >= dlen - res)
			more = dlen - res - 1;
		res += more;
	}
	return res;
}
eth->h_source[i] & 0xf); 181 | off += snprintf(out + off, olen - off, ", dest "); 182 | for (i = 0; i < 6; i++) 183 | off += snprintf(out + off, olen - off, "%hx%hx.", 184 | (eth->h_dest[i] & 0xf0) >> 4, 185 | eth->h_dest[i] & 0xf); 186 | off += snprintf(out + off, olen - off, ")\n"); 187 | 188 | if ((uint16_t) ntohs(eth->h_proto) == ETH_P_IP) { 189 | const struct iphdr *iph; 190 | 191 | iph = (struct iphdr*) (pkt + ETH_HLEN); 192 | off += snprintf(out + off, olen - off, 193 | "ip (proto %hu, ttl %hu, ihl %hu, total_len %hu," 194 | " src %hu.%hu.%hu.%hu, dst %hu.%hu.%hu.%hu)\n", 195 | iph->protocol, 196 | iph->ttl, 197 | iph->ihl, 198 | ntohs(iph->tot_len), 199 | iph->saddr & 0xff, 200 | (iph->saddr & 0xff00) >> 8, 201 | (iph->saddr & 0xff0000) >> 16, 202 | (iph->saddr &0xff000000) >> 24, 203 | iph->daddr & 0xff, 204 | (iph->daddr & 0xff00) >> 8, 205 | (iph->daddr & 0xff0000) >> 16, 206 | (iph->daddr &0xff000000) >> 24); 207 | 208 | if (iph->protocol == 6 /* TCP */){ 209 | // start of udp and tcp headers are identical. 
/** Print a summary of a packet's headers (see writepktinfo) to the
 *  kernel log or to stdout.
 *
 *  @param data start of the packet (ethernet header)
 *  @param len  packet length in bytes */
void
displaypktinfo(const void *data, int len)
{
	const int outlen = 240; // 3 lines is the upper limit
	char *out;
#ifndef __KERNEL__
	int ret;
#endif

	out = myalloc(outlen);
	// malloc failed error handling
	if (!out) {
		const char error[] = "BUG in displaypktinfo\n";
#ifdef __KERNEL__
		printk("%s", error);
#else
		ret = write(1, error, sizeof(error) - 1);
		(void) ret;
#endif
		return;
	}

	len = writepktinfo(out, outlen - 1, data, len);
	// the returned length is built from snprintf results and may exceed
	// what was actually stored: never index outside the buffer
	if (len < 0)
		len = 0;
	else if (len > outlen - 1)
		len = outlen - 1;
	out[len] = '\0';
#ifdef __KERNEL__
	printk("%s", out);
#else
	ret = write(1, out, len + 1);
	(void) ret;
#endif
	myfree(out);
}
LGPL license applies 8 | 9 | #ifndef __SL_SUPPORT_PRETTYPRINT_H 10 | #define __SL_SUPPORT_PRETTYPRINT_H 11 | 12 | #define HEXWIDTH 16 13 | 14 | void displaydata(const char *data, int dlen); 15 | void displayip(const uint8_t* ip, uint16_t port); 16 | void displaypktinfo(const void *data, int len); 17 | 18 | int writeip(char * data, int dlen, const uint8_t* ip, uint16_t port); 19 | int writedata(char *out, int olen, const char *data, int dlen); 20 | int writepktinfo(char *out, int olen, const char *pkt, unsigned int plen); 21 | 22 | #endif 23 | 24 | -------------------------------------------------------------------------------- /sh/support/profiler.c: -------------------------------------------------------------------------------- 1 | /* 2 | Fairly Fast Packet Filter 3 | 4 | `stealth' profiling sourcefile 5 | 6 | Licensed under the GPL 7 | Copyright (c) Herbert Bos, 2003-2004 8 | 9 | this version was created by Willem de Bruijn (wdebruij_AT_liacs.nl), 2004 10 | */ 11 | 12 | #ifdef PROFILE 13 | 14 | #ifdef __KERNEL__ 15 | #include 16 | #include 17 | #else 18 | #include 19 | #include 20 | #include 21 | #include 22 | #define CAN_PRINT_FLOATS 23 | #endif 24 | 25 | #include "macros.h" 26 | #include "timestamp.h" 27 | #include "profiler.h" 28 | 29 | struct profdata { 30 | tstamp_t cycles[PROFWIDTH][PROFLEN]; 31 | uint32_t index[PROFWIDTH]; 32 | }; 33 | 34 | static struct profdata prof; 35 | 36 | /** save a new processor count. */ 37 | inline void __internal_profiler(int class){ 38 | /* DIRTY : the first element isn't a diff. 39 | * Either forget about Avg and use only Mean, 40 | * or wait long enough for this element to be overwritten. */ 41 | prof.cycles[class][ prof.index[class] ] = timestamp_get(); 42 | prof.index[class] = (prof.index[class] + 1) % PROFLEN; 43 | } 44 | 45 | /** start a new processor count calculation. 
*/ 46 | inline void __internal_profiler_begin(int class){ 47 | prof.cycles[class][prof.index[class] % PROFLEN] = timestamp_get(); 48 | } 49 | 50 | /** close a processor count calculation. */ 51 | inline void __internal_profiler_end(int class){ 52 | prof.cycles[class][prof.index[class]] = timestamp_get() - prof.cycles[class][prof.index[class]]; 53 | prof.index[class] = (prof.index[class] + 1) % PROFLEN; 54 | /* note that this would result in a negative result on signed values. We'll have to swap this when calculating results */ 55 | } 56 | 57 | /* quicksort implementation from wikipedia.org. 58 | 59 | we could have used the qsort(..) function call in userspace, but 60 | for simplicity we'll use this less optimal algorithm in both kernel- 61 | and userspace. 62 | */ 63 | void __qsort(tstamp_t* low, tstamp_t* high) 64 | { 65 | /* We naively use the first value in the array as the pivot */ 66 | /* this will not give good performance real usage */ 67 | 68 | tstamp_t * lowbound = low + 1; /* the high boundary of the low subarray */ 69 | tstamp_t * highbound = high - 1; /* the low boundary of the high subarray */ 70 | tstamp_t temp; 71 | 72 | while(lowbound <= highbound) /* partition the array */ 73 | { 74 | if(*lowbound < *low) /* compare to pivot */ 75 | lowbound++; /* move lowbound toward the middle */ 76 | else 77 | { 78 | temp = *lowbound; /* swap *lowbound and *highbound */ 79 | *lowbound = *highbound; 80 | *highbound = temp; 81 | highbound--; /* move highbound toward the middle */ 82 | } 83 | } 84 | 85 | highbound++; /* move bounds back to the correct positions */ 86 | lowbound--; 87 | 88 | temp = *low; /* move the pivot into the middle */ 89 | *low = *lowbound; 90 | *lowbound = temp; 91 | 92 | if(low != lowbound) /* recurse on the subarrays */ 93 | __qsort(low, lowbound); 94 | if(high != highbound) 95 | __qsort(highbound, high); 96 | } 97 | 98 | tstamp_t __median(int start, int stop, tstamp_t* ldList){ 99 | int middle_floor = start+(stop-start)/2; 100 | if ( 
(((stop-start) % 2) + 1) == 1) // odd number of elements 101 | return ldList[middle_floor]; 102 | else 103 | return ((tstamp_t) ( ldList[middle_floor] + ldList[middle_floor + 1]) ) / 2; 104 | } 105 | 106 | /** 107 | * calculate the mean and output information to the standard output queue. 108 | * this function is very similar to the one that outputs to procfs. 109 | * I currently don't have the time to properly merge the two. */ 110 | void __internal_profiler_show(void){ 111 | int i, j; 112 | tstamp_t Q1, Q2, Q3; 113 | 114 | for (i = 0; i < PROFWIDTH; i++) { 115 | /* are we using this class? */ 116 | if (prof.cycles[i][0]) { 117 | int val; 118 | double average; 119 | 120 | /* find the last used element in the list */ 121 | val = 0; 122 | while(val < PROFLEN && prof.cycles[i][val]) 123 | val++; 124 | if (prof.cycles[i][val]) 125 | val++; 126 | 127 | /* calculate the mean (and the lower and upper quartile) */ 128 | __qsort(&prof.cycles[i][0], &prof.cycles[i][val-1]); 129 | Q2 = __median(0,val-1, prof.cycles[i]); 130 | if (val % 2) { 131 | Q1 = __median(0, val/2 - 1, prof.cycles[i]); 132 | Q3 = __median(val/2 + 1, val - 1, prof.cycles[i]); 133 | } 134 | else{ 135 | Q1 = __median(1, val/2 - 2 , prof.cycles[i]); 136 | Q3 = __median(val/2 + 1, val - 1, prof.cycles[i]); 137 | } 138 | dprintf("class %d: Q1=%llu Q2=%llu Q3=%llu \n", 139 | i, Q1, Q2, Q3); 140 | 141 | /* calculate the average */ 142 | average = 0; 143 | for (j=0; j < val; j++) 144 | average += ((double) prof.cycles[i][j]) / val; 145 | dprintf("class %d: average is %lf\n",i, average); 146 | } 147 | } 148 | } 149 | 150 | #ifdef __KERNEL__ /* we can only export to procfs from the kernel, naturally */ 151 | 152 | /** static buffer for keeping our fake procfs */ 153 | static char procfs_buffer[80 + 5*80 * PROFWIDTH]; /** used for exporting information to procfs */ 154 | 155 | /** export information to procfs. 
*/ 156 | int __internal_profiler_procfs(char *buffer, char **buffer_location, off_t offset, int buffer_length, int zero){ 157 | int len; 158 | int i, j, val; 159 | tstamp_t Q1, Q2, Q3; 160 | double average; 161 | 162 | if (offset > 0) 163 | return 0; 164 | 165 | memset(procfs_buffer,0, 80 + (5*80 * PROFWIDTH) - 1); 166 | len = snprintf(procfs_buffer, 17, "kernel profiler\n\n"); 167 | /* Fill the buffer and get its length */ 168 | for (i = 0; i < PROFWIDTH; i++){ 169 | if (prof.cycles[i][0]){ /* are we using this class? then a 0 value is highly unlikely */ 170 | /* find the last used element in the list (might well be PROFLEN */ 171 | val=0; 172 | while(val < PROFLEN && prof.cycles[i][val]){ 173 | //dprintf("%d,%d:%llu\n",i,val,prof.cycles[i][val]); 174 | val++; 175 | } 176 | if (prof.cycles[i][val]) 177 | val++; 178 | 179 | /* calculate the mean (and the lower and upper quartile) */ 180 | __qsort(&prof.cycles[i][0],&prof.cycles[i][val-1]); 181 | Q2 = __median(0,val-1, prof.cycles[i]); 182 | if ((val % 2) == 1){ // odd 183 | Q1 = __median(0,val/2 -1, prof.cycles[i]); 184 | // skip the middle element 185 | Q3 = __median(val/2 +1,val-1, prof.cycles[i]); 186 | } 187 | else{ 188 | Q1 = __median(1,val/2 -2 , prof.cycles[i]); 189 | // ski the two middle elements 190 | Q3 = __median(val/2 +1,val-1, prof.cycles[i]); 191 | } 192 | len += snprintf(&procfs_buffer[len], 5*80*PROFWIDTH - len, "Profiler Class %d\nMedian (Q2) is %llu; Q1=%llu; Q3=%llu \n", i, Q2, Q1, Q3); 193 | 194 | /* calculate the average */ 195 | average = 0; 196 | for (j=0; jowner = THIS_MODULE; 221 | } 222 | 223 | /** unregister from procfs */ 224 | void __internal_profiler_procfs_close(void){ 225 | remove_proc_entry(PROFILER_PROC_NAME,NULL); 226 | } 227 | #endif /* __KERNEL__ */ 228 | 229 | #endif /* PROFILE */ 230 | -------------------------------------------------------------------------------- /sh/support/profiler.h: -------------------------------------------------------------------------------- 1 | /* 
2 | Fairly Fast Packet Filter 3 | 4 | `stealth' profiling headerfile 5 | 6 | Licensed under the GPL 7 | Copyright (c) Herbert Bos, 2003-2004 8 | 9 | this version was created by Willem de Bruijn (wdebruij_AT_dds.nl), 2004 10 | 11 | \file 12 | this `class' implements a clockcycle profiler. When the PROFILE 13 | macro is set, the code will calculate processor cycle counts 14 | for arbitrary program flows by converting the profiler(x), 15 | profiler_begin(x) and profiler_end(x) macros into full functions. 16 | 17 | Anytime one of the profiler functions is encountered, data is 18 | collected and stored for later calculation. 19 | 20 | Note that by not setting the PROFILE macro, the macros are not 21 | expanded and therefore the profiler will have no impact on the 22 | executables' performance. 23 | 24 | [USAGE] define the variable PROFILER_PROC_NAME somewhere to the 25 | name of the file under /proc that you want to create. If left 26 | undefined it will default to "ffpf" 27 | 28 | since profiler uses integers to discriminate among classes, I 29 | suggest you add macros that expand to unique classkeys either in 30 | your own code or (if your code will be bundled with this package) 31 | below (near PROFILER_HOOK) . 32 | 33 | [NB] the kernel's print function, printk, cannot output floating 34 | point values. Therefore we have resorted to printing the floats 35 | in another notation, namely as hex integers. Use this output 36 | by converting it to the right representation in userspace. For 37 | this you could use a perl shellscript or something. Currently, 38 | no such script has been written. 
/* NB: the prototypes below deliberately carry no 'inline'. Under C99 (and
   later) inline rules, a function whose file scope declarations are all
   'inline' without 'extern' gets no external definition, so calls from
   other source files would fail to link. */

/** the profiler routine stores the processor counter
    @param class separates streams of statistics.
*/
void __internal_profiler(int class);

/** a more explicit version of exec_profiler(..).
    use this function and exec_profiler_end to be
    sure when data collection starts and finishes.
    Consecutive calls to exec_profiler_begin will
    reset the temporary databuffer. The endresult
    is an offset, instead of the raw values. This
    is probably what you want.

    Note that we have no safety checks in place
    for buffer overflows. That's your responsibility.
*/
void __internal_profiler_begin(int class);

/** see exec_profiler_begin .*/
void __internal_profiler_end(int class);

/** output profiler data. */
void __internal_profiler_show(void);
Automatically calls init_profiler (just in case you forget) */ 93 | void __internal_profiler_procfs_open(void); 94 | /** unregister from procfs */ 95 | void __internal_profiler_procfs_close(void); 96 | #endif /* __KERNEL__ */ 97 | 98 | #ifdef PROFILE 99 | 100 | #define profiler(x) __internal_profiler(x) 101 | #define profiler_begin(x) __internal_profiler_begin(x) 102 | #define profiler_end(x) __internal_profiler_end(x) 103 | 104 | #ifdef __KERNEL__ 105 | #define profiler_procfs_open() __internal_profiler_procfs_open() 106 | #define profiler_procfs_close() __internal_profiler_procfs_close() 107 | #else 108 | #define profiler_procfs_open() 109 | #define profiler_procfs_close() 110 | #endif /* __KERNEL__ */ 111 | 112 | #define profiler_init() __internal_profiler_init() 113 | #define profiler_show() __internal_profiler_show() 114 | 115 | #else /* PROFILE */ 116 | 117 | #define profiler(x) 118 | #define profiler_begin(x) 119 | #define profiler_end(x) 120 | 121 | #define profiler_procfs_open() 122 | #define profiler_procfs_close() 123 | 124 | #define profiler_init() 125 | #define profiler_show() 126 | 127 | #endif /* PROFILE */ 128 | 129 | #endif /* PROFILE_H */ 130 | 131 | -------------------------------------------------------------------------------- /sh/support/radix.h: -------------------------------------------------------------------------------- 1 | /* support/radix.[ch] 2 | * implementation of a radix tree 3 | * 4 | * the tree works on arbitrary binary strings. \0 is not necessary 5 | * duplicate keys are not allowed 6 | * 7 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam 8 | * GPLv2 applies 9 | */ 10 | 11 | #ifndef SL_SUPPORT_RADIX_H 12 | #define SL_SUPPORT_RADIX_H 13 | 14 | #ifdef __KERNEL__ 15 | #include 16 | #else 17 | #include 18 | #endif 19 | 20 | struct radix_node; 21 | 22 | /// lookup a value by a tuple. 
// a maximum: this (theoretically) limits stringlen, and can serve as a
// rudimentary sanity test on a header's tuple count.
// parenthesized, so that the shift cannot regroup with operators next to
// the macro at the point of use (e.g. MAXTUPLELEN + 1)
#define MAXTUPLELEN (1 << 16)
40 | { 41 | __attribute__((unused)) va_list list; 42 | int i, empty; 43 | uint32_t elemsize, offset; 44 | char *output, *param; 45 | struct shead shead; 46 | 47 | // phase 1 : figure out how much space we need to allocate 48 | shead.bytelen = 0; 49 | shead.tuplelen = 0; 50 | va_start (list, argcount); 51 | for (i = 0; i < (2 * argcount); i += 2) { 52 | shead.bytelen += va_arg(list, uint32_t); 53 | shead.tuplelen++; 54 | empty = va_arg(list, int); // skip the data. gives warning with GCC 4.1. FIXED 55 | } 56 | va_end (list); 57 | 58 | // phase 2 : copy the data 59 | output = myalloc (sizeof(struct shead) + (argcount * sizeof(uint32_t)) + shead.bytelen); 60 | if (!output){ 61 | dprintf("failed to allocated memory (size is %d)\n",shead.bytelen); 62 | return NULL; 63 | } 64 | 65 | // write the string header 66 | offset = sizeof(struct shead); 67 | memcpy(output, &shead, offset); 68 | 69 | // place the elements 70 | va_start (list, argcount); 71 | for (i=0;ituplelen > MAXTUPLELEN) // some safety integrity checks. 97 | return NULL; 98 | 99 | oldlen = serialized_totallen(serialized); 100 | newheader = (uint32_t) dlen; 101 | new = myalloc (oldlen + dlen + sizeof(uint32_t)); 102 | if (!new) 103 | return NULL; 104 | 105 | memcpy(new,serialized,oldlen); 106 | memcpy(&new[oldlen],&newheader,sizeof(uint32_t)); 107 | memcpy(&new[oldlen+sizeof(uint32_t)],data,dlen); 108 | shead = (struct shead *) new; 109 | shead->tuplelen++; 110 | shead->bytelen+=dlen; 111 | myfree(serialized); 112 | return new; 113 | } 114 | 115 | int is_serialized(const char *serialized) 116 | { 117 | struct shead *shead = (struct shead *) serialized; 118 | if (!shead) 119 | return 0; 120 | 121 | if (shead->tuplelen < MAXTUPLELEN && shead->tuplelen <= shead->bytelen) 122 | return 1; 123 | else 124 | return 0; 125 | } 126 | 127 | int deserialize(char *string, ...) 
128 | { 129 | va_list list; 130 | struct shead shead; 131 | int i, j, offset; 132 | uint32_t elemsize; 133 | char **ptr; 134 | 135 | memcpy(&shead, string, sizeof(struct shead)); 136 | offset = sizeof(struct shead); 137 | va_start (list, string); 138 | for (i=0;ituplelen > elemno); 172 | 173 | offset = sizeof(struct shead); 174 | for(i = 0; i < elemno; i++){ 175 | memcpy(&itemlen, serialized + offset, sizeof(uint32_t)); 176 | offset += sizeof(uint32_t) + itemlen; 177 | } 178 | if (dlen) 179 | // BUG: uint32_t -> int 180 | memcpy(dlen, serialized + offset, sizeof(uint32_t)); 181 | if (*(uint32_t*) serialized + offset == 0) // NULL pointer? 182 | return NULL; 183 | return &serialized[offset + sizeof(uint32_t)]; // skip past header 184 | } 185 | 186 | char * serialize_duplicate(char * in) 187 | { 188 | char *out; 189 | int size; 190 | 191 | check_ptr (in && is_serialized(in)); 192 | 193 | size = serialized_totallen(in); 194 | out = myalloc (size); 195 | check_ptr(out); 196 | 197 | memcpy(out, in, size); 198 | return out; 199 | } 200 | 201 | char * serialize_merge(char * one, char * two, int del) 202 | { 203 | struct shead *h_out, *h_one, *h_two; 204 | char * out; 205 | int size_one, size_two; 206 | 207 | if (!one && !two) 208 | return NULL; 209 | if (!one){ 210 | if (del) 211 | return two; 212 | else 213 | return serialize_duplicate(two); 214 | } 215 | if (!two){ 216 | if (del) 217 | return one; 218 | else 219 | return serialize_duplicate(one); 220 | } 221 | check_ptr (is_serialized(one) && is_serialized(two)); 222 | 223 | // calculate new information length 224 | h_one = (struct shead *) one; 225 | size_one = h_one->bytelen + (sizeof(uint32_t) * h_one->tuplelen); 226 | 227 | h_two = (struct shead *) two; 228 | size_two= h_two->bytelen + (sizeof(uint32_t) * h_two->tuplelen); 229 | 230 | // allocate space 231 | out = myalloc (sizeof(struct shead) + size_one + size_two); 232 | if (!out) 233 | return NULL; 234 | 235 | // copy information 236 | h_out = (struct shead *) 
out; 237 | h_out->tuplelen = h_one->tuplelen + h_two->tuplelen; 238 | h_out->bytelen = h_one->bytelen + h_two->bytelen; 239 | 240 | if (!memcpy(&out[sizeof(struct shead)], 241 | &one[sizeof(struct shead)],size_one)) 242 | goto cleanup; 243 | if (!memcpy(&out[sizeof(struct shead) + size_one], 244 | &two[sizeof(struct shead)],size_two)) 245 | goto cleanup; 246 | 247 | // destroy old information 248 | if (del){ 249 | myfree (one); 250 | myfree (two); 251 | } 252 | 253 | return out; 254 | 255 | cleanup: 256 | myfree (out); 257 | return NULL; 258 | } 259 | 260 | #ifdef __KERNEL__ 261 | EXPORT_SYMBOL(serialize); 262 | EXPORT_SYMBOL(serialize_add); 263 | EXPORT_SYMBOL(is_serialized); 264 | EXPORT_SYMBOL(serialized_data); 265 | EXPORT_SYMBOL(serialized_totallen); 266 | EXPORT_SYMBOL(serialize_merge); 267 | #endif 268 | 269 | -------------------------------------------------------------------------------- /sh/support/serialize.h: -------------------------------------------------------------------------------- 1 | 2 | // serialize.[ch] 3 | // pack/unpack a bunch of parameters 4 | // 5 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 6 | // email at willem -_at_- computer.org 7 | // 8 | // BSD license applies 9 | 10 | #ifndef WJDB_SERIALIZE_H 11 | #define WJDB_SERIALIZE_H 12 | 13 | #ifdef __KERNEL__ 14 | #include 15 | #include 16 | #else 17 | #include 18 | #include 19 | #include 20 | #endif 21 | 22 | /** serialize a bunch of parameters 23 | * 24 | * call this function with a list of (int, char*) tuples, whereby 25 | * int contains the #bytes that should be copied starting at the pointer. 
/// number of elements encoded in the string: the first 32-bit word of the
/// package. A NULL string holds no elements.
static inline unsigned int serialized_tuplelen(const char *serialized)
{
	const uint32_t *words = (const uint32_t *) serialized;

	if (!serialized)
		return 0;
	return (unsigned int) *words;
}

/// number of payload bytes encoded: the second 32-bit word of the package
/// (metadata size is not part of this number). A NULL string holds none.
static inline unsigned int serialized_bytelen(const char *serialized)
{
	const uint32_t *words = (const uint32_t *) serialized;

	if (!serialized)
		return 0;
	return (unsigned int) *(words + 1);
}
73 | /// @param del set to 1 to delete the original strings 74 | char * serialize_merge(char * one, char * two, int del); 75 | 76 | /// return a pointer into the packet string 77 | /// @param dlen may be NULL, otherwise it contains the length of the element on return 78 | /// counting of elements starts at 0 79 | #define ser_data(ser, elemno) serialized_data(ser, elemno, NULL) 80 | const char * serialized_data(const char *serialized, int elemno, int *dlen); 81 | 82 | #define serialized_foreach(serialized, i, data, len) \ 83 | for(i=0; \ 84 | i 11 | #include 12 | #else 13 | #include 14 | #include 15 | #endif 16 | 17 | #include "slist.h" 18 | 19 | #define INCFACTOR 2 /**< expansion rate */ 20 | #define STARTLEN 4 21 | 22 | int 23 | slist_add(struct slist * sl, unsigned long key, void *arg) 24 | { 25 | // realloc 26 | if (sl->used == sl->len) { 27 | struct slist_elem *bak; 28 | int bytelen; 29 | 30 | bytelen = sl->len * sizeof(struct slist_elem); 31 | if (bytelen) { 32 | bak = sl->list; 33 | sl->list = myalloc(INCFACTOR * bytelen); 34 | memcpy(sl->list, bak, bytelen); 35 | myfree(bak); 36 | sl->len *= INCFACTOR; 37 | } 38 | else { 39 | sl->len = STARTLEN; 40 | sl->list = myalloc(sl->len * sizeof(struct slist_elem)); 41 | } 42 | } 43 | 44 | // add 45 | sl->list[sl->used].key = key; 46 | sl->list[sl->used].arg = arg; 47 | sl->used++; 48 | return 0; 49 | } 50 | 51 | int 52 | slist_del(struct slist *sl, unsigned long key) 53 | { 54 | int i = __sllist_get(sl, key); 55 | 56 | if (i < 0) { 57 | sl_log(LOG_WARN, "deallocation from slist failed"); 58 | return -1; 59 | } 60 | 61 | // place last element into newly created hole 62 | if (i < sl->used - 1) { 63 | sl->list[i].key = sl->list[sl->used - 1].key; 64 | sl->list[i].arg = sl->list[sl->used - 1].arg; 65 | } 66 | sl->used--; 67 | 68 | return 0; 69 | } 70 | 71 | -------------------------------------------------------------------------------- /sh/support/slist.h: 
/* one list entry: a lookup key and the value stored under it */
struct slist_elem {
	unsigned long key;
	void *arg;
};


/* A stack-based list allocates an array of elements
 * and grows as needed.
 *
 * No initialization is necessary besides setting len and used to 0.
 * */
struct slist {
	int len;	/* number of elements the array can hold */
	int used;	/* number of elements currently stored */

	struct slist_elem *list;
};

/* Linear search over the stored elements.
 *
 * Returns the index of the first element whose key matches,
 * or -1 if the key is not in the list. */
static inline int
__sllist_get(struct slist *sl, unsigned long key)
{
	int idx = 0;

	while (idx < sl->used) {
		if (sl->list[idx].key == key)
			return idx;
		idx++;
	}

	return -1;
}
//
// a static stack with private length. can be initialized as
// full or empty
//
// __STACK_EX(keyword, intype, inname, inlen, infull) expands to:
//   stack_<inname>_len       capacity, set to inlen
//   stack_<inname>_filled    number of entries in use, starts at infull
//   stack_<inname>_entries   the storage, an array of inlen elements of intype
//   stack_<inname>_clear(f)  zeroes the storage; also resets the fill count
//                            when f is non-zero
//   stack_<inname>_push(e)   stores e and returns 0, or logs an overflow and
//                            returns -1 when the stack is full
//   stack_<inname>_pop()     returns the top entry, or logs an underflow and
//                            returns (intype) MAGICKEY when the stack is empty
//
// keyword is placed in front of every declaration (e.g. static, or nothing).
#define __STACK_EX(keyword, intype, inname, inlen, infull) \
keyword int stack_##inname##_len = inlen; \
keyword int stack_##inname##_filled = infull; \
keyword intype stack_##inname##_entries[inlen]; \
\
__attribute__((unused)) \
keyword void \
stack_##inname##_clear(int clearfill) { \
	bzero(stack_##inname##_entries, sizeof(intype) * stack_##inname##_len); \
	if (clearfill) \
		stack_##inname##_filled = 0; \
} \
keyword inline int \
stack_##inname##_push(intype elem) { \
	if (likely(stack_##inname##_filled < stack_##inname##_len)) { \
		stack_##inname##_entries[stack_##inname##_filled++] = elem; \
		return 0; \
	} \
	else { \
		dprintf("stack " #inname " overflow\n"); \
		return -1; \
	} \
} \
\
keyword intype \
stack_##inname##_pop(void) { \
	if (stack_##inname##_filled) { \
		return stack_##inname##_entries[--stack_##inname##_filled]; \
	} \
	else{ \
		dprintf("stack " #inname " underflow\n"); \
		return (intype) MAGICKEY; \
	} \
}
((typeof(elem)) MAGICKEY) : stack_pop(inname))) 73 | 74 | #endif /* SL_SUPPORT_STACK_H */ 75 | 76 | -------------------------------------------------------------------------------- /sh/support/string.c: -------------------------------------------------------------------------------- 1 | // string.c 2 | // standard string functionality that is not always available 3 | // 4 | // (c) 2008, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_at_- computer.org 6 | // 7 | // LGPL license applies 8 | 9 | #ifdef __KERNEL__ 10 | #else 11 | #include 12 | #endif 13 | 14 | #include "macros.h" 15 | #include "log.h" 16 | #include "string.h" 17 | 18 | #ifdef __KERNEL__ 19 | long 20 | strtol(const char *in, char **out, int base) 21 | { 22 | long total=0; 23 | int i=0; 24 | 25 | if (out || base != 10) { 26 | sl_log(LOG_ERR, "incomplete strtol called in unsupported mode"); 27 | return 0; 28 | } 29 | 30 | while (in[i] >= '0' && in[i] <= '9') { 31 | total *= 10; 32 | total += in[i] - '0'; 33 | i++; 34 | } 35 | return total; 36 | } 37 | 38 | // yes, this is an almost exact copy of above. 
/// parse a dotted-quad IPv4 address with an optional ":port" suffix.
//
// @param string "a.b.c.d" or "a.b.c.d:port"
// @param port   receives the port number, or 0 if the string has no
//               (parsable) port. May be NULL if the caller does not care.
// @return the address as a 32-bit number with `a` in the most significant
//         byte, or 0 if the string does not start with four dot-separated
//         numbers or one of them exceeds 255
uint32_t
strtohost(const char *string, uint16_t *port)
{
	unsigned short ipseg[4] = { 0, 0, 0, 0 };
	unsigned short portval = 0;
	int found, i;

	if (port)
		*port = 0;

	// the port conversion is simply not reached when there is no ':'
	found = sscanf(string, "%hu.%hu.%hu.%hu:%hu",
		       &ipseg[0], &ipseg[1], &ipseg[2], &ipseg[3], &portval);
	if (found < 4)
		return 0;

	for (i = 0; i < 4; i++) {
		if (ipseg[i] > 255)
			return 0;
	}

	if (found == 5 && port)
		*port = (uint16_t) portval;

	// widen before shifting: an unsigned short promotes to (signed) int
	return ((uint32_t) ipseg[0] << 24) | ((uint32_t) ipseg[1] << 16)
	       | ((uint32_t) ipseg[2] << 8) | (uint32_t) ipseg[3];
}
// the following *should* not be here, but strnlen is sometimes missing
//
// mystrnlen(a, b): length of string a, capped at b.
// NB: both arguments are evaluated more than once, so do not pass
// expressions with side effects.
#ifndef strnlen
#define mystrnlen(a,b) ((strlen(a) > (b)) ? (b) : strlen(a))
#endif
// callback: calls the function and reenables the timer
//
// Runs once per scheduled expiry of a task. Kernels before 2.6.20 hand the
// callback an opaque argument, which is the task itself; later kernels pass
// the work_struct, from which the enclosing task is recovered.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void task_callback(void * arg) {
	struct task * t = arg;
#else
static void task_callback(struct work_struct * ws) {
	struct delayed_work * dw = container_of(ws, struct delayed_work, work);
	struct task * t = container_of(dw, struct task, dws);
#endif

	// skip the user function entirely once a global or per-task stop is set
	if (!tasks_stop && !t->forced_stop) {
		// recur is unsigned, so this counts down every non-zero value
		if (t->recur > 0)
			t->recur--;

		if (unlikely(!t->func))
			dprintf("ERR at %s.%d", __FUNCTION__, __LINE__);
		else
			t->func(t->arg);

		// runs left: rearm with the same delay and keep the task alive
		if (t->recur) {
			schedule_delayed_work(&t->dws, t->jiffies);
			return;
		}
	}

	// stopped or out of runs: the task structure is released here.
	// NOTE(review): the pointer handed out by task_start dangles after
	// this point -- confirm callers do not pass it to task_stop afterwards
	kfree(t);
}
// thread body of a userspace task: sleeps for the task's timeout, calls the
// task function and repeats for as long as runs are left.
//
// thread_arg is the struct task_args that describes the task.
// always returns NULL
static void *
delayed_func(void *thread_arg)
{
	struct task_args *ta = thread_arg;

	// recur > 0: runs left to do; recur == -1: run indefinitely
	while (ta->recur > 0 || ta->recur == -1) {
		// a negative result ends the task, as if its recurrence ran out
		if (usleep_deep(ta->timeout) < 0) {
			ta->recur = 0;
			break;
		}

		// global stop requested: leave without calling the function
		if (tasks_stop)
			break;

		ta->func(ta->arg);

		// the indefinite marker (-1) is never counted down
		if (ta->recur > 0)
			ta->recur--;
	}

	// task_stop_all does not call pthread_join
	// and no one is waiting if recurrence ended
	//
	// in every other case (recur was changed to some other value from
	// outside) the cleanup is left to whoever changed it
	if (tasks_stop || ta->recur == 0) {
		pthread_detach(ta->thread);
		myfree(ta);
		tasks_active--;
	}

	return NULL;
}
ta->arg = arg; 212 | ta->timeout = timeout; 213 | ta->recur = recur; 214 | tasks_active++; 215 | 216 | pthread_create(&ta->thread, NULL, delayed_func, ta); 217 | return ta; 218 | } 219 | 220 | // don't allow the purging of all tasks interfere with a single task 221 | // that is to be closed 222 | slmutex_static(task_mutex); 223 | 224 | void task_stop(void *task) 225 | { 226 | struct task_args *ta = task; 227 | 228 | slmutex_lock(&task_mutex); 229 | if (ta) { 230 | if (!tasks_active) 231 | sl_log(LOG_BUG, "waiting for nonexistent task"); 232 | ta->recur = -2; // stop, signal that we will wait for the result 233 | interrupt_deep(); 234 | pthread_join(ta->thread, NULL); 235 | myfree(ta); 236 | tasks_active--; 237 | } 238 | slmutex_unlock(&task_mutex); 239 | } 240 | 241 | /// May only be called from process context, because it may sleep. 242 | int task_stop_all(void) 243 | { 244 | slmutex_lock(&task_mutex); 245 | if (tasks_active) { 246 | tasks_stop = 1; 247 | if (interrupt_deep()) { 248 | sl_log(LOG_ERR, "Failed to interrupt"); 249 | tasks_stop = 0; 250 | tasks_active = 0; // try to set to a 'stable' state 251 | return -1; 252 | } 253 | while (tasks_active) { 254 | sl_log(LOG_MSG, "Waiting for %d tasks to finish\n", tasks_active); 255 | sleep(1); 256 | tasks_stop = 0; 257 | } 258 | } 259 | slmutex_unlock(&task_mutex); 260 | 261 | return 0; 262 | } 263 | 264 | // HACKHACKHACK replace with nice open on load + close on unload 265 | static int shallowfd = -1; 266 | static int deepfd = -1; 267 | 268 | static int 269 | __usleep_sl(int *fd, const char *name, long timeout) 270 | { 271 | if (unlikely((*fd) == -1)) { 272 | (*fd) = __orig_open(name, O_WRONLY); 273 | if ((*fd) < 0) { 274 | sl_log(LOG_ERR, "open timer failure"); 275 | return -1; 276 | } 277 | } 278 | return __orig_write((*fd), &timeout, sizeof(long)); 279 | } 280 | 281 | // pause the thread for the given number of microseconds 282 | // 283 | // We try to avoid having to use POSIX signals. 
If kernelspace Streamline 284 | // exists, we use the sysfs timer file, otherwise we rever to SIGALRM 285 | // 286 | // returns 0 on success. timeout left if > 0, signal arrived if < 0 287 | int usleep_deep(long timeout) 288 | { 289 | return __usleep_sl(&deepfd, SYSFS_TIMER_DEEP, timeout); 290 | } 291 | 292 | // pause the thread for the given number of microseconds 293 | // or until a streamline signal arrives 294 | // 295 | // returns 0 on success. timeout left if > 0, signal arrived if < 0 296 | int usleep_shallow(long timeout) 297 | { 298 | return __usleep_sl(&shallowfd, SYSFS_TIMER_SHALLOW, timeout); 299 | } 300 | 301 | int interrupt_deep(void) 302 | { 303 | int fd; 304 | char useless = 0; 305 | 306 | fd = __orig_open(SYSFS_TIMER_INTERRUPT, O_WRONLY); 307 | if (unlikely(fd < 0)) { 308 | sl_log(LOG_LOW, "failed to call deep interrupt. POSIX timers?"); 309 | return -1; 310 | } 311 | 312 | if (unlikely(__orig_write(fd, &useless, 1)) < 0) 313 | return -1; 314 | 315 | if (unlikely(__orig_close(fd))) 316 | return -1; 317 | 318 | return 0; 319 | } 320 | 321 | #endif /* !__KERNEL__ */ 322 | 323 | -------------------------------------------------------------------------------- /sh/support/timer.h: -------------------------------------------------------------------------------- 1 | // timer.[ch] 2 | // wrapper around OS-specific alarm signals 3 | // 4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam 5 | // email at willem -_AT_- computer.org 6 | // 7 | // LGPL License applies 8 | 9 | #ifndef SL_SUPPORT_TIMER 10 | #define SL_SUPPORT_TIMER 11 | 12 | #include 13 | 14 | /** execute a task in the background 15 | * 16 | * @param recur sets how often the task should be executed, 17 | * -1 for indefinite or until task_stop is called. 18 | * 19 | * @return an opaque pointer to pass to task_stop */ 20 | void * task_start(void(*func)(void*), void * arg, long recur, long timeout); 21 | void task_stop(void *task); 22 | 23 | /** Cancel all outstanding tasks. 
/// read the x86 time stamp counter.
// rdtsc delivers the low half of the 64-bit counter in eax and the high
// half in edx.
// The asm is volatile because the instruction has no inputs: without it the
// compiler may merge two reads into one or move a read out of a loop.
// @return the 64-bit counter value
static inline uint64_t timestamp_get(void) {
	uint32_t count_low, count_high;

	__asm__ __volatile__("rdtsc" : "=a" (count_low), "=d" (count_high));
	return (((uint64_t) count_high) << 32) | count_low;
}
| } 63 | #endif 64 | 65 | 66 | -------------------------------------------------------------------------------- /sh/support/ue_space/ixa_sdk/README: -------------------------------------------------------------------------------- 1 | README for FFPF 1.5.0 2 | by Mihai Cristea, February 27th, 2006. 3 | contact us at ffpf-devel_-AT-_lists.sourceforge.net 4 | 5 | ## 1. Building Intel me_tools for Linux 6 | 7 | The Linux port of me_tools depends on Intel proprietary files. 8 | We are not allowed to supply these, but they can be found in the Intel IXP SDK 4.x 9 | (CD1_IXASDK_4.1.zip/ixa_sdk_4.1.tgz/me_tools). However, the current sources 10 | support Montavista Linux only; you can compile them for a common Linux kernel 11 | by applying the included patches (ixa_sdk_4.1_LinuxPatched.tgz). 12 | 13 | ## 2. Prerequisites 14 | 15 | a) Cross-compile toolchain used: 16 | http://ixp2xxx.sourceforge.net/toolchain/armeb-unknown-linux-gnu-gcc-3.3.3-glibc-2.3.2.tar.bz2; 17 | The path to the cross tools is set up in the main FFPF Makefile like: 18 | CC_CROSS = /opt/crosstool/armeb-unknown-linux-gnu/gcc-3.3.3-glibc-2.3.2/bin/gcc 19 | 20 | b) Linux sources: 21 | Kernel version 2.6.15 has support for IXP2xxx. However, for older versions and for specific hardware platforms 22 | (e.g., Radisys-2611 or Intel IXDP2850), there are kernel patches at http://ixp2xxx.sourceforge.net/kernel. 23 | The me_tools path to the Linux sources (default is /usr/src/linux_arm) is written in: me_tools/XSC_CoreLibs/Makefile 24 | 25 | ## 3. Using me_tools 26 | 27 | The main object code used is the cross-compiled kernel module: 'halMev2.ko'. It must be loaded 28 | (insmod halMev2.ko) before using FFPF's ue_manager. This module offers support for loading uengine code objects (.uof files), 29 | uengine start/stop, reset, etc. 30 | Moreover, halMev2 is also useful in case of remote hardware debugging by DevWorkbench running on a host Windows machine. 
31 | 32 | -------------------------------------------------------------------------------- /sh/support/ue_space/ixa_sdk/ixa_sdk_4.1_LinuxPatched.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iassael/cuda-aho-corasick-wu-manber/e2f8b551a1c0b979884963dcef28f363b865154f/sh/support/ue_space/ixa_sdk/ixa_sdk_4.1_LinuxPatched.tgz -------------------------------------------------------------------------------- /sh/support/ue_space/ixa_sdk/ixa_sdk_4.2_LinuxPatched.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/iassael/cuda-aho-corasick-wu-manber/e2f8b551a1c0b979884963dcef28f363b865154f/sh/support/ue_space/ixa_sdk/ixa_sdk_4.2_LinuxPatched.tgz -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/Makefile: -------------------------------------------------------------------------------- 1 | all: ixp2000-ue-disas \ 2 | ixp2000-lib-userspace.o \ 3 | ixp2000-uengine.o \ 4 | ixp2400-msf.o 5 | 6 | clean: 7 | rm -f ixp2000-ue-disas *.o 8 | 9 | ixp2000-lib-userspace.o: ixp2000-lib-userspace.c 10 | $(CC_CROSS) $(CFLAGS_ARM) -c -o ixp2000-lib-userspace.o ixp2000-lib-userspace.c 11 | 12 | ixp2000-uengine.o: ixp2000-uengine.c ixp2000-uengine.h 13 | $(CC_CROSS) $(CFLAGS_ARM) -c -o ixp2000-uengine.o ixp2000-uengine.c 14 | 15 | ixp2000-ue-disas: ixp2000-ue-disas.c ixp2000-ue-disas.h 16 | $(CC_CROSS) $(CFLAGS_ARM) -DTEST -o ixp2000-ue-disas ixp2000-ue-disas.c 17 | 18 | ixp2400-msf.o: ixp2400-msf.c ixp2400-msf.h 19 | $(CC_CROSS) $(CFLAGS_ARM) -c -o ixp2400-msf.o ixp2400-msf.c 20 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/compat.h: -------------------------------------------------------------------------------- 1 | #ifndef __COMPAT_H 2 | #define __COMPAT_H 3 | 4 | #ifndef __KERNEL__ 5 | #include 6 | #include 7 | 8 | typedef 
u_int8_t u8; 9 | typedef u_int32_t u32; 10 | typedef u_int64_t u64; 11 | 12 | extern void *IXP2000_GLOBAL_REG_VIRT_BASE; 13 | extern void *IXP2000_MSF_VIRT_BASE; 14 | extern void *IXP2000_RBUF_TBUF_VIRT_BASE; 15 | extern void *IXP2000_UENGINE_CSR_VIRT_BASE; 16 | extern void *IXP2000_INT_CONTROLLER_VIRT_BASE; 17 | extern u32 ixp2000_uengine_mask; 18 | 19 | #define IXP2000_PRODUCT_ID ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a00)) 20 | #define IXP2000_MISC_CONTROL ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a04)) 21 | #define IXP2000_MSF_CLK_CNTRL ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a08)) 22 | #define IXP2000_RESET0 ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a0c)) 23 | #define IXP2000_RESET1 ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a10)) 24 | #define IXP2000_CLOCK_CONTROL ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a14)) 25 | #define IXP2000_STRAP_OPTIONS ((volatile u32 *)(IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a18)) 26 | 27 | void *ioremap_nocache(unsigned long phys, unsigned long size); 28 | void iounmap_length(volatile void *virt, unsigned long size); 29 | void udelay(unsigned long usecs); 30 | 31 | static inline u32 hweight32(u32 w) 32 | { 33 | u32 res; 34 | 35 | res = (w & 0x55555555) + ((w >> 1) & 0x55555555); 36 | res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 37 | res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F); 38 | res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF); 39 | res = (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF); 40 | 41 | return res; 42 | } 43 | 44 | static inline unsigned int ixdp2x00_master_npu(void) 45 | { 46 | return !!(*IXP2000_STRAP_OPTIONS & 4); 47 | } 48 | #else 49 | #include 50 | #include 51 | #endif 52 | 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2000-lib-userspace.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generic 
library functions for the MSF (Media and Switch Fabric 3 | * unit) and microengines found on the Intel IXP2000 series of network 4 | * processors. 5 | * 6 | * Stub functions to make it work from userspace. 7 | * 8 | * Copyright (C) 2004, 2005 Lennert Buytenhek 9 | * Dedicated to Marija Kulikova. 10 | * 11 | * This program is free software; you can redistribute it and/or modify 12 | * it under the terms of the GNU Lesser General Public License as 13 | * published by the Free Software Foundation; either version 2.1 of the 14 | * License, or (at your option) any later version. 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include "compat.h" 25 | 26 | #define dprintf(...) 27 | 28 | static int dev_mem_fd; 29 | void *IXP2000_GLOBAL_REG_VIRT_BASE; 30 | void *IXP2000_MSF_VIRT_BASE; 31 | void *IXP2000_RBUF_TBUF_VIRT_BASE; 32 | void *IXP2000_UENGINE_CSR_VIRT_BASE; 33 | void *IXP2000_INT_CONTROLLER_VIRT_BASE; 34 | u32 ixp2000_uengine_mask; 35 | 36 | static void ixp2000_map(void) __attribute__((constructor)); 37 | static void ixp2000_map(void) 38 | { 39 | u32 product_id; 40 | 41 | dev_mem_fd = open("/dev/mem", O_RDWR | O_SYNC); 42 | if (dev_mem_fd < 0) { 43 | perror("open(\"/dev/mem\")"); 44 | exit(-1); 45 | } 46 | 47 | IXP2000_GLOBAL_REG_VIRT_BASE = ioremap_nocache(0xc0004000, 4096); 48 | IXP2000_MSF_VIRT_BASE = ioremap_nocache(0xc8000000, 8192); 49 | IXP2000_RBUF_TBUF_VIRT_BASE = ioremap_nocache(0xc8002000, 8192); 50 | IXP2000_UENGINE_CSR_VIRT_BASE = ioremap_nocache(0xc0018000, 32768); 51 | IXP2000_INT_CONTROLLER_VIRT_BASE = ioremap_nocache(0xd6000000, 4096); 52 | 53 | // @@@ we should check that we're really on an ixp2000 54 | product_id = *IXP2000_PRODUCT_ID; 55 | 56 | switch ((product_id >> 8) & 0x1fff) { 57 | case 0: 58 | dprintf("detected IXP2800 rev %c%x\n", 59 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf); 60 | ixp2000_uengine_mask = 0x00ff00ff; 61 | break; 62 | 63 | case 1: 64 | 
dprintf("detected IXP2850 rev %c%x\n", 65 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf); 66 | ixp2000_uengine_mask = 0x00ff00ff; 67 | break; 68 | 69 | case 2: 70 | dprintf("detected IXP2400 rev %c%x\n", 71 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf); 72 | ixp2000_uengine_mask = 0x000f000f; 73 | break; 74 | 75 | default: 76 | fprintf(stderr, "unknown ixp2000 model (%.8x)\n", product_id); 77 | ixp2000_uengine_mask = 0; 78 | break; 79 | } 80 | } 81 | 82 | static void ixp2000_unmap(void) __attribute__((destructor)); 83 | static void ixp2000_unmap(void) 84 | { 85 | if (dev_mem_fd >= 0) { 86 | iounmap_length(IXP2000_GLOBAL_REG_VIRT_BASE, 4096); 87 | iounmap_length(IXP2000_MSF_VIRT_BASE, 8192); 88 | iounmap_length(IXP2000_RBUF_TBUF_VIRT_BASE, 8192); 89 | iounmap_length(IXP2000_UENGINE_CSR_VIRT_BASE, 32768); 90 | iounmap_length(IXP2000_INT_CONTROLLER_VIRT_BASE, 4096); 91 | close(dev_mem_fd); 92 | } 93 | } 94 | 95 | void *ioremap_nocache(unsigned long phys, unsigned long size) 96 | { 97 | void *x; 98 | 99 | x = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_mem_fd, phys); 100 | if (x == MAP_FAILED) { 101 | perror("mmap"); 102 | exit(-1); 103 | } 104 | 105 | return x; 106 | } 107 | 108 | void iounmap_length(volatile void *virt, unsigned long size) 109 | { 110 | munmap((void *)virt, size); 111 | } 112 | 113 | void udelay(unsigned long usecs) 114 | { 115 | usleep(usecs); 116 | } 117 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2000-msf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Register definitions for the MSF (Media and Switch Fabric) unit 3 | * found on the Intel IXP2000 series of network processors. 4 | * 5 | * Copyright (C) 2005 Lennert Buytenhek 6 | * Dedicated to Marija Kulikova. 
7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU Lesser General Public License as 10 | * published by the Free Software Foundation; either version 2.1 of the 11 | * License, or (at your option) any later version. 12 | */ 13 | 14 | #ifndef __IXP2000_MSF_H 15 | #define __IXP2000_MSF_H 16 | 17 | #include "compat.h" 18 | 19 | #define IXP2000_MSF_REG(x) ((volatile u32 *)(IXP2000_MSF_VIRT_BASE + (x))) 20 | #define IXP2000_MSF_RX_CONTROL IXP2000_MSF_REG(0x0000) 21 | #define IXP2000_MSF_TX_CONTROL IXP2000_MSF_REG(0x0004) 22 | #define IXP2000_MSF_INTERRUPT_STATUS IXP2000_MSF_REG(0x0008) 23 | #define IXP2000_MSF_INTERRUPT_ENABLE IXP2000_MSF_REG(0x000c) 24 | #define IXP2000_MSF_CSIX_TYPE_MAP IXP2000_MSF_REG(0x0010) 25 | #define IXP2000_MSF_FC_EGRESS_STATUS IXP2000_MSF_REG(0x0014) 26 | #define IXP2000_MSF_FC_INGRESS_STATUS IXP2000_MSF_REG(0x0018) 27 | #define IXP2000_MSF_HWM_CONTROL IXP2000_MSF_REG(0x0024) 28 | #define IXP2000_MSF_FC_STATUS_OVERRIDE IXP2000_MSF_REG(0x0028) 29 | #define IXP2000_MSF_CLOCK_CONTROL IXP2000_MSF_REG(0x002c) 30 | #define IXP2000_MSF_RX_PORT_MAP IXP2000_MSF_REG(0x0040) 31 | #define IXP2000_MSF_RBUF_ELEMENT_DONE IXP2000_MSF_REG(0x0044) 32 | #define IXP2000_MSF_RX_MPHY_POLL_LIMIT IXP2000_MSF_REG(0x0048) 33 | #define IXP2000_MSF_RX_CALENDAR_LENGTH IXP2000_MSF_REG(0x0048) 34 | #define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_0 IXP2000_MSF_REG(0x0050) 35 | #define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_1 IXP2000_MSF_REG(0x0054) 36 | #define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_2 IXP2000_MSF_REG(0x0058) 37 | #define IXP2000_MSF_TX_SEQUENCE_0 IXP2000_MSF_REG(0x0060) 38 | #define IXP2000_MSF_TX_SEQUENCE_1 IXP2000_MSF_REG(0x0064) 39 | #define IXP2000_MSF_TX_SEQUENCE_2 IXP2000_MSF_REG(0x0068) 40 | #define IXP2000_MSF_TX_MPHY_POLL_LIMIT IXP2000_MSF_REG(0x0070) 41 | #define IXP2000_MSF_TX_CALENDAR_LENGTH IXP2000_MSF_REG(0x0070) 42 | #define IXP2000_MSF_RX_UP_CONTROL_0 IXP2000_MSF_REG(0x0080) 43 | 
#define IXP2000_MSF_RX_UP_CONTROL_1 IXP2000_MSF_REG(0x0084) 44 | #define IXP2000_MSF_RX_UP_CONTROL_2 IXP2000_MSF_REG(0x0088) 45 | #define IXP2000_MSF_RX_UP_CONTROL_3 IXP2000_MSF_REG(0x008c) 46 | #define IXP2000_MSF_TX_UP_CONTROL_0 IXP2000_MSF_REG(0x0090) 47 | #define IXP2000_MSF_TX_UP_CONTROL_1 IXP2000_MSF_REG(0x0094) 48 | #define IXP2000_MSF_TX_UP_CONTROL_2 IXP2000_MSF_REG(0x0098) 49 | #define IXP2000_MSF_TX_UP_CONTROL_3 IXP2000_MSF_REG(0x009c) 50 | #define IXP2000_MSF_TRAIN_DATA IXP2000_MSF_REG(0x00a0) 51 | #define IXP2000_MSF_TRAIN_CALENDAR IXP2000_MSF_REG(0x00a4) 52 | #define IXP2000_MSF_TRAIN_FLOW_CONTROL IXP2000_MSF_REG(0x00a8) 53 | #define IXP2000_MSF_TX_CALENDAR_0 IXP2000_MSF_REG(0x1000) 54 | #define IXP2000_MSF_RX_PORT_CALENDAR_STATUS IXP2000_MSF_REG(0x1400) 55 | 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2000-ue-disas.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Disassembler for the IXP2000 microengine (MEv2) instruction format. 3 | * 4 | * Copyright (C) 2005 Lennert Buytenhek 5 | * Dedicated to Marija Kulikova. 6 | * 7 | * This program is free software; you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as 9 | * published by the Free Software Foundation; either version 2.1 of the 10 | * License, or (at your option) any later version. 
11 | */ 12 | 13 | #ifndef __IXP2000_UE_DISAS_H 14 | #define __IXP2000_UE_DISAS_H 15 | 16 | #define CONTEXTS_4 4 17 | #define CONTEXTS_8 8 18 | 19 | char *ixp2000_ue_disassemble(u_int64_t insn, int contexts_mode); 20 | 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2000-uengine.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Generic library functions for the microengines found on the Intel 3 | * IXP2000 series of network processors. 4 | * 5 | * Copyright (C) 2004, 2005 Lennert Buytenhek 6 | * Dedicated to Marija Kulikova. 7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU Lesser General Public License as 10 | * published by the Free Software Foundation; either version 2.1 of the 11 | * License, or (at your option) any later version. 12 | */ 13 | 14 | #ifndef __IXP2000_UENGINE_H 15 | #define __IXP2000_UENGINE_H 16 | 17 | #include "compat.h" 18 | 19 | struct ixp2000_uengine_code 20 | { 21 | u32 cpu_model_bitmask; 22 | u8 cpu_min_revision; 23 | u8 cpu_max_revision; 24 | 25 | u32 uengine_parameters; 26 | 27 | struct ixp2000_reg_value { 28 | int reg; 29 | u32 value; 30 | } *initial_reg_values; 31 | 32 | int num_insns; 33 | u8 *insns; 34 | }; 35 | 36 | u32 ixp2000_uengine_csr_read(int uengine, int offset); 37 | void ixp2000_uengine_csr_write(int uengine, int offset, u32 value); 38 | void ixp2000_uengine_reset(u32 uengine_mask); 39 | void ixp2000_uengine_init_timestamp_counters(void); 40 | void ixp2000_uengine_set_mode(int uengine, u32 mode); 41 | void ixp2000_uengine_load_microcode(int uengine, u8 *ucode, int insns); 42 | void ixp2000_uengine_init_context(int uengine, int context, int pc); 43 | void ixp2000_uengine_start_contexts(int uengine, u8 ctx_mask); 44 | void ixp2000_uengine_stop_contexts(int uengine, u8 ctx_mask); 45 | int ixp2000_uengine_load(int uengine, struct 
ixp2000_uengine_code *c); 46 | 47 | #define IXP2000_UENGINE_8_CONTEXTS 0x00000000 48 | #define IXP2000_UENGINE_4_CONTEXTS 0x80000000 49 | #define IXP2000_UENGINE_PRN_UPDATE_EVERY 0x40000000 50 | #define IXP2000_UENGINE_PRN_UPDATE_ON_ACCESS 0x00000000 51 | #define IXP2000_UENGINE_NN_FROM_SELF 0x00100000 52 | #define IXP2000_UENGINE_NN_FROM_PREVIOUS 0x00000000 53 | #define IXP2000_UENGINE_ASSERT_EMPTY_AT_3 0x000c0000 54 | #define IXP2000_UENGINE_ASSERT_EMPTY_AT_2 0x00080000 55 | #define IXP2000_UENGINE_ASSERT_EMPTY_AT_1 0x00040000 56 | #define IXP2000_UENGINE_ASSERT_EMPTY_AT_0 0x00000000 57 | #define IXP2000_UENGINE_LM_ADDR1_GLOBAL 0x00020000 58 | #define IXP2000_UENGINE_LM_ADDR1_PER_CONTEXT 0x00000000 59 | #define IXP2000_UENGINE_LM_ADDR0_GLOBAL 0x00010000 60 | #define IXP2000_UENGINE_LM_ADDR0_PER_CONTEXT 0x00000000 61 | 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2400-msf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Generic library functions for the MSF (Media and Switch Fabric) unit 3 | * found on the Intel IXP2400 network processor. 4 | * 5 | * Copyright (C) 2004, 2005 Lennert Buytenhek 6 | * Dedicated to Marija Kulikova. 7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU Lesser General Public License as 10 | * published by the Free Software Foundation; either version 2.1 of the 11 | * License, or (at your option) any later version. 12 | */ 13 | 14 | #include "ixp2000-msf.h" 15 | #include "ixp2400-msf.h" 16 | 17 | /* 18 | * This is the Intel recommended PLL init procedure as described on 19 | * page 340 of the IXP2400/IXP2800 Programmer's Reference Manual. 
20 | */ 21 | static void ixp2400_pll_init(struct ixp2400_msf_parameters *mp) 22 | { 23 | int rx_dual_clock; 24 | int tx_dual_clock; 25 | u32 value; 26 | 27 | /* 28 | * If the RX mode is not 1x32, we have to enable both RX PLLs 29 | * (#0 and #1.) The same thing for the TX direction. 30 | */ 31 | rx_dual_clock = !!(mp->rx_mode & IXP2400_RX_MODE_WIDTH_MASK); 32 | tx_dual_clock = !!(mp->tx_mode & IXP2400_TX_MODE_WIDTH_MASK); 33 | 34 | /* 35 | * Read initial value. 36 | */ 37 | value = *IXP2000_MSF_CLK_CNTRL; 38 | 39 | /* 40 | * Put PLLs in powerdown and bypass mode. 41 | */ 42 | value |= 0x0000f0f0; 43 | *IXP2000_MSF_CLK_CNTRL = value; 44 | 45 | /* 46 | * Set single or dual clock mode bits. 47 | */ 48 | value &= ~0x03000000; 49 | value |= (rx_dual_clock << 24) | (tx_dual_clock << 25); 50 | 51 | /* 52 | * Set multipliers. 53 | */ 54 | value &= ~0x00ff0000; 55 | value |= mp->rxclk01_multiplier << 16; 56 | value |= mp->rxclk23_multiplier << 18; 57 | value |= mp->txclk01_multiplier << 20; 58 | value |= mp->txclk23_multiplier << 22; 59 | 60 | /* 61 | * And write value. 62 | */ 63 | *IXP2000_MSF_CLK_CNTRL = value; 64 | 65 | /* 66 | * Disable PLL bypass mode. 67 | */ 68 | value &= ~(0x00005000 | rx_dual_clock << 13 | tx_dual_clock << 15); 69 | *IXP2000_MSF_CLK_CNTRL = value; 70 | 71 | /* 72 | * Turn on PLLs. 73 | */ 74 | value &= ~(0x00000050 | rx_dual_clock << 5 | tx_dual_clock << 7); 75 | *IXP2000_MSF_CLK_CNTRL = value; 76 | 77 | /* 78 | * Wait for PLLs to lock. There are lock status bits, but IXP2400 79 | * erratum #65 says that these lock bits should not be relied upon 80 | * as they might not accurately reflect the true state of the PLLs. 81 | */ 82 | udelay(100); 83 | } 84 | 85 | /* 86 | * Needed according to p480 of Programmer's Reference Manual. 
87 | */ 88 | static void ixp2400_msf_free_rbuf_entries(struct ixp2400_msf_parameters *mp) 89 | { 90 | int size_bits; 91 | int i; 92 | 93 | /* 94 | * Work around IXP2400 erratum #69 (silent RBUF-to-DRAM transfer 95 | * corruption) in the Intel-recommended way: do not add the RBUF 96 | * elements susceptible to corruption to the freelist. 97 | */ 98 | size_bits = mp->rx_mode & IXP2400_RX_MODE_RBUF_SIZE_MASK; 99 | if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_64) { 100 | for (i=1;i<128;i++) { 101 | if (i == 9 || i == 18 || i == 27) 102 | continue; 103 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i; 104 | } 105 | } else if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_128) { 106 | for (i=1;i<64;i++) { 107 | if (i == 4 || i == 9 || i == 13) 108 | continue; 109 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i; 110 | } 111 | } else if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_256) { 112 | for (i=1;i<32;i++) { 113 | if (i == 2 || i == 4 || i == 6) 114 | continue; 115 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i; 116 | } 117 | } 118 | } 119 | 120 | static u32 ixp2400_msf_valid_channels(u32 reg) 121 | { 122 | u32 channels; 123 | 124 | channels = 0; 125 | switch (reg & IXP2400_RX_MODE_WIDTH_MASK) { 126 | case IXP2400_RX_MODE_1x32: 127 | channels = 0x1; 128 | if (reg & IXP2400_RX_MODE_MPHY && 129 | !(reg & IXP2400_RX_MODE_MPHY_32)) 130 | channels = 0xf; 131 | break; 132 | 133 | case IXP2400_RX_MODE_2x16: 134 | channels = 0x5; 135 | break; 136 | 137 | case IXP2400_RX_MODE_4x8: 138 | channels = 0xf; 139 | break; 140 | 141 | case IXP2400_RX_MODE_1x16_2x8: 142 | channels = 0xd; 143 | break; 144 | } 145 | 146 | return channels; 147 | } 148 | 149 | static void ixp2400_msf_enable_rx(struct ixp2400_msf_parameters *mp) 150 | { 151 | u32 value; 152 | 153 | value = *IXP2000_MSF_RX_CONTROL & 0x0fffffff; 154 | value |= ixp2400_msf_valid_channels(mp->rx_mode) << 28; 155 | *IXP2000_MSF_RX_CONTROL = value; 156 | } 157 | 158 | static void ixp2400_msf_enable_tx(struct ixp2400_msf_parameters *mp) 159 | { 160 | u32 value; 161 | 162 | 
value = *IXP2000_MSF_TX_CONTROL & 0x0fffffff; 163 | value |= ixp2400_msf_valid_channels(mp->tx_mode) << 28; 164 | *IXP2000_MSF_TX_CONTROL = value; 165 | } 166 | 167 | 168 | void ixp2400_msf_init(struct ixp2400_msf_parameters *mp) 169 | { 170 | u32 value; 171 | int i; 172 | 173 | /* 174 | * Init the RX/TX PLLs based on the passed parameter block. 175 | */ 176 | ixp2400_pll_init(mp); 177 | 178 | /* 179 | * Reset MSF. Bit 7 in IXP_RESET_0 resets the MSF. 180 | */ 181 | value = *IXP2000_RESET0; 182 | *IXP2000_RESET0 = value | 0x80; 183 | *IXP2000_RESET0 = value & ~0x80; 184 | 185 | /* 186 | * Initialise the RX section. 187 | */ 188 | *IXP2000_MSF_RX_MPHY_POLL_LIMIT = mp->rx_poll_ports - 1; 189 | *IXP2000_MSF_RX_CONTROL = mp->rx_mode; 190 | for (i=0;i<4;i++) 191 | IXP2000_MSF_RX_UP_CONTROL_0[i] = mp->rx_channel_mode[i]; 192 | ixp2400_msf_free_rbuf_entries(mp); 193 | ixp2400_msf_enable_rx(mp); 194 | 195 | /* 196 | * Initialise the TX section. 197 | */ 198 | *IXP2000_MSF_TX_MPHY_POLL_LIMIT = mp->tx_poll_ports - 1; 199 | *IXP2000_MSF_TX_CONTROL = mp->tx_mode; 200 | for (i=0;i<4;i++) 201 | IXP2000_MSF_TX_UP_CONTROL_0[i] = mp->tx_channel_mode[i]; 202 | ixp2400_msf_enable_tx(mp); 203 | } 204 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/ixp2400-msf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Generic library functions for the MSF (Media and Switch Fabric) unit 3 | * found on the Intel IXP2400 network processor. 4 | * 5 | * Copyright (C) 2004, 2005 Lennert Buytenhek 6 | * Dedicated to Marija Kulikova. 7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU Lesser General Public License as 10 | * published by the Free Software Foundation; either version 2.1 of the 11 | * License, or (at your option) any later version. 
12 | */ 13 | 14 | #ifndef __IXP2400_MSF_H 15 | #define __IXP2400_MSF_H 16 | 17 | #include "compat.h" 18 | 19 | struct ixp2400_msf_parameters 20 | { 21 | u32 rx_mode; 22 | unsigned rxclk01_multiplier:2; 23 | unsigned rxclk23_multiplier:2; 24 | unsigned rx_poll_ports:6; 25 | u32 rx_channel_mode[4]; 26 | 27 | u32 tx_mode; 28 | unsigned txclk01_multiplier:2; 29 | unsigned txclk23_multiplier:2; 30 | unsigned tx_poll_ports:6; 31 | u32 tx_channel_mode[4]; 32 | }; 33 | 34 | void ixp2400_msf_init(struct ixp2400_msf_parameters *mp); 35 | 36 | #define IXP2400_PLL_MULTIPLIER_48 0x00 37 | #define IXP2400_PLL_MULTIPLIER_24 0x01 38 | #define IXP2400_PLL_MULTIPLIER_16 0x02 39 | #define IXP2400_PLL_MULTIPLIER_12 0x03 40 | 41 | #define IXP2400_RX_MODE_CSIX 0x00400000 42 | #define IXP2400_RX_MODE_UTOPIA_POS 0x00000000 43 | #define IXP2400_RX_MODE_WIDTH_MASK 0x00300000 44 | #define IXP2400_RX_MODE_1x16_2x8 0x00300000 45 | #define IXP2400_RX_MODE_4x8 0x00200000 46 | #define IXP2400_RX_MODE_2x16 0x00100000 47 | #define IXP2400_RX_MODE_1x32 0x00000000 48 | #define IXP2400_RX_MODE_MPHY 0x00080000 49 | #define IXP2400_RX_MODE_SPHY 0x00000000 50 | #define IXP2400_RX_MODE_MPHY_32 0x00040000 51 | #define IXP2400_RX_MODE_MPHY_4 0x00000000 52 | #define IXP2400_RX_MODE_MPHY_POLLED_STATUS 0x00020000 53 | #define IXP2400_RX_MODE_MPHY_DIRECT_STATUS 0x00000000 54 | #define IXP2400_RX_MODE_CBUS_FULL_DUPLEX 0x00010000 55 | #define IXP2400_RX_MODE_CBUS_SIMPLEX 0x00000000 56 | #define IXP2400_RX_MODE_MPHY_LEVEL2 0x00004000 57 | #define IXP2400_RX_MODE_MPHY_LEVEL3 0x00000000 58 | #define IXP2400_RX_MODE_CBUS_8BIT 0x00002000 59 | #define IXP2400_RX_MODE_CBUS_4BIT 0x00000000 60 | #define IXP2400_RX_MODE_CSIX_SINGLE_FREELIST 0x00000200 61 | #define IXP2400_RX_MODE_CSIX_SPLIT_FREELISTS 0x00000000 62 | #define IXP2400_RX_MODE_RBUF_SIZE_MASK 0x0000000c 63 | #define IXP2400_RX_MODE_RBUF_SIZE_256 0x00000008 64 | #define IXP2400_RX_MODE_RBUF_SIZE_128 0x00000004 65 | #define IXP2400_RX_MODE_RBUF_SIZE_64 0x00000000 
66 | 67 | #define IXP2400_PORT_RX_MODE_SLAVE 0x00000040 68 | #define IXP2400_PORT_RX_MODE_MASTER 0x00000000 69 | #define IXP2400_PORT_RX_MODE_POS_PHY_L3 0x00000020 70 | #define IXP2400_PORT_RX_MODE_POS_PHY_L2 0x00000000 71 | #define IXP2400_PORT_RX_MODE_POS_PHY 0x00000010 72 | #define IXP2400_PORT_RX_MODE_UTOPIA 0x00000000 73 | #define IXP2400_PORT_RX_MODE_EVEN_PARITY 0x0000000c 74 | #define IXP2400_PORT_RX_MODE_ODD_PARITY 0x00000008 75 | #define IXP2400_PORT_RX_MODE_NO_PARITY 0x00000000 76 | #define IXP2400_PORT_RX_MODE_UTOPIA_BIG_CELLS 0x00000002 77 | #define IXP2400_PORT_RX_MODE_UTOPIA_NORMAL_CELLS 0x00000000 78 | #define IXP2400_PORT_RX_MODE_2_CYCLE_DECODE 0x00000001 79 | #define IXP2400_PORT_RX_MODE_1_CYCLE_DECODE 0x00000000 80 | 81 | #define IXP2400_TX_MODE_CSIX 0x00400000 82 | #define IXP2400_TX_MODE_UTOPIA_POS 0x00000000 83 | #define IXP2400_TX_MODE_WIDTH_MASK 0x00300000 84 | #define IXP2400_TX_MODE_1x16_2x8 0x00300000 85 | #define IXP2400_TX_MODE_4x8 0x00200000 86 | #define IXP2400_TX_MODE_2x16 0x00100000 87 | #define IXP2400_TX_MODE_1x32 0x00000000 88 | #define IXP2400_TX_MODE_MPHY 0x00080000 89 | #define IXP2400_TX_MODE_SPHY 0x00000000 90 | #define IXP2400_TX_MODE_MPHY_32 0x00040000 91 | #define IXP2400_TX_MODE_MPHY_4 0x00000000 92 | #define IXP2400_TX_MODE_MPHY_POLLED_STATUS 0x00020000 93 | #define IXP2400_TX_MODE_MPHY_DIRECT_STATUS 0x00000000 94 | #define IXP2400_TX_MODE_CBUS_FULL_DUPLEX 0x00010000 95 | #define IXP2400_TX_MODE_CBUS_SIMPLEX 0x00000000 96 | #define IXP2400_TX_MODE_MPHY_LEVEL2 0x00004000 97 | #define IXP2400_TX_MODE_MPHY_LEVEL3 0x00000000 98 | #define IXP2400_TX_MODE_CBUS_8BIT 0x00002000 99 | #define IXP2400_TX_MODE_CBUS_4BIT 0x00000000 100 | #define IXP2400_TX_MODE_TBUF_SIZE_MASK 0x0000000c 101 | #define IXP2400_TX_MODE_TBUF_SIZE_256 0x00000008 102 | #define IXP2400_TX_MODE_TBUF_SIZE_128 0x00000004 103 | #define IXP2400_TX_MODE_TBUF_SIZE_64 0x00000000 104 | 105 | #define IXP2400_PORT_TX_MODE_SLAVE 0x00000040 106 | #define 
IXP2400_PORT_TX_MODE_MASTER 0x00000000 107 | #define IXP2400_PORT_TX_MODE_POS_PHY 0x00000010 108 | #define IXP2400_PORT_TX_MODE_UTOPIA 0x00000000 109 | #define IXP2400_PORT_TX_MODE_EVEN_PARITY 0x0000000c 110 | #define IXP2400_PORT_TX_MODE_ODD_PARITY 0x00000008 111 | #define IXP2400_PORT_TX_MODE_NO_PARITY 0x00000000 112 | #define IXP2400_PORT_TX_MODE_UTOPIA_BIG_CELLS 0x00000002 113 | #define IXP2400_PORT_TX_MODE_2_CYCLE_DECODE 0x00000001 114 | #define IXP2400_PORT_TX_MODE_1_CYCLE_DECODE 0x00000000 115 | 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /sh/support/ue_space/uengine/version: -------------------------------------------------------------------------------- 1 | version 0.0.36 2 | 3 | http://ixp2xxx.sf.net 4 | -------------------------------------------------------------------------------- /smatcher.h: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 
12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see .*/ 15 | 16 | #ifndef SMATCHER_H 17 | #define SMATCHER_H 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | //#include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include "../helper2.h" 32 | 33 | //KMP 34 | struct node { 35 | char label; 36 | int id; 37 | struct node* supply; 38 | struct node* next; 39 | }; 40 | 41 | struct ac_state { 42 | unsigned int id; 43 | unsigned int keywordline; //Remember which keyword row corresponds to the accepting word 44 | unsigned char *output; //The output contains the whole keyword to be printed when a terminal state is encountered 45 | struct ac_state *fail; 46 | struct ac_state **next; 47 | }; 48 | 49 | struct ac_table { 50 | unsigned int idcounter; 51 | unsigned int patterncounter; 52 | struct ac_state *zerostate; 53 | }; 54 | 55 | struct sbom_state **pointer_array; 56 | 57 | struct sbom_state { 58 | unsigned int id; 59 | unsigned int *F; //Remember which keyword rows correspond to the accepting word 60 | unsigned int num; //Store the number of different pattern rows that correspond to the same terminal state 61 | struct sbom_state *fail; 62 | struct sbom_state **next; 63 | }; 64 | 65 | struct sbom_table { 66 | unsigned int idcounter; 67 | unsigned int patterncounter; 68 | struct sbom_state *zerostate; 69 | }; 70 | 71 | unsigned short m_nBitsInShift; 72 | 73 | unsigned int shiftsize; 74 | 75 | //SOG 76 | //Total number of 3 grams returned by the GET3GRAM macro 77 | #define SIZE_3GRAM_TABLE 0x1000000 78 | #define CHAR_WIDTH_3GRAM 8 79 | 80 | #define GET3GRAM(address) ((((uint32_t) (address)[0])) + (((uint32_t)((address)[1])) << CHAR_WIDTH_3GRAM) + (((uint32_t)((address)[2])) << (CHAR_WIDTH_3GRAM << 1))) 81 | 82 | //Bit 
masks used in 2-level hashing 83 | static const uint8_t mask[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80}; 84 | /* 85 | uint8_t T8[SIZE_3GRAM_TABLE]; 86 | uint16_t T16[SIZE_3GRAM_TABLE]; 87 | uint32_t T32[SIZE_3GRAM_TABLE];*/ 88 | 89 | struct ac_table *preproc_ac ( unsigned char **, int, int, int, int *, unsigned int *, unsigned int * ); 90 | unsigned search_ac ( unsigned char *, int, struct ac_table * ); 91 | void free_ac ( struct ac_table *, int ); 92 | 93 | struct ac_table *preproc_sh ( unsigned char **, int, int, int, int *, unsigned int * ); 94 | unsigned search_sh ( int, unsigned char *, int, struct ac_table *, int * ); 95 | void free_sh ( struct ac_table *, int ); 96 | 97 | struct sbom_table * preproc_sbom ( unsigned char **, int, int, int, int *, unsigned int * ); 98 | unsigned search_sbom ( unsigned char **, int, unsigned char *, int, struct sbom_table * ); 99 | void free_sbom ( struct sbom_table *, int ); 100 | 101 | void preproc_wu ( unsigned char **, int, int, int, int, int *, int *, int *, int * ); 102 | void preproc_wu2 ( unsigned char *, int, int, int, int, int *, int *, int *, int * ); 103 | void wu_determine_shiftsize ( int ); 104 | //void wu_init ( int, int, int, int **, struct prefixArray ** ); 105 | unsigned int search_wu ( unsigned char **, int, int, unsigned char *, int, int *, int *, int *, int * ); 106 | unsigned int search_wu2 ( unsigned char *, int, int, unsigned char *, int, int *, int *, int *, int * ); 107 | //void wu_free ( int **, struct prefixArray ** ); 108 | 109 | void preproc_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ); 110 | unsigned int search_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ); 111 | 112 | /* 113 | void preproc_sog8 ( unsigned char **, int, int ); 114 | void sog_init8 ( int 
); 115 | unsigned int search_sog8 ( unsigned char **, int, unsigned char *, int, int, int ); 116 | void sog_free8 (); 117 | void my_preproc_sog8 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ); 118 | 119 | void preproc_sog16 ( unsigned char **, int, int ); 120 | void sog_init16 ( int ); 121 | unsigned int search_sog16 ( unsigned char **, int, unsigned char *, int, int, int ); 122 | void sog_free16 (); 123 | 124 | void preproc_sog32 ( unsigned char **, int, int ); 125 | void sog_init32 ( int ); 126 | unsigned int search_sog32 ( unsigned char **, int, unsigned char *, int, int, int ); 127 | void sog_free32 (); 128 | */ 129 | 130 | void preKmp ( int *, unsigned char *, int ); 131 | 132 | //void preBmGs ( unsigned char **, int, int [] ); 133 | void preBmBc ( unsigned char **, int, int, int, int * ); 134 | 135 | #endif 136 | -------------------------------------------------------------------------------- /sog/sog16.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | // A structure for holding the hash value and the pattern for the 8-byte Rabin-Karp implementation 19 | typedef struct { 20 | 21 | uint32_t hs; 22 | uint8_t pat[16]; 23 | int index; 24 | 25 | } pat_hs_t16; 26 | 27 | //Scanner that provides final matching for 8-byte patterns with Rabin-Karp. 28 | typedef struct { 29 | 30 | // 2-level hash table 31 | uint8_t hs2[256*32]; 32 | 33 | // Table holding the patterns and their hash values. This table is ordered according to the hash values 34 | pat_hs_t16 *patterns; 35 | 36 | // Position of the first empty slot in the pattern table 37 | int pos; 38 | 39 | } sog_scanner16; 40 | 41 | sog_scanner16 *scanner16; 42 | 43 | #define GET32(address) (((uint32_t)((address)[0]) << 24) + ((uint32_t)((address)[1]) << 16) + ((uint32_t)((address)[2]) << 8) + (address)[3]) 44 | 45 | //Compare two patterns using their hash values 46 | static int compSign ( const void* s1, const void* s2 ) { 47 | 48 | uint32_t h1 = ( (pat_hs_t16 *) s1 )->hs; 49 | uint32_t h2 = ( (pat_hs_t16 *) s2 )->hs; 50 | 51 | if (h1 < h2) 52 | return -1; 53 | else if (h1 == h2) 54 | return 0; 55 | else 56 | return 1; 57 | } 58 | 59 | int sog_rkbt_verification16 ( unsigned char *text, int m, int p_size ) { 60 | 61 | uint32_t hs = ( GET32((text)) ^ GET32((text + 4)) ) ^ ( GET32((text + 8)) ^ GET32((text + 12)) ); 62 | 63 | /* printf("text = %c%c%c%c\n", *(text), *(text + 1), *(text + 2), *(text + 3)); 64 | printf("text = %s\n", text); 65 | printf("text hs = %i\n", hs); 66 | */ 67 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs); 68 | 69 | //printf("---%s\n", scanner16->patterns[lookfor].pat); 70 | 71 | /* check 
2-level hash */ 72 | if ( scanner16->hs2[hs2level >> 3] & mask[hs2level & 0x07] ) { 73 | 74 | int lo = 0; 75 | int hi = p_size - 1; 76 | int mid; 77 | uint32_t hs_pat; 78 | 79 | // do the binary search 80 | while ( hi >= lo ) { 81 | 82 | mid = ( lo + hi ) / 2; 83 | hs_pat = scanner16->patterns[mid].hs; 84 | 85 | //if ( verbose ) 86 | //printf(">mid = %i hs = %i hs_pat = %i index = %i pat = %s \n", mid, hs, scanner16->patterns[mid].hs, scanner16->patterns[mid].index, scanner16->patterns[mid].pat); 87 | 88 | if ( hs > hs_pat ) 89 | lo = ++mid; 90 | 91 | else if ( hs < hs_pat ) 92 | hi = --mid; 93 | 94 | //if text hash equals pattern hash verify the match 95 | else { 96 | // check for duplicates and patterns with same hash 97 | while ( mid > 0 && hs == scanner16->patterns[mid - 1].hs ) 98 | mid--; 99 | 100 | do { 101 | //printf("%c%c%c%c%c%c%c%c - %s\n", *(index - 7), *(index - 6), *(index - 5), *(index - 4), *(index - 3), *(index - 2), *(index - 1), *(index - 0), scanner16->patterns[mid].pat ); 102 | 103 | if ( memcmp ( text, scanner16->patterns[mid].pat, 16 ) == 0 ) 104 | return 1; 105 | 106 | mid++; 107 | 108 | } while ( mid < p_size && hs == scanner16->patterns[mid].hs ); 109 | 110 | break; 111 | } 112 | } 113 | } 114 | return -1; 115 | } 116 | 117 | unsigned int search_sog16 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) { 118 | 119 | register uint16_t E = 0xffff; 120 | 121 | int column, matches = 0; 122 | 123 | for ( column = 0; column < n - 2; column++ ) { 124 | 125 | E = (E << 1) | T8[GET3GRAM( text + column )]; 126 | 127 | //printbinary(E, 8); 128 | 129 | //printbinary(E & 0x20, 8); 130 | 131 | if ( E & 0x2000 ) 132 | continue; 133 | 134 | //printf("potential match at column %i\n", column + B - 1); 135 | 136 | //if ( column > 50 ) 137 | // exit ( 0 ); 138 | 139 | if ( sog_rkbt_verification16 ( (unsigned char *)text + column - m + B, m, p_size ) != -1 ) 140 | matches++; 141 | } 142 | 143 | return matches; 144 | 145 | 146 | } 
147 | 148 | static void sog_add_pattern2 ( uint8_t *pattern, int m, int p_size ) { 149 | 150 | int i; 151 | 152 | uint32_t hs; 153 | uint16_t hs2level; 154 | 155 | if ( scanner16->pos < p_size ) { 156 | 157 | //add pattern 158 | for ( i = 0; i < m; i++ ) 159 | scanner16->patterns[scanner16->pos].pat[i] = pattern[i]; 160 | 161 | //add index 162 | scanner16->patterns[scanner16->pos].index = scanner16->pos; 163 | 164 | // Count hash 165 | scanner16->patterns[scanner16->pos].hs = ( GET32(pattern) ^ GET32(&pattern[4]) ) ^ ( GET32(&pattern[8]) ^ GET32(&pattern[12]) ); 166 | 167 | // Count 2-level hash 168 | hs = scanner16->patterns[scanner16->pos].hs; 169 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs ); 170 | 171 | scanner16->hs2[hs2level >> 3] |= mask[hs2level & 0x07]; 172 | scanner16->pos++; 173 | } 174 | } 175 | 176 | static void sog_add_pattern ( uint8_t *pattern, int m, int p_size ) { 177 | 178 | uint8_t *index = &pattern[0]; 179 | uint8_t *limit = &pattern[15]; 180 | 181 | unsigned int i = 0; 182 | 183 | uint32_t hs; 184 | 185 | sog_add_pattern2 ( pattern, m, p_size ); 186 | 187 | while ( index < limit ) { 188 | hs = GET3GRAM( index ); 189 | 190 | //printbinary(hs, 32); 191 | //printf("hs: %i T[hs]: %i ", hs, T[hs]); 192 | 193 | T16[hs] &= 0xffff - ( 1 << i ); 194 | 195 | //printbinary(T[hs], 8); 196 | 197 | index++; 198 | i++; 199 | } 200 | 201 | //printf("\n"); 202 | } 203 | 204 | static void sog_reset_patterns ( int m ) { 205 | 206 | unsigned int i; 207 | 208 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ ) 209 | T16[i] = 0xffff; 210 | 211 | scanner16->pos = 0; 212 | 213 | // Reset 2-level hashes 214 | for ( i = 0; i < 32 * 256; i++ ) 215 | scanner16->hs2[i] = 0x00; 216 | } 217 | 218 | void sog_init16 ( int p_size ) { 219 | 220 | scanner16 = malloc ( sizeof ( sog_scanner16 ) ); 221 | scanner16->patterns = malloc ( p_size * sizeof ( pat_hs_t16 ) ); 222 | } 223 | 224 | void sog_free16 () { 225 | 226 | free ( scanner16->patterns ); 227 | free ( scanner16 ); 228 | } 229 | 
230 | void preproc_sog16 ( unsigned char **pattern, int m, int p_size ) { 231 | 232 | unsigned int i; 233 | 234 | sog_reset_patterns ( p_size ); 235 | 236 | for ( i = 0; i < p_size; i++ ) 237 | sog_add_pattern ( pattern[i], m, p_size ); 238 | 239 | //Sort the patterns so that binary search can be used 240 | qsort ( scanner16->patterns, p_size, sizeof( pat_hs_t16 ), compSign ); 241 | } 242 | 243 | -------------------------------------------------------------------------------- /sog/sog32.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | // A structure for holding the hash value and the pattern for the 8-byte Rabin-Karp implementation 19 | typedef struct { 20 | 21 | uint32_t hs; 22 | uint8_t pat[32]; 23 | int index; 24 | 25 | } pat_hs_t32; 26 | 27 | //Scanner that provides final matching for 8-byte patterns with Rabin-Karp. 28 | typedef struct { 29 | 30 | // 2-level hash table 31 | uint8_t hs2[256*32]; 32 | 33 | // Table holding the patterns and their hash values. This table is ordered according to the hash values 34 | pat_hs_t32 *patterns; 35 | 36 | // Position of the first empty slot in the pattern table 37 | int pos; 38 | 39 | } sog_scanner32; 40 | 41 | sog_scanner32 *scanner32; 42 | 43 | #define GET32(address) (((uint32_t)((address)[0]) << 24) + ((uint32_t)((address)[1]) << 16) + ((uint32_t)((address)[2]) << 8) + (address)[3]) 44 | 45 | //Compare two patterns using their hash values 46 | static int compSign ( const void* s1, const void* s2 ) { 47 | 48 | uint32_t h1 = ( (pat_hs_t32 *) s1 )->hs; 49 | uint32_t h2 = ( (pat_hs_t32 *) s2 )->hs; 50 | 51 | if (h1 < h2) 52 | return -1; 53 | else if (h1 == h2) 54 | return 0; 55 | else 56 | return 1; 57 | } 58 | 59 | int sog_rkbt_verification32 ( unsigned char *text, int m, int p_size ) { 60 | 61 | uint32_t hs = ( GET32((text)) ^ GET32((text + 4)) ) ^ ( GET32((text + 8)) ^ GET32((text + 12)) ) ^ ( GET32((text + 16)) ^ GET32((text + 20)) ) ^ ( GET32((text + 24)) ^ GET32((text + 28)) ); 62 | 63 | /* printf("text = %c%c%c%c\n", *(text), *(text + 1), *(text + 2), *(text + 3)); 64 | printf("text = %s\n", text); 65 | printf("text hs = %i\n", hs); 66 | */ 67 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs); 68 | 69 | //printf("---%s\n", scanner32->patterns[lookfor].pat); 70 | 71 | /* check 2-level hash */ 72 | if ( scanner32->hs2[hs2level >> 3] & mask[hs2level & 0x07] ) { 73 | 74 | int lo = 0; 75 | int hi = p_size - 1; 76 | int mid; 77 | uint32_t hs_pat; 78 | 79 | // do the binary search 80 | while ( 
hi >= lo ) { 81 | 82 | mid = ( lo + hi ) / 2; 83 | hs_pat = scanner32->patterns[mid].hs; 84 | 85 | //printf(" mid = %i hs = %i hs_pat = %i index = %i pat = %s \n", mid, hs, scanner32->patterns[mid].hs, scanner32->patterns[mid].index, scanner32->patterns[mid].pat); 86 | 87 | if ( hs > hs_pat ) 88 | lo = ++mid; 89 | 90 | else if ( hs < hs_pat ) 91 | hi = --mid; 92 | 93 | //if text hash equals pattern hash verify the match 94 | else { 95 | 96 | // check for duplicates and patterns with same hash 97 | while ( mid > 0 && hs == scanner32->patterns[mid - 1].hs ) 98 | mid--; 99 | 100 | do { 101 | //printf("%c%c%c%c%c%c%c%c - %s\n", *(index - 7), *(index - 6), *(index - 5), *(index - 4), *(index - 3), *(index - 2), *(index - 1), *(index - 0), scanner32->patterns[mid].pat ); 102 | 103 | if ( memcmp ( text, scanner32->patterns[mid].pat, 32 ) == 0 ) 104 | return 1; 105 | 106 | mid++; 107 | 108 | } while ( mid < p_size && hs == scanner32->patterns[mid].hs ); 109 | 110 | break; 111 | } 112 | } 113 | } 114 | return -1; 115 | } 116 | 117 | unsigned int search_sog32 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) { 118 | 119 | register uint32_t E = 0xffffffff; 120 | 121 | int column, matches = 0; 122 | 123 | for ( column = 0; column < n - 2; column++ ) { 124 | 125 | //printf("\ncolumn %i character %c\n", column, *(text + column)); 126 | 127 | //printbinary(E, 8); 128 | 129 | //printf("hs: %i T[hs]: %i ", GET3GRAM( text + column ), T8[GET3GRAM( text + column )] ); 130 | 131 | //printbinary(T8[GET3GRAM( text + column )], 8); 132 | 133 | E = (E << 1) | T8[GET3GRAM( text + column )]; 134 | 135 | //printbinary(E, 8); 136 | 137 | //printbinary(E & 0x20, 8); 138 | 139 | if ( E & 0x20000000 ) 140 | continue; 141 | 142 | //printf("potential match at column %i\n", column + B - 1); 143 | 144 | if ( sog_rkbt_verification32 ( (unsigned char *)text + column - m + B, m, p_size ) != -1 ) 145 | matches++; 146 | 147 | } 148 | 149 | return matches; 150 | } 151 | 152 
| static void sog_add_pattern2 ( uint8_t *pattern, int m, int p_size ) { 153 | 154 | int i; 155 | 156 | uint32_t hs; 157 | uint16_t hs2level; 158 | 159 | if ( scanner32->pos < p_size ) { 160 | 161 | //add pattern 162 | for ( i = 0; i < m; i++ ) 163 | scanner32->patterns[scanner32->pos].pat[i] = pattern[i]; 164 | 165 | //add index 166 | scanner32->patterns[scanner32->pos].index = scanner32->pos; 167 | 168 | // Count hash 169 | scanner32->patterns[scanner32->pos].hs = ( GET32(pattern) ^ GET32(&pattern[4]) ) ^ ( GET32(&pattern[8]) ^ GET32(&pattern[12]) ) ^ ( GET32(&pattern[16]) ^ GET32(&pattern[20]) ) ^ ( GET32(&pattern[24]) ^ GET32(&pattern[28]) ); 170 | 171 | //printf("scanner32->patterns[%i].hs = %i\n", scanner32->pos, scanner32->patterns[scanner32->pos].hs); 172 | 173 | // Count 2-level hash 174 | hs = scanner32->patterns[scanner32->pos].hs; 175 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs ); 176 | 177 | scanner32->hs2[hs2level >> 3] |= mask[hs2level & 0x07]; 178 | 179 | scanner32->pos++; 180 | } 181 | } 182 | 183 | static void sog_add_pattern ( uint8_t *pattern, int m, int p_size ) { 184 | 185 | uint8_t *index = &pattern[0]; 186 | uint8_t *limit = &pattern[31]; 187 | 188 | unsigned int i = 0; 189 | 190 | uint32_t hs; 191 | 192 | sog_add_pattern2 ( pattern, m, p_size ); 193 | 194 | while ( index < limit ) { 195 | hs = GET3GRAM( index ); 196 | 197 | //printbinary(hs, 32); 198 | //printf("hs: %i T[hs]: %i ", hs, T[hs]); 199 | 200 | T32[hs] &= 0xffffffff - ( 1 << i ); 201 | 202 | //printbinary(T[hs], 8); 203 | 204 | index++; 205 | i++; 206 | } 207 | 208 | //printf("\n"); 209 | } 210 | 211 | static void sog_reset_patterns ( int m ) { 212 | 213 | unsigned int i; 214 | 215 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ ) 216 | T32[i] = 0xffffffff; 217 | 218 | scanner32->pos = 0; 219 | 220 | // Reset 2-level hashes 221 | for ( i = 0; i < 32 * 256; i++ ) 222 | scanner32->hs2[i] = 0x00; 223 | } 224 | 225 | void sog_init32 ( int p_size) { 226 | 227 | scanner32 = malloc ( sizeof 
( sog_scanner32 ) ); 228 | scanner32->patterns = malloc ( p_size * sizeof ( pat_hs_t32 ) ); 229 | } 230 | 231 | void sog_free32 () { 232 | 233 | free ( scanner32->patterns ); 234 | free ( scanner32 ); 235 | } 236 | 237 | void preproc_sog32 ( unsigned char **pattern, int m, int p_size ) { 238 | 239 | unsigned int i; 240 | 241 | sog_reset_patterns ( p_size ); 242 | 243 | for ( i = 0; i < p_size; i++ ) 244 | sog_add_pattern ( pattern[i], m, p_size ); 245 | 246 | //Sort the patterns so that binary search can be used 247 | qsort ( scanner32->patterns, p_size, sizeof( pat_hs_t32 ), compSign ); 248 | } 249 | 250 | -------------------------------------------------------------------------------- /sog/sog8.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | #define GET32(address) (((uint32_t)((address)[0]) << 24) + ((uint32_t)((address)[1]) << 16) + ((uint32_t)((address)[2]) << 8) + (address)[3]) 19 | 20 | //quicksort implementation 21 | void swap ( int *a, int *b ) { 22 | 23 | int t = *a; 24 | *a = *b; 25 | *b = t; 26 | } 27 | 28 | void my_sort ( uint32_t *hs, int *index, int beg, int end ) { 29 | 30 | if ( end > beg + 1 ) { 31 | 32 | int piv = hs[beg], l = beg + 1, r = end; 33 | 34 | while ( l < r ) { 35 | 36 | if ( hs[l] <= piv ) 37 | l++; 38 | else { 39 | swap ( &hs[l], &hs[--r]); 40 | swap ( &index[l], &index[r]); 41 | } 42 | } 43 | 44 | swap ( &hs[--l], &hs[beg]); 45 | swap ( &index[l], &index[beg]); 46 | my_sort ( hs, index, beg, l ); 47 | my_sort ( hs, index, r, end ); 48 | } 49 | } 50 | 51 | int sog_rkbt_verification8 ( uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, unsigned char *text, int m, int p_size ) { 52 | 53 | uint32_t hs = GET32((text)) ^ GET32((text + 4)); 54 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs); 55 | 56 | // check 2-level hash 57 | if ( scanner_hs2[hs2level >> 3] & mask[hs2level & 0x07] ) { 58 | 59 | int lo = 0; 60 | int hi = p_size - 1; 61 | int mid; 62 | uint32_t hs_pat; 63 | 64 | // do the binary search 65 | while ( hi >= lo ) { 66 | 67 | mid = ( lo + hi ) / 2; 68 | hs_pat = scanner_hs[mid]; 69 | 70 | if ( hs > hs_pat ) 71 | lo = ++mid; 72 | 73 | else if ( hs < hs_pat ) 74 | hi = --mid; 75 | 76 | //if text hash equals pattern hash verify the match 77 | else { 78 | // check for duplicates and patterns with same hash 79 | while ( mid > 0 && hs == scanner_hs[mid - 1] ) 80 | mid--; 81 | 82 | do { 83 | if ( memcmp ( text, pattern[scanner_index[mid]], 8 ) == 0 ) 84 | return 1; 85 | 86 | mid++; 87 | 88 | } while ( mid < p_size && hs == scanner_hs[mid] ); 89 | 90 | break; 91 | } 92 | } 93 | } 94 | return -1; 95 | } 96 | 97 | unsigned int search_sog8 ( uint8_t *T8, uint32_t *scanner_hs, 
int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) { 98 | 99 | register uint8_t E = 0xff; 100 | 101 | int column, matches = 0; 102 | 103 | for ( column = 0; column < n - 2; column++ ) { 104 | 105 | E = (E << 1) | T8[GET3GRAM( text + column )]; 106 | 107 | if ( E & 0x20 ) 108 | continue; 109 | 110 | if ( sog_rkbt_verification8 ( scanner_hs, scanner_index, scanner_hs2, pattern, (unsigned char *)text + column - m + B, m, p_size ) != -1 ) 111 | matches++; 112 | } 113 | 114 | return matches; 115 | } 116 | 117 | static void sog_add_pattern ( uint8_t **T8, int *scanner_pos, uint32_t **scanner_hs, int **scanner_index, uint8_t **scanner_hs2, uint8_t *pattern, int m, int p_size ) { 118 | 119 | uint8_t *index = &pattern[0]; 120 | uint8_t *limit = &pattern[6]; 121 | 122 | unsigned int i = 0; 123 | 124 | uint32_t hs, hs2; 125 | uint16_t hs2level; 126 | 127 | //add index 128 | *( *scanner_index + *scanner_pos ) = *scanner_pos; 129 | 130 | // Count hash 131 | *( *scanner_hs + *scanner_pos ) = GET32(pattern) ^ GET32(&pattern[4]); 132 | 133 | // Count 2-level hash 134 | hs2 = *( *scanner_hs + *scanner_pos ); 135 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs2 ); 136 | 137 | *( *scanner_hs2 + ( hs2level >> 3 ) ) |= mask[hs2level & 0x07]; 138 | *scanner_pos = *scanner_pos + 1; 139 | 140 | while ( index < limit ) { 141 | hs = GET3GRAM( index ); 142 | 143 | *( *T8 + hs) &= 0xff - ( 1 << i ); 144 | 145 | index++; 146 | i++; 147 | } 148 | } 149 | 150 | static void sog_reset_patterns ( uint8_t **T8, uint8_t **scanner_hs2) { 151 | 152 | unsigned int i; 153 | 154 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ ) 155 | *( *T8 + i ) = 0xff; 156 | 157 | // Reset 2-level hashes 158 | for ( i = 0; i < 32 * 256; i++ ) 159 | *( *scanner_hs2 + i ) = 0x00; 160 | } 161 | 162 | void preproc_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int 
p_size, int B ) { 163 | 164 | int i; 165 | 166 | int scanner_pos = 0; 167 | 168 | sog_reset_patterns ( &T8, &scanner_hs2 ); 169 | 170 | for ( i = 0; i < p_size; i++ ) 171 | sog_add_pattern ( &T8, &scanner_pos, &scanner_hs, &scanner_index, &scanner_hs2, pattern[i], m, p_size ); 172 | 173 | my_sort ( scanner_hs, scanner_index, 0, p_size ); 174 | } 175 | 176 | -------------------------------------------------------------------------------- /wu/wu.c: -------------------------------------------------------------------------------- 1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 2 | 3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify 4 | it under the terms of the GNU General Public License as published by 5 | the Free Software Foundation, either version 3 of the License, or 6 | (at your option) any later version. 7 | 8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful, 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License 14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". 
If not, see .*/ 15 | 16 | #include "../smatcher.h" 17 | 18 | void wu_determine_shiftsize(int alphabet) { 19 | 20 | //the maximum size of the hash value of the B-size suffix of the patterns for the Wu-Manber algorithm 21 | if (alphabet == 2) 22 | shiftsize = 22; // 1 << 2 + 1 << 2 + 1 + 1 23 | 24 | else if (alphabet == 4) 25 | shiftsize = 64; // 3 << 2 + 3 << 2 + 3 + 1 26 | 27 | else if (alphabet == 8) 28 | shiftsize = 148; // 7 << 2 + 7 << 2 + 7 + 1 29 | 30 | else if (alphabet == 20) 31 | shiftsize = 400; // 19 << 2 + 19 << 2 + 19 + 1 32 | 33 | else if (alphabet == 128) 34 | shiftsize = 2668; // 127 << 2 + 127 << 2 + 127 + 1 35 | 36 | else if (alphabet == 256) 37 | shiftsize = 5356; //304 << 2 + 304 << 2 + 304 + 1 38 | 39 | else if (alphabet == 512) 40 | shiftsize = 10732; //560 << 2 + 560 << 2 + 560 + 1 41 | 42 | else if (alphabet == 1024) 43 | shiftsize = 21484; //1072 << 2 + 1072 << 2 + 1072 + 1 44 | 45 | else 46 | fail("The alphabet size is not supported by wu-manber\n"); 47 | } 48 | 49 | unsigned int search_wu(unsigned char **pattern, int m, int p_size, 50 | unsigned char *text, int n, int *SHIFT, int *PREFIX_value, 51 | int *PREFIX_index, int *PREFIX_size) { 52 | 53 | int column = m - 1, i; 54 | 55 | unsigned int hash1, hash2; 56 | 57 | unsigned int matches = 0; 58 | 59 | size_t shift; 60 | 61 | while (column < n) { 62 | 63 | hash1 = text[column - 2]; 64 | hash1 <<= m_nBitsInShift; 65 | hash1 += text[column - 1]; 66 | hash1 <<= m_nBitsInShift; 67 | hash1 += text[column]; 68 | 69 | shift = SHIFT[hash1]; 70 | 71 | //printf("column %i hash1 = %i shift = %i\n", column, hash1, shift); 72 | 73 | if (shift == 0) { 74 | 75 | hash2 = text[column - m + 1]; 76 | hash2 <<= m_nBitsInShift; 77 | hash2 += text[column - m + 2]; 78 | 79 | //printf("hash2 = %i PREFIX[hash1].size = %i\n", hash2, PREFIX[hash1].size); 80 | 81 | //For every pattern with the same suffix as the text 82 | for (i = 0; i < PREFIX_size[hash1]; i++) { 83 | 84 | //If the prefix of the pattern matches that 
of the text 85 | if (hash2 == PREFIX_value[hash1 * p_size + i]) { 86 | 87 | //Compare directly the pattern with the text 88 | if (memcmp(pattern[PREFIX_index[hash1 * p_size + i]], 89 | text + column - m + 1, m) == 0) { 90 | 91 | matches++; 92 | 93 | //printf("Match of pattern index %i at %i\n", PREFIX_index[hash1 * p_size + i], column); 94 | 95 | break; 96 | } 97 | 98 | } 99 | } 100 | 101 | column++; 102 | } else 103 | column += shift; 104 | } 105 | 106 | return matches; 107 | } 108 | 109 | void preproc_wu(unsigned char **pattern, int m, int p_size, int alphabet, int B, 110 | int *SHIFT, int *PREFIX_value, int *PREFIX_index, int *PREFIX_size) { 111 | 112 | unsigned int j, q, hash; 113 | 114 | size_t shiftlen, prefixhash; 115 | 116 | for (j = 0; j < p_size; ++j) { 117 | 118 | //add each 3-character subpattern (similar to q-grams) 119 | for (q = m; q >= B; --q) { 120 | 121 | hash = pattern[j][q - 2 - 1]; // bring in offsets of X in pattern j 122 | hash <<= m_nBitsInShift; 123 | hash += pattern[j][q - 1 - 1]; 124 | hash <<= m_nBitsInShift; 125 | hash += pattern[j][q - 1]; 126 | 127 | //printf("hash = %i pattern[%i][%i] = %i pattern[%i][%i] = %i pattern[%i][%i] = %i\n", hash, j, q - 2 - 1, pattern[j][q - 2 - 1], j, q - 2, pattern[j][q - 2], j, q - 1, pattern[j][q - 1], j ); 128 | 129 | shiftlen = m - q; 130 | 131 | SHIFT[hash] = MIN(SHIFT[hash], shiftlen); 132 | 133 | //calculate the hash of the prefixes for each pattern 134 | if (shiftlen == 0) { 135 | 136 | prefixhash = pattern[j][0]; 137 | prefixhash <<= m_nBitsInShift; 138 | prefixhash += pattern[j][1]; 139 | 140 | PREFIX_value[hash * p_size + PREFIX_size[hash]] = prefixhash; 141 | PREFIX_index[hash * p_size + PREFIX_size[hash]] = j; 142 | 143 | PREFIX_size[hash]++; 144 | 145 | //printf("%i) PREFIX[%i].value[%i] = %i PREFIX[%i].index[%i] = %i\n", j, hash, PREFIX[hash].size - 1, PREFIX[hash].value[PREFIX[hash].size - 1], hash, PREFIX[hash].size - 1, hashmap[j].index ); 146 | } 147 | } 148 | } 149 | } 150 | 151 | 
unsigned int search_wu2(unsigned char *pattern, int m, int p_size, 152 | unsigned char *text, int n, int *SHIFT, int *PREFIX_value, 153 | int *PREFIX_index, int *PREFIX_size) { 154 | 155 | int column = m - 1, i; 156 | 157 | unsigned int hash1, hash2; 158 | 159 | unsigned int matches = 0; 160 | 161 | size_t shift; 162 | 163 | while (column < n) { 164 | 165 | hash1 = text[column - 2]; 166 | hash1 <<= m_nBitsInShift; 167 | hash1 += text[column - 1]; 168 | hash1 <<= m_nBitsInShift; 169 | hash1 += text[column]; 170 | 171 | shift = SHIFT[hash1]; 172 | 173 | //printf("column %i hash1 = %i shift = %i\n", column, hash1, shift); 174 | 175 | if (shift == 0) { 176 | 177 | hash2 = text[column - m + 1]; 178 | hash2 <<= m_nBitsInShift; 179 | hash2 += text[column - m + 2]; 180 | 181 | //printf("hash2 = %i PREFIX[hash1].size = %i\n", hash2, PREFIX[hash1].size); 182 | 183 | //For every pattern with the same suffix as the text 184 | for (i = 0; i < PREFIX_size[hash1]; i++) { 185 | 186 | //If the prefix of the pattern matches that of the text 187 | if (hash2 == PREFIX_value[hash1 * p_size + i]) { 188 | 189 | //Compare directly the pattern with the text 190 | if (memcmp(pattern + (PREFIX_index[hash1 * p_size + i] * m), 191 | text + column - m + 1, m) == 0) { 192 | 193 | matches++; 194 | 195 | //printf("Match of pattern index %i at %i\n", PREFIX_index[hash1 * p_size + i], column); 196 | 197 | break; 198 | } 199 | 200 | } 201 | } 202 | 203 | column++; 204 | } else 205 | column += shift; 206 | } 207 | 208 | return matches; 209 | } 210 | 211 | void preproc_wu2(unsigned char *pattern, int m, int p_size, int alphabet, int B, 212 | int *SHIFT, int *PREFIX_value, int *PREFIX_index, int *PREFIX_size) { 213 | 214 | unsigned int j, q, hash; 215 | 216 | size_t shiftlen, prefixhash; 217 | 218 | for (j = 0; j < p_size; ++j) { 219 | 220 | //add each 3-character subpattern (similar to q-grams) 221 | for (q = m; q >= B; --q) { 222 | 223 | hash = pattern[j * m + (q - 2 - 1)]; // bring in offsets of X in 
pattern j 224 | hash <<= m_nBitsInShift; 225 | hash += pattern[j * m + (q - 1 - 1)]; 226 | hash <<= m_nBitsInShift; 227 | hash += pattern[j * m + (q - 1)]; 228 | 229 | //printf("hash = %i pattern[%i][%i] = %i pattern[%i][%i] = %i pattern[%i][%i] = %i\n", hash, j, q - 2 - 1, pattern[j][q - 2 - 1], j, q - 2, pattern[j][q - 2], j, q - 1, pattern[j][q - 1], j ); 230 | 231 | shiftlen = m - q; 232 | 233 | SHIFT[hash] = MIN(SHIFT[hash], shiftlen); 234 | 235 | //calculate the hash of the prefixes for each pattern 236 | if (shiftlen == 0) { 237 | 238 | prefixhash = pattern[j * m]; 239 | prefixhash <<= m_nBitsInShift; 240 | prefixhash += pattern[j * m + 1]; 241 | 242 | PREFIX_value[hash * p_size + PREFIX_size[hash]] = prefixhash; 243 | PREFIX_index[hash * p_size + PREFIX_size[hash]] = j; 244 | 245 | PREFIX_size[hash]++; 246 | 247 | //printf("%i) PREFIX[%i].value[%i] = %i PREFIX[%i].index[%i] = %i\n", j, hash, PREFIX[hash].size - 1, PREFIX[hash].value[PREFIX[hash].size - 1], hash, PREFIX[hash].size - 1, hashmap[j].index ); 248 | } 249 | } 250 | } 251 | } 252 | 253 | --------------------------------------------------------------------------------