├── .cproject
├── .project
├── LICENSE.txt
├── Makefile
├── README.md
├── ac
├── ac.c
└── list.h
├── bm
└── bm.c
├── cuda
├── cuPrintf.cu
├── cuPrintf.cuh
├── cuda.h
├── cuda_ac.cu
├── cuda_sbom.cu
├── cuda_sh.cu
├── cuda_sog.cu
└── cuda_wm.cu
├── deviceQuery.txt
├── execute.sh
├── kmp
└── kmp.c
├── main.c
├── profile.sh
├── sbom
└── sbom.c
├── sh
├── sh.c
└── support
│ ├── atomic.h
│ ├── bitmap.h
│ ├── dict.c
│ ├── dict.h
│ ├── endian.h
│ ├── filedes.c
│ ├── filedes.h
│ ├── hashtable.c
│ ├── hashtable.h
│ ├── list.c
│ ├── list.h
│ ├── lock.h
│ ├── log.c
│ ├── log.h
│ ├── macros.h
│ ├── math.h
│ ├── md5.h
│ ├── md5c.c
│ ├── multihash.c
│ ├── multihash.h
│ ├── prettyprint.c
│ ├── prettyprint.h
│ ├── profiler.c
│ ├── profiler.h
│ ├── radix.c
│ ├── radix.h
│ ├── serialize.c
│ ├── serialize.h
│ ├── slist.c
│ ├── slist.h
│ ├── stack.h
│ ├── string.c
│ ├── string.h
│ ├── timer.c
│ ├── timer.h
│ ├── timestamp.h
│ └── ue_space
│ ├── ixa_sdk
│ ├── README
│ ├── ixa_sdk_4.1_LinuxPatched.tgz
│ └── ixa_sdk_4.2_LinuxPatched.tgz
│ └── uengine
│ ├── Makefile
│ ├── compat.h
│ ├── ixp2000-lib-userspace.c
│ ├── ixp2000-msf.h
│ ├── ixp2000-ue-disas.c
│ ├── ixp2000-ue-disas.h
│ ├── ixp2000-uengine.c
│ ├── ixp2000-uengine.h
│ ├── ixp2400-msf.c
│ ├── ixp2400-msf.h
│ └── version
├── smatcher.h
├── sog
├── sog16.c
├── sog32.c
└── sog8.c
└── wu
└── wu.c
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | cudaCharis
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder
10 | clean,full,incremental,
11 |
12 |
13 | ?name?
14 |
15 |
16 |
17 | org.eclipse.cdt.make.core.append_environment
18 | true
19 |
20 |
21 | org.eclipse.cdt.make.core.autoBuildTarget
22 | all
23 |
24 |
25 | org.eclipse.cdt.make.core.buildArguments
26 |
27 |
28 |
29 | org.eclipse.cdt.make.core.buildCommand
30 | make
31 |
32 |
33 | org.eclipse.cdt.make.core.buildLocation
34 | ${workspace_loc:/cudaCharis/Debug}
35 |
36 |
37 | org.eclipse.cdt.make.core.cleanBuildTarget
38 | clean
39 |
40 |
41 | org.eclipse.cdt.make.core.contents
42 | org.eclipse.cdt.make.core.activeConfigSettings
43 |
44 |
45 | org.eclipse.cdt.make.core.enableAutoBuild
46 | false
47 |
48 |
49 | org.eclipse.cdt.make.core.enableCleanBuild
50 | true
51 |
52 |
53 | org.eclipse.cdt.make.core.enableFullBuild
54 | true
55 |
56 |
57 | org.eclipse.cdt.make.core.fullBuildTarget
58 | all
59 |
60 |
61 | org.eclipse.cdt.make.core.stopOnError
62 | true
63 |
64 |
65 | org.eclipse.cdt.make.core.useDefaultBuildCmd
66 | true
67 |
68 |
69 |
70 |
71 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder
72 | full,incremental,
73 |
74 |
75 |
76 |
77 |
78 | org.eclipse.cdt.core.cnature
79 | org.eclipse.cdt.core.ccnature
80 | org.eclipse.cdt.managedbuilder.core.managedBuildNature
81 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature
82 |
83 |
84 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
# Build script for the "smatcher" binary: the C sources are compiled with the
# MPI compiler wrapper, the CUDA sources with nvcc, and everything is linked
# against the CUDA runtime and MPICH.

CC = mpicc
NVCC = nvcc

# $(HOME) is used instead of "~": the value ends up glued behind "-I" in
# NVCCFLAGS, where neither make nor the shell performs tilde expansion.
SDKPATH := $(HOME)/NVIDIA_GPU_Computing_SDK
CUDAPATH := /usr/local/cuda

TARGET = smatcher

OBJS = kmp.o bm.o ac.o sh.o sbom.o wu.o sog8.o main.o helper.o cuda_ac.o cuda_sh.o cuda_sbom.o cuda_wm.o cuda_sog.o

#CPPFLAGS=-Wall -Wno-pointer-sign -O0 -g -funroll-loops -pg
CPPFLAGS=-Wall -Wno-pointer-sign -O2 -funroll-loops
NVCCFLAGS= -O2 -I$(CUDAPATH)/include -I$(SDKPATH)/shared/inc -I$(SDKPATH)/C/common/inc --ptxas-options=-v -arch=compute_12 -code=sm_12,compute_12

#LDFLAGS=-L$(CUDAPATH)/lib -L$(SDKPATH)/shared/lib/linux -L$(SDKPATH)/C/lib -lcuda -lcudart -lmpich
LDFLAGS=-L$(CUDAPATH)/lib -lcuda -lcudart -lmpich

# Neither of these targets names a file that the build creates
.PHONY: all clean

all: $(TARGET)

$(TARGET): $(OBJS) $(SEQUENTIAL-OBJS)
	$(CC) $(CPPFLAGS) $(OBJS) -o $(TARGET) $(LDFLAGS)

main.o: main.c
	$(CC) $(CPPFLAGS) -c main.c

kmp.o: kmp/kmp.c
	$(CC) $(CPPFLAGS) -c kmp/kmp.c

bm.o: bm/bm.c
	$(CC) $(CPPFLAGS) -c bm/bm.c

ac.o: ac/ac.c
	$(CC) $(CPPFLAGS) -c ac/ac.c

sh.o: sh/sh.c
	$(CC) $(CPPFLAGS) -c sh/sh.c

sbom.o: sbom/sbom.c
	$(CC) $(CPPFLAGS) -c sbom/sbom.c

wu.o: wu/wu.c
	$(CC) $(CPPFLAGS) -c wu/wu.c

sog8.o: sog/sog8.c
	$(CC) $(CPPFLAGS) -c sog/sog8.c

helper.o: ../helper.c
	$(CC) $(CPPFLAGS) -c ../helper.c

cuda_ac.o: cuda/cuda_ac.cu
	$(NVCC) $(NVCCFLAGS) -c cuda/cuda_ac.cu

cuda_sh.o: cuda/cuda_sh.cu
	$(NVCC) $(NVCCFLAGS) -c cuda/cuda_sh.cu

cuda_sbom.o: cuda/cuda_sbom.cu
	$(NVCC) $(NVCCFLAGS) -c cuda/cuda_sbom.cu

cuda_wm.o: cuda/cuda_wm.cu
	$(NVCC) $(NVCCFLAGS) -c cuda/cuda_wm.cu

cuda_sog.o: cuda/cuda_sog.cu
	$(NVCC) $(NVCCFLAGS) -c cuda/cuda_sog.cu

clean:
	rm -f *.o *.d $(TARGET) core
67 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database.
2 |
3 | Charalampos S. Kouzinopoulos, Yannis M. Assael, Themistoklis K. Pyrgiotis, Konstantinos G. Margaritis
4 |
5 |
6 | Multiple matching algorithms are used to locate the occurrences of patterns from a finite pattern set in a large input string. Aho-Corasick and Wu-Manber, two of the best-known algorithms for multiple matching, require increased computing power, particularly in cases where large datasets must be processed, as is common in computational biology applications. Over the past years, Graphics Processing Units (GPUs) have evolved into powerful parallel processors that outperform Central Processing Units (CPUs) in scientific calculations. Moreover, multiple GPUs can be used in parallel, forming hybrid computer cluster configurations to achieve an even higher processing throughput. This paper evaluates the speedup of the parallel implementation of the Aho-Corasick and Wu-Manber algorithms on a hybrid GPU cluster, when used to process a snapshot of the Expressed Sequence Tags of the human genome, for different problem parameters.
7 |
8 |
9 | ### Links
10 | [arXiv pre-print](http://arxiv.org/abs/1407.2889)
11 |
12 | ### Bibtex
13 | ```
14 | @article{kouzinopoulos2015hybrid,
15 | title={A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database},
16 | author={Kouzinopoulos, Charalampos S. and Assael, Yannis M. and Pyrgiotis, Themistoklis K. and Margaritis, Konstantinos G.},
17 | journal={International Journal on Artificial Intelligence Tools},
18 | volume={24},
19 | number={1},
20 | pages={1540001},
21 | year={2015},
22 | publisher={World Scientific}
23 | }
24 | ```
25 |
26 |
27 | ### License
28 | Code licensed under the GNU General Public License v3.0.
--------------------------------------------------------------------------------
/ac/ac.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
18 | #include "list.h"
19 |
20 | /// free an AC table from a given startnode (recursively)
21 | void ac_free ( struct ac_state *state, int alphabet ) {
22 |
23 | int i;
24 |
25 | for ( i = 0; i < alphabet; i++ )
26 | if ( state->next[i] )
27 | ac_free ( state->next[i], alphabet );
28 |
29 | if ( state->output )
30 | free ( state->output );
31 |
32 | free ( state->next );
33 | free ( state );
34 | }
35 |
36 | /// initialize the empty-table
37 | void ac_init ( struct ac_table *g, int alphabet, int *state_transition ) {
38 |
39 | g->zerostate = NULL;
40 | g->patterncounter = 0;
41 |
42 | //Create the root note
43 | g->zerostate = malloc ( sizeof ( struct ac_state ) );
44 |
45 | if ( !g->zerostate )
46 | fail ( "Could not allocate memory\n" );
47 |
48 | g->idcounter = 1;
49 | g->zerostate->id = 0;
50 |
51 | g->zerostate->output = NULL;
52 |
53 | g->zerostate->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) );
54 |
55 | //Set all alphabet bytes of root node->next to 0
56 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct ac_state * ) );
57 |
58 | //Set all cells of transition table for state 0 to 0
59 | int i;
60 |
61 | for ( i = 0; i < alphabet; i++ )
62 | state_transition[i] = 0;
63 | }
64 |
65 | /// free an entire AC table
66 | void ac_destroy ( struct ac_table *in, int alphabet ) {
67 |
68 | int i;
69 |
70 | for ( i = 0; i < alphabet; i++ )
71 | if ( in->zerostate->next[i] && in->zerostate->next[i]->id > 0 ) {
72 | ac_free ( in->zerostate->next[i], alphabet );
73 | in->zerostate->next[i] = NULL;
74 | }
75 | free ( in->zerostate->next );
76 | free ( in->zerostate );
77 | }
78 |
79 | void ac_maketree ( struct ac_table *g, int alphabet, unsigned int *state_supply ) {
80 |
81 | struct list *list = NULL;
82 | struct ac_state *state, *s, *cur;
83 | int i/*, j*/;
84 |
85 | // Set all NULL transitions of 0 state to point to itself
86 | for ( i = 0; i < alphabet; i++ ) {
87 | if ( !g->zerostate->next[i] )
88 | g->zerostate->next[i] = g->zerostate;
89 | else {
90 | list = list_append ( list, g->zerostate->next[i] );
91 | g->zerostate->next[i]->fail = g->zerostate;
92 | }
93 | }
94 |
95 | // Set fail() for depth > 0
96 | while ( list ) {
97 |
98 | cur = ( struct ac_state * )list->id;
99 |
100 | for ( i = 0; i < alphabet; i++ ) {
101 |
102 | s = cur->next[i];
103 |
104 | if ( s ) {
105 |
106 | list = list_append ( list, s );
107 | state = cur->fail;
108 |
109 | while ( !state->next[i] )
110 | state = state->fail;
111 |
112 | s->fail = state->next[i];
113 |
114 | state_supply[s->id] = s->fail->id;
115 |
116 | //printf("Created additional link from state %i to state %i\n", s->id, s->fail->id);
117 | }
118 | // Join outputs missing
119 | }
120 | list = list_pop ( list );
121 | }
122 |
123 | list_destroy ( list );
124 | }
125 |
126 | // Insert a string to the tree
127 | void ac_addstring ( struct ac_table *g, unsigned int i, unsigned char *string, int m, int alphabet, int *state_transition, unsigned int *state_final ) {
128 |
129 | struct ac_state *state, *next = NULL;
130 | int j, done = 0;
131 |
132 | // as long as next already exists follow them
133 | j = 0;
134 | state = g->zerostate;
135 |
136 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) {
137 |
138 | state = next;
139 |
140 | if ( j == m )
141 | done = 1;
142 |
143 | j++;
144 |
145 | //printf("character %c state: %i\n", *( string + j ), state->id);
146 | }
147 |
148 | // not done yet
149 | if ( !done ) {
150 | while ( j < m ) {
151 | // Create new state
152 | next = malloc ( sizeof ( struct ac_state ) );
153 |
154 | if ( !next )
155 | fail ( "Could not allocate memory\n" );
156 |
157 | next->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) );
158 |
159 | next->id = g->idcounter++;
160 | next->output = NULL;
161 |
162 | state_transition[state->id * alphabet + *( string + j )] = next->id;
163 | //printf("setting %i to %i\n", state->id * alphabet + *( string + j ), next->id);
164 |
165 | //printf("Created link from state %i to %i for character %i (j = %i)\n", state->id, next->id, *( string + j ), j );
166 |
167 | //Set all alphabet bytes of the next node's->next to 0
168 | //This is the _extended_ Aho-Corasick algorithm. A complete automaton is used where all states
169 | //have an outgoing transition for every alphabet character of the alphabet
170 | memset ( next->next, 0, alphabet * sizeof ( struct ac_state * ) );
171 |
172 | state->next[*( string + j )] = next;
173 | state = next;
174 |
175 | //printf("character %c state: %i\n", *( string + j ), state->id);
176 | j++;
177 | }
178 | }
179 |
180 | //printf(" Currently at state %i\n", state->id);
181 |
182 | //After finishing with the previous characters of the keyword, add the terminal state if it does not exist
183 | if ( !state->output ) {
184 |
185 | //printf(" For pattern %i added the terminal state %i of %i\n", i, state->id, g->patterncounter);
186 | state_final[state->id] = 1;
187 |
188 | //allocate memory and copy *string to state->output
189 | state->output = ( unsigned char * ) malloc ( sizeof ( unsigned char ) * m );
190 | memcpy ( state->output, string, m );
191 |
192 | state->keywordline = g->patterncounter;
193 |
194 | g->patterncounter++;
195 | }
196 | }
197 |
198 | unsigned int search_ac ( unsigned char *text, int n, struct ac_table *table ) {
199 |
200 | struct ac_state *head = table->zerostate;
201 | struct ac_state *r, *s;
202 |
203 | int column, matches = 0;
204 |
205 | r = head;
206 |
207 | for ( column = 0; column < n; column++ ) {
208 |
209 | while ( ( s = r->next[*( text + column ) ] ) == NULL )
210 | r = r->fail;
211 | r = s;
212 |
213 | //printf("column %i r->id = %i\n", column, r->id);
214 |
215 | if ( r->output != NULL ) {
216 | matches++;
217 | //printf("match at %i for r %i\n", column, r->id);
218 | }
219 | }
220 |
221 | return matches;
222 | }
223 |
224 | struct ac_table *preproc_ac ( unsigned char **pattern, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_supply, unsigned int *state_final ) {
225 |
226 | unsigned int i;
227 |
228 | struct ac_table *table;
229 |
230 | // allocate memory for the table
231 |
232 | table = malloc ( sizeof ( struct ac_table ) );
233 |
234 | if ( !table )
235 | fail ( "Could not initialize table\n" );
236 |
237 | ac_init ( table, alphabet, state_transition );
238 |
239 | for ( i = 0; i < p_size; i++ )
240 | ac_addstring ( table, i, pattern[i], m, alphabet, state_transition, state_final );
241 |
242 | ac_maketree ( table, alphabet, state_supply );
243 |
244 | return table;
245 | }
246 |
// Release a table returned by preproc_ac: first all of its states, then
// the table structure itself.
void free_ac ( struct ac_table *table, int alphabet ) {

	// ac_destroy leaves the table structure allocated
	ac_destroy ( table, alphabet );
	free ( table );
}
253 |
--------------------------------------------------------------------------------
/ac/list.h:
--------------------------------------------------------------------------------
1 | // list.[ch]
2 | // a doubly linked list
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 | #ifndef WDB_SLIST_H
10 | #define WDB_SLIST_H
11 |
12 | #ifdef __KERNEL__
13 | #include
14 | #include
15 | #include
16 | #else
17 | #include
18 | #include
19 | #include
20 | #endif
21 |
22 |
// One node of the doubly linked list
struct list {
	void *id;           // payload pointer; list_exists compares it by address
	struct list *next;  // following node, NULL at the tail
	struct list *prev;  // preceding node, NULL at the head
};
28 |
29 | static inline struct list * list_create(void *id)
30 | {
31 | struct list * new;
32 |
33 | new = malloc (sizeof(struct list));
34 | if (!new)
35 | return NULL;
36 | new->id = id;
37 | new->next = NULL;
38 | new->prev = NULL;
39 |
40 | return new;
41 | }
42 |
43 | static inline struct list * list_insert(struct list *start, void *id)
44 | {
45 | struct list *new = list_create(id);
46 |
47 | if (!new)
48 | return NULL;
49 |
50 | if (start){
51 | new->next = start;
52 | start->prev = new;
53 | }
54 | return new;
55 | }
56 |
57 | static inline struct list * list_append(struct list *start, void *id)
58 | {
59 | struct list *new = list_create(id);
60 | struct list* cur;
61 |
62 | if (!new)
63 | return NULL;
64 |
65 | if (!start)
66 | return new;
67 |
68 | cur = start;
69 | while (cur->next)
70 | cur = cur->next;
71 | cur->next = new;
72 | new->prev = cur;
73 | return start;
74 | }
75 |
76 | /** strange function for a list
77 | * used only for duplicate removal
78 | *
79 | * note that the function returns NULL in two
80 | * distinct cases: no 'start', or 'start' is the only item
81 | */
82 | static inline struct list * list_pop(struct list *start)
83 | {
84 | struct list *tmp;
85 |
86 | if (!start)
87 | return NULL;
88 |
89 | tmp = start;
90 | start = start->next;
91 | free(tmp);
92 |
93 | return start;
94 | }
95 |
96 | static inline struct list * list_invert(struct list *start)
97 | {
98 | struct list *cur, *tmp=NULL;
99 |
100 | if (!start->next)
101 | return start;
102 |
103 | cur = start;
104 | // swap {prev,next} pointers
105 | while (cur){
106 | tmp = cur->next;
107 | cur->next = cur->prev;
108 | cur->prev = tmp;
109 | tmp = cur;
110 | cur = cur->prev;
111 | }
112 |
113 | return tmp;
114 | }
115 |
116 | // return the item in the list that matches the id
117 | static inline struct list * list_exists(struct list *start, void * id)
118 | {
119 | struct list *cur;
120 |
121 | if (!start)
122 | return NULL;
123 |
124 | // find our spot in the list
125 | cur = start;
126 | while (cur && cur->id != id)
127 | cur = cur->next;
128 | if (!cur)
129 | return NULL;
130 | else
131 | return cur;
132 | }
133 |
134 | /** unlink an item. can be used together with list_foreach */
135 | static inline struct list * list_unlink(struct list *cur)
136 | {
137 | struct list *tmp = NULL;
138 |
139 | if (cur->next){
140 | cur->next->prev = cur->prev;
141 | tmp = cur->next;
142 | }
143 | if (cur->prev){
144 | cur->prev->next = cur->next;
145 | tmp = cur->prev;
146 | }
147 |
148 | if (!tmp)
149 | return NULL; // no cur->next && no cur->prev ? then it's an empty list
150 |
151 | while (tmp->prev)
152 | tmp = tmp->prev;
153 | return tmp; // return the new startnode
154 | }
155 |
/** unlink cur from its list and free it. returns start of the list */
static inline struct list * list_remove(struct list *cur)
{
	struct list *head = list_unlink(cur);

	free(cur);

	return head;
}
163 |
// Remove the first node whose id pointer equals id, if there is one.
// Returns the start of the resulting list.
static inline struct list * list_remove_id(struct list * list, void * id)
{
	struct list *match = list_exists(list, id);

	if (match == NULL)
		return list;

	return list_remove(match);
}
171 |
172 | struct list * list_insert_sorted(struct list *start, void *id);
173 |
174 | static inline int list_len(struct list *list)
175 | {
176 | int i=0;
177 | while (list){
178 | i++;
179 | list = list->next;
180 | }
181 | return i;
182 | }
183 |
// Iterate over a list: cur visits every node from list to the tail.
// Arguments are parenthesized so that expressions can be passed safely.
#define list_foreach(list, cur) \
	for ((cur) = (list); (cur); (cur) = (cur)->next)

// Free every node from deadlist onwards and leave deadlist NULL.
// Wrapped in do/while (0) so that the macro behaves as a single statement,
// e.g. as the body of an unbraced if/else. deadlist must be an lvalue.
#define list_destroy(deadlist) \
	do { \
		while (deadlist) \
			(deadlist) = list_pop(deadlist); \
	} while (0)
189 |
190 | #endif /* WDB_SLIST_H */
191 |
192 |
--------------------------------------------------------------------------------
/bm/bm.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
//Bad character shift
//
// Fills bmBc[0 .. alphabet-1]: every entry starts at m, then for each of the
// p_size patterns and each position i < m - 1 the entry of the symbol at
// that position is lowered to m - i - 1 if that is smaller.
//
// pattern   p_size patterns, each with at least m - 1 readable symbols
// m         pattern length
// p_size    number of patterns
// alphabet  number of entries in bmBc
// bmBc      output table
void preBmBc ( unsigned char **pattern, int m, int p_size, int alphabet, int *bmBc ) {

	// Signed counters: with unsigned ones, "i < m - 1" wraps around for
	// m <= 0 and "j < p_size" wraps around for a negative p_size
	int i, j;

	for ( i = 0; i < alphabet; ++i )
		bmBc[i] = m;

	for ( j = 0; j < p_size; j++ )
		for ( i = 0; i < m - 1; ++i ) {

			int shift = m - i - 1;
			unsigned char symbol = pattern[j][i];

			if ( shift < bmBc[symbol] )
				bmBc[symbol] = shift;
		}
}
30 | /*
31 | void suffixes ( unsigned char *x, int m, int *suff ) {
32 |
33 | int f, g, i;
34 |
35 | suff[m - 1] = m;
36 |
37 | printf("suff[%i] = %i\n", m - 1, suff[m - 1]);
38 |
39 | g = m - 1;
40 |
41 | for ( i = m - 2; i >= 0; --i ) {
42 |
43 | //printf("i = %i |>| g = %i AND suff[%i] = %i |<| %i\n", i, g, i + m - 1 - f, suff[i + m - 1 - f], i - g);
44 |
45 | if ( i > g && suff[i + m - 1 - f] < i - g ) {
46 | suff[i] = suff[i + m - 1 - f];
47 |
48 | printf("suff[%i] = suff[%i] = %i\n", i, i + m - 1 - f, suff[i]);
49 | }
50 |
51 | else {
52 | if ( i < g )
53 | g = i;
54 |
55 | f = i;
56 |
57 | while (g >= 0 && x[g] == x[g + m - 1 - f])
58 | --g;
59 |
60 | suff[i] = f - g;
61 |
62 | printf("suff[%i] = %i\n", i, suff[i]);
63 | }
64 | }
65 | }
66 |
67 | //Good suffix shift
68 | void preBmGs( unsigned char **pattern, int m, int bmGs[] ) {
69 |
70 | int i, j, suff[m];
71 |
72 | //suffixes( pattern, m, suff );
73 |
74 | suffixes("AACAA", m, suff );
75 |
76 | printf("\n");
77 |
78 | for ( i = 0; i < m; ++i )
79 | bmGs[i] = m;
80 |
81 | j = 0;
82 |
83 | for ( i = m - 1; i >= 0; --i )
84 | if ( suff[i] == i + 1 )
85 | for ( ; j < m - 1 - i; ++j )
86 | if ( bmGs[j] == m )
87 | bmGs[j] = m - 1 - i;
88 |
89 | for ( i = 0; i < m; i++ )
90 | printf("bmGs[%i] = %i\n", i, bmGs[i]);
91 | printf("\n");
92 |
93 | for (i = 0; i <= m - 2; ++i)
94 | bmGs[m - 1 - suff[i]] = m - 1 - i;
95 |
96 | for ( i = 0; i < m; i++ )
97 | printf("bmGs[%i] = %i\n", i, bmGs[i]);
98 |
99 | exit(0);
100 | }
101 | */
102 |
--------------------------------------------------------------------------------
/cuda/cuPrintf.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
3 | *
4 | * Please refer to the NVIDIA end user license agreement (EULA) associated
5 | * with this source code for terms and conditions that govern your use of
6 | * this software. Any use, reproduction, disclosure, or distribution of
7 | * this software and related documentation outside the terms of the EULA
8 | * is strictly prohibited.
9 | *
10 | */
11 |
12 | #ifndef CUPRINTF_H
13 | #define CUPRINTF_H
14 |
15 | /*
16 | * This is the header file supporting cuPrintf.cu and defining both
17 | * the host and device-side interfaces. See that file for some more
18 | * explanation and sample use code. See also below for details of the
19 | * host-side interfaces.
20 | *
21 | * Quick sample code:
22 | *
23 | #include "cuPrintf.cu"
24 |
25 | __global__ void testKernel(int val)
26 | {
27 | cuPrintf("Value is: %d\n", val);
28 | }
29 |
30 | int main()
31 | {
32 | cudaPrintfInit();
33 | testKernel<<< 2, 3 >>>(10);
34 | cudaPrintfDisplay(stdout, true);
35 | cudaPrintfEnd();
36 | return 0;
37 | }
38 | */
39 |
40 | ///////////////////////////////////////////////////////////////////////////////
41 | // DEVICE SIDE
42 | // External function definitions for device-side code
43 |
// Abuse of templates to simulate varargs: one overload per argument count,
// from zero up to ten arguments after the format string. Each overload
// names its own template type parameters T1..Tn.
__device__ int cuPrintf(const char *fmt);
template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
56 |
57 |
58 | //
59 | // cuPrintfRestrict
60 | //
61 | // Called to restrict output to a given thread/block. Pass
62 | // the constant CUPRINTF_UNRESTRICTED to unrestrict output
63 | // for thread/block IDs. Note you can therefore allow
64 | // "all printfs from block 3" or "printfs from thread 2
65 | // on all blocks", or "printfs only from block 1, thread 5".
66 | //
67 | // Arguments:
68 | // threadid - Thread ID to allow printfs from
69 | // blockid - Block ID to allow printfs from
70 | //
71 | // NOTE: Restrictions last between invocations of
72 | // kernels unless cudaPrintfInit() is called again.
73 | //
74 | #define CUPRINTF_UNRESTRICTED -1
75 | __device__ void cuPrintfRestrict(int threadid, int blockid);
76 |
77 |
78 |
79 | ///////////////////////////////////////////////////////////////////////////////
80 | // HOST SIDE
81 | // External function definitions for host-side code
82 |
83 | //
84 | // cudaPrintfInit
85 | //
86 | // Call this once to initialise the printf system. If the output
87 | // file or buffer size needs to be changed, call cudaPrintfEnd()
88 | // before re-calling cudaPrintfInit().
89 | //
90 | // The default size for the buffer is 1 megabyte. For CUDA
91 | // architecture 1.1 and above, the buffer is filled linearly and
92 | // is completely used; however for architecture 1.0, the buffer
93 | // is divided into as many segments are there are threads, even
94 | // if some threads do not call cuPrintf().
95 | //
96 | // Arguments:
97 | // bufferLen - Length, in bytes, of total space to reserve
98 | // (in device global memory) for output.
99 | //
100 | // Returns:
101 | // cudaSuccess if all is well.
102 | //
103 | extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576); // 1-meg - that's enough for 4096 printfs by all threads put together
104 |
105 | //
106 | // cudaPrintfEnd
107 | //
108 | // Cleans up all memories allocated by cudaPrintfInit().
109 | // Call this at exit, or before calling cudaPrintfInit() again.
110 | //
111 | extern "C" void cudaPrintfEnd();
112 |
113 | //
114 | // cudaPrintfDisplay
115 | //
116 | // Dumps the contents of the output buffer to the specified
117 | // file pointer. If the output pointer is not specified,
118 | // the default "stdout" is used.
119 | //
120 | // Arguments:
121 | // outputFP - A file pointer to an output stream.
122 | // showThreadID - If "true", output strings are prefixed
123 | // by "[blockid, threadid] " at output.
124 | //
125 | // Returns:
126 | // cudaSuccess if all is well.
127 | //
128 | extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
129 |
130 | #endif // CUPRINTF_H
131 |
--------------------------------------------------------------------------------
/cuda/cuda.h:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #ifndef CUDA_H
17 | #define CUDA_H
18 |
19 | #include
20 | #include
21 | #include
22 | #include
23 |
// Larger of two values. Both the arguments and the whole expansion are
// parenthesized so that the macro can be used inside larger expressions.
// Each argument may be evaluated twice, so avoid side effects.
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
25 |
// Fetch the last error recorded by the CUDA runtime. If there is one, print
// it to stderr prefixed with msg and terminate with EXIT_FAILURE.
static void checkCUDAError(const char *msg) {

	const cudaError_t status = cudaGetLastError();

	if (status == cudaSuccess)
		return;

	fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(status));
	exit(EXIT_FAILURE);
}
35 |
// Wrap a CUDA runtime call: when it returns an error, the error string is
// printed to stderr together with the file and line of the call site and
// the process exits with status -1.
#define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)

inline static void __checkCudaErrors(cudaError err, const char *file,
		const int line) {

	// Nothing to report for a successful call
	if (err == cudaSuccess)
		return;

	fprintf(stderr, "%s(%i) : CUDA Runtime API error %d: %s.\n", file, line,
			(int) err, cudaGetErrorString(err));
	exit(-1);
}
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/deviceQuery.txt:
--------------------------------------------------------------------------------
1 | /home/cuda/NVIDIA_GPU_Computing_SDK/C/bin/linux/release/deviceQuery Starting...
2 |
3 | CUDA Device Query (Runtime API) version (CUDART static linking)
4 |
5 | Found 1 CUDA Capable device(s)
6 |
7 | Device 0: "GeForce GTX 280"
8 | CUDA Driver Version / Runtime Version 4.2 / 4.2
9 | CUDA Capability Major/Minor version number: 1.3
10 | Total amount of global memory: 1023 MBytes (1073020928 bytes)
11 | (30) Multiprocessors x ( 8) CUDA Cores/MP: 240 CUDA Cores
12 | GPU Clock rate: 1296 MHz (1.30 GHz)
13 | Memory Clock rate: 1107 Mhz
14 | Memory Bus Width: 512-bit
15 | Max Texture Dimension Size (x,y,z) 1D=(8192), 2D=(65536,32768), 3D=(2048,2048,2048)
16 | Max Layered Texture Size (dim) x layers 1D=(8192) x 512, 2D=(8192,8192) x 512
17 | Total amount of constant memory: 65536 bytes
18 | Total amount of shared memory per block: 16384 bytes
19 | Total number of registers available per block: 16384
20 | Warp size: 32
21 | Maximum number of threads per multiprocessor: 1024
22 | Maximum number of threads per block: 512
23 | Maximum sizes of each dimension of a block: 512 x 512 x 64
24 | Maximum sizes of each dimension of a grid: 65535 x 65535 x 1
25 | Maximum memory pitch: 2147483647 bytes
26 | Texture alignment: 256 bytes
27 | Concurrent copy and execution: Yes with 1 copy engine(s)
28 | Run time limit on kernels: Yes
29 | Integrated GPU sharing Host Memory: No
30 | Support host page-locked memory mapping: Yes
31 | Concurrent kernel execution: No
32 | Alignment requirement for Surfaces: Yes
33 | Device has ECC support enabled: No
34 | Device is using TCC driver mode: No
35 | Device supports Unified Addressing (UVA): No
36 | Device PCI Bus ID / PCI location ID: 1 / 0
37 | Compute Mode:
38 | < Default (multiple host threads can use ::cudaSetDevice() with device simultaneously) >
39 |
40 | deviceQuery, CUDA Driver = CUDART, CUDA Driver Version = 4.2, CUDA Runtime Version = 4.2, NumDevs = 1, Device = GeForce GTX 280
41 |
--------------------------------------------------------------------------------
/execute.sh:
--------------------------------------------------------------------------------
# Build the binary first; there is nothing to benchmark if the build fails.
if ! make
then
	exit 1
fi

#8000 so that the arrays can fit inside the texture memory ( 8 * 8000 = 64000 < 65000 )
for j in 1000 8000
do

	echo "$j"

	# Each entry is "<text length n> <alphabet size>"; they are run in this order
	for workload in "3999744 2" "4628736 4" "116234496 4" "177649920 20" "10821888 20" "1903104 128"
	do
		n=${workload% *}
		alphabet=${workload#* }

		for i in sog
		do
			./smatcher $i -m 8 -p_size $j -n $n -alphabet $alphabet
		done

		# Blank line separates the output of consecutive workloads
		echo ""

	done

done
57 |
--------------------------------------------------------------------------------
/kmp/kmp.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 |     along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".  If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 | /*
18 | //Create the first node of the list
19 | struct node* setup_head ( char label ) {
20 |
21 | struct node* newState = malloc ( sizeof ( struct node ) );
22 |
23 | newState->label = label;
24 | newState->id = 0;
25 | newState->supply = NULL;
26 | newState->next = NULL;
27 |
28 | return newState;
29 | }
30 |
31 | //append a node and visit it
32 | void append_node ( struct node** lastState, char label, int id ) {
33 |
34 | struct node* newState = malloc ( sizeof ( struct node ) );
35 |
36 | newState->label = label;
37 | newState->id = id;
38 | newState->supply = NULL;
39 | newState->next = NULL;
40 |
41 | //the ->next of the last state in the list will now point to this state
42 | (*lastState)->next = newState;
43 |
44 | //The current node in the list will now be the last node we created
45 | *lastState = newState;
46 | }
47 |
48 | void free_kmp ( struct node* state ) {
49 |
50 | struct node* tmp;
51 |
52 | while ( 1 ) {
53 |
54 | tmp = state;
55 | state = state->next;
56 |
57 | free ( tmp );
58 |
59 | if ( state == NULL )
60 | break;
61 | }
62 | }
63 |
64 | //create the supply link for a node
65 | void addSupply ( struct node* head, int current, struct node* supply ) {
66 |
67 | int i;
68 |
69 | struct node* currentState = head;
70 |
71 | for ( i = 0; i < current; i++ )
72 | currentState = currentState->next;
73 |
74 | currentState->supply = supply;
75 |
76 | }
77 |
78 | struct node* preKmpList ( struct node* head, unsigned int *pattern, int m ) {
79 |
80 | int i = 0, k;
81 |
82 | struct node* currentState = head;
83 |
84 | struct node* j = NULL;
85 |
86 | for ( k = 1; k <= m; k++ )
87 | 		append_node( &currentState, pattern[k], k );
88 |
89 | while (i < m) {
90 |
91 | while ( j != NULL && pattern[i] != j->label)
92 | j = j->supply;
93 |
94 | i++;
95 |
96 | if ( j == NULL )
97 | j = head;
98 | else
99 | j = j->next;
100 |
101 | if ( i < m && pattern[i] == j->label )
102 | addSupply( head, i, j->supply );
103 | else
104 | addSupply( head, i, j );
105 | }
106 |
107 | return head;
108 | }
109 |
110 | unsigned int searchList ( struct node* head, unsigned int *pattern, int m, unsigned int *text, int n ) {
111 |
112 | int i = 0;
113 |
114 | struct node* j = head;
115 |
116 | while (i < n) {
117 |
118 | //mismatch occurs
119 | while ( j != NULL && j->label != text[i] )
120 | j = j->supply;
121 |
122 | i++;
123 |
124 | if ( j == NULL )
125 | j = head;
126 | else
127 | j = j->next;
128 |
129 | if ( j->id >= m ) {
130 | return ( i - j->id );
131 |
132 | printf("->%i\n", i - j->id);
133 |
134 | j = j->supply;
135 | }
136 | }
137 | }
138 | */
139 |
/*
 * Fill the Knuth-Morris-Pratt shift table for a pattern.
 *
 * next - output table; entries next[0] .. next[m] are written, so the caller
 *        must provide room for m + 1 integers
 * p    - pattern bytes; only p[0] .. p[m - 1] are read
 * m    - pattern length
 *
 * next[0] is always -1. When the pattern byte at the new position equals the
 * byte at the current border, the border's own entry is reused instead of the
 * border length.
 */
void preKmp ( int *next, unsigned char *p, int m ) {

	int pos;
	int border = -1;

	next[0] = -1;

	for ( pos = 1; pos <= m; pos++ ) {

		//Fall back through shorter borders until the previous byte extends one
		while ( border >= 0 && p[pos - 1] != p[border] )
			border = next[border];

		border++;

		//pos == m only records the final border; p[m] is never read
		next[pos] = ( pos < m && p[pos] == p[border] ) ? next[border] : border;
	}
}
159 | /*
160 | void search ( int *next, unsigned char *pattern, int m, unsigned char *text, int n ) {
161 |
162 | int i = 0;
163 | int j = 0;
164 |
165 | while (i < n) {
166 |
167 | //mismatch occurs
168 | while (j >= 0 && pattern[j] != text[i])
169 | j = next[j];
170 |
171 | i++;
172 | j++;
173 |
174 | if (j >= m) {
175 | //printf("->%i\n", i - j);
176 | j = next[j];
177 | }
178 | }
179 | }
180 |
181 | int main ( void ) {
182 |
183 | int i;
184 |
185 | int m = 8;
186 | unsigned char *pattern = (unsigned char *)"AACGTAAC";
187 |
188 | int n = 12;
189 | unsigned char *text = (unsigned char *)"TAATAACGTAAC";
190 |
191 | preKmp( pattern, m );
192 |
193 | search( pattern, m, text, n );
194 |
195 | for ( i = 0; i < m; i++ )
196 | printf("%i\n", next[i]);
197 |
198 | printf("\n");
199 |
200 | struct node* head = setup_head( pattern[0] );
201 |
202 | struct node* state = head;
203 |
204 | preKmpList( state, pattern, m );
205 |
206 | searchList ( state, pattern, m, text, n );
207 |
208 | while ( 1 ) {
209 |
210 | if ( state->label ) {
211 | if ( state->supply != NULL )
212 | printf("Node %c points to node %i with a label %c\n", state->label, state->supply->id, state->supply->label);
213 | else
214 | printf("Node %c points to NULL\n", state->label);
215 | }
216 | state = state->next;
217 |
218 | if ( state == NULL )
219 | break;
220 | }
221 |
222 | free_kmp ( head );
223 |
224 | return 0;
225 | }
226 | */
227 |
228 |
--------------------------------------------------------------------------------
/profile.sh:
--------------------------------------------------------------------------------
# Build the binary first; stop here when the build fails.
make || exit 1


#events="gld_32b,gld_64b,gld_128b,gld_incoherent,gld_coherent,branch,warp_serialize"
#events="gld_32b,gld_64b,gld_128b"

#nvprof --events $events ./smatcher ac -m 8 -p_size 8000 -n 116234496 -alphabet 4

events="gld_incoherent,gld_coherent,branch,warp_serialize"

nvprof --events $events ./smatcher sog -m 8 -p_size 1000 -n 116234496 -alphabet 4


# Legend for the profiler counters, printed after the profiling run
cat <<'EOF'

gld_32b: Number of 32 byte global memory load transactions. This increments by 1 for each 32 byte transaction.

gld_64b: Number of 64 byte global memory load transactions. This increments by 1 for each 64 byte transaction.

gld_128b: Number of 128 byte global memory load transactions. This increments by 1 for each 128 byte transaction.

gld_incoherent: Number of non-coalesced global memory loads.

gld_coherent: Number of coalesced global memory loads.

branch: Number of branches taken by threads executing a kernel. This counter will be incremented by one if at least one thread in a warp takes the branch.

warp_serialize: If two addresses of a memory request fall in the same memory bank, there is a bank conflict and the access has to be serialized. This counter gives the number of thread warps that serialize on address conflicts to either shared or constant memory.
EOF
33 |
34 |
--------------------------------------------------------------------------------
/sbom/sbom.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 |     along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".  If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
18 | /// initialize the empty-table
19 | void sbom_init ( struct sbom_table *g, int alphabet, int *state_transition ) {
20 |
21 | g->zerostate = NULL;
22 | g->patterncounter = 0;
23 |
24 | //Create the root note
25 | g->zerostate = malloc ( sizeof ( struct sbom_state ) );
26 |
27 | if ( !g->zerostate )
28 | fail ( "Could not allocate memory\n" );
29 |
30 | g->idcounter = 1;
31 | g->zerostate->id = 0;
32 |
33 | g->zerostate->F = NULL;
34 |
35 | //Set Supply(q_0) := fail
36 | g->zerostate->fail = NULL;
37 |
38 | g->zerostate->next = ( struct sbom_state ** ) malloc ( alphabet * sizeof ( struct sbom_state * ) );
39 |
40 | //Set all alphabet bytes of root node->next to 0
41 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct sbom_state * ) );
42 |
43 | //Set all cells of transition table for state 0 to 0
44 | int i;
45 |
46 | for ( i = 0; i < alphabet; i++ )
47 | state_transition[i] = 0;
48 | }
49 |
50 | // Insert a string to the tree
51 | void sbom_addstring ( struct sbom_table *g, unsigned int i, unsigned char *string, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_final_multi ) {
52 |
53 | struct sbom_state *state, *next = NULL, *k;
54 | int j, done = 0;
55 |
56 | // as long as next already exists follow them
57 | j = m - 1;
58 | state = g->zerostate;
59 |
60 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) {
61 |
62 | state = next;
63 |
64 | if ( j <= 0 )
65 | done = 1;
66 |
67 | j--;
68 | }
69 |
70 | // not done yet
71 | if ( !done ) {
72 |
73 | while ( j >= 0 ) {
74 | // Create new state
75 | next = malloc ( sizeof ( struct sbom_state ) );
76 |
77 | if ( !next )
78 | fail ( "Could not allocate memory\n" );
79 |
80 | next->next = ( struct sbom_state ** ) malloc ( alphabet * sizeof ( struct sbom_state * ) );
81 |
82 | next->id = g->idcounter++;
83 | next->F = NULL;
84 |
85 | state_transition[state->id * alphabet + *( string + j )] = next->id;
86 |
87 | //Store the pointer to the new state in an array so it can be free'ed at the end
88 | pointer_array[next->id - 1] = next;
89 |
90 | //printf("Created link from state %i to %i for character %i (j = %i)\n", state->id, next->id, *( string + j ), j );
91 |
92 | //Set all alphabet bytes of the next node's->next to 0
93 | //This is the _extended_ Aho-Corasick algorithm. A complete automaton is used where all states
94 | //have an outgoing transition for every alphabet character of the alphabet
95 | memset ( next->next, 0, alphabet * sizeof ( struct sbom_state * ) );
96 |
97 | state->next[*( string + j )] = next;
98 |
99 | k = state->fail;
100 |
101 | while ( k != NULL && k->next[*( string + j )] == NULL ) {
102 |
103 | k->next[*( string + j )] = next;
104 |
105 | state_transition[k->id * alphabet + *( string + j )] = next->id;
106 |
107 | //printf(" Created additional link from state %i to %i for character %i\n", k->id, next->id, *( string + j ) );
108 |
109 | k = k->fail;
110 | }
111 |
112 | if ( k != NULL )
113 | next->fail = k->next[*( string + j )];
114 | else
115 | next->fail = g->zerostate;
116 |
117 | state = next;
118 |
119 | j--;
120 | }
121 | }
122 |
123 | //printf(" Currently at state %i\n", state->id);
124 |
125 | //After finishing with the previous characters of the keyword, add the terminal state to F(q)
126 | if ( !state->F ) {
127 |
128 | //In the worst case, one state can correspond to all p_size patterns, needing p_size * number_of_terminal_states memory. A number of 200 indices should suffice.
129 | //state->F = ( unsigned int * ) malloc ( sizeof ( unsigned int ) * p_size );
130 | state->F = ( unsigned int * ) malloc ( sizeof ( unsigned int ) * 200 );
131 |
132 | if ( !state->F )
133 | fail ( "Could not allocate memory\n" );
134 |
135 | state->num = 0;
136 | }
137 |
138 | //Add the row index to the F array
139 | state->F[state->num] = g->patterncounter;
140 |
141 | //printf(" Added pattern %i to F[%i] of state %i\n", g->patterncounter, state->num, state->id);
142 |
143 | //Use state_final_multi[state][0] to store the number of matching patterns, enumerate them in cells state_final_multi[state][1-200]
144 | state_final_multi[state->id * 200] = state->num + 1;
145 | state_final_multi[state->id * 200 + state->num + 1] = g->patterncounter;
146 |
147 | state->num++;
148 |
149 | g->patterncounter++;
150 | }
151 |
152 | unsigned int search_sbom ( unsigned char **pattern, int m, unsigned char *text, int n, struct sbom_table *table ) {
153 |
154 | struct sbom_state *head = table->zerostate;
155 | struct sbom_state *r, *s;
156 |
157 | unsigned int i;
158 |
159 | int column = m - 1, matches = 0, j;
160 |
161 | while ( column < n ) {
162 |
163 | r = head;
164 | j = 0;
165 |
166 | while ( j < m && ( s = r->next[*( text + column - j )] ) != NULL ) {
167 |
168 | //printf("(%i) Going from %i to %i by %i\n", column - j, r->id, s->id, *( text + column - j ));
169 |
170 | r = s;
171 |
172 | j++;
173 | }
174 |
175 | //Verify all patterns in F(q) with the input string
176 | if ( r->F != NULL && r->num > 0 && j == m ) {
177 |
178 | for ( i = 0; i < r->num; i++ ) {
179 |
180 | if ( memcmp ( pattern[r->F[i]], text + column - m + 1, m ) == 0 ) {
181 | matches++;
182 |
183 | //printf("match of %i %i at %i\n", r->id, r->F[i], column);
184 |
185 | break;
186 | }
187 | }
188 |
189 | column++;
190 | }
191 | else
192 | column += MAX ( m - j, 1);
193 | }
194 |
195 | return matches;
196 | }
197 |
198 | struct sbom_table *preproc_sbom ( unsigned char **pattern, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_final_multi ) {
199 |
200 | unsigned int i;
201 |
202 | struct sbom_table *table;
203 |
204 | // allocate memory for the table
205 |
206 | table = malloc ( sizeof ( struct sbom_table ) );
207 |
208 | if ( !table )
209 | fail ( "Could not initialize table\n" );
210 |
211 | sbom_init ( table, alphabet, state_transition );
212 |
213 | for ( i = 0; i < p_size; i++ )
214 | sbom_addstring ( table, i, pattern[i], m, p_size, alphabet, state_transition, state_final_multi );
215 |
216 | return table;
217 | }
218 |
219 | void free_sbom ( struct sbom_table *table, int m ) {
220 |
221 | int i;
222 |
223 | //We know exactly how many states we stored in the pointer_array ( table->idcounter - 1 )
224 | for ( i = 0; i < table->idcounter - 1; i++ )
225 | if ( pointer_array[i] ) {
226 |
227 | if ( pointer_array[i]->F )
228 | free ( pointer_array[i]->F );
229 |
230 | free ( pointer_array[i]->next );
231 | free ( pointer_array[i] );
232 | }
233 |
234 | free ( table->zerostate->next );
235 | free ( table->zerostate );
236 | free ( table );
237 | }
238 |
239 |
--------------------------------------------------------------------------------
/sh/sh.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 |     along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".  If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
18 | /// free an AC table from a given startnode (recursively)
19 | void sh_free ( struct ac_state *state, int alphabet ) {
20 |
21 | int i;
22 |
23 | for ( i = 0; i < alphabet; i++ )
24 | if ( state->next[i] )
25 | sh_free ( state->next[i], alphabet );
26 |
27 | if ( state->output )
28 | free ( state->output );
29 |
30 | free ( state->next );
31 | free ( state );
32 | }
33 |
34 | /// initialize the empty-table
35 | void sh_init ( struct ac_table *g, int alphabet, int *state_transition ) {
36 |
37 | g->zerostate = NULL;
38 | g->patterncounter = 0;
39 |
40 | //Create the root note
41 | g->zerostate = malloc ( sizeof ( struct ac_state ) );
42 |
43 | if ( !g->zerostate )
44 | fail ( "Could not allocate memory\n" );
45 |
46 | g->idcounter = 1;
47 | g->zerostate->id = 0;
48 |
49 | g->zerostate->output = NULL;
50 |
51 | g->zerostate->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) );
52 |
53 | //Set all alphabet bytes of root node->next to 0
54 | memset ( g->zerostate->next, 0, alphabet * sizeof ( struct ac_state * ) );
55 |
56 | //Set all cells of transition table for state 0 to 0
57 | int i;
58 |
59 | for ( i = 0; i < alphabet; i++ )
60 | state_transition[i] = 0;
61 | }
62 |
63 | /// free an entire AC table
64 | void sh_destroy ( struct ac_table *in, int alphabet ) {
65 |
66 | int i;
67 |
68 | for ( i = 0; i < alphabet; i++ )
69 | if ( in->zerostate->next[i] && in->zerostate->next[i]->id > 0 ) {
70 |
71 | //printf("id: %i i: %i\n", in->zerostate->next[i]->id, i);
72 |
73 | sh_free ( in->zerostate->next[i], alphabet );
74 | in->zerostate->next[i] = NULL;
75 | }
76 | free ( in->zerostate->next );
77 | free ( in->zerostate );
78 | }
79 |
80 | // Insert a string to the tree
81 | void sh_addstring ( struct ac_table *g, unsigned int i, unsigned char *string, int m, int alphabet, int *state_transition, unsigned int *state_final ) {
82 |
83 | struct ac_state *state, *next = NULL;
84 | int j, done = 0;
85 |
86 | // as long as next already exists follow them
87 | j = m - 1;
88 | state = g->zerostate;
89 |
90 | while ( !done && ( next = state->next[*( string + j )] ) != NULL ) {
91 |
92 | //printf("id: %i j: %i\n", state->id, j);
93 |
94 | state = next;
95 |
96 | if ( j <= 0 )
97 | done = 1;
98 |
99 | j--;
100 |
101 | //printf("character %c state: %i\n", *( string + j ), state->id);
102 | }
103 |
104 | // not done yet
105 | if ( !done ) {
106 | while ( j >= 0 ) {
107 | // Create new state
108 | next = malloc ( sizeof ( struct ac_state ) );
109 |
110 | if ( !next )
111 | fail ( "Could not allocate memory\n" );
112 |
113 | next->next = ( struct ac_state ** ) malloc ( alphabet * sizeof ( struct ac_state * ) );
114 |
115 | next->id = g->idcounter++;
116 | next->output = NULL;
117 |
118 | state_transition[state->id * alphabet + *( string + j )] = next->id;
119 |
120 | //printf("Created link from state %i to %i for character %c (j = %i)\n", state->id, next->id, *( string + j ), j );
121 |
122 | //Set all alphabet bytes of the next node's->next to 0
123 | //This is the _extended_ Aho-Corasick algorithm. A complete automaton is used where all states
124 | //have an outgoing transition for every alphabet character of the alphabet
125 | memset ( next->next, 0, alphabet * sizeof ( struct ac_state * ) );
126 |
127 | state->next[*( string + j )] = next;
128 | state = next;
129 |
130 | j--;
131 | }
132 | }
133 |
134 | //After finishing with the previous characters of the keyword, add the terminal state if it does not exist
135 | if ( !state->output ) {
136 |
137 | state_final[state->id] = 1;
138 |
139 | //allocate memory and copy *string to state->output
140 | state->output = ( unsigned char * ) malloc ( sizeof ( unsigned char ) * m );
141 | memcpy ( state->output, string, m );
142 |
143 | //printf("Adding output %s to state %i\n", state->output, state->id);
144 |
145 | state->keywordline = g->patterncounter;
146 |
147 | g->patterncounter++;
148 | }
149 | }
150 |
151 | unsigned int search_sh ( int m, unsigned char *text, int n, struct ac_table *table, int *bmBc ) {
152 |
153 | struct ac_state *head = table->zerostate;
154 | struct ac_state *r, *s;
155 |
156 | int column = m - 1, matches = 0, j;
157 |
158 | r = head;
159 |
160 | while ( column < n ) {
161 |
162 | r = head;
163 | j = 0;
164 |
165 | while ( j < m && ( s = r->next[*( text + column - j )] ) != NULL ) {
166 |
167 | r = s;
168 | j++;
169 | }
170 |
171 | if ( r->output != NULL )
172 | matches++;
173 |
174 | column += bmBc[text[column]];
175 | }
176 |
177 | return matches;
178 | }
179 |
180 | struct ac_table *preproc_sh ( unsigned char **pattern, int m, int p_size, int alphabet, int *state_transition, unsigned int *state_final ) {
181 |
182 | unsigned int i;
183 |
184 | struct ac_table *table;
185 |
186 | // allocate memory for the table
187 |
188 | table = malloc ( sizeof ( struct ac_table ) );
189 |
190 | if ( !table )
191 | fail ( "Could not initialize table\n" );
192 |
193 | sh_init ( table, alphabet, state_transition );
194 |
195 | for ( i = 0; i < p_size; i++ )
196 | sh_addstring ( table, i, pattern[i], m, alphabet, state_transition, state_final );
197 |
198 | return table;
199 | }
200 |
// Release a table built by preproc_sh: first all of its states, then the table structure.
void free_sh ( struct ac_table *table, int alphabet ) {

	//sh_destroy leaves the table structure itself allocated
	sh_destroy ( table, alphabet );
	free ( table );
}
207 |
208 |
--------------------------------------------------------------------------------
/sh/support/atomic.h:
--------------------------------------------------------------------------------
1 | /**
2 | * support/atomic.[ch]
3 | * streamline wrapper around atomic operations
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at wdebruij _-at-_ users DOT sourceforge DOT net
7 | *
8 | * Based on the original SUNRPC implementation as found in GLIBC.
9 | * That version follows an MIT-like license.
10 | * Here LGPL applies.
11 | * */
12 |
13 | #ifdef __KERNEL__
14 | #include
15 | #else
16 |
17 | /** this is obviously NOT atomic.
18 | * TODO: fix. at least now we have the calls in place */
19 |
/* Userspace stand-ins: plain, non-atomic operations on an int.
 * Every macro takes a POINTER to the counter, except ATOMIC_INIT.
 * Arguments are fully parenthesised so that pointer expressions such as
 * `counters + 1` or `&obj->refs` expand correctly. */

#define atomic_t int

/* initial value for an atomic_t */
#define ATOMIC_INIT(x) (x)

/* current value of the counter x points to */
#define atomic_read(x) (*(x))
/* post-increment / post-decrement of the counter x points to */
#define atomic_inc(x) ((*(x))++)
#define atomic_dec(x) ((*(x))--)
/* pre-increment; evaluates to the NEW counter value.
 * NOTE(review): the Linux kernel call of the same name returns true only
 * when the result is zero -- confirm which meaning the callers expect. */
#define atomic_inc_and_test(x) ( ++(*(x)) )
28 |
29 | #endif
30 |
31 |
--------------------------------------------------------------------------------
/sh/support/bitmap.h:
--------------------------------------------------------------------------------
1 | // bitmap.h
2 | // support for per-bit operations
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
// Turn on one bit of a bitmap; bit numbering starts at 0.
// Bit `element` lives in byte element / 8, at position element % 8 counted
// from the least significant bit.
static inline void bitmap_set(char *bitmap, unsigned int element)
{
	const unsigned int byte = element / 8;
	const unsigned int bit = element % 8;

	bitmap[byte] |= (1 << bit);
}
14 |
// Test one bit of a bitmap; bit numbering starts at 0.
// Returns 1 when bit `element` is on, 0 otherwise.
static inline int bitmap_isset(const char *bitmap, unsigned int element)
{
	const unsigned int byte = element / 8;
	const unsigned int bit = element % 8;

	return (bitmap[byte] & (1 << bit)) != 0;
}
19 |
// Turn off one bit of a bitmap; bit numbering starts at 0.
// All other bits of the affected byte are left as they are.
static inline void bitmap_clear(char *bitmap, unsigned int element)
{
	const unsigned int byte = element / 8;
	const unsigned int bit = element % 8;

	bitmap[byte] &= ~(1 << bit);
}
24 |
25 |
--------------------------------------------------------------------------------
/sh/support/dict.c:
--------------------------------------------------------------------------------
1 | // dict.[ch]
2 | // an associative memory
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // BSD License applies
8 |
9 | #ifdef __KERNEL__
10 | #include
11 | #include
12 | #include
13 | #else
14 | #include
15 | #include
16 | #include
17 | #endif
18 |
19 | #include "log.h"
20 | #include "macros.h"
21 | #include "dict.h"
22 |
23 | #define DICT_BYTELEN (sizeof(struct dictionary) * DICT_TABLE_LEN)
24 |
25 | struct dictionary *
26 | dict_create(void)
27 | {
28 | struct dictionary *dict;
29 |
30 | dict = myalloc(DICT_BYTELEN);
31 | memset(dict, 0, DICT_BYTELEN);
32 | return dict;
33 | }
34 |
35 | static struct dictionary *
36 | __dict_find_ex(struct dictionary *dict, const char *key, int n)
37 | {
38 | int i = -1, occur = -1;
39 |
40 | if (!dict)
41 | return NULL;
42 |
43 | while (occur < n) {
44 | if (++i == DICT_TABLE_LEN)
45 | return NULL;
46 | if (!dict[i].klen)
47 | continue;
48 | if (!memcmp(dict[i].key, key,
49 | max(dict[i].klen, (int) strlen(key)))){
50 | occur++;
51 | }
52 | }
53 | return &dict[i];
54 | }
55 |
56 | void *
57 | dict_lookup_ex(struct dictionary *dict, const char *key, int n)
58 | {
59 | struct dictionary * elem = __dict_find_ex(dict, key, n);
60 | if (elem)
61 | return elem->value;
62 | return NULL;
63 | }
64 |
/** Return the value stored under the first occurrence of key,
 *  or NULL when the key is not present. */
void *
dict_lookup(struct dictionary *dict, const char *key)
{
	const int first_occurrence = 0;

	return dict_lookup_ex(dict, key, first_occurrence);
}
70 |
71 | char *
72 | dict_rlookup(struct dictionary *dict, void *data)
73 | {
74 | int i = -1;
75 |
76 | while (++i < DICT_TABLE_LEN) {
77 | if (dict[i].value == data)
78 | return dict[i].key;
79 | }
80 | return NULL;
81 | }
82 |
83 | int
84 | dict_insert_dup(struct dictionary *dict, const char *key, void *value)
85 | {
86 | int i = 0;
87 |
88 | // skip used items
89 | while(i < DICT_TABLE_LEN && dict[i].klen)
90 | i++;
91 | if (i == DICT_TABLE_LEN) {
92 | sl_log(LOG_WARN, "exhausted dictionary space");
93 | return -1;
94 | }
95 |
96 | // fill item
97 | dict[i].klen = strlen(key) + 1;
98 | dict[i].key = myalloc(dict[i].klen);
99 | memcpy(dict[i].key, key, dict[i].klen);
100 | dict[i].value = value;
101 | return i;
102 | }
103 |
104 | int
105 | dict_insert(struct dictionary *dict, const char *key, void *value)
106 | {
107 | if (!key) {
108 | sl_log(LOG_WARN, "dict insert NULL key thwarted");
109 | return -1;
110 | }
111 |
112 | // there are faster alternatives for duplicate checking
113 | if (dict_lookup(dict, key)){
114 | sl_log(LOG_WARN, "dictionary collision on %s", key);
115 | sl_log(LOG_MSG, key);
116 | return -1;
117 | }
118 | return dict_insert_dup(dict, key, value);
119 | }
120 |
121 | void
122 | dict_replace(struct dictionary *dict, const char *key, void *value)
123 | {
124 | struct dictionary *item = __dict_find_ex(dict, key, 0);
125 | if (item)
126 | item->value = value;
127 | }
128 |
129 | int
130 | dict_len(struct dictionary *dict)
131 | {
132 | int i, occur = 0;
133 |
134 | for (i = 0; i < DICT_TABLE_LEN; i++)
135 | if (dict[i].klen)
136 | occur++;
137 |
138 | return occur;
139 | }
140 |
141 | void *
142 | dict_getnth(struct dictionary *dict, int n)
143 | {
144 | int i=-1, j=-1;
145 | while (++i < DICT_TABLE_LEN)
146 | if (dict[i].klen && ++j == n)
147 | return dict[i].value;
148 | return NULL;
149 | }
150 |
151 | void
152 | dict_delex(struct dictionary *dict, const char *key, int n)
153 | {
154 | struct dictionary * entry;
155 |
156 | if (!(entry = __dict_find_ex(dict, key, n)))
157 | return;
158 |
159 | myfree(entry->key);
160 | entry->klen = 0;
161 | entry->key = NULL;
162 | entry->value = NULL;
163 | }
164 |
/** Remove the first occurrence of key; see dict_delex(). */
void
dict_del(struct dictionary *dict, const char *key)
{
	const int first_occurrence = 0;

	dict_delex(dict, key, first_occurrence);
}
170 |
171 | void
172 | dict_clear(struct dictionary *dict, int free_values)
173 | {
174 | struct dictionary *elem;
175 | int i;
176 |
177 | dict_foreach_elem(dict, i, elem) {
178 | myfree(elem->key);
179 | elem->klen = 0;
180 | if (free_values)
181 | myfree(elem->value);
182 | }
183 | }
184 |
/** Empty a dictionary with dict_clear() and release the table itself. */
void
dict_destroy(struct dictionary *dict, int free_values)
{
	// entries first: the table memory is needed to reach the key copies
	dict_clear(dict, free_values);

	myfree(dict);
}
191 |
192 | struct dictionary *
193 | dict_copy(struct dictionary *dict)
194 | {
195 | struct dictionary *new;
196 | int i;
197 | char *key;
198 | void *value;
199 |
200 | if ((new = dict_create()))
201 | dict_foreach(dict, i, key, value)
202 | dict_insert(new, key, value);
203 |
204 | return new;
205 | }
206 |
207 | #ifdef __KERNEL__
208 | EXPORT_SYMBOL(dict_insert);
209 | EXPORT_SYMBOL(dict_replace);
210 | EXPORT_SYMBOL(dict_lookup);
211 | EXPORT_SYMBOL(dict_rlookup);
212 | EXPORT_SYMBOL(dict_del);
213 | EXPORT_SYMBOL(dict_delex);
214 | EXPORT_SYMBOL(dict_destroy);
215 | #endif
216 |
217 |
218 |
--------------------------------------------------------------------------------
/sh/support/dict.h:
--------------------------------------------------------------------------------
1 | // dict.[ch]
2 | // an associative memory
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // BSD License applies
8 |
9 | #ifndef WJDB_DICT_H
10 | #define WJDB_DICT_H
11 |
12 | #define DICT_TABLE_LEN 128 ///< #elements
13 |
14 | /** a dictionary maps a key to a value.
15 | *
16 | * we allocate space for keys, but pass foreign pointers directly
17 | * the caller must take care not to free pointers still in the table
18 | *
19 | * index-based functions start counting at 0 */
20 |
/// a (key,value) pair as used in the table
struct dictionary {
	char *key;	///< copy of the key owned by the table; released with myfree()
	int klen;	///< key length including the terminating NUL; 0 marks an unused entry
	void *value;	///< caller-supplied pointer, stored as given
};
27 |
28 | #define STATIC_DICT(name) static struct dictionary name[DICT_TABLE_LEN];
29 |
30 | struct dictionary * dict_create(void);
31 | void dict_clear(struct dictionary *dict, int free_values);
32 | void dict_destroy(struct dictionary *dict, int free_values);
33 |
34 | // add / replace / del
35 | int dict_insert(struct dictionary *dict, const char *key, void *value);
36 | int dict_insert_dup(struct dictionary *dict, const char *key, void *value);
37 | void dict_replace(struct dictionary *dict, const char *key, void *value);
38 | void dict_del(struct dictionary *dict, const char *key);
39 | void dict_delex(struct dictionary *dict, const char *key, int n);
40 | struct dictionary * dict_copy(struct dictionary *dict);
41 |
42 | int dict_len(struct dictionary *dict);
43 |
44 | // lookup by key / value / index
45 | void * dict_lookup(struct dictionary *dict, const char *key);
46 | void * dict_lookup_ex(struct dictionary *dict, const char *key, int n);
47 | char * dict_rlookup(struct dictionary *dict, void *data);
48 | void * dict_getnth(struct dictionary *dict, int n);
49 |
50 | /// get the next used entry (for internal use only)
51 | static inline int
52 | __dict_getnext(struct dictionary *dict, int i)
53 | {
54 | while (++i < DICT_TABLE_LEN)
55 | if (dict[i].klen)
56 | return i;
57 | return -1;
58 | }
59 |
/// do something for each filled entry
/// outkey/outval receive the key and value of the entry at index i.
/// the value is fetched with the comma operator so that an entry that
/// stores a NULL (or zero) value does not end the iteration early;
/// a filled entry without a key still stops the loop.
#define dict_foreach(dict, i, outkey, outval) \
	for ((i) = __dict_getnext(dict, -1); \
	     (i) >= 0 && ((outkey) = (dict)[i].key) != NULL && \
	     ((outval) = (dict)[i].value, 1); \
	     (i) = __dict_getnext(dict, i))
65 |
66 | /// retrieve consecutive elements
67 | #define dict_foreach_elem(dict, i, elem) \
68 | for ((i) = __dict_getnext(dict, -1); \
69 | (i) >= 0 && ((elem) = &dict[i]) != NULL; \
70 | (i) = __dict_getnext(dict, i))
71 |
72 | #endif /* WJDB_DICT_H */
73 |
74 |
--------------------------------------------------------------------------------
/sh/support/endian.h:
--------------------------------------------------------------------------------
1 | // endian.h
2 | // detect and cope with varying endianness
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #include "macros.h"
10 |
#define ENDIAN_BIG 0x0
#define ENDIAN_LITTLE 0x1

/**
 * Report the byte order of the machine this code is compiled for.
 *
 * Known architectures are answered at compile time. All others fall back
 * to a runtime probe: the value 1 is stored in a 16-bit integer and its
 * first byte in memory is inspected. On a little-endian machine the least
 * significant byte comes first, so that byte reads 1.
 *
 * @return ENDIAN_LITTLE or ENDIAN_BIG
 */
__attribute__((pure)) static inline int arch_get_endianness(void)
{
#if defined i386 || defined __i386__ || defined __x86_64__ || defined __amd64__
	return ENDIAN_LITTLE;
#elif defined sparc
	return ENDIAN_BIG;
#elif defined ppc || defined powerpc
	return ENDIAN_BIG;
#elif defined armbe
	return ENDIAN_BIG;
#else
	const uint16_t probe = 1;
	const unsigned char *first = (const unsigned char *) &probe;

	// low-order byte stored first => little endian
	if (*first == 1)
		return ENDIAN_LITTLE;
	return ENDIAN_BIG;
#endif
}
32 |
// Compile-time byte order, usable in preprocessor conditionals.
// A macro name cannot contain a hyphen: `defined x86-64` is evaluated as
// `(defined x86) - 64`, which is non-zero on every platform. Test the
// compiler-provided x86 macros instead.
#if defined i386 || defined __i386__ || defined __x86_64__ || defined __amd64__
#define SL_BYTEORDER ENDIAN_LITTLE
#elif defined sparc
#define SL_BYTEORDER ENDIAN_BIG
#elif defined __ARMEB__
#define SL_BYTEORDER ENDIAN_BIG
#elif defined ppc || defined powerpc
#define SL_BYTEORDER ENDIAN_BIG
#else
#warning "cannot predefine endianness"
#endif
44 |
/// some archs (sun) have 8byte pointers but 4 byte ints, then *(int*) will fail
/// use these value-based byte swaps as alternative
/// (each macro evaluates its argument more than once)
#define swap16(A) \
	(((((uint16_t)(A)) >> 8) & 0x00ff) | \
	 ((((uint16_t)(A)) << 8) & 0xff00))
#define swap32(A) \
	(((((uint32_t)(A)) >> 24) & 0x000000ff) | \
	 ((((uint32_t)(A)) >> 8) & 0x0000ff00) | \
	 ((((uint32_t)(A)) << 8) & 0x00ff0000) | \
	 ((((uint32_t)(A)) << 24) & 0xff000000))
53 |
// swap on not equal: byte-swap var only when 'endian' differs from the
// byte order reported by arch_get_endianness()
static inline uint16_t swap16_ne(uint16_t var, int endian)
{
	const int same_order = (endian == arch_get_endianness());

	return likely(same_order) ? var : swap16(var);
}
61 |
62 |
--------------------------------------------------------------------------------
/sh/support/filedes.c:
--------------------------------------------------------------------------------
1 | /**
2 | * support/filedes.[ch]
3 | * support incoming signals (such as SIGIO in POSIX userspace)
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | *
10 | * Modified by Tudor Zaharia on Aug. 17 2010
11 | * tudor _at_ microcontroller DOT ro
12 | * - synchronized access to slrun on each fd
13 | * */
14 |
15 | #ifdef __KERNEL__
16 | #else
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #endif
27 |
28 | #include
29 |
30 | #include "../support/macros.h"
31 | #include "../support/log.h"
32 | #include "../support/timer.h"
33 | #include "../wrap/file.h"
34 | #include "../wrap/origsocket.h"
35 | #include "../core/datapath.h"
36 | #include "filedes.h"
37 |
38 | struct sighandler {
39 | enum slsig_action action;
40 | union {
41 | struct instance *instance;
42 | void (*callback)(int fd);
43 | } ptr;
44 | int backtrack_fd; /// used to close clientfds after an acceptfd
45 | };
46 |
47 | #define MAXFD 512
48 | static struct sighandler * handlerlist[MAXFD];
49 |
50 | // these semaphores are used for synchronizing access to the slrun_slow()
51 | static sem_t sems[MAXFD];
52 |
53 | /******** implementation-specific support code **********/
54 |
55 | int fd_setasync(int fd)
56 | {
57 | #if linux
58 | int flags;
59 |
60 | flags = __orig_fcntl(fd, F_GETFL, 0);
61 | if (flags < 0)
62 | goto err;
63 | if (__orig_fcntl(fd, F_SETFL, flags | O_NONBLOCK | O_ASYNC) == -1)
64 | goto err;
65 | if (__orig_fcntl(fd, F_SETOWN, getpid()) == -1)
66 | goto err;
67 | return 0;
68 | err:
69 | sl_log(LOG_WARN, "failed to set fd to async");
70 | return -1;
71 | #else
72 | sl_log(LOG_WARN, "async IO not supported");
73 | return -1;
74 | #endif
75 | }
76 |
77 | /******** core functions: handlers and callback **********/
78 |
79 | /** read out a filedescriptor
80 | *
81 | * because most callbacks will read from a buffer we implemented this
82 | * functionality locally */
83 | static void sigaction_read(unsigned long sigid, struct instance *instance)
84 | {
85 | #define MAX_LINESZ 1500
86 | char data[MAX_LINESZ];
87 | int size, total = 0;
88 | int fd = (int) sigid;
89 |
90 | // enter critical section
91 | if ( -1 == sem_wait(&sems[fd]) ) perror("semop error");
92 |
93 | size = __orig_read(fd, data, MAX_LINESZ);
94 | while (size > 0) {
95 | total += size;
96 | slrun_slow(instance, data, size);
97 | size = __orig_read(fd, data, MAX_LINESZ);
98 | };
99 |
100 | if (size < 0 && errno != EAGAIN)
101 | perror("read()");
102 | else if (size == 0) { // EOF
103 | slrun_slow(instance, NULL, 0);
104 | }
105 |
106 | // leave critical section
107 | if ( -1 == sem_post(&sems[fd]) ) perror("semop error");
108 | }
109 |
110 | static void sigaction_accept(unsigned long sigid, struct instance *instance)
111 | {
112 | int fd = (int) sigid;
113 | int client_fd;
114 |
115 | client_fd = __orig_accept(fd, NULL, NULL);
116 | if (client_fd < 0) {
117 | perror("accept()");
118 | return;
119 | }
120 | filedes_add(client_fd, instance, SIGH_READ);
121 | handlerlist[client_fd]->backtrack_fd = sigid;
122 | }
123 |
124 | /** call a process2() member */
125 | static void sigaction_process(unsigned long sigid, struct instance *instance)
126 | {
127 | instance->fdata.func->process2(NULL, NULL, &instance->fdata);
128 | }
129 |
130 | /* Handle a SIGIO signal.
131 | *
132 | * On receiving one of these two signals, this function
133 | * executes a non-blocking select() over all file descriptors
134 | * registered to support/filedes. On return, it executes all
135 | * registered handlers for the descriptors on which data is
136 | * available.
137 | * */
138 | static void signal_callback(int signal)
139 | {
140 | struct timeval tv = { .tv_sec = 0, .tv_usec = 0};
141 | fd_set readfds;
142 | int i, highest_fd = -1, total;
143 |
144 | // only handle registered signals
145 | if (signal != SIGIO)
146 | return;
147 |
148 | // add all file descriptors to the listen set
149 | FD_ZERO(&readfds);
150 | for(i = 0; i < MAXFD; i++) {
151 | if (handlerlist[i]) {
152 | FD_SET(i, &readfds);
153 | highest_fd = i;
154 | }
155 | }
156 |
157 | // listen on the descriptor set
158 | total = __orig_select(++highest_fd, &readfds, NULL, NULL, &tv);
159 | if (total < 0) {
160 | if (errno != EINVAL)
161 | dprintf("error in filedes select\n");
162 | return;
163 | }
164 |
165 | // trigger actions for all descriptors on which data is waiting
166 | for(i = 0; total && i <= highest_fd; i++) {
167 | if (FD_ISSET(i,&readfds)) {
168 | switch (handlerlist[i]->action) {
169 | case SIGH_PROCESS :
170 | sigaction_process(i, handlerlist[i]->ptr.instance);
171 | break;
172 | case SIGH_READ :
173 | sigaction_read(i, handlerlist[i]->ptr.instance);
174 | break;
175 | case SIGH_ACCEPT :
176 | sigaction_accept(i, handlerlist[i]->ptr.instance);
177 | break;
178 | case SIGH_CALLBACK :
179 | handlerlist[i]->ptr.callback(i);
180 | break;
181 | };
182 | total--;
183 | }
184 | }
185 | }
186 |
187 | /******** bookkeeping **********/
188 |
189 | // install the SIGIO handler
190 | int filedes_init(void* unused)
191 | {
192 | signal(SIGIO, signal_callback);
193 | return 0;
194 | }
195 |
// restore the default SIGIO disposition
// returns 0 on success, -1 if the disposition could not be changed
int filedes_exit(void *unused)
{
	(void) unused;

	if (signal(SIGIO, SIG_DFL) == SIG_ERR)
		return -1;
	return 0;
}
201 |
202 | int filedes_add(int fd, void *ptr, enum slsig_action action)
203 | {
204 | struct sighandler * sigh;
205 |
206 | assert(fd < MAXFD);
207 | if (handlerlist[fd])
208 | return -1;
209 |
210 | // create the semaphore
211 | if (sem_init(&sems[fd], 0, 1) == -1)
212 | return -1;
213 |
214 | // create the structure
215 | sigh = myalloc(sizeof(struct sighandler));
216 | sigh->ptr.instance = (struct instance *) ptr;
217 | sigh->backtrack_fd = -1;
218 | sigh->action = action;
219 |
220 | // add it to the list
221 | handlerlist[fd] = sigh;
222 |
223 | // ask the OS to signal us when data arrives on this fd.
224 | fd_setasync(fd);
225 |
226 | // bootstrap first read (for files)
227 | if (action == SIGH_READ || action == SIGH_PROCESS)
228 | signal_callback(SIGIO);
229 |
230 | return 0;
231 | }
232 |
233 | /** close all connections that depend on the parameter */
234 | static void __signalhandler_deldep(int fd)
235 | {
236 | int i;
237 |
238 | for(i=0; ibacktrack_fd == fd) {
241 | __orig_close(i);
242 | myfree(handlerlist[i]);
243 | handlerlist[i] = NULL;
244 | }
245 | }
246 |
247 | /** remove a connection. may also recursively remove dependent connections */
248 | int filedes_del(int fd)
249 | {
250 | struct sighandler * sigh;
251 |
252 | assert(fd < MAXFD);
253 | if (!handlerlist[fd])
254 | return -1;
255 |
256 | sigh = handlerlist[fd];
257 | handlerlist[fd] = NULL;
258 |
259 | // close all client connections if this is an accept descriptor.
260 | if (sigh->action == SIGH_ACCEPT)
261 | __signalhandler_deldep(fd);
262 |
263 | myfree(sigh);
264 |
265 | // destroy semaphore
266 | sem_destroy(&sems[fd]);
267 |
268 | return 0;
269 | }
270 |
271 |
--------------------------------------------------------------------------------
/sh/support/filedes.h:
--------------------------------------------------------------------------------
1 | /**
2 | * support/filedes.[ch]
3 | * support incoming signals (such as SIGIO in POSIX userspace)
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | * */
10 |
11 | #ifndef SL_SUPPORT_FILEDES_H
12 | #define SL_SUPPORT_FILEDES_H
13 |
14 | enum slsig_action {SIGH_PROCESS=1, SIGH_READ, SIGH_ACCEPT, SIGH_CALLBACK};
15 |
16 | int filedes_add(int fd, void *ptr, enum slsig_action action);
17 | int filedes_del(int fd);
18 |
19 | int filedes_init(void*);
20 | int filedes_exit(void*);
21 |
22 | #endif
23 |
24 |
--------------------------------------------------------------------------------
/sh/support/hashtable.c:
--------------------------------------------------------------------------------
1 | // hashtable.[ch]
2 | // a double-hashed hashtable
3 | //
4 | // (c) 2008, willem de bruijn, vrije universiteit amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 | #include "hashtable.h"
10 |
11 | /** lookup an element by calling the hashfunction, like in hash_insert.
12 | return the nth match. NB: n starts at 1! */
13 | int
14 | hash_lookup_by_value(struct hashtable *hash, void * value, int nth)
15 | {
16 | int i=-1, match=0, key=-1;
17 |
18 | check(nth > 0);
19 | while (i < MAX_DOUBLEHASH && match < nth){
20 | key = hash_calc(value, ++i, HASHTBL_LEN);
21 | if (hash->table[key] == value)
22 | match++;
23 | }
24 | if (match == nth)
25 | return key;
26 | else
27 | return -1;
28 | }
29 |
30 | /** insert an item, we use double hashing for collision resolution */
31 | int
32 | __hash_insert(struct hashtable *hash, void * value, const char *func)
33 | {
34 | int i=0, index;
35 |
36 | if (!value)
37 | returnbug(-1);
38 |
39 | index = hash_calc(value, i, HASHTBL_LEN);
40 | while (hash->table[index] && i < MAX_DOUBLEHASH) {
41 | index = hash_calc(value, ++i, HASHTBL_LEN);
42 | #ifndef NDEBUG
43 | if (hash->table[index] == value)
44 | dprintf("warning : duplicate hash %d: %p==%ld in %s\n",
45 | index, value, (long) value, func);
46 | #endif
47 | }
48 |
49 | if (i == MAX_DOUBLEHASH) { // give up
50 | dprintf("(BUG) hash full in %s\n", func);
51 | return -1;
52 | }
53 |
54 | hash->table[index] = value;
55 | return index;
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/sh/support/hashtable.h:
--------------------------------------------------------------------------------
1 | // hashtable.[ch]
2 | // a double-hashed hashtable
3 | //
4 | // (c) 2005, willem de bruijn, vrije universiteit amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 | #ifndef WJDB_HASHTABLE
10 | #define WJDB_HASHTABLE
11 |
12 | #include "macros.h"
13 |
14 | #define HASHTBL_LEN 431 // use a prime
15 | #define MAX_DOUBLEHASH 24 // stop searching after so many rounds
16 | struct hashtable {
17 | // TODO: add length field, make default size (much) smaller and enable resizing
18 | void * table[HASHTBL_LEN];
19 | };
20 |
// unchecked read of a slot
#define hash_lookup_fast(htable, key) \
	((htable)->table[key])

// unchecked write of a slot, overwriting whatever is there.
// defined outside the NDEBUG switch so that it exists in every build.
#define hash_insert_at_unconditional(htable, value, key) \
	do {(htable)->table[key] = value;} while (0)

// check against overflow. constructed in such a way that
// we can see in what function the overflow occurred.
#ifndef NDEBUG
#define hash_lookup(htable, key) \
	(( ((unsigned long) key) < HASHTBL_LEN) ? \
	 hash_lookup_fast(htable, key) : \
	 ((void*) (dprintf("BUG: key %d out of bounds in %s\n", \
			   key, __FUNCTION__) & 0L)))

// insert and check against overwriting. also see hash_lookup
// yields the key when stored and -1 when value is NULL.
// NOTE(review): a slot that is already in use yields 0, which cannot be
// told apart from a successful insert at key 0 - confirm with callers
#define hash_insert_at(htable, value, key) \
	((!(htable)->table[key]) ? \
	 (((htable)->table[key] = value) ? key : -1) : \
	 ((dprintf("BUG: key %d in use in %s.%d\n", \
		   key, __FUNCTION__, __LINE__) & 0L)))
#else

#define hash_lookup(htable, key) \
	((((unsigned long) key) < HASHTBL_LEN) ? \
	 hash_lookup_fast(htable, key) : 0)

// insert and check against overwriting. also see hash_lookup
#define hash_insert_at(htable, value, key) \
	((!(htable)->table[key]) ? \
	 (((htable)->table[key] = value) ? key : -1) : 0)
#endif
54 |
/** compute the slot for 'value' in probe round 'runno' (double hashing)
 *
 * @param value   the pointer being hashed (its numeric value is used)
 * @param runno   probe round, 0 for the first attempt
 * @param maxhash table length; must be larger than 2
 * @return a slot index in [0, maxhash)
 *
 * the step size is kept in [1, maxhash-2]: a step of 0 would make every
 * round land on the same slot, so one collision would look like a full
 * table. */
static inline int
hash_calc(void * value, int runno, int maxhash)
{
	int h, k;

	h = ((unsigned long) value) % maxhash;			// primary hash function
	k = 1 + ((unsigned long) value) % (maxhash - 2);	// secondary hash function, never 0

	return (h + runno * k) % maxhash;
}
66 |
67 | int hash_lookup_by_value(struct hashtable *hash, void * value, int nth);
68 |
69 | // the __FUNCTION__ helps me locate collision origins
70 | #define hash_insert(a,b) __hash_insert(a,b,__FUNCTION__)
71 | int __hash_insert(struct hashtable *hash, void * value, const char *func);
72 |
73 | static inline int
74 | hash_del(struct hashtable *hash, int key)
75 | {
76 | #ifndef NDEBUG
77 | check(key >= 0 && key < HASHTBL_LEN);
78 | #endif
79 | hash->table[key] = NULL;
80 | return 0;
81 | }
82 |
83 | /** use the hashtable as a simple list */
84 | static inline
85 | int hash_getnext(struct hashtable *hash, int key)
86 | {
87 | if (key >= HASHTBL_LEN || key < -1)
88 | return -1;
89 |
90 | while (!hash->table[++key])
91 | if (key == HASHTBL_LEN-1)
92 | return -1;
93 |
94 | return key;
95 | }
96 |
97 | // use an integer for key
98 | #define hash_foreach(table, key, ptr) \
99 | for(key = hash_getnext(table, -1);\
100 | key >= 0 && key < HASHTBL_LEN && \
101 | (((ptr) = hash_lookup_fast(table,key)) != NULL);\
102 | key = hash_getnext(table,key))
103 |
// return 0 if the hashtable contains a value, !0 (i.e., true) otherwise
static inline int
hash_empty(struct hashtable *table)
{
	// no used slot reachable from the start means the table is empty
	return hash_getnext(table, -1) < 0;
}
113 |
// count the used slots by walking the table with hash_getnext()
static inline int
hash_len(struct hashtable *table)
{
	int pos;
	int used = 0;

	for (pos = hash_getnext(table, -1); pos >= 0;
	     pos = hash_getnext(table, pos))
		used++;

	return used;
}
127 |
128 | #endif /* WJDB_HASHTABLE */
129 |
130 |
--------------------------------------------------------------------------------
/sh/support/list.c:
--------------------------------------------------------------------------------
1 | // list.[ch]
2 | // a doubly linked list
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #include "../support/macros.h"
10 | #include "list.h"
11 |
12 | struct list * list_insert_sorted(struct list *start, void *id)
13 | {
14 | struct list *cur, *new;
15 |
16 | new = list_create(id);
17 | if (!new)
18 | return NULL;
19 |
20 | if (!start) // start of list: update global startnode
21 | return new;
22 |
23 | // find our spot in the list
24 | // exception : test cur (we test cur->next in general)
25 | if (start->id > id){
26 | start->prev = new;
27 | new->next = start;
28 | return new;
29 | }
30 | if (start->id == id){
31 | // printf("skipping duplicate : cur=%p\n",new->id);
32 | free(new);
33 | return start;
34 | }
35 |
36 | cur = start;
37 | while (cur->next && cur->next->id < id)
38 | cur = cur->next;
39 |
40 | if (!cur->next){ // end of list: append or place just before the end-node
41 | // printf("inserting (%p) : cur=%p new=%p\n",id,cur ? cur->id : "[ ]",new->id);
42 | cur->next = new;
43 | new->prev = cur;
44 | return start;
45 | }
46 |
47 | if (cur->next->id == id){ // exception : found a duplicate. remove
48 | // printf("skipping duplicate : cur=%p\n",new->id);
49 | free(new);
50 | return start;
51 | }
52 |
53 |
54 | // insert into the sorted list
55 | // printf("inserting (%p) : cur=%p new=%p cur->next=%p\n",id,cur ? cur->id : "[ ]",new->id, cur->next->id);
56 | new->next = cur->next;
57 | new->prev = cur;
58 | cur->next->prev = new;
59 | cur->next = new;
60 |
61 | return start;
62 | }
63 |
64 |
--------------------------------------------------------------------------------
/sh/support/list.h:
--------------------------------------------------------------------------------
1 | // list.[ch]
2 | // a doubly linked list
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 | #ifndef WDB_SLIST_H
10 | #define WDB_SLIST_H
11 |
12 | #ifdef __KERNEL__
13 | #include
14 | #include
15 | #include
16 | #else
17 | #include
18 | #include
19 | #include
20 | #endif
21 |
22 | #include "macros.h"
23 |
24 | struct list {
25 | void *id;
26 | struct list *next;
27 | struct list *prev;
28 | };
29 |
30 | static inline struct list * list_create(void *id)
31 | {
32 | struct list * new;
33 |
34 | new = myalloc (sizeof(struct list));
35 | if (!new)
36 | return NULL;
37 | new->id = id;
38 | new->next = NULL;
39 | new->prev = NULL;
40 |
41 | return new;
42 | }
43 |
44 | static inline struct list * list_insert(struct list *start, void *id)
45 | {
46 | struct list *new = list_create(id);
47 |
48 | if (!new)
49 | return NULL;
50 |
51 | if (start){
52 | new->next = start;
53 | start->prev = new;
54 | }
55 | return new;
56 | }
57 |
58 | static inline struct list * list_append(struct list *start, void *id)
59 | {
60 | struct list *new = list_create(id);
61 | struct list* cur;
62 |
63 | if (!new)
64 | return NULL;
65 |
66 | if (!start)
67 | return new;
68 |
69 | cur = start;
70 | while (cur->next)
71 | cur = cur->next;
72 | cur->next = new;
73 | new->prev = cur;
74 | return start;
75 | }
76 |
77 | /** strange function for a list
78 | * used only for duplicate removal
79 | *
80 | * note that the function returns NULL in two
81 | * distinct cases: no 'start', or 'start' is the only item
82 | */
83 | static inline struct list * list_pop(struct list *start)
84 | {
85 | struct list *tmp;
86 |
87 | if (!start)
88 | return NULL;
89 |
90 | tmp = start;
91 | start = start->next;
92 | myfree(tmp);
93 |
94 | return start;
95 | }
96 |
97 | static inline struct list * list_invert(struct list *start)
98 | {
99 | struct list *cur, *tmp=NULL;
100 |
101 | if (!start->next)
102 | return start;
103 |
104 | cur = start;
105 | // swap {prev,next} pointers
106 | while (cur){
107 | tmp = cur->next;
108 | cur->next = cur->prev;
109 | cur->prev = tmp;
110 | tmp = cur;
111 | cur = cur->prev;
112 | }
113 |
114 | return tmp;
115 | }
116 |
117 | // return the item in the list that matches the id
118 | static inline struct list * list_exists(struct list *start, void * id)
119 | {
120 | struct list *cur;
121 |
122 | if (!start)
123 | return NULL;
124 |
125 | // find our spot in the list
126 | cur = start;
127 | while (cur && cur->id != id)
128 | cur = cur->next;
129 | if (!cur)
130 | return NULL;
131 | else
132 | return cur;
133 | }
134 |
135 | /** unlink an item. can be used together with list_foreach */
136 | static inline struct list * list_unlink(struct list *cur)
137 | {
138 | struct list *tmp = NULL;
139 |
140 | if (cur->next){
141 | cur->next->prev = cur->prev;
142 | tmp = cur->next;
143 | }
144 | if (cur->prev){
145 | cur->prev->next = cur->next;
146 | tmp = cur->prev;
147 | }
148 |
149 | if (!tmp)
150 | return NULL; // no cur->next && no cur->prev ? then it's an empty list
151 |
152 | while (tmp->prev)
153 | tmp = tmp->prev;
154 | return tmp; // return the new startnode
155 | }
156 |
/** unlink cur from its list and free it. returns start of the list */
static inline struct list * list_remove(struct list *cur)
{
	struct list *remaining;

	remaining = list_unlink(cur);
	myfree(cur);

	return remaining;
}
164 |
// remove the first item carrying id, if there is one
// returns the start of the list
static inline struct list * list_remove_id(struct list * list, void * id)
{
	struct list *hit = list_exists(list, id);

	return hit ? list_remove(hit) : list;
}
172 |
173 | struct list * list_insert_sorted(struct list *start, void *id);
174 |
175 | static inline int list_len(struct list *list)
176 | {
177 | int i=0;
178 | while (list){
179 | i++;
180 | list = list->next;
181 | }
182 | return i;
183 | }
184 |
185 | #define list_foreach(list, cur) \
186 | for (cur = list; cur; cur = cur->next)
187 |
188 | #define list_destroy(deadlist) \
189 | while (deadlist) deadlist = list_pop(deadlist)
190 |
191 | #endif /* WDB_SLIST_H */
192 |
193 |
--------------------------------------------------------------------------------
/sh/support/lock.h:
--------------------------------------------------------------------------------
1 | // lock.h
2 | // mutual exclusion and other locking support
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | // this is a wrapper around various locking methods
10 | // note: slmutex_trylock returns !0 if a lock is held, 0 otherwise
11 |
12 |
13 | #ifdef __KERNEL__
14 | #include
15 |
16 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
17 |
18 | #include
19 | #define slmutex struct semaphore
20 | #define slmutex_init(my_mutex) init_MUTEX(my_mutex)
21 | #define slmutex_static(my_mutex) DECLARE_MUTEX(my_mutex)
22 | #define slmutex_extern(my_mutex) extern struct semaphore my_mutex
23 | #define slmutex_lock(my_mutex) do {} while (down_interruptible(my_mutex))
24 | #define slmutex_unlock up
25 | #define slmutex_trylock(my_mutex) (down_trylock(my_mutex) ? 0 : 1)
26 |
27 | #else // newer kernel?
28 |
29 | #include
30 | #include
31 | #include "macros.h"
32 | #define slmutex struct mutex
33 | #define slmutex_init(my_mutex) mutex_init(my_mutex)
34 | #define slmutex_static DEFINE_MUTEX
35 | #define slmutex_extern(my_mutex) extern struct mutex my_mutex
36 | #define slmutex_trylock mutex_trylock
37 | #if 1
38 | #define slmutex_lock mutex_lock
39 | #define slmutex_unlock mutex_unlock
40 | #else
41 | #define slmutex_lock(my_mutex) \
42 | do {dprintf("mutex_lock in %s. atomic=%c locked=%c\n", __FUNCTION__, \
43 | in_atomic()?'y':'n', mutex_is_locked(my_mutex)?'y':'n'); \
44 | mutex_lock(my_mutex); \
45 | dprintf("mutex locked\n"); \
46 | } while(0)
47 | #define slmutex_unlock(my_mutex) \
48 | do {dprintf("mutex_unlock in %s. atomic=%c locked=%c\n", __FUNCTION__,\
49 | in_atomic()?'y':'n', mutex_is_locked(my_mutex)?'y':'n'); \
50 | mutex_unlock(my_mutex); \
51 | dprintf("mutex_unlocked\n"); \
52 | } while(0)
53 | #endif
54 | #endif
55 |
56 | #else // userspace?
57 |
58 | #define in_atomic() (0)
59 |
60 | #include
61 | #define slmutex pthread_mutex_t
// no trailing semicolon: like the kernel variants, the macro must be usable
// as a single expression (e.g. in an unbraced if/else or an assignment)
#define slmutex_init(my_mutex) pthread_mutex_init(my_mutex, NULL)
63 | #define slmutex_static(my_mutex) pthread_mutex_t my_mutex = PTHREAD_MUTEX_INITIALIZER
64 | #define slmutex_extern(my_mutex) extern pthread_mutex_t my_mutex
65 | #define slmutex_trylock(my_mutex) (pthread_mutex_trylock(my_mutex) ? 0 : 1)
66 | #if 1
67 | #define slmutex_lock pthread_mutex_lock
68 | #define slmutex_unlock pthread_mutex_unlock
69 | #else
70 | #define slmutex_lock(my_mutex) \
71 | do {dprintf("mutex_lock %p in %s\n", my_mutex, __FUNCTION__); \
72 | pthread_mutex_lock(my_mutex); \
73 | dprintf("mutex locked\n"); \
74 | } while(0)
75 | #define slmutex_unlock(my_mutex) \
76 | do {dprintf("mutex_unlock %p in %s\n", my_mutex, __FUNCTION__); \
77 | pthread_mutex_unlock(my_mutex); \
78 | } while(0)
79 | #endif
80 | #endif
81 |
82 |
--------------------------------------------------------------------------------
/sh/support/log.c:
--------------------------------------------------------------------------------
1 | /**
2 | * support/log.[ch]
3 | * logging, an alternative to d{0,1}printf
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | * */
10 |
11 | #ifdef __KERNEL__
12 | #include
13 | #include
14 | #include
15 | #else
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include "../wrap/file.h"
26 | #endif
27 |
28 | #include "log.h"
29 |
30 | static char whitespace[] = " ";
31 | static int sl_loglevel = LOG_STOP;
32 |
33 | #ifdef __KERNEL__
34 | #define __print(fd, text) printk("%s", text) // weird format because of GCC 4.2 check
35 | #else
36 | #define __print(fd, text) __orig_write(fd, text, strlen(text))
37 | #endif
38 |
39 | static void
40 | __write_whitespace(int fd, int len)
41 | {
42 | #ifndef NDEBUG
43 | if (len > 8) { // hardcoded to be below true length
44 | __print(fd, "out of whitespace\n");
45 | }
46 | #endif
47 | if (len) {
48 | whitespace[len] = '\0';
49 | __print(fd, whitespace);
50 | whitespace[len] = ' ';
51 | }
52 | }
53 |
54 | static void
55 | __write(int fd, enum logtype level, const char *pre, const char *line)
56 | {
57 |
58 |
59 | if (level <= sl_loglevel) {
60 | // write generic header, identifying level, type, etc.
61 | #if !defined __KERNEL__ && !defined NDEBUG
62 | {
63 | char pidbuf[16];
64 | snprintf(pidbuf, 15, "[%u]", getpid());
65 | __print(fd, pidbuf);
66 | }
67 | #endif
68 | __print(fd, pre);
69 |
70 | // add whitespace padding depending on level
71 | __write_whitespace(fd, level);
72 |
73 | // write actual message
74 | __print(fd, line);
75 | __print(fd, "\n");
76 | }
77 | }
78 |
79 | #ifdef __KERNEL__
80 |
81 | // TODO: move to using our own logging buffer
82 | static void
83 | write_log(enum logtype level, const char *pre, const char *line)
84 | {
85 | __write(0, level, pre, line);
86 | }
87 |
88 | int
89 | log_init(void *unused)
90 | {
91 | return 0;
92 | }
93 |
94 | int
95 | log_exit(void *unused)
96 | {
97 | return 0;
98 | }
99 |
100 | #else
101 |
102 | static int logfd = -1;
103 |
104 | /** write a message to the log */
105 | static void
106 | write_log(enum logtype level, const char *pre, const char *line)
107 | {
108 | if (logfd >= 0) {
109 | __write(logfd, level, pre, line);
110 |
111 | // when debugging, copy important messages to screen
112 | #if !defined NDEBUG
113 | if (logfd > 2 && level <= LOG_WARN)
114 | __write(2, level, pre, line);
115 | #endif
116 | }
117 | }
118 |
119 | #define MAXNAME 64
120 | static char name[MAXNAME + 1];
121 |
122 | int
123 | log_init(void* unused)
124 | {
125 | char *tmpdir, *user;
126 | char linkname[MAXNAME + 1];
127 | int loglevel_set = 0;
128 |
129 | if (getenv("LOGLEVEL")) {
130 | sl_loglevel = strtol(getenv("LOGLEVEL"), NULL, 10);
131 | loglevel_set = 1;
132 | }
133 |
134 | // log to terminal?
135 | if (getenv("LOGTERM")) {
136 | logfd = 1;
137 | sl_log(LOG_LOW, "logging to terminal");
138 | return 0;
139 | }
140 |
141 | // get some metadata to name the file descriptively
142 | // NB: this is unsafe. check that it is truly a dir?
143 | tmpdir = getenv("TMPDIR");
144 | if (!tmpdir)
145 | tmpdir = "/tmp";
146 | user = getlogin();
147 | if (!user)
148 | user = getenv("USER");
149 |
150 | // create and open a new log file
151 | snprintf(name, MAXNAME, "%s/streamline.%s.%lu.log",
152 | tmpdir, user, time(NULL));
153 | logfd = __orig_open(name, O_WRONLY | O_CREAT, 0644);
154 | if (logfd < 0) {
155 | fprintf(stderr, "error opening log\n");
156 | return -1;
157 | }
158 |
159 | // set the 'latest' symlink to this file
160 | snprintf(linkname, MAXNAME, "%s/streamline.%s.latest.log",
161 | tmpdir, user);
162 | unlink(linkname); // don't care whether there was a link before
163 | if (link(name, linkname) < 0)
164 | sl_log(LOG_WARN, "error linking log\n");
165 |
166 | // we set this before, but defer output until initialized
167 | if (loglevel_set)
168 | sl_log(LOG_LOW, "set loglevel to %d", sl_loglevel);
169 |
170 | return 0;
171 | }
172 |
173 | // choose the output file descriptor
174 | void
175 | log_set(int fd)
176 | {
177 | logfd = fd;
178 | }
179 |
180 | // get the output file descriptor
181 | int
182 | log_get(void)
183 | {
184 | return logfd;
185 | }
186 |
187 | // get the output filename (if any)
188 | const char *
189 | log_getname(void)
190 | {
191 | if (logfd > 2)
192 | return name;
193 | else
194 | return NULL;
195 | }
196 |
197 | int
198 | log_exit(void* unused)
199 | {
200 | if (logfd >= 0)
201 | __orig_close(logfd);
202 | return 0;
203 | }
204 |
205 | #endif
206 |
207 | void
208 | sl_log(enum logtype type, const char *format, ...)
209 | {
210 | #define SLLOGSZ 256
211 | char buf[SLLOGSZ];
212 | const char *pre;
213 | va_list ap;
214 |
215 | va_start(ap, format);
216 | vsnprintf(buf, SLLOGSZ - 1, format, ap);
217 | va_end(ap);
218 |
219 | switch (type) {
220 | case LOG_BUG : pre = "[BUG ] "; break;
221 | case LOG_ERR : pre = "[ERR ] "; break;
222 | case LOG_WARN : pre = "[WARN ] "; break;
223 | case LOG_MSG : pre = "[Info ] "; break;
224 | case LOG_START : pre = "[Start] "; break;
225 | case LOG_STOP : pre = "[Stop ] "; break;
226 | case LOG_LOW : pre = "[Info ] "; break;
227 | case LOG_ALL : pre = "[Info ] "; break;
228 | default : pre = "[LOGBUG]"; break;
229 | }
230 | write_log(type, pre, buf);
231 | }
232 |
233 | #ifdef __KERNEL__
234 | EXPORT_SYMBOL(sl_log);
235 | EXPORT_SYMBOL(log_init);
236 | EXPORT_SYMBOL(log_exit);
237 | #endif
238 |
239 |
--------------------------------------------------------------------------------
/sh/support/log.h:
--------------------------------------------------------------------------------
1 | /**
2 | * support/log.[ch]
3 | * logging, an alternative to d{0,1}printf
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | * */
10 |
11 | #ifndef SL_SUPPORT_LOG_H
12 | #define SL_SUPPORT_LOG_H
13 |
14 | enum logtype {LOG_BUG = 0, LOG_ERR, LOG_WARN, LOG_MSG, LOG_START, LOG_STOP, LOG_LOW, LOG_ALL};
15 |
16 | void sl_log(enum logtype type, const char *format, ...);
17 |
18 | int log_init(void* unused);
19 | int log_exit(void* unused);
20 |
21 | void log_set(int fd);
22 | int log_get(void);
23 | const char * log_getname(void);
24 |
25 | #endif
26 |
27 |
--------------------------------------------------------------------------------
/sh/support/macros.h:
--------------------------------------------------------------------------------
1 | // macros.[ch]
2 | // simple macros that I reuse often
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // BSD License applies
8 |
9 | #ifndef WJDB_SUPPORT_H
10 | #define WJDB_SUPPORT_H
11 |
12 | ///////////// KERNELSPACE/USERSPACE COMPAT
13 | #ifdef __KERNEL__
14 | #include
15 | #include
16 | #include
17 | #include
18 | #define myalloc(block) kmalloc(block, GFP_KERNEL)
19 | #define myfree(block) kfree(block)
20 | static inline void * mycalloc(size_t nmemb, size_t size)
21 | {
22 | void *data = kmalloc(nmemb * size, GFP_KERNEL);
23 | if (data)
24 | memset(data, 0, nmemb * size);
25 | return data;
26 | }
27 | #define clock() get_cycles()
28 | #define MY_CLOCKRATE 1800000000
29 | #define my_gettimeofday do_gettimeofday
30 | #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29)
31 | #define getpid() (current->pid)
32 | #define getuid() (current_uid())
33 | #define getgid() (current_gid())
34 | #else
35 | #define getpid() (current->pid)
36 | #define getuid() (current->uid)
37 | #define getgid() (current->gid)
38 | #endif
39 | #else
40 | #include
41 | #define myalloc malloc
42 | #define mycalloc calloc
43 | #define myfree(a) free(a)
44 | #define MY_CLOCKRATE CLOCKS_PER_SEC
45 | #define my_gettimeofday(a) gettimeofday(a, NULL)
46 |
47 | // PAGE_SIZE is not defined in userspace
48 | #if defined i386 || defined __x86_64__
49 | #define PAGE_SIZE 4096
50 | #else
51 | #define PAGE_SIZE getpagesize()
52 | #endif
53 | // (un)likely is not defined in userspace
54 | #define likely(x) __builtin_expect(!!(x), 1)
55 | #define unlikely(x) __builtin_expect(!!(x), 0)
56 | #endif /* __KERNEL__ */
57 |
58 | ///////////// 32/64 bit COMPAT
59 | #ifdef __LP64__
60 | #define FMT64 "ld"
61 | #define FMT64U "lu"
62 | #else
63 | #define FMT64 "lld"
64 | #define FMT64U "llu"
65 | #endif
66 |
67 | ///////////// Portable ASSERTIONS
68 | //
69 | // an alternative to assert() that can also work in kernelspace
70 | // NB: it sins against the rule that no control-flow should be in macros...
71 | // ... but then, so does assert.
72 | // Update (17032008): this stuff is outdated, but
73 | // I'm too lazy to clean up all source
74 | #ifdef NDEBUG
75 | #define __check(expression, exec_stmt) \
76 | do {\
77 | if (unlikely((expression) == 0)) { \
78 | exec_stmt; \
79 | } \
80 | } while(0)
81 | #else
82 | #define __check(expression,exec_stmt) \
83 | do { \
84 | if (unlikely((expression) == 0)) { \
85 | dprintf("ASSERT FAILED at %s.%d\n",__FUNCTION__,__LINE__); \
86 | exec_stmt; \
87 | } \
88 | } while(0)
89 | #endif
90 |
91 | #define check_noop(expression) __check(expression,)
92 | #define check(expression) __check(expression, return -1)
93 | #define check_ptr(expression) __check(expression, return NULL)
94 | #define check_void(expression) __check(expression, return )
95 | #define check_goto(expression) __check(expression, goto cleanup)
96 |
97 | #ifdef __KERNEL__
98 | #define assert(stmt) do {if (!(stmt)) panic("ASSERT FAILED at %s.%d\n", __FILE__, __LINE__); } while (0)
99 | #endif
100 |
101 | ///////////// Portable PRINT
102 | #ifdef __KERNEL__
103 | #define aprintf printk
104 | #define dprintf printk
105 | #else
106 | #define aprintf printf
107 | #ifdef NDEBUG
108 | #define dprintf(...)
109 | #else
110 | #define dprintf aprintf
111 | #endif /* NDEBUG */
112 | #endif /* __KERNEL__ */
113 |
114 | ///////////// Other
115 | #define bug() dprintf("(BUG) at %s:%d\n",__FUNCTION__,__LINE__)
116 | #define returnbug(a) do {bug(); return(a);} while (0)
117 |
118 | #ifndef min
119 | #define min(a,b) ( ((a) < (b)) ? (a) : (b) )
120 | #define max(a,b) ( ((a) > (b)) ? (a) : (b) )
121 | #endif
122 |
// non-zero if x has at most one bit set. NB: 0 is reported as a power of
// two as well. The argument is parenthesised so that compound expressions
// such as is_power2(a | b) are evaluated as a whole; x is evaluated twice
#define is_power2(x) (!((x) & ((x) - 1)))
124 |
// distance in bytes from address b up to address a. Both arguments are
// parenthesised before the cast, so pointer expressions such as
// __OFF(base + n, base) are converted as a whole
#define __OFF(a, b) (((unsigned long) (a)) - ((unsigned long) (b)))
126 |
127 | #define IO_IN 1
128 | #define IO_OUT 2
129 |
130 | #endif /* WJDB_SUPPORT_H */
131 |
132 |
--------------------------------------------------------------------------------
/sh/support/math.h:
--------------------------------------------------------------------------------
1 | /* support/math.h
2 | * mathematical support routines
3 | *
4 | * (c) 2009, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | * GPLv2 applies
6 | */
7 |
8 | #ifndef STREAMLINE_SUPPORT_MATH_H
9 | #define STREAMLINE_SUPPORT_MATH_H
10 |
/**
 * Return the base-2 logarithm of 'in', rounded up: the smallest order for
 * which (1UL << order) >= in.
 *
 * order_log2(0) returns 0, as does order_log2(1).
 */
static inline int
order_log2(unsigned long in)
{
	unsigned long value = in;
	int order = 0;

	if (in == 0)
		return 0;

	// locate the highest set bit
	while (value >>= 1)
		order++;

	// round up unless 'in' is an exact power of two. The shift must be
	// carried out in unsigned long: an int shift overflows for orders
	// of 31 and above
	if ((1UL << order) == in)
		return order;
	return order + 1;
}
34 |
35 | #endif /* STREAMLINE_SUPPORT_MATH_H */
36 |
37 |
--------------------------------------------------------------------------------
/sh/support/md5.h:
--------------------------------------------------------------------------------
1 | /* MD5.H - header file for MD5C.C
2 | */
3 |
4 | /* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
5 | rights reserved.
6 |
7 | License to copy and use this software is granted provided that it
8 | is identified as the "RSA Data Security, Inc. MD5 Message-Digest
9 | Algorithm" in all material mentioning or referencing this software
10 | or this function.
11 |
12 | License is also granted to make and use derivative works provided
13 | that such works are identified as "derived from the RSA Data
14 | Security, Inc. MD5 Message-Digest Algorithm" in all material
15 | mentioning or referencing the derived work.
16 |
17 | RSA Data Security, Inc. makes no representations concerning either
18 | the merchantability of this software or the suitability of this
19 | software for any particular purpose. It is provided "as is"
20 | without express or implied warranty of any kind.
21 |
22 | These notices must be retained in any copies of any part of this
23 | documentation and/or software.
24 | */
25 |
26 | #include "macros.h"
27 |
typedef unsigned char *POINTER; /* POINTER defines a generic pointer type */
typedef unsigned short int UINT2; /* UINT2 defines a two byte word */
/* UINT4 defines a four byte word. 'unsigned long' is eight bytes wide on
   LP64 platforms; 'unsigned int' keeps the word at four bytes there too */
typedef unsigned int UINT4;
31 |
32 | struct md5 {
33 | unsigned char hash[16];
34 | };
35 |
36 | /* MD5 context. */
37 | typedef struct {
38 | UINT4 state[4]; /* state (ABCD) */
39 | UINT4 count[2]; /* number of bits, modulo 2^64 (lsb first) */
40 | unsigned char buffer[64]; /* input buffer */
41 | } MD5_CTX;
42 |
43 | // init a structure
44 | void MD5Init(MD5_CTX *);
45 | // add some data to calculate over
46 | void MD5Update(MD5_CTX *, unsigned char *, unsigned int);
47 | // finalize and output the 16B key
48 | void MD5Final(unsigned char [16], MD5_CTX *);
49 |
50 | static inline void
51 | calc_md5(struct md5 *hash, char *data, int len)
52 | {
53 | MD5_CTX ctx;
54 | MD5Init(&ctx);
55 | MD5Update(&ctx, (unsigned char*) data, len);
56 | MD5Final(hash->hash, &ctx);
57 | }
58 |
// print a 16 byte digest through dprintf, one hex value per byte,
// each followed by a dot
static inline void
print_md5(unsigned char *hash)
{
	const unsigned char *cur = hash;
	const unsigned char *end = hash + 16;

	dprintf("MD5=[");
	while (cur != end) {
		dprintf("%hx.", *cur & 0xff);
		cur++;
	}
	dprintf("]\n");
}
69 |
70 |
--------------------------------------------------------------------------------
/sh/support/multihash.c:
--------------------------------------------------------------------------------
1 | /**
2 | * support/multihash.[ch]
3 | * a hashtable with lists for each element, to allow unlimited 'siblings'
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | * */
10 |
11 | #include "multihash.h"
12 |
13 | // mhash_get without optimizations
14 | static inline struct list * __mhash_getelem(struct multihash * mh,
15 | uint16_t key,
16 | uint16_t subkey)
17 | {
18 | struct list * head, * elem;
19 |
20 | head = hash_lookup(&mh->table, key);
21 | list_foreach(head, elem)
22 | if (!subkey--)
23 | return elem;
24 | return NULL;
25 | }
26 |
27 | // optimization: expect this request to be part of a loop
28 | // we cache the pointer and compare {mh, key, subkey} with cached version
29 | //
30 | // nb: this is unsafe in a very specific situation: when the list into
31 | // which the cached pointer points is changed between calls. This is
32 | // highly unlikely. TODO: make certain this cannot occur
33 | void * mhash_get(struct multihash * mh, uint16_t key, uint16_t subkey)
34 | {
35 | static struct multihash * s_mh;
36 | static uint16_t s_key;
37 | static uint16_t s_sub;
38 | static struct list *s_elem;
39 | struct list *elem;
40 |
41 | // next iterator element in the current loop?
42 | if (s_mh == mh && s_key == key && s_sub + 1 == subkey) {
43 | s_sub++;
44 | s_elem = s_elem->next;
45 | elem = s_elem;
46 | }
47 | else if (!subkey) { // start of a new loop?
48 | s_mh = mh;
49 | s_key = key;
50 | s_sub = 0;
51 | s_elem = __mhash_getelem(mh, key, 0);
52 | elem = s_elem;
53 | } else {
54 | elem = __mhash_getelem(mh, key, subkey);
55 | }
56 |
57 | if (elem)
58 | return elem->id;
59 | else
60 | return NULL;
61 | }
62 |
63 | #ifdef __KERNEL__
64 |
65 | #include
66 | EXPORT_SYMBOL(mhash_get);
67 |
68 | #endif
69 |
70 |
--------------------------------------------------------------------------------
/sh/support/multihash.h:
--------------------------------------------------------------------------------
1 | /**
2 | * support/multihash.[ch]
3 | * a hashtable with lists for each element, to allow unlimited 'siblings'
4 | *
5 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * email at willem _-at-_ computer DOT org
7 | *
8 | * 3-clause BSD applies
9 | * */
10 |
11 | #ifndef SL_SUPP_MHASH
12 | #define SL_SUPP_MHASH
13 |
14 | #ifdef __KERNEL__
15 | #include
16 | #else
17 | #include
18 | #endif
19 |
20 | #include "hashtable.h"
21 | #include "list.h"
22 |
23 | struct multihash {
24 | // each element is taken as the head of a list
25 | struct hashtable table;
26 | };
27 |
28 | static inline void mhash_add(struct multihash * mh, uint16_t key, void *value)
29 | {
30 | struct list * list;
31 |
32 | list = hash_lookup(&mh->table, key);
33 | list = list_append(list, value);
34 | mh->table.table[key] = list;
35 | }
36 |
37 | // remove an entry.
38 | // or remove all entries with key 'key' by passing NULL as value
39 | static inline void mhash_del(struct multihash * mh, uint16_t key, void *value)
40 | {
41 | struct list * head, * elem;
42 |
43 | head = hash_lookup(&mh->table, key);
44 | list_foreach(head, elem)
45 | if (!value || elem->id == value) {
46 | head = list_remove(elem);
47 | mh->table.table[key] = head;
48 | return;
49 | }
50 | }
51 |
52 | // get all matches for mh(key). the iterator subkey starts at 0
53 | void * mhash_get(struct multihash * mh, uint16_t key, uint16_t subkey);
54 |
55 | #endif /* SL_SUPP_MHASH */
56 |
57 |
--------------------------------------------------------------------------------
/sh/support/prettyprint.c:
--------------------------------------------------------------------------------
1 | // prettyprint.[ch]
2 | // print non-trivial data to screen
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #ifdef __KERNEL__
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #else
19 | #include
20 | #include
21 | #include
22 | #include
23 | #if linux
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include
29 | #else
30 | #include "../hw/proto.h"
31 | #endif
32 | #endif
33 |
34 | #include "macros.h"
35 | #include "lock.h"
36 | #include "prettyprint.h"
37 |
38 | /** Pretty print data in vertically split [hex | decimal] notation
39 | *
40 | * The passed string must be at least 80 bytes.
41 | * */
42 | int
43 | writedata(char *out, int olen, const char *data, int dlen) {
44 | int i = 0, off = 0;
45 | int elem;
46 |
47 | olen--; // leave room for the terminating '\0'
48 | do {
49 | // phase 1: print hex
50 | for (elem = 0; elem < HEXWIDTH && i + elem < dlen; elem++) {
51 | off += snprintf(out + off, olen - off, "%x%x ",
52 | (data[i + elem] & 0xf0) >> 4,
53 | data[i + elem] & 0xf);
54 |
55 | if (elem == (HEXWIDTH / 2) - 1) {
56 | out[off] = ' ';
57 | off++;
58 | }
59 | }
60 |
61 | // fill out the last line
62 | for (; elem < HEXWIDTH; elem ++) {
63 | out[off] = ' ';
64 | out[off + 1] = ' ';
65 | out[off + 2] = ' ';
66 | off += 3;
67 | }
68 |
69 | // insert room
70 | out[off] = ' ';
71 | out[off + 1] = ' ';
72 | out[off + 2] = ' ';
73 | off += 3;
74 |
75 | // phase 2: print visible
76 | for (elem = 0; elem < HEXWIDTH && i + elem < dlen; elem++) {
77 | if (data[i + elem] >= 32 && data[i + elem] < 126)
78 | out[off + elem] = data[i + elem];
79 | else
80 | out[off + elem] = '.';
81 | }
82 | off += elem;
83 | out[off] = '\n';
84 | off++;
85 | i += HEXWIDTH;
86 | } while (i < dlen && off < olen);
87 |
88 | out[off] = '\n';
89 | off++;
90 | out[off] = '\0';
91 | return off;
92 | }
93 |
/** Dump 'dlen' bytes at 'data' in hex/printable form to the kernel log
 *  (kernelspace) or to file descriptor 1 (userspace).
 *
 *  Nothing is printed when dlen is zero or negative. If the temporary
 *  buffer cannot be allocated, a short error message is printed instead. */
void
displaydata(const char *data, int dlen)
{
	char *out;
	int len, mlen;
#ifndef __KERNEL__
	int ret;
#endif

	if (dlen <= 0)
		return;

	len = 5 * dlen;
	mlen = max(len + 1, 800);

	// allocate the block. difficult only because of
	// possible execution in kernel interrupt context.
#ifdef __KERNEL__
	if (in_interrupt()) {
		out = kmalloc(mlen, GFP_ATOMIC);
		// atomic allocations can fail: only clear what we really got
		if (out)
			memset(out, 0, mlen);
	}
	else
#endif
		out = mycalloc(mlen, 1);

	// malloc failed error handling
	if (!out) {
		const char error[] = "BUG in displaydata\n";
#ifdef __KERNEL__
		printk(error);
#else
		// do not emit the terminating '\0'
		ret = write(1, error, sizeof(error) - 1);
		(void) ret;
#endif
		return;
	}

	// fill in contents and write output. writedata is told the true
	// buffer size so that short inputs are not truncated; it terminates
	// the string itself
	len = writedata(out, mlen, data, dlen);
#ifdef __KERNEL__
	printk("%s", out);
#else
	// write the text only, not the terminating '\0'
	ret = write(1, out, len);
	(void) ret;
#endif
	myfree(out);
}
140 |
/** Prettyprint an IP address as dotted quad, followed by ":port" when
 *  port is non-zero. The port is converted with ntohs() before printing.
 *
 *  At most dlen bytes of 'data' are used, including the terminating '\0'.
 *
 * @returns the length of the complete text (snprintf convention): a value
 *          of dlen or more means that the output was truncated */
int
writeip(char * data, int dlen, const uint8_t* ip, uint16_t port)
{
	int res, room;

#ifdef __KERNEL__
	res = snprintf(data, dlen, "%hu.%hu.%hu.%hu", ip[0], ip[1], ip[2], ip[3]);
#else
	res = snprintf(data, dlen, "%hhu.%hhu.%hhu.%hhu", ip[0], ip[1], ip[2], ip[3]);
#endif
	if (res < 0 || !port)
		return res;

	// if the address was truncated, dlen - res is zero or negative and
	// must not be passed on as a (huge, unsigned) size: only measure
	// the port in that case
	room = (res < dlen) ? dlen - res : 0;
	res += snprintf(room ? data + res : NULL, room, ":%hu", ntohs(port));
	return res;
}
156 |
/** Format an ip address (plus optional port) with writeip and print it
 *  through aprintf, without a trailing newline */
void
displayip(const uint8_t* ip, uint16_t port)
{
	char text[25];

	writeip(text, 24, ip, port);
	aprintf("%s", text);
}
165 |
166 | int
167 | writepktinfo(char *out, int olen, const char *pkt, unsigned int plen)
168 | {
169 | const struct ethhdr *eth = (struct ethhdr *) pkt;
170 | uint16_t sport=0, dport=0, off, i;
171 |
172 | olen--; // leave room for the terminating '\0'
173 | if (plen < ETH_HLEN)
174 | return snprintf(out, olen, "%dB: too small for ethernet\n", plen);
175 |
176 | off = snprintf(out, olen, "eth(type %hx, src ", ntohs(eth->h_proto));
177 | for (i = 0; i < 6; i++)
178 | off += snprintf(out + off, olen - off, "%hx%hx.",
179 | (eth->h_source[i] & 0xf0) >> 4,
180 | eth->h_source[i] & 0xf);
181 | off += snprintf(out + off, olen - off, ", dest ");
182 | for (i = 0; i < 6; i++)
183 | off += snprintf(out + off, olen - off, "%hx%hx.",
184 | (eth->h_dest[i] & 0xf0) >> 4,
185 | eth->h_dest[i] & 0xf);
186 | off += snprintf(out + off, olen - off, ")\n");
187 |
188 | if ((uint16_t) ntohs(eth->h_proto) == ETH_P_IP) {
189 | const struct iphdr *iph;
190 |
191 | iph = (struct iphdr*) (pkt + ETH_HLEN);
192 | off += snprintf(out + off, olen - off,
193 | "ip (proto %hu, ttl %hu, ihl %hu, total_len %hu,"
194 | " src %hu.%hu.%hu.%hu, dst %hu.%hu.%hu.%hu)\n",
195 | iph->protocol,
196 | iph->ttl,
197 | iph->ihl,
198 | ntohs(iph->tot_len),
199 | iph->saddr & 0xff,
200 | (iph->saddr & 0xff00) >> 8,
201 | (iph->saddr & 0xff0000) >> 16,
202 | (iph->saddr &0xff000000) >> 24,
203 | iph->daddr & 0xff,
204 | (iph->daddr & 0xff00) >> 8,
205 | (iph->daddr & 0xff0000) >> 16,
206 | (iph->daddr &0xff000000) >> 24);
207 |
208 | if (iph->protocol == 6 /* TCP */){
209 | // start of udp and tcp headers are identical. but the following is
210 | // a bit hackish, I admit
211 | const struct tcphdr *tcph;
212 |
213 | tcph = (struct tcphdr*) ( ((unsigned long) iph) + (4 * ((char) iph->ihl)));
214 | sport = tcph->source;
215 | dport = tcph->dest;
216 | off += snprintf(out + off, olen - off,
217 | "tcp(len=%u seqno=%u)\n",
218 | ntohs(iph->tot_len), ntohl(tcph->seq));
219 | }
220 | else if (iph->protocol == 17 /* UDP */){
221 | const struct udphdr *trans;
222 |
223 | trans = (struct udphdr*) ( ((unsigned long) iph) + (4 * ((char) iph->ihl)));
224 | sport = trans->source;
225 | dport = trans->dest;
226 | off += snprintf(out + off, olen - off, "udp(len=%u)\n",
227 | ntohs(iph->tot_len));
228 | }
229 | else if (iph->protocol == 1 /* ICMP */){
230 | const struct icmphdr *icmph;
231 |
232 | icmph = (struct icmphdr*) ( ((unsigned long) iph) + (4 * ((char) iph->ihl)));
233 | off += snprintf(out + off, olen - off,
234 | "icmp(len=%u type=%hu seq=%hu)\n",
235 | ntohs(iph->tot_len), icmph->type,
236 | icmph->un.echo.sequence);
237 | }
238 | }
239 | else
240 | off += snprintf(out + off, olen - off, "unknown()\n");
241 |
242 | out[off] = '\n';
243 | off++;
244 | out[off] = '\0';
245 | return off;
246 | }
247 |
/** Print a textual description of a packet's headers (see writepktinfo)
 *  to the kernel log (kernelspace) or to file descriptor 1 (userspace).
 *
 *  A negative length is treated as an empty packet. Nothing is printed
 *  if the temporary buffer cannot be allocated. */
void
displaypktinfo(const void *data, int len)
{
	char *out;
#ifndef __KERNEL__
	int ret;
#endif

	// writepktinfo takes an unsigned length
	if (len < 0)
		len = 0;

	out = myalloc(240); // 3 lines is the upper limit
	if (!out)
		return;

	// writepktinfo terminates the string itself
	len = writepktinfo(out, 239, data, len);
#ifdef __KERNEL__
	printk("%s", out);
#else
	// write the text only, not the terminating '\0'
	ret = write(1, out, strlen(out));
	(void) ret;
#endif
	myfree(out);
}
266 |
267 |
--------------------------------------------------------------------------------
/sh/support/prettyprint.h:
--------------------------------------------------------------------------------
1 | // prettyprint.[ch]
2 | // print non-trivial data to screen
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #ifndef __SL_SUPPORT_PRETTYPRINT_H
10 | #define __SL_SUPPORT_PRETTYPRINT_H
11 |
12 | #define HEXWIDTH 16
13 |
14 | void displaydata(const char *data, int dlen);
15 | void displayip(const uint8_t* ip, uint16_t port);
16 | void displaypktinfo(const void *data, int len);
17 |
18 | int writeip(char * data, int dlen, const uint8_t* ip, uint16_t port);
19 | int writedata(char *out, int olen, const char *data, int dlen);
20 | int writepktinfo(char *out, int olen, const char *pkt, unsigned int plen);
21 |
22 | #endif
23 |
24 |
--------------------------------------------------------------------------------
/sh/support/profiler.c:
--------------------------------------------------------------------------------
1 | /*
2 | Fairly Fast Packet Filter
3 |
4 | `stealth' profiling sourcefile
5 |
6 | Licensed under the GPL
7 | Copyright (c) Herbert Bos, 2003-2004
8 |
9 | this version was created by Willem de Bruijn (wdebruij_AT_liacs.nl), 2004
10 | */
11 |
12 | #ifdef PROFILE
13 |
14 | #ifdef __KERNEL__
15 | #include
16 | #include
17 | #else
18 | #include
19 | #include
20 | #include
21 | #include
22 | #define CAN_PRINT_FLOATS
23 | #endif
24 |
25 | #include "macros.h"
26 | #include "timestamp.h"
27 | #include "profiler.h"
28 |
29 | struct profdata {
30 | tstamp_t cycles[PROFWIDTH][PROFLEN];
31 | uint32_t index[PROFWIDTH];
32 | };
33 |
34 | static struct profdata prof;
35 |
36 | /** save a new processor count. */
37 | inline void __internal_profiler(int class){
38 | /* DIRTY : the first element isn't a diff.
39 | * Either forget about Avg and use only Mean,
40 | * or wait long enough for this element to be overwritten. */
41 | prof.cycles[class][ prof.index[class] ] = timestamp_get();
42 | prof.index[class] = (prof.index[class] + 1) % PROFLEN;
43 | }
44 |
45 | /** start a new processor count calculation. */
46 | inline void __internal_profiler_begin(int class){
47 | prof.cycles[class][prof.index[class] % PROFLEN] = timestamp_get();
48 | }
49 |
50 | /** close a processor count calculation. */
51 | inline void __internal_profiler_end(int class){
52 | prof.cycles[class][prof.index[class]] = timestamp_get() - prof.cycles[class][prof.index[class]];
53 | prof.index[class] = (prof.index[class] + 1) % PROFLEN;
54 | /* note that this would result in a negative result on signed values. We'll have to swap this when calculating results */
55 | }
56 |
57 | /* quicksort implementation from wikipedia.org.
58 |
59 | we could have used the qsort(..) function call in userspace, but
60 | for simplicity we'll use this less optimal algorithm in both kernel-
61 | and userspace.
62 | */
63 | void __qsort(tstamp_t* low, tstamp_t* high)
64 | {
65 | /* We naively use the first value in the array as the pivot */
66 | /* this will not give good performance real usage */
67 |
68 | tstamp_t * lowbound = low + 1; /* the high boundary of the low subarray */
69 | tstamp_t * highbound = high - 1; /* the low boundary of the high subarray */
70 | tstamp_t temp;
71 |
72 | while(lowbound <= highbound) /* partition the array */
73 | {
74 | if(*lowbound < *low) /* compare to pivot */
75 | lowbound++; /* move lowbound toward the middle */
76 | else
77 | {
78 | temp = *lowbound; /* swap *lowbound and *highbound */
79 | *lowbound = *highbound;
80 | *highbound = temp;
81 | highbound--; /* move highbound toward the middle */
82 | }
83 | }
84 |
85 | highbound++; /* move bounds back to the correct positions */
86 | lowbound--;
87 |
88 | temp = *low; /* move the pivot into the middle */
89 | *low = *lowbound;
90 | *lowbound = temp;
91 |
92 | if(low != lowbound) /* recurse on the subarrays */
93 | __qsort(low, lowbound);
94 | if(high != highbound)
95 | __qsort(highbound, high);
96 | }
97 |
98 | tstamp_t __median(int start, int stop, tstamp_t* ldList){
99 | int middle_floor = start+(stop-start)/2;
100 | if ( (((stop-start) % 2) + 1) == 1) // odd number of elements
101 | return ldList[middle_floor];
102 | else
103 | return ((tstamp_t) ( ldList[middle_floor] + ldList[middle_floor + 1]) ) / 2;
104 | }
105 |
106 | /**
107 | * calculate the mean and output information to the standard output queue.
108 | * this function is very similar to the one that outputs to procfs.
109 | * I currently don't have the time to properly merge the two. */
110 | void __internal_profiler_show(void){
111 | int i, j;
112 | tstamp_t Q1, Q2, Q3;
113 |
114 | for (i = 0; i < PROFWIDTH; i++) {
115 | /* are we using this class? */
116 | if (prof.cycles[i][0]) {
117 | int val;
118 | double average;
119 |
120 | /* find the last used element in the list */
121 | val = 0;
122 | while(val < PROFLEN && prof.cycles[i][val])
123 | val++;
124 | if (prof.cycles[i][val])
125 | val++;
126 |
127 | /* calculate the mean (and the lower and upper quartile) */
128 | __qsort(&prof.cycles[i][0], &prof.cycles[i][val-1]);
129 | Q2 = __median(0,val-1, prof.cycles[i]);
130 | if (val % 2) {
131 | Q1 = __median(0, val/2 - 1, prof.cycles[i]);
132 | Q3 = __median(val/2 + 1, val - 1, prof.cycles[i]);
133 | }
134 | else{
135 | Q1 = __median(1, val/2 - 2 , prof.cycles[i]);
136 | Q3 = __median(val/2 + 1, val - 1, prof.cycles[i]);
137 | }
138 | dprintf("class %d: Q1=%llu Q2=%llu Q3=%llu \n",
139 | i, Q1, Q2, Q3);
140 |
141 | /* calculate the average */
142 | average = 0;
143 | for (j=0; j < val; j++)
144 | average += ((double) prof.cycles[i][j]) / val;
145 | dprintf("class %d: average is %lf\n",i, average);
146 | }
147 | }
148 | }
149 |
150 | #ifdef __KERNEL__ /* we can only export to procfs from the kernel, naturally */
151 |
152 | /** static buffer for keeping our fake procfs */
153 | static char procfs_buffer[80 + 5*80 * PROFWIDTH]; /** used for exporting information to procfs */
154 |
155 | /** export information to procfs. */
156 | int __internal_profiler_procfs(char *buffer, char **buffer_location, off_t offset, int buffer_length, int zero){
157 | int len;
158 | int i, j, val;
159 | tstamp_t Q1, Q2, Q3;
160 | double average;
161 |
162 | if (offset > 0)
163 | return 0;
164 |
165 | memset(procfs_buffer,0, 80 + (5*80 * PROFWIDTH) - 1);
166 | len = snprintf(procfs_buffer, 17, "kernel profiler\n\n");
167 | /* Fill the buffer and get its length */
168 | for (i = 0; i < PROFWIDTH; i++){
169 | if (prof.cycles[i][0]){ /* are we using this class? then a 0 value is highly unlikely */
170 | /* find the last used element in the list (might well be PROFLEN */
171 | val=0;
172 | while(val < PROFLEN && prof.cycles[i][val]){
173 | //dprintf("%d,%d:%llu\n",i,val,prof.cycles[i][val]);
174 | val++;
175 | }
176 | if (prof.cycles[i][val])
177 | val++;
178 |
179 | /* calculate the mean (and the lower and upper quartile) */
180 | __qsort(&prof.cycles[i][0],&prof.cycles[i][val-1]);
181 | Q2 = __median(0,val-1, prof.cycles[i]);
182 | if ((val % 2) == 1){ // odd
183 | Q1 = __median(0,val/2 -1, prof.cycles[i]);
184 | // skip the middle element
185 | Q3 = __median(val/2 +1,val-1, prof.cycles[i]);
186 | }
187 | else{
188 | Q1 = __median(1,val/2 -2 , prof.cycles[i]);
189 | // ski the two middle elements
190 | Q3 = __median(val/2 +1,val-1, prof.cycles[i]);
191 | }
192 | len += snprintf(&procfs_buffer[len], 5*80*PROFWIDTH - len, "Profiler Class %d\nMedian (Q2) is %llu; Q1=%llu; Q3=%llu \n", i, Q2, Q1, Q3);
193 |
194 | /* calculate the average */
195 | average = 0;
196 | for (j=0; jowner = THIS_MODULE;
221 | }
222 |
223 | /** unregister from procfs */
224 | void __internal_profiler_procfs_close(void){
225 | remove_proc_entry(PROFILER_PROC_NAME,NULL);
226 | }
227 | #endif /* __KERNEL__ */
228 |
229 | #endif /* PROFILE */
230 |
--------------------------------------------------------------------------------
/sh/support/profiler.h:
--------------------------------------------------------------------------------
1 | /*
2 | Fairly Fast Packet Filter
3 |
4 | `stealth' profiling headerfile
5 |
6 | Licensed under the GPL
7 | Copyright (c) Herbert Bos, 2003-2004
8 |
9 | this version was created by Willem de Bruijn (wdebruij_AT_dds.nl), 2004
10 |
11 | \file
12 | this `class' implements a clockcycle profiler. When the PROFILE
13 | macro is set, the code will calculate processor cycle counts
14 | for arbitrary program flows by converting the profiler(x),
15 | profiler_begin(x) and profiler_end(x) macros into full functions.
16 |
17 | Anytime one of the profiler functions is encountered, data is
18 | collected and stored for later calculation.
19 |
20 | Note that by not setting the PROFILE macro, the macro's are not
21 | expanded and therefore the profiler will have no impact on the
22 | executables' performance.
23 |
24 | [USAGE] define the variable PROFILER_PROC_NAME somewhere to the
25 | name of the file under /proc that you want to create. If left
26 | undefined it will default to "ffpf"
27 |
28 | since profiler uses integers to discriminate among classes, I
29 | suggest you add macro's that expand to unique classkeys either in
30 | your own code or (if your code will be bundled with this package)
31 | below (near PROFILER_HOOK) .
32 |
33 | [NB] the kernel's print function, printk, cannot output floating
34 | point values. Therefore we have resorted to printing the floats
35 | in another notation, namely as hex integers. Use this output
36 | by converting it to the right representation in userspace. For
37 | this you could use a perl or shell script or something. Currently,
38 | no such script has been written.
39 | */
40 |
41 | #ifndef PROFILE_H
42 | #define PROFILE_H
43 |
44 | #ifndef PROFILER_PROC_NAME
45 | #define PROFILER_PROC_NAME "ffpf"
46 | #endif
47 |
48 | #define PROFLEN 101 ///< number of samples per class
49 | #define PROFWIDTH 9 ///< number of classes
50 |
51 | /**
52 | defines to keep track of profile code
53 |
54 | you are advised to use these (add yours),
55 | so that you don't end up with duplicate calls.
56 | */
57 | #define PROFILER_HOOK 1
58 | #define PROFILER_FILTER 2
59 | #define PROFILER_COPY 3
60 | #define PROFILER_COPY_PKT 4
61 | #define PROFILER_TEST 5
62 | #define PROFILER_BPF_CHECK 6
63 | #define PROFILER_SIGMIN 7
64 | #define PROFILER_SIGMAX 8
65 |
66 |
67 | /** the profiler routine stores the processor counter
68 | @param int class. separates streams of statistics.
69 | */
70 | inline void __internal_profiler(int class);
71 |
72 | /** a more explicit version of exec_profiler(..).
73 | use this function and exec_profiler_end to be
74 | sure when data collection starts and finishes.
75 | Consecutive calls to exec_profiler_begin will
76 | reset the temporary databuffer. The endresult
77 | is an offset, instead of the raw values. This
78 | is probably what you want.
79 |
80 | Note that we have no safety checks in place
81 | for buffer overflows. That's your responsibility.
82 | */
83 | inline void __internal_profiler_begin(int class);
84 |
85 | /** see exec_profiler_begin .*/
86 | inline void __internal_profiler_end(int class);
87 |
88 | /** output profiler data. */
89 | inline void __internal_profiler_show(void);
90 |
91 | #ifdef __KERNEL__
92 | /** register to procfs. Automatically calls init_profiler (just in case you forget) */
93 | void __internal_profiler_procfs_open(void);
94 | /** unregister from procfs */
95 | void __internal_profiler_procfs_close(void);
96 | #endif /* __KERNEL__ */
97 |
98 | #ifdef PROFILE
99 |
100 | #define profiler(x) __internal_profiler(x)
101 | #define profiler_begin(x) __internal_profiler_begin(x)
102 | #define profiler_end(x) __internal_profiler_end(x)
103 |
104 | #ifdef __KERNEL__
105 | #define profiler_procfs_open() __internal_profiler_procfs_open()
106 | #define profiler_procfs_close() __internal_profiler_procfs_close()
107 | #else
108 | #define profiler_procfs_open()
109 | #define profiler_procfs_close()
110 | #endif /* __KERNEL__ */
111 |
112 | #define profiler_init() __internal_profiler_init()
113 | #define profiler_show() __internal_profiler_show()
114 |
115 | #else /* PROFILE */
116 |
117 | #define profiler(x)
118 | #define profiler_begin(x)
119 | #define profiler_end(x)
120 |
121 | #define profiler_procfs_open()
122 | #define profiler_procfs_close()
123 |
124 | #define profiler_init()
125 | #define profiler_show()
126 |
127 | #endif /* PROFILE */
128 |
129 | #endif /* PROFILE_H */
130 |
131 |
--------------------------------------------------------------------------------
/sh/support/radix.h:
--------------------------------------------------------------------------------
1 | /* support/radix.[ch]
2 | * implementation of a radix tree
3 | *
4 | * the tree works on arbitrary binary strings. \0 is not necessary
5 | * duplicate keys are not allowed
6 | *
7 | * (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
8 | * GPLv2 applies
9 | */
10 |
11 | #ifndef SL_SUPPORT_RADIX_H
12 | #define SL_SUPPORT_RADIX_H
13 |
14 | #ifdef __KERNEL__
15 | #include
16 | #else
17 | #include
18 | #endif
19 |
20 | struct radix_node;
21 |
22 | /// lookup a value by a tuple.
23 | // returns NULL on failure
24 | void * radix_lookup(struct radix_node *, unsigned char *, size_t);
25 |
26 | /// lookup a value or its nearest predecessor.
27 | // returns NULL on failure
28 | void * radix_lookup_predecessor(struct radix_node *, unsigned char *, size_t);
29 |
30 | /// insert a tuple.
31 | // returns NULL on failure
32 | struct radix_node * radix_insert(struct radix_node *, unsigned char *, size_t,
33 | void *);
34 |
35 | /// delete a tuple by passing the associated node
36 | // returns 1 if we removed the rootnode, 0 otherwise
37 | int radix_delete(struct radix_node *, unsigned char *key, size_t keylen);
38 |
39 | /// destroy an entire tree
40 | void radix_destroy(struct radix_node *);
41 |
42 | #endif
43 |
44 |
--------------------------------------------------------------------------------
/sh/support/serialize.c:
--------------------------------------------------------------------------------
1 | // serialize.[ch]
2 | // pack/unpack a bunch of parameters
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 | #ifndef __KERNEL__
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #else
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 | #endif
22 |
23 | #include "macros.h"
24 | #include "serialize.h"
25 |
// a maximum: this (theoretically) limits stringlen, and can serve as a rudimentary test.
// The value is parenthesized so that the macro expands correctly inside any
// larger expression (e.g. MAXTUPLELEN + 1 or 2 * MAXTUPLELEN).
#define MAXTUPLELEN (1 << 16)
28 |
29 | inline int serialized_totallen(const char *serialized)
30 | {
31 | struct shead *shead;
32 |
33 | if (!serialized)
34 | return 0;
35 | shead = (struct shead*) serialized;
36 | return sizeof(struct shead) + shead->bytelen + (sizeof(uint32_t) * shead->tuplelen);
37 | }
38 |
39 | char * serialize(int argcount, ...)
40 | {
41 | __attribute__((unused)) va_list list;
42 | int i, empty;
43 | uint32_t elemsize, offset;
44 | char *output, *param;
45 | struct shead shead;
46 |
47 | // phase 1 : figure out how much space we need to allocate
48 | shead.bytelen = 0;
49 | shead.tuplelen = 0;
50 | va_start (list, argcount);
51 | for (i = 0; i < (2 * argcount); i += 2) {
52 | shead.bytelen += va_arg(list, uint32_t);
53 | shead.tuplelen++;
54 | empty = va_arg(list, int); // skip the data. gives warning with GCC 4.1. FIXED
55 | }
56 | va_end (list);
57 |
58 | // phase 2 : copy the data
59 | output = myalloc (sizeof(struct shead) + (argcount * sizeof(uint32_t)) + shead.bytelen);
60 | if (!output){
61 | dprintf("failed to allocated memory (size is %d)\n",shead.bytelen);
62 | return NULL;
63 | }
64 |
65 | // write the string header
66 | offset = sizeof(struct shead);
67 | memcpy(output, &shead, offset);
68 |
69 | // place the elements
70 | va_start (list, argcount);
71 | for (i=0;ituplelen > MAXTUPLELEN) // some safety integrity checks.
97 | return NULL;
98 |
99 | oldlen = serialized_totallen(serialized);
100 | newheader = (uint32_t) dlen;
101 | new = myalloc (oldlen + dlen + sizeof(uint32_t));
102 | if (!new)
103 | return NULL;
104 |
105 | memcpy(new,serialized,oldlen);
106 | memcpy(&new[oldlen],&newheader,sizeof(uint32_t));
107 | memcpy(&new[oldlen+sizeof(uint32_t)],data,dlen);
108 | shead = (struct shead *) new;
109 | shead->tuplelen++;
110 | shead->bytelen+=dlen;
111 | myfree(serialized);
112 | return new;
113 | }
114 |
115 | int is_serialized(const char *serialized)
116 | {
117 | struct shead *shead = (struct shead *) serialized;
118 | if (!shead)
119 | return 0;
120 |
121 | if (shead->tuplelen < MAXTUPLELEN && shead->tuplelen <= shead->bytelen)
122 | return 1;
123 | else
124 | return 0;
125 | }
126 |
127 | int deserialize(char *string, ...)
128 | {
129 | va_list list;
130 | struct shead shead;
131 | int i, j, offset;
132 | uint32_t elemsize;
133 | char **ptr;
134 |
135 | memcpy(&shead, string, sizeof(struct shead));
136 | offset = sizeof(struct shead);
137 | va_start (list, string);
138 | for (i=0;ituplelen > elemno);
172 |
173 | offset = sizeof(struct shead);
174 | for(i = 0; i < elemno; i++){
175 | memcpy(&itemlen, serialized + offset, sizeof(uint32_t));
176 | offset += sizeof(uint32_t) + itemlen;
177 | }
178 | if (dlen)
179 | // BUG: uint32_t -> int
180 | memcpy(dlen, serialized + offset, sizeof(uint32_t));
181 | if (*(uint32_t*) serialized + offset == 0) // NULL pointer?
182 | return NULL;
183 | return &serialized[offset + sizeof(uint32_t)]; // skip past header
184 | }
185 |
/*
 * Return a freshly allocated byte-for-byte copy of a serialized package.
 * Both the input validation and the allocation are guarded by check_ptr
 * (macros.h; presumably bails out of the function on failure -- confirm).
 */
char * serialize_duplicate(char * in)
{
	int total;
	char *copy;

	check_ptr (in && is_serialized(in));

	total = serialized_totallen(in);
	copy = myalloc (total);
	check_ptr(copy);

	memcpy(copy, in, total);
	return copy;
}
200 |
201 | char * serialize_merge(char * one, char * two, int del)
202 | {
203 | struct shead *h_out, *h_one, *h_two;
204 | char * out;
205 | int size_one, size_two;
206 |
207 | if (!one && !two)
208 | return NULL;
209 | if (!one){
210 | if (del)
211 | return two;
212 | else
213 | return serialize_duplicate(two);
214 | }
215 | if (!two){
216 | if (del)
217 | return one;
218 | else
219 | return serialize_duplicate(one);
220 | }
221 | check_ptr (is_serialized(one) && is_serialized(two));
222 |
223 | // calculate new information length
224 | h_one = (struct shead *) one;
225 | size_one = h_one->bytelen + (sizeof(uint32_t) * h_one->tuplelen);
226 |
227 | h_two = (struct shead *) two;
228 | size_two= h_two->bytelen + (sizeof(uint32_t) * h_two->tuplelen);
229 |
230 | // allocate space
231 | out = myalloc (sizeof(struct shead) + size_one + size_two);
232 | if (!out)
233 | return NULL;
234 |
235 | // copy information
236 | h_out = (struct shead *) out;
237 | h_out->tuplelen = h_one->tuplelen + h_two->tuplelen;
238 | h_out->bytelen = h_one->bytelen + h_two->bytelen;
239 |
240 | if (!memcpy(&out[sizeof(struct shead)],
241 | &one[sizeof(struct shead)],size_one))
242 | goto cleanup;
243 | if (!memcpy(&out[sizeof(struct shead) + size_one],
244 | &two[sizeof(struct shead)],size_two))
245 | goto cleanup;
246 |
247 | // destroy old information
248 | if (del){
249 | myfree (one);
250 | myfree (two);
251 | }
252 |
253 | return out;
254 |
255 | cleanup:
256 | myfree (out);
257 | return NULL;
258 | }
259 |
260 | #ifdef __KERNEL__
261 | EXPORT_SYMBOL(serialize);
262 | EXPORT_SYMBOL(serialize_add);
263 | EXPORT_SYMBOL(is_serialized);
264 | EXPORT_SYMBOL(serialized_data);
265 | EXPORT_SYMBOL(serialized_totallen);
266 | EXPORT_SYMBOL(serialize_merge);
267 | #endif
268 |
269 |
--------------------------------------------------------------------------------
/sh/support/serialize.h:
--------------------------------------------------------------------------------
1 |
2 | // serialize.[ch]
3 | // pack/unpack a bunch of parameters
4 | //
5 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | // email at willem -_at_- computer.org
7 | //
8 | // BSD license applies
9 |
10 | #ifndef WJDB_SERIALIZE_H
11 | #define WJDB_SERIALIZE_H
12 |
13 | #ifdef __KERNEL__
14 | #include
15 | #include
16 | #else
17 | #include
18 | #include
19 | #include
20 | #endif
21 |
22 | /** serialize a bunch of parameters
23 | *
24 | * call this function with a list of (int, char*) tuples, whereby
25 | * int contains the #bytes that should be copied starting at the pointer.
26 | *
27 | * for example: serial_string = serialize(2, sizeof(int), &my_int, 10, "0612345678");
28 | * NB: for \0 terminated strings, don't forget to serialize strlen+1 bytes, instead of strlen
29 | *
30 | *
31 | * @param argcount contains the number of tuples
32 | * @return a newly allocated memory block containing the serialized structure
33 | *
34 | * the function takes platform specific int's as input, but generates uint32_t's
35 | * for its internal datasize headers.
36 | * */
37 | char * serialize(int argcount, ...);
38 |
39 | /// add an element to an existing serialized string
40 | char * serialize_add(char *serialized, int dlen, const void *data);
41 |
42 | /// deserialize a string that was previously encoded with serialize(..)
43 | int deserialize(char *string, ...);
44 |
/// number of elements encoded in the string, read from the first uint32_t
/// of the package. A NULL package holds 0 elements.
static inline unsigned int serialized_tuplelen(const char *serialized)
{
	const uint32_t *words = (const uint32_t *) serialized;

	return words ? (unsigned int) words[0] : 0;
}
53 |
/// number of payload bytes encoded (metadata size is not included), read
/// from the second uint32_t of the package. A NULL package holds 0 bytes.
static inline unsigned int serialized_bytelen(const char *serialized)
{
	const uint32_t *words = (const uint32_t *) serialized;

	return words ? (unsigned int) words[1] : 0;
}
62 |
63 | /// get the length in bytes of the entire serialized package
64 | int serialized_totallen(const char *serialized);
65 |
66 | /// is this string one of our serialized strings?
67 | int is_serialized(const char *serialized);
68 |
69 | /// create a duplicate
70 | char * serialize_duplicate(char * to);
71 |
72 | /// merge two strings. the two inputs will be destroyed.
73 | /// @param del set to 1 to delete the original strings
74 | char * serialize_merge(char * one, char * two, int del);
75 |
76 | /// return a pointer into the packet string
77 | /// @param dlen may be NULL, otherwise it contains the length of the element on return
78 | /// counting of elements starts at 0
79 | #define ser_data(ser, elemno) serialized_data(ser, elemno, NULL)
80 | const char * serialized_data(const char *serialized, int elemno, int *dlen);
81 |
82 | #define serialized_foreach(serialized, i, data, len) \
83 | for(i=0; \
84 | i
11 | #include
12 | #else
13 | #include
14 | #include
15 | #endif
16 |
17 | #include "slist.h"
18 |
19 | #define INCFACTOR 2 /**< expansion rate */
20 | #define STARTLEN 4
21 |
22 | int
23 | slist_add(struct slist * sl, unsigned long key, void *arg)
24 | {
25 | // realloc
26 | if (sl->used == sl->len) {
27 | struct slist_elem *bak;
28 | int bytelen;
29 |
30 | bytelen = sl->len * sizeof(struct slist_elem);
31 | if (bytelen) {
32 | bak = sl->list;
33 | sl->list = myalloc(INCFACTOR * bytelen);
34 | memcpy(sl->list, bak, bytelen);
35 | myfree(bak);
36 | sl->len *= INCFACTOR;
37 | }
38 | else {
39 | sl->len = STARTLEN;
40 | sl->list = myalloc(sl->len * sizeof(struct slist_elem));
41 | }
42 | }
43 |
44 | // add
45 | sl->list[sl->used].key = key;
46 | sl->list[sl->used].arg = arg;
47 | sl->used++;
48 | return 0;
49 | }
50 |
51 | int
52 | slist_del(struct slist *sl, unsigned long key)
53 | {
54 | int i = __sllist_get(sl, key);
55 |
56 | if (i < 0) {
57 | sl_log(LOG_WARN, "deallocation from slist failed");
58 | return -1;
59 | }
60 |
61 | // place last element into newly created hole
62 | if (i < sl->used - 1) {
63 | sl->list[i].key = sl->list[sl->used - 1].key;
64 | sl->list[i].arg = sl->list[sl->used - 1].arg;
65 | }
66 | sl->used--;
67 |
68 | return 0;
69 | }
70 |
71 |
--------------------------------------------------------------------------------
/sh/support/slist.h:
--------------------------------------------------------------------------------
1 | /* support/slist.[ch]
2 | * A stack-based list implementation optimized for lookup
3 | * (at the cost of insertion and deletion).
4 | *
5 | * (c) 2008, Willem de Bruijn, Vrije Universiteit Amsterdam
6 | * GPLv2 applies
7 | *
8 | * */
9 |
10 | #include "macros.h"
11 | #include "log.h"
12 |
/* One list entry: a lookup key and the opaque pointer stored under it. */
struct slist_elem {
unsigned long key;	/* value compared by __sllist_get() */
void *arg;		/* value handed back by slist_get() */
};
17 |
18 |
/* A stack-based list keeps its (key, arg) elements in one array
 * that grows as needed (see slist_add).
 *
 * No initialization is necessary besides setting len and used to 0.
 * */
struct slist {
int len;	/* number of slots allocated in list */
int used;	/* number of slots currently filled */

struct slist_elem *list;	/* backing array; allocated by slist_add */
};
30 |
31 | static inline int
32 | __sllist_get(struct slist *sl, unsigned long key)
33 | {
34 | int i;
35 |
36 | for (i = 0; i < sl->used; i++) {
37 | if (key == sl->list[i].key)
38 | return i;
39 | }
40 |
41 | return -1;
42 | }
43 |
44 | static inline void *
45 | slist_get(struct slist * sl, unsigned long key)
46 | {
47 | int i = __sllist_get(sl, key);
48 |
49 | if (likely(i >= 0))
50 | return sl->list[i].arg;
51 | else
52 | return NULL;
53 | }
54 |
55 | int slist_add(struct slist * sl, unsigned long key, void *elem);
56 | int slist_del(struct slist *sl, unsigned long key);
57 |
58 |
--------------------------------------------------------------------------------
/sh/support/stack.h:
--------------------------------------------------------------------------------
1 | // stack.h
2 | // very simple stack that used to be part of macros.h
3 | //
4 | // (c) 2008, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // BSD License applies
8 |
9 | #ifndef SL_SUPPORT_STACK_H
10 | #define SL_SUPPORT_STACK_H
11 |
12 | #include "macros.h"
13 |
14 | #define MAGICKEY (-1)
15 |
// STACK
//
// NB: MAGICKEY is NOT an acceptable value
// because stack_pop returns this on an empty stack.
//
// a static stack with private length. can be initialized as
// full or empty
//
// __STACK_EX(keyword, intype, inname, inlen, infull) emits, all prefixed
// with 'keyword' (e.g. static, or nothing):
// - stack_<inname>_len: the capacity, set to inlen
// - stack_<inname>_filled: the number of used slots, starting at infull
// - stack_<inname>_entries: the array of inlen elements of type intype
// - stack_<inname>_clear(clearfill): zeroes the array; resets the fill
// count only when clearfill is non-zero
// - stack_<inname>_push(elem): returns 0, or -1 (with a dprintf) when full
// - stack_<inname>_pop(): returns the top element, or (intype) MAGICKEY
// (with a dprintf) when empty
#define __STACK_EX(keyword, intype, inname, inlen, infull) \
keyword int stack_##inname##_len = inlen; \
keyword int stack_##inname##_filled = infull; \
keyword intype stack_##inname##_entries[inlen]; \
\
__attribute__((unused)) \
keyword void \
stack_##inname##_clear(int clearfill) { \
bzero(stack_##inname##_entries, sizeof(intype) * stack_##inname##_len); \
if (clearfill) \
stack_##inname##_filled = 0; \
} \
keyword inline int \
stack_##inname##_push(intype elem) { \
if (likely(stack_##inname##_filled < stack_##inname##_len)) { \
stack_##inname##_entries[stack_##inname##_filled++] = elem; \
return 0; \
} \
else { \
dprintf("stack " #inname " overflow\n"); \
return -1; \
} \
} \
\
keyword intype \
stack_##inname##_pop(void) { \
if (stack_##inname##_filled) { \
return stack_##inname##_entries[--stack_##inname##_filled]; \
} \
else{ \
dprintf("stack " #inname " underflow\n"); \
return (intype) MAGICKEY; \
} \
}
57 |
58 |
// Instantiate a stack: STATIC_STACK gives every generated symbol static
// linkage, STACK passes an empty keyword.
#define STATIC_STACK(type, name, len, full) __STACK_EX(static, type, name, len, full)
#define STACK(type, name, len, full) __STACK_EX( , type, name, len, full)

// Name-based front ends for the symbols generated by __STACK_EX.
#define stack_clear(inname, fill) stack_##inname##_clear(fill)
#define stack_empty(inname) (unlikely(stack_##inname##_filled == 0))
#define stack_push(inname, elem) stack_##inname##_push(elem)
#define stack_pop(inname) stack_##inname##_pop()

/// Iterate by popping: each pass pops one element into elem, so the loop
// consumes the stack. It stops when the stack is empty or when a popped
// element equals MAGICKEY.
// It may seem complex, with the stack_empty tests,
// but that is only to avoid 'underflow' warnings.
#define stack_foreach(inname, elem) \
for (elem = (stack_empty(inname) ? ((typeof(elem)) MAGICKEY) : stack_pop(inname));\
elem != ((typeof(elem)) MAGICKEY); \
elem = (stack_empty(inname) ? ((typeof(elem)) MAGICKEY) : stack_pop(inname)))
73 |
74 | #endif /* SL_SUPPORT_STACK_H */
75 |
76 |
--------------------------------------------------------------------------------
/sh/support/string.c:
--------------------------------------------------------------------------------
1 | // string.c
2 | // standard string functionality that is not always available
3 | //
4 | // (c) 2008, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #ifdef __KERNEL__
10 | #else
11 | #include
12 | #endif
13 |
14 | #include "macros.h"
15 | #include "log.h"
16 | #include "string.h"
17 |
18 | #ifdef __KERNEL__
19 | long
20 | strtol(const char *in, char **out, int base)
21 | {
22 | long total=0;
23 | int i=0;
24 |
25 | if (out || base != 10) {
26 | sl_log(LOG_ERR, "incomplete strtol called in unsupported mode");
27 | return 0;
28 | }
29 |
30 | while (in[i] >= '0' && in[i] <= '9') {
31 | total *= 10;
32 | total += in[i] - '0';
33 | i++;
34 | }
35 | return total;
36 | }
37 |
// yes, this is an almost exact copy of above. I should've used ##
//
// Parses the leading run of decimal digits of 'in'.
// Only out == NULL and base == 10 are supported.
//
// @param err may be NULL. Otherwise it is always written: 0 when the whole
//            string consisted of digits, 1 when a non-digit was encountered
//            or when the function was called in an unsupported mode.
// @return the parsed value, or 0 in unsupported mode
unsigned long
strtoul_ex(const char *in, char **out, int base, int *err)
{
	unsigned long total = 0;
	int i = 0;

	if (out || base != 10) {
		// never leave the caller's error flag unset
		if (err)
			*err = 1;
		return 0;
	}

	while (in[i] >= '0' && in[i] <= '9') {
		total *= 10;
		total += in[i] - '0';
		i++;
	}

	// set error if non-digit characters were encountered
	if (err)
		*err = (in[i] == '\0') ? 0 : 1;

	return total;
}
64 |
/* strtoul_ex without the error flag; same restrictions on out and base. */
unsigned long
strtoul(const char *in, char **out, int base)
{
	int *ignored_err = NULL;

	return strtoul_ex(in, out, base, ignored_err);
}
70 |
/*
 * Return a newly allocated copy of the NUL-terminated string 'in',
 * or NULL if the allocation fails.
 */
char *
strdup(const char *in)
{
	char *out;
	size_t len;

	len = strlen(in);
	out = myalloc(len + 1);
	if (!out)
		return NULL;	// do not touch the buffer when allocation failed

	memcpy(out, in, len + 1);	// len + 1 also copies the terminating '\0'
	return out;
}
84 | #endif
85 |
/*
 * Parse "a.b.c.d" or "a.b.c.d:port" into a host-order 32-bit address.
 *
 * @param port may be NULL. Otherwise it receives the parsed port, or 0 when
 *             the string carries no (parseable) port.
 * @return the address with 'a' in the most significant byte. Segments that
 *         could not be parsed count as 0, so an unparseable string yields 0.
 */
uint32_t
strtohost(const char *string, uint16_t *port)
{
	unsigned short ipseg[4] = { 0, 0, 0, 0 };
	unsigned short parsed_port = 0;

	if (strchr(string, ':'))
		sscanf(string, "%hu.%hu.%hu.%hu:%hu",
		       &ipseg[0], &ipseg[1], &ipseg[2], &ipseg[3], &parsed_port);
	else
		sscanf(string, "%hu.%hu.%hu.%hu",
		       &ipseg[0], &ipseg[1], &ipseg[2], &ipseg[3]);

	if (port)
		*port = parsed_port;

	// widen before shifting: an unsigned short promotes to int, and
	// shifting a value >= 128 left by 24 would reach the sign bit
	return ((uint32_t) ipseg[0] << 24) + ((uint32_t) ipseg[1] << 16)
	     + ((uint32_t) ipseg[2] << 8) + (uint32_t) ipseg[3];
}
105 |
--------------------------------------------------------------------------------
/sh/support/string.h:
--------------------------------------------------------------------------------
1 | // string.h
2 | // standard string functionality that is not always available
3 | //
4 | // (c) 2007, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // LGPL license applies
8 |
9 | #include "macros.h"
10 |
11 | #ifdef __KERNEL__
12 | #include
13 | #include
14 | #include
15 | #else
16 | #include
17 | #include
18 | #endif
19 |
20 | #ifdef __KERNEL__
21 | long strtol(const char *in, char **out, int base);
22 | unsigned long strtoul(const char *in, char **out, int base);
23 | unsigned long strtoul_ex(const char *in, char **out, int base, int *err);
24 | char * strdup(const char *in);
25 | #endif // __KERNEL__
26 | uint32_t strtohost(const char *string, uint16_t *port);
27 |
// the following *should* not be here, but strnlen is sometimes missing.
// NB: both arguments are evaluated more than once; b is parenthesized so
// that any expression can be passed as the limit.
#ifndef strnlen
#define mystrnlen(a,b) ((strlen(a) > (b)) ? (b) : strlen(a))
#endif
32 |
33 |
--------------------------------------------------------------------------------
/sh/support/timer.c:
--------------------------------------------------------------------------------
1 | // timer.[ch]
2 | // wrapper around OS-specific alarm signals
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // LGPL License applies
8 |
9 | #ifdef __KERNEL__
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #else
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 | #include
28 | #include "../wrap/file.h"
29 | #endif
30 | #include
31 |
32 | #include "../core/config.h"
33 | #include "list.h"
34 | #include "log.h"
35 | #include "macros.h"
36 | #include "timer.h"
37 | #include "lock.h"
38 |
39 | #ifdef __KERNEL__
40 |
41 | /// the number of active tasks.
42 | // is forced to 0 by interrupt_deep to cancel all tasks
43 | int tasks_stop = 0;
44 |
45 | struct list * timers;
46 |
/* Bookkeeping for one kernel-side background task (see task_start). */
struct task {
unsigned long jiffies;	/* delay between runs, computed from the timeout in task_start */
unsigned long recur;	/* remaining runs; decremented by task_callback */
void (*func)(void *);	/* function to execute */
void * arg;		/* argument handed to func */
int forced_stop;	/* set by task_stop to keep the callback from running func */
/* the work item type handed to schedule_delayed_work differs per kernel version */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
struct work_struct dws;
#else
struct delayed_work dws;
#endif
};
59 |
// callback: calls the function and reenables the timer
//
// Unless a global or per-task stop was requested, this decrements the
// recurrence counter, calls the task function and, while recurrences
// remain, reschedules itself. In every other case the task is freed here.
// The two signatures match the work callback prototype before and since
// kernel 2.6.20; the newer one recovers the task with container_of.
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void task_callback(void * arg) {
struct task * t = arg;
#else
static void task_callback(struct work_struct * ws) {
struct delayed_work * dw = container_of(ws, struct delayed_work, work);
struct task * t = container_of(dw, struct task, dws);
#endif

if (!tasks_stop && !t->forced_stop) {
if (t->recur > 0)
t->recur--;

if (unlikely(!t->func))
dprintf("ERR at %s.%d", __FUNCTION__, __LINE__);
else
t->func(t->arg);

// more runs outstanding: requeue and keep the task alive
if (t->recur) {
schedule_delayed_work(&t->dws, t->jiffies);
return;
}
}

// stopped, or last run done: the task owns itself, so release it
kfree(t);
}
87 |
/*
 * Kernel variant: allocate a task and queue it on the shared workqueue.
 *
 * The timeout is converted to jiffies by (HZ * timeout) / 1000000, i.e. it
 * is treated as microseconds. A resulting delay of 0 jiffies queues the
 * work immediately. recur is stored in an unsigned long, so a negative
 * value becomes a very large count.
 *
 * Returns the task as an opaque pointer, or NULL if allocation failed.
 */
void * task_start(void(*func)(void*), void * arg, long recur, long timeout)
{
struct task * t;

// fill our structure
t = kzalloc(sizeof(struct task), GFP_ATOMIC);
if (!t) {
sl_log(LOG_ERR, "out of atomic memory");
return NULL;
}
t->func = func;
t->arg = arg;
t->recur = recur;
t->jiffies = (HZ * timeout) / 1000000;

// initialize the waitqueue element
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
INIT_WORK(&t->dws, task_callback, t);
#else
INIT_DELAYED_WORK(&t->dws, task_callback);
#endif

if (t->jiffies)
schedule_delayed_work(&t->dws, t->jiffies);
else
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
schedule_work(&t->dws);
#else
schedule_work(&t->dws.work);
#endif
return t;
}
120 |
/*
 * Kernel variant: mark the task as stopped, cancel its pending work and
 * wait for the shared workqueue to drain.
 *
 * NOTE(review): task_callback frees the task when it runs with forced_stop
 * set. If cancel_delayed_work removes the pending work instead, nothing
 * visible here frees it -- confirm whether the task leaks on that path.
 */
void task_stop(void *task)
{
struct task * t = task;

t->forced_stop = 1;
cancel_delayed_work(&t->dws);
flush_scheduled_work();
}
129 |
/*
 * Kernel variant: raise the global stop flag, which makes every
 * task_callback free its task instead of running it, then flush the shared
 * workqueue. Always returns 0. The flag is not reset here.
 */
int task_stop_all(void)
{
tasks_stop = 1;
flush_scheduled_work();
return 0;
}
136 |
/*
 * Kernel variant: sleep interruptibly for the given number of microseconds.
 * The task state must be set before schedule_timeout is called.
 * Always returns 0.
 */
int usleep_deep(long timeout)
{
set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(timeout));
return 0;
}
143 |
/* Kernel variant: not supported; logs the call and returns -1. */
int interrupt_deep(void)
{
dprintf("%s called in kernel: unsupported\n", __FUNCTION__);
return -1;
}
149 |
150 | EXPORT_SYMBOL(task_start);
151 | EXPORT_SYMBOL(task_stop);
152 | EXPORT_SYMBOL(task_stop_all);
153 |
154 | EXPORT_SYMBOL(interrupt_deep);
155 | EXPORT_SYMBOL(usleep_deep);
156 |
157 | #else /* !__KERNEL__ */
158 |
/* Bookkeeping for one userspace background task; one thread per task. */
struct task_args {
void (*func)(void*);	/* function to execute */
pthread_t thread;	/* thread running delayed_func for this task */
void * arg;		/* argument handed to func */
long timeout;		/* sleep before each run, passed to usleep_deep */
long recur;		/* runs left; -1 = indefinite, -2 = stop requested by task_stop */
};
166 |
167 | // TODO: use atomic types
168 | static int tasks_stop;
169 | static int tasks_active;
170 |
/*
 * Thread body of a userspace task: sleep, run the task function, repeat.
 *
 * The loop runs while recurrences remain (recur > 0) or the task is
 * indefinite (recur == -1). It ends early when the sleep reports an error
 * or when a global stop was requested.
 *
 * Ownership of the task_args depends on why the loop ended: on a global
 * stop or when the recurrences ran out, the thread detaches itself and
 * frees the structure. Otherwise (task_stop set recur to -2) it is left for
 * task_stop, which joins the thread and frees it.
 */
static void *
delayed_func(void *thread_arg)
{
struct task_args *ta = thread_arg;

while (ta->recur > 0 || ta->recur == -1) {
// a failed or interrupted sleep ends the task as if it had run out
if (usleep_deep(ta->timeout) < 0) {
ta->recur = 0;
break;
}

if (tasks_stop)
break;

ta->func(ta->arg);

if (ta->recur > 0)
ta->recur--;
}

// task_stop_all does not call pthread_join
// and noone is waiting if recurrence ended
if (tasks_stop || ta->recur == 0) {
pthread_detach(ta->thread);
myfree(ta);
tasks_active--;
}

return NULL;
}
201 |
202 | void * task_start(void(*func)(void*), void * arg, long recur, long timeout)
203 | {
204 | struct task_args *ta;
205 |
206 | if (tasks_stop)
207 | return NULL;
208 |
209 | ta = mycalloc(1, sizeof(struct task_args));
210 | ta->func = func;
211 | ta->arg = arg;
212 | ta->timeout = timeout;
213 | ta->recur = recur;
214 | tasks_active++;
215 |
216 | pthread_create(&ta->thread, NULL, delayed_func, ta);
217 | return ta;
218 | }
219 |
220 | // don't allow the purging of all tasks interfere with a single task
221 | // that is to be closed
222 | slmutex_static(task_mutex);
223 |
224 | void task_stop(void *task)
225 | {
226 | struct task_args *ta = task;
227 |
228 | slmutex_lock(&task_mutex);
229 | if (ta) {
230 | if (!tasks_active)
231 | sl_log(LOG_BUG, "waiting for nonexistent task");
232 | ta->recur = -2; // stop, signal that we will wait for the result
233 | interrupt_deep();
234 | pthread_join(ta->thread, NULL);
235 | myfree(ta);
236 | tasks_active--;
237 | }
238 | slmutex_unlock(&task_mutex);
239 | }
240 |
241 | /// May only be called from process context, because it may sleep.
242 | int task_stop_all(void)
243 | {
244 | slmutex_lock(&task_mutex);
245 | if (tasks_active) {
246 | tasks_stop = 1;
247 | if (interrupt_deep()) {
248 | sl_log(LOG_ERR, "Failed to interrupt");
249 | tasks_stop = 0;
250 | tasks_active = 0; // try to set to a 'stable' state
251 | return -1;
252 | }
253 | while (tasks_active) {
254 | sl_log(LOG_MSG, "Waiting for %d tasks to finish\n", tasks_active);
255 | sleep(1);
256 | tasks_stop = 0;
257 | }
258 | }
259 | slmutex_unlock(&task_mutex);
260 |
261 | return 0;
262 | }
263 |
264 | // HACKHACKHACK replace with nice open on load + close on unload
265 | static int shallowfd = -1;
266 | static int deepfd = -1;
267 |
268 | static int
269 | __usleep_sl(int *fd, const char *name, long timeout)
270 | {
271 | if (unlikely((*fd) == -1)) {
272 | (*fd) = __orig_open(name, O_WRONLY);
273 | if ((*fd) < 0) {
274 | sl_log(LOG_ERR, "open timer failure");
275 | return -1;
276 | }
277 | }
278 | return __orig_write((*fd), &timeout, sizeof(long));
279 | }
280 |
281 | // pause the thread for the given number of microseconds
282 | //
283 | // We try to avoid having to use POSIX signals. If kernelspace Streamline
284 | // exists, we use the sysfs timer file, otherwise we rever to SIGALRM
285 | //
286 | // returns 0 on success. timeout left if > 0, signal arrived if < 0
287 | int usleep_deep(long timeout)
288 | {
289 | return __usleep_sl(&deepfd, SYSFS_TIMER_DEEP, timeout);
290 | }
291 |
292 | // pause the thread for the given number of microseconds
293 | // or until a streamline signal arrives
294 | //
295 | // returns 0 on success. timeout left if > 0, signal arrived if < 0
296 | int usleep_shallow(long timeout)
297 | {
298 | return __usleep_sl(&shallowfd, SYSFS_TIMER_SHALLOW, timeout);
299 | }
300 |
301 | int interrupt_deep(void)
302 | {
303 | int fd;
304 | char useless = 0;
305 |
306 | fd = __orig_open(SYSFS_TIMER_INTERRUPT, O_WRONLY);
307 | if (unlikely(fd < 0)) {
308 | sl_log(LOG_LOW, "failed to call deep interrupt. POSIX timers?");
309 | return -1;
310 | }
311 |
312 | if (unlikely(__orig_write(fd, &useless, 1)) < 0)
313 | return -1;
314 |
315 | if (unlikely(__orig_close(fd)))
316 | return -1;
317 |
318 | return 0;
319 | }
320 |
321 | #endif /* !__KERNEL__ */
322 |
323 |
--------------------------------------------------------------------------------
/sh/support/timer.h:
--------------------------------------------------------------------------------
1 | // timer.[ch]
2 | // wrapper around OS-specific alarm signals
3 | //
4 | // (c) 2005, Willem de Bruijn, Vrije Universiteit Amsterdam
5 | // email at willem -_AT_- computer.org
6 | //
7 | // LGPL License applies
8 |
9 | #ifndef SL_SUPPORT_TIMER
10 | #define SL_SUPPORT_TIMER
11 |
12 | #include
13 |
14 | /** execute a task in the background
15 | *
16 | * @param recur sets how often the task should be executed,
17 | * -1 for indefinite or until task_stop is called.
18 | *
19 | * @return an opaque pointer to pass to task_stop */
20 | void * task_start(void(*func)(void*), void * arg, long recur, long timeout);
21 | void task_stop(void *task);
22 |
23 | /** Cancel all outstanding tasks.
24 | * Some tasks may still fire, but all are stopped
25 | * when this function returns.
26 | *
27 | * return 0 on success, failure otherwise */
28 | int task_stop_all(void);
29 |
30 |
31 | #endif
32 |
33 |
--------------------------------------------------------------------------------
/sh/support/timestamp.h:
--------------------------------------------------------------------------------
1 | // timestamp.h
2 | // location independent timestamping
3 | //
4 | // (c) 2005, willem de bruijn, vrije universiteit amsterdam
5 | // email at willem -_at_- computer.org
6 | //
7 | // BSD license applies
8 |
9 |
10 | #ifdef __KERNEL__
11 | #include
12 | #include
13 | #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
14 | #include // TSC available?
15 | #endif
16 | #include
17 | #include // platform independent backup
18 | #ifdef CONFIG_X86_TSC
19 | #include // cpufreq. a lousy method
20 | #include // 64bit cycle-accurate counter
21 | #include
22 | #endif
23 | #else
24 | #include
25 | #include
26 | #include
27 | #endif
28 |
29 | #ifdef CONFIG_ARM
30 | /// hack TODO: fix
31 | #define cpu_khz 600000
32 | #endif
33 |
34 | // timestamp_get
35 | #if (defined __KERNEL__ && defined CONFIG_X86_TSC) || !defined NO_X86
36 | typedef uint64_t tstamp_t;
37 | static inline uint64_t timestamp_get(void) {
38 | register uint32_t count_low, count_high;
39 | asm("rdtsc" :"=a" (count_low), "=d" (count_high));
40 | return ( ((uint64_t) count_high) << 32) + count_low;
41 | }
42 | #else
43 | #ifdef __KERNEL__
44 | typedef cycles_t tstamp_t;
45 | #define timestamp_get get_cycles
46 | #else
47 | typedef clock_t tstamp_t;
48 | #define timestamp_get clock
49 | #endif
50 | #endif
51 |
52 | // timestamp_to
53 | #ifdef __KERNEL__
54 | static inline tstamp_t timestamp_to(int sec, int usec)
55 | {
56 | return (cpu_khz * usec) + (cpu_khz * 1000 * sec);
57 | }
58 | #else
59 | static inline tstamp_t timestamp_to(int sec, int usec)
60 | {
61 | return (CLOCKS_PER_SEC * sec) + ((CLOCKS_PER_SEC/1000000) * usec);
62 | }
63 | #endif
64 |
65 |
66 |
--------------------------------------------------------------------------------
/sh/support/ue_space/ixa_sdk/README:
--------------------------------------------------------------------------------
1 | README for FFPF 1.5.0
2 | by Mihai Cristea, February 27th, 2006.
3 | contact us at ffpf-devel_-AT-_lists.sourceforge.net
4 |
5 | ## 1. Building Intel me_tools for linux
6 |
7 | The linux port of me_tools depends on Intel proprietary files.
8 | We are not allowed to supply these, but they can be found in the Intel IXP SDK 4.x
9 | (CD1_IXASDK_4.1.zip/ixa_sdk_4.1.tgz/me_tools). However, the current sources
10 | support MontaVista Linux only; you can compile them for a common Linux kernel
11 | by applying the included patches (ixa_sdk_4.1_LinuxPatched.tgz).
12 |
13 | ## 2 Prerequisites
14 |
15 | a) Crosscompile toolchain used:
16 | http://ixp2xxx.sourceforge.net/toolchain/armeb-unknown-linux-gnu-gcc-3.3.3-glibc-2.3.2.tar.bz2;
17 | The path to crosstools is set up in the main FFPF Makefile like:
18 | CC_CROSS = /opt/crosstool/armeb-unknown-linux-gnu/gcc-3.3.3-glibc-2.3.2/bin/gcc
19 |
20 | b) Linux sources:
21 | Kernel version 2.6.15 has support for IXP2xxx. However, for older versions and for specific hardware platforms
22 | (e.g., Radisys-2611 or Intel IXDP2850), there are kernel patches at http://ixp2xxx.sourceforge.net/kernel.
23 | The me_tools path to the linux sources (default is /usr/src/linux_arm) is written in: me_tools/XSC_CoreLibs/Makefile
24 |
25 | ## 3 Using me_tools
26 |
27 | The main object code used is the cross-compiled kernel module 'halMev2.ko'. It must be loaded
28 | (insmod halMev2.ko) before using FFPF's ue_manager. This module offers support for loading uengine object code (.uof files),
29 | starting, stopping and resetting uengines, etc.
30 | Moreover, halMev2 is also useful for remote hardware debugging with DevWorkbench running on a Windows host machine.
31 |
32 |
--------------------------------------------------------------------------------
/sh/support/ue_space/ixa_sdk/ixa_sdk_4.1_LinuxPatched.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iassael/cuda-aho-corasick-wu-manber/e2f8b551a1c0b979884963dcef28f363b865154f/sh/support/ue_space/ixa_sdk/ixa_sdk_4.1_LinuxPatched.tgz
--------------------------------------------------------------------------------
/sh/support/ue_space/ixa_sdk/ixa_sdk_4.2_LinuxPatched.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iassael/cuda-aho-corasick-wu-manber/e2f8b551a1c0b979884963dcef28f363b865154f/sh/support/ue_space/ixa_sdk/ixa_sdk_4.2_LinuxPatched.tgz
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/Makefile:
--------------------------------------------------------------------------------
# Builds the IXP2000 microengine support objects and the stand-alone
# disassembler.  CC_CROSS and CFLAGS_ARM are not defined in this file; they
# must be provided by the invoking Makefile or the environment.

# 'all' and 'clean' name actions, not files: keep them working even if a
# file with one of those names ever appears in this directory.
.PHONY: all clean

all: ixp2000-ue-disas \
     ixp2000-lib-userspace.o \
     ixp2000-uengine.o \
     ixp2400-msf.o

clean:
	rm -f ixp2000-ue-disas *.o

# Every object below includes compat.h (directly or through its own header),
# so a change to it must trigger a rebuild.
ixp2000-lib-userspace.o: ixp2000-lib-userspace.c compat.h
	$(CC_CROSS) $(CFLAGS_ARM) -c -o $@ ixp2000-lib-userspace.c

ixp2000-uengine.o: ixp2000-uengine.c ixp2000-uengine.h compat.h
	$(CC_CROSS) $(CFLAGS_ARM) -c -o $@ ixp2000-uengine.c

# -DTEST builds the disassembler source as a stand-alone program.
ixp2000-ue-disas: ixp2000-ue-disas.c ixp2000-ue-disas.h
	$(CC_CROSS) $(CFLAGS_ARM) -DTEST -o $@ ixp2000-ue-disas.c

ixp2400-msf.o: ixp2400-msf.c ixp2400-msf.h ixp2000-msf.h compat.h
	$(CC_CROSS) $(CFLAGS_ARM) -c -o $@ ixp2400-msf.c
20 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/compat.h:
--------------------------------------------------------------------------------
1 | #ifndef __COMPAT_H
2 | #define __COMPAT_H
3 |
4 | #ifndef __KERNEL__
5 | #include
6 | #include
7 |
/* Short fixed-width integer names used throughout the uengine sources. */
typedef u_int8_t  u8;
typedef u_int32_t u32;
typedef u_int64_t u64;

/*
 * Userspace replacements for the register window base addresses.  The
 * objects themselves are defined in ixp2000-lib-userspace.c.
 */
extern void *IXP2000_GLOBAL_REG_VIRT_BASE;
extern void *IXP2000_MSF_VIRT_BASE;
extern void *IXP2000_RBUF_TBUF_VIRT_BASE;
extern void *IXP2000_UENGINE_CSR_VIRT_BASE;
extern void *IXP2000_INT_CONTROLLER_VIRT_BASE;
extern u32 ixp2000_uengine_mask;

/*
 * Global control registers, addressed as byte offsets from
 * IXP2000_GLOBAL_REG_VIRT_BASE.  The base is cast to char * before the
 * offset is added: arithmetic on void * is a GNU extension, whereas
 * char * arithmetic yields the same address in standard C.
 */
#define IXP2000_PRODUCT_ID	((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a00))
#define IXP2000_MISC_CONTROL	((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a04))
#define IXP2000_MSF_CLK_CNTRL	((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a08))
#define IXP2000_RESET0		((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a0c))
#define IXP2000_RESET1		((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a10))
#define IXP2000_CLOCK_CONTROL	((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a14))
#define IXP2000_STRAP_OPTIONS	((volatile u32 *)((char *)IXP2000_GLOBAL_REG_VIRT_BASE + 0x0a18))

/* Userspace versions of kernel helpers (see ixp2000-lib-userspace.c). */
void *ioremap_nocache(unsigned long phys, unsigned long size);
void iounmap_length(volatile void *virt, unsigned long size);
void udelay(unsigned long usecs);
30 |
31 | static inline u32 hweight32(u32 w)
32 | {
33 | u32 res;
34 |
35 | res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
36 | res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
37 | res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
38 | res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
39 | res = (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
40 |
41 | return res;
42 | }
43 |
44 | static inline unsigned int ixdp2x00_master_npu(void)
45 | {
46 | return !!(*IXP2000_STRAP_OPTIONS & 4);
47 | }
48 | #else
49 | #include
50 | #include
51 | #endif
52 |
53 |
54 | #endif
55 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2000-lib-userspace.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generic library functions for the MSF (Media and Switch Fabric
3 | * unit) and microengines found on the Intel IXP2000 series of network
4 | * processors.
5 | *
6 | * Stub functions to make it work from userspace.
7 | *
8 | * Copyright (C) 2004, 2005 Lennert Buytenhek
9 | * Dedicated to Marija Kulikova.
10 | *
11 | * This program is free software; you can redistribute it and/or modify
12 | * it under the terms of the GNU Lesser General Public License as
13 | * published by the Free Software Foundation; either version 2.1 of the
14 | * License, or (at your option) any later version.
15 | */
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include "compat.h"
25 |
26 | #define dprintf(...)
27 |
28 | static int dev_mem_fd;
29 | void *IXP2000_GLOBAL_REG_VIRT_BASE;
30 | void *IXP2000_MSF_VIRT_BASE;
31 | void *IXP2000_RBUF_TBUF_VIRT_BASE;
32 | void *IXP2000_UENGINE_CSR_VIRT_BASE;
33 | void *IXP2000_INT_CONTROLLER_VIRT_BASE;
34 | u32 ixp2000_uengine_mask;
35 |
36 | static void ixp2000_map(void) __attribute__((constructor));
37 | static void ixp2000_map(void)
38 | {
39 | u32 product_id;
40 |
41 | dev_mem_fd = open("/dev/mem", O_RDWR | O_SYNC);
42 | if (dev_mem_fd < 0) {
43 | perror("open(\"/dev/mem\")");
44 | exit(-1);
45 | }
46 |
47 | IXP2000_GLOBAL_REG_VIRT_BASE = ioremap_nocache(0xc0004000, 4096);
48 | IXP2000_MSF_VIRT_BASE = ioremap_nocache(0xc8000000, 8192);
49 | IXP2000_RBUF_TBUF_VIRT_BASE = ioremap_nocache(0xc8002000, 8192);
50 | IXP2000_UENGINE_CSR_VIRT_BASE = ioremap_nocache(0xc0018000, 32768);
51 | IXP2000_INT_CONTROLLER_VIRT_BASE = ioremap_nocache(0xd6000000, 4096);
52 |
53 | // @@@ we should check that we're really on an ixp2000
54 | product_id = *IXP2000_PRODUCT_ID;
55 |
56 | switch ((product_id >> 8) & 0x1fff) {
57 | case 0:
58 | dprintf("detected IXP2800 rev %c%x\n",
59 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf);
60 | ixp2000_uengine_mask = 0x00ff00ff;
61 | break;
62 |
63 | case 1:
64 | dprintf("detected IXP2850 rev %c%x\n",
65 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf);
66 | ixp2000_uengine_mask = 0x00ff00ff;
67 | break;
68 |
69 | case 2:
70 | dprintf("detected IXP2400 rev %c%x\n",
71 | 'A' + ((product_id >> 4) & 0xf), product_id & 0xf);
72 | ixp2000_uengine_mask = 0x000f000f;
73 | break;
74 |
75 | default:
76 | fprintf(stderr, "unknown ixp2000 model (%.8x)\n", product_id);
77 | ixp2000_uengine_mask = 0;
78 | break;
79 | }
80 | }
81 |
82 | static void ixp2000_unmap(void) __attribute__((destructor));
83 | static void ixp2000_unmap(void)
84 | {
85 | if (dev_mem_fd >= 0) {
86 | iounmap_length(IXP2000_GLOBAL_REG_VIRT_BASE, 4096);
87 | iounmap_length(IXP2000_MSF_VIRT_BASE, 8192);
88 | iounmap_length(IXP2000_RBUF_TBUF_VIRT_BASE, 8192);
89 | iounmap_length(IXP2000_UENGINE_CSR_VIRT_BASE, 32768);
90 | iounmap_length(IXP2000_INT_CONTROLLER_VIRT_BASE, 4096);
91 | close(dev_mem_fd);
92 | }
93 | }
94 |
95 | void *ioremap_nocache(unsigned long phys, unsigned long size)
96 | {
97 | void *x;
98 |
99 | x = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, dev_mem_fd, phys);
100 | if (x == MAP_FAILED) {
101 | perror("mmap");
102 | exit(-1);
103 | }
104 |
105 | return x;
106 | }
107 |
108 | void iounmap_length(volatile void *virt, unsigned long size)
109 | {
110 | munmap((void *)virt, size);
111 | }
112 |
113 | void udelay(unsigned long usecs)
114 | {
115 | usleep(usecs);
116 | }
117 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2000-msf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Register definitions for the MSF (Media and Switch Fabric) unit
3 | * found on the Intel IXP2000 series of network processors.
4 | *
5 | * Copyright (C) 2005 Lennert Buytenhek
6 | * Dedicated to Marija Kulikova.
7 | *
8 | * This program is free software; you can redistribute it and/or modify
9 | * it under the terms of the GNU Lesser General Public License as
10 | * published by the Free Software Foundation; either version 2.1 of the
11 | * License, or (at your option) any later version.
12 | */
13 |
14 | #ifndef __IXP2000_MSF_H
15 | #define __IXP2000_MSF_H
16 |
17 | #include "compat.h"
18 |
/*
 * Pointer to the 32-bit MSF register at byte offset x from
 * IXP2000_MSF_VIRT_BASE.  The base is cast to char * first: arithmetic on
 * void * is a GNU extension, while char * arithmetic gives the same
 * address in standard C.
 */
#define IXP2000_MSF_REG(x)	((volatile u32 *)((char *)IXP2000_MSF_VIRT_BASE + (x)))

/* Control, interrupt and status registers. */
#define IXP2000_MSF_RX_CONTROL			IXP2000_MSF_REG(0x0000)
#define IXP2000_MSF_TX_CONTROL			IXP2000_MSF_REG(0x0004)
#define IXP2000_MSF_INTERRUPT_STATUS		IXP2000_MSF_REG(0x0008)
#define IXP2000_MSF_INTERRUPT_ENABLE		IXP2000_MSF_REG(0x000c)
#define IXP2000_MSF_CSIX_TYPE_MAP		IXP2000_MSF_REG(0x0010)
#define IXP2000_MSF_FC_EGRESS_STATUS		IXP2000_MSF_REG(0x0014)
#define IXP2000_MSF_FC_INGRESS_STATUS		IXP2000_MSF_REG(0x0018)
#define IXP2000_MSF_HWM_CONTROL			IXP2000_MSF_REG(0x0024)
#define IXP2000_MSF_FC_STATUS_OVERRIDE		IXP2000_MSF_REG(0x0028)
#define IXP2000_MSF_CLOCK_CONTROL		IXP2000_MSF_REG(0x002c)

/*
 * Receive side.  RX_MPHY_POLL_LIMIT and RX_CALENDAR_LENGTH are two names
 * for the same offset (0x0048).
 */
#define IXP2000_MSF_RX_PORT_MAP			IXP2000_MSF_REG(0x0040)
#define IXP2000_MSF_RBUF_ELEMENT_DONE		IXP2000_MSF_REG(0x0044)
#define IXP2000_MSF_RX_MPHY_POLL_LIMIT		IXP2000_MSF_REG(0x0048)
#define IXP2000_MSF_RX_CALENDAR_LENGTH		IXP2000_MSF_REG(0x0048)
#define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_0	IXP2000_MSF_REG(0x0050)
#define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_1	IXP2000_MSF_REG(0x0054)
#define IXP2000_MSF_RX_THREAD_FREELIST_TIMEOUT_2	IXP2000_MSF_REG(0x0058)

/*
 * Transmit side.  TX_MPHY_POLL_LIMIT and TX_CALENDAR_LENGTH are two names
 * for the same offset (0x0070).
 */
#define IXP2000_MSF_TX_SEQUENCE_0		IXP2000_MSF_REG(0x0060)
#define IXP2000_MSF_TX_SEQUENCE_1		IXP2000_MSF_REG(0x0064)
#define IXP2000_MSF_TX_SEQUENCE_2		IXP2000_MSF_REG(0x0068)
#define IXP2000_MSF_TX_MPHY_POLL_LIMIT		IXP2000_MSF_REG(0x0070)
#define IXP2000_MSF_TX_CALENDAR_LENGTH		IXP2000_MSF_REG(0x0070)

/*
 * Per-channel control registers, four consecutive words each.
 * ixp2400-msf.c indexes them as an array starting at the _0 register.
 */
#define IXP2000_MSF_RX_UP_CONTROL_0		IXP2000_MSF_REG(0x0080)
#define IXP2000_MSF_RX_UP_CONTROL_1		IXP2000_MSF_REG(0x0084)
#define IXP2000_MSF_RX_UP_CONTROL_2		IXP2000_MSF_REG(0x0088)
#define IXP2000_MSF_RX_UP_CONTROL_3		IXP2000_MSF_REG(0x008c)
#define IXP2000_MSF_TX_UP_CONTROL_0		IXP2000_MSF_REG(0x0090)
#define IXP2000_MSF_TX_UP_CONTROL_1		IXP2000_MSF_REG(0x0094)
#define IXP2000_MSF_TX_UP_CONTROL_2		IXP2000_MSF_REG(0x0098)
#define IXP2000_MSF_TX_UP_CONTROL_3		IXP2000_MSF_REG(0x009c)

/* Training and calendar registers. */
#define IXP2000_MSF_TRAIN_DATA			IXP2000_MSF_REG(0x00a0)
#define IXP2000_MSF_TRAIN_CALENDAR		IXP2000_MSF_REG(0x00a4)
#define IXP2000_MSF_TRAIN_FLOW_CONTROL		IXP2000_MSF_REG(0x00a8)
#define IXP2000_MSF_TX_CALENDAR_0		IXP2000_MSF_REG(0x1000)
#define IXP2000_MSF_RX_PORT_CALENDAR_STATUS	IXP2000_MSF_REG(0x1400)
55 |
56 |
57 | #endif
58 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2000-ue-disas.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Disassembler for the IXP2000 microengine (MEv2) instruction format.
3 | *
4 | * Copyright (C) 2005 Lennert Buytenhek
5 | * Dedicated to Marija Kulikova.
6 | *
7 | * This program is free software; you can redistribute it and/or modify
8 | * it under the terms of the GNU Lesser General Public License as
9 | * published by the Free Software Foundation; either version 2.1 of the
10 | * License, or (at your option) any later version.
11 | */
12 |
13 | #ifndef __IXP2000_UE_DISAS_H
14 | #define __IXP2000_UE_DISAS_H
15 |
/*
 * u_int64_t is used in the prototype below; include its definition here so
 * the header can be included on its own, independent of include order.
 */
#include <sys/types.h>

/*
 * NOTE(review): presumably the two accepted values of the contexts_mode
 * argument (4 or 8 contexts per microengine) -- confirm in
 * ixp2000-ue-disas.c.
 */
#define CONTEXTS_4	4
#define CONTEXTS_8	8

/*
 * Disassemble one microengine instruction word.
 *
 * insn:          the instruction word
 * contexts_mode: see CONTEXTS_4 / CONTEXTS_8 above
 *
 * Returns a character string.  Ownership and lifetime of the returned
 * buffer are not visible from this header -- check the implementation
 * before freeing or retaining it.
 */
char *ixp2000_ue_disassemble(u_int64_t insn, int contexts_mode);
20 |
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2000-uengine.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Generic library functions for the microengines found on the Intel
3 | * IXP2000 series of network processors.
4 | *
5 | * Copyright (C) 2004, 2005 Lennert Buytenhek
6 | * Dedicated to Marija Kulikova.
7 | *
8 | * This program is free software; you can redistribute it and/or modify
9 | * it under the terms of the GNU Lesser General Public License as
10 | * published by the Free Software Foundation; either version 2.1 of the
11 | * License, or (at your option) any later version.
12 | */
13 |
14 | #ifndef __IXP2000_UENGINE_H
15 | #define __IXP2000_UENGINE_H
16 |
17 | #include "compat.h"
18 |
19 | struct ixp2000_uengine_code
20 | {
21 | u32 cpu_model_bitmask;
22 | u8 cpu_min_revision;
23 | u8 cpu_max_revision;
24 |
25 | u32 uengine_parameters;
26 |
27 | struct ixp2000_reg_value {
28 | int reg;
29 | u32 value;
30 | } *initial_reg_values;
31 |
32 | int num_insns;
33 | u8 *insns;
34 | };
35 |
36 | u32 ixp2000_uengine_csr_read(int uengine, int offset);
37 | void ixp2000_uengine_csr_write(int uengine, int offset, u32 value);
38 | void ixp2000_uengine_reset(u32 uengine_mask);
39 | void ixp2000_uengine_init_timestamp_counters(void);
40 | void ixp2000_uengine_set_mode(int uengine, u32 mode);
41 | void ixp2000_uengine_load_microcode(int uengine, u8 *ucode, int insns);
42 | void ixp2000_uengine_init_context(int uengine, int context, int pc);
43 | void ixp2000_uengine_start_contexts(int uengine, u8 ctx_mask);
44 | void ixp2000_uengine_stop_contexts(int uengine, u8 ctx_mask);
45 | int ixp2000_uengine_load(int uengine, struct ixp2000_uengine_code *c);
46 |
/*
 * Microengine mode flags.  Each group lists the alternatives for one
 * setting; the alternative with value 0 is the one in effect when no bit of
 * that group is set.
 * NOTE(review): presumably OR-ed together and passed as the `mode` argument
 * of ixp2000_uengine_set_mode() / stored in uengine_parameters -- confirm.
 */

/* number of contexts */
#define IXP2000_UENGINE_8_CONTEXTS		0x00000000
#define IXP2000_UENGINE_4_CONTEXTS		0x80000000

/* PRN update policy */
#define IXP2000_UENGINE_PRN_UPDATE_EVERY	0x40000000
#define IXP2000_UENGINE_PRN_UPDATE_ON_ACCESS	0x00000000

/* NN source */
#define IXP2000_UENGINE_NN_FROM_SELF		0x00100000
#define IXP2000_UENGINE_NN_FROM_PREVIOUS	0x00000000

/* "assert empty" threshold, a 2-bit field */
#define IXP2000_UENGINE_ASSERT_EMPTY_AT_3	0x000c0000
#define IXP2000_UENGINE_ASSERT_EMPTY_AT_2	0x00080000
#define IXP2000_UENGINE_ASSERT_EMPTY_AT_1	0x00040000
#define IXP2000_UENGINE_ASSERT_EMPTY_AT_0	0x00000000

/* scope of the two LM address registers */
#define IXP2000_UENGINE_LM_ADDR1_GLOBAL		0x00020000
#define IXP2000_UENGINE_LM_ADDR1_PER_CONTEXT	0x00000000
#define IXP2000_UENGINE_LM_ADDR0_GLOBAL		0x00010000
#define IXP2000_UENGINE_LM_ADDR0_PER_CONTEXT	0x00000000
61 |
62 |
63 | #endif
64 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2400-msf.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Generic library functions for the MSF (Media and Switch Fabric) unit
3 | * found on the Intel IXP2400 network processor.
4 | *
5 | * Copyright (C) 2004, 2005 Lennert Buytenhek
6 | * Dedicated to Marija Kulikova.
7 | *
8 | * This program is free software; you can redistribute it and/or modify
9 | * it under the terms of the GNU Lesser General Public License as
10 | * published by the Free Software Foundation; either version 2.1 of the
11 | * License, or (at your option) any later version.
12 | */
13 |
14 | #include "ixp2000-msf.h"
15 | #include "ixp2400-msf.h"
16 |
17 | /*
18 | * This is the Intel recommended PLL init procedure as described on
19 | * page 340 of the IXP2400/IXP2800 Programmer's Reference Manual.
20 | */
21 | static void ixp2400_pll_init(struct ixp2400_msf_parameters *mp)
22 | {
23 | int rx_dual_clock;
24 | int tx_dual_clock;
25 | u32 value;
26 |
27 | /*
28 | * If the RX mode is not 1x32, we have to enable both RX PLLs
29 | * (#0 and #1.) The same thing for the TX direction.
30 | */
31 | rx_dual_clock = !!(mp->rx_mode & IXP2400_RX_MODE_WIDTH_MASK);
32 | tx_dual_clock = !!(mp->tx_mode & IXP2400_TX_MODE_WIDTH_MASK);
33 |
34 | /*
35 | * Read initial value.
36 | */
37 | value = *IXP2000_MSF_CLK_CNTRL;
38 |
39 | /*
40 | * Put PLLs in powerdown and bypass mode.
41 | */
42 | value |= 0x0000f0f0;
43 | *IXP2000_MSF_CLK_CNTRL = value;
44 |
45 | /*
46 | * Set single or dual clock mode bits.
47 | */
48 | value &= ~0x03000000;
49 | value |= (rx_dual_clock << 24) | (tx_dual_clock << 25);
50 |
51 | /*
52 | * Set multipliers.
53 | */
54 | value &= ~0x00ff0000;
55 | value |= mp->rxclk01_multiplier << 16;
56 | value |= mp->rxclk23_multiplier << 18;
57 | value |= mp->txclk01_multiplier << 20;
58 | value |= mp->txclk23_multiplier << 22;
59 |
60 | /*
61 | * And write value.
62 | */
63 | *IXP2000_MSF_CLK_CNTRL = value;
64 |
65 | /*
66 | * Disable PLL bypass mode.
67 | */
68 | value &= ~(0x00005000 | rx_dual_clock << 13 | tx_dual_clock << 15);
69 | *IXP2000_MSF_CLK_CNTRL = value;
70 |
71 | /*
72 | * Turn on PLLs.
73 | */
74 | value &= ~(0x00000050 | rx_dual_clock << 5 | tx_dual_clock << 7);
75 | *IXP2000_MSF_CLK_CNTRL = value;
76 |
77 | /*
78 | * Wait for PLLs to lock. There are lock status bits, but IXP2400
79 | * erratum #65 says that these lock bits should not be relied upon
80 | * as they might not accurately reflect the true state of the PLLs.
81 | */
82 | udelay(100);
83 | }
84 |
85 | /*
86 | * Needed according to p480 of Programmer's Reference Manual.
87 | */
88 | static void ixp2400_msf_free_rbuf_entries(struct ixp2400_msf_parameters *mp)
89 | {
90 | int size_bits;
91 | int i;
92 |
93 | /*
94 | * Work around IXP2400 erratum #69 (silent RBUF-to-DRAM transfer
95 | * corruption) in the Intel-recommended way: do not add the RBUF
96 | * elements susceptible to corruption to the freelist.
97 | */
98 | size_bits = mp->rx_mode & IXP2400_RX_MODE_RBUF_SIZE_MASK;
99 | if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_64) {
100 | for (i=1;i<128;i++) {
101 | if (i == 9 || i == 18 || i == 27)
102 | continue;
103 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i;
104 | }
105 | } else if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_128) {
106 | for (i=1;i<64;i++) {
107 | if (i == 4 || i == 9 || i == 13)
108 | continue;
109 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i;
110 | }
111 | } else if (size_bits == IXP2400_RX_MODE_RBUF_SIZE_256) {
112 | for (i=1;i<32;i++) {
113 | if (i == 2 || i == 4 || i == 6)
114 | continue;
115 | *IXP2000_MSF_RBUF_ELEMENT_DONE = i;
116 | }
117 | }
118 | }
119 |
120 | static u32 ixp2400_msf_valid_channels(u32 reg)
121 | {
122 | u32 channels;
123 |
124 | channels = 0;
125 | switch (reg & IXP2400_RX_MODE_WIDTH_MASK) {
126 | case IXP2400_RX_MODE_1x32:
127 | channels = 0x1;
128 | if (reg & IXP2400_RX_MODE_MPHY &&
129 | !(reg & IXP2400_RX_MODE_MPHY_32))
130 | channels = 0xf;
131 | break;
132 |
133 | case IXP2400_RX_MODE_2x16:
134 | channels = 0x5;
135 | break;
136 |
137 | case IXP2400_RX_MODE_4x8:
138 | channels = 0xf;
139 | break;
140 |
141 | case IXP2400_RX_MODE_1x16_2x8:
142 | channels = 0xd;
143 | break;
144 | }
145 |
146 | return channels;
147 | }
148 |
149 | static void ixp2400_msf_enable_rx(struct ixp2400_msf_parameters *mp)
150 | {
151 | u32 value;
152 |
153 | value = *IXP2000_MSF_RX_CONTROL & 0x0fffffff;
154 | value |= ixp2400_msf_valid_channels(mp->rx_mode) << 28;
155 | *IXP2000_MSF_RX_CONTROL = value;
156 | }
157 |
158 | static void ixp2400_msf_enable_tx(struct ixp2400_msf_parameters *mp)
159 | {
160 | u32 value;
161 |
162 | value = *IXP2000_MSF_TX_CONTROL & 0x0fffffff;
163 | value |= ixp2400_msf_valid_channels(mp->tx_mode) << 28;
164 | *IXP2000_MSF_TX_CONTROL = value;
165 | }
166 |
167 |
168 | void ixp2400_msf_init(struct ixp2400_msf_parameters *mp)
169 | {
170 | u32 value;
171 | int i;
172 |
173 | /*
174 | * Init the RX/TX PLLs based on the passed parameter block.
175 | */
176 | ixp2400_pll_init(mp);
177 |
178 | /*
179 | * Reset MSF. Bit 7 in IXP_RESET_0 resets the MSF.
180 | */
181 | value = *IXP2000_RESET0;
182 | *IXP2000_RESET0 = value | 0x80;
183 | *IXP2000_RESET0 = value & ~0x80;
184 |
185 | /*
186 | * Initialise the RX section.
187 | */
188 | *IXP2000_MSF_RX_MPHY_POLL_LIMIT = mp->rx_poll_ports - 1;
189 | *IXP2000_MSF_RX_CONTROL = mp->rx_mode;
190 | for (i=0;i<4;i++)
191 | IXP2000_MSF_RX_UP_CONTROL_0[i] = mp->rx_channel_mode[i];
192 | ixp2400_msf_free_rbuf_entries(mp);
193 | ixp2400_msf_enable_rx(mp);
194 |
195 | /*
196 | * Initialise the TX section.
197 | */
198 | *IXP2000_MSF_TX_MPHY_POLL_LIMIT = mp->tx_poll_ports - 1;
199 | *IXP2000_MSF_TX_CONTROL = mp->tx_mode;
200 | for (i=0;i<4;i++)
201 | IXP2000_MSF_TX_UP_CONTROL_0[i] = mp->tx_channel_mode[i];
202 | ixp2400_msf_enable_tx(mp);
203 | }
204 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/ixp2400-msf.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Generic library functions for the MSF (Media and Switch Fabric) unit
3 | * found on the Intel IXP2400 network processor.
4 | *
5 | * Copyright (C) 2004, 2005 Lennert Buytenhek
6 | * Dedicated to Marija Kulikova.
7 | *
8 | * This program is free software; you can redistribute it and/or modify
9 | * it under the terms of the GNU Lesser General Public License as
10 | * published by the Free Software Foundation; either version 2.1 of the
11 | * License, or (at your option) any later version.
12 | */
13 |
14 | #ifndef __IXP2400_MSF_H
15 | #define __IXP2400_MSF_H
16 |
17 | #include "compat.h"
18 |
19 | struct ixp2400_msf_parameters
20 | {
21 | u32 rx_mode;
22 | unsigned rxclk01_multiplier:2;
23 | unsigned rxclk23_multiplier:2;
24 | unsigned rx_poll_ports:6;
25 | u32 rx_channel_mode[4];
26 |
27 | u32 tx_mode;
28 | unsigned txclk01_multiplier:2;
29 | unsigned txclk23_multiplier:2;
30 | unsigned tx_poll_ports:6;
31 | u32 tx_channel_mode[4];
32 | };
33 |
/* Initialise the MSF unit (PLLs, reset, RX and TX sections) from *mp. */
void ixp2400_msf_init(struct ixp2400_msf_parameters *mp);

/*
 * Two-bit PLL multiplier codes.
 * NOTE(review): presumably the values for the *_multiplier bit-fields of
 * struct ixp2400_msf_parameters -- confirm.
 */
#define IXP2400_PLL_MULTIPLIER_48		0x00
#define IXP2400_PLL_MULTIPLIER_24		0x01
#define IXP2400_PLL_MULTIPLIER_16		0x02
#define IXP2400_PLL_MULTIPLIER_12		0x03
40 |
/*
 * Bits of the rx_mode word.  Alternatives for one setting are listed
 * together; the alternative with value 0 is in effect when the other bit is
 * clear.
 */

/* protocol */
#define IXP2400_RX_MODE_CSIX			0x00400000
#define IXP2400_RX_MODE_UTOPIA_POS		0x00000000

/* bus width (2-bit field) */
#define IXP2400_RX_MODE_WIDTH_MASK		0x00300000
#define IXP2400_RX_MODE_1x16_2x8		0x00300000
#define IXP2400_RX_MODE_4x8			0x00200000
#define IXP2400_RX_MODE_2x16			0x00100000
#define IXP2400_RX_MODE_1x32			0x00000000

/* single/multi PHY options */
#define IXP2400_RX_MODE_MPHY			0x00080000
#define IXP2400_RX_MODE_SPHY			0x00000000
#define IXP2400_RX_MODE_MPHY_32			0x00040000
#define IXP2400_RX_MODE_MPHY_4			0x00000000
#define IXP2400_RX_MODE_MPHY_POLLED_STATUS	0x00020000
#define IXP2400_RX_MODE_MPHY_DIRECT_STATUS	0x00000000

/* CBUS options */
#define IXP2400_RX_MODE_CBUS_FULL_DUPLEX	0x00010000
#define IXP2400_RX_MODE_CBUS_SIMPLEX		0x00000000
#define IXP2400_RX_MODE_MPHY_LEVEL2		0x00004000
#define IXP2400_RX_MODE_MPHY_LEVEL3		0x00000000
#define IXP2400_RX_MODE_CBUS_8BIT		0x00002000
#define IXP2400_RX_MODE_CBUS_4BIT		0x00000000

/* CSIX freelist layout */
#define IXP2400_RX_MODE_CSIX_SINGLE_FREELIST	0x00000200
#define IXP2400_RX_MODE_CSIX_SPLIT_FREELISTS	0x00000000

/* RBUF element size (2-bit field) */
#define IXP2400_RX_MODE_RBUF_SIZE_MASK		0x0000000c
#define IXP2400_RX_MODE_RBUF_SIZE_256		0x00000008
#define IXP2400_RX_MODE_RBUF_SIZE_128		0x00000004
#define IXP2400_RX_MODE_RBUF_SIZE_64		0x00000000
66 |
/*
 * Per-port RX mode bits.
 * NOTE(review): presumably the values stored in rx_channel_mode[] --
 * confirm against the callers.
 */

/* master / slave */
#define IXP2400_PORT_RX_MODE_SLAVE		0x00000040
#define IXP2400_PORT_RX_MODE_MASTER		0x00000000

/* PHY interface */
#define IXP2400_PORT_RX_MODE_POS_PHY_L3		0x00000020
#define IXP2400_PORT_RX_MODE_POS_PHY_L2		0x00000000
#define IXP2400_PORT_RX_MODE_POS_PHY		0x00000010
#define IXP2400_PORT_RX_MODE_UTOPIA		0x00000000

/* parity */
#define IXP2400_PORT_RX_MODE_EVEN_PARITY	0x0000000c
#define IXP2400_PORT_RX_MODE_ODD_PARITY		0x00000008
#define IXP2400_PORT_RX_MODE_NO_PARITY		0x00000000

/* UTOPIA cell size and decode cycles */
#define IXP2400_PORT_RX_MODE_UTOPIA_BIG_CELLS		0x00000002
#define IXP2400_PORT_RX_MODE_UTOPIA_NORMAL_CELLS	0x00000000
#define IXP2400_PORT_RX_MODE_2_CYCLE_DECODE	0x00000001
#define IXP2400_PORT_RX_MODE_1_CYCLE_DECODE	0x00000000
80 |
/*
 * Bits of the tx_mode word.  The layout mirrors the RX mode word; the
 * width and MPHY constants have the same values as their RX counterparts.
 */

/* protocol */
#define IXP2400_TX_MODE_CSIX			0x00400000
#define IXP2400_TX_MODE_UTOPIA_POS		0x00000000

/* bus width (2-bit field) */
#define IXP2400_TX_MODE_WIDTH_MASK		0x00300000
#define IXP2400_TX_MODE_1x16_2x8		0x00300000
#define IXP2400_TX_MODE_4x8			0x00200000
#define IXP2400_TX_MODE_2x16			0x00100000
#define IXP2400_TX_MODE_1x32			0x00000000

/* single/multi PHY options */
#define IXP2400_TX_MODE_MPHY			0x00080000
#define IXP2400_TX_MODE_SPHY			0x00000000
#define IXP2400_TX_MODE_MPHY_32			0x00040000
#define IXP2400_TX_MODE_MPHY_4			0x00000000
#define IXP2400_TX_MODE_MPHY_POLLED_STATUS	0x00020000
#define IXP2400_TX_MODE_MPHY_DIRECT_STATUS	0x00000000

/* CBUS options */
#define IXP2400_TX_MODE_CBUS_FULL_DUPLEX	0x00010000
#define IXP2400_TX_MODE_CBUS_SIMPLEX		0x00000000
#define IXP2400_TX_MODE_MPHY_LEVEL2		0x00004000
#define IXP2400_TX_MODE_MPHY_LEVEL3		0x00000000
#define IXP2400_TX_MODE_CBUS_8BIT		0x00002000
#define IXP2400_TX_MODE_CBUS_4BIT		0x00000000

/* TBUF element size (2-bit field) */
#define IXP2400_TX_MODE_TBUF_SIZE_MASK		0x0000000c
#define IXP2400_TX_MODE_TBUF_SIZE_256		0x00000008
#define IXP2400_TX_MODE_TBUF_SIZE_128		0x00000004
#define IXP2400_TX_MODE_TBUF_SIZE_64		0x00000000
104 |
/*
 * Per-port TX mode bits.
 * NOTE(review): presumably the values stored in tx_channel_mode[] --
 * confirm against the callers.
 */

/* master / slave */
#define IXP2400_PORT_TX_MODE_SLAVE		0x00000040
#define IXP2400_PORT_TX_MODE_MASTER		0x00000000

/* PHY interface */
#define IXP2400_PORT_TX_MODE_POS_PHY		0x00000010
#define IXP2400_PORT_TX_MODE_UTOPIA		0x00000000

/* parity */
#define IXP2400_PORT_TX_MODE_EVEN_PARITY	0x0000000c
#define IXP2400_PORT_TX_MODE_ODD_PARITY		0x00000008
#define IXP2400_PORT_TX_MODE_NO_PARITY		0x00000000

/* UTOPIA cell size and decode cycles */
#define IXP2400_PORT_TX_MODE_UTOPIA_BIG_CELLS	0x00000002
#define IXP2400_PORT_TX_MODE_2_CYCLE_DECODE	0x00000001
#define IXP2400_PORT_TX_MODE_1_CYCLE_DECODE	0x00000000
115 |
116 |
117 | #endif
118 |
--------------------------------------------------------------------------------
/sh/support/ue_space/uengine/version:
--------------------------------------------------------------------------------
1 | version 0.0.36
2 |
3 | http://ixp2xxx.sf.net
4 |
--------------------------------------------------------------------------------
/smatcher.h:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #ifndef SMATCHER_H
17 | #define SMATCHER_H
18 |
19 | #include
20 | #include
21 | #include
22 |
23 | #include
24 | #include
25 | //#include
26 | #include
27 | #include
28 | #include
29 | #include
30 |
31 | #include "../helper2.h"
32 |
//KMP
// Linked node carrying a character label and an id.
// NOTE(review): `supply` is presumably the failure/fallback link and `next`
// the forward link -- confirm in kmp/kmp.c.
struct node {
	char label;
	int id;
	struct node *supply;
	struct node *next;
};
40 |
// One state of the Aho-Corasick automaton.
struct ac_state {
	unsigned int id;

	// Remember which keyword row corresponds to the accepting word
	unsigned int keywordline;

	// The output contains the whole keyword to be printed when a
	// terminal state is encountered
	unsigned char *output;

	struct ac_state *fail;
	struct ac_state **next;
};
48 |
// Handle for a whole automaton: two counters plus a pointer to the state
// named `zerostate`.  Returned by preproc_ac() and preproc_sh().
struct ac_table {
	unsigned int idcounter;
	unsigned int patterncounter;
	struct ac_state *zerostate;
};
54 |
// NOTE(review): this is an object definition (not an extern declaration)
// inside a header, so every translation unit that includes the header
// defines it; that only links when the toolchain merges common symbols.
struct sbom_state **pointer_array;

// One state of the SBOM automaton.
struct sbom_state {
	unsigned int id;

	// Remember which keyword rows correspond to the accepting word
	unsigned int *F;

	// Store the number of different pattern rows that correspond to the
	// same terminal state
	unsigned int num;

	struct sbom_state *fail;
	struct sbom_state **next;
};

// Handle for a whole SBOM automaton; returned by preproc_sbom().
struct sbom_table {
	unsigned int idcounter;
	unsigned int patterncounter;
	struct sbom_state *zerostate;
};
70 |
// NOTE(review): both objects are defined (not merely declared) in this
// header, once per including translation unit.  Going by the neighbouring
// wu_determine_shiftsize() prototype they presumably belong to the
// Wu-Manber code -- confirm.
unsigned short m_nBitsInShift;

unsigned int shiftsize;
74 |
//SOG
//Total number of 3 grams returned by the GET3GRAM macro
#define SIZE_3GRAM_TABLE 0x1000000
#define CHAR_WIDTH_3GRAM 8

//Pack the three bytes at `address` into a 24-bit value: byte 0 in the low
//8 bits, byte 1 in bits 8-15, byte 2 in bits 16-23.
//Each element is narrowed to uint8_t before widening, so the result stays
//below SIZE_3GRAM_TABLE even when `address` points to plain (possibly
//signed) char data; without that step a negative char would sign-extend
//and yield an index far outside the table.
#define GET3GRAM(address) \
	( ((uint32_t)(uint8_t)(address)[0]) \
	+ (((uint32_t)(uint8_t)(address)[1]) << CHAR_WIDTH_3GRAM) \
	+ (((uint32_t)(uint8_t)(address)[2]) << (CHAR_WIDTH_3GRAM << 1)) )

//Bit masks used in 2-level hashing: mask[i] has only bit i set
static const uint8_t mask[] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
/*
uint8_t T8[SIZE_3GRAM_TABLE];
uint16_t T16[SIZE_3GRAM_TABLE];
uint32_t T32[SIZE_3GRAM_TABLE];*/
88 |
// Aho-Corasick (ac/ac.c)
struct ac_table *preproc_ac ( unsigned char **, int, int, int, int *, unsigned int *, unsigned int * );
unsigned search_ac ( unsigned char *, int, struct ac_table * );
void free_ac ( struct ac_table *, int );

// Set Horspool (sh/sh.c); shares struct ac_table with Aho-Corasick
struct ac_table *preproc_sh ( unsigned char **, int, int, int, int *, unsigned int * );
unsigned search_sh ( int, unsigned char *, int, struct ac_table *, int * );
void free_sh ( struct ac_table *, int );

// SBOM (sbom/sbom.c)
struct sbom_table * preproc_sbom ( unsigned char **, int, int, int, int *, unsigned int * );
unsigned search_sbom ( unsigned char **, int, unsigned char *, int, struct sbom_table * );
void free_sbom ( struct sbom_table *, int );

// Wu-Manber (wu/wu.c); the *2 variants take the patterns as one flat buffer
// instead of an array of pointers
void preproc_wu ( unsigned char **, int, int, int, int, int *, int *, int *, int * );
void preproc_wu2 ( unsigned char *, int, int, int, int, int *, int *, int *, int * );
void wu_determine_shiftsize ( int );
//void wu_init ( int, int, int, int **, struct prefixArray ** );
unsigned int search_wu ( unsigned char **, int, int, unsigned char *, int, int *, int *, int *, int * );
unsigned int search_wu2 ( unsigned char *, int, int, unsigned char *, int, int *, int *, int *, int * );
//void wu_free ( int **, struct prefixArray ** );

// SOG, 8-byte variant; preprocessing and search take the same argument list
void preproc_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B );
unsigned int search_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B );

/*
void preproc_sog8 ( unsigned char **, int, int );
void sog_init8 ( int );
unsigned int search_sog8 ( unsigned char **, int, unsigned char *, int, int, int );
void sog_free8 ();
void my_preproc_sog8 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B );

void preproc_sog16 ( unsigned char **, int, int );
void sog_init16 ( int );
unsigned int search_sog16 ( unsigned char **, int, unsigned char *, int, int, int );
void sog_free16 ();

void preproc_sog32 ( unsigned char **, int, int );
void sog_init32 ( int );
unsigned int search_sog32 ( unsigned char **, int, unsigned char *, int, int, int );
void sog_free32 ();
*/

// Knuth-Morris-Pratt preprocessing
void preKmp ( int *, unsigned char *, int );

// Boyer-Moore preprocessing
//void preBmGs ( unsigned char **, int, int [] );
void preBmBc ( unsigned char **, int, int, int, int * );
134 |
135 | #endif
136 |
--------------------------------------------------------------------------------
/sog/sog16.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
// One entry of the pattern table used to verify 16-byte candidates:
// the pattern bytes together with their 32-bit hash.
typedef struct {
    uint32_t hs;       // XOR of the four big-endian 32-bit words of the pattern
    uint8_t  pat[16];  // copy of the pattern bytes
    int      index;    // slot the pattern occupied when it was added (before sorting)
} pat_hs_t16;
26 |
27 | //Scanner that provides final matching for 8-byte patterns with Rabin-Karp.
28 | typedef struct {
29 |
30 | // 2-level hash table
31 | uint8_t hs2[256*32];
32 |
33 | // Table holding the patterns and their hash values. This table is ordered according to the hash values
34 | pat_hs_t16 *patterns;
35 |
36 | // Position of the first empty slot in the pattern table
37 | int pos;
38 |
39 | } sog_scanner16;
40 |
41 | sog_scanner16 *scanner16;
42 |
// Assemble the four bytes starting at address into one big-endian 32-bit value
#define GET32(address) ( ((uint32_t)((address)[0]) << 24) \
                       + ((uint32_t)((address)[1]) << 16) \
                       + ((uint32_t)((address)[2]) << 8)  \
                       + (address)[3] )
44 |
45 | //Compare two patterns using their hash values
46 | static int compSign ( const void* s1, const void* s2 ) {
47 |
48 | uint32_t h1 = ( (pat_hs_t16 *) s1 )->hs;
49 | uint32_t h2 = ( (pat_hs_t16 *) s2 )->hs;
50 |
51 | if (h1 < h2)
52 | return -1;
53 | else if (h1 == h2)
54 | return 0;
55 | else
56 | return 1;
57 | }
58 |
59 | int sog_rkbt_verification16 ( unsigned char *text, int m, int p_size ) {
60 |
61 | uint32_t hs = ( GET32((text)) ^ GET32((text + 4)) ) ^ ( GET32((text + 8)) ^ GET32((text + 12)) );
62 |
63 | /* printf("text = %c%c%c%c\n", *(text), *(text + 1), *(text + 2), *(text + 3));
64 | printf("text = %s\n", text);
65 | printf("text hs = %i\n", hs);
66 | */
67 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs);
68 |
69 | //printf("---%s\n", scanner16->patterns[lookfor].pat);
70 |
71 | /* check 2-level hash */
72 | if ( scanner16->hs2[hs2level >> 3] & mask[hs2level & 0x07] ) {
73 |
74 | int lo = 0;
75 | int hi = p_size - 1;
76 | int mid;
77 | uint32_t hs_pat;
78 |
79 | // do the binary search
80 | while ( hi >= lo ) {
81 |
82 | mid = ( lo + hi ) / 2;
83 | hs_pat = scanner16->patterns[mid].hs;
84 |
85 | //if ( verbose )
86 | //printf(">mid = %i hs = %i hs_pat = %i index = %i pat = %s \n", mid, hs, scanner16->patterns[mid].hs, scanner16->patterns[mid].index, scanner16->patterns[mid].pat);
87 |
88 | if ( hs > hs_pat )
89 | lo = ++mid;
90 |
91 | else if ( hs < hs_pat )
92 | hi = --mid;
93 |
94 | //if text hash equals pattern hash verify the match
95 | else {
96 | // check for duplicates and patterns with same hash
97 | while ( mid > 0 && hs == scanner16->patterns[mid - 1].hs )
98 | mid--;
99 |
100 | do {
101 | //printf("%c%c%c%c%c%c%c%c - %s\n", *(index - 7), *(index - 6), *(index - 5), *(index - 4), *(index - 3), *(index - 2), *(index - 1), *(index - 0), scanner16->patterns[mid].pat );
102 |
103 | if ( memcmp ( text, scanner16->patterns[mid].pat, 16 ) == 0 )
104 | return 1;
105 |
106 | mid++;
107 |
108 | } while ( mid < p_size && hs == scanner16->patterns[mid].hs );
109 |
110 | break;
111 | }
112 | }
113 | }
114 | return -1;
115 | }
116 |
117 | unsigned int search_sog16 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) {
118 |
119 | register uint16_t E = 0xffff;
120 |
121 | int column, matches = 0;
122 |
123 | for ( column = 0; column < n - 2; column++ ) {
124 |
125 | E = (E << 1) | T8[GET3GRAM( text + column )];
126 |
127 | //printbinary(E, 8);
128 |
129 | //printbinary(E & 0x20, 8);
130 |
131 | if ( E & 0x2000 )
132 | continue;
133 |
134 | //printf("potential match at column %i\n", column + B - 1);
135 |
136 | //if ( column > 50 )
137 | // exit ( 0 );
138 |
139 | if ( sog_rkbt_verification16 ( (unsigned char *)text + column - m + B, m, p_size ) != -1 )
140 | matches++;
141 | }
142 |
143 | return matches;
144 |
145 |
146 | }
147 |
148 | static void sog_add_pattern2 ( uint8_t *pattern, int m, int p_size ) {
149 |
150 | int i;
151 |
152 | uint32_t hs;
153 | uint16_t hs2level;
154 |
155 | if ( scanner16->pos < p_size ) {
156 |
157 | //add pattern
158 | for ( i = 0; i < m; i++ )
159 | scanner16->patterns[scanner16->pos].pat[i] = pattern[i];
160 |
161 | //add index
162 | scanner16->patterns[scanner16->pos].index = scanner16->pos;
163 |
164 | // Count hash
165 | scanner16->patterns[scanner16->pos].hs = ( GET32(pattern) ^ GET32(&pattern[4]) ) ^ ( GET32(&pattern[8]) ^ GET32(&pattern[12]) );
166 |
167 | // Count 2-level hash
168 | hs = scanner16->patterns[scanner16->pos].hs;
169 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs );
170 |
171 | scanner16->hs2[hs2level >> 3] |= mask[hs2level & 0x07];
172 | scanner16->pos++;
173 | }
174 | }
175 |
176 | static void sog_add_pattern ( uint8_t *pattern, int m, int p_size ) {
177 |
178 | uint8_t *index = &pattern[0];
179 | uint8_t *limit = &pattern[15];
180 |
181 | unsigned int i = 0;
182 |
183 | uint32_t hs;
184 |
185 | sog_add_pattern2 ( pattern, m, p_size );
186 |
187 | while ( index < limit ) {
188 | hs = GET3GRAM( index );
189 |
190 | //printbinary(hs, 32);
191 | //printf("hs: %i T[hs]: %i ", hs, T[hs]);
192 |
193 | T16[hs] &= 0xffff - ( 1 << i );
194 |
195 | //printbinary(T[hs], 8);
196 |
197 | index++;
198 | i++;
199 | }
200 |
201 | //printf("\n");
202 | }
203 |
204 | static void sog_reset_patterns ( int m ) {
205 |
206 | unsigned int i;
207 |
208 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ )
209 | T16[i] = 0xffff;
210 |
211 | scanner16->pos = 0;
212 |
213 | // Reset 2-level hashes
214 | for ( i = 0; i < 32 * 256; i++ )
215 | scanner16->hs2[i] = 0x00;
216 | }
217 |
218 | void sog_init16 ( int p_size ) {
219 |
220 | scanner16 = malloc ( sizeof ( sog_scanner16 ) );
221 | scanner16->patterns = malloc ( p_size * sizeof ( pat_hs_t16 ) );
222 | }
223 |
224 | void sog_free16 () {
225 |
226 | free ( scanner16->patterns );
227 | free ( scanner16 );
228 | }
229 |
230 | void preproc_sog16 ( unsigned char **pattern, int m, int p_size ) {
231 |
232 | unsigned int i;
233 |
234 | sog_reset_patterns ( p_size );
235 |
236 | for ( i = 0; i < p_size; i++ )
237 | sog_add_pattern ( pattern[i], m, p_size );
238 |
239 | //Sort the patterns so that binary search can be used
240 | qsort ( scanner16->patterns, p_size, sizeof( pat_hs_t16 ), compSign );
241 | }
242 |
243 |
--------------------------------------------------------------------------------
/sog/sog32.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
// One entry of the pattern table used to verify 32-byte candidates:
// the pattern bytes together with their 32-bit hash.
typedef struct {
    uint32_t hs;       // XOR of the eight big-endian 32-bit words of the pattern
    uint8_t  pat[32];  // copy of the pattern bytes
    int      index;    // slot the pattern occupied when it was added (before sorting)
} pat_hs_t32;
26 |
27 | //Scanner that provides final matching for 8-byte patterns with Rabin-Karp.
28 | typedef struct {
29 |
30 | // 2-level hash table
31 | uint8_t hs2[256*32];
32 |
33 | // Table holding the patterns and their hash values. This table is ordered according to the hash values
34 | pat_hs_t32 *patterns;
35 |
36 | // Position of the first empty slot in the pattern table
37 | int pos;
38 |
39 | } sog_scanner32;
40 |
41 | sog_scanner32 *scanner32;
42 |
// Assemble the four bytes starting at address into one big-endian 32-bit value
#define GET32(address) ( ((uint32_t)((address)[0]) << 24) \
                       + ((uint32_t)((address)[1]) << 16) \
                       + ((uint32_t)((address)[2]) << 8)  \
                       + (address)[3] )
44 |
45 | //Compare two patterns using their hash values
46 | static int compSign ( const void* s1, const void* s2 ) {
47 |
48 | uint32_t h1 = ( (pat_hs_t32 *) s1 )->hs;
49 | uint32_t h2 = ( (pat_hs_t32 *) s2 )->hs;
50 |
51 | if (h1 < h2)
52 | return -1;
53 | else if (h1 == h2)
54 | return 0;
55 | else
56 | return 1;
57 | }
58 |
59 | int sog_rkbt_verification32 ( unsigned char *text, int m, int p_size ) {
60 |
61 | uint32_t hs = ( GET32((text)) ^ GET32((text + 4)) ) ^ ( GET32((text + 8)) ^ GET32((text + 12)) ) ^ ( GET32((text + 16)) ^ GET32((text + 20)) ) ^ ( GET32((text + 24)) ^ GET32((text + 28)) );
62 |
63 | /* printf("text = %c%c%c%c\n", *(text), *(text + 1), *(text + 2), *(text + 3));
64 | printf("text = %s\n", text);
65 | printf("text hs = %i\n", hs);
66 | */
67 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs);
68 |
69 | //printf("---%s\n", scanner32->patterns[lookfor].pat);
70 |
71 | /* check 2-level hash */
72 | if ( scanner32->hs2[hs2level >> 3] & mask[hs2level & 0x07] ) {
73 |
74 | int lo = 0;
75 | int hi = p_size - 1;
76 | int mid;
77 | uint32_t hs_pat;
78 |
79 | // do the binary search
80 | while ( hi >= lo ) {
81 |
82 | mid = ( lo + hi ) / 2;
83 | hs_pat = scanner32->patterns[mid].hs;
84 |
85 | //printf(" mid = %i hs = %i hs_pat = %i index = %i pat = %s \n", mid, hs, scanner32->patterns[mid].hs, scanner32->patterns[mid].index, scanner32->patterns[mid].pat);
86 |
87 | if ( hs > hs_pat )
88 | lo = ++mid;
89 |
90 | else if ( hs < hs_pat )
91 | hi = --mid;
92 |
93 | //if text hash equals pattern hash verify the match
94 | else {
95 |
96 | // check for duplicates and patterns with same hash
97 | while ( mid > 0 && hs == scanner32->patterns[mid - 1].hs )
98 | mid--;
99 |
100 | do {
101 | //printf("%c%c%c%c%c%c%c%c - %s\n", *(index - 7), *(index - 6), *(index - 5), *(index - 4), *(index - 3), *(index - 2), *(index - 1), *(index - 0), scanner32->patterns[mid].pat );
102 |
103 | if ( memcmp ( text, scanner32->patterns[mid].pat, 32 ) == 0 )
104 | return 1;
105 |
106 | mid++;
107 |
108 | } while ( mid < p_size && hs == scanner32->patterns[mid].hs );
109 |
110 | break;
111 | }
112 | }
113 | }
114 | return -1;
115 | }
116 |
117 | unsigned int search_sog32 ( unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) {
118 |
119 | register uint32_t E = 0xffffffff;
120 |
121 | int column, matches = 0;
122 |
123 | for ( column = 0; column < n - 2; column++ ) {
124 |
125 | //printf("\ncolumn %i character %c\n", column, *(text + column));
126 |
127 | //printbinary(E, 8);
128 |
129 | //printf("hs: %i T[hs]: %i ", GET3GRAM( text + column ), T8[GET3GRAM( text + column )] );
130 |
131 | //printbinary(T8[GET3GRAM( text + column )], 8);
132 |
133 | E = (E << 1) | T8[GET3GRAM( text + column )];
134 |
135 | //printbinary(E, 8);
136 |
137 | //printbinary(E & 0x20, 8);
138 |
139 | if ( E & 0x20000000 )
140 | continue;
141 |
142 | //printf("potential match at column %i\n", column + B - 1);
143 |
144 | if ( sog_rkbt_verification32 ( (unsigned char *)text + column - m + B, m, p_size ) != -1 )
145 | matches++;
146 |
147 | }
148 |
149 | return matches;
150 | }
151 |
152 | static void sog_add_pattern2 ( uint8_t *pattern, int m, int p_size ) {
153 |
154 | int i;
155 |
156 | uint32_t hs;
157 | uint16_t hs2level;
158 |
159 | if ( scanner32->pos < p_size ) {
160 |
161 | //add pattern
162 | for ( i = 0; i < m; i++ )
163 | scanner32->patterns[scanner32->pos].pat[i] = pattern[i];
164 |
165 | //add index
166 | scanner32->patterns[scanner32->pos].index = scanner32->pos;
167 |
168 | // Count hash
169 | scanner32->patterns[scanner32->pos].hs = ( GET32(pattern) ^ GET32(&pattern[4]) ) ^ ( GET32(&pattern[8]) ^ GET32(&pattern[12]) ) ^ ( GET32(&pattern[16]) ^ GET32(&pattern[20]) ) ^ ( GET32(&pattern[24]) ^ GET32(&pattern[28]) );
170 |
171 | //printf("scanner32->patterns[%i].hs = %i\n", scanner32->pos, scanner32->patterns[scanner32->pos].hs);
172 |
173 | // Count 2-level hash
174 | hs = scanner32->patterns[scanner32->pos].hs;
175 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs );
176 |
177 | scanner32->hs2[hs2level >> 3] |= mask[hs2level & 0x07];
178 |
179 | scanner32->pos++;
180 | }
181 | }
182 |
183 | static void sog_add_pattern ( uint8_t *pattern, int m, int p_size ) {
184 |
185 | uint8_t *index = &pattern[0];
186 | uint8_t *limit = &pattern[31];
187 |
188 | unsigned int i = 0;
189 |
190 | uint32_t hs;
191 |
192 | sog_add_pattern2 ( pattern, m, p_size );
193 |
194 | while ( index < limit ) {
195 | hs = GET3GRAM( index );
196 |
197 | //printbinary(hs, 32);
198 | //printf("hs: %i T[hs]: %i ", hs, T[hs]);
199 |
200 | T32[hs] &= 0xffffffff - ( 1 << i );
201 |
202 | //printbinary(T[hs], 8);
203 |
204 | index++;
205 | i++;
206 | }
207 |
208 | //printf("\n");
209 | }
210 |
211 | static void sog_reset_patterns ( int m ) {
212 |
213 | unsigned int i;
214 |
215 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ )
216 | T32[i] = 0xffffffff;
217 |
218 | scanner32->pos = 0;
219 |
220 | // Reset 2-level hashes
221 | for ( i = 0; i < 32 * 256; i++ )
222 | scanner32->hs2[i] = 0x00;
223 | }
224 |
225 | void sog_init32 ( int p_size) {
226 |
227 | scanner32 = malloc ( sizeof ( sog_scanner32 ) );
228 | scanner32->patterns = malloc ( p_size * sizeof ( pat_hs_t32 ) );
229 | }
230 |
231 | void sog_free32 () {
232 |
233 | free ( scanner32->patterns );
234 | free ( scanner32 );
235 | }
236 |
237 | void preproc_sog32 ( unsigned char **pattern, int m, int p_size ) {
238 |
239 | unsigned int i;
240 |
241 | sog_reset_patterns ( p_size );
242 |
243 | for ( i = 0; i < p_size; i++ )
244 | sog_add_pattern ( pattern[i], m, p_size );
245 |
246 | //Sort the patterns so that binary search can be used
247 | qsort ( scanner32->patterns, p_size, sizeof( pat_hs_t32 ), compSign );
248 | }
249 |
250 |
--------------------------------------------------------------------------------
/sog/sog8.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
// Assemble the four bytes starting at address into one big-endian 32-bit value
#define GET32(address) ( ((uint32_t)((address)[0]) << 24) \
                       + ((uint32_t)((address)[1]) << 16) \
                       + ((uint32_t)((address)[2]) << 8)  \
                       + (address)[3] )
19 |
20 | //quicksort implementation
// Exchange the two int values that a and b point to.
void swap ( int *a, int *b ) {

    const int first  = *a;
    const int second = *b;

    *a = second;
    *b = first;
}
27 |
// Exchange two 32-bit hash values.
static void my_sort_swap_hs ( uint32_t *a, uint32_t *b ) {

    const uint32_t t = *a;
    *a = *b;
    *b = t;
}

// Exchange two index values.
static void my_sort_swap_index ( int *a, int *b ) {

    const int t = *a;
    *a = *b;
    *b = t;
}

// Quicksort of hs[beg .. end-1] into ascending order; index[] is permuted in
// lock-step so that index[i] keeps travelling with hs[i].
//
// hs, index : parallel arrays
// beg       : first position of the range (inclusive)
// end       : end of the range (exclusive)
//
// Fixes:
//  - hash values are exchanged through a uint32_t helper; the previous code passed
//    uint32_t pointers to swap(int *, int *), an incompatible pointer conversion
//  - the pivot is kept as uint32_t instead of being squeezed through an int
//  - only the smaller partition is sorted recursively and the larger one is handled
//    by the loop, which bounds the recursion depth to O(log n)
// The sequence of exchanges, and therefore the resulting order, is unchanged.
void my_sort ( uint32_t *hs, int *index, int beg, int end ) {

    while ( end > beg + 1 ) {

        const uint32_t piv = hs[beg];
        int l = beg + 1, r = end;

        // Partition: values <= pivot stay on the left, larger ones move to the right
        while ( l < r ) {

            if ( hs[l] <= piv )
                l++;
            else {
                --r;
                my_sort_swap_hs ( &hs[l], &hs[r] );
                my_sort_swap_index ( &index[l], &index[r] );
            }
        }

        // Put the pivot between the two partitions
        --l;
        my_sort_swap_hs ( &hs[l], &hs[beg] );
        my_sort_swap_index ( &index[l], &index[beg] );

        if ( l - beg < end - r ) {
            my_sort ( hs, index, beg, l );
            beg = r;
        }
        else {
            my_sort ( hs, index, r, end );
            end = l;
        }
    }
}
50 |
51 | int sog_rkbt_verification8 ( uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, unsigned char *text, int m, int p_size ) {
52 |
53 | uint32_t hs = GET32((text)) ^ GET32((text + 4));
54 | uint16_t hs2level = (uint16_t) ((hs >> 16) ^ hs);
55 |
56 | // check 2-level hash
57 | if ( scanner_hs2[hs2level >> 3] & mask[hs2level & 0x07] ) {
58 |
59 | int lo = 0;
60 | int hi = p_size - 1;
61 | int mid;
62 | uint32_t hs_pat;
63 |
64 | // do the binary search
65 | while ( hi >= lo ) {
66 |
67 | mid = ( lo + hi ) / 2;
68 | hs_pat = scanner_hs[mid];
69 |
70 | if ( hs > hs_pat )
71 | lo = ++mid;
72 |
73 | else if ( hs < hs_pat )
74 | hi = --mid;
75 |
76 | //if text hash equals pattern hash verify the match
77 | else {
78 | // check for duplicates and patterns with same hash
79 | while ( mid > 0 && hs == scanner_hs[mid - 1] )
80 | mid--;
81 |
82 | do {
83 | if ( memcmp ( text, pattern[scanner_index[mid]], 8 ) == 0 )
84 | return 1;
85 |
86 | mid++;
87 |
88 | } while ( mid < p_size && hs == scanner_hs[mid] );
89 |
90 | break;
91 | }
92 | }
93 | }
94 | return -1;
95 | }
96 |
// Scan text for occurrences of the preprocessed 8-byte patterns.
//
// An 8-bit shift-or state is updated with the T8 entry of every 3-gram of the text;
// when bit 5 (0x20) of the state is clear, the window starting at
// text + position - m + B is passed to sog_rkbt_verification8.
//
// Returns the number of text positions at which some pattern was verified.
unsigned int search_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) {

    uint8_t state = 0xff;

    int found = 0;
    int position;

    for ( position = 0; position < n - 2; position++ ) {

        state = (uint8_t) ( (state << 1) | T8[GET3GRAM( text + position )] );

        // Only a clear bit 5 makes the window a candidate worth verifying
        if ( !( state & 0x20 )
             && sog_rkbt_verification8 ( scanner_hs, scanner_index, scanner_hs2, pattern, (unsigned char *)text + position - m + B, m, p_size ) != -1 )
            found++;
    }

    return found;
}
116 |
117 | static void sog_add_pattern ( uint8_t **T8, int *scanner_pos, uint32_t **scanner_hs, int **scanner_index, uint8_t **scanner_hs2, uint8_t *pattern, int m, int p_size ) {
118 |
119 | uint8_t *index = &pattern[0];
120 | uint8_t *limit = &pattern[6];
121 |
122 | unsigned int i = 0;
123 |
124 | uint32_t hs, hs2;
125 | uint16_t hs2level;
126 |
127 | //add index
128 | *( *scanner_index + *scanner_pos ) = *scanner_pos;
129 |
130 | // Count hash
131 | *( *scanner_hs + *scanner_pos ) = GET32(pattern) ^ GET32(&pattern[4]);
132 |
133 | // Count 2-level hash
134 | hs2 = *( *scanner_hs + *scanner_pos );
135 | hs2level = ( uint16_t ) ( ( hs >> 16 ) ^ hs2 );
136 |
137 | *( *scanner_hs2 + ( hs2level >> 3 ) ) |= mask[hs2level & 0x07];
138 | *scanner_pos = *scanner_pos + 1;
139 |
140 | while ( index < limit ) {
141 | hs = GET3GRAM( index );
142 |
143 | *( *T8 + hs) &= 0xff - ( 1 << i );
144 |
145 | index++;
146 | i++;
147 | }
148 | }
149 |
150 | static void sog_reset_patterns ( uint8_t **T8, uint8_t **scanner_hs2) {
151 |
152 | unsigned int i;
153 |
154 | for ( i = 0; i < SIZE_3GRAM_TABLE; i++ )
155 | *( *T8 + i ) = 0xff;
156 |
157 | // Reset 2-level hashes
158 | for ( i = 0; i < 32 * 256; i++ )
159 | *( *scanner_hs2 + i ) = 0x00;
160 | }
161 |
// Build the search structures for p_size 8-byte patterns: reset the tables,
// register every pattern, then sort the hashes (with their indices) ascending.
// text, n and B are accepted but not used here.
void preproc_sog8 ( uint8_t *T8, uint32_t *scanner_hs, int *scanner_index, uint8_t *scanner_hs2, unsigned char **pattern, int m, unsigned char *text, int n, int p_size, int B ) {

    int next_slot = 0;
    int which = 0;

    sog_reset_patterns ( &T8, &scanner_hs2 );

    while ( which < p_size ) {
        sog_add_pattern ( &T8, &next_slot, &scanner_hs, &scanner_index, &scanner_hs2, pattern[which], m, p_size );
        which++;
    }

    // The verification step binary-searches the hashes
    my_sort ( scanner_hs, scanner_index, 0, p_size );
}
175 |
176 |
--------------------------------------------------------------------------------
/wu/wu.c:
--------------------------------------------------------------------------------
1 | /*This file is part of "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database".
2 |
3 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is free software: you can redistribute it and/or modify
4 | it under the terms of the GNU General Public License as published by
5 | the Free Software Foundation, either version 3 of the License, or
6 | (at your option) any later version.
7 |
8 | "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database" is distributed in the hope that it will be useful,
9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 | GNU General Public License for more details.
12 |
13 | You should have received a copy of the GNU General Public License
14 | along with "A Hybrid Parallel Implementation of the Aho-Corasick and Wu-Manber Algorithms Using NVIDIA CUDA and MPI Evaluated on a Biological Sequence Database". If not, see <http://www.gnu.org/licenses/>.*/
15 |
16 | #include "../smatcher.h"
17 |
18 | void wu_determine_shiftsize(int alphabet) {
19 |
20 | //the maximum size of the hash value of the B-size suffix of the patterns for the Wu-Manber algorithm
21 | if (alphabet == 2)
22 | shiftsize = 22; // 1 << 2 + 1 << 2 + 1 + 1
23 |
24 | else if (alphabet == 4)
25 | shiftsize = 64; // 3 << 2 + 3 << 2 + 3 + 1
26 |
27 | else if (alphabet == 8)
28 | shiftsize = 148; // 7 << 2 + 7 << 2 + 7 + 1
29 |
30 | else if (alphabet == 20)
31 | shiftsize = 400; // 19 << 2 + 19 << 2 + 19 + 1
32 |
33 | else if (alphabet == 128)
34 | shiftsize = 2668; // 127 << 2 + 127 << 2 + 127 + 1
35 |
36 | else if (alphabet == 256)
37 | shiftsize = 5356; //304 << 2 + 304 << 2 + 304 + 1
38 |
39 | else if (alphabet == 512)
40 | shiftsize = 10732; //560 << 2 + 560 << 2 + 560 + 1
41 |
42 | else if (alphabet == 1024)
43 | shiftsize = 21484; //1072 << 2 + 1072 << 2 + 1072 + 1
44 |
45 | else
46 | fail("The alphabet size is not supported by wu-manber\n");
47 | }
48 |
49 | unsigned int search_wu(unsigned char **pattern, int m, int p_size,
50 | unsigned char *text, int n, int *SHIFT, int *PREFIX_value,
51 | int *PREFIX_index, int *PREFIX_size) {
52 |
53 | int column = m - 1, i;
54 |
55 | unsigned int hash1, hash2;
56 |
57 | unsigned int matches = 0;
58 |
59 | size_t shift;
60 |
61 | while (column < n) {
62 |
63 | hash1 = text[column - 2];
64 | hash1 <<= m_nBitsInShift;
65 | hash1 += text[column - 1];
66 | hash1 <<= m_nBitsInShift;
67 | hash1 += text[column];
68 |
69 | shift = SHIFT[hash1];
70 |
71 | //printf("column %i hash1 = %i shift = %i\n", column, hash1, shift);
72 |
73 | if (shift == 0) {
74 |
75 | hash2 = text[column - m + 1];
76 | hash2 <<= m_nBitsInShift;
77 | hash2 += text[column - m + 2];
78 |
79 | //printf("hash2 = %i PREFIX[hash1].size = %i\n", hash2, PREFIX[hash1].size);
80 |
81 | //For every pattern with the same suffix as the text
82 | for (i = 0; i < PREFIX_size[hash1]; i++) {
83 |
84 | //If the prefix of the pattern matches that of the text
85 | if (hash2 == PREFIX_value[hash1 * p_size + i]) {
86 |
87 | //Compare directly the pattern with the text
88 | if (memcmp(pattern[PREFIX_index[hash1 * p_size + i]],
89 | text + column - m + 1, m) == 0) {
90 |
91 | matches++;
92 |
93 | //printf("Match of pattern index %i at %i\n", PREFIX_index[hash1 * p_size + i], column);
94 |
95 | break;
96 | }
97 |
98 | }
99 | }
100 |
101 | column++;
102 | } else
103 | column += shift;
104 | }
105 |
106 | return matches;
107 | }
108 |
109 | void preproc_wu(unsigned char **pattern, int m, int p_size, int alphabet, int B,
110 | int *SHIFT, int *PREFIX_value, int *PREFIX_index, int *PREFIX_size) {
111 |
112 | unsigned int j, q, hash;
113 |
114 | size_t shiftlen, prefixhash;
115 |
116 | for (j = 0; j < p_size; ++j) {
117 |
118 | //add each 3-character subpattern (similar to q-grams)
119 | for (q = m; q >= B; --q) {
120 |
121 | hash = pattern[j][q - 2 - 1]; // bring in offsets of X in pattern j
122 | hash <<= m_nBitsInShift;
123 | hash += pattern[j][q - 1 - 1];
124 | hash <<= m_nBitsInShift;
125 | hash += pattern[j][q - 1];
126 |
127 | //printf("hash = %i pattern[%i][%i] = %i pattern[%i][%i] = %i pattern[%i][%i] = %i\n", hash, j, q - 2 - 1, pattern[j][q - 2 - 1], j, q - 2, pattern[j][q - 2], j, q - 1, pattern[j][q - 1], j );
128 |
129 | shiftlen = m - q;
130 |
131 | SHIFT[hash] = MIN(SHIFT[hash], shiftlen);
132 |
133 | //calculate the hash of the prefixes for each pattern
134 | if (shiftlen == 0) {
135 |
136 | prefixhash = pattern[j][0];
137 | prefixhash <<= m_nBitsInShift;
138 | prefixhash += pattern[j][1];
139 |
140 | PREFIX_value[hash * p_size + PREFIX_size[hash]] = prefixhash;
141 | PREFIX_index[hash * p_size + PREFIX_size[hash]] = j;
142 |
143 | PREFIX_size[hash]++;
144 |
145 | //printf("%i) PREFIX[%i].value[%i] = %i PREFIX[%i].index[%i] = %i\n", j, hash, PREFIX[hash].size - 1, PREFIX[hash].value[PREFIX[hash].size - 1], hash, PREFIX[hash].size - 1, hashmap[j].index );
146 | }
147 | }
148 | }
149 | }
150 |
151 | unsigned int search_wu2(unsigned char *pattern, int m, int p_size,
152 | unsigned char *text, int n, int *SHIFT, int *PREFIX_value,
153 | int *PREFIX_index, int *PREFIX_size) {
154 |
155 | int column = m - 1, i;
156 |
157 | unsigned int hash1, hash2;
158 |
159 | unsigned int matches = 0;
160 |
161 | size_t shift;
162 |
163 | while (column < n) {
164 |
165 | hash1 = text[column - 2];
166 | hash1 <<= m_nBitsInShift;
167 | hash1 += text[column - 1];
168 | hash1 <<= m_nBitsInShift;
169 | hash1 += text[column];
170 |
171 | shift = SHIFT[hash1];
172 |
173 | //printf("column %i hash1 = %i shift = %i\n", column, hash1, shift);
174 |
175 | if (shift == 0) {
176 |
177 | hash2 = text[column - m + 1];
178 | hash2 <<= m_nBitsInShift;
179 | hash2 += text[column - m + 2];
180 |
181 | //printf("hash2 = %i PREFIX[hash1].size = %i\n", hash2, PREFIX[hash1].size);
182 |
183 | //For every pattern with the same suffix as the text
184 | for (i = 0; i < PREFIX_size[hash1]; i++) {
185 |
186 | //If the prefix of the pattern matches that of the text
187 | if (hash2 == PREFIX_value[hash1 * p_size + i]) {
188 |
189 | //Compare directly the pattern with the text
190 | if (memcmp(pattern + (PREFIX_index[hash1 * p_size + i] * m),
191 | text + column - m + 1, m) == 0) {
192 |
193 | matches++;
194 |
195 | //printf("Match of pattern index %i at %i\n", PREFIX_index[hash1 * p_size + i], column);
196 |
197 | break;
198 | }
199 |
200 | }
201 | }
202 |
203 | column++;
204 | } else
205 | column += shift;
206 | }
207 |
208 | return matches;
209 | }
210 |
211 | void preproc_wu2(unsigned char *pattern, int m, int p_size, int alphabet, int B,
212 | int *SHIFT, int *PREFIX_value, int *PREFIX_index, int *PREFIX_size) {
213 |
214 | unsigned int j, q, hash;
215 |
216 | size_t shiftlen, prefixhash;
217 |
218 | for (j = 0; j < p_size; ++j) {
219 |
220 | //add each 3-character subpattern (similar to q-grams)
221 | for (q = m; q >= B; --q) {
222 |
223 | hash = pattern[j * m + (q - 2 - 1)]; // bring in offsets of X in pattern j
224 | hash <<= m_nBitsInShift;
225 | hash += pattern[j * m + (q - 1 - 1)];
226 | hash <<= m_nBitsInShift;
227 | hash += pattern[j * m + (q - 1)];
228 |
229 | //printf("hash = %i pattern[%i][%i] = %i pattern[%i][%i] = %i pattern[%i][%i] = %i\n", hash, j, q - 2 - 1, pattern[j][q - 2 - 1], j, q - 2, pattern[j][q - 2], j, q - 1, pattern[j][q - 1], j );
230 |
231 | shiftlen = m - q;
232 |
233 | SHIFT[hash] = MIN(SHIFT[hash], shiftlen);
234 |
235 | //calculate the hash of the prefixes for each pattern
236 | if (shiftlen == 0) {
237 |
238 | prefixhash = pattern[j * m];
239 | prefixhash <<= m_nBitsInShift;
240 | prefixhash += pattern[j * m + 1];
241 |
242 | PREFIX_value[hash * p_size + PREFIX_size[hash]] = prefixhash;
243 | PREFIX_index[hash * p_size + PREFIX_size[hash]] = j;
244 |
245 | PREFIX_size[hash]++;
246 |
247 | //printf("%i) PREFIX[%i].value[%i] = %i PREFIX[%i].index[%i] = %i\n", j, hash, PREFIX[hash].size - 1, PREFIX[hash].value[PREFIX[hash].size - 1], hash, PREFIX[hash].size - 1, hashmap[j].index );
248 | }
249 | }
250 | }
251 | }
252 |
253 |
--------------------------------------------------------------------------------