# Simple program to generate a random array and a sorted array
# Saves the random array to one file, sorted one to another file
#
# Each output line is one value written as a WIDTH-character binary string,
# which is the form the testbench loads with $readmemb.

from random import randint

MIN = 1
MAX = 100
NUMINPUTS = 16
# Word width of every emitted line.  The mask applied before formatting is
# derived from this, so raising MAX above 255 no longer truncates values to
# eight bits while still padding them to sixteen characters.
WIDTH = 16


def to_bits(value, width=WIDTH):
    """Return *value* as a zero-padded binary string of exactly *width* bits.

    Bits above *width* are discarded so the line length is always *width*.
    """
    mask = (1 << width) - 1
    return format(value & mask, "0%db" % width)


# Random stimulus, in generation order, and the expected (ascending) result.
a = [randint(MIN, MAX) for _ in range(NUMINPUTS)]
s = sorted(a)

# `with` guarantees both files are flushed and closed even if a write fails.
with open("xin.txt", "w") as xf:
    xf.writelines(to_bits(value) + "\n" for value in a)

with open("sin.txt", "w") as sf:
    sf.writelines(to_bits(value) + "\n" for value in s)
// Top level of the sorting engine: one bus_controller plus a chain of eight
// processor cells (p1..p8).  Neighbouring cells share a 16-bit bidirectional
// bus; odd-numbered cells take the odd_* commands and even-numbered cells the
// even_* commands.
//
// The data to be sorted is hard-coded through the initval parameters below
// (two 16-bit values per cell; together they are a permutation of 0..15).
//
// NOTE(review): selfchecktb.v instantiates this module with clk, reset and
// 32 further data connections, but only clk and reset are declared here.
module sorting_engine(
    input clk,
    input reset
    );

    // link[k] joins the right port of p(k+1) to the left port of p(k+2).
    wire [15:0] link [0:6];

    // Bus commands produced by the controller.
    wire [1:0] odd_L, odd_R, even_L, even_R;

    // Chain ends: p1 has no left neighbour and p8 no right neighbour, so a
    // 2'b10 command on that side is replaced by 2'b00; any other command is
    // passed through unchanged.
    wire [1:0] sp_odd_L  = (odd_L  != 2'b10) ? odd_L  : 2'b00;
    wire [1:0] sp_even_R = (even_R != 2'b10) ? even_R : 2'b00;

    bus_controller controller (
        .clk    (clk),
        .reset  (reset),
        .odd_L  (odd_L),
        .odd_R  (odd_R),
        .even_L (even_L),
        .even_R (even_R)
    );

    // p1: left port intentionally left unconnected.
    processor p1 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h000f000e),
        .L       (sp_odd_L),
        .R       (odd_R),
        .right   (link[0])
    );

    processor p2 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h000a000d),
        .L       (even_L),
        .R       (even_R),
        .left    (link[0]),
        .right   (link[1])
    );

    processor p3 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h000c000b),
        .L       (odd_L),
        .R       (odd_R),
        .left    (link[1]),
        .right   (link[2])
    );

    processor p4 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h00080007),
        .L       (even_L),
        .R       (even_R),
        .left    (link[2]),
        .right   (link[3])
    );

    processor p5 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h00090006),
        .L       (odd_L),
        .R       (odd_R),
        .left    (link[3]),
        .right   (link[4])
    );

    processor p6 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h00030004),
        .L       (even_L),
        .R       (even_R),
        .left    (link[4]),
        .right   (link[5])
    );

    processor p7 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h00010002),
        .L       (odd_L),
        .R       (odd_R),
        .left    (link[5]),
        .right   (link[6])
    );

    // p8: right port intentionally left unconnected.
    processor p8 (
        .clk     (clk),
        .reset   (reset),
        .initval (32'h00000005),
        .L       (even_L),
        .R       (sp_even_R),
        .left    (link[6])
    );

endmodule
// Combinational compare-exchange cell.
//   c = the smaller of a and b (unsigned compare)
//   d = the larger of a and b
// When a == b the inputs pass straight through (c = a, d = b).
module cmp_swap(
    input  [15:0] a,
    input  [15:0] b,
    output [15:0] c,
    output [15:0] d
    );

    // Asserted when the operands arrive in descending order.
    wire out_of_order = (a > b);

    assign c = out_of_order ? b : a;
    assign d = out_of_order ? a : b;

endmodule
// One cell of the systolic chain.  It holds four 16-bit values r0..r3 and
// reacts to the two-bit commands L (left port) and R (right port):
//
//   L = 2'b11          drive `left`  with r0 (r8 == 0) or r1 (otherwise)
//   R = 2'b11          drive `right` with r2 (r8 == 0) or r3 (otherwise)
//   L = 2'b10, R = 00  latch `left`  into r0 (r8 == 0) or r1 (otherwise)
//   R = 2'b10, L = 00  latch `right` into r2 (r8 == 0) or r3 (otherwise)
//   R = 2'b01, L = 00  run one step of the six-step compare-exchange
//                      sequence over r0..r3
//   anything else      hold r0..r3
//
// r8 is the step counter: it counts 0..5 during the compare sequence and
// toggles between 0 and 1 on every other clock, which selects the first or
// second word of a two-word transfer.
//
// reset is asynchronous and active low; it loads {r0, r1} from initval.
module processor(
    inout [15:0] left,
    inout [15:0] right,
    input [1:0] L,
    input [1:0] R,
    input reset,
    input clk,
    input [31:0] initval
    );

    reg [15:0] r0, r1, r2, r3;   // data registers
    reg [15:0] r8;               // step counter

    // Tri-state bus drivers: released (Z) unless the send command is present.
    assign left  = (L==2'b11) ? ((r8==0) ? r0 : r1) : 16'bz;
    assign right = (R==2'b11) ? ((r8==0) ? r2 : r3) : 16'bz;

    // Operand selection for the shared comparator.  Steps 0/3 compare
    // (r0,r1), steps 1/4 compare (r2,r3) and steps 2/5 compare (r1,r2).
    wire [15:0] in1, in2;

    assign in1 = (r8==0) ? r0 :
                 (r8==1) ? r2 :
                 (r8==2) ? r1 :
                 (r8==3) ? r0 :
                 (r8==4) ? r2 :
                 (r8==5) ? r1 : r8;
    assign in2 = (r8==0) ? r1 :
                 (r8==1) ? r3 :
                 (r8==2) ? r2 :
                 (r8==3) ? r1 :
                 (r8==4) ? r3 :
                 (r8==5) ? r2 : r8;

    wire [15:0] out1, out2;

    cmp_swap cmp ( .a(in1), .b(in2), .c(out1), .d(out2) );

    always @(posedge clk, negedge reset)
    begin
        if (reset==0)
        begin
            {r0, r1} <= initval;
            // r2/r3 used to be left uninitialised.  A cell that is never
            // told to receive from the right (e.g. the last cell in
            // sorting_engine, whose R has 2'b10 masked to 2'b00) still runs
            // the compare sequence over r2/r3, so unknown values leaked into
            // r1.  All-ones is the largest unsigned value, so it never swaps
            // into r1 during the (r1,r2) steps.
            {r2, r3} <= {16'hFFFF, 16'hFFFF};
            r8 <= 0;
        end
        else if (R==2'b01 && L==2'b00)
        begin
            // Compare sequence: advance 0 -> 5, then wrap to 0.
            if (r8<16'h5)
                r8 <= r8 + 1'b1;
            else
                r8 <= 0;

            case (r8)
                'h0, 'h3:
                begin
                    r0 <= out1;
                    r1 <= out2;
                end
                'h1, 'h4:
                begin
                    r2 <= out1;
                    r3 <= out2;
                end
                'h2, 'h5:
                begin
                    r1 <= out1;
                    r2 <= out2;
                end
            endcase
        end
        else
        begin
            // Receive commands latch one word; every other command
            // (including send) leaves r0..r3 untouched.
            if (L==2'b10 && R==2'b00)
            begin
                if (r8==0)
                    r0 <= left;
                else
                    r1 <= left;
            end
            else if (R==2'b10 && L==2'b00)
            begin
                if (r8==0)
                    r2 <= right;
                else
                    r3 <= right;
            end

            // Outside the compare sequence r8 simply alternates 0, 1, 0, ...
            if (r8==16'h0)
                r8 <= r8 + 1'b1;
            else
                r8 <= 16'h0;
        end
    end
endmodule
// Sequencer that issues the bus commands for the odd- and even-numbered
// processors.  reset is asynchronous and active low.
module bus_controller(
    output reg [1:0] even_L,
    output reg [1:0] even_R,
    output reg [1:0] odd_L,
    output reg [1:0] odd_R,
    input reset,
    input clk
    );

    /*
     Command encoding on each L / R output:
       R = L = 00 -> Idle
       R = 11     -> Send Right
       L = 11     -> Send Left
       L = 10     -> Receive Left
       R = 10     -> Receive Right
       R = 01     -> Compare (comparison only follows a receive from the right)
    */

    reg [2:0] sort_state;
    reg [2:0] data_state;
    reg [2:0] cmp_count;

    initial
    begin
        cmp_count  = 3'h0;
        sort_state = 3'h0;
        data_state = 3'h0;
        even_L = 0;
        even_R = 0;
        odd_L  = 0;
        odd_R  = 0;
    end

    /*
     Phases, in the order they are issued (sort_state value in brackets):
       [1] odd receive right, even send left     - 2 clocks
       [2] odd compare                           - 6 clocks
       [3] odd send right,    even receive left  - 2 clocks
       [4] odd send left,     even receive right - 2 clocks
       [5] even compare                          - 6 clocks
       [6] odd receive left,  even send right    - 2 clocks
     sort_state 0 is the start-up state; it has no case arm, so the outputs
     keep their previous value while it is active.
     The outputs are registered, so each command appears one clock after its
     sort_state is entered.
    */

    always @(posedge clk, negedge reset)
    begin
        if (reset==0)
        begin
            // Non-blocking throughout: these registers are also written with
            // <= in the clocked branch below, and mixing blocking and
            // non-blocking assignments to one register in the same always
            // block is rejected by synthesis tools and can race in simulation.
            sort_state <= 0;
            data_state <= 0;
            cmp_count  <= 0;
            even_L <= 0;
            even_R <= 0;
            odd_L  <= 0;
            odd_R  <= 0;
        end
        else
        begin
            // Start-up / pass bookkeeping.  These assignments come first on
            // purpose: a later assignment to the same register inside the
            // case statement takes precedence in the same clock.
            if (data_state==3'h0)
            begin
                sort_state <= 3'h0;
                data_state <= data_state + 1'b1;
            end
            else
            begin
                if (sort_state==3'h0)
                begin
                    sort_state <= 3'h1;
                end
                if (data_state==3'h5)
                begin
                    data_state <= 3'h0;
                end
            end

            case (sort_state)
                'h1:
                begin
                    odd_R  <= 2'b10;
                    odd_L  <= 2'b00;
                    even_L <= 2'b11;
                    even_R <= 2'b00;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h1)
                    begin
                        sort_state <= 3'h2;
                        cmp_count  <= 0;
                    end
                end
                'h2:
                begin
                    odd_R  <= 2'b01;
                    odd_L  <= 2'b00;
                    even_L <= 2'b00;
                    even_R <= 2'b00;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h5)
                    begin
                        sort_state <= 3'h3;
                        cmp_count  <= 0;
                    end
                end
                'h3:
                begin
                    odd_R  <= 2'b11;
                    odd_L  <= 2'b00;
                    even_L <= 2'b10;
                    even_R <= 2'b00;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h1)
                    begin
                        sort_state <= 3'h4;
                        cmp_count  <= 0;
                    end
                end
                'h4:
                begin
                    odd_R  <= 2'b00;
                    odd_L  <= 2'b11;
                    even_L <= 2'b00;
                    even_R <= 2'b10;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h1)
                    begin
                        sort_state <= 3'h5;
                        cmp_count  <= 0;
                    end
                end
                'h5:
                begin
                    odd_R  <= 2'b00;
                    odd_L  <= 2'b00;
                    even_L <= 2'b00;
                    even_R <= 2'b01;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h5)
                    begin
                        sort_state <= 3'h6;
                        cmp_count  <= 0;
                    end
                end
                'h6:
                begin
                    odd_R  <= 2'b00;
                    odd_L  <= 2'b10;
                    even_L <= 2'b00;
                    even_R <= 2'b11;
                    cmp_count <= cmp_count + 1'b1;
                    if (cmp_count==3'h1)
                    begin
                        // One full odd/even pass done: restart at phase 1
                        // and count the pass.
                        sort_state <= 3'h1;
                        cmp_count  <= 0;
                        data_state <= data_state + 1'b1;
                    end
                end
            endcase
        end
    end

endmodule
def psort(t):
    """Return a list copy of *t* with its first four items in ascending order.

    The ordering is produced by a fixed sequence of six compare-exchange
    steps on index pairs, so *t* must hold at least four items (a shorter
    input raises ``IndexError``).  Items past index 3 are copied unchanged,
    and *t* itself is never modified.
    """
    values = list(t)
    # Index pairs in the exact order they are compared and, if needed, swapped.
    exchange_order = ((0, 1), (2, 3), (1, 2), (2, 3), (0, 1), (1, 2))
    for low, high in exchange_order:
        if values[low] > values[high]:
            values[low], values[high] = values[high], values[low]
    return values