├── .gitignore ├── README.md └── RTL ├── pieo_datatypes.sv ├── dual_port_bram.v └── pieo.sv /.gitignore: -------------------------------------------------------------------------------- 1 | **/synth 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PIEO-Scheduler 2 | 3 | This is the FPGA implementation of PIEO scheduler. The design was synthesized on an Altera Stratix V FPGA. For more information, please refer to our [SIGCOMM'19 paper](https://conferences.sigcomm.org/sigcomm/2019/). 4 | -------------------------------------------------------------------------------- /RTL/pieo_datatypes.sv: -------------------------------------------------------------------------------- 1 | `ifndef PIEO_DATATYPES 2 | `define PIEO_DATATYPES 3 | /* Sizing parameters and packed struct types; pieo.sv imports this whole package. */ 4 | package pieo_datatypes; 5 | 6 | parameter LIST_SIZE = (2**6); /* 64 */ 7 | parameter ID_LOG = $clog2(LIST_SIZE); /* bit width of SublistElement.id */ 8 | parameter RANK_LOG = 16; /* bit width of the rank fields */ 9 | parameter TIME_LOG = 16; /* bit width of the send_time fields */ 10 | /* 8 elements per sublist and 16 sublists with the values below */ 11 | parameter NUM_OF_ELEMENTS_PER_SUBLIST = (2**3); //sqrt(LIST_SIZE) 12 | parameter NUM_OF_SUBLIST = (2**4); //2*NUM_OF_ELEMENTS_PER_SUBLIST 13 | /* SublistElement: packed record holding id, rank and send_time */ 14 | typedef struct packed 15 | { 16 | logic [ID_LOG-1:0] id; 17 | logic [RANK_LOG-1:0] rank; //init with infinity 18 | logic [TIME_LOG-1:0] send_time; 19 | } SublistElement; 20 | /* PointerElement: packed record holding id, smallest_rank, smallest_send_time, full and num. NOTE(review): num is $clog2(NUM_OF_SUBLIST/2) = 3 bits wide, so it cannot hold the value 8 (NUM_OF_ELEMENTS_PER_SUBLIST); presumably the separate full bit covers that case - confirm against pieo.sv. */ 21 | typedef struct packed 22 | { 23 | logic [$clog2(NUM_OF_SUBLIST)-1:0] id; 24 | logic [RANK_LOG-1:0] smallest_rank; //init with infinity 25 | logic [TIME_LOG-1:0] smallest_send_time; //init with infinity 26 | logic full; 27 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] num; 28 | } PointerElement; 29 | 30 | endpackage 31 | `endif 32 | -------------------------------------------------------------------------------- /RTL/dual_port_bram.v: -------------------------------------------------------------------------------- 1 | // synopsys translate_off 2 | `timescale 1 ns / 1 ps 3 | // synopsys translate_on 4 | 5 | module alt_ram
/* alt_ram: wraps a single altsyncram instance; ports a and b share the one clock input. */ #( 7 | parameter RAM_WIDTH = 1, /* width of data_a, data_b, q_a and q_b */ 8 | parameter RAM_ADDR_BITS = 1, /* width of address_a and address_b */ 9 | parameter USE_OUTPUT_REGISTER = 0, /* non-zero selects "CLOCK0" for OUT_REG, zero selects "UNREGISTERED" */ 10 | parameter INIT_FILE = "" /* forwarded to the altsyncram init_file setting unless NO_PLI is defined */ 11 | ) 12 | ( 13 | address_a, 14 | address_b, 15 | clock, 16 | data_a, 17 | data_b, 18 | rden_a, 19 | rden_b, 20 | wren_a, 21 | wren_b, 22 | q_a, 23 | q_b); 24 | 25 | input [RAM_ADDR_BITS-1:0] address_a; 26 | input [RAM_ADDR_BITS-1:0] address_b; 27 | input clock; 28 | input [RAM_WIDTH-1:0] data_a; 29 | input [RAM_WIDTH-1:0] data_b; 30 | input rden_a; 31 | input rden_b; 32 | input wren_a; 33 | input wren_b; 34 | output [RAM_WIDTH-1:0] q_a; 35 | output [RAM_WIDTH-1:0] q_b; /* the translate_off/on pragmas around the tri1/tri0 declarations below are only present when ALTERA_RESERVED_QIS is not defined */ 36 | `ifndef ALTERA_RESERVED_QIS 37 | // synopsys translate_off 38 | `endif 39 | tri1 clock; 40 | tri1 rden_a; 41 | tri1 rden_b; 42 | tri0 wren_a; 43 | tri0 wren_b; 44 | `ifndef ALTERA_RESERVED_QIS 45 | // synopsys translate_on 46 | `endif 47 | /* OUT_REG is applied to both outdata_reg_a and outdata_reg_b in the defparam list */ 48 | localparam OUT_REG = (USE_OUTPUT_REGISTER) ? "CLOCK0" : "UNREGISTERED"; 49 | 50 | 51 | wire [RAM_WIDTH-1:0] sub_wire0; 52 | wire [RAM_WIDTH-1:0] sub_wire1; 53 | wire [RAM_WIDTH-1:0] q_a = sub_wire0[RAM_WIDTH-1:0]; 54 | wire [RAM_WIDTH-1:0] q_b = sub_wire1[RAM_WIDTH-1:0]; 55 | /* memory instance: clear and address-stall inputs are tied to 0; byte enables, clock1 and all clock enables are tied to 1 */ 56 | altsyncram altsyncram_component ( 57 | .clock0 (clock), 58 | .wren_a (wren_a), 59 | .address_b (address_b), 60 | .data_b (data_b), 61 | .rden_a (rden_a), 62 | .wren_b (wren_b), 63 | .address_a (address_a), 64 | .data_a (data_a), 65 | .rden_b (rden_b), 66 | .q_a (sub_wire0), 67 | .q_b (sub_wire1), 68 | .aclr0 (1'b0), 69 | .aclr1 (1'b0), 70 | .addressstall_a (1'b0), 71 | .addressstall_b (1'b0), 72 | .byteena_a (1'b1), 73 | .byteena_b (1'b1), 74 | .clock1 (1'b1), 75 | .clocken0 (1'b1), 76 | .clocken1 (1'b1), 77 | .clocken2 (1'b1), 78 | .clocken3 (1'b1), 79 | .eccstatus ()); 80 | defparam 81 | altsyncram_component.address_reg_b = "CLOCK0", 82 | altsyncram_component.clock_enable_input_a = "BYPASS", 83 | altsyncram_component.clock_enable_input_b = "BYPASS", 84 | altsyncram_component.clock_enable_output_a = "BYPASS", 85 |
altsyncram_component.clock_enable_output_b = "BYPASS", 86 | altsyncram_component.indata_reg_b = "CLOCK0", /* init_file is the literal "somefile.rif" when NO_PLI is defined, otherwise the INIT_FILE parameter */ 87 | `ifdef NO_PLI 88 | altsyncram_component.init_file = "somefile.rif" 89 | `else 90 | altsyncram_component.init_file = INIT_FILE 91 | `endif 92 | , 93 | altsyncram_component.intended_device_family = "Stratix V", 94 | altsyncram_component.lpm_type = "altsyncram", /* both ports are sized to the full address range */ 95 | altsyncram_component.numwords_a = 2**RAM_ADDR_BITS, 96 | altsyncram_component.numwords_b = 2**RAM_ADDR_BITS, 97 | altsyncram_component.operation_mode = "BIDIR_DUAL_PORT", 98 | altsyncram_component.outdata_aclr_a = "NONE", 99 | altsyncram_component.outdata_aclr_b = "NONE", 100 | altsyncram_component.outdata_reg_a = OUT_REG, 101 | altsyncram_component.outdata_reg_b = OUT_REG, 102 | altsyncram_component.power_up_uninitialized = "FALSE", /* NOTE(review): mixed-port read-during-write is DONT_CARE and same-port is NEW_DATA_NO_NBE_READ; confirm against the altsyncram documentation that callers never depend on data read from an address the other port is writing */ 103 | altsyncram_component.read_during_write_mode_mixed_ports = "DONT_CARE", 104 | altsyncram_component.read_during_write_mode_port_a = "NEW_DATA_NO_NBE_READ", 105 | altsyncram_component.read_during_write_mode_port_b = "NEW_DATA_NO_NBE_READ", 106 | altsyncram_component.widthad_a = RAM_ADDR_BITS, 107 | altsyncram_component.widthad_b = RAM_ADDR_BITS, 108 | altsyncram_component.width_a = RAM_WIDTH, 109 | altsyncram_component.width_b = RAM_WIDTH, 110 | altsyncram_component.width_byteena_a = 1, 111 | altsyncram_component.width_byteena_b = 1, 112 | altsyncram_component.wrcontrol_wraddress_reg_b = "CLOCK0"; 113 | 114 | endmodule 115 | /* dual_port_bram: wrapper that instantiates alt_ram and forwards its parameters */ 116 | module dual_port_bram # 117 | ( 118 | parameter RAM_WIDTH = 32, 119 | parameter RAM_ADDR_BITS = 10, 120 | parameter RAM_LINES = (2**RAM_ADDR_BITS), /* not referenced in this module body */ 121 | parameter USE_OUTPUT_REGISTER = 0, 122 | parameter INIT_FILE = "" 123 | ) 124 | ( 125 | input clk, 126 | 127 | input [RAM_WIDTH-1:0] Data_In_A, 128 | input [RAM_ADDR_BITS-1:0] Addr_A, 129 | input En_A, 130 | input Wen_A, 131 | output [RAM_WIDTH-1:0] Data_Out_A, 132 | 133 | input [RAM_WIDTH-1:0] Data_In_B, 134 | input [RAM_ADDR_BITS-1:0] Addr_B, 135 | input En_B, 136 | input Wen_B, 137 |
output [RAM_WIDTH-1:0] Data_Out_B 138 | 139 | ); 140 | 141 | /* single alt_ram instance; each port's read and write enables are derived from its En and Wen inputs, so a port never asserts both in the same cycle */ 142 | alt_ram #( 143 | .RAM_WIDTH( RAM_WIDTH ), 144 | .RAM_ADDR_BITS( RAM_ADDR_BITS ), 145 | .USE_OUTPUT_REGISTER( USE_OUTPUT_REGISTER ), 146 | .INIT_FILE( INIT_FILE ) 147 | ) 148 | bram 149 | ( 150 | .clock( clk ), 151 | 152 | .address_a( Addr_A ), 153 | /* read: enabled and Wen_A low */ .rden_a( En_A & ~Wen_A ), 154 | /* write: enabled and Wen_A high */ .wren_a( En_A & Wen_A ), 155 | .data_a( Data_In_A ), 156 | .q_a( Data_Out_A ), 157 | 158 | .address_b( Addr_B ), 159 | /* read: enabled and Wen_B low */ .rden_b( En_B & ~Wen_B), 160 | /* write: enabled and Wen_B high */ .wren_b( En_B & Wen_B ), 161 | .data_b( Data_In_B ), 162 | .q_b( Data_Out_B ) 163 | ); 164 | 165 | endmodule 166 | -------------------------------------------------------------------------------- /RTL/pieo.sv: -------------------------------------------------------------------------------- 1 | // synopsys translate_off 2 | `timescale 1 ns / 1 ps 3 | // synopsys translate_on 4 | 5 | //`define SIMULATION 6 | 7 | import pieo_datatypes::*; 8 | 9 | /* NULL element is all 1s, i.e., e.id = '1, e.rank = '1, and e.send_time = '1 10 | * It is assumed that '1 for rank and send_time values equals Infinity 11 | */ 12 | 13 | module pieo 14 | ( 15 | input clk, 16 | input rst, 17 | 18 | /* signal that PIEO has reset all the internal data structures */ 19 | output logic pieo_reset_done_out, 20 | 21 | /* signal to start the PIEO scheduler 22 | * this signal should be set once PIEO has reset all its data structures 23 | */ 24 | input start, 25 | 26 | /* signal that PIEO is ready for the next primitive operation 27 | * wait for this signal to be set before issuing the next primitive operation 28 | */ 29 | output logic pieo_ready_for_nxt_op_out, 30 | 31 | /* interface for enqueue(f) operation */ 32 | input enqueue_f_in, 33 | input SublistElement f_in, 34 | output logic enq_valid_out, 35 | output logic [$clog2(NUM_OF_SUBLIST):0] f_enqueued_in_sublist_out, 36 | 37 | /* input interface for dequeue() operation */ 38 | input dequeue_in, 39 | input [TIME_LOG-1:0] curr_time_in, 40 | 41 | /* input
interface for dequeue(f) operation */ 42 | input dequeue_f_in, 43 | input [ID_LOG-1:0] flow_id_in, 44 | input [$clog2(NUM_OF_SUBLIST)-1:0] sublist_id_in, 45 | 46 | /* output interface for dequeue() and dequeue(f) operations */ 47 | output logic deq_valid_out, 48 | output SublistElement deq_element_out, 49 | 50 | /* element moved during a primitive operation */ 51 | output logic [ID_LOG:0] flow_id_moved_out, 52 | output logic [$clog2(NUM_OF_SUBLIST):0] flow_id_moved_to_sublist_out 53 | ); 54 | 55 | `ifdef SIMULATION 56 | integer f; 57 | `endif 58 | 59 | //latching the inputs 60 | SublistElement f_in_reg; 61 | logic dequeue_in_reg; 62 | logic [TIME_LOG-1:0] curr_time_in_reg; 63 | logic dequeue_f_in_reg; 64 | logic [ID_LOG-1:0] flow_id_in_reg; 65 | 66 | typedef enum { 67 | LEFT, 68 | RIGHT, 69 | FREE, 70 | NONE 71 | } neigh_types; 72 | 73 | //ordered list in SRAM 74 | logic enable_A [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 75 | logic write_A [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 76 | logic [$clog2(NUM_OF_SUBLIST)-1:0] address_A [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 77 | SublistElement wr_data_A [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 78 | SublistElement rd_data_A [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 79 | 80 | logic enable_B [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 81 | logic write_B [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 82 | logic [$clog2(NUM_OF_SUBLIST)-1:0] address_B [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 83 | SublistElement wr_data_B [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 84 | SublistElement rd_data_B [NUM_OF_ELEMENTS_PER_SUBLIST-1:0]; 85 | 86 | generate 87 | genvar i; 88 | for (i=0; i 0 188 | & ~pointer_array[s_idx_reg-1].full) 189 | ? pointer_array[s_idx_reg-1] : 0; 190 | assign s_neigh_deq_type = (pointer_array[s_idx_reg].full 191 | & s_idx_reg+1 < NUM_OF_SUBLIST 192 | & ~pointer_array[s_idx_reg+1].full 193 | & s_idx_reg+1 != free_list_head_reg) 194 | ? RIGHT 195 | : (pointer_array[s_idx_reg].full 196 | & s_idx_reg > 0 197 | & ~pointer_array[s_idx_reg-1].full) 198 | ? 
LEFT : NONE; 199 | assign s_free = pointer_array[free_list_head_reg]; 200 | assign element_moving_idx = (pointer_array[s_idx_reg].full 201 | & s_idx_reg+1 < NUM_OF_SUBLIST 202 | & ~pointer_array[s_idx_reg+1].full 203 | & s_idx_reg+1 != free_list_head_reg) 204 | ? 0 205 | : (pointer_array[s_idx_reg].full 206 | & s_idx_reg > 0 207 | & ~pointer_array[s_idx_reg-1].full) 208 | ? pointer_array[s_idx_reg-1].num-1 : '1; 209 | 210 | //priority encoder for pointer array 211 | logic [NUM_OF_SUBLIST-1:0] bit_vector; 212 | logic [$clog2(NUM_OF_SUBLIST)-1:0] encode; 213 | logic [$clog2(NUM_OF_SUBLIST)-1:0] encode_reg; 214 | logic valid; 215 | logic valid_reg; 216 | 217 | priority_encode_log#( 218 | .width(NUM_OF_SUBLIST), 219 | .log_width($clog2(NUM_OF_SUBLIST)) 220 | ) pri_encoder(bit_vector, encode, valid); 221 | 222 | //priority encoder for rank sublist 223 | logic [NUM_OF_SUBLIST/2-1:0] bit_vector_A; 224 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_A; 225 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_A_reg; 226 | logic valid_A; 227 | logic valid_A_reg; 228 | 229 | priority_encode_log#( 230 | .width(NUM_OF_SUBLIST/2), 231 | .log_width($clog2(NUM_OF_SUBLIST/2)) 232 | ) pri_encoder_A(bit_vector_A, encode_A, valid_A); 233 | 234 | //priority encoder for pred sublist 235 | logic [NUM_OF_SUBLIST/2-1:0] bit_vector_AA; 236 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_AA; 237 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_AA_reg; 238 | logic valid_AA; 239 | logic valid_AA_reg; 240 | 241 | priority_encode_log#( 242 | .width(NUM_OF_SUBLIST/2), 243 | .log_width($clog2(NUM_OF_SUBLIST/2)) 244 | ) pri_encoder_AA(bit_vector_AA, encode_AA, valid_AA); 245 | 246 | //priority encoder for pred sublist 247 | logic [NUM_OF_SUBLIST/2-1:0] bit_vector_BB; 248 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_BB; 249 | logic [$clog2(NUM_OF_SUBLIST/2)-1:0] encode_BB_reg; 250 | logic valid_BB; 251 | logic valid_BB_reg; 252 | 253 | priority_encode_log#( 254 | .width(NUM_OF_SUBLIST/2), 255 | 
.log_width($clog2(NUM_OF_SUBLIST/2)) 256 | ) pri_encoder_BB(bit_vector_BB, encode_BB, valid_BB); 257 | 258 | typedef enum { 259 | `ifdef SIMULATION 260 | PRINT, 261 | CONT_PRINTING, 262 | `endif 263 | RESET, 264 | RESET_DONE, 265 | IDLE, 266 | ENQ_FETCH_SUBLIST_FROM_MEM, 267 | POS_TO_ENQUEUE, 268 | ENQ_WRITE_BACK_TO_MEM, 269 | DEQ_FETCH_SUBLIST_FROM_MEM, 270 | POS_TO_DEQUEUE, 271 | DEQ_WRITE_BACK_TO_MEM 272 | } pieo_ops; 273 | 274 | pieo_ops curr_state, nxt_state; 275 | 276 | reg [31:0] curr_address; 277 | 278 | SublistElement element_moving_reg; 279 | logic [$clog2(NUM_OF_ELEMENTS_PER_SUBLIST)-1:0] element_moving_idx_reg; 280 | logic [TIME_LOG-1:0] pred_moving_reg; 281 | 282 | logic [1:0] enqueue_case_reg; 283 | 284 | logic [$clog2(NUM_OF_SUBLIST)-1:0] idx_enq_reg; 285 | logic [$clog2(NUM_OF_SUBLIST)-1:0] idx_enq; 286 | 287 | SublistElement element_dequeued_reg; 288 | always @(posedge clk) begin 289 | if (~rst) element_dequeued_reg <= rd_data_A[encode_A]; 290 | end 291 | 292 | SublistElement element_moving; 293 | assign element_moving = rd_data_B[element_moving_idx_reg]; 294 | 295 | logic [TIME_LOG-1:0] pred_val_deq; 296 | 297 | `ifdef SIMULATION 298 | reg [63:0] cntr; 299 | reg [63:0] insertion_cnt; 300 | reg [63:0] deletion_cnt; 301 | `endif 302 | 303 | always_comb begin 304 | for (integer i=0; i f_in.rank); 370 | end 371 | nxt_state = ENQ_FETCH_SUBLIST_FROM_MEM; 372 | end 373 | else if (dequeue_in) begin 374 | //figure out the right sublist to deq 375 | for (integer i=0; i= pointer_array[i].smallest_send_time); 379 | end 380 | nxt_state = DEQ_FETCH_SUBLIST_FROM_MEM; 381 | end else if (dequeue_f_in) begin 382 | //figure out the right sublist to deq 383 | for (integer i=0; i f_in_reg.rank); 419 | end 420 | for (integer i=0; i f_in_reg.send_time); 422 | end 423 | end else begin 424 | //new element is getting inserted in B 425 | if (f_in_reg.rank 426 | >= rd_data_A[NUM_OF_ELEMENTS_PER_SUBLIST-1].rank) begin 427 | for (integer i=0; i f_in_reg.send_time); 429 | end 
430 | end 431 | else begin //new element in A, last element of A in B 432 | for (integer i=0; i f_in_reg.rank); 434 | end 435 | for (integer i=0; i f_in_reg.send_time); 437 | end 438 | for (integer i=0; i rd_data_A[NUM_OF_ELEMENTS_PER_SUBLIST-1].send_time); 441 | end 442 | end 443 | end 444 | nxt_state = ENQ_WRITE_BACK_TO_MEM; 445 | end 446 | 447 | ENQ_WRITE_BACK_TO_MEM: begin 448 | case (enqueue_case_reg) 449 | 0: begin 450 | if (valid_A_reg & valid_AA_reg) begin 451 | enq_valid_out = 1; 452 | f_enqueued_in_sublist_out = s_reg.id; 453 | flow_id_moved_out = '1; 454 | flow_id_moved_to_sublist_out = '1; 455 | for (integer i=0; i idx_enq && i != 0 563 | && rd_data_AA_reg[i] <= pred_moving_reg) begin 564 | wr_data_AA[i] = rd_data_AA_reg[i-1]; 565 | end else begin 566 | wr_data_AA[i] = rd_data_AA_reg[i]; 567 | end 568 | end 569 | enable_B[i] = 1; 570 | write_B[i] = 1; 571 | address_B[i] = s_neigh_reg.id; 572 | if (i == 0) begin 573 | wr_data_B[i] = element_moving_reg; 574 | end else if (i != 0) begin 575 | wr_data_B[i] = rd_data_B_reg[i-1]; 576 | end 577 | enable_BB[i] = 1; 578 | write_BB[i] = 1; 579 | address_BB[i] = s_neigh_reg.id; 580 | if (i < encode_BB_reg) begin 581 | wr_data_BB[i] = rd_data_BB_reg[i]; 582 | end else if (i == encode_BB_reg) begin 583 | wr_data_BB[i] = pred_moving_reg; 584 | end else if (i != 0) begin 585 | wr_data_BB[i] = rd_data_BB_reg[i-1]; 586 | end 587 | end 588 | `ifdef SIMULATION 589 | nxt_state = PRINT; 590 | `else 591 | nxt_state = IDLE; 592 | `endif 593 | end 594 | end 595 | 596 | default: begin 597 | enq_valid_out = 0; 598 | f_enqueued_in_sublist_out = '1; 599 | flow_id_moved_out = '1; 600 | flow_id_moved_to_sublist_out = '1; 601 | for (integer i=0; i= rd_data_A[NUM_OF_ELEMENTS_PER_SUBLIST-1].rank) begin 965 | valid_BB_reg <= valid_BB; 966 | encode_BB_reg <= encode_BB; 967 | enqueue_case_reg <= 1; 968 | end else begin 969 | //new element in A, last element of A in B 970 | valid_A_reg <= valid_A; 971 | encode_A_reg <= encode_A; 972 | 
valid_AA_reg <= valid_AA; 973 | encode_AA_reg <= encode_AA; 974 | valid_BB_reg <= valid_BB; 975 | encode_BB_reg <= encode_BB; 976 | enqueue_case_reg <= 2; 977 | if (f_in_reg.send_time >= rd_data_AA[NUM_OF_ELEMENTS_PER_SUBLIST-1]) 978 | idx_enq_reg <= NUM_OF_ELEMENTS_PER_SUBLIST; 979 | else 980 | idx_enq_reg <= 0; 981 | end 982 | //update pointer array 983 | if (s_neigh_type_reg == FREE) begin 984 | for (integer i = 0; i < NUM_OF_SUBLIST-1; i=i+1) begin 985 | if (i > s_idx_reg && 986 | i < free_list_head_reg) begin 987 | pointer_array[i+1] <= pointer_array[i]; 988 | if (i == s_idx_reg+1) begin 989 | pointer_array[i] <= s_free_reg; 990 | end 991 | end 992 | end 993 | free_list_head_reg <= free_list_head_reg + 1; 994 | end else if (s_idx_reg+1 == free_list_head_reg) begin 995 | free_list_head_reg <= free_list_head_reg + 1; 996 | end 997 | end 998 | end else if (curr_state == ENQ_WRITE_BACK_TO_MEM) begin 999 | if (enqueue_case_reg == 0) begin 1000 | if (valid_A_reg & valid_AA_reg) begin 1001 | for (integer i=0; i= s_idx_reg 1096 | && i < free_list_head_reg 1097 | && i < NUM_OF_SUBLIST-1)begin 1098 | pointer_array[i] <= pointer_array[i+1]; 1099 | end 1100 | end 1101 | free_list_head_reg <= free_list_head_reg - 1; 1102 | end else begin 1103 | if (s_neigh_type_reg != NONE) begin 1104 | element_moving_reg <= element_moving; 1105 | pred_moving_reg <= element_moving.send_time; 1106 | if (rd_data_B[element_moving_idx_reg].send_time 1107 | >= rd_data_AA[NUM_OF_ELEMENTS_PER_SUBLIST-1]) begin 1108 | idx_enq_reg <= NUM_OF_ELEMENTS_PER_SUBLIST; 1109 | end else begin 1110 | idx_enq_reg <= 0; 1111 | end 1112 | if (s_neigh_reg.num == 1) begin 1113 | //re-arrange pointer array 1114 | for (integer i=0; i= ((s_neigh_type_reg==LEFT) 1123 | ? 
s_idx_reg-1 : s_idx_reg+1) 1124 | && i < free_list_head_reg-1 1125 | && i < NUM_OF_SUBLIST-1) begin 1126 | pointer_array[i] <= pointer_array[i+1]; 1127 | end 1128 | end 1129 | free_list_head_reg <= free_list_head_reg - 1; 1130 | if (s_neigh_type_reg == LEFT) s_idx_reg <= s_idx_reg-1; 1131 | end 1132 | end 1133 | end 1134 | end else if (curr_state == DEQ_WRITE_BACK_TO_MEM) begin 1135 | if (~valid_A_reg) begin 1136 | `ifdef SIMULATION 1137 | $fwrite(f,"DEQ ELEMENT -- %0d %0d %0d\n", 1138 | 0, (2**RANK_LOG)-1, (2**TIME_LOG)-1); 1139 | `endif 1140 | end else begin 1141 | if (s_neigh_type_reg == NONE) begin 1142 | if (valid_A_reg) begin 1143 | `ifdef SIMULATION 1144 | $fwrite(f,"DEQ ELEMENT -- %0d %0d %0d\n", 1145 | element_dequeued_reg.id, 1146 | element_dequeued_reg.rank, 1147 | element_dequeued_reg.send_time); 1148 | `endif 1149 | if (s_reg.num != 1) begin 1150 | for (integer i=0; i