├── .gitignore
├── LICENSE
├── README.md
├── example
├── test_multicore
│ ├── Makefile
│ └── test_multicore.c
├── test_simd
│ ├── Makefile
│ └── test_simd.c
└── test_stop
│ ├── Makefile
│ └── test_stop.c
├── inc
├── bit.h
├── crc.h
├── simd_bit.h
├── simd_ldpc.h
└── thread_pool.h
├── lib
└── libsimd_5gfec.a
└── src
└── thread_pool.c
/.gitignore:
--------------------------------------------------------------------------------
1 | main
2 | *.o
3 | *.txt
4 | .vscode
5 | src/ldpc_hbg.c
6 | src/simd_ldpc.c
7 | lib/Makefile
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 SherlockHsu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 5G-SIMD-LDPC
2 | ==============================
3 |
4 | * Version: 3.1
5 | * Date: 2019.03.27
6 | * Author: Xu Yi
7 |
8 | ---
9 | File Specification
10 | ------------------------------
11 |
12 | ```
13 | 5G-SIMD-LDPC
14 | │ README.md
15 | │ LICENSE
16 | │
17 | └───inc
18 | │ │ bit.h // Data pack and unpack header file
19 | │ │ crc.h // CRC header file
20 | │ │ simd_bit.h // Data pack and unpack with SIMD header file
21 | │ │ simd_ldpc.h // SIMD LDPC header file
22 | │ │ thread_pool.h // Thread pool header file
23 | │
24 | └───src
25 | │ │ thread_pool.c // Thread pool source file
26 | │
27 | └───lib
28 | │ │ libsimd_5gfec.a // Static library for x86_64 Linux
29 | │
30 | └───example
31 | └───test_multicore // example for multi-core test
32 | │ │ test_multicore.c
33 | │ │ Makefile
34 | │
35 | └───test_simd // example for different SIMD instruction sets
36 | │ │ test_simd.c
37 | │ │ Makefile
38 | │
39 | └───test_stop // example for early stopping strategy
40 | │ test_stop.c
41 | │ Makefile
42 | ```
43 |
44 | Build and Execution Instructions
45 | ------------------------------
46 |
47 | ### Build:
48 | > cd example/[example name]/
49 |
50 | > make
51 |
52 | ### Execution:
53 | > ./main
54 |
55 | ### Clean:
56 | > make clean
57 |
58 | Decoder Performance
59 | ------------------------------------------------------------
60 |
61 | ### Environment:
62 | * OS: Ubuntu 16.04 xenial
63 | * Kernel: x86_64 Linux 4.4.0-21-generic
64 | * CPU: 4x Intel Xeon Gold 6154 CPU @ 3.001GHz
65 | * ICC: 18.0.2
66 |
67 | ### Simulation Parameter:
68 |
69 | * Base Graph: 5GNR Base Graph 1(iLS = 2)
70 | * Code Block Length: 8448
71 | * Code Rate: 5/6
72 |
73 | ### Result:
74 | | SIMD | Throughput | Code Block Latency |
75 | | ------------- | ------------- | --------------------- |
76 | | SSE4.1 | 62.34Mbps | 135.52μs |
77 | | AVX2 | 130.23Mbps | 64.87μs |
78 | | AVX512 | 223.29Mbps | 37.83μs |
79 |
--------------------------------------------------------------------------------
/example/test_multicore/Makefile:
--------------------------------------------------------------------------------
1 | CC = icc
2 |
3 | # Repository root, relative to this example directory.
4 | PWD = ../..
5 |
6 | SRCS = $(wildcard *.c $(PWD)/src/*.c)
7 |
8 | # A substitution reference must not contain spaces around '=':
9 | # "$(SRCS:.c = .o)" matches nothing, which left OBJS equal to SRCS and meant
10 | # the %.o rule (and therefore CCFLAGS) was never used.
11 | OBJS = $(SRCS:.c=.o)
12 |
13 | INCLUDES = -I$(PWD)/inc
14 |
15 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a
16 |
17 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g
18 |
19 | OUTPUT = main
20 |
21 | all: $(OUTPUT)
22 |
23 | $(OUTPUT): $(OBJS)
24 | 	$(CC) $^ -o $@ $(LIBS)
25 |
26 | # "-o $@" writes each object next to its source, so objects built from
27 | # $(PWD)/src end up where the link rule expects them.
28 | %.o: %.c
29 | 	$(CC) -c $< -o $@ $(CCFLAGS) $(INCLUDES)
30 |
31 | # Remove intermediate and generated files.
32 | clean:
33 | 	rm -rf $(OUTPUT) $(OBJS) *.o *.txt
34 |
35 | .PHONY: all clean
36 |
--------------------------------------------------------------------------------
/example/test_multicore/test_multicore.c:
--------------------------------------------------------------------------------
1 | // version 3.1
2 | #include "simd_ldpc.h"
3 | #include "simd_bit.h"
4 | #include "crc.h"
5 | #include "bit.h"
6 | #include "thread_pool.h"
7 |
8 | #ifndef _GNU_SOURCE
9 | #define _GNU_SOURCE
10 | #include
11 | #endif
12 |
13 | #include
14 | #include
15 | #include
16 | #include
17 | #include
18 | #include
19 | #include
20 | #include
21 |
22 | #if defined(_MSC_VER)
23 | #include
24 | #else
25 | #include
26 | #endif
27 |
28 | #define CORE_NUM 18
29 | #define TEST_CORE_NUM 18
30 |
31 | #define CRC_24A 0x1864CFB
32 | #define BLOCK_SIZE 10000
33 |
34 | pthread_mutex_t mutex;
35 | pthread_mutex_t demutex;
36 | volatile int cnt;
37 | double decode_run_time;
38 |
39 | typedef struct ldpc_decoder_thrd_t // argument bundle passed to ldpc_decoder_thrd() through its void *arg
40 | {
41 | const float *llr; // forwarded as the first (input LLR) argument of nr5g_ldpc_simd_decoder
42 | nr5g_ldpc_simd_t *h; // LDPC context forwarded to the decoder
43 | int32_t I_max; // forwarded as the decoder's I_max argument (presumably the iteration limit -- confirm)
44 | float coef; // beta for OMS or alpha for NMS, selected by decoder_mode in main()
45 | int32_t decoder_mode; // DECODER_MODE_OMS or DECODER_MODE_NMS
46 | int8_t *decoded_bits; // forwarded as the decoder's decoded-bits buffer
47 | float *decoded_llr; // forwarded as the decoder's decoded-LLR buffer
48 | } ldpc_decoder_thrd_t;
49 |
50 | /* Pool task: run the decoder BLOCK_SIZE times on one code block, add the elapsed
51 |  * wall-clock seconds to decode_run_time, then bump cnt to signal completion. */
52 | void ldpc_decoder_thrd(void *arg)
53 | {
54 |     ldpc_decoder_thrd_t *task = (ldpc_decoder_thrd_t *)arg;
55 |     double elapsed_s;
56 | #if defined(_MSC_VER)
57 |     LARGE_INTEGER tick;
58 |     long long t0, t1, ticks_per_s;
59 |     QueryPerformanceFrequency(&tick);
60 |     ticks_per_s = tick.QuadPart;
61 |     QueryPerformanceCounter(&tick);
62 |     t0 = tick.QuadPart;
63 | #else
64 |     struct timeval t0, t1;
65 |     long usec;
66 |     gettimeofday(&t0, NULL);
67 | #endif
68 |     for (int remaining = BLOCK_SIZE; remaining > 0; --remaining)
69 |     {
70 |         nr5g_ldpc_simd_decoder(task->llr, task->h, task->I_max, task->coef, task->decoder_mode, EARLY_STOP_OFF, task->decoded_bits, task->decoded_llr);
71 |     }
72 | #if defined(_MSC_VER)
73 |     QueryPerformanceCounter(&tick);
74 |     t1 = tick.QuadPart;
75 |     elapsed_s = (double)(t1 - t0) / ticks_per_s;
76 | #else
77 |     gettimeofday(&t1, NULL);
78 |     usec = 1000000 * (t1.tv_sec - t0.tv_sec) + t1.tv_usec - t0.tv_usec;
79 |     elapsed_s = (double)usec / 1000000.0;
80 | #endif
81 |     pthread_mutex_lock(&demutex); /* decode_run_time is shared by all tasks */
82 |     decode_run_time += elapsed_s;
83 |     pthread_mutex_unlock(&demutex);
84 |     pthread_mutex_lock(&mutex); /* cnt is polled by main() */
85 |     ++cnt;
86 |     pthread_mutex_unlock(&mutex);
87 | }
88 |
89 | #define B_NUM 2
90 | #define R_NUM 3
91 | #define EBN0_SIZE 6
92 |
93 | /*
94 |  * Multi-core decoder benchmark.
95 |  *
96 |  * For each core count in core_list and each (B, R) pair, one code block per
97 |  * core is generated, encoded, rate matched, BPSK mapped and passed through an
98 |  * AWGN channel; every core's block is then decoded BLOCK_SIZE times by a pool
99 |  * task. Throughput and latency averaged over the Eb/N0 points are printed and
100 |  * appended to mutlicore_tp.txt and mutlicore_latency.txt.
101 |  *
102 |  * Returns 0 on success and 1 when an output file cannot be opened.
103 |  */
104 | int main()
105 | {
106 |     int B_list[B_NUM] = {8448, 3840};
107 |     int R_list[R_NUM] = {853, 768, 512};
108 |     int core_list[TEST_CORE_NUM] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
109 |     FILE *fp, *fq;
110 |
111 |     fp = fopen("mutlicore_tp.txt", "a");
112 |     if (fp == NULL)
113 |     {
114 |         perror("mutlicore_tp.txt");
115 |         return 1;
116 |     }
117 |     fq = fopen("mutlicore_latency.txt", "a");
118 |     if (fq == NULL)
119 |     {
120 |         perror("mutlicore_latency.txt");
121 |         fclose(fp);
122 |         return 1;
123 |     }
124 |
125 |     /* Both mutexes are shared by every run: create them once, destroy them once. */
126 |     pthread_mutex_init(&demutex, NULL);
127 |     pthread_mutex_init(&mutex, NULL);
128 |
129 |     for (int indx_core = 0; indx_core < TEST_CORE_NUM; indx_core++)
130 |     {
131 |         printf("==================================================\n");
132 |         printf("CORE NUM: %d\n", core_list[indx_core]);
133 |         printf("--------------------------------------------------\n");
134 |         for (int j = 0; j < B_NUM; j++)
135 |             for (int k = 0; k < R_NUM; k++)
136 |             {
137 |                 double encode_run_time;
138 | #if defined(_MSC_VER)
139 |                 LARGE_INTEGER num;
140 |                 long long start, end, freq;
141 | #else
142 |                 struct timeval start, end;
143 |                 long timeuse;
144 | #endif
145 |                 double avg_tp, avg_latency;
146 |
147 |                 int32_t i, indx_block, indx_ebn0, sum_err_bits, test_size;
148 |                 int32_t B, R, I_max, decoder_mode;
149 |                 float coef;
150 |                 nr5g_ldpc_simd_t *ldpc_arg[CORE_NUM];
151 |                 VSLStreamStatePtr stream_g;
152 |                 VSLStreamStatePtr stream_b;
153 |                 int8_t *info_bits[CORE_NUM];
154 |                 int8_t *info_byte[CORE_NUM];
155 |                 int8_t *rmed_bits[CORE_NUM];
156 |                 float *mapped_sig[CORE_NUM];
157 |                 float *noise[CORE_NUM];
158 |                 float *llr[CORE_NUM];
159 |                 float *decoded_llr[CORE_NUM];
160 |                 int8_t *decbs_bits[CORE_NUM];
161 |                 float EbN0, sigma2, sigma;
162 |                 int32_t err_bits[BLOCK_SIZE];
163 |
164 |                 ldpc_decoder_thrd_t *ldpct[CORE_NUM];
165 |                 pool_init(0, core_list[indx_core], 0);
166 |
167 |                 nr5g_crc_t crc_t;
168 |                 nr5g_crc_init(&crc_t, CRC_24A, 24);
169 |
170 |                 /* set parameters */
171 |                 B = B_list[j];
172 |                 R = R_list[k];
173 |                 I_max = 10;
174 |                 decoder_mode = DECODER_MODE_OMS;
175 |
176 |                 switch (decoder_mode)
177 |                 {
178 |                 case DECODER_MODE_OMS:
179 |                     coef = (float)2 / 4; // beta for oms
180 |                     break;
181 |                 case DECODER_MODE_NMS:
182 |                     coef = (float)25 / 32; // alpha for nms
183 |                     break;
184 |                 default:
185 |                     printf("ERROR(main): SIMD MODE %d IS NOT EXISTED.\n", decoder_mode);
186 |                     exit(EXIT_FAILURE); /* an error must not report success */
187 |                     break;
188 |                 }
189 |
190 |                 float EbN0_list[EBN0_SIZE] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
191 |                 test_size = EBN0_SIZE;
192 |                 cnt = 0;
193 |
194 |                 /* initialize: one LDPC context, buffer set and task argument per core */
195 |                 for (int c = 0; c < core_list[indx_core]; ++c)
196 |                 {
197 |                     ldpc_arg[c] = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t));
198 |                     nr5g_ldpc_simd_init(ldpc_arg[c], B, R, SIMD_MODE_AUTO);
199 |                     ldpc_arg[c]->crc_t = &crc_t;
200 |
201 |                     info_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B);
202 |                     info_byte[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B / 8);
203 |                     rmed_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->G);
204 |                     mapped_sig[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
205 |                     noise[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
206 |                     llr[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
207 |                     decoded_llr[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
208 |                     decbs_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B);
209 |
210 |                     ldpct[c] = (ldpc_decoder_thrd_t *)malloc(sizeof(ldpc_decoder_thrd_t));
211 |                     ldpct[c]->llr = ldpc_arg[c]->rdmed_llr;
212 |                     ldpct[c]->h = ldpc_arg[c];
213 |                     ldpct[c]->I_max = I_max;
214 |                     ldpct[c]->coef = coef;
215 |                     ldpct[c]->decoder_mode = decoder_mode;
216 |                     ldpct[c]->decoded_bits = ldpc_arg[c]->decoded_bits;
217 |                     ldpct[c]->decoded_llr = decoded_llr[c];
218 |                 }
219 |
220 |                 vslNewStream(&stream_g, VSL_BRNG_MCG31, 0);
221 |                 vslNewStream(&stream_b, VSL_BRNG_MCG31, 1);
222 |
223 |                 avg_tp = 0.0;
224 |                 avg_latency = 0.0;
225 |
226 |                 /* test loop start */
227 |                 for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++)
228 |                 {
229 |                     EbN0 = EbN0_list[indx_ebn0];
230 |                     // sigma2 = (float)(1 / (pow(10, (double)EbN0 / 10) * 2 * R / 1024));
231 |                     sigma2 = (float)(1 / pow(10, (double)EbN0 / 10));
232 |                     sigma = (float)sqrt(sigma2);
233 |                     sum_err_bits = 0;
234 |                     encode_run_time = 0.0;
235 |                     decode_run_time = 0.0;
236 |
237 |                     /* a single pass: each pool task already repeats the decode BLOCK_SIZE times */
238 |                     for (indx_block = 0; indx_block < 1; indx_block++)
239 |                     {
240 |                         err_bits[indx_block] = 0;
241 |
242 |                         /* transmit chain, one code block per core */
243 |                         for (int c = 0; c < core_list[indx_core]; ++c)
244 |                         {
245 |                             /* generate random tbs */
246 |                             viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte[c]);
247 |                             nr5g_crc_attach_byte(&crc_t, info_byte[c], B - 24);
248 |                             nr5g_bit_unpack_vector(info_byte[c], info_bits[c], B);
249 |
250 |                             /* cbs */
251 |                             nr5g_ldpc_simd_cbs(info_bits[c], ldpc_arg[c], ldpc_arg[c]->cbs_bits);
252 |
253 |                             /* encode */
254 | #if defined(_MSC_VER)
255 |                             QueryPerformanceFrequency(&num);
256 |                             freq = num.QuadPart;
257 |                             QueryPerformanceCounter(&num);
258 |                             start = num.QuadPart;
259 | #else
260 |                             gettimeofday(&start, NULL);
261 | #endif
262 |                             nr5g_ldpc_simd_encoder(ldpc_arg[c]->cbs_bits, ldpc_arg[c], ldpc_arg[c]->coded_bits);
263 | #if defined(_MSC_VER)
264 |                             QueryPerformanceCounter(&num);
265 |                             end = num.QuadPart;
266 |                             encode_run_time += (double)(end - start) / freq;
267 | #else
268 |                             gettimeofday(&end, NULL);
269 |                             timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
270 |                             encode_run_time += (double)timeuse / 1000000.0;
271 | #endif
272 |
273 |                             /* rate matching */
274 |                             nr5g_ldpc_simd_rate_matching(ldpc_arg[c]->coded_bits, ldpc_arg[c], rmed_bits[c]);
275 |
276 |                             /* BPSK map */
277 |                             for (i = 0; i < ldpc_arg[c]->G; i++)
278 |                                 mapped_sig[c][i] = (float)2 * rmed_bits[c][i] - 1;
279 |
280 |                             /* pass AWGN channel */
281 |                             vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg[c]->G, noise[c], 0.0, sigma);
282 |                             for (i = 0; i < ldpc_arg[c]->G; i++)
283 |                                 llr[c][i] = 2 * (mapped_sig[c][i] + noise[c][i]) / sigma2;
284 |
285 |                             /* rate dematching */
286 |                             nr5g_ldpc_simd_rate_dematching(llr[c], ldpc_arg[c], ldpc_arg[c]->rdmed_llr);
287 |                         }
288 |
289 |                         /* decode: queue one task per core, then spin until all have bumped cnt */
290 |                         for (int c = 0; c < core_list[indx_core]; ++c)
291 |                             pool_add_task(ldpc_decoder_thrd, (void *)ldpct[c], 0);
292 |                         while (cnt < core_list[indx_core])
293 |                             ;
294 |                         cnt = 0;
295 |
296 |                         for (int c = 0; c < core_list[indx_core]; ++c)
297 |                         {
298 |                             /* decbs */
299 |                             nr5g_ldpc_simd_decbs(ldpc_arg[c]->decoded_bits, ldpc_arg[c], decbs_bits[c]);
300 |
301 |                             /* statistics (collected but not reported by this benchmark) */
302 |                             for (i = 0; i < ldpc_arg[c]->B; i++)
303 |                                 err_bits[indx_block] += (info_bits[c][i] == decbs_bits[c][i] ? 0 : 1);
304 |
305 |                             sum_err_bits += err_bits[indx_block];
306 |                         }
307 |                     }
308 |
309 |                     /* decode_run_time is the sum over all tasks, so dividing by the
310 |                      * core count gives the mean time one task spent decoding */
311 |                     avg_tp += (double)B * BLOCK_SIZE * core_list[indx_core] / (decode_run_time / core_list[indx_core]) / 1e6;
312 |                     avg_latency += decode_run_time / core_list[indx_core] * 1e6 / BLOCK_SIZE;
313 |                 }
314 |
315 |                 avg_tp /= test_size;
316 |                 avg_latency /= test_size;
317 |                 printf("B = %d, R = %d\n", B, R);
318 |                 printf("Average Throughput:\t%.2lfMbps\n", avg_tp);
319 |                 printf("Average Latency:\t%.2lfus\n", avg_latency);
320 |
321 |                 fprintf(fp, "%.2lf\t", avg_tp);
322 |                 fprintf(fq, "%.2lf\t", avg_latency);
323 |                 printf("--------------------------------------------------\n");
324 |
325 |                 /* the RNG streams are created per run, so release them per run */
326 |                 vslDeleteStream(&stream_g);
327 |                 vslDeleteStream(&stream_b);
328 |
329 |                 for (int c = 0; c < core_list[indx_core]; ++c)
330 |                 {
331 |                     free(info_byte[c]);
332 |                     free(info_bits[c]);
333 |                     free(rmed_bits[c]);
334 |                     free(mapped_sig[c]);
335 |                     free(noise[c]);
336 |                     free(llr[c]);
337 |                     free(decoded_llr[c]);
338 |                     free(decbs_bits[c]);
339 |                     free(ldpct[c]); /* task argument allocated in the initialize loop */
340 |                     free_nr5g_ldpc_simd_t(ldpc_arg[c]);
341 |                 }
342 |                 pool_destroy(0);
343 |             }
344 |         fprintf(fp, "\n");
345 |         fprintf(fq, "\n");
346 |         printf("==================================================\n\n");
347 |     }
348 |
349 |     pthread_mutex_destroy(&demutex);
350 |     pthread_mutex_destroy(&mutex);
351 |     fclose(fp);
352 |     fclose(fq);
353 |
354 |     return 0;
355 | }
--------------------------------------------------------------------------------
/example/test_simd/Makefile:
--------------------------------------------------------------------------------
1 | CC = icc
2 |
3 | # Repository root, relative to this example directory.
4 | PWD = ../..
5 |
6 | SRCS = $(wildcard *.c $(PWD)/src/*.c)
7 |
8 | # A substitution reference must not contain spaces around '=':
9 | # "$(SRCS:.c = .o)" matches nothing, which left OBJS equal to SRCS and meant
10 | # the %.o rule (and therefore CCFLAGS) was never used.
11 | OBJS = $(SRCS:.c=.o)
12 |
13 | INCLUDES = -I$(PWD)/inc
14 |
15 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a
16 |
17 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g
18 |
19 | OUTPUT = main
20 |
21 | all: $(OUTPUT)
22 |
23 | $(OUTPUT): $(OBJS)
24 | 	$(CC) $^ -o $@ $(LIBS)
25 |
26 | # "-o $@" writes each object next to its source, so objects built from
27 | # $(PWD)/src end up where the link rule expects them.
28 | %.o: %.c
29 | 	$(CC) -c $< -o $@ $(CCFLAGS) $(INCLUDES)
30 |
31 | # Remove intermediate and generated files.
32 | clean:
33 | 	rm -rf $(OUTPUT) $(OBJS) *.o *.txt
34 |
35 | .PHONY: all clean
36 |
--------------------------------------------------------------------------------
/example/test_simd/test_simd.c:
--------------------------------------------------------------------------------
1 | // version 3.1
2 | #include "simd_ldpc.h"
3 | #include "simd_bit.h"
4 | #include "bit.h"
5 | #include "crc.h"
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #if defined(_MSC_VER)
17 | #include
18 | #else
19 | #include
20 | #endif
21 |
22 | #define CRC_24A 0x1864CFB
23 | #define BLOCK_SIZE 10000
24 |
25 | #define B_NUM 2
26 | #define R_NUM 3
27 | #define SIMD_MODE_NUM 3
28 | #define EBN0_SIZE 21
29 |
30 | /*
31 |  * SIMD instruction-set benchmark.
32 |  *
33 |  * For every (B, R) pair and every SIMD mode in simd_list, BLOCK_SIZE random
34 |  * code blocks per Eb/N0 point are encoded, rate matched, BPSK mapped, passed
35 |  * through an AWGN channel and decoded. BER, BLER, throughput and latency are
36 |  * printed and appended to per-mode text files.
37 |  *
38 |  * Returns 0 on success and 1 when a result file cannot be opened.
39 |  */
40 | int main()
41 | {
42 |     /* test parameters */
43 |     int simd_list[SIMD_MODE_NUM] = {SIMD_MODE_SSE, SIMD_MODE_AVX2, SIMD_MODE_AVX512};
44 |     int B_list[B_NUM] = {8448, 3840};
45 |     int R_list[R_NUM] = {853, 768, 512};
46 |     float EbN0_list[EBN0_SIZE] = {0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0};
47 |     int indx_B, indx_R, indx_block, indx_ebn0, indx_simd;
48 |
49 |     /* file parameters */
50 |     FILE *fber[SIMD_MODE_NUM], *fbler[SIMD_MODE_NUM], *ftp[SIMD_MODE_NUM], *fla[SIMD_MODE_NUM], *fatp[SIMD_MODE_NUM], *fala[SIMD_MODE_NUM];
51 |
52 |     fber[0] = fopen("sse_BER.txt", "a");
53 |     fbler[0] = fopen("sse_BLER.txt", "a");
54 |     ftp[0] = fopen("sse_tp.txt", "a");
55 |     fla[0] = fopen("sse_latency.txt", "a");
56 |     fatp[0] = fopen("sse_avg_tp.txt", "a");
57 |     fala[0] = fopen("sse_avg_latency.txt", "a");
58 |     fber[1] = fopen("avx2_BER.txt", "a");
59 |     fbler[1] = fopen("avx2_BLER.txt", "a");
60 |     ftp[1] = fopen("avx2_tp.txt", "a");
61 |     fla[1] = fopen("avx2_latency.txt", "a");
62 |     fatp[1] = fopen("avx2_avg_tp.txt", "a");
63 |     fala[1] = fopen("avx2_avg_latency.txt", "a");
64 |     fber[2] = fopen("avx512_BER.txt", "a");
65 |     fbler[2] = fopen("avx512_BLER.txt", "a");
66 |     ftp[2] = fopen("avx512_tp.txt", "a");
67 |     fla[2] = fopen("avx512_latency.txt", "a");
68 |     fatp[2] = fopen("avx512_avg_tp.txt", "a");
69 |     fala[2] = fopen("avx512_avg_latency.txt", "a");
70 |
71 |     /* every stream is written to below, so stop early if one failed to open */
72 |     for (indx_simd = 0; indx_simd < SIMD_MODE_NUM; indx_simd++)
73 |     {
74 |         if (fber[indx_simd] == NULL || fbler[indx_simd] == NULL || ftp[indx_simd] == NULL ||
75 |             fla[indx_simd] == NULL || fatp[indx_simd] == NULL || fala[indx_simd] == NULL)
76 |         {
77 |             fprintf(stderr, "ERROR(main): cannot open result files\n");
78 |             return 1;
79 |         }
80 |     }
81 |
82 |     for (indx_B = 0; indx_B < B_NUM; indx_B++)
83 |         for (indx_R = 0; indx_R < R_NUM; indx_R++)
84 |             for (indx_simd = 0; indx_simd < SIMD_MODE_NUM; indx_simd++)
85 |             {
86 |                 /* one label, derived from the mode value, shared by header and summary
87 |                  * (the summary used to compare the loop index and mislabelled every mode) */
88 |                 const char *simd_name = (simd_list[indx_simd] == SIMD_MODE_SSE ? "SIMD_MODE_SSE" : (simd_list[indx_simd] == SIMD_MODE_AVX2 ? "SIMD_MODE_AVX2" : "SIMD_MODE_AVX512"));
89 |
90 |                 printf("==================================================\n");
91 |                 printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], simd_name);
92 |                 printf("--------------------------------------------------\n");
93 |                 /* time parameters */
94 |                 double encode_run_time;
95 |                 double decode_run_time;
96 | #if defined(_MSC_VER)
97 |                 LARGE_INTEGER num;
98 |                 long long start, end, freq;
99 | #else
100 |                 struct timeval start, end;
101 |                 long timeuse;
102 | #endif
103 |                 double avg_tp, avg_latency;
104 |
105 |                 /* mkl stream parameters */
106 |                 VSLStreamStatePtr stream_g;
107 |                 VSLStreamStatePtr stream_b;
108 |
109 |                 int32_t i, sum_err_bits, test_size;
110 |                 int32_t B, R, I_max, decoder_mode;
111 |                 float coef;
112 |                 float EbN0, sigma2, sigma;
113 |
114 |                 nr5g_ldpc_simd_t *ldpc_arg;
115 |                 int8_t *info_byte;
116 |                 int8_t *info_bits;
117 |                 int8_t *rmed_bits;
118 |                 float *mapped_sig;
119 |                 float *noise;
120 |                 float *llr;
121 |                 float *decoded_llr;
122 |                 int8_t *decbs_bits;
123 |
124 |                 int32_t err_bits[BLOCK_SIZE];
125 |                 int32_t err_bl;
126 |
127 |                 nr5g_crc_t crc_t;
128 |                 nr5g_crc_init(&crc_t, CRC_24A, 24);
129 |
130 |                 /* set parameters */
131 |                 B = B_list[indx_B];
132 |                 R = R_list[indx_R];
133 |                 I_max = 10;
134 |                 decoder_mode = DECODER_MODE_OMS;
135 |
136 |                 switch (decoder_mode)
137 |                 {
138 |                 case DECODER_MODE_OMS:
139 |                     coef = (float)2 / 4; // beta for oms
140 |                     break;
141 |                 case DECODER_MODE_NMS:
142 |                     coef = (float)25 / 32; // alpha for nms
143 |                     break;
144 |                 default:
145 |                     printf("ERROR(main): SIMD MODE %d IS NOT EXISTED.\n", decoder_mode);
146 |                     exit(EXIT_FAILURE); /* an error must not report success */
147 |                     break;
148 |                 }
149 |
150 |                 test_size = EBN0_SIZE;
151 |
152 |                 /* initialize */
153 |                 ldpc_arg = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t));
154 |                 nr5g_ldpc_simd_init(ldpc_arg, B, R, simd_list[indx_simd]);
155 |                 ldpc_arg->crc_t = &crc_t;
156 |
157 |                 info_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B);
158 |                 info_byte = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B / 8);
159 |                 rmed_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->G);
160 |                 mapped_sig = (float *)malloc(sizeof(float) * ldpc_arg->G);
161 |                 noise = (float *)malloc(sizeof(float) * ldpc_arg->G);
162 |                 llr = (float *)malloc(sizeof(float) * ldpc_arg->G);
163 |                 decoded_llr = (float *)malloc(sizeof(float) * ldpc_arg->G);
164 |                 decbs_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B);
165 |
166 |                 vslNewStream(&stream_g, VSL_BRNG_MCG31, 0);
167 |                 vslNewStream(&stream_b, VSL_BRNG_MCG31, 1);
168 |
169 |                 avg_tp = 0.0;
170 |                 avg_latency = 0.0;
171 |
172 |                 /* test loop start */
173 |                 for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++)
174 |                 {
175 |                     EbN0 = EbN0_list[indx_ebn0];
176 |                     sigma2 = (float)(1 / (pow(10, (double)EbN0 / 10) * 2 * R / 1024));
177 |                     // sigma2 = (float)(1 / pow(10, (double)EbN0 / 10));
178 |                     sigma = (float)sqrt(sigma2);
179 |                     sum_err_bits = 0;
180 |                     err_bl = 0;
181 |                     encode_run_time = 0.0;
182 |                     decode_run_time = 0.0;
183 |
184 |                     for (indx_block = 0; indx_block < BLOCK_SIZE; indx_block++)
185 |                     {
186 |                         err_bits[indx_block] = 0;
187 |
188 |                         /* generate random tbs */
189 |                         viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte);
190 |                         nr5g_crc_attach_byte(&crc_t, info_byte, B - 24);
191 |                         nr5g_bit_unpack_vector(info_byte, info_bits, B);
192 |
193 |                         /* cbs */
194 |                         nr5g_ldpc_simd_cbs(info_bits, ldpc_arg, ldpc_arg->cbs_bits);
195 |
196 |                         /* encode */
197 | #if defined(_MSC_VER)
198 |                         QueryPerformanceFrequency(&num);
199 |                         freq = num.QuadPart;
200 |                         QueryPerformanceCounter(&num);
201 |                         start = num.QuadPart;
202 | #else
203 |                         gettimeofday(&start, NULL);
204 | #endif
205 |                         nr5g_ldpc_simd_encoder(ldpc_arg->cbs_bits, ldpc_arg, ldpc_arg->coded_bits);
206 | #if defined(_MSC_VER)
207 |                         QueryPerformanceCounter(&num);
208 |                         end = num.QuadPart;
209 |                         encode_run_time += (double)(end - start) / freq;
210 | #else
211 |                         gettimeofday(&end, NULL);
212 |                         timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
213 |                         encode_run_time += (double)timeuse / 1000000.0;
214 | #endif
215 |
216 |                         /* rate matching */
217 |                         nr5g_ldpc_simd_rate_matching(ldpc_arg->coded_bits, ldpc_arg, rmed_bits);
218 |
219 |                         /* BPSK map */
220 |                         for (i = 0; i < ldpc_arg->G; i++)
221 |                             mapped_sig[i] = (float)2 * rmed_bits[i] - 1;
222 |
223 |                         /* pass AWGN channel */
224 |                         vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg->G, noise, 0.0, sigma);
225 |                         for (i = 0; i < ldpc_arg->G; i++)
226 |                             llr[i] = 2 * (mapped_sig[i] + noise[i]) / sigma2;
227 |
228 |                         /* rate dematching */
229 |                         nr5g_ldpc_simd_rate_dematching(llr, ldpc_arg, ldpc_arg->rdmed_llr);
230 |
231 |                         /* decode */
232 | #if defined(_MSC_VER)
233 |                         QueryPerformanceFrequency(&num);
234 |                         freq = num.QuadPart;
235 |                         QueryPerformanceCounter(&num);
236 |                         start = num.QuadPart;
237 | #else
238 |                         gettimeofday(&start, NULL);
239 | #endif
240 |
241 |                         nr5g_ldpc_simd_decoder(ldpc_arg->rdmed_llr, ldpc_arg, I_max, coef, decoder_mode, EARLY_STOP_OFF, ldpc_arg->decoded_bits, decoded_llr);
242 |
243 | #if defined(_MSC_VER)
244 |                         QueryPerformanceCounter(&num);
245 |                         end = num.QuadPart;
246 |                         decode_run_time += (double)(end - start) / freq;
247 | #else
248 |                         gettimeofday(&end, NULL);
249 |                         timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
250 |                         decode_run_time += (double)timeuse / 1000000.0;
251 | #endif
252 |
253 |                         /* decbs */
254 |                         nr5g_ldpc_simd_decbs(ldpc_arg->decoded_bits, ldpc_arg, decbs_bits);
255 |
256 |                         /* statistics */
257 |                         for (i = 0; i < ldpc_arg->B; i++)
258 |                             err_bits[indx_block] += (info_bits[i] == decbs_bits[i] ? 0 : 1);
259 |                         if (err_bits[indx_block])
260 |                             err_bl++;
261 |
262 |                         sum_err_bits += err_bits[indx_block];
263 |                     }
264 |
265 |                     /* print results */
266 |                     float ber = (float)sum_err_bits / (B * BLOCK_SIZE);
267 |                     float bler = (float)err_bl / BLOCK_SIZE;
268 |                     printf("Eb/N0:%.2f:\tBER:%.2e(%d/%d)\tBLER:%.2e(%d/%d)\n", EbN0_list[indx_ebn0], ber, sum_err_bits, B * BLOCK_SIZE, bler, err_bl, BLOCK_SIZE);
269 |                     printf("encode_Latency:%lfus\n", encode_run_time / BLOCK_SIZE * 1e6);
270 |                     printf("encode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / encode_run_time / 1e6);
271 |                     printf("decode_Latency:%lfus\n", decode_run_time / BLOCK_SIZE * 1e6);
272 |                     printf("decode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / decode_run_time / 1e6);
273 |                     printf("--------------------------------------------------\n");
274 |                     fprintf(fber[indx_simd], "%.2e\t", ber);
275 |                     fprintf(fbler[indx_simd], "%.2e\t", bler);
276 |                     fprintf(ftp[indx_simd], "%.2lf\t", (double)B * BLOCK_SIZE / decode_run_time / 1e6);
277 |                     fprintf(fla[indx_simd], "%.2lf\t", (double)decode_run_time * 1e6 / BLOCK_SIZE);
278 |
279 |                     avg_tp += (double)B * BLOCK_SIZE / decode_run_time / 1e6;
280 |                     avg_latency += decode_run_time * 1e6 / BLOCK_SIZE;
281 |                 }
282 |
283 |                 avg_tp /= test_size;
284 |                 avg_latency /= test_size;
285 |                 fprintf(fatp[indx_simd], "%.2lf\t", avg_tp);
286 |                 fprintf(fala[indx_simd], "%.2lf\t", avg_latency);
287 |
288 |                 printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], simd_name);
289 |                 printf("Average Throughput:\t%.2lfMbps\n", avg_tp);
290 |                 printf("Average Latency:\t%.2lfus\n", avg_latency);
291 |                 printf("==================================================\n\n");
292 |
293 |                 fprintf(fber[indx_simd], "\n");
294 |                 fprintf(fbler[indx_simd], "\n");
295 |                 fprintf(ftp[indx_simd], "\n");
296 |                 fprintf(fla[indx_simd], "\n");
297 |
298 |                 /* the RNG streams are created per configuration, so release them here */
299 |                 vslDeleteStream(&stream_g);
300 |                 vslDeleteStream(&stream_b);
301 |
302 |                 free(info_byte);
303 |                 free(info_bits);
304 |                 free(rmed_bits);
305 |                 free(mapped_sig);
306 |                 free(noise);
307 |                 free(llr);
308 |                 free(decoded_llr);
309 |                 free(decbs_bits);
310 |                 free_nr5g_ldpc_simd_t(ldpc_arg);
311 |             }
312 |     for (indx_simd = 0; indx_simd < SIMD_MODE_NUM; ++indx_simd)
313 |     {
314 |         fprintf(fber[indx_simd], "\n");
315 |         fprintf(fbler[indx_simd], "\n");
316 |         fprintf(ftp[indx_simd], "\n");
317 |         fprintf(fla[indx_simd], "\n");
318 |         fprintf(fatp[indx_simd], "\n");
319 |         fprintf(fala[indx_simd], "\n");
320 |         fclose(fber[indx_simd]);
321 |         fclose(fbler[indx_simd]);
322 |         fclose(ftp[indx_simd]);
323 |         fclose(fla[indx_simd]);
324 |         /* the average files were opened above but never closed */
325 |         fclose(fatp[indx_simd]);
326 |         fclose(fala[indx_simd]);
327 |     }
328 |     return 0;
329 | }
--------------------------------------------------------------------------------
/example/test_stop/Makefile:
--------------------------------------------------------------------------------
1 | CC = icc
2 |
3 | # Repository root, relative to this example directory.
4 | PWD = ../..
5 |
6 | SRCS = $(wildcard *.c $(PWD)/src/*.c)
7 |
8 | # A substitution reference must not contain spaces around '=':
9 | # "$(SRCS:.c = .o)" matches nothing, which left OBJS equal to SRCS and meant
10 | # the %.o rule (and therefore CCFLAGS) was never used.
11 | OBJS = $(SRCS:.c=.o)
12 |
13 | INCLUDES = -I$(PWD)/inc
14 |
15 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a
16 |
17 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g
18 |
19 | OUTPUT = main
20 |
21 | all: $(OUTPUT)
22 |
23 | $(OUTPUT): $(OBJS)
24 | 	$(CC) $^ -o $@ $(LIBS)
25 |
26 | # "-o $@" writes each object next to its source, so objects built from
27 | # $(PWD)/src end up where the link rule expects them.
28 | %.o: %.c
29 | 	$(CC) -c $< -o $@ $(CCFLAGS) $(INCLUDES)
30 |
31 | # Remove intermediate and generated files.
32 | clean:
33 | 	rm -rf $(OUTPUT) $(OBJS) *.o *.txt
34 |
35 | .PHONY: all clean
36 |
--------------------------------------------------------------------------------
/example/test_stop/test_stop.c:
--------------------------------------------------------------------------------
1 | // version 3.1
2 | #include "simd_ldpc.h"
3 | #include "simd_bit.h"
4 | #include "bit.h"
5 | #include "crc.h"
6 |
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 | #include
15 |
16 | #if defined(_MSC_VER)
17 | #include
18 | #else
19 | #include
20 | #endif
21 |
22 | #define CRC_24A 0x1864CFB
23 | #define BLOCK_SIZE 10000
24 |
25 | #define B_NUM 2
26 | #define R_NUM 3
27 | #define EARLY_STOP_NUM 2
28 | #define EBN0_SIZE 21
29 |
30 | int main()
31 | {
32 | /* test parameters */
33 | int es_list[EARLY_STOP_NUM] = {EARLY_STOP_OFF, EARLY_STOP_ON};
34 | int B_list[B_NUM] = {8448, 3840};
35 | int R_list[R_NUM] = {853, 768, 512};
36 | float EbN0_list[EBN0_SIZE] = {0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0};
37 | int indx_B, indx_R, indx_block, indx_ebn0, indx_es;
38 |
39 | /* file parameters */
40 | FILE *fber[EARLY_STOP_NUM], *fbler[EARLY_STOP_NUM], *ftp[EARLY_STOP_NUM], *fla[EARLY_STOP_NUM], *fatp[EARLY_STOP_NUM], *fala[EARLY_STOP_NUM];
41 | fber[0] = fopen("nostop_BER.txt", "a");
42 | fbler[0] = fopen("nostop_BLER.txt", "a");
43 | ftp[0] = fopen("nostop_tp.txt", "a");
44 | fla[0] = fopen("nostop_latency.txt", "a");
45 | fatp[0] = fopen("nostop_avg_tp.txt", "a");
46 | fala[0] = fopen("nostop_avg_latency.txt", "a");
47 | fber[1] = fopen("stop_BER.txt", "a");
48 | fbler[1] = fopen("stop_BLER.txt", "a");
49 | ftp[1] = fopen("stop_tp.txt", "a");
50 | fla[1] = fopen("stop_latency.txt", "a");
51 | fatp[1] = fopen("stop_avg_tp.txt", "a");
52 | fala[1] = fopen("stop_avg_latency.txt", "a");
53 |
54 | for (indx_B = 0; indx_B < B_NUM; indx_B++)
55 | for (indx_R = 0; indx_R < R_NUM; indx_R++)
56 | for (indx_es = 0; indx_es < EARLY_STOP_NUM; indx_es++)
57 | {
58 | printf("==================================================\n");
59 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R],
60 | (es_list[indx_es] == EARLY_STOP_ON ? "EARLY_STOP_ON" : "EARLY_STOP_OFF"));
61 | printf("--------------------------------------------------\n");
62 | /* time parameters */
63 | double encode_run_time;
64 | double decode_run_time;
65 | #if defined(_MSC_VER)
66 | LARGE_INTEGER num;
67 | long long start, end, freq;
68 | #else
69 | struct timeval start, end;
70 | long timeuse;
71 | #endif
72 | double avg_tp, avg_latency;
73 |
74 | /* mkl stream parameters */
75 | VSLStreamStatePtr stream_g;
76 | VSLStreamStatePtr stream_b;
77 |
78 | int32_t i, sum_err_bits, test_size;
79 | int32_t B, R, I_max, decoder_mode;
80 | float coef;
81 | float EbN0, sigma2, sigma;
82 |
83 | nr5g_ldpc_simd_t *ldpc_arg;
84 | int8_t *info_byte;
85 | int8_t *info_bits;
86 | int8_t *rmed_bits;
87 | float *mapped_sig;
88 | float *noise;
89 | float *llr;
90 | float *decoded_llr;
91 | int8_t *decbs_bits;
92 |
93 | int32_t err_bits[BLOCK_SIZE];
94 | int32_t err_bl;
95 |
96 | nr5g_crc_t crc_t;
97 | nr5g_crc_init(&crc_t, CRC_24A, 24);
98 |
99 | /* set parameters */
100 | B = B_list[indx_B];
101 | R = R_list[indx_R];
102 | I_max = 10;
103 | decoder_mode = DECODER_MODE_OMS;
104 |
105 | switch (decoder_mode)
106 | {
107 | case DECODER_MODE_OMS:
108 | coef = (float)2 / 4; // beta for oms
109 | break;
110 | case DECODER_MODE_NMS:
111 | coef = (float)25 / 32; // alpha for nms
112 | break;
113 | default:
114 | printf("ERROR(main): SIMD MODE %d IS NOT EXISTED.\n", decoder_mode);
115 | exit(0);
116 | break;
117 | }
118 |
119 | test_size = EBN0_SIZE;
120 |
121 | /* initialize */
122 | ldpc_arg = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t));
123 | nr5g_ldpc_simd_init(ldpc_arg, B, R, SIMD_MODE_AUTO);
124 | ldpc_arg->crc_t = &crc_t;
125 |
126 | info_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B);
127 | info_byte = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B / 8);
128 | rmed_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->G);
129 | mapped_sig = (float *)malloc(sizeof(float) * ldpc_arg->G);
130 | noise = (float *)malloc(sizeof(float) * ldpc_arg->G);
131 | llr = (float *)malloc(sizeof(float) * ldpc_arg->G);
132 | decoded_llr = (float *)malloc(sizeof(float) * ldpc_arg->G);
133 | decbs_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B);
134 |
135 | vslNewStream(&stream_g, VSL_BRNG_MCG31, 0);
136 | vslNewStream(&stream_b, VSL_BRNG_MCG31, 1);
137 |
138 | avg_tp = 0.0;
139 | avg_latency = 0.0;
140 |
141 | /* test loop start */
142 | for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++)
143 | {
144 | EbN0 = EbN0_list[indx_ebn0];
145 | sigma2 = (float)(1 / (pow(10, (double)EbN0 / 10) * 2 * R / 1024));
146 | // sigma2 = (float)(1 / pow(10, (double)EbN0 / 10));
147 | sigma = (float)sqrt(sigma2);
148 | sum_err_bits = 0;
149 | err_bl = 0;
150 | encode_run_time = 0.0;
151 | decode_run_time = 0.0;
152 |
153 | for (indx_block = 0; indx_block < BLOCK_SIZE; indx_block++)
154 | {
155 | err_bits[indx_block] = 0;
156 |
157 | /* generate random tbs */
158 | viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte);
159 | nr5g_crc_attach_byte(&crc_t, info_byte, B - 24);
160 | // fast_extend_avx512(info_byte, B / 8, info_bits);
161 | nr5g_bit_unpack_vector(info_byte, info_bits, B);
162 |
163 | /* cbs */
164 | nr5g_ldpc_simd_cbs(info_bits, ldpc_arg, ldpc_arg->cbs_bits);
165 |
166 | /* encode */
167 | #if defined(_MSC_VER)
168 | QueryPerformanceFrequency(&num);
169 | freq = num.QuadPart;
170 | QueryPerformanceCounter(&num);
171 | start = num.QuadPart;
172 | #else
173 | gettimeofday(&start, NULL);
174 | #endif
175 | nr5g_ldpc_simd_encoder(ldpc_arg->cbs_bits, ldpc_arg, ldpc_arg->coded_bits);
176 | #if defined(_MSC_VER)
177 | QueryPerformanceCounter(&num);
178 | end = num.QuadPart;
179 | encode_run_time += (double)(end - start) / freq;
180 | #else
181 | gettimeofday(&end, NULL);
182 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
183 | encode_run_time += (double)timeuse / 1000000.0;
184 | #endif
185 |
186 | /* rate matching */
187 | nr5g_ldpc_simd_rate_matching(ldpc_arg->coded_bits, ldpc_arg, rmed_bits);
188 |
189 | /* BPSK map */
190 | for (i = 0; i < ldpc_arg->G; i++)
191 | mapped_sig[i] = (float)2 * rmed_bits[i] - 1;
192 |
193 | /* pass AWGN channel */
194 | vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg->G, noise, 0.0, sigma);
195 | for (i = 0; i < ldpc_arg->G; i++)
196 | llr[i] = 2 * (mapped_sig[i] + noise[i]) / sigma2;
197 |
198 | /* rate dematching */
199 | nr5g_ldpc_simd_rate_dematching(llr, ldpc_arg, ldpc_arg->rdmed_llr);
200 |
201 | /* decode */
202 | #if defined(_MSC_VER)
203 | QueryPerformanceFrequency(&num);
204 | freq = num.QuadPart;
205 | QueryPerformanceCounter(&num);
206 | start = num.QuadPart;
207 | #else
208 | gettimeofday(&start, NULL);
209 | #endif
210 |
211 | nr5g_ldpc_simd_decoder(ldpc_arg->rdmed_llr, ldpc_arg, I_max, coef, decoder_mode, es_list[indx_es], ldpc_arg->decoded_bits, decoded_llr);
212 |
213 | #if defined(_MSC_VER)
214 | QueryPerformanceCounter(&num);
215 | end = num.QuadPart;
216 | decode_run_time += (double)(end - start) / freq;
217 | #else
218 | gettimeofday(&end, NULL);
219 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
220 | decode_run_time += (double)timeuse / 1000000.0;
221 | #endif
222 |
223 | /* decbs */
224 | nr5g_ldpc_simd_decbs(ldpc_arg->decoded_bits, ldpc_arg, decbs_bits);
225 |
226 | /* statistics */
227 | for (i = 0; i < ldpc_arg->B; i++)
228 | err_bits[indx_block] += (info_bits[i] == decbs_bits[i] ? 0 : 1);
229 | if (err_bits[indx_block])
230 | err_bl++;
231 |
232 | sum_err_bits += err_bits[indx_block];
233 | }
234 |
235 | /* print results */
236 | float ber = (float)sum_err_bits / (B * BLOCK_SIZE);
237 | float bler = (float)err_bl / BLOCK_SIZE;
238 | printf("Eb/N0:%.2f:\tBER:%.2e(%d/%d)\tBLER:%.2e(%d/%d)\n", EbN0_list[indx_ebn0], ber, sum_err_bits, B * BLOCK_SIZE, bler, err_bl, BLOCK_SIZE);
239 | printf("encode_Latency:%lfus\n", encode_run_time / BLOCK_SIZE * 1e6);
240 | printf("encode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / encode_run_time / 1e6);
241 | printf("decode_Latency:%lfus\n", decode_run_time / BLOCK_SIZE * 1e6);
242 | printf("decode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / decode_run_time / 1e6);
243 | printf("--------------------------------------------------\n");
244 | fprintf(fber[indx_es], "%.2e\t", ber);
245 | fprintf(fbler[indx_es], "%.2e\t", bler);
246 | fprintf(ftp[indx_es], "%.2lf\t", (double)B * BLOCK_SIZE / decode_run_time / 1e6);
247 | fprintf(fla[indx_es], "%.2lf\t", (double)decode_run_time * 1e6 / BLOCK_SIZE);
248 |
249 | avg_tp += (double)B * BLOCK_SIZE / decode_run_time / 1e6;
250 | avg_latency += decode_run_time * 1e6 / BLOCK_SIZE;
251 | }
252 |
253 | avg_tp /= test_size;
254 | avg_latency /= test_size;
255 | fprintf(fatp[indx_es], "%.2lf\t", avg_tp);
256 | fprintf(fala[indx_es], "%.2lf\t", avg_latency);
257 |
258 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], (indx_es ? "EARLY_STOP_ON" : "EARLY_STOP_OFF"));
259 | printf("Average Throughput:\t%.2lfMbps\n", avg_tp);
260 | printf("Average Latency:\t%.2lfus\n", avg_latency);
261 | printf("==================================================\n\n");
262 |
263 | fprintf(fber[indx_es], "\n");
264 | fprintf(fbler[indx_es], "\n");
265 | fprintf(ftp[indx_es], "\n");
266 | fprintf(fla[indx_es], "\n");
267 |
268 | free(info_byte);
269 | free(info_bits);
270 | free(rmed_bits);
271 | free(mapped_sig);
272 | free(noise);
273 | free(llr);
274 | free(decoded_llr);
275 | free(decbs_bits);
276 | free_nr5g_ldpc_simd_t(ldpc_arg);
277 | }
278 |
279 | for (indx_es = 0; indx_es < EARLY_STOP_NUM; ++indx_es)
280 | {
281 | fprintf(fber[indx_es], "\n");
282 | fprintf(fbler[indx_es], "\n");
283 | fprintf(ftp[indx_es], "\n");
284 | fprintf(fla[indx_es], "\n");
285 | fprintf(fatp[indx_es], "\n");
286 | fprintf(fala[indx_es], "\n");
287 | fclose(fber[indx_es]);
288 | fclose(fbler[indx_es]);
289 | fclose(ftp[indx_es]);
290 | fclose(fla[indx_es]);
291 | fclose(fatp[indx_es]);
292 | fclose(fala[indx_es]);
293 | }
294 |
295 | return 0;
296 | }
--------------------------------------------------------------------------------
/inc/bit.h:
--------------------------------------------------------------------------------
1 | #ifndef NR5G_BIT_H
2 | #define NR5G_BIT_H
3 | /* Helpers converting between packed bytes and unpacked bit arrays (implementations are not in this header). */
4 | #include <stdint.h>
5 | /* Expand nof_bits packed bits from `packed` into `unpacked`; presumably one bit per output element -- confirm. */
6 | void nr5g_bit_unpack_vector(uint8_t *packed,
7 |                             uint8_t *unpacked,
8 |                             int nof_bits);
9 | /* Presumably the inverse of nr5g_bit_unpack_vector: gather nof_bits entries of `unpacked` into `packed` -- confirm. */
10 | void nr5g_bit_pack_vector(uint8_t *unpacked,
11 |                           uint8_t *packed,
12 |                           int nof_bits);
13 | /* Write nof_bits bits of `value` through *bits; the double pointer suggests the cursor is advanced -- confirm. */
14 | void nr5g_bit_unpack(uint32_t value,
15 |                      uint8_t **bits,
16 |                      int nof_bits);
17 | /* Read nof_bits bits through *bits and return them as one integer; cursor handling as above -- confirm. */
18 | uint32_t nr5g_bit_pack(uint8_t **bits,
19 |                        int nof_bits);
20 |
21 | #endif
--------------------------------------------------------------------------------
/inc/crc.h:
--------------------------------------------------------------------------------
1 | #ifndef NR5G_CRC_H
2 | #define NR5G_CRC_H
3 |
4 | #include <stdint.h>
5 | /* State of one CRC engine; passed as `h` to every nr5g_crc_* routine declared in this header. */
6 | typedef struct
7 | {
8 |     uint64_t table[256]; // lookup table, indexed with a single byte by nr5g_crc_checksum_put_byte
9 |     int polynom; // generator polynomial -- presumably filled in by nr5g_crc_init, confirm
10 |     int order; // polynomial order in bits; the inline updaters expect 8, 16, 24 or 32
11 |     uint64_t crcinit; // running CRC register: read and rewritten by the put_* updaters
12 |     uint64_t crcmask; // mask applied to crcinit by nr5g_crc_checksum_get
13 |     uint64_t crchighbit; // NOTE(review): unused in this header; presumably the top-bit mask -- confirm
14 |     uint32_t nr5g_crc_out; // NOTE(review): unused in this header; presumably the last computed checksum -- confirm
15 | } nr5g_crc_t;
16 | /* Prepare *h for the given polynomial and order (in bits). Return convention not visible here -- TODO confirm. */
17 | int nr5g_crc_init(nr5g_crc_t *h,
18 | uint32_t nr5g_crc_poly,
19 | int nr5g_crc_order);
20 | /* Set the starting value of the CRC register; presumably stored in h->crcinit -- confirm. */
21 | int nr5g_crc_set_init(nr5g_crc_t *h,
22 | uint64_t init_value);
23 | /* Presumably computes the CRC of `data` and appends it; the example passes len = B - 24 for a B-bit buffer -- confirm units. */
24 | uint32_t nr5g_crc_attach_byte(nr5g_crc_t *h,
25 | uint8_t *data,
26 | int len);
27 | /* Feed one byte into the running CRC kept in h->crcinit, using the byte-indexed lookup table. */
28 | static inline void nr5g_crc_checksum_put_byte(nr5g_crc_t *h, uint8_t byte)
29 | {
30 |     // Supported polynomial orders: 8, 16, 24 and 32.
31 |     const int top_shift = h->order - 8;
32 |     const uint64_t state = h->crcinit;
33 |
34 |     // The top byte of the register, mixed with the input byte, selects the table entry.
35 |     const uint64_t slot = ((state >> top_shift) & 0xff) ^ byte;
36 |     h->crcinit = (state << 8) ^ h->table[slot];
37 | }
38 | /* Return the current CRC value: the running register h->crcinit masked with h->crcmask. */
39 | static inline uint64_t nr5g_crc_checksum_get(nr5g_crc_t *h)
40 | {
41 |     return h->crcmask & h->crcinit;
42 | }
43 | /* Presumably the CRC of `data` computed one byte per step; unit of len (bits or bytes) not visible here -- confirm. */
44 | uint32_t nr5g_crc_checksum_byte(nr5g_crc_t *h,
45 | uint8_t *data,
46 | int len);
47 |
48 | /* Presumably the 16-bit-step counterpart of nr5g_crc_checksum_byte -- confirm against the implementation. */
49 | uint32_t nr5g_crc_checksum_16(nr5g_crc_t *h, uint8_t *data, int len);
50 | /* Feed a 16-bit word into the running CRC kept in h->crcinit, most significant byte first. */
51 | static inline void nr5g_crc_checksum_put_16(nr5g_crc_t *h, uint16_t byte)
52 | {
53 |     /* h->table holds only 256 entries, so a 16-bit index would read past its end; */
54 |     /* run the byte-wide table step twice instead. Polynomial order 8, 16, 24 or 32 only. */
55 |     int ord = h->order - 8;
56 |     uint64_t crc = h->crcinit;
57 |
58 |     crc = (crc << 8) ^ h->table[((crc >> ord) & 0xff) ^ (uint8_t)(byte >> 8)];
59 |     h->crcinit = (crc << 8) ^ h->table[((crc >> ord) & 0xff) ^ (uint8_t)(byte & 0xff)];
60 | }
61 |
62 | #endif
--------------------------------------------------------------------------------
/inc/simd_bit.h:
--------------------------------------------------------------------------------
1 | #ifndef SIMD_BIT
2 | #define SIMD_BIT
3 |
4 | #include
5 | #include
6 | // NOTE(review): bodies are not visible here; the notes below are inferred from names and usage -- confirm each one.
7 | int fast_decide_byte_avx512(const int8_t *input, int len, uint8_t *output);
8 | // Presumably expands packed bytes into bits: the example keeps a disabled call fast_extend_avx512(info_byte, B / 8, info_bits).
9 | int fast_extend_avx512(const uint8_t *input, int len, uint8_t *output);
10 | // The three fast_decide_bit_* variants share one signature and differ by instruction-set suffix (avx512 / avx2 / sse).
11 | int fast_decide_bit_avx512(const int8_t *input, int len, uint8_t *output);
12 | // Presumably each turns `len` signed 8-bit soft values into hard bit decisions -- TODO confirm output packing.
13 | int fast_decide_bit_avx2(const int8_t *input, int len, uint8_t *output);
14 | // Meaning of the int return value is not visible here -- TODO confirm.
15 | int fast_decide_bit_sse(const int8_t *input, int len, uint8_t *output);
16 |
17 | #endif
--------------------------------------------------------------------------------
/inc/simd_ldpc.h:
--------------------------------------------------------------------------------
1 | // version 3.1
2 | #ifndef SIMD_LDPC_H
3 | #define SIMD_LDPC_H
4 |
5 | #include
6 | #include
7 | #include
8 | #include "crc.h"
9 | // Values for the simd_mode argument of nr5g_ldpc_simd_init(); the example program passes SIMD_MODE_AUTO.
10 | #define SIMD_MODE_AUTO 0
11 | #define SIMD_MODE_SSE 1
12 | #define SIMD_MODE_AVX2 2
13 | #define SIMD_MODE_AVX512 3
14 | // Values for the decoder_mode argument; the example pairs OMS with a "beta" coefficient and NMS with an "alpha" one.
15 | #define DECODER_MODE_OMS 1
16 | #define DECODER_MODE_NMS 2
17 | // Values for the early_stop argument of the decoder functions.
18 | #define EARLY_STOP_OFF 0 // early stop function off
19 | #define EARLY_STOP_ON 1 // early stop function on
20 | // Context shared by all nr5g_ldpc_simd_* functions: code parameters plus encoder/decoder work buffers.
21 | typedef struct nr5g_ldpc_simd_t
22 | {
23 | /* General Parameters */
24 | int32_t B; // the length of TBS, 'B' in TS38.212
25 | int32_t R; // code rate*1024
26 | int32_t K_b; // block number of message bits, 'K_b' in TS38.212
27 | int32_t K_p; // 'K_+' in TS38.212
28 | int32_t K_n; // 'K_-' in TS38.212
29 | int32_t C; // number of code blocks after CBS, 'C' in TS38.212
30 | int32_t L; // CRC length, 'L' in TS38.212
31 | int32_t iLS; // LDPC lifting size
32 | int32_t BG_sel; // number of base graph
33 | int32_t Z_c; // block size, 'Z_c' in TS38.212
34 | int32_t K; // message bits length, 'K' in TS38.212
35 | int32_t N; // matrix length, 'N' in TS38.212
36 | int32_t col_hbg; // column number of base graph
37 | int32_t row_hbg; // row number of base graph
38 | int32_t K_cb; // maximum code block size, 'K_cb' in TS38.212
39 | int32_t E; // transmit block length
40 | int32_t k0; // start point of rate matching
41 | int32_t N_cb; // decode bits block length
42 | int32_t Nd; // decode H matrix col num
43 | int32_t Md; // decode H matrix row num
44 | int32_t G; // total number of coded bits for transmission
45 | int32_t E_p; // 'E_+'
46 | int32_t E_n; // 'E_-'
47 | int16_t *H_BG; // base graph
48 | int32_t simd_mode; // presumably one of the SIMD_MODE_* values given to nr5g_ldpc_simd_init -- confirm
49 |
50 | /* Encoder Parameters */
51 | int8_t *p0, *p1, *p2, *p3; // NOTE(review): encoder work buffers; exact role not visible in this header
52 | int8_t *cbs_bits; // the example passes this as output of nr5g_ldpc_simd_cbs and input of nr5g_ldpc_simd_encoder
53 | int8_t *coded_bits; // the example passes this as output of the encoder and input of rate matching
54 | int8_t *crc_serial; // NOTE(review): role not visible in this header -- confirm
55 |
56 | /* Decoder Parameters */
57 | int32_t M_whole; // length of the cn_msg_* arrays (see their comments below)
58 | int32_t col_hbg_d; // presumably column number of the base graph used for decoding -- confirm
59 | int32_t row_hbg_d; // presumably row number of the base graph used for decoding -- confirm
60 | float *rdmed_llr; // the example passes this as output of rate dematching and input of the decoder
61 | int8_t *decoded_bits; // the example passes this as output of the decoder and input of nr5g_ldpc_simd_decbs
62 | int8_t *degree; // number of connective check nodes(length:M/hbg_row_d)
63 | __m128i *cn_msg_sse; // sse message from cn to vn(length:M_whole)
64 | __m128i *vn_msg_sse; // temp sse message from vn to cn(length:19)
65 | __m256i *cn_msg_avx2; // avx2 message from cn to vn(length:M_whole)
66 | __m256i *vn_msg_avx2; // temp avx2 message from vn to cn(length:19)
67 | __m512i *cn_msg_avx512; // avx512 message from cn to vn(length:M_whole)
68 | __m512i *vn_msg_avx512; // temp avx512 message from vn to cn(length:19)
69 | int8_t *llr_fixed; // fixed llr info(length:2*REG_SIZE+Nd)
70 | int32_t units; // floor(Z_c/REG_SIZE)
71 | int32_t whole_degree; // sum of degree of every hbg_row_d
72 | int8_t **llr_addr_start; // llr address(length:whole_degree*units)
73 | int8_t *llr_addr_flag; // flag for access type(0:no mask;1:1 mask;2:2 mask)
74 | int8_t **llr_addr_pre; // extra llr address for flag=2(length:whole_degree)
75 | __m128i *mask_sse; // mask1 for flag=2(length:whole_degree)
76 | __m128i *mask_pre_sse; // mask2 for flag=2(length:whole_degree)
77 | __m128i endmask_sse; // mask for flag=1
78 | __m256i *mask_avx2; // mask1 for flag=2(length:whole_degree)
79 | __m256i *mask_pre_avx2; // mask2 for flag=2(length:whole_degree)
80 | __m256i endmask_avx2; // mask for flag=1
81 | #ifdef PAST_METHOD
82 | __m512i *mask_avx512; // mask1 for flag=2(length:whole_degree)
83 | __m512i *mask_pre_avx512; // mask2 for flag=2(length:whole_degree)
84 | __m512i endmask_avx512; // mask for flag=1
85 | #else
86 | __mmask64 *mmask_avx512; // mmask1 for flag=2(length:whole_degree)
87 | __mmask64 *mmask_pre_avx512; // mmask2 for flag=2(length:whole_degree)
88 | __mmask64 endmmask_avx512; // mmask for flag=1
89 | #endif
90 |
91 | nr5g_crc_t *crc_t; // CRC engine; the example program assigns it itself right after nr5g_ldpc_simd_init()
92 | uint8_t *byte_list; // NOTE(review): role not visible in this header -- confirm
93 |
94 | } nr5g_ldpc_simd_t;
95 |
96 | /*************************************************************************************/
97 | /* Declare LDPC initialization functions */
98 | /*************************************************************************************/
99 | /* Top-level initializer: B = TBS length, R = code rate*1024, simd_mode = SIMD_MODE_*. The caller allocates *h. */
100 | void nr5g_ldpc_simd_init(nr5g_ldpc_simd_t *h, int32_t B, int32_t R, int32_t simd_mode);
101 |
102 | /* initialize LDPC parameters (presumably sub-steps of nr5g_ldpc_simd_init -- confirm) */
103 | void nr5g_ldpc_simd_mode_init(nr5g_ldpc_simd_t *h);
104 | void nr5g_ldpc_simd_param_init(nr5g_ldpc_simd_t *h, int32_t B, int32_t R);
105 | void nr5g_ldpc_simd_rvid_param_init(nr5g_ldpc_simd_t *h, int32_t rvid);
106 |
107 | /* initialize base graph matrix */
108 | void nr5g_ldpc_simd_matrix_init(nr5g_ldpc_simd_t *h);
109 |
110 | /* initialize encoder parameters */
111 | void nr5g_ldpc_simd_encoder_mem_init(nr5g_ldpc_simd_t *h);
112 |
113 | /* initialize decoder parameters */
114 | void nr5g_ldpc_simd_decoder_mem_init(nr5g_ldpc_simd_t *h);
115 | void nr5g_ldpc_simd_decoder_param_init(nr5g_ldpc_simd_t *h);
116 |
117 | /*************************************************************************************/
118 | /* Declare LDPC free functions */
119 | /*************************************************************************************/
120 | /* The example calls only free_nr5g_ldpc_simd_t(h) on its malloc'ed context; whether h itself is freed -- TODO confirm. */
121 | void free_nr5g_ldpc_simd_t(nr5g_ldpc_simd_t *h);
122 | void free_nr5g_ldpc_encoder(nr5g_ldpc_simd_t *h);
123 | void free_nr5g_ldpc_decoder(nr5g_ldpc_simd_t *h);
124 |
125 | /*************************************************************************************/
126 | /* Declare LDPC code block segmentation functions */
127 | /*************************************************************************************/
128 | /* Segment the transport block; the example feeds its info bits in and uses h->cbs_bits as output. */
129 | void nr5g_ldpc_simd_cbs(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits);
130 | /* The *_scb variants take a code block index r; presumably they handle a single code block -- confirm. */
131 | void nr5g_ldpc_simd_cbs_scb(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits, int32_t r);
132 | /* Presumably appends an L-bit CRC to len input bits -- confirm against the implementation. */
133 | void nr5g_fec_crc_encode(const int8_t *input_bits, int32_t len, int32_t L, int8_t *output_bits);
134 | /* Reverse of segmentation; the example feeds h->decoded_bits in and compares the output with the original info bits. */
135 | void nr5g_ldpc_simd_decbs(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits);
136 |
137 | void nr5g_ldpc_simd_decbs_scb(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits, int32_t r);
138 |
139 | /*************************************************************************************/
140 | /* Declare LDPC encode functions */
141 | /*************************************************************************************/
142 | /* Encode segmented bits; the example passes h->cbs_bits as info_bits and h->coded_bits as coded_bits. */
143 | void nr5g_ldpc_simd_encoder(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *coded_bits);
144 | /* Presumably the single-code-block worker behind nr5g_ldpc_simd_encoder -- confirm. */
145 | void nr5g_ldpc_simd_encoder_scb(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *coded_bits);
146 |
147 | /*************************************************************************************/
148 | /* Declare LDPC rate matching functions */
149 | /*************************************************************************************/
150 | /* Bits side: the example passes h->coded_bits in and an output buffer of h->G entries. */
151 | void nr5g_ldpc_simd_rate_matching(const int8_t *coded_bits, nr5g_ldpc_simd_t *h, int8_t *rmed_bits);
152 | /* The *_scb variants take a code block index r; presumably they handle a single code block -- confirm. */
153 | void nr5g_ldpc_simd_rate_matching_scb(const int8_t *coded_bits, nr5g_ldpc_simd_t *h, int8_t *rmed_bits, int32_t r);
154 | /* LLR side: the example passes h->G float LLRs in and h->rdmed_llr as output. */
155 | void nr5g_ldpc_simd_rate_dematching(const float *llr, nr5g_ldpc_simd_t *h, float *rdmed_llr);
156 |
157 | void nr5g_ldpc_simd_rate_dematching_scb(const float *llr, nr5g_ldpc_simd_t *h, float *rdmed_llr, int32_t r);
158 |
159 | /*************************************************************************************/
160 | /* Declare LDPC decode functions */
161 | /*************************************************************************************/
162 | /* decoder_mode is DECODER_MODE_*, early_stop is EARLY_STOP_*; coef is the OMS beta / NMS alpha; I_max presumably caps iterations. */
163 | void nr5g_ldpc_simd_decoder(const float *llr, nr5g_ldpc_simd_t *h, int32_t I_max, float coef, int32_t decoder_mode, int32_t early_stop, int8_t *decoded_bits, float *decoded_llr);
164 | /* Takes LLRs and a code block index r but no output pointer; presumably loads them into the context -- confirm. */
165 | void nr5g_ldpc_fast_load_llr_simd_scb(const float *llr, nr5g_ldpc_simd_t *h, int32_t r);
166 | /* Same tuning arguments as nr5g_ldpc_simd_decoder, but no llr input and an extra code block index r. */
167 | void nr5g_ldpc_simd_decoder_scb(nr5g_ldpc_simd_t *h, int32_t I_max, float coef, int32_t decoder_mode, int32_t early_stop, int8_t *decoded_bits, float *decoded_llr, int32_t r);
168 |
169 | /*************************************************************************************/
170 | /* Declare LDPC combo functions */
171 | /*************************************************************************************/
172 | /* Name suggests cbs + encode + rate matching in one call (info bits in, rate-matched bits out) -- confirm. */
173 | void nr5g_ldpc_simd_cbs_enc_rm(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *rmed_bits);
174 | /* Careful: coef comes AFTER decoder_mode and early_stop here, unlike nr5g_ldpc_simd_decoder where it precedes them. */
175 | void nr5g_ldpc_simd_rdm_dec_decbs(const float *llr, nr5g_ldpc_simd_t *h, int32_t I_max, int32_t decoder_mode, int32_t early_stop, float coef, int8_t *decbs_bits, float *decoded_llr);
176 | /* Presumably a codeword validity check on `code`; meaning of the return value is not visible here -- TODO confirm. */
177 | int is_ldpc_code(nr5g_ldpc_simd_t *h, uint8_t *code);
178 |
179 | #endif // !SIMD_LDPC_H
180 |
--------------------------------------------------------------------------------
/inc/thread_pool.h:
--------------------------------------------------------------------------------
1 | #ifndef _THREAD_POOL_
2 | #define _THREAD_POOL_
3 | #ifndef __USE_GNU
4 | #define __USE_GNU
5 | #endif
6 | #include
7 | #include
8 | #include
9 | // Signature of a task callback; a worker thread invokes it as myfun(arg).
10 | typedef void (*Fun)(void *arg);
11 | // Argument bundle for a thread function. NOTE(review): not referenced by thread_pool.c; field notes below are guesses.
12 | struct Arg
13 | {
14 | int coreId; // presumably the CPU core the work is meant for -- confirm
15 | int re_idx; // TODO confirm meaning with the callers
16 | int data_idx; // TODO confirm meaning with the callers
17 | };
18 | // One node of a pool's singly linked task queue; allocated by pool_add_task, freed by the worker that ran it.
19 | struct Task
20 | {
21 | Fun myfun; // callback to run
22 | void *arg; // passed unchanged to myfun
23 | struct Task *next; // next queued task, NULL at the tail
24 | };
25 | // State of one thread pool; instances are created by pool_init and released by pool_destroy.
26 | struct Thread_Pool
27 | {
28 | pthread_mutex_t mutex; // guards taskHead; workers also hold it around cond waits
29 | pthread_cond_t cond; // signalled when a task is queued, broadcast when the pool closes
30 | pthread_mutex_t mutex_flag; // NOTE(review): never initialized or used by thread_pool.c
31 | pthread_cond_t cond_flag; // NOTE(review): never initialized or used by thread_pool.c
32 | struct Task *taskHead; // head of the FIFO task list, NULL when empty
33 | bool isClose; // set by pool_destroy; workers exit once it is set and the queue is empty
34 | int threadNum; // number of worker threads
35 | int threadNum_Idle; // NOTE(review): never initialized or used by thread_pool.c
36 | pthread_t *threadId; // array of threadNum thread handles
37 | };
38 | // Start-up argument handed to each worker through pthread_create (see pool_init / thread_run).
39 | struct pool_arg_t
40 | {
41 | int coreId; // CPU core the worker tries to pin itself to
42 | int pool_index; // index into pool[] of the pool the worker serves
43 | };
44 | // pool_index selects a slot of pool[] below; every function here operates on that slot.
45 | void pool_init(int coreId_start, int _threadNum, int pool_index); // start _threadNum workers on cores coreId_start, coreId_start + 1, ...
46 | void pool_add_task(Fun myfun, void *arg, int pool_index); // queue myfun(arg) at the tail and wake one worker
47 | void pool_destroy(int pool_index); // close the pool, join its workers, free it
48 | void *thread_run(void *arg); // worker entry point; arg points to a struct pool_arg_t
49 | // One pointer per pool; defined in thread_pool.c.
50 | extern struct Thread_Pool *pool[12];
51 |
52 | #endif
53 |
--------------------------------------------------------------------------------
/lib/libsimd_5gfec.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SherlockHsu/5G-SIMD-LDPC/5bbc01c27b96d1ffe917d44b84be662d3ed0e96c/lib/libsimd_5gfec.a
--------------------------------------------------------------------------------
/src/thread_pool.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "thread_pool.h"
8 | // File-level state: the pool table declared in thread_pool.h, and a table of core ids written by pool_init.
9 | struct Thread_Pool *pool[12]; // pool_destroy resets a slot to NULL
10 | static int coreId[72]; // indexed by worker number; thread_run uses its own local of the same name
11 | /* Pin the calling thread to CPU core_id. Returns EINVAL for an out-of-range id, else pthread_setaffinity_np's result. */
12 | static int stick_this_thread_to_core(int core_id)
13 | {
14 |     const int online_cpus = sysconf(_SC_NPROCESSORS_ONLN); // CPUs currently online
15 |     cpu_set_t wanted;
16 |
17 |     if (!(core_id >= 0 && core_id < online_cpus))
18 |         return EINVAL;
19 |
20 |     CPU_ZERO(&wanted);
21 |     CPU_SET(core_id, &wanted);
22 |
23 |     return pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &wanted);
24 | }
25 | /* Create pool[pool_index] (12 slots) with _threadNum workers (at most 72) pinned to cores coreId_start, coreId_start + 1, ... */
26 | void pool_init(int coreId_start, int _threadNum, int pool_index)
27 | {
28 |     static struct pool_arg_t pool_arg[12][72]; /* static, not leaked heap: workers copy their entry asynchronously */
29 |     int j = pool_index;
30 |     if (j < 0 || j >= 12 || _threadNum < 0 || _threadNum > 72) /* sizes of pool[] and coreId[] */
31 |     {
32 |         printf("pool_init: bad pool_index %d or thread count %d\n", pool_index, _threadNum);
33 |         return;
34 |     }
35 |     pool[j] = (struct Thread_Pool *)malloc(sizeof(struct Thread_Pool));
36 |     assert(pool[j] != NULL);
37 |     pthread_mutex_init(&(pool[j]->mutex), NULL);
38 |     pthread_cond_init(&(pool[j]->cond), NULL);
39 |     pool[j]->taskHead = NULL;
40 |     pool[j]->isClose = false;
41 |     pool[j]->threadNum = _threadNum;
42 |     pool[j]->threadId = (pthread_t *)malloc(sizeof(pthread_t) * pool[j]->threadNum);
43 |     assert(pool[j]->threadId != NULL || pool[j]->threadNum == 0);
44 |     for (int i = 0; i < pool[j]->threadNum; ++i)
45 |     {
46 |         coreId[i] = pool_arg[j][i].coreId = coreId_start + i;
47 |         pool_arg[j][i].pool_index = pool_index;
48 |         if (pthread_create(&(pool[j]->threadId[i]), NULL, thread_run, (void *)&pool_arg[j][i]))
49 |         {
50 |             printf("pthread_creat failed!\n");
51 |             pool[j]->threadNum = i; /* so that pool_destroy() joins only the threads that exist */
52 |             return;
53 |         }
54 |     }
55 | }
56 | /* Queue _myfun(_arg) at the tail of pool[pool_index]'s task list and wake one worker. */
57 | void pool_add_task(Fun _myfun, void *_arg, int pool_index)
58 | {
59 |     int j = pool_index;
60 |     // Build the new task node.
61 |     struct Task *newTask = (struct Task *)malloc(sizeof(struct Task));
62 |     assert(newTask != NULL); // fail loudly instead of writing through a NULL pointer below
63 |     newTask->myfun = _myfun;
64 |     newTask->arg = _arg;
65 |     newTask->next = NULL; // the new node always becomes the tail
66 |
67 |     // Append the node to the task list while holding the pool mutex.
68 |     pthread_mutex_lock(&(pool[j]->mutex));
69 |     struct Task *head = pool[j]->taskHead;
70 |     if (head == NULL)
71 |         pool[j]->taskHead = newTask;
72 |     else
73 |     {
74 |         while (head->next)
75 |             head = head->next;
76 |         head->next = newTask;
77 |     }
78 |     // The list is consistent again: release it, then wake one waiting worker.
79 |     pthread_mutex_unlock(&(pool[j]->mutex));
80 |     pthread_cond_signal(&(pool[j]->cond));
81 | }
82 | /* Shut down pool[pool_index]: workers finish the queued tasks, are joined, and the pool is freed. Safe to call twice. */
83 | void pool_destroy(int pool_index)
84 | {
85 |     int j = pool_index;
86 |     if (j < 0 || j >= 12 || pool[j] == NULL) // already destroyed: the slot is reset to NULL at the end
87 |         return;
88 |     // Raise the flag and broadcast under the mutex so a worker that just tested isClose cannot miss the wake-up.
89 |     pthread_mutex_lock(&(pool[j]->mutex));
90 |     bool alreadyClosing = pool[j]->isClose;
91 |     pool[j]->isClose = true;
92 |     pthread_cond_broadcast(&(pool[j]->cond));
93 |     pthread_mutex_unlock(&(pool[j]->mutex));
94 |     if (alreadyClosing) // keep the original guard against a second, overlapping call
95 |         return;
96 |     for (int i = 0; i < pool[j]->threadNum; ++i) // reap every worker
97 |         pthread_join(pool[j]->threadId[i], NULL);
98 |     free(pool[j]->threadId);
99 |
100 |     // Free whatever is still queued (normally nothing: workers drain the queue before exiting).
101 |     struct Task *tmpTask;
102 |     while (pool[j]->taskHead != NULL)
103 |     {
104 |         tmpTask = pool[j]->taskHead;
105 |         pool[j]->taskHead = pool[j]->taskHead->next;
106 |         free(tmpTask);
107 |     }
108 |     // Destroy the mutex and the condition variable.
109 |     pthread_mutex_destroy(&(pool[j]->mutex));
110 |     pthread_cond_destroy(&(pool[j]->cond));
111 |     free(pool[j]);
112 |     // Clear the slot so that later calls see the pool as gone.
113 |     pool[j] = NULL;
114 | }
115 | /* Worker entry point: pin to the requested core, then run queued tasks until the pool is closed and empty. */
116 | void *thread_run(void *_arg)
117 | {
118 |     // _arg points to this worker's struct pool_arg_t, passed through pthread_create().
119 |     // Take a private copy of it before doing anything else.
120 |     const struct pool_arg_t startup = *((struct pool_arg_t *)_arg);
121 |     const int core = startup.coreId;
122 |     const int j = startup.pool_index;
123 |     struct Task *job;
124 |
125 |     // Pinning is best effort: a failure is reported and the worker carries on.
126 |     if (stick_this_thread_to_core(core) != 0)
127 |         printf("Stick to core %d is failed!\n", core);
128 |
129 |     for (;;)
130 |     {
131 |         pthread_mutex_lock(&(pool[j]->mutex));
132 |         // Sleep until there is work or the pool is closing; the loop re-tests after every wake-up.
133 |         while (pool[j]->taskHead == NULL && !pool[j]->isClose)
134 |         {
135 |             pthread_cond_wait(&(pool[j]->cond), &(pool[j]->mutex));
136 |         }
137 |
138 |         // Exit only once the queue is empty, so tasks queued before shutdown still run.
139 |         if (pool[j]->taskHead == NULL && pool[j]->isClose)
140 |         {
141 |             // Pass the wake-up on to the other workers before leaving.
142 |             pthread_cond_broadcast(&(pool[j]->cond));
143 |             pthread_mutex_unlock(&(pool[j]->mutex));
144 |             pthread_exit(NULL);
145 |         }
146 |
147 |         // Detach the head task while still holding the lock.
148 |         job = pool[j]->taskHead;
149 |         pool[j]->taskHead = job->next;
150 |         pthread_mutex_unlock(&(pool[j]->mutex));
151 |
152 |         // Run the task outside the lock, then release its node.
153 |         (job->myfun)(job->arg);
154 |         free(job);
155 |         job = NULL;
156 |     }
157 | }
158 |
--------------------------------------------------------------------------------