├── .gitignore ├── LICENSE ├── README.md ├── example ├── test_multicore │ ├── Makefile │ └── test_multicore.c ├── test_simd │ ├── Makefile │ └── test_simd.c └── test_stop │ ├── Makefile │ └── test_stop.c ├── inc ├── bit.h ├── crc.h ├── simd_bit.h ├── simd_ldpc.h └── thread_pool.h ├── lib └── libsimd_5gfec.a └── src └── thread_pool.c /.gitignore: -------------------------------------------------------------------------------- 1 | main 2 | *.o 3 | *.txt 4 | .vscode 5 | src/ldpc_hbg.c 6 | src/simd_ldpc.c 7 | lib/Makefile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 SherlockHsu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 5G-SIMD-LDPC 2 | ============================== 3 | 4 | * Version: 3.1 5 | * Date: 2019.03.27 6 | * Author: Xu Yi 7 | 8 | --- 9 | File Specification 10 | ------------------------------ 11 | 12 | ``` 13 | 5G-SIMD-LDPC 14 | │ README.md 15 | │ LICENSE 16 | │ 17 | └───inc 18 | │ │ bit.h // Data pack and unpack header file 19 | │ │ crc.h // CRC header file 20 | │ │ simd_bit.h // Data pack and unpack with SIMD header file 21 | │ │ simd_ldpc.h // SIMD LDPC header file 22 | │ │ thread_pool.h // Thread pool header file 23 | │ 24 | └───src 25 | │ │ thread_pool.c // Thread pool source file 26 | │ 27 | └───lib 28 | │ │ libsimd_5gfec.a // Static library for x86_64 Linux 29 | │ 30 | └───example 31 | └───test_multicore // example for multi-core test 32 | │ │ test_multicore.c 33 | │ │ Makefile 34 | │ 35 | └───test_simd // example for different SIMD instruction sets 36 | │ │ test_simd.c 37 | │ │ Makefile 38 | │ 39 | └───test_stop // example for early stopping strategy 40 | │ test_stop.c 41 | │ Makefile 42 | ``` 43 | 44 | Build and Execution Instructions 45 | ------------------------------ 46 | 47 | ### Build: 48 | > cd example/[example name]/ 49 | 50 | > make 51 | 52 | ### Execution: 53 | > ./main 54 | 55 | ### Clean: 56 | > make clean 57 | 58 | Decoder Performance 59 | ------------------------------------------------------------ 60 | 61 | ### Environment: 62 | * OS: Ubuntu 16.04 xenial 63 | * Kernel: x86_64 Linux 4.4.0-21-generic 64 | * CPU: 4x Intel Xeon Gold 6154 CPU @ 3.001GHz 65 | * ICC: 18.0.2 66 | 67 | ### Simulation Parameter: 68 | 69 | * Base Graph: 5GNR Base Graph 1(iLS = 2) 70 | * Code Block Length: 8448 71 | * Code Rate: 5/6 72 | 73 | ### Result: 74 | | SIMD | Throughput | Code Block Latency | 75 | | ------------- | ------------- | --------------------- | 76 | | SSE4.1 | 62.34Mbps | 135.52μs | 77 | 
AVX2 | 130.23Mbps | 64.87μs | 78 | AVX512 | 223.29Mbps | 37.83μs | 79 | -------------------------------------------------------------------------------- /example/test_multicore/Makefile: -------------------------------------------------------------------------------- 1 | CC = icc 2 | 3 | PWD = ../.. 4 | 5 | SRCS = $(wildcard *.c $(PWD)/src/*.c) 6 | 7 | OBJS = $(SRCS:.c = .o) 8 | 9 | INCLUDES = -I$(PWD)/inc 10 | 11 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a 12 | 13 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g 14 | 15 | OUTPUT = main 16 | 17 | all:$(OUTPUT) 18 | 19 | $(OUTPUT) : $(OBJS) 20 | $(CC) $^ -o $@ $(INCLUDES) $(LIBS) 21 | 22 | %.o : %.c 23 | $(CC) -c $< $(CCFLAGS) 24 | 25 | clean: 26 | rm -rf main *.o *.txt #清除中间文件及生成文件 27 | 28 | .PHONY:clean 29 | -------------------------------------------------------------------------------- /example/test_multicore/test_multicore.c: -------------------------------------------------------------------------------- 1 | // version 3.1 2 | #include "simd_ldpc.h" 3 | #include "simd_bit.h" 4 | #include "crc.h" 5 | #include "bit.h" 6 | #include "thread_pool.h" 7 | 8 | #ifndef _GNU_SOURCE 9 | #define _GNU_SOURCE 10 | #include 11 | #endif 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #if defined(_MSC_VER) 23 | #include 24 | #else 25 | #include 26 | #endif 27 | 28 | #define CORE_NUM 18 29 | #define TEST_CORE_NUM 18 30 | 31 | #define CRC_24A 0x1864CFB 32 | #define BLOCK_SIZE 10000 33 | 34 | pthread_mutex_t mutex; 35 | pthread_mutex_t demutex; 36 | volatile int cnt; 37 | double decode_run_time; 38 | 39 | typedef struct ldpc_decoder_thrd_t 40 | { 41 | const float *llr; 42 | nr5g_ldpc_simd_t *h; 43 | int32_t I_max; 44 | float coef; 45 | int32_t decoder_mode; 46 | int8_t *decoded_bits; 47 | float *decoded_llr; 48 | } ldpc_decoder_thrd_t; 49 | 
/*
 * Thread-pool task: runs the decoder BLOCK_SIZE times on the code block
 * described by `arg` (an ldpc_decoder_thrd_t *), always with EARLY_STOP_OFF.
 *
 * The elapsed time of the whole loop is added to the global decode_run_time
 * under `demutex`; afterwards the global completion counter `cnt` is
 * incremented under `mutex` so that main() can detect that the task is done.
 */
void ldpc_decoder_thrd(void *arg)
{
#if defined(_MSC_VER)
    LARGE_INTEGER num;
    long long start, end, freq;
#else
    struct timeval start, end;
    long timeuse;
#endif
    ldpc_decoder_thrd_t *h = (ldpc_decoder_thrd_t *)arg;

#if defined(_MSC_VER)
    QueryPerformanceFrequency(&num);
    freq = num.QuadPart;
    QueryPerformanceCounter(&num);
    start = num.QuadPart;
#else
    gettimeofday(&start, NULL);
#endif
    for (int i = 0; i < BLOCK_SIZE; ++i)
        nr5g_ldpc_simd_decoder(h->llr, h->h, h->I_max, h->coef, h->decoder_mode, EARLY_STOP_OFF, h->decoded_bits, h->decoded_llr);
#if defined(_MSC_VER)
    QueryPerformanceCounter(&num);
    end = num.QuadPart;
    pthread_mutex_lock(&demutex);
    decode_run_time += (double)(end - start) / freq;
    pthread_mutex_unlock(&demutex);
#else
    gettimeofday(&end, NULL);
    timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
    pthread_mutex_lock(&demutex);
    decode_run_time += (double)timeuse / 1000000.0;
    pthread_mutex_unlock(&demutex);
#endif
    /* signal completion to main() */
    pthread_mutex_lock(&mutex);
    cnt++;
    pthread_mutex_unlock(&mutex);
}

#define B_NUM 2
#define R_NUM 3
#define EBN0_SIZE 6

/*
 * Multi-core decoder benchmark.
 *
 * For every core count in core_list and every (B, R) pair, one code block per
 * core is generated, encoded, rate matched, BPSK mapped, passed through an
 * AWGN channel and rate de-matched; the decoding itself is then dispatched to
 * the thread pool (one ldpc_decoder_thrd task per core). Throughput and
 * latency, averaged over the Eb/N0 list, are printed and appended to
 * "mutlicore_tp.txt" / "mutlicore_latency.txt".
 *
 * Returns 0 on success, EXIT_FAILURE if a result file cannot be opened.
 */
int main()
{
    int B_list[B_NUM] = {8448, 3840};
    int R_list[R_NUM] = {853, 768, 512};
    int core_list[TEST_CORE_NUM] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
    FILE *fp, *fq;

    fp = fopen("mutlicore_tp.txt", "a");
    fq = fopen("mutlicore_latency.txt", "a");
    if (fp == NULL || fq == NULL)
    {
        /* without this check the fprintf calls below would dereference NULL */
        perror("ERROR(main): cannot open result file");
        if (fp != NULL)
            fclose(fp);
        if (fq != NULL)
            fclose(fq);
        return EXIT_FAILURE;
    }

    for (int indx_core = 0; indx_core < TEST_CORE_NUM; indx_core++)
    {
        const int n_cores = core_list[indx_core];

        printf("==================================================\n");
        printf("CORE NUM: %d\n", n_cores);
        printf("--------------------------------------------------\n");
        for (int j = 0; j < B_NUM; j++)
            for (int k = 0; k < R_NUM; k++)
            {
                double encode_run_time;
#if defined(_MSC_VER)
                LARGE_INTEGER num;
                long long start, end, freq;
#else
                struct timeval start, end;
                long timeuse;
#endif
                double avg_tp, avg_latency;

                int32_t i, indx_block, indx_ebn0, sum_err_bits, test_size;
                int32_t B, R, I_max, decoder_mode;
                float coef;
                nr5g_ldpc_simd_t *ldpc_arg[CORE_NUM];
                VSLStreamStatePtr stream_g;
                VSLStreamStatePtr stream_b;
                int8_t *info_bits[CORE_NUM];
                int8_t *info_byte[CORE_NUM];
                int8_t *rmed_bits[CORE_NUM];
                float *mapped_sig[CORE_NUM];
                float *noise[CORE_NUM];
                float *llr[CORE_NUM];
                float *decoded_llr[CORE_NUM];
                int8_t *decbs_bits[CORE_NUM];
                float EbN0, sigma2, sigma;
                int32_t err_bits[BLOCK_SIZE];

                ldpc_decoder_thrd_t *ldpct[CORE_NUM];
                pool_init(0, n_cores, 0);

                nr5g_crc_t crc_t;
                nr5g_crc_init(&crc_t, CRC_24A, 24);

                /* set parameters */
                B = B_list[j];
                R = R_list[k];
                I_max = 10;
                decoder_mode = DECODER_MODE_OMS;

                switch (decoder_mode)
                {
                case DECODER_MODE_OMS:
                    coef = (float)2 / 4; // beta for oms
                    break;
                case DECODER_MODE_NMS:
                    coef = (float)25 / 32; // alpha for nms
                    break;
                default:
                    /* the value being rejected is the decoder mode, and this is a failure exit */
                    fprintf(stderr, "ERROR(main): DECODER MODE %d DOES NOT EXIST.\n", decoder_mode);
                    exit(EXIT_FAILURE);
                    break;
                }

                float EbN0_list[EBN0_SIZE] = {0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
                test_size = EBN0_SIZE;
                pthread_mutex_init(&demutex, NULL);
                pthread_mutex_init(&mutex, NULL);
                cnt = 0;

                /* initialize per-core state and the argument passed to each decoder task */
                for (int c = 0; c < n_cores; ++c)
                {
                    ldpc_arg[c] = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t));
                    nr5g_ldpc_simd_init(ldpc_arg[c], B, R, SIMD_MODE_AUTO);
                    ldpc_arg[c]->crc_t = &crc_t;

                    info_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B);
                    info_byte[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B / 8);
                    rmed_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->G);
                    mapped_sig[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
                    noise[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
                    llr[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
                    decoded_llr[c] = (float *)malloc(sizeof(float) * ldpc_arg[c]->G);
                    decbs_bits[c] = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg[c]->B);

                    ldpct[c] = (ldpc_decoder_thrd_t *)malloc(sizeof(ldpc_decoder_thrd_t));
                    ldpct[c]->llr = ldpc_arg[c]->rdmed_llr;
                    ldpct[c]->h = ldpc_arg[c];
                    ldpct[c]->I_max = I_max;
                    ldpct[c]->coef = coef;
                    ldpct[c]->decoder_mode = decoder_mode;
                    ldpct[c]->decoded_bits = ldpc_arg[c]->decoded_bits;
                    ldpct[c]->decoded_llr = decoded_llr[c];
                }

                vslNewStream(&stream_g, VSL_BRNG_MCG31, 0);
                vslNewStream(&stream_b, VSL_BRNG_MCG31, 1);

                avg_tp = 0.0;
                avg_latency = 0.0;

                /* test loop start */
                for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++)
                {
                    EbN0 = EbN0_list[indx_ebn0];
                    sigma2 = (float)(1 / pow(10, (double)EbN0 / 10));
                    sigma = (float)sqrt(sigma2);
                    sum_err_bits = 0;
                    encode_run_time = 0.0;
                    decode_run_time = 0.0;

                    /* a single block is prepared per core; each task decodes it BLOCK_SIZE times */
                    for (indx_block = 0; indx_block < 1; indx_block++)
                    {
                        err_bits[indx_block] = 0;

                        for (int c = 0; c < n_cores; ++c)
                        {
                            /* generate random tbs */
                            viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte[c]);
                            nr5g_crc_attach_byte(&crc_t, info_byte[c], B - 24);
                            nr5g_bit_unpack_vector(info_byte[c], info_bits[c], B);

                            /* cbs */
                            nr5g_ldpc_simd_cbs(info_bits[c], ldpc_arg[c], ldpc_arg[c]->cbs_bits);

                            /* encode */
#if defined(_MSC_VER)
                            QueryPerformanceFrequency(&num);
                            freq = num.QuadPart;
                            QueryPerformanceCounter(&num);
                            start = num.QuadPart;
#else
                            gettimeofday(&start, NULL);
#endif
                            nr5g_ldpc_simd_encoder(ldpc_arg[c]->cbs_bits, ldpc_arg[c], ldpc_arg[c]->coded_bits);
#if defined(_MSC_VER)
                            QueryPerformanceCounter(&num);
                            end = num.QuadPart;
                            encode_run_time += (double)(end - start) / freq;
#else
                            gettimeofday(&end, NULL);
                            timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec;
                            encode_run_time += (double)timeuse / 1000000.0;
#endif

                            /* rate matching */
                            nr5g_ldpc_simd_rate_matching(ldpc_arg[c]->coded_bits, ldpc_arg[c], rmed_bits[c]);

                            /* BPSK map */
                            for (i = 0; i < ldpc_arg[c]->G; i++)
                                mapped_sig[c][i] = (float)2 * rmed_bits[c][i] - 1;

                            /* pass AWGN channel */
                            vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg[c]->G, noise[c], 0.0, sigma);
                            for (i = 0; i < ldpc_arg[c]->G; i++)
                                llr[c][i] = 2 * (mapped_sig[c][i] + noise[c][i]) / sigma2;

                            /* rate dematching */
                            nr5g_ldpc_simd_rate_dematching(llr[c], ldpc_arg[c], ldpc_arg[c]->rdmed_llr);
                        }

                        /* decode: one task per core */
                        for (int c = 0; c < n_cores; ++c)
                            pool_add_task(ldpc_decoder_thrd, (void *)ldpct[c], 0);

                        /*
                         * Wait until every task has reported completion. cnt is
                         * read under the same mutex the workers use to update it,
                         * which also orders their decode_run_time updates before
                         * the reads below.
                         */
                        int finished;
                        do
                        {
                            pthread_mutex_lock(&mutex);
                            finished = cnt;
                            pthread_mutex_unlock(&mutex);
                        } while (finished < n_cores);
                        cnt = 0;

                        for (int c = 0; c < n_cores; ++c)
                        {
                            /* decbs */
                            nr5g_ldpc_simd_decbs(ldpc_arg[c]->decoded_bits, ldpc_arg[c], decbs_bits[c]);

                            /* statistics */
                            for (i = 0; i < ldpc_arg[c]->B; i++)
                                err_bits[indx_block] += (info_bits[c][i] == decbs_bits[c][i] ? 0 : 1);

                            sum_err_bits += err_bits[indx_block];
                        }
                    }

                    /* accumulate results; decode_run_time is the sum over all tasks */
                    avg_tp += (double)B * BLOCK_SIZE * n_cores / (decode_run_time / n_cores) / 1e6;
                    avg_latency += decode_run_time / n_cores * 1e6 / BLOCK_SIZE;
                }

                avg_tp /= test_size;
                avg_latency /= test_size;
                printf("B = %d, R = %d\n", B, R);
                printf("Average Throughput:\t%.2lfMbps\n", avg_tp);
                printf("Average Latency:\t%.2lfus\n", avg_latency);

                fprintf(fp, "%.2lf\t", avg_tp);
                fprintf(fq, "%.2lf\t", avg_latency);
                printf("--------------------------------------------------\n");

                for (int c = 0; c < n_cores; ++c)
                {
                    free(info_byte[c]);
                    free(info_bits[c]);
                    free(rmed_bits[c]);
                    free(mapped_sig[c]);
                    free(noise[c]);
                    free(llr[c]);
                    free(decoded_llr[c]);
                    free(decbs_bits[c]);
                    free(ldpct[c]); /* task argument was previously leaked */
                    free_nr5g_ldpc_simd_t(ldpc_arg[c]);
                }
                /* release the random streams created for this configuration */
                vslDeleteStream(&stream_g);
                vslDeleteStream(&stream_b);
                pool_destroy(0);
                /* the mutexes are re-initialised on the next configuration, so destroy them first */
                pthread_mutex_destroy(&mutex);
                pthread_mutex_destroy(&demutex);
            }
        fprintf(fp, "\n");
        fprintf(fq, "\n");
        printf("==================================================\n\n");
    }

    fclose(fp);
    fclose(fq);

    return 0;
}
-------------------------------------------------------------------------------- /example/test_simd/Makefile: -------------------------------------------------------------------------------- 1 | CC = icc 2 | 3 | PWD = ../.. 
4 | 5 | SRCS = $(wildcard *.c $(PWD)/src/*.c) 6 | 7 | OBJS = $(SRCS:.c = .o) 8 | 9 | INCLUDES = -I$(PWD)/inc 10 | 11 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a 12 | 13 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g 14 | 15 | OUTPUT = main 16 | 17 | all:$(OUTPUT) 18 | 19 | $(OUTPUT) : $(OBJS) 20 | $(CC) $^ -o $@ $(INCLUDES) $(LIBS) 21 | 22 | %.o : %.c 23 | $(CC) -c $< $(CCFLAGS) 24 | 25 | clean: 26 | rm -rf main *.o *.txt #清除中间文件及生成文件 27 | 28 | .PHONY:clean 29 | -------------------------------------------------------------------------------- /example/test_simd/test_simd.c: -------------------------------------------------------------------------------- 1 | // version 3.1 2 | #include "simd_ldpc.h" 3 | #include "simd_bit.h" 4 | #include "bit.h" 5 | #include "crc.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #if defined(_MSC_VER) 17 | #include 18 | #else 19 | #include 20 | #endif 21 | 22 | #define CRC_24A 0x1864CFB 23 | #define BLOCK_SIZE 10000 24 | 25 | #define B_NUM 2 26 | #define R_NUM 3 27 | #define SIMD_MODE_NUM 3 28 | #define EBN0_SIZE 21 29 | 30 | int main() 31 | { 32 | /* test parameters */ 33 | int simd_list[SIMD_MODE_NUM] = {SIMD_MODE_SSE, SIMD_MODE_AVX2, SIMD_MODE_AVX512}; 34 | int B_list[B_NUM] = {8448, 3840}; 35 | int R_list[R_NUM] = {853, 768, 512}; 36 | float EbN0_list[EBN0_SIZE] = {0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0}; 37 | int indx_B, indx_R, indx_block, indx_ebn0, indx_simd; 38 | 39 | /* file parameters */ 40 | FILE *fber[SIMD_MODE_NUM], *fbler[SIMD_MODE_NUM], *ftp[SIMD_MODE_NUM], *fla[SIMD_MODE_NUM], *fatp[SIMD_MODE_NUM], *fala[SIMD_MODE_NUM]; 41 | 42 | fber[0] = fopen("sse_BER.txt", "a"); 43 | fbler[0] = fopen("sse_BLER.txt", "a"); 44 | ftp[0] = fopen("sse_tp.txt", "a"); 45 | fla[0] = fopen("sse_latency.txt", "a"); 46 | fatp[0] = fopen("sse_avg_tp.txt", "a"); 47 | fala[0] = 
fopen("sse_avg_latency.txt", "a"); 48 | fber[1] = fopen("avx2_BER.txt", "a"); 49 | fbler[1] = fopen("avx2_BLER.txt", "a"); 50 | ftp[1] = fopen("avx2_tp.txt", "a"); 51 | fla[1] = fopen("avx2_latency.txt", "a"); 52 | fatp[1] = fopen("avx2_avg_tp.txt", "a"); 53 | fala[1] = fopen("avx2_avg_latency.txt", "a"); 54 | fber[2] = fopen("avx512_BER.txt", "a"); 55 | fbler[2] = fopen("avx512_BLER.txt", "a"); 56 | ftp[2] = fopen("avx512_tp.txt", "a"); 57 | fla[2] = fopen("avx512_latency.txt", "a"); 58 | fatp[2] = fopen("avx512_avg_tp.txt", "a"); 59 | fala[2] = fopen("avx512_avg_latency.txt", "a"); 60 | 61 | 62 | for (indx_B = 0; indx_B < B_NUM; indx_B++) 63 | for (indx_R = 0; indx_R < R_NUM; indx_R++) 64 | for (indx_simd = 0; indx_simd < SIMD_MODE_NUM; indx_simd++) 65 | { 66 | printf("==================================================\n"); 67 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], 68 | (simd_list[indx_simd] == SIMD_MODE_SSE ? "SIMD_MODE_SSE" : (simd_list[indx_simd] == SIMD_MODE_AVX2 ? 
"SIMD_MODE_AVX2" : "SIMD_MODE_AVX512"))); 69 | printf("--------------------------------------------------\n"); 70 | /* time parameters */ 71 | double encode_run_time; 72 | double decode_run_time; 73 | #if defined(_MSC_VER) 74 | LARGE_INTEGER num; 75 | long long start, end, freq; 76 | #else 77 | struct timeval start, end; 78 | long timeuse; 79 | #endif 80 | double avg_tp, avg_latency; 81 | 82 | /* mkl stream parameters */ 83 | VSLStreamStatePtr stream_g; 84 | VSLStreamStatePtr stream_b; 85 | 86 | int32_t i, sum_err_bits, test_size; 87 | int32_t B, R, I_max, decoder_mode; 88 | float coef; 89 | float EbN0, sigma2, sigma; 90 | 91 | nr5g_ldpc_simd_t *ldpc_arg; 92 | int8_t *info_byte; 93 | int8_t *info_bits; 94 | int8_t *rmed_bits; 95 | float *mapped_sig; 96 | float *noise; 97 | float *llr; 98 | float *decoded_llr; 99 | int8_t *decbs_bits; 100 | 101 | int32_t err_bits[BLOCK_SIZE]; 102 | int32_t err_bl; 103 | 104 | nr5g_crc_t crc_t; 105 | nr5g_crc_init(&crc_t, CRC_24A, 24); 106 | 107 | /* set parameters */ 108 | B = B_list[indx_B]; 109 | R = R_list[indx_R]; 110 | I_max = 10; 111 | decoder_mode = DECODER_MODE_OMS; 112 | 113 | switch (decoder_mode) 114 | { 115 | case DECODER_MODE_OMS: 116 | coef = (float)2 / 4; // beta for oms 117 | break; 118 | case DECODER_MODE_NMS: 119 | coef = (float)25 / 32; // alpha for nms 120 | break; 121 | default: 122 | printf("ERROR(main): SIMD MODE %d IS NOT EXISTED.\n", decoder_mode); 123 | exit(0); 124 | break; 125 | } 126 | 127 | test_size = EBN0_SIZE; 128 | 129 | /* initialize */ 130 | ldpc_arg = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t)); 131 | nr5g_ldpc_simd_init(ldpc_arg, B, R, simd_list[indx_simd]); 132 | ldpc_arg->crc_t = &crc_t; 133 | 134 | info_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B); 135 | info_byte = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B / 8); 136 | rmed_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->G); 137 | mapped_sig = (float *)malloc(sizeof(float) * ldpc_arg->G); 138 | noise = (float 
*)malloc(sizeof(float) * ldpc_arg->G); 139 | llr = (float *)malloc(sizeof(float) * ldpc_arg->G); 140 | decoded_llr = (float *)malloc(sizeof(float) * ldpc_arg->G); 141 | decbs_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B); 142 | 143 | vslNewStream(&stream_g, VSL_BRNG_MCG31, 0); 144 | vslNewStream(&stream_b, VSL_BRNG_MCG31, 1); 145 | 146 | avg_tp = 0.0; 147 | avg_latency = 0.0; 148 | 149 | /* test loop start */ 150 | for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++) 151 | { 152 | EbN0 = EbN0_list[indx_ebn0]; 153 | sigma2 = (float)(1 / (pow(10, (double)EbN0 / 10) * 2 * R / 1024)); 154 | // sigma2 = (float)(1 / pow(10, (double)EbN0 / 10)); 155 | sigma = (float)sqrt(sigma2); 156 | sum_err_bits = 0; 157 | err_bl = 0; 158 | encode_run_time = 0.0; 159 | decode_run_time = 0.0; 160 | 161 | for (indx_block = 0; indx_block < BLOCK_SIZE; indx_block++) 162 | { 163 | err_bits[indx_block] = 0; 164 | 165 | /* generate random tbs */ 166 | viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte); 167 | nr5g_crc_attach_byte(&crc_t, info_byte, B - 24); 168 | // fast_extend_avx512(info_byte, B / 8, info_bits); 169 | nr5g_bit_unpack_vector(info_byte, info_bits, B); 170 | 171 | /* cbs */ 172 | nr5g_ldpc_simd_cbs(info_bits, ldpc_arg, ldpc_arg->cbs_bits); 173 | 174 | /* encode */ 175 | #if defined(_MSC_VER) 176 | QueryPerformanceFrequency(&num); 177 | freq = num.QuadPart; 178 | QueryPerformanceCounter(&num); 179 | start = num.QuadPart; 180 | #else 181 | gettimeofday(&start, NULL); 182 | #endif 183 | nr5g_ldpc_simd_encoder(ldpc_arg->cbs_bits, ldpc_arg, ldpc_arg->coded_bits); 184 | #if defined(_MSC_VER) 185 | QueryPerformanceCounter(&num); 186 | end = num.QuadPart; 187 | encode_run_time += (double)(end - start) / freq; 188 | #else 189 | gettimeofday(&end, NULL); 190 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec; 191 | encode_run_time += (double)timeuse / 1000000.0; 192 | #endif 193 | 194 | /* rate 
matching */ 195 | nr5g_ldpc_simd_rate_matching(ldpc_arg->coded_bits, ldpc_arg, rmed_bits); 196 | 197 | /* BPSK map */ 198 | for (i = 0; i < ldpc_arg->G; i++) 199 | mapped_sig[i] = (float)2 * rmed_bits[i] - 1; 200 | 201 | /* pass AWGN channel */ 202 | vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg->G, noise, 0.0, sigma); 203 | for (i = 0; i < ldpc_arg->G; i++) 204 | llr[i] = 2 * (mapped_sig[i] + noise[i]) / sigma2; 205 | 206 | /* rate dematching */ 207 | nr5g_ldpc_simd_rate_dematching(llr, ldpc_arg, ldpc_arg->rdmed_llr); 208 | 209 | /* decode */ 210 | #if defined(_MSC_VER) 211 | QueryPerformanceFrequency(&num); 212 | freq = num.QuadPart; 213 | QueryPerformanceCounter(&num); 214 | start = num.QuadPart; 215 | #else 216 | gettimeofday(&start, NULL); 217 | #endif 218 | 219 | nr5g_ldpc_simd_decoder(ldpc_arg->rdmed_llr, ldpc_arg, I_max, coef, decoder_mode, EARLY_STOP_OFF, ldpc_arg->decoded_bits, decoded_llr); 220 | 221 | #if defined(_MSC_VER) 222 | QueryPerformanceCounter(&num); 223 | end = num.QuadPart; 224 | decode_run_time += (double)(end - start) / freq; 225 | #else 226 | gettimeofday(&end, NULL); 227 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec; 228 | decode_run_time += (double)timeuse / 1000000.0; 229 | #endif 230 | 231 | /* decbs */ 232 | nr5g_ldpc_simd_decbs(ldpc_arg->decoded_bits, ldpc_arg, decbs_bits); 233 | 234 | /* statistics */ 235 | for (i = 0; i < ldpc_arg->B; i++) 236 | err_bits[indx_block] += (info_bits[i] == decbs_bits[i] ? 
0 : 1); 237 | if (err_bits[indx_block]) 238 | err_bl++; 239 | 240 | sum_err_bits += err_bits[indx_block]; 241 | } 242 | 243 | /* print results */ 244 | float ber = (float)sum_err_bits / (B * BLOCK_SIZE); 245 | float bler = (float)err_bl / BLOCK_SIZE; 246 | printf("Eb/N0:%.2f:\tBER:%.2e(%d/%d)\tBLER:%.2e(%d/%d)\n", EbN0_list[indx_ebn0], ber, sum_err_bits, B * BLOCK_SIZE, bler, err_bl, BLOCK_SIZE); 247 | printf("encode_Latency:%lfus\n", encode_run_time / BLOCK_SIZE * 1e6); 248 | printf("encode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / encode_run_time / 1e6); 249 | printf("decode_Latency:%lfus\n", decode_run_time / BLOCK_SIZE * 1e6); 250 | printf("decode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / decode_run_time / 1e6); 251 | printf("--------------------------------------------------\n"); 252 | fprintf(fber[indx_simd], "%.2e\t", ber); 253 | fprintf(fbler[indx_simd], "%.2e\t", bler); 254 | fprintf(ftp[indx_simd], "%.2lf\t", (double)B * BLOCK_SIZE / decode_run_time / 1e6); 255 | fprintf(fla[indx_simd], "%.2lf\t", (double)decode_run_time * 1e6 / BLOCK_SIZE); 256 | 257 | avg_tp += (double)B * BLOCK_SIZE / decode_run_time / 1e6; 258 | avg_latency += decode_run_time * 1e6 / BLOCK_SIZE; 259 | } 260 | 261 | avg_tp /= test_size; 262 | avg_latency /= test_size; 263 | fprintf(fatp[indx_simd], "%.2lf\t", avg_tp); 264 | fprintf(fala[indx_simd], "%.2lf\t", avg_latency); 265 | 266 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], (indx_simd == 1 ? "SIMD_MODE_SSE" : (indx_simd == 2 ? 
"SIMD_MODE_AVX2" : "SIMD_MODE_AVX512"))); 267 | printf("Average Throughput:\t%.2lfMbps\n", avg_tp); 268 | printf("Average Latency:\t%.2lfus\n", avg_latency); 269 | printf("==================================================\n\n"); 270 | 271 | fprintf(fber[indx_simd], "\n"); 272 | fprintf(fbler[indx_simd], "\n"); 273 | fprintf(ftp[indx_simd], "\n"); 274 | fprintf(fla[indx_simd], "\n"); 275 | 276 | free(info_byte); 277 | free(info_bits); 278 | free(rmed_bits); 279 | free(mapped_sig); 280 | free(noise); 281 | free(llr); 282 | free(decoded_llr); 283 | free(decbs_bits); 284 | free_nr5g_ldpc_simd_t(ldpc_arg); 285 | } 286 | for (indx_simd = 0; indx_simd < SIMD_MODE_NUM; ++indx_simd) 287 | { 288 | fprintf(fber[indx_simd], "\n"); 289 | fprintf(fbler[indx_simd], "\n"); 290 | fprintf(ftp[indx_simd], "\n"); 291 | fprintf(fla[indx_simd], "\n"); 292 | fprintf(fatp[indx_simd], "\n"); 293 | fprintf(fala[indx_simd], "\n"); 294 | fclose(fber[indx_simd]); 295 | fclose(fbler[indx_simd]); 296 | fclose(ftp[indx_simd]); 297 | fclose(fla[indx_simd]); 298 | } 299 | return 0; 300 | } -------------------------------------------------------------------------------- /example/test_stop/Makefile: -------------------------------------------------------------------------------- 1 | CC = icc 2 | 3 | PWD = ../.. 
4 | 5 | SRCS = $(wildcard *.c $(PWD)/src/*.c) 6 | 7 | OBJS = $(SRCS:.c = .o) 8 | 9 | INCLUDES = -I$(PWD)/inc 10 | 11 | LIBS = -lm -lpthread -lmkl_rt -fopenmp $(PWD)/lib/*.a 12 | 13 | CCFLAGS = -Wall -O3 -march=core-avx512 -std=c99 -g 14 | 15 | OUTPUT = main 16 | 17 | all:$(OUTPUT) 18 | 19 | $(OUTPUT) : $(OBJS) 20 | $(CC) $^ -o $@ $(INCLUDES) $(LIBS) 21 | 22 | %.o : %.c 23 | $(CC) -c $< $(CCFLAGS) 24 | 25 | clean: 26 | rm -rf main *.o *.txt #清除中间文件及生成文件 27 | 28 | .PHONY:clean 29 | -------------------------------------------------------------------------------- /example/test_stop/test_stop.c: -------------------------------------------------------------------------------- 1 | // version 3.1 2 | #include "simd_ldpc.h" 3 | #include "simd_bit.h" 4 | #include "bit.h" 5 | #include "crc.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #if defined(_MSC_VER) 17 | #include 18 | #else 19 | #include 20 | #endif 21 | 22 | #define CRC_24A 0x1864CFB 23 | #define BLOCK_SIZE 10000 24 | 25 | #define B_NUM 2 26 | #define R_NUM 3 27 | #define EARLY_STOP_NUM 2 28 | #define EBN0_SIZE 21 29 | 30 | int main() 31 | { 32 | /* test parameters */ 33 | int es_list[EARLY_STOP_NUM] = {EARLY_STOP_OFF, EARLY_STOP_ON}; 34 | int B_list[B_NUM] = {8448, 3840}; 35 | int R_list[R_NUM] = {853, 768, 512}; 36 | float EbN0_list[EBN0_SIZE] = {0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5, 10.0}; 37 | int indx_B, indx_R, indx_block, indx_ebn0, indx_es; 38 | 39 | /* file parameters */ 40 | FILE *fber[EARLY_STOP_NUM], *fbler[EARLY_STOP_NUM], *ftp[EARLY_STOP_NUM], *fla[EARLY_STOP_NUM], *fatp[EARLY_STOP_NUM], *fala[EARLY_STOP_NUM]; 41 | fber[0] = fopen("nostop_BER.txt", "a"); 42 | fbler[0] = fopen("nostop_BLER.txt", "a"); 43 | ftp[0] = fopen("nostop_tp.txt", "a"); 44 | fla[0] = fopen("nostop_latency.txt", "a"); 45 | fatp[0] = fopen("nostop_avg_tp.txt", "a"); 46 | fala[0] = 
fopen("nostop_avg_latency.txt", "a"); 47 | fber[1] = fopen("stop_BER.txt", "a"); 48 | fbler[1] = fopen("stop_BLER.txt", "a"); 49 | ftp[1] = fopen("stop_tp.txt", "a"); 50 | fla[1] = fopen("stop_latency.txt", "a"); 51 | fatp[1] = fopen("stop_avg_tp.txt", "a"); 52 | fala[1] = fopen("stop_avg_latency.txt", "a"); 53 | 54 | for (indx_B = 0; indx_B < B_NUM; indx_B++) 55 | for (indx_R = 0; indx_R < R_NUM; indx_R++) 56 | for (indx_es = 0; indx_es < EARLY_STOP_NUM; indx_es++) 57 | { 58 | printf("==================================================\n"); 59 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], 60 | (es_list[indx_es] == EARLY_STOP_ON ? "EARLY_STOP_ON" : "EARLY_STOP_OFF")); 61 | printf("--------------------------------------------------\n"); 62 | /* time parameters */ 63 | double encode_run_time; 64 | double decode_run_time; 65 | #if defined(_MSC_VER) 66 | LARGE_INTEGER num; 67 | long long start, end, freq; 68 | #else 69 | struct timeval start, end; 70 | long timeuse; 71 | #endif 72 | double avg_tp, avg_latency; 73 | 74 | /* mkl stream parameters */ 75 | VSLStreamStatePtr stream_g; 76 | VSLStreamStatePtr stream_b; 77 | 78 | int32_t i, sum_err_bits, test_size; 79 | int32_t B, R, I_max, decoder_mode; 80 | float coef; 81 | float EbN0, sigma2, sigma; 82 | 83 | nr5g_ldpc_simd_t *ldpc_arg; 84 | int8_t *info_byte; 85 | int8_t *info_bits; 86 | int8_t *rmed_bits; 87 | float *mapped_sig; 88 | float *noise; 89 | float *llr; 90 | float *decoded_llr; 91 | int8_t *decbs_bits; 92 | 93 | int32_t err_bits[BLOCK_SIZE]; 94 | int32_t err_bl; 95 | 96 | nr5g_crc_t crc_t; 97 | nr5g_crc_init(&crc_t, CRC_24A, 24); 98 | 99 | /* set parameters */ 100 | B = B_list[indx_B]; 101 | R = R_list[indx_R]; 102 | I_max = 10; 103 | decoder_mode = DECODER_MODE_OMS; 104 | 105 | switch (decoder_mode) 106 | { 107 | case DECODER_MODE_OMS: 108 | coef = (float)2 / 4; // beta for oms 109 | break; 110 | case DECODER_MODE_NMS: 111 | coef = (float)25 / 32; // alpha for nms 112 | break; 113 | default: 114 
| printf("ERROR(main): SIMD MODE %d IS NOT EXISTED.\n", decoder_mode); 115 | exit(0); 116 | break; 117 | } 118 | 119 | test_size = EBN0_SIZE; 120 | 121 | /* initialize */ 122 | ldpc_arg = (nr5g_ldpc_simd_t *)malloc(sizeof(nr5g_ldpc_simd_t)); 123 | nr5g_ldpc_simd_init(ldpc_arg, B, R, SIMD_MODE_AUTO); 124 | ldpc_arg->crc_t = &crc_t; 125 | 126 | info_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B); 127 | info_byte = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B / 8); 128 | rmed_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->G); 129 | mapped_sig = (float *)malloc(sizeof(float) * ldpc_arg->G); 130 | noise = (float *)malloc(sizeof(float) * ldpc_arg->G); 131 | llr = (float *)malloc(sizeof(float) * ldpc_arg->G); 132 | decoded_llr = (float *)malloc(sizeof(float) * ldpc_arg->G); 133 | decbs_bits = (int8_t *)malloc(sizeof(int8_t) * ldpc_arg->B); 134 | 135 | vslNewStream(&stream_g, VSL_BRNG_MCG31, 0); 136 | vslNewStream(&stream_b, VSL_BRNG_MCG31, 1); 137 | 138 | avg_tp = 0.0; 139 | avg_latency = 0.0; 140 | 141 | /* test loop start */ 142 | for (indx_ebn0 = 0; indx_ebn0 < test_size; indx_ebn0++) 143 | { 144 | EbN0 = EbN0_list[indx_ebn0]; 145 | sigma2 = (float)(1 / (pow(10, (double)EbN0 / 10) * 2 * R / 1024)); 146 | // sigma2 = (float)(1 / pow(10, (double)EbN0 / 10)); 147 | sigma = (float)sqrt(sigma2); 148 | sum_err_bits = 0; 149 | err_bl = 0; 150 | encode_run_time = 0.0; 151 | decode_run_time = 0.0; 152 | 153 | for (indx_block = 0; indx_block < BLOCK_SIZE; indx_block++) 154 | { 155 | err_bits[indx_block] = 0; 156 | 157 | /* generate random tbs */ 158 | viRngUniformBits(VSL_RNG_METHOD_UNIFORMBITS_STD, stream_b, (B - 1) / 32 + 1, (uint32_t *)info_byte); 159 | nr5g_crc_attach_byte(&crc_t, info_byte, B - 24); 160 | // fast_extend_avx512(info_byte, B / 8, info_bits); 161 | nr5g_bit_unpack_vector(info_byte, info_bits, B); 162 | 163 | /* cbs */ 164 | nr5g_ldpc_simd_cbs(info_bits, ldpc_arg, ldpc_arg->cbs_bits); 165 | 166 | /* encode */ 167 | #if defined(_MSC_VER) 168 | 
QueryPerformanceFrequency(&num); 169 | freq = num.QuadPart; 170 | QueryPerformanceCounter(&num); 171 | start = num.QuadPart; 172 | #else 173 | gettimeofday(&start, NULL); 174 | #endif 175 | nr5g_ldpc_simd_encoder(ldpc_arg->cbs_bits, ldpc_arg, ldpc_arg->coded_bits); 176 | #if defined(_MSC_VER) 177 | QueryPerformanceCounter(&num); 178 | end = num.QuadPart; 179 | encode_run_time += (double)(end - start) / freq; 180 | #else 181 | gettimeofday(&end, NULL); 182 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec; 183 | encode_run_time += (double)timeuse / 1000000.0; 184 | #endif 185 | 186 | /* rate matching */ 187 | nr5g_ldpc_simd_rate_matching(ldpc_arg->coded_bits, ldpc_arg, rmed_bits); 188 | 189 | /* BPSK map */ 190 | for (i = 0; i < ldpc_arg->G; i++) 191 | mapped_sig[i] = (float)2 * rmed_bits[i] - 1; 192 | 193 | /* pass AWGN channel */ 194 | vsRngGaussian(VSL_RNG_METHOD_GAUSSIANMV_BOXMULLER, stream_g, ldpc_arg->G, noise, 0.0, sigma); 195 | for (i = 0; i < ldpc_arg->G; i++) 196 | llr[i] = 2 * (mapped_sig[i] + noise[i]) / sigma2; 197 | 198 | /* rate dematching */ 199 | nr5g_ldpc_simd_rate_dematching(llr, ldpc_arg, ldpc_arg->rdmed_llr); 200 | 201 | /* decode */ 202 | #if defined(_MSC_VER) 203 | QueryPerformanceFrequency(&num); 204 | freq = num.QuadPart; 205 | QueryPerformanceCounter(&num); 206 | start = num.QuadPart; 207 | #else 208 | gettimeofday(&start, NULL); 209 | #endif 210 | 211 | nr5g_ldpc_simd_decoder(ldpc_arg->rdmed_llr, ldpc_arg, I_max, coef, decoder_mode, es_list[indx_es], ldpc_arg->decoded_bits, decoded_llr); 212 | 213 | #if defined(_MSC_VER) 214 | QueryPerformanceCounter(&num); 215 | end = num.QuadPart; 216 | decode_run_time += (double)(end - start) / freq; 217 | #else 218 | gettimeofday(&end, NULL); 219 | timeuse = 1000000 * (end.tv_sec - start.tv_sec) + end.tv_usec - start.tv_usec; 220 | decode_run_time += (double)timeuse / 1000000.0; 221 | #endif 222 | 223 | /* decbs */ 224 | nr5g_ldpc_simd_decbs(ldpc_arg->decoded_bits, 
ldpc_arg, decbs_bits); 225 | 226 | /* statistics */ 227 | for (i = 0; i < ldpc_arg->B; i++) 228 | err_bits[indx_block] += (info_bits[i] == decbs_bits[i] ? 0 : 1); 229 | if (err_bits[indx_block]) 230 | err_bl++; 231 | 232 | sum_err_bits += err_bits[indx_block]; 233 | } 234 | 235 | /* print results */ 236 | float ber = (float)sum_err_bits / (B * BLOCK_SIZE); 237 | float bler = (float)err_bl / BLOCK_SIZE; 238 | printf("Eb/N0:%.2f:\tBER:%.2e(%d/%d)\tBLER:%.2e(%d/%d)\n", EbN0_list[indx_ebn0], ber, sum_err_bits, B * BLOCK_SIZE, bler, err_bl, BLOCK_SIZE); 239 | printf("encode_Latency:%lfus\n", encode_run_time / BLOCK_SIZE * 1e6); 240 | printf("encode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / encode_run_time / 1e6); 241 | printf("decode_Latency:%lfus\n", decode_run_time / BLOCK_SIZE * 1e6); 242 | printf("decode_Throughput:%.2lfMbps\n", (double)B * BLOCK_SIZE / decode_run_time / 1e6); 243 | printf("--------------------------------------------------\n"); 244 | fprintf(fber[indx_es], "%.2e\t", ber); 245 | fprintf(fbler[indx_es], "%.2e\t", bler); 246 | fprintf(ftp[indx_es], "%.2lf\t", (double)B * BLOCK_SIZE / decode_run_time / 1e6); 247 | fprintf(fla[indx_es], "%.2lf\t", (double)decode_run_time * 1e6 / BLOCK_SIZE); 248 | 249 | avg_tp += (double)B * BLOCK_SIZE / decode_run_time / 1e6; 250 | avg_latency += decode_run_time * 1e6 / BLOCK_SIZE; 251 | } 252 | 253 | avg_tp /= test_size; 254 | avg_latency /= test_size; 255 | fprintf(fatp[indx_es], "%.2lf\t", avg_tp); 256 | fprintf(fala[indx_es], "%.2lf\t", avg_latency); 257 | 258 | printf("B:%d\tR:%d\t%s\n", B_list[indx_B], R_list[indx_R], (indx_es ? 
"EARLY_STOP_ON" : "EARLY_STOP_OFF")); 259 | printf("Average Throughput:\t%.2lfMbps\n", avg_tp); 260 | printf("Average Latency:\t%.2lfus\n", avg_latency); 261 | printf("==================================================\n\n"); 262 | 263 | fprintf(fber[indx_es], "\n"); 264 | fprintf(fbler[indx_es], "\n"); 265 | fprintf(ftp[indx_es], "\n"); 266 | fprintf(fla[indx_es], "\n"); 267 | 268 | free(info_byte); 269 | free(info_bits); 270 | free(rmed_bits); 271 | free(mapped_sig); 272 | free(noise); 273 | free(llr); 274 | free(decoded_llr); 275 | free(decbs_bits); 276 | free_nr5g_ldpc_simd_t(ldpc_arg); 277 | } 278 | 279 | for (indx_es = 0; indx_es < EARLY_STOP_NUM; ++indx_es) 280 | { 281 | fprintf(fber[indx_es], "\n"); 282 | fprintf(fbler[indx_es], "\n"); 283 | fprintf(ftp[indx_es], "\n"); 284 | fprintf(fla[indx_es], "\n"); 285 | fprintf(fatp[indx_es], "\n"); 286 | fprintf(fala[indx_es], "\n"); 287 | fclose(fber[indx_es]); 288 | fclose(fbler[indx_es]); 289 | fclose(ftp[indx_es]); 290 | fclose(fla[indx_es]); 291 | fclose(fatp[indx_es]); 292 | fclose(fala[indx_es]); 293 | } 294 | 295 | return 0; 296 | } -------------------------------------------------------------------------------- /inc/bit.h: -------------------------------------------------------------------------------- 1 | #ifndef NR5G_BIT_H 2 | #define NR5G_BIT_H 3 | 4 | #include 5 | 6 | void nr5g_bit_unpack_vector(uint8_t *packed, 7 | uint8_t *unpacked, 8 | int nof_bits); 9 | 10 | void nr5g_bit_pack_vector(uint8_t *unpacked, 11 | uint8_t *packed, 12 | int nof_bits); 13 | 14 | void nr5g_bit_unpack(uint32_t value, 15 | uint8_t **bits, 16 | int nof_bits); 17 | 18 | uint32_t nr5g_bit_pack(uint8_t **bits, 19 | int nof_bits); 20 | 21 | #endif -------------------------------------------------------------------------------- /inc/crc.h: -------------------------------------------------------------------------------- 1 | #ifndef NR5G_CRC_H 2 | #define NR5G_CRC_H 3 | 4 | #include 5 | 6 | typedef struct 7 | { 8 | uint64_t table[256]; 
9 | int polynom; 10 | int order; 11 | uint64_t crcinit; 12 | uint64_t crcmask; 13 | uint64_t crchighbit; 14 | uint32_t nr5g_crc_out; 15 | } nr5g_crc_t; 16 | 17 | int nr5g_crc_init(nr5g_crc_t *h, 18 | uint32_t nr5g_crc_poly, 19 | int nr5g_crc_order); 20 | 21 | int nr5g_crc_set_init(nr5g_crc_t *h, 22 | uint64_t init_value); 23 | 24 | uint32_t nr5g_crc_attach_byte(nr5g_crc_t *h, 25 | uint8_t *data, 26 | int len); 27 | 28 | static inline void nr5g_crc_checksum_put_byte(nr5g_crc_t *h, uint8_t byte) 29 | { 30 | 31 | // Polynom order 8, 16, 24 or 32 only. 32 | int ord = h->order - 8; 33 | uint64_t crc = h->crcinit; 34 | 35 | crc = (crc << 8) ^ h->table[((crc >> (ord)) & 0xff) ^ byte]; 36 | h->crcinit = crc; 37 | } 38 | 39 | static inline uint64_t nr5g_crc_checksum_get(nr5g_crc_t *h) 40 | { 41 | return (h->crcinit & h->crcmask); 42 | } 43 | 44 | uint32_t nr5g_crc_checksum_byte(nr5g_crc_t *h, 45 | uint8_t *data, 46 | int len); 47 | 48 | 49 | uint32_t nr5g_crc_checksum_16(nr5g_crc_t *h, uint8_t *data, int len); 50 | 51 | static inline void nr5g_crc_checksum_put_16(nr5g_crc_t *h, uint16_t byte) 52 | { 53 | 54 | // Polynom order 8, 16, 24 or 32 only. 
55 | int ord = h->order - 16; 56 | uint64_t crc = h->crcinit; 57 | 58 | crc = (crc << 16) ^ h->table[((crc >> (ord)) & 0xffff) ^ byte]; 59 | h->crcinit = crc; 60 | } 61 | 62 | #endif -------------------------------------------------------------------------------- /inc/simd_bit.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMD_BIT 2 | #define SIMD_BIT 3 | 4 | #include 5 | #include 6 | 7 | int fast_decide_byte_avx512(const int8_t *input, int len, uint8_t *output); 8 | 9 | int fast_extend_avx512(const uint8_t *input, int len, uint8_t *output); 10 | 11 | int fast_decide_bit_avx512(const int8_t *input, int len, uint8_t *output); 12 | 13 | int fast_decide_bit_avx2(const int8_t *input, int len, uint8_t *output); 14 | 15 | int fast_decide_bit_sse(const int8_t *input, int len, uint8_t *output); 16 | 17 | #endif -------------------------------------------------------------------------------- /inc/simd_ldpc.h: -------------------------------------------------------------------------------- 1 | // version 3.1 2 | #ifndef SIMD_LDPC_H 3 | #define SIMD_LDPC_H 4 | 5 | #include 6 | #include 7 | #include 8 | #include "crc.h" 9 | 10 | #define SIMD_MODE_AUTO 0 11 | #define SIMD_MODE_SSE 1 12 | #define SIMD_MODE_AVX2 2 13 | #define SIMD_MODE_AVX512 3 14 | 15 | #define DECODER_MODE_OMS 1 16 | #define DECODER_MODE_NMS 2 17 | 18 | #define EARLY_STOP_OFF 0 // early stop function off 19 | #define EARLY_STOP_ON 1 // early stop function on 20 | 21 | typedef struct nr5g_ldpc_simd_t 22 | { 23 | /* General Parameters */ 24 | int32_t B; // the length of TBS, 'B' in TS38.212 25 | int32_t R; // code rate*1024 26 | int32_t K_b; // block number of message bits, 'K_b' in TS38.212 27 | int32_t K_p; // 'K_+' in TS38.212 28 | int32_t K_n; // 'K_-' in TS38.212 29 | int32_t C; // number of code block after CBS, 'C' in TS38.212 30 | int32_t L; // CRC lenth, 'L' in TS38.212 31 | int32_t iLS; // LDPC lifting size 32 | int32_t BG_sel; // number of base graph 
33 | int32_t Z_c; // block size, 'Z_c' in TS38.212 34 | int32_t K; // message bits length, 'K' in TS38.212 35 | int32_t N; // matrix length, 'N' in TS38.212 36 | int32_t col_hbg; // column number of base graph 37 | int32_t row_hbg; // row number of base graph 38 | int32_t K_cb; // maximum code block size, 'K_cb' in TS38.212 39 | int32_t E; // transmit block length 40 | int32_t k0; // start point of rate matching 41 | int32_t N_cb; // decode bits block length 42 | int32_t Nd; // decode H matrix col num 43 | int32_t Md; // decode H matrix row num 44 | int32_t G; // total number of coded bits for transmission 45 | int32_t E_p; // 'E_+' 46 | int32_t E_n; // 'E_-' 47 | int16_t *H_BG; // base graph 48 | int32_t simd_mode; 49 | 50 | /* Encoder Parameters */ 51 | int8_t *p0, *p1, *p2, *p3; 52 | int8_t *cbs_bits; 53 | int8_t *coded_bits; 54 | int8_t *crc_serial; 55 | 56 | /* Decoder Parameters */ 57 | int32_t M_whole; 58 | int32_t col_hbg_d; 59 | int32_t row_hbg_d; 60 | float *rdmed_llr; 61 | int8_t *decoded_bits; 62 | int8_t *degree; // number of connective check nodes(length:M/hbg_row_d) 63 | __m128i *cn_msg_sse; // sse message from cn to vn(length:M_whole) 64 | __m128i *vn_msg_sse; // temp sse message from vn to cn(length:19) 65 | __m256i *cn_msg_avx2; // avx2 message from cn to vn(length:M_whole) 66 | __m256i *vn_msg_avx2; // temp avx2 message from vn to cn(length:19) 67 | __m512i *cn_msg_avx512; // avx512 message from cn to vn(length:M_whole) 68 | __m512i *vn_msg_avx512; // temp avx512 message from vn to cn(length:19) 69 | int8_t *llr_fixed; // fixed llr info(length:2*REG_SIZE+Nd) 70 | int32_t units; // floor(Z_c/REG_SIZE) 71 | int32_t whole_degree; // sum of degree of every hbg_row_d 72 | int8_t **llr_addr_start; // llr address(length:whole_degree*units) 73 | int8_t *llr_addr_flag; // flag for access type(0:no mask;1:1 mask;2:2 mask) 74 | int8_t **llr_addr_pre; // extra llr address for flag=2(length:whole_degree) 75 | __m128i *mask_sse; // mask1 for 
flag=2(length:whole_degree) 76 | __m128i *mask_pre_sse; // mask2 for flag=2(length:whole_degree) 77 | __m128i endmask_sse; // mask for flag=1 78 | __m256i *mask_avx2; // mask1 for flag=2(length:whole_degree) 79 | __m256i *mask_pre_avx2; // mask2 for flag=2(length:whole_degree) 80 | __m256i endmask_avx2; // mask for flag=1 81 | #ifdef PAST_METHOD 82 | __m512i *mask_avx512; // mask1 for flag=2(length:whole_degree) 83 | __m512i *mask_pre_avx512; // mask2 for flag=2(length:whole_degree) 84 | __m512i endmask_avx512; // mask for flag=1 85 | #else 86 | __mmask64 *mmask_avx512; // mmask1 for flag=2(length:whole_degree) 87 | __mmask64 *mmask_pre_avx512; // mmask2 for flag=2(length:whole_degree) 88 | __mmask64 endmmask_avx512; // mmask for flag=1 89 | #endif 90 | 91 | nr5g_crc_t *crc_t; 92 | uint8_t *byte_list; 93 | 94 | } nr5g_ldpc_simd_t; 95 | 96 | /*************************************************************************************/ 97 | /* Declare LDPC initial functions */ 98 | /*************************************************************************************/ 99 | 100 | void nr5g_ldpc_simd_init(nr5g_ldpc_simd_t *h, int32_t B, int32_t R, int32_t simd_mode); 101 | 102 | /* initial LDPC parameter */ 103 | void nr5g_ldpc_simd_mode_init(nr5g_ldpc_simd_t *h); 104 | void nr5g_ldpc_simd_param_init(nr5g_ldpc_simd_t *h, int32_t B, int32_t R); 105 | void nr5g_ldpc_simd_rvid_param_init(nr5g_ldpc_simd_t *h, int32_t rvid); 106 | 107 | /* initial base graph matrix */ 108 | void nr5g_ldpc_simd_matrix_init(nr5g_ldpc_simd_t *h); 109 | 110 | /* initial encoder parameters */ 111 | void nr5g_ldpc_simd_encoder_mem_init(nr5g_ldpc_simd_t *h); 112 | 113 | /* initial decoder parameters */ 114 | void nr5g_ldpc_simd_decoder_mem_init(nr5g_ldpc_simd_t *h); 115 | void nr5g_ldpc_simd_decoder_param_init(nr5g_ldpc_simd_t *h); 116 | 117 | /*************************************************************************************/ 118 | /* Declare LDPC free functions */ 119 | 
/*************************************************************************************/ 120 | 121 | void free_nr5g_ldpc_simd_t(nr5g_ldpc_simd_t *h); 122 | void free_nr5g_ldpc_encoder(nr5g_ldpc_simd_t *h); 123 | void free_nr5g_ldpc_decoder(nr5g_ldpc_simd_t *h); 124 | 125 | /*************************************************************************************/ 126 | /* Declare LDPC code block segmentation functions */ 127 | /*************************************************************************************/ 128 | 129 | void nr5g_ldpc_simd_cbs(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits); 130 | 131 | void nr5g_ldpc_simd_cbs_scb(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits, int32_t r); 132 | 133 | void nr5g_fec_crc_encode(const int8_t *input_bits, int32_t len, int32_t L, int8_t *output_bits); 134 | 135 | void nr5g_ldpc_simd_decbs(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits); 136 | 137 | void nr5g_ldpc_simd_decbs_scb(const int8_t *input_bits, nr5g_ldpc_simd_t *h, int8_t *output_bits, int32_t r); 138 | 139 | /*************************************************************************************/ 140 | /* Declare LDPC encode functions */ 141 | /*************************************************************************************/ 142 | 143 | void nr5g_ldpc_simd_encoder(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *coded_bits); 144 | 145 | void nr5g_ldpc_simd_encoder_scb(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *coded_bits); 146 | 147 | /*************************************************************************************/ 148 | /* Declare LDPC rate matching functions */ 149 | /*************************************************************************************/ 150 | 151 | void nr5g_ldpc_simd_rate_matching(const int8_t *coded_bits, nr5g_ldpc_simd_t *h, int8_t *rmed_bits); 152 | 153 | void nr5g_ldpc_simd_rate_matching_scb(const int8_t *coded_bits, nr5g_ldpc_simd_t *h, int8_t 
*rmed_bits, int32_t r); 154 | 155 | void nr5g_ldpc_simd_rate_dematching(const float *llr, nr5g_ldpc_simd_t *h, float *rdmed_llr); 156 | 157 | void nr5g_ldpc_simd_rate_dematching_scb(const float *llr, nr5g_ldpc_simd_t *h, float *rdmed_llr, int32_t r); 158 | 159 | /*************************************************************************************/ 160 | /* Declare LDPC decode functions */ 161 | /*************************************************************************************/ 162 | 163 | void nr5g_ldpc_simd_decoder(const float *llr, nr5g_ldpc_simd_t *h, int32_t I_max, float coef, int32_t decoder_mode, int32_t early_stop, int8_t *decoded_bits, float *decoded_llr); 164 | 165 | void nr5g_ldpc_fast_load_llr_simd_scb(const float *llr, nr5g_ldpc_simd_t *h, int32_t r); 166 | 167 | void nr5g_ldpc_simd_decoder_scb(nr5g_ldpc_simd_t *h, int32_t I_max, float coef, int32_t decoder_mode, int32_t early_stop, int8_t *decoded_bits, float *decoded_llr, int32_t r); 168 | 169 | /*************************************************************************************/ 170 | /* Declare LDPC combo functions */ 171 | /*************************************************************************************/ 172 | 173 | void nr5g_ldpc_simd_cbs_enc_rm(const int8_t *info_bits, nr5g_ldpc_simd_t *h, int8_t *rmed_bits); 174 | 175 | void nr5g_ldpc_simd_rdm_dec_decbs(const float *llr, nr5g_ldpc_simd_t *h, int32_t I_max, int32_t decoder_mode, int32_t early_stop, float coef, int8_t *decbs_bits, float *decoded_llr); 176 | 177 | int is_ldpc_code(nr5g_ldpc_simd_t *h, uint8_t *code); 178 | 179 | #endif // !SIMD_LDPC_H 180 | -------------------------------------------------------------------------------- /inc/thread_pool.h: -------------------------------------------------------------------------------- 1 | #ifndef _THREAD_POOL_ 2 | #define _THREAD_POOL_ 3 | #ifndef __USE_GNU 4 | #define __USE_GNU 5 | #endif 6 | #include 7 | #include 8 | #include 9 | 10 | typedef void (*Fun)(void *arg); 11 | // define a 
struct for passing argument to a thread function 12 | struct Arg 13 | { 14 | int coreId; 15 | int re_idx; 16 | int data_idx; 17 | }; 18 | 19 | struct Task 20 | { 21 | Fun myfun; 22 | void *arg; 23 | struct Task *next; 24 | }; 25 | 26 | struct Thread_Pool 27 | { 28 | pthread_mutex_t mutex; 29 | pthread_cond_t cond; 30 | pthread_mutex_t mutex_flag; 31 | pthread_cond_t cond_flag; 32 | struct Task *taskHead; 33 | bool isClose; 34 | int threadNum; 35 | int threadNum_Idle; 36 | pthread_t *threadId; 37 | }; 38 | 39 | struct pool_arg_t 40 | { 41 | int coreId; 42 | int pool_index; 43 | }; 44 | 45 | void pool_init(int coreId_start, int _threadNum, int pool_index); 46 | void pool_add_task(Fun myfun, void *arg, int pool_index); 47 | void pool_destroy(int pool_index); 48 | void *thread_run(void *arg); 49 | 50 | extern struct Thread_Pool *pool[12]; 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /lib/libsimd_5gfec.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SherlockHsu/5G-SIMD-LDPC/5bbc01c27b96d1ffe917d44b84be662d3ed0e96c/lib/libsimd_5gfec.a -------------------------------------------------------------------------------- /src/thread_pool.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "thread_pool.h" 8 | 9 | struct Thread_Pool *pool[12]; 10 | static int coreId[72]; 11 | 12 | static int stick_this_thread_to_core(int core_id) 13 | { 14 | int num_cores = sysconf(_SC_NPROCESSORS_ONLN); //read cpu_core number 15 | if (core_id < 0 || core_id >= num_cores) 16 | return EINVAL; 17 | 18 | cpu_set_t cpuset; 19 | CPU_ZERO(&cpuset); 20 | CPU_SET(core_id, &cpuset); 21 | 22 | pthread_t current_thread = pthread_self(); 23 | return pthread_setaffinity_np(current_thread, sizeof(cpu_set_t), &cpuset); 24 | } 25 | 26 | void pool_init(int 
coreId_start, int _threadNum, int pool_index) 27 | { 28 | int j = pool_index; 29 | struct pool_arg_t *pool_arg = (struct pool_arg_t *)malloc(sizeof(struct pool_arg_t) * 72); 30 | 31 | pool[j] = (struct Thread_Pool *)malloc(sizeof(struct Thread_Pool)); 32 | assert(pool[j] != NULL); 33 | 34 | pthread_mutex_init(&(pool[j]->mutex), NULL); 35 | pthread_cond_init(&(pool[j]->cond), NULL); 36 | pool[j]->taskHead = NULL; 37 | pool[j]->isClose = false; 38 | pool[j]->threadNum = _threadNum; 39 | pool[j]->threadId = (pthread_t *)malloc(sizeof(pthread_t) * pool[j]->threadNum); 40 | 41 | int i; 42 | //int coreId_start = 0; 43 | for (i = 0; i < pool[j]->threadNum; ++i) 44 | { 45 | coreId[i] = coreId_start + i; 46 | // printf("coreId_start=%d coreId=%d\n", coreId_start, coreId[i]); 47 | pool_arg[i].coreId = coreId[i]; 48 | pool_arg[i].pool_index = pool_index; 49 | if (pthread_create(&(pool[j]->threadId[i]), NULL, thread_run, (void *)&pool_arg[i])) 50 | { 51 | printf("pthread_creat failed!\n"); 52 | return; 53 | } 54 | } 55 | } 56 | 57 | void pool_add_task(Fun _myfun, void *_arg, int pool_index) 58 | { 59 | int j = pool_index; 60 | //构造一个新任务 61 | struct Task *newTask = (struct Task *)malloc(sizeof(struct Task)); 62 | newTask->myfun = _myfun; 63 | newTask->arg = _arg; 64 | newTask->next = NULL; //别忘置空 65 | struct Task *head; 66 | 67 | //将任务加到任务链表中 68 | pthread_mutex_lock(&(pool[j]->mutex)); 69 | head = pool[j]->taskHead; 70 | if (head == NULL) 71 | pool[j]->taskHead = newTask; 72 | else 73 | { 74 | while (head->next) 75 | head = head->next; 76 | head->next = newTask; 77 | } 78 | //printf("newTask: %d\n", pthread_self()); 79 | pthread_mutex_unlock(&(pool[j]->mutex)); 80 | pthread_cond_signal(&(pool[j]->cond)); 81 | } 82 | 83 | void pool_destroy(int pool_index) 84 | { 85 | int j = pool_index; 86 | if (pool[j]->isClose == true) //防止多次调用该函数 87 | return; 88 | pool[j]->isClose = true; 89 | //唤醒所有等待线程,然后销毁线程池 90 | pthread_cond_broadcast(&(pool[j]->cond)); 91 | 92 | //回收线程 93 | int i; 94 | 
for (i = 0; i < pool[j]->threadNum; ++i) 95 | pthread_join(pool[j]->threadId[i], NULL); 96 | free(pool[j]->threadId); 97 | 98 | //销毁任务链表 99 | struct Task *tmpTask; 100 | while (pool[j]->taskHead != NULL) 101 | { 102 | tmpTask = pool[j]->taskHead; 103 | pool[j]->taskHead = pool[j]->taskHead->next; 104 | free(tmpTask); 105 | } 106 | 107 | //销毁条件变量与互斥量 108 | pthread_mutex_destroy(&(pool[j]->mutex)); 109 | pthread_cond_destroy(&(pool[j]->cond)); 110 | 111 | free(pool[j]); 112 | //释放内存后将指针置空 113 | pool[j] = NULL; 114 | } 115 | 116 | void *thread_run(void *_arg) 117 | { 118 | //printf("thread %d is ready\n", pthread_self()); 119 | struct Task *curTask; 120 | struct pool_arg_t pool_arg = *((struct pool_arg_t *)_arg); 121 | int coreId = pool_arg.coreId; 122 | int j = pool_arg.pool_index; 123 | //printf("I use core %d\n", coreId); 124 | //stick the thread to coreId 125 | // printf("coreId = %d\n", coreId); 126 | if (stick_this_thread_to_core(coreId)) 127 | printf("Stick to core %d is failed!\n", coreId); 128 | 129 | while (1) 130 | { 131 | pthread_mutex_lock(&(pool[j]->mutex)); 132 | while (pool[j]->taskHead == NULL && pool[j]->isClose == false) 133 | { 134 | //printf("thread %d is waiting\n", pthread_self()); 135 | pthread_cond_wait(&(pool[j]->cond), &(pool[j]->mutex)); 136 | //printf("thread: %d wakes up, taskHead: %d\n", pthread_self(), pool[j] -> taskHead); 137 | } 138 | if (pool[j]->taskHead == NULL && pool[j]->isClose == true) //销毁线程池时保证任务链表已空 139 | { 140 | pthread_cond_broadcast(&(pool[j]->cond)); 141 | pthread_mutex_unlock(&(pool[j]->mutex)); 142 | //printf("thread %d is over\n", pthread_self()); 143 | pthread_exit(NULL); 144 | } 145 | //printf("thread %d is going to work\n", pthread_self()); 146 | //printf("thread: %d wakes up, taskHead: %d\n", pthread_self(), pool[j] -> taskHead); 147 | // assert(pool[j]->taskHead != NULL); 148 | 149 | curTask = pool[j]->taskHead; 150 | pool[j]->taskHead = pool[j]->taskHead->next; 151 | pthread_mutex_unlock(&(pool[j]->mutex)); 152 
| //执行任务函数 153 | (curTask->myfun)(curTask->arg); 154 | free(curTask); 155 | curTask = NULL; 156 | } 157 | } 158 | --------------------------------------------------------------------------------