├── Makefile.am ├── test ├── Makefile.am └── example.c ├── .gitignore ├── include ├── gf_method.h ├── gf_rand.h ├── gf_w64.h ├── gf_w16.h ├── gf_w32.h ├── gf_w4.h ├── liberation.h ├── cauchy.h ├── gf_general.h ├── reed_sol.h ├── gf_w8.h ├── lrc.h ├── galois.h ├── gf_complete.h └── gf_int.h ├── m4 ├── ltversion.m4 ├── ax_check_compile_flag.m4 ├── ax_gcc_x86_cpuid.m4 ├── ax_gcc_x86_avx_xgetbv.m4 ├── ltsugar.m4 ├── lt~obsolete.m4 └── ax_ext.m4 ├── src ├── Makefile.am ├── gf_rand.c ├── gf_method.c ├── neon │ ├── gf_w4_neon.c │ ├── gf_w32_neon.c │ ├── gf_w8_neon.c │ ├── gf_w64_neon.c │ └── gf_w16_neon.c ├── liberation.c ├── reed_sol.c ├── galois.c └── lrc.c ├── LICENSE ├── configure.ac ├── missing ├── compile └── README.md /Makefile.am: -------------------------------------------------------------------------------- 1 | ACLOCAL_AMFLAGS = -I m4 2 | 3 | SUBDIRS = src test 4 | include_HEADERS = include/lrc.h 5 | -------------------------------------------------------------------------------- /test/Makefile.am: -------------------------------------------------------------------------------- 1 | # GF-Complete 'test' AM file 2 | 3 | AM_CPPFLAGS = -I$(top_srcdir)/include 4 | AM_CFLAGS = -O2 $(SIMD_FLAGS) -fPIC 5 | 6 | noinst_PROGRAMS = test_lrc example 7 | 8 | test_lrc_SOURCES = test_lrc.c 9 | test_lrc_LDADD = ../src/liblrc.la 10 | 11 | example_SOURCES = example.c 12 | example_LDADD = ../src/liblrc.la 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.ko 4 | *.obj 5 | *.elf 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Libraries 12 | *.lib 13 | *.a 14 | *.la 15 | *.lo 16 | 17 | # Shared objects (inc. 
Windows DLLs) 18 | *.dll 19 | *.so 20 | *.so.* 21 | *.dylib 22 | 23 | # Executables 24 | *.exe 25 | *.out 26 | *.app 27 | *.i*86 28 | *.x86_64 29 | *.hex 30 | 31 | # Debug files 32 | *.dSYM/ 33 | -------------------------------------------------------------------------------- /include/gf_method.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_method.h 7 | * 8 | * Parses argv to figure out the flags and arguments. Creates the gf. 9 | */ 10 | 11 | #pragma once 12 | 13 | #include "gf_complete.h" 14 | 15 | /* Parses argv starting at "starting". 16 | 17 | Returns 0 on failure. 18 | On success, it returns one past the last argument it read in argv. */ 19 | 20 | extern int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting); 21 | -------------------------------------------------------------------------------- /m4/ltversion.m4: -------------------------------------------------------------------------------- 1 | # ltversion.m4 -- version numbers -*- Autoconf -*- 2 | # 3 | # Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc. 4 | # Written by Scott James Remnant, 2004 5 | # 6 | # This file is free software; the Free Software Foundation gives 7 | # unlimited permission to copy and/or distribute it, with or without 8 | # modifications, as long as this notice is preserved. 
9 | 10 | # @configure_input@ 11 | 12 | # serial 4179 ltversion.m4 13 | # This file is part of GNU Libtool 14 | 15 | m4_define([LT_PACKAGE_VERSION], [2.4.6]) 16 | m4_define([LT_PACKAGE_REVISION], [2.4.6]) 17 | 18 | AC_DEFUN([LTVERSION_VERSION], 19 | [macro_version='2.4.6' 20 | macro_revision='2.4.6' 21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?]) 22 | _LT_DECL(, macro_revision, 0) 23 | ]) 24 | -------------------------------------------------------------------------------- /include/gf_rand.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_rand.h 7 | * 8 | * Random number generation, using the "Mother of All" random number generator. */ 9 | 10 | #pragma once 11 | #include 12 | #include 13 | #include 14 | 15 | /* These are all pretty self-explanatory */ 16 | uint32_t MOA_Random_32(); 17 | uint64_t MOA_Random_64(); 18 | void MOA_Random_128(uint64_t *x); 19 | uint32_t MOA_Random_W(int w, int zero_ok); 20 | void MOA_Fill_Random_Region (void *reg, int size); /* reg should be aligned to 4 bytes, but 21 | size can be anything. 
*/ 22 | void MOA_Seed(uint32_t seed); 23 | -------------------------------------------------------------------------------- /src/Makefile.am: -------------------------------------------------------------------------------- 1 | AM_CPPFLAGS = -I$(top_srcdir)/include 2 | AM_CFLAGS = -O2 $(SIMD_FLAGS) -fPIC 3 | 4 | ACLOCAL_AMFLAGS = -I m4 5 | 6 | lib_LTLIBRARIES = liblrc.la 7 | liblrc_la_SOURCES = \ 8 | gf.c \ 9 | gf_method.c \ 10 | gf_wgen.c \ 11 | gf_w4.c \ 12 | gf_w8.c \ 13 | gf_w16.c \ 14 | gf_w32.c \ 15 | gf_w64.c \ 16 | gf_w128.c \ 17 | gf_rand.c \ 18 | gf_general.c \ 19 | cauchy.c \ 20 | galois.c \ 21 | liberation.c \ 22 | reed_sol.c \ 23 | jerasure.c \ 24 | lrc.c 25 | 26 | if HAVE_NEON 27 | liblrc_la_SOURCES += neon/gf_w4_neon.c \ 28 | neon/gf_w8_neon.c \ 29 | neon/gf_w16_neon.c \ 30 | neon/gf_w32_neon.c \ 31 | neon/gf_w64_neon.c 32 | endif 33 | 34 | # vim:expandtab 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 张炎泼 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /include/gf_w64.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_w64.h 7 | * 8 | * Defines and data structures for 64-bit Galois fields 9 | */ 10 | 11 | #ifndef GF_COMPLETE_GF_W64_H 12 | #define GF_COMPLETE_GF_W64_H 13 | 14 | #include 15 | 16 | #define GF_FIELD_WIDTH (64) 17 | #define GF_FIRST_BIT (1ULL << 63) 18 | 19 | #define GF_BASE_FIELD_WIDTH (32) 20 | #define GF_BASE_FIELD_SIZE (1ULL << GF_BASE_FIELD_WIDTH) 21 | #define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 22 | 23 | struct gf_w64_group_data { 24 | uint64_t *reduce; 25 | uint64_t *shift; 26 | uint64_t *memory; 27 | }; 28 | 29 | struct gf_split_4_64_lazy_data { 30 | uint64_t tables[16][16]; 31 | uint64_t last_value; 32 | }; 33 | 34 | struct gf_split_8_64_lazy_data { 35 | uint64_t tables[8][(1<<8)]; 36 | uint64_t last_value; 37 | }; 38 | 39 | struct gf_split_16_64_lazy_data { 40 | uint64_t tables[4][(1<<16)]; 41 | uint64_t last_value; 42 | }; 43 | 44 | struct gf_split_8_8_data { 45 | uint64_t tables[15][256][256]; 46 | }; 47 | 48 | void gf_w64_neon_split_init(gf_t *gf); 49 | 50 | #endif /* GF_COMPLETE_GF_W64_H */ 51 | -------------------------------------------------------------------------------- /test/example.c: -------------------------------------------------------------------------------- 1 | #include "lrc.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | /* gcc 
example.c -llrc */ 8 | /* Demo: store four strings in the data chunks, encode, overwrite chunk 0, then rebuild it with lrc_decode(). */ 9 | int main(int argc, char **argv) { 10 | /* lrc and buf point at zero-initialized compound literals, so the two handles themselves live in automatic storage. */ 11 | int k, m, i; 12 | int size = 8; 13 | lrc_t *lrc = &(lrc_t) {0}; 14 | lrc_buf_t *buf = &(lrc_buf_t) {0}; 15 | /* Set up the code; any non-zero return is treated as failure. NOTE(review): the arguments (2, {2, 2}, 3) presumably mean two local groups of two data chunks plus three code chunks -- confirm against lrc.h. */ 16 | if (lrc_init_n(lrc, 2, (uint8_t[]) {2, 2}, 3) != 0) { 17 | exit(-1); 18 | } 19 | /* Prepare buf for this code with size = 8; presumably that is the byte length of every chunk -- confirm. */ 20 | if (lrc_buf_init(buf, lrc, size) != 0) { 21 | exit(-1); 22 | } 23 | /* Payload: four short strings, each at most 6 bytes including the terminating NUL. */ 24 | strcpy(buf->data[0], "hello"); 25 | strcpy(buf->data[1], "world"); 26 | strcpy(buf->data[2], "lrc"); 27 | strcpy(buf->data[3], "ec"); 28 | /* Encode; the code[] chunks printed below are presumably filled in here. */ 29 | if (lrc_encode(lrc, buf) != 0) { 30 | exit(-1); 31 | } 32 | /* Hex dump of the first size bytes of each of the lrc->k data chunks. */ 33 | for (k = 0; k < lrc->k; k++) { 34 | printf("data[%d]: ", k); 35 | for (i = 0; i < size; i++) { 36 | printf("%02x ", (uint8_t)buf->data[k][i]); 37 | } 38 | printf("\n"); 39 | } 40 | /* Same dump for the lrc->m code chunks. */ 41 | for (m = 0; m < lrc->m; m++) { 42 | printf("code[%d]: ", m); 43 | for (i = 0; i < size; i++) { 44 | printf("%02x ", (uint8_t)buf->code[m][i]); 45 | } 46 | printf("\n"); 47 | } 48 | /* Only index 0 is flagged; the seventh element has no initializer and is therefore zero. NOTE(review): presumably one flag per chunk, data chunks first -- confirm. */ 49 | int8_t erased[2 + 2 + 3] = {1, 0, 0, 0, 0, 0}; 50 | /* Simulate the loss by overwriting chunk 0. */ 51 | strcpy(buf->data[0], "*"); 52 | 53 | printf("damaged: %s %s %s %s\n", 54 | buf->data[0], buf->data[1], buf->data[2], buf->data[3]); 55 | /* Decode with the erased flags; the output below labels the result as reconstructed. */ 56 | if (lrc_decode(lrc, buf, erased) != 0) { 57 | exit(-1); 58 | } 59 | 60 | printf("reconstructed: %s %s %s %s\n", 61 | buf->data[0], buf->data[1], buf->data[2], buf->data[3]); 62 | /* Normal-path cleanup; the exit(-1) branches above leave without calling these. */ 63 | lrc_destroy(lrc); 64 | lrc_buf_destroy(buf); 65 | 66 | return 0; 67 | } 68 | 69 | // vim:sw=2:fdl=0 70 | -------------------------------------------------------------------------------- /include/gf_w16.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
5 | * 6 | * gf_w16.h 7 | * 8 | * Defines and data structures for 16-bit Galois fields 9 | */ 10 | 11 | #ifndef GF_COMPLETE_GF_W16_H 12 | #define GF_COMPLETE_GF_W16_H 13 | 14 | #include 15 | /* GF_FIELD_SIZE is 1 << 16 = 65536; the array members below are sized from it. */ 16 | #define GF_FIELD_WIDTH (16) 17 | #define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) 18 | #define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 19 | /* NOTE(review): GF_MULT_GROUP_SIZE expands without parentheses, so it equals 65535 only in additive contexts (2*GF_MULT_GROUP_SIZE is 2*65536-1, and -GF_MULT_GROUP_SIZE is -65536-1). gf_w4.h parenthesizes the same macro. Audit every use before changing this one, because existing expressions may depend on the current expansion. */ 20 | #define GF_BASE_FIELD_WIDTH (8) 21 | #define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) 22 | /* Each struct below is a flat collection of fixed-size arrays, scalars and pointers. */ 23 | struct gf_w16_logtable_data { 24 | uint16_t log_tbl[GF_FIELD_SIZE]; 25 | uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; 26 | uint16_t inv_tbl[GF_FIELD_SIZE]; 27 | uint16_t *d_antilog; 28 | }; 29 | /* Unlike the other log tables, log_tbl here is a signed int array, and antilog_tbl is a pointer kept next to the backing array _antilog_tbl. */ 30 | struct gf_w16_zero_logtable_data { 31 | int log_tbl[GF_FIELD_SIZE]; 32 | uint16_t _antilog_tbl[GF_FIELD_SIZE * 4]; 33 | uint16_t *antilog_tbl; 34 | uint16_t inv_tbl[GF_FIELD_SIZE]; 35 | }; 36 | /* Same members as gf_w16_logtable_data plus one extra GF_FIELD_SIZE-entry array. */ 37 | struct gf_w16_lazytable_data { 38 | uint16_t log_tbl[GF_FIELD_SIZE]; 39 | uint16_t antilog_tbl[GF_FIELD_SIZE * 2]; 40 | uint16_t inv_tbl[GF_FIELD_SIZE]; 41 | uint16_t *d_antilog; 42 | uint16_t lazytable[GF_FIELD_SIZE]; 43 | }; 44 | 45 | struct gf_w16_bytwo_data { 46 | uint64_t prim_poly; 47 | uint64_t mask1; 48 | uint64_t mask2; 49 | }; 50 | /* 3 * 256 * 256 uint16_t entries, i.e. 384 KiB held inline in the struct. */ 51 | struct gf_w16_split_8_8_data { 52 | uint16_t tables[3][256][256]; 53 | }; 54 | 55 | struct gf_w16_group_4_4_data { 56 | uint16_t reduce[16]; 57 | uint16_t shift[16]; 58 | }; 59 | 60 | struct gf_w16_composite_data { 61 | uint8_t *mult_table; 62 | }; 63 | /* NOTE(review): gf_t is not declared in this header; presumably the includer pulls in gf_int.h or gf_complete.h first -- confirm. */ 64 | void gf_w16_neon_split_init(gf_t *gf); 65 | 66 | #endif /* GF_COMPLETE_GF_W16_H */ 67 | -------------------------------------------------------------------------------- /include/gf_w32.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
5 | * 6 | * gf_w32.h 7 | * 8 | * Defines and data structures for 32-bit Galois fields 9 | */ 10 | 11 | #ifndef GF_COMPLETE_GF_W32_H 12 | #define GF_COMPLETE_GF_W32_H 13 | 14 | #include 15 | 16 | #define GF_FIELD_WIDTH (32) 17 | #define GF_FIRST_BIT (1 << 31) 18 | /* NOTE(review): GF_FIRST_BIT shifts the signed int 1 into the sign bit, which C leaves undefined where int is 32 bits wide. 1U << 31 would avoid that, but it also turns the constant from signed to unsigned, which changes how it widens against 64-bit operands -- audit the uses before changing it. */ 19 | #define GF_BASE_FIELD_WIDTH (16) 20 | #define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) 21 | #define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1 22 | #define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1) 23 | /* NOTE(review): GF_BASE_FIELD_GROUP_SIZE expands without parentheses, so it equals 65535 only in additive contexts. GF_MULTBY_TWO evaluates p twice (do not pass an expression with side effects) and requires a pointer named h with a prim_poly member to be in scope at the expansion site. */ 24 | struct gf_split_2_32_lazy_data { 25 | uint32_t tables[16][4]; 26 | uint32_t last_value; 27 | }; 28 | /* tables alone is 7 * 256 * 256 uint32_t entries, i.e. 1.75 MiB held inline in the struct. */ 29 | struct gf_w32_split_8_8_data { 30 | uint32_t tables[7][256][256]; 31 | uint32_t region_tables[4][256]; 32 | uint32_t last_value; 33 | }; 34 | /* Holds pointers rather than inline arrays; who allocates and frees them is not visible in this header. */ 35 | struct gf_w32_group_data { 36 | uint32_t *reduce; 37 | uint32_t *shift; 38 | int tshift; 39 | uint64_t rmask; 40 | uint32_t *memory; 41 | }; 42 | /* The three split_N_32 structs below pair a tables array with a last_value word; NOTE(review): last_value presumably records which multiplier the tables were last built for -- confirm in gf_w32.c. */ 43 | struct gf_split_16_32_lazy_data { 44 | uint32_t tables[2][(1<<16)]; 45 | uint32_t last_value; 46 | }; 47 | 48 | struct gf_split_8_32_lazy_data { 49 | uint32_t tables[4][256]; 50 | uint32_t last_value; 51 | }; 52 | 53 | struct gf_split_4_32_lazy_data { 54 | uint32_t tables[8][16]; 55 | uint32_t last_value; 56 | }; 57 | 58 | struct gf_w32_bytwo_data { 59 | uint64_t prim_poly; 60 | uint64_t mask1; 61 | uint64_t mask2; 62 | }; 63 | 64 | struct gf_w32_composite_data { 65 | uint16_t *log; 66 | uint16_t *alog; 67 | }; 68 | /* NOTE(review): gf_t is not declared in this header; presumably the includer pulls in gf_int.h or gf_complete.h first -- confirm. */ 69 | void gf_w32_neon_split_init(gf_t *gf); 70 | 71 | #endif /* GF_COMPLETE_GF_W32_H */ 72 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # gf-complete autoconf template 2 | 3 | # FIXME - add project url as the last argument 4 | AC_INIT(lrc-ec, 1.0) 5 | 6 | # Override default CFLAGS 7 | : ${CFLAGS="-std=gnu99 -Wall -Wpointer-arith -O2 -g"} 8 | 9 | AC_PREREQ([2.61]) 10 | 11 | AM_INIT_AUTOMAKE([no-dependencies
foreign parallel-tests subdir-objects]) 12 | LT_INIT # libtool 13 | 14 | AC_CONFIG_MACRO_DIRS([m4]) 15 | 16 | # This prevents './configure; make' from trying to run autotools. 17 | AM_MAINTAINER_MODE([disable]) 18 | 19 | dnl Compiling with per-target flags requires AM_PROG_CC_C_O. 20 | AC_PROG_CC 21 | 22 | # Check for functions to provide aligned memory 23 | # 24 | AC_CHECK_FUNCS([posix_memalign], 25 | [found_memalign=yes; break]) 26 | 27 | AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])]) 28 | 29 | AX_EXT() 30 | 31 | AC_ARG_ENABLE([neon], 32 | AS_HELP_STRING([--disable-neon], [Build without NEON optimizations])) 33 | 34 | AS_IF([test "x$enable_neon" != "xno"], 35 | [noneon_CPPFLAGS=$CPPFLAGS 36 | CPPFLAGS="$CPPFLAGS $SIMD_FLAGS" 37 | AC_CHECK_HEADER([arm_neon.h], 38 | [have_neon=yes], 39 | [have_neon=no 40 | CPPFLAGS=$noneon_CPPFLAGS])], 41 | [have_neon=no 42 | AS_IF([test "x$ax_cv_have_neon_ext" = "xyes"], 43 | [SIMD_FLAGS=""]) 44 | ]) 45 | 46 | AS_IF([test "x$have_neon" = "xno"], 47 | [AS_IF([test "x$enable_neon" = "xyes"], 48 | [AC_MSG_ERROR([neon requested but arm_neon.h not found])]) 49 | ]) 50 | AM_CONDITIONAL([HAVE_NEON], [test "x$have_neon" = "xyes"]) 51 | 52 | AC_ARG_ENABLE([sse], 53 | AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]), 54 | [if test "x$enableval" = "xno" ; then 55 | SIMD_FLAGS="" 56 | echo "DISABLED SSE!!!" 57 | fi] 58 | ) 59 | 60 | AC_CONFIG_FILES([Makefile 61 | src/Makefile 62 | test/Makefile 63 | ]) 64 | AC_OUTPUT 65 | -------------------------------------------------------------------------------- /include/gf_w4.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 
5 | * 6 | * gf_w4.h 7 | * 8 | * Defines and data structures for 4-bit Galois fields 9 | */ 10 | 11 | #ifndef GF_COMPLETE_GF_W4_H 12 | #define GF_COMPLETE_GF_W4_H 13 | 14 | #include 15 | 16 | #define GF_FIELD_WIDTH 4 17 | #define GF_DOUBLE_WIDTH (GF_FIELD_WIDTH*2) 18 | #define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) 19 | #define GF_MULT_GROUP_SIZE (GF_FIELD_SIZE-1) 20 | 21 | /* ------------------------------------------------------------ 22 | JSP: Each implementation has its own data, which is allocated 23 | at one time as part of the handle. For that reason, it 24 | shouldn't be hierarchical -- i.e. one should be able to 25 | allocate it with one call to malloc. */ 26 | 27 | struct gf_logtable_data { 28 | uint8_t log_tbl[GF_FIELD_SIZE]; 29 | uint8_t antilog_tbl[GF_FIELD_SIZE * 2]; 30 | uint8_t *antilog_tbl_div; 31 | }; 32 | 33 | struct gf_single_table_data { 34 | uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE]; 35 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 36 | }; 37 | 38 | struct gf_double_table_data { 39 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 40 | uint8_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE]; 41 | }; 42 | struct gf_quad_table_data { 43 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 44 | uint16_t mult[GF_FIELD_SIZE][(1<<16)]; 45 | }; 46 | 47 | struct gf_quad_table_lazy_data { 48 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 49 | uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE]; 50 | uint16_t mult[(1 << 16)]; 51 | }; 52 | 53 | struct gf_bytwo_data { 54 | uint64_t prim_poly; 55 | uint64_t mask1; 56 | uint64_t mask2; 57 | }; 58 | 59 | // ARM NEON init functions 60 | int gf_w4_neon_cfm_init(gf_t *gf); 61 | void gf_w4_neon_single_table_init(gf_t *gf); 62 | 63 | #endif /* GF_COMPLETE_GF_W4_H */ 64 | -------------------------------------------------------------------------------- /src/gf_rand.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | 
* James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_rand.c -- Random number generator. 7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include "gf_rand.h" 13 | 14 | /* Lifted the "Mother of All" random number generator from http://www.agner.org/random/ */ 15 | /* Generator state. MOA_X[0..3] hold the four most recent outputs (newest first) and MOA_X[4] holds the carry. As a file-scope static it starts out all zero, and with an all-zero state every output is 0, so call MOA_Seed() before drawing numbers. The state is shared and unsynchronized. */ 16 | static uint32_t MOA_X[5]; 17 | /* Advances the generator one step: forms a 64-bit weighted sum of the four stored outputs plus the carry, shifts MOA_X[0..2] up one slot, stores the high 32 bits of the sum as the new carry and the low 32 bits as the new MOA_X[0], and returns that value. */ 18 | uint32_t MOA_Random_32() { 19 | uint64_t sum; 20 | sum = (uint64_t)2111111111UL * (uint64_t)MOA_X[3] + 21 | (uint64_t)1492 * (uint64_t)(MOA_X[2]) + 22 | (uint64_t)1776 * (uint64_t)(MOA_X[1]) + 23 | (uint64_t)5115 * (uint64_t)(MOA_X[0]) + 24 | (uint64_t)MOA_X[4]; 25 | MOA_X[3] = MOA_X[2]; MOA_X[2] = MOA_X[1]; MOA_X[1] = MOA_X[0]; 26 | MOA_X[4] = (uint32_t)(sum >> 32); 27 | MOA_X[0] = (uint32_t)sum; 28 | return MOA_X[0]; 29 | } 30 | /* Returns a 64-bit value built from two successive 32-bit outputs; the first draw becomes the high half. */ 31 | uint64_t MOA_Random_64() { 32 | uint64_t sum; 33 | 34 | sum = MOA_Random_32(); 35 | sum <<= 32; 36 | sum |= MOA_Random_32(); 37 | return sum; 38 | } 39 | /* Writes two successive 64-bit outputs to x[0] and x[1]; x must point to at least two uint64_t. */ 40 | void MOA_Random_128(uint64_t *x) { 41 | x[0] = MOA_Random_64(); 42 | x[1] = MOA_Random_64(); 43 | return; 44 | } 45 | /* Returns a random value limited to w bits: masked to 31 bits when w == 31, reduced modulo 2^w when w < 31, and left as the full 32-bit output for larger w. When zero_ok is 0 the draw is repeated until the value is nonzero. NOTE(review): w <= 0 is not handled -- w == 0 with zero_ok == 0 never terminates, and a negative w makes the shift undefined; callers presumably pass 1..32. */ 46 | uint32_t MOA_Random_W(int w, int zero_ok) 47 | { 48 | uint32_t b; 49 | 50 | do { 51 | b = MOA_Random_32(); 52 | if (w == 31) b &= 0x7fffffff; 53 | if (w < 31) b %= (1 << w); 54 | } while (!zero_ok && b == 0); 55 | return b; 56 | } 57 | /* Seeds the generator: the five state words come from the recurrence s = s * 29943829 - 1 (unsigned 32-bit wraparound) starting from seed, after which 19 outputs are drawn and discarded. */ 58 | void MOA_Seed(uint32_t seed) { 59 | int i; 60 | uint32_t s = seed; 61 | for (i = 0; i < 5; i++) { 62 | s = s * 29943829 - 1; 63 | MOA_X[i] = s; 64 | } 65 | for (i=0; i<19; i++) MOA_Random_32(); 66 | } 67 | 68 | /* Fills size bytes starting at reg. Whole 4-byte words receive MOA_Random_32() through a uint32_t pointer, which is why reg has to be 4-byte aligned; the remaining size % 4 bytes receive MOA_Random_W(8, 1). A size of zero or less writes nothing. */ 69 | void MOA_Fill_Random_Region (void *reg, int size) 70 | { 71 | uint32_t *r32; 72 | uint8_t *r8; 73 | int i; 74 | 75 | r32 = (uint32_t *) reg; 76 | r8 = (uint8_t *) reg; 77 | for (i = 0; i < size/4; i++) r32[i] = MOA_Random_32(); 78 | for (i *= 4; i < size; i++) r8[i] = MOA_Random_W(8, 1); 79 | } 80 | 81 | -------------------------------------------------------------------------------- /include/liberation.h: 
-------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2013, James S. Plank and Kevin Greenan 3 | * All rights reserved. 4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 
38 | */ 39 | 40 | /* Prototypes for the three coding bit-matrix constructors; each returns an int pointer. The guard macro is defined immediately after the #ifndef test so that a repeated inclusion of this header is actually skipped. */ 41 | #ifndef _LIBERATION 42 | #define _LIBERATION 43 | extern int *liberation_coding_bitmatrix(int k, int w); 44 | extern int *liber8tion_coding_bitmatrix(int k); 45 | extern int *blaum_roth_coding_bitmatrix(int k, int w); 46 | 47 | #endif /* _LIBERATION */ 48 | -------------------------------------------------------------------------------- /include/cauchy.h: -------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2013, James S. Plank and Kevin Greenan 3 | * All rights reserved. 4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 38 | */ 39 | 40 | 41 | extern int *cauchy_original_coding_matrix(int k, int m, int w); 42 | extern int *cauchy_xy_coding_matrix(int k, int m, int w, int *x, int *y); 43 | extern void cauchy_improve_coding_matrix(int k, int m, int w, int *matrix); 44 | extern int *cauchy_good_general_coding_matrix(int k, int m, int w); 45 | extern int cauchy_n_ones(int n, int w); 46 | -------------------------------------------------------------------------------- /include/gf_general.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_general.h 7 | * 8 | * This file has helper routines for doing basic GF operations with any 9 | * legal value of w. The problem is that w <= 32, w=64 and w=128 all have 10 | * different data types, which is a pain. The procedures in this file try 11 | * to alleviate that pain. They are used in gf_unit and gf_time. 
12 | */ 13 | 14 | #pragma once 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "gf_complete.h" 24 | 25 | typedef union { 26 | uint32_t w32; 27 | uint64_t w64; 28 | uint64_t w128[2]; 29 | } gf_general_t; 30 | 31 | void gf_general_set_zero(gf_general_t *v, int w); 32 | void gf_general_set_one(gf_general_t *v, int w); 33 | void gf_general_set_two(gf_general_t *v, int w); 34 | 35 | int gf_general_is_zero(gf_general_t *v, int w); 36 | int gf_general_is_one(gf_general_t *v, int w); 37 | int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w); 38 | 39 | void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex); 40 | int gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex); 41 | 42 | void gf_general_set_random(gf_general_t *v, int w, int zero_ok); 43 | 44 | void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c); 45 | void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c); 46 | void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c); 47 | void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b); 48 | 49 | void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a, 50 | void *ra, void *rb, 51 | int bytes, int xor); 52 | 53 | void gf_general_do_region_check(gf_t *gf, gf_general_t *a, 54 | void *orig_a, void *orig_target, void *final_target, 55 | int bytes, int xor); 56 | 57 | 58 | /* Which is M, D or I for multiply, divide or inverse. */ 59 | 60 | void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size); 61 | int gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char which); 62 | -------------------------------------------------------------------------------- /include/reed_sol.h: -------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2013, James S. 
Plank and Kevin Greenan 3 | * All rights reserved. 4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 
38 | */ 39 | 40 | /* NOTE(review): reed_sol.h has no include guard; that is harmless only while it contains nothing but prototypes. */ 41 | extern int *reed_sol_vandermonde_coding_matrix(int k, int m, int w); 42 | extern int *reed_sol_extended_vandermonde_matrix(int rows, int cols, int w); 43 | extern int *reed_sol_big_vandermonde_distribution_matrix(int rows, int cols, int w); 44 | 45 | extern int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int size); 46 | extern int *reed_sol_r6_coding_matrix(int k, int w); 47 | 48 | extern void reed_sol_galois_w08_region_multby_2(char *region, int nbytes); 49 | extern void reed_sol_galois_w16_region_multby_2(char *region, int nbytes); 50 | extern void reed_sol_galois_w32_region_multby_2(char *region, int nbytes); 51 | -------------------------------------------------------------------------------- /include/gf_w8.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 
5 | * 6 | * gf_w8.h 7 | * 8 | * Defines and data structures for 8-bit Galois fields 9 | */ 10 | 11 | #ifndef GF_COMPLETE_GF_W8_H 12 | #define GF_COMPLETE_GF_W8_H 13 | 14 | #include "gf_int.h" 15 | #include 16 | 17 | #define GF_FIELD_WIDTH (8) 18 | #define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH) 19 | #define GF_HALF_SIZE (1 << (GF_FIELD_WIDTH/2)) 20 | #define GF_MULT_GROUP_SIZE GF_FIELD_SIZE-1 21 | /* NOTE(review): GF_MULT_GROUP_SIZE expands without parentheses, so it equals 255 only in additive contexts; gf_w4.h parenthesizes the same macro. Audit every use before changing it, because existing expressions may depend on the current expansion. */ 22 | #define GF_BASE_FIELD_WIDTH (4) 23 | #define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH) 24 | 25 | struct gf_w8_logtable_data { 26 | uint8_t log_tbl[GF_FIELD_SIZE]; 27 | uint8_t antilog_tbl[GF_FIELD_SIZE * 2]; 28 | uint8_t inv_tbl[GF_FIELD_SIZE]; 29 | }; 30 | 31 | struct gf_w8_logzero_table_data { 32 | short log_tbl[GF_FIELD_SIZE]; /* Make this signed, so that we can divide easily */ 33 | uint8_t antilog_tbl[512+512+1]; 34 | uint8_t *div_tbl; 35 | uint8_t *inv_tbl; 36 | }; 37 | 38 | struct gf_w8_logzero_small_table_data { 39 | short log_tbl[GF_FIELD_SIZE]; /* Make this signed, so that we can divide easily */ 40 | uint8_t antilog_tbl[255*3]; 41 | uint8_t inv_tbl[GF_FIELD_SIZE]; 42 | uint8_t *div_tbl; 43 | }; 44 | 45 | struct gf_w8_composite_data { 46 | uint8_t *mult_table; 47 | }; 48 | 49 | /* Don't change the order of these relative to gf_w8_half_table_data */ 50 | 51 | struct gf_w8_default_data { 52 | uint8_t high[GF_FIELD_SIZE][GF_HALF_SIZE]; 53 | uint8_t low[GF_FIELD_SIZE][GF_HALF_SIZE]; 54 | uint8_t divtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; 55 | uint8_t multtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; 56 | }; 57 | /* Same two members, in the same order, as the start of gf_w8_default_data above. */ 58 | struct gf_w8_half_table_data { 59 | uint8_t high[GF_FIELD_SIZE][GF_HALF_SIZE]; 60 | uint8_t low[GF_FIELD_SIZE][GF_HALF_SIZE]; 61 | }; 62 | 63 | struct gf_w8_single_table_data { 64 | uint8_t divtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; 65 | uint8_t multtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; 66 | }; 67 | /* mult is 256 rows of 65536 uint16_t entries, i.e. 32 MiB held inline in the struct. */ 68 | struct gf_w8_double_table_data { 69 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 70 | uint16_t mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE]; 71 | }; 72 | 73 | 
struct gf_w8_double_table_lazy_data { 74 | uint8_t div[GF_FIELD_SIZE][GF_FIELD_SIZE]; 75 | uint8_t smult[GF_FIELD_SIZE][GF_FIELD_SIZE]; 76 | uint16_t mult[GF_FIELD_SIZE*GF_FIELD_SIZE]; 77 | }; 78 | 79 | struct gf_w4_logtable_data { 80 | uint8_t log_tbl[GF_BASE_FIELD_SIZE]; 81 | uint8_t antilog_tbl[GF_BASE_FIELD_SIZE * 2]; 82 | uint8_t *antilog_tbl_div; 83 | }; 84 | 85 | struct gf_w4_single_table_data { 86 | uint8_t div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; 87 | uint8_t mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE]; 88 | }; 89 | 90 | struct gf_w8_bytwo_data { 91 | uint64_t prim_poly; 92 | uint64_t mask1; 93 | uint64_t mask2; 94 | }; 95 | 96 | int gf_w8_neon_cfm_init(gf_t *gf); 97 | void gf_w8_neon_split_init(gf_t *gf); 98 | 99 | #endif /* GF_COMPLETE_GF_W8_H */ 100 | -------------------------------------------------------------------------------- /m4/ax_check_compile_flag.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS]) 8 | # 9 | # DESCRIPTION 10 | # 11 | # Check whether the given FLAG works with the current language's compiler 12 | # or gives an error. (Warnings, however, are ignored) 13 | # 14 | # ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on 15 | # success/failure. 16 | # 17 | # If EXTRA-FLAGS is defined, it is added to the current language's default 18 | # flags (e.g. CFLAGS) when the check is done. The check is thus made with 19 | # the flags: "CFLAGS EXTRA-FLAGS FLAG". This can for example be used to 20 | # force the compiler to issue an error when a bad flag is given. 21 | # 22 | # NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. 
Please keep this 23 | # macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG. 24 | # 25 | # LICENSE 26 | # 27 | # Copyright (c) 2008 Guido U. Draheim 28 | # Copyright (c) 2011 Maarten Bosmans 29 | # 30 | # This program is free software: you can redistribute it and/or modify it 31 | # under the terms of the GNU General Public License as published by the 32 | # Free Software Foundation, either version 3 of the License, or (at your 33 | # option) any later version. 34 | # 35 | # This program is distributed in the hope that it will be useful, but 36 | # WITHOUT ANY WARRANTY; without even the implied warranty of 37 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 38 | # Public License for more details. 39 | # 40 | # You should have received a copy of the GNU General Public License along 41 | # with this program. If not, see . 42 | # 43 | # As a special exception, the respective Autoconf Macro's copyright owner 44 | # gives unlimited permission to copy, distribute and modify the configure 45 | # scripts that are the output of Autoconf when processing the Macro. You 46 | # need not follow the terms of the GNU General Public License when using 47 | # or distributing such scripts, even though portions of the text of the 48 | # Macro appear in them. The GNU General Public License (GPL) does govern 49 | # all other use of the material that constitutes the Autoconf Macro. 50 | # 51 | # This special exception to the GPL applies to versions of the Autoconf 52 | # Macro released by the Autoconf Archive. When you make and distribute a 53 | # modified version of the Autoconf Macro, you may extend this special 54 | # exception to the GPL to apply to your modified version as well. 
55 | 56 | #serial 2 57 | 58 | AC_DEFUN([AX_CHECK_COMPILE_FLAG], 59 | [AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX 60 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl 61 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [ 62 | ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS 63 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1" 64 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], 65 | [AS_VAR_SET(CACHEVAR,[yes])], 66 | [AS_VAR_SET(CACHEVAR,[no])]) 67 | _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags]) 68 | AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes], 69 | [m4_default([$2], :)], 70 | [m4_default([$3], :)]) 71 | AS_VAR_POPDEF([CACHEVAR])dnl 72 | ])dnl AX_CHECK_COMPILE_FLAGS 73 | -------------------------------------------------------------------------------- /m4/ax_gcc_x86_cpuid.m4: -------------------------------------------------------------------------------- 1 | # =========================================================================== 2 | # http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html 3 | # =========================================================================== 4 | # 5 | # SYNOPSIS 6 | # 7 | # AX_GCC_X86_CPUID(OP) 8 | # 9 | # DESCRIPTION 10 | # 11 | # On Pentium and later x86 processors, with gcc or a compiler that has a 12 | # compatible syntax for inline assembly instructions, run a small program 13 | # that executes the cpuid instruction with input OP. This can be used to 14 | # detect the CPU type. 15 | # 16 | # On output, the values of the eax, ebx, ecx, and edx registers are stored 17 | # as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable 18 | # ax_cv_gcc_x86_cpuid_OP. 19 | # 20 | # If the cpuid instruction fails (because you are running a 21 | # cross-compiler, or because you are not using gcc, or because you are on 22 | # a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP 23 | # is set to the string "unknown". 
24 | # 25 | # This macro mainly exists to be used in AX_GCC_ARCHFLAG. 26 | # 27 | # LICENSE 28 | # 29 | # Copyright (c) 2008 Steven G. Johnson 30 | # Copyright (c) 2008 Matteo Frigo 31 | # 32 | # This program is free software: you can redistribute it and/or modify it 33 | # under the terms of the GNU General Public License as published by the 34 | # Free Software Foundation, either version 3 of the License, or (at your 35 | # option) any later version. 36 | # 37 | # This program is distributed in the hope that it will be useful, but 38 | # WITHOUT ANY WARRANTY; without even the implied warranty of 39 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 40 | # Public License for more details. 41 | # 42 | # You should have received a copy of the GNU General Public License along 43 | # with this program. If not, see . 44 | # 45 | # As a special exception, the respective Autoconf Macro's copyright owner 46 | # gives unlimited permission to copy, distribute and modify the configure 47 | # scripts that are the output of Autoconf when processing the Macro. You 48 | # need not follow the terms of the GNU General Public License when using 49 | # or distributing such scripts, even though portions of the text of the 50 | # Macro appear in them. The GNU General Public License (GPL) does govern 51 | # all other use of the material that constitutes the Autoconf Macro. 52 | # 53 | # This special exception to the GPL applies to versions of the Autoconf 54 | # Macro released by the Autoconf Archive. When you make and distribute a 55 | # modified version of the Autoconf Macro, you may extend this special 56 | # exception to the GPL to apply to your modified version as well. 
#serial 7

# AX_GCC_X86_CPUID(OP)
# --------------------
# Builds and runs a small C program that executes `cpuid` with eax = OP and
# writes "eax:ebx:ecx:edx" (hexadecimal) to the file conftest_cpuid. The
# result is cached in ax_cv_gcc_x86_cpuid_OP; the value is "unknown" when
# the program fails to build or run, and also when cross-compiling.
# The probe uses FILE, fopen, fprintf and fclose, so its prologue must
# include <stdio.h>.
AC_DEFUN([AX_GCC_X86_CPUID],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, ebx, ecx, edx;
     FILE *f;
      __asm__("cpuid"
        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
        : "a" (op));
     f = fopen("conftest_cpuid", "w"); if (!f) return 1;
     fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
     [ax_cv_gcc_x86_cpuid_$1=unknown])])
AC_LANG_POP([C])
])
--------------------------------------------------------------------------------
/m4/ax_gcc_x86_avx_xgetbv.m4:
--------------------------------------------------------------------------------
# ===========================================================================
# http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
# ===========================================================================
#
# SYNOPSIS
#
# AX_GCC_X86_AVX_XGETBV(OP)
#
# DESCRIPTION
#
# On later x86 processors with AVX SIMD support, with gcc or a compiler
# that has a compatible syntax for inline assembly instructions, run a
# small program that executes the xgetbv instruction with input OP. This
# can be used to detect if the OS supports AVX instruction usage.
#
# On output, the values of the eax and edx registers are stored as
# hexadecimal strings as "eax:edx" in the cache variable
# ax_cv_gcc_x86_avx_xgetbv_OP.
19 | # 20 | # If the xgetbv instruction fails (because you are running a 21 | # cross-compiler, or because you are not using gcc, or because you are on 22 | # a processor that doesn't have this instruction), 23 | # ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown". 24 | # 25 | # This macro mainly exists to be used in AX_EXT. 26 | # 27 | # LICENSE 28 | # 29 | # Copyright (c) 2013 Michael Petch 30 | # 31 | # This program is free software: you can redistribute it and/or modify it 32 | # under the terms of the GNU General Public License as published by the 33 | # Free Software Foundation, either version 3 of the License, or (at your 34 | # option) any later version. 35 | # 36 | # This program is distributed in the hope that it will be useful, but 37 | # WITHOUT ANY WARRANTY; without even the implied warranty of 38 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General 39 | # Public License for more details. 40 | # 41 | # You should have received a copy of the GNU General Public License along 42 | # with this program. If not, see . 43 | # 44 | # As a special exception, the respective Autoconf Macro's copyright owner 45 | # gives unlimited permission to copy, distribute and modify the configure 46 | # scripts that are the output of Autoconf when processing the Macro. You 47 | # need not follow the terms of the GNU General Public License when using 48 | # or distributing such scripts, even though portions of the text of the 49 | # Macro appear in them. The GNU General Public License (GPL) does govern 50 | # all other use of the material that constitutes the Autoconf Macro. 51 | # 52 | # This special exception to the GPL applies to versions of the Autoconf 53 | # Macro released by the Autoconf Archive. When you make and distribute a 54 | # modified version of the Autoconf Macro, you may extend this special 55 | # exception to the GPL to apply to your modified version as well. 
#serial 1

# AX_GCC_X86_AVX_XGETBV(OP)
# -------------------------
# Builds and runs a small C program that executes `xgetbv` (emitted as raw
# opcode bytes) with ecx = OP and writes "eax:edx" (hexadecimal) to the file
# conftest_xgetbv. The result is cached in ax_cv_gcc_x86_avx_xgetbv_OP; the
# value is "unknown" when the program fails to build or run, and also when
# cross-compiling.
# The probe uses FILE, fopen, fprintf and fclose, so its prologue must
# include <stdio.h>.
AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
[AC_REQUIRE([AC_PROG_CC])
AC_LANG_PUSH([C])
AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
 [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
     int op = $1, eax, edx;
     FILE *f;
      /* Opcodes for xgetbv */
      __asm__(".byte 0x0f, 0x01, 0xd0"
        : "=a" (eax), "=d" (edx)
        : "c" (op));
     f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
     fprintf(f, "%x:%x\n", eax, edx);
     fclose(f);
     return 0;
])],
     [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
     [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
AC_LANG_POP([C])
])
--------------------------------------------------------------------------------
/include/lrc.h:
--------------------------------------------------------------------------------
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015 Zhang Yanpo (张炎泼)
 */

#ifndef LRC_EC_LRC_
#define LRC_EC_LRC_

/* Fixed-width integer types (uint8_t, int8_t, int64_t) used throughout this header. */
#include <stdint.h>

/* #define LRC_DEBUG 1 */

/* Negative status codes (presumably returned by the lrc_* functions; confirm in lrc.c). */
#define LRC_OUT_OF_MEMORY (-1)
#define LRC_UNRECOVERABLE (-2)
#define LRC_INIT_TWICE (-3)
#define LRC_INVALID_M (-4)

/*
 * Debug helpers. With LRC_DEBUG defined, dd/dlog print to stderr through
 * fprintf (so the including file must also include <stdio.h>) and the
 * lrc_debug_* macros forward to the lrc_debug_*_ functions. Without
 * LRC_DEBUG every one of them expands to nothing.
 * `##__VA_ARGS__` is a GNU extension that drops the preceding comma when no
 * variadic arguments are given.
 */
#ifdef LRC_DEBUG
# define dd( _fmt, ... ) fprintf( stderr, _fmt "\n", ##__VA_ARGS__)
# define dlog( _fmt, ... ) fprintf( stderr, _fmt, ##__VA_ARGS__)
# define lrc_debug_buf_line(...) lrc_debug_buf_line_( __VA_ARGS__ )
# define lrc_debug_matrix(...) lrc_debug_matrix_( __VA_ARGS__ )
# define lrc_debug_sources(...) lrc_debug_sources_( __VA_ARGS__ )
#else
# define dd( _fmt, ... )
# define dlog( _fmt, ... )
# define lrc_debug_buf_line(...)
# define lrc_debug_matrix(...)
# define lrc_debug_sources(...)
31 | #endif /* LRC_DEBUG */ 32 | 33 | #define lrc_align_16(val) (((val - 1) / 16 + 1) * 16) 34 | 35 | #define _lrc_concat(a, b) a ## b 36 | 37 | #define _lrc_n_arr_k(...) (sizeof((uint8_t[]){__VA_ARGS__}) / sizeof(uint8_t)), \ 38 | (uint8_t[]){__VA_ARGS__} 39 | 40 | /* k_param is in form: 'k(2, 3, 4)' 41 | * 42 | * expansion of thie macro: 43 | * 44 | * lrc_init(lrc, k(2, 3), 2) 45 | * -> lrc_init_n(lrc, _lrc_concat(_lrc_n_arr_, k(2, 3), 2) 46 | * -> lrc_init_n(lrc, _lrc_n_arr_ ## k(2, 3), 2) 47 | * -> lrc_init_n(lrc, _lrc_n_arr_k(2, 3), 2) 48 | * -> lrc_init_n(lrc, (sizeof((uint8_t[]){2, 3}) / sizeof(uint8_t)), (uint8_t[]){2, 3}, 2) 49 | * -> lrc_init_n(lrc, 2, (uint8_t[]){2, 3}, 2) 50 | */ 51 | #define lrc_init(lrc, k_param, m) \ 52 | lrc_init_n((lrc), _lrc_concat(_lrc_n_arr_, k_param), (m)) 53 | 54 | 55 | extern int *reed_sol_vandermonde_coding_matrix(int k, int m, int w); 56 | extern int jerasure_matrix_decode(int k, int m, int w, 57 | int *matrix, int row_k_ones, int *erasures, 58 | char **data_ptrs, char **coding_ptrs, int size); 59 | 60 | typedef struct { 61 | uint8_t start; 62 | uint8_t len; 63 | } lrc_local_t; 64 | 65 | typedef struct { 66 | 67 | int n_data; 68 | int n_code; 69 | int n; /* n_data + n_code */ 70 | 71 | char *data[512]; 72 | char **code; 73 | 74 | int64_t chunk_size; 75 | int64_t aligned_chunk_size; 76 | char *buf; 77 | 78 | int8_t buf_owned; 79 | int8_t inited_; 80 | 81 | } lrc_buf_t; 82 | 83 | typedef struct { 84 | 85 | int k; /* nr of data */ 86 | int m; /* nr of code of original reed-solomon ec */ 87 | int n; /* total number of chunks: k + m */ 88 | 89 | int n_local; /* nr of local EC */ 90 | lrc_local_t *locals; /* start index and nr of elts of each local EC */ 91 | 92 | int *matrix; /* ecoding matrix m *k */ 93 | int8_t *code_erased; /* for encode */ 94 | 95 | int8_t inited_; 96 | 97 | } lrc_t; 98 | 99 | typedef struct { 100 | 101 | lrc_t *lrc; 102 | lrc_buf_t buf; 103 | 104 | int8_t erased[512]; /* array of index of erased data/code 
*/ 105 | int8_t source[512]; /* data/code indexes those are required to reconstruct */ 106 | int *decode_matrix; /* matrix with damaged data-row/unnecessary code-row removed */ 107 | 108 | int8_t inited_; 109 | 110 | } lrc_decoder_t; 111 | 112 | int lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_k_arr, int m); 113 | void lrc_destroy(lrc_t *lrc); 114 | int lrc_encode(lrc_t *lrc, lrc_buf_t *lb); 115 | int lrc_decode(lrc_t *lrc, lrc_buf_t *lb, int8_t *erased); 116 | int lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source); 117 | 118 | int *lrc_make_matrix(lrc_t *lrc); 119 | int lrc_get_n_locally_erased(lrc_t *lrc, int idx_local, int8_t *erased); 120 | int lrc_count_erased(int n, int8_t *erased); 121 | 122 | void lrc_debug_buf_line_(lrc_buf_t *lb, int n); 123 | void lrc_debug_matrix_(int *matrix, int row, int col); 124 | void lrc_debug_sources_(int n, int8_t *source); 125 | 126 | int lrc_buf_init(lrc_buf_t *lb, lrc_t *lrc, int64_t chunk_size); 127 | void lrc_buf_destroy(lrc_buf_t *lb); 128 | int lrc_buf_shadow(lrc_buf_t *lb, lrc_buf_t *src); 129 | 130 | int lrc_decoder_init(lrc_decoder_t *dec, lrc_t *lrc, lrc_buf_t *lb, int8_t *erased); 131 | void lrc_decoder_destroy(lrc_decoder_t *dec); 132 | int lrc_decoder_decode(lrc_decoder_t *dec); 133 | 134 | #endif /* LRC_EC_LRC_ */ 135 | // vim:sw=2:fdl=1 136 | -------------------------------------------------------------------------------- /m4/ltsugar.m4: -------------------------------------------------------------------------------- 1 | # ltsugar.m4 -- libtool m4 base layer. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Gary V. Vaughan, 2004 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 
10 | 11 | # serial 6 ltsugar.m4 12 | 13 | # This is to help aclocal find these macros, as it can't see m4_define. 14 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])]) 15 | 16 | 17 | # lt_join(SEP, ARG1, [ARG2...]) 18 | # ----------------------------- 19 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their 20 | # associated separator. 21 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier 22 | # versions in m4sugar had bugs. 23 | m4_define([lt_join], 24 | [m4_if([$#], [1], [], 25 | [$#], [2], [[$2]], 26 | [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])]) 27 | m4_define([_lt_join], 28 | [m4_if([$#$2], [2], [], 29 | [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])]) 30 | 31 | 32 | # lt_car(LIST) 33 | # lt_cdr(LIST) 34 | # ------------ 35 | # Manipulate m4 lists. 36 | # These macros are necessary as long as will still need to support 37 | # Autoconf-2.59, which quotes differently. 38 | m4_define([lt_car], [[$1]]) 39 | m4_define([lt_cdr], 40 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])], 41 | [$#], 1, [], 42 | [m4_dquote(m4_shift($@))])]) 43 | m4_define([lt_unquote], $1) 44 | 45 | 46 | # lt_append(MACRO-NAME, STRING, [SEPARATOR]) 47 | # ------------------------------------------ 48 | # Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'. 49 | # Note that neither SEPARATOR nor STRING are expanded; they are appended 50 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked). 51 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different 52 | # than defined and empty). 53 | # 54 | # This macro is needed until we can rely on Autoconf 2.62, since earlier 55 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING. 
56 | m4_define([lt_append], 57 | [m4_define([$1], 58 | m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])]) 59 | 60 | 61 | 62 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...]) 63 | # ---------------------------------------------------------- 64 | # Produce a SEP delimited list of all paired combinations of elements of 65 | # PREFIX-LIST with SUFFIX1 through SUFFIXn. Each element of the list 66 | # has the form PREFIXmINFIXSUFFIXn. 67 | # Needed until we can rely on m4_combine added in Autoconf 2.62. 68 | m4_define([lt_combine], 69 | [m4_if(m4_eval([$# > 3]), [1], 70 | [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl 71 | [[m4_foreach([_Lt_prefix], [$2], 72 | [m4_foreach([_Lt_suffix], 73 | ]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[, 74 | [_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])]) 75 | 76 | 77 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ]) 78 | # ----------------------------------------------------------------------- 79 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited 80 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ. 
81 | m4_define([lt_if_append_uniq], 82 | [m4_ifdef([$1], 83 | [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1], 84 | [lt_append([$1], [$2], [$3])$4], 85 | [$5])], 86 | [lt_append([$1], [$2], [$3])$4])]) 87 | 88 | 89 | # lt_dict_add(DICT, KEY, VALUE) 90 | # ----------------------------- 91 | m4_define([lt_dict_add], 92 | [m4_define([$1($2)], [$3])]) 93 | 94 | 95 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE) 96 | # -------------------------------------------- 97 | m4_define([lt_dict_add_subkey], 98 | [m4_define([$1($2:$3)], [$4])]) 99 | 100 | 101 | # lt_dict_fetch(DICT, KEY, [SUBKEY]) 102 | # ---------------------------------- 103 | m4_define([lt_dict_fetch], 104 | [m4_ifval([$3], 105 | m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]), 106 | m4_ifdef([$1($2)], [m4_defn([$1($2)])]))]) 107 | 108 | 109 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE]) 110 | # ----------------------------------------------------------------- 111 | m4_define([lt_if_dict_fetch], 112 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4], 113 | [$5], 114 | [$6])]) 115 | 116 | 117 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...]) 118 | # -------------------------------------------------------------- 119 | m4_define([lt_dict_filter], 120 | [m4_if([$5], [], [], 121 | [lt_join(m4_quote(m4_default([$4], [[, ]])), 122 | lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]), 123 | [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl 124 | ]) 125 | -------------------------------------------------------------------------------- /include/galois.h: -------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2013, James S. Plank and Kevin Greenan 3 | * All rights reserved. 
4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 
38 | */ 39 | 40 | 41 | #ifndef _GALOIS_H 42 | #define _GALOIS_H 43 | 44 | #include 45 | #include 46 | #include 47 | 48 | extern void galois_change_technique(gf_t *gf, int w); 49 | 50 | extern int galois_single_multiply(int a, int b, int w); 51 | extern int galois_single_divide(int a, int b, int w); 52 | extern int galois_inverse(int x, int w); 53 | 54 | void galois_region_xor( char *src, /* Source Region */ 55 | char *dest, /* Dest Region (holds result) */ 56 | int nbytes); /* Number of bytes in region */ 57 | 58 | /* These multiply regions in w=8, w=16 and w=32. They are much faster 59 | than calling galois_single_multiply. The regions must be long word aligned. */ 60 | 61 | void galois_w08_region_multiply(char *region, /* Region to multiply */ 62 | int multby, /* Number to multiply by */ 63 | int nbytes, /* Number of bytes in region */ 64 | char *r2, /* If r2 != NULL, products go here. 65 | Otherwise region is overwritten */ 66 | int add); /* If (r2 != NULL && add) the produce is XOR'd with r2 */ 67 | 68 | void galois_w16_region_multiply(char *region, /* Region to multiply */ 69 | int multby, /* Number to multiply by */ 70 | int nbytes, /* Number of bytes in region */ 71 | char *r2, /* If r2 != NULL, products go here. 72 | Otherwise region is overwritten */ 73 | int add); /* If (r2 != NULL && add) the produce is XOR'd with r2 */ 74 | 75 | void galois_w32_region_multiply(char *region, /* Region to multiply */ 76 | int multby, /* Number to multiply by */ 77 | int nbytes, /* Number of bytes in region */ 78 | char *r2, /* If r2 != NULL, products go here. 
79 | Otherwise region is overwritten */ 80 | int add); /* If (r2 != NULL && add) the produce is XOR'd with r2 */ 81 | 82 | gf_t* galois_init_field(int w, 83 | int mult_type, 84 | int region_type, 85 | int divide_type, 86 | uint64_t prim_poly, 87 | int arg1, 88 | int arg2); 89 | 90 | gf_t* galois_init_composite_field(int w, 91 | int region_type, 92 | int divide_type, 93 | int degree, 94 | gf_t* base_gf); 95 | 96 | gf_t * galois_get_field_ptr(int w); 97 | 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /m4/lt~obsolete.m4: -------------------------------------------------------------------------------- 1 | # lt~obsolete.m4 -- aclocal satisfying obsolete definitions. -*-Autoconf-*- 2 | # 3 | # Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software 4 | # Foundation, Inc. 5 | # Written by Scott James Remnant, 2004. 6 | # 7 | # This file is free software; the Free Software Foundation gives 8 | # unlimited permission to copy and/or distribute it, with or without 9 | # modifications, as long as this notice is preserved. 10 | 11 | # serial 5 lt~obsolete.m4 12 | 13 | # These exist entirely to fool aclocal when bootstrapping libtool. 14 | # 15 | # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN), 16 | # which have later been changed to m4_define as they aren't part of the 17 | # exported API, or moved to Autoconf or Automake where they belong. 18 | # 19 | # The trouble is, aclocal is a bit thick. It'll see the old AC_DEFUN 20 | # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us 21 | # using a macro with the same name in our local m4/libtool.m4 it'll 22 | # pull the old libtool.m4 in (it doesn't see our shiny new m4_define 23 | # and doesn't know about Autoconf macros at all.) 24 | # 25 | # So we provide this file, which has a silly filename so it's always 26 | # included after everything else. 
This provides aclocal with the 27 | # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything 28 | # because those macros already exist, or will be overwritten later. 29 | # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6. 30 | # 31 | # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here. 32 | # Yes, that means every name once taken will need to remain here until 33 | # we give up compatibility with versions before 1.7, at which point 34 | # we need to keep only those names which we still refer to. 35 | 36 | # This is to help aclocal find these macros, as it can't see m4_define. 37 | AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])]) 38 | 39 | m4_ifndef([AC_LIBTOOL_LINKER_OPTION], [AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])]) 40 | m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP])]) 41 | m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])]) 42 | m4_ifndef([_LT_AC_SHELL_INIT], [AC_DEFUN([_LT_AC_SHELL_INIT])]) 43 | m4_ifndef([_LT_AC_SYS_LIBPATH_AIX], [AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])]) 44 | m4_ifndef([_LT_PROG_LTMAIN], [AC_DEFUN([_LT_PROG_LTMAIN])]) 45 | m4_ifndef([_LT_AC_TAGVAR], [AC_DEFUN([_LT_AC_TAGVAR])]) 46 | m4_ifndef([AC_LTDL_ENABLE_INSTALL], [AC_DEFUN([AC_LTDL_ENABLE_INSTALL])]) 47 | m4_ifndef([AC_LTDL_PREOPEN], [AC_DEFUN([AC_LTDL_PREOPEN])]) 48 | m4_ifndef([_LT_AC_SYS_COMPILER], [AC_DEFUN([_LT_AC_SYS_COMPILER])]) 49 | m4_ifndef([_LT_AC_LOCK], [AC_DEFUN([_LT_AC_LOCK])]) 50 | m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE], [AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])]) 51 | m4_ifndef([_LT_AC_TRY_DLOPEN_SELF], [AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])]) 52 | m4_ifndef([AC_LIBTOOL_PROG_CC_C_O], [AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])]) 53 | m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])]) 54 | m4_ifndef([AC_LIBTOOL_OBJDIR], [AC_DEFUN([AC_LIBTOOL_OBJDIR])]) 55 | m4_ifndef([AC_LTDL_OBJDIR], [AC_DEFUN([AC_LTDL_OBJDIR])]) 56 | m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], 
[AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])]) 57 | m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP], [AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])]) 58 | m4_ifndef([AC_PATH_MAGIC], [AC_DEFUN([AC_PATH_MAGIC])]) 59 | m4_ifndef([AC_PROG_LD_GNU], [AC_DEFUN([AC_PROG_LD_GNU])]) 60 | m4_ifndef([AC_PROG_LD_RELOAD_FLAG], [AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])]) 61 | m4_ifndef([AC_DEPLIBS_CHECK_METHOD], [AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])]) 62 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])]) 63 | m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])]) 64 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])]) 65 | m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS], [AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])]) 66 | m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP], [AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])]) 67 | m4_ifndef([LT_AC_PROG_EGREP], [AC_DEFUN([LT_AC_PROG_EGREP])]) 68 | m4_ifndef([LT_AC_PROG_SED], [AC_DEFUN([LT_AC_PROG_SED])]) 69 | m4_ifndef([_LT_CC_BASENAME], [AC_DEFUN([_LT_CC_BASENAME])]) 70 | m4_ifndef([_LT_COMPILER_BOILERPLATE], [AC_DEFUN([_LT_COMPILER_BOILERPLATE])]) 71 | m4_ifndef([_LT_LINKER_BOILERPLATE], [AC_DEFUN([_LT_LINKER_BOILERPLATE])]) 72 | m4_ifndef([_AC_PROG_LIBTOOL], [AC_DEFUN([_AC_PROG_LIBTOOL])]) 73 | m4_ifndef([AC_LIBTOOL_SETUP], [AC_DEFUN([AC_LIBTOOL_SETUP])]) 74 | m4_ifndef([_LT_AC_CHECK_DLFCN], [AC_DEFUN([_LT_AC_CHECK_DLFCN])]) 75 | m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER], [AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])]) 76 | m4_ifndef([_LT_AC_TAGCONFIG], [AC_DEFUN([_LT_AC_TAGCONFIG])]) 77 | m4_ifndef([AC_DISABLE_FAST_INSTALL], [AC_DEFUN([AC_DISABLE_FAST_INSTALL])]) 78 | m4_ifndef([_LT_AC_LANG_CXX], [AC_DEFUN([_LT_AC_LANG_CXX])]) 79 | m4_ifndef([_LT_AC_LANG_F77], [AC_DEFUN([_LT_AC_LANG_F77])]) 80 | m4_ifndef([_LT_AC_LANG_GCJ], [AC_DEFUN([_LT_AC_LANG_GCJ])]) 81 | m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])]) 82 | m4_ifndef([_LT_AC_LANG_C_CONFIG], 
[AC_DEFUN([_LT_AC_LANG_C_CONFIG])]) 83 | m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])]) 84 | m4_ifndef([_LT_AC_LANG_CXX_CONFIG], [AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])]) 85 | m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])]) 86 | m4_ifndef([_LT_AC_LANG_F77_CONFIG], [AC_DEFUN([_LT_AC_LANG_F77_CONFIG])]) 87 | m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])]) 88 | m4_ifndef([_LT_AC_LANG_GCJ_CONFIG], [AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])]) 89 | m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG], [AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])]) 90 | m4_ifndef([_LT_AC_LANG_RC_CONFIG], [AC_DEFUN([_LT_AC_LANG_RC_CONFIG])]) 91 | m4_ifndef([AC_LIBTOOL_CONFIG], [AC_DEFUN([AC_LIBTOOL_CONFIG])]) 92 | m4_ifndef([_LT_AC_FILE_LTDLL_C], [AC_DEFUN([_LT_AC_FILE_LTDLL_C])]) 93 | m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS], [AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])]) 94 | m4_ifndef([_LT_AC_PROG_CXXCPP], [AC_DEFUN([_LT_AC_PROG_CXXCPP])]) 95 | m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS], [AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])]) 96 | m4_ifndef([_LT_PROG_ECHO_BACKSLASH], [AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])]) 97 | m4_ifndef([_LT_PROG_F77], [AC_DEFUN([_LT_PROG_F77])]) 98 | m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])]) 99 | m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])]) 100 | -------------------------------------------------------------------------------- /src/gf_method.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_method.c 7 | * 8 | * Parses argv to figure out the mult_type and arguments. Returns the gf. 
9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "gf_complete.h" 18 | #include "gf_int.h" 19 | #include "gf_method.h" 20 | 21 | int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting) 22 | { 23 | int mult_type, divide_type, region_type; 24 | int arg1, arg2; 25 | uint64_t prim_poly; 26 | gf_t *base; 27 | 28 | mult_type = GF_MULT_DEFAULT; 29 | region_type = GF_REGION_DEFAULT; 30 | divide_type = GF_DIVIDE_DEFAULT; 31 | prim_poly = 0; 32 | base = NULL; 33 | arg1 = 0; 34 | arg2 = 0; 35 | while (1) { 36 | if (argc > starting) { 37 | if (strcmp(argv[starting], "-m") == 0) { 38 | starting++; 39 | if (mult_type != GF_MULT_DEFAULT) { 40 | if (base != NULL) gf_free(base, 1); 41 | _gf_errno = GF_E_TWOMULT; 42 | return 0; 43 | } 44 | if (strcmp(argv[starting], "SHIFT") == 0) { 45 | mult_type = GF_MULT_SHIFT; 46 | starting++; 47 | } else if (strcmp(argv[starting], "CARRY_FREE") == 0) { 48 | mult_type = GF_MULT_CARRY_FREE; 49 | starting++; 50 | } else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) { 51 | mult_type = GF_MULT_CARRY_FREE_GK; 52 | starting++; 53 | } else if (strcmp(argv[starting], "GROUP") == 0) { 54 | mult_type = GF_MULT_GROUP; 55 | if (argc < starting + 3) { 56 | _gf_errno = GF_E_GROUPAR; 57 | return 0; 58 | } 59 | if (sscanf(argv[starting+1], "%d", &arg1) == 0 || 60 | sscanf(argv[starting+2], "%d", &arg2) == 0) { 61 | _gf_errno = GF_E_GROUPNU; 62 | return 0; 63 | } 64 | starting += 3; 65 | } else if (strcmp(argv[starting], "BYTWO_p") == 0) { 66 | mult_type = GF_MULT_BYTWO_p; 67 | starting++; 68 | } else if (strcmp(argv[starting], "BYTWO_b") == 0) { 69 | mult_type = GF_MULT_BYTWO_b; 70 | starting++; 71 | } else if (strcmp(argv[starting], "TABLE") == 0) { 72 | mult_type = GF_MULT_TABLE; 73 | starting++; 74 | } else if (strcmp(argv[starting], "LOG") == 0) { 75 | mult_type = GF_MULT_LOG_TABLE; 76 | starting++; 77 | } else if (strcmp(argv[starting], "LOG_ZERO") == 0) { 78 | mult_type = 
GF_MULT_LOG_ZERO; 79 | starting++; 80 | } else if (strcmp(argv[starting], "LOG_ZERO_EXT") == 0) { 81 | mult_type = GF_MULT_LOG_ZERO_EXT; 82 | starting++; 83 | } else if (strcmp(argv[starting], "SPLIT") == 0) { 84 | mult_type = GF_MULT_SPLIT_TABLE; 85 | if (argc < starting + 3) { 86 | _gf_errno = GF_E_SPLITAR; 87 | return 0; 88 | } 89 | if (sscanf(argv[starting+1], "%d", &arg1) == 0 || 90 | sscanf(argv[starting+2], "%d", &arg2) == 0) { 91 | _gf_errno = GF_E_SPLITNU; 92 | return 0; 93 | } 94 | starting += 3; 95 | } else if (strcmp(argv[starting], "COMPOSITE") == 0) { 96 | mult_type = GF_MULT_COMPOSITE; 97 | if (argc < starting + 2) { _gf_errno = GF_E_FEWARGS; return 0; } 98 | if (sscanf(argv[starting+1], "%d", &arg1) == 0) { 99 | _gf_errno = GF_E_COMP_A2; 100 | return 0; 101 | } 102 | starting += 2; 103 | base = (gf_t *) malloc(sizeof(gf_t)); 104 | starting = create_gf_from_argv(base, w/arg1, argc, argv, starting); 105 | if (starting == 0) { 106 | free(base); 107 | return 0; 108 | } 109 | } else { 110 | _gf_errno = GF_E_UNKNOWN; 111 | return 0; 112 | } 113 | } else if (strcmp(argv[starting], "-r") == 0) { 114 | starting++; 115 | if (strcmp(argv[starting], "DOUBLE") == 0) { 116 | region_type |= GF_REGION_DOUBLE_TABLE; 117 | starting++; 118 | } else if (strcmp(argv[starting], "QUAD") == 0) { 119 | region_type |= GF_REGION_QUAD_TABLE; 120 | starting++; 121 | } else if (strcmp(argv[starting], "LAZY") == 0) { 122 | region_type |= GF_REGION_LAZY; 123 | starting++; 124 | } else if (strcmp(argv[starting], "SIMD") == 0) { 125 | region_type |= GF_REGION_SIMD; 126 | starting++; 127 | } else if (strcmp(argv[starting], "NOSIMD") == 0) { 128 | region_type |= GF_REGION_NOSIMD; 129 | starting++; 130 | } else if (strcmp(argv[starting], "SSE") == 0) { 131 | region_type |= GF_REGION_SIMD; 132 | starting++; 133 | } else if (strcmp(argv[starting], "NOSSE") == 0) { 134 | region_type |= GF_REGION_NOSIMD; 135 | starting++; 136 | } else if (strcmp(argv[starting], "CAUCHY") == 0) { 137 | 
region_type |= GF_REGION_CAUCHY; 138 | starting++; 139 | } else if (strcmp(argv[starting], "ALTMAP") == 0) { 140 | region_type |= GF_REGION_ALTMAP; 141 | starting++; 142 | } else { 143 | if (base != NULL) gf_free(base, 1); 144 | _gf_errno = GF_E_UNK_REG; 145 | return 0; 146 | } 147 | } else if (strcmp(argv[starting], "-p") == 0) { 148 | starting++; 149 | if (sscanf(argv[starting], "%llx", (long long unsigned int *)(&prim_poly)) == 0) { 150 | if (base != NULL) gf_free(base, 1); 151 | _gf_errno = GF_E_POLYSPC; 152 | return 0; 153 | } 154 | starting++; 155 | } else if (strcmp(argv[starting], "-d") == 0) { 156 | starting++; 157 | if (divide_type != GF_DIVIDE_DEFAULT) { 158 | if (base != NULL) gf_free(base, 1); 159 | _gf_errno = GF_E_TWO_DIV; 160 | return 0; 161 | } else if (strcmp(argv[starting], "EUCLID") == 0) { 162 | divide_type = GF_DIVIDE_EUCLID; 163 | starting++; 164 | } else if (strcmp(argv[starting], "MATRIX") == 0) { 165 | divide_type = GF_DIVIDE_MATRIX; 166 | starting++; 167 | } else { 168 | _gf_errno = GF_E_UNK_DIV; 169 | return 0; 170 | } 171 | } else if (strcmp(argv[starting], "-") == 0) { 172 | /* 173 | printf("Scratch size: %d\n", gf_scratch_size(w, 174 | mult_type, region_type, divide_type, arg1, arg2)); 175 | */ 176 | if (gf_init_hard(gf, w, mult_type, region_type, divide_type, 177 | prim_poly, arg1, arg2, base, NULL) == 0) { 178 | if (base != NULL) gf_free(base, 1); 179 | return 0; 180 | } else 181 | return starting + 1; 182 | } else { 183 | if (base != NULL) gf_free(base, 1); 184 | _gf_errno = GF_E_UNKFLAG; 185 | return 0; 186 | } 187 | } else { 188 | if (base != NULL) gf_free(base, 1); 189 | _gf_errno = GF_E_FEWARGS; 190 | return 0; 191 | } 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /include/gf_complete.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. 
#ifndef _GF_COMPLETE_H_
#define _GF_COMPLETE_H_
#include <stdint.h>

/* SIMD intrinsic headers, pulled in only for the instruction sets the
   build has enabled. */
#ifdef INTEL_SSE4
  #ifdef __SSE4_2__
    #include <nmmintrin.h>
  #endif
  #ifdef __SSE4_1__
    #include <smmintrin.h>
  #endif
#endif

#ifdef INTEL_SSSE3
  #include <tmmintrin.h>
#endif

#ifdef INTEL_SSE2
  #include <emmintrin.h>
#endif

#ifdef INTEL_SSE4_PCLMUL
  #include <wmmintrin.h>
#endif

#if defined(ARM_NEON)
  #include <arm_neon.h>
#endif


/* These are the different ways to perform multiplication.
   Not all are implemented for all values of w.
   See the paper for an explanation of how they work. */

typedef enum {GF_MULT_DEFAULT,
              GF_MULT_SHIFT,
              GF_MULT_CARRY_FREE,
              GF_MULT_CARRY_FREE_GK,
              GF_MULT_GROUP,
              GF_MULT_BYTWO_p,
              GF_MULT_BYTWO_b,
              GF_MULT_TABLE,
              GF_MULT_LOG_TABLE,
              GF_MULT_LOG_ZERO,
              GF_MULT_LOG_ZERO_EXT,
              GF_MULT_SPLIT_TABLE,
              GF_MULT_COMPOSITE } gf_mult_type_t;

/* These are the different ways to optimize region
   operations.  They are bits because you can compose them.
   Certain optimizations only apply to certain gf_mult_type_t's.
   Again, please see documentation for how to use these.
   GF_REGION_SSE / GF_REGION_NOSSE are aliases that share the bit values
   of GF_REGION_SIMD / GF_REGION_NOSIMD. */

#define GF_REGION_DEFAULT      (0x0)
#define GF_REGION_DOUBLE_TABLE (0x1)
#define GF_REGION_QUAD_TABLE   (0x2)
#define GF_REGION_LAZY         (0x4)
#define GF_REGION_SIMD         (0x8)
#define GF_REGION_SSE          (0x8)
#define GF_REGION_NOSIMD       (0x10)
#define GF_REGION_NOSSE        (0x10)
#define GF_REGION_ALTMAP       (0x20)
#define GF_REGION_CAUCHY       (0x40)

typedef uint32_t gf_region_type_t;

/* These are different ways to implement division.
   Once again, it's best to use "DEFAULT".  However,
   there are times when you may want to experiment
   with the others. */

typedef enum { GF_DIVIDE_DEFAULT,
               GF_DIVIDE_MATRIX,
               GF_DIVIDE_EUCLID } gf_division_type_t;

/* We support w=4,8,16,32,64 and 128 with their own data types and
   operations for multiplication, division, etc.  We also support
   a "gen" type so that you can do general gf arithmetic for any
   value of w from 1 to 32.  You can perform a "region" operation
   on these if you use "CAUCHY" as the mapping.
 */

typedef uint32_t    gf_val_32_t;
typedef uint64_t    gf_val_64_t;
typedef uint64_t   *gf_val_128_t;

extern int _gf_errno;
extern void gf_error();

typedef struct gf *GFP;

/* Two-operand field operation (multiply/divide).  The w128 variant
   takes a third gf_val_128_t argument instead of returning a value. */
typedef union gf_func_a_b {
    gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a,  gf_val_32_t b);
    gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a,  gf_val_64_t b);
    void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b, gf_val_128_t c);
} gf_func_a_b;

/* One-operand field operation (inverse). */
typedef union {
    gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a);
    gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a);
    void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b);
} gf_func_a;

/* Region operation: multiplies `bytes` bytes of src by val into dest. */
typedef union {
    void  (*w32) (GFP gf, void *src, void *dest, gf_val_32_t val,  int bytes, int add);
    void  (*w64) (GFP gf, void *src, void *dest, gf_val_64_t val,  int bytes, int add);
    void  (*w128)(GFP gf, void *src, void *dest, gf_val_128_t val, int bytes, int add);
} gf_region;

/* Extracts the word at `index` from a region. */
typedef union {
    gf_val_32_t  (*w32) (GFP gf, void *start, int bytes, int index);
    gf_val_64_t  (*w64) (GFP gf, void *start, int bytes, int index);
    void         (*w128)(GFP gf, void *start, int bytes, int index, gf_val_128_t rv);
} gf_extract;

/* A Galois field instance: a table of operations plus opaque scratch
   memory owned by the implementation. */
typedef struct gf {
  gf_func_a_b    multiply;
  gf_func_a_b    divide;
  gf_func_a      inverse;
  gf_region      multiply_region;
  gf_extract     extract_word;
  void           *scratch;
} gf_t;

/* Initializes the GF to defaults.  Pass it a pointer to a gf_t.
   Returns 0 on failure, 1 on success. */

extern int gf_init_easy(GFP gf, int w);

/* Initializes the GF changing the defaults.
   Returns 0 on failure, 1 on success.
   Pass it a pointer to a gf_t.
   For mult_type and divide_type, use one of gf_mult_type_t gf_division_type_t .
   For region_type, OR together the GF_REGION_xxx's defined above.
   Use 0 as prim_poly for defaults.  Otherwise, the leading 1 is optional.
   Use NULL for scratch_memory to have init_hard allocate memory.  Otherwise,
   use gf_scratch_size() to determine how big scratch_memory has to be.
 */

extern int gf_init_hard(GFP gf,
                        int w,
                        int mult_type,
                        int region_type,
                        int divide_type,
                        uint64_t prim_poly,
                        int arg1,
                        int arg2,
                        GFP base_gf,
                        void *scratch_memory);

/* Determines the size for scratch_memory.
   Returns 0 on failure and non-zero on success. */

extern int gf_scratch_size(int w,
                           int mult_type,
                           int region_type,
                           int divide_type,
                           int arg1,
                           int arg2);

/* This reports the gf_scratch_size of a gf_t that has already been created */

extern int gf_size(GFP gf);

/* Frees scratch memory if gf_init_easy/gf_init_hard called malloc.
   If recursive = 1, then it calls itself recursively on base_gf. */

extern int gf_free(GFP gf, int recursive);

/* This is support for inline single multiplications and divisions.
   I know it's yucky, but if you've got to be fast, you've got to be fast.
   We support inlining for w=4, w=8 and w=16.

   To use inline multiplication and division with w=4 or 8, you should use the
   default gf_t, or one with a single table.  Otherwise, gf_w4/8_get_mult_table()
   will return NULL.  Similarly, with w=16, the gf_t must be LOG */

uint8_t *gf_w4_get_mult_table(GFP gf);
uint8_t *gf_w4_get_div_table(GFP gf);

#define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|(b)])

uint8_t *gf_w8_get_mult_table(GFP gf);
uint8_t *gf_w8_get_div_table(GFP gf);

#define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) (a))<<8)|(b)])

uint16_t *gf_w16_get_log_table(GFP gf);
uint16_t *gf_w16_get_mult_alog_table(GFP gf);
uint16_t *gf_w16_get_div_alog_table(GFP gf);

/* The whole expansion is parenthesized so that the conditional cannot be
   split by operators at the call site, e.g. GF_W16_INLINE_MULT(...) ^ x.
   Both macros evaluate a and b more than once; pass side-effect-free
   expressions. */
#define GF_W16_INLINE_MULT(log, alog, a, b) \
  (((a) == 0 || (b) == 0) ? 0 : ((alog)[(uint32_t)(log)[(a)]+(uint32_t)(log)[(b)]]))
#define GF_W16_INLINE_DIV(log, alog, a, b) \
  (((a) == 0 || (b) == 0) ? 0 : ((alog)[(int)(log)[(a)]-(int)(log)[(b)]]))
#endif
21 | 22 | # As a special exception to the GNU General Public License, if you 23 | # distribute this file as part of a program that contains a 24 | # configuration script generated by Autoconf, you may include it under 25 | # the same distribution terms that you use for the rest of that program. 26 | 27 | if test $# -eq 0; then 28 | echo 1>&2 "Try '$0 --help' for more information" 29 | exit 1 30 | fi 31 | 32 | case $1 in 33 | 34 | --is-lightweight) 35 | # Used by our autoconf macros to check whether the available missing 36 | # script is modern enough. 37 | exit 0 38 | ;; 39 | 40 | --run) 41 | # Back-compat with the calling convention used by older automake. 42 | shift 43 | ;; 44 | 45 | -h|--h|--he|--hel|--help) 46 | echo "\ 47 | $0 [OPTION]... PROGRAM [ARGUMENT]... 48 | 49 | Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due 50 | to PROGRAM being missing or too old. 51 | 52 | Options: 53 | -h, --help display this help and exit 54 | -v, --version output version information and exit 55 | 56 | Supported PROGRAM values: 57 | aclocal autoconf autoheader autom4te automake makeinfo 58 | bison yacc flex lex help2man 59 | 60 | Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and 61 | 'g' are ignored when checking the name. 62 | 63 | Send bug reports to ." 64 | exit $? 65 | ;; 66 | 67 | -v|--v|--ve|--ver|--vers|--versi|--versio|--version) 68 | echo "missing $scriptversion (GNU Automake)" 69 | exit $? 70 | ;; 71 | 72 | -*) 73 | echo 1>&2 "$0: unknown '$1' option" 74 | echo 1>&2 "Try '$0 --help' for more information" 75 | exit 1 76 | ;; 77 | 78 | esac 79 | 80 | # Run the given program, remember its exit status. 81 | "$@"; st=$? 82 | 83 | # If it succeeded, we are done. 84 | test $st -eq 0 && exit 0 85 | 86 | # Also exit now if we it failed (or wasn't found), and '--version' was 87 | # passed; such an option is passed most likely to detect whether the 88 | # program is present and works. 
89 | case $2 in --version|--help) exit $st;; esac 90 | 91 | # Exit code 63 means version mismatch. This often happens when the user 92 | # tries to use an ancient version of a tool on a file that requires a 93 | # minimum version. 94 | if test $st -eq 63; then 95 | msg="probably too old" 96 | elif test $st -eq 127; then 97 | # Program was missing. 98 | msg="missing on your system" 99 | else 100 | # Program was found and executed, but failed. Give up. 101 | exit $st 102 | fi 103 | 104 | perl_URL=http://www.perl.org/ 105 | flex_URL=http://flex.sourceforge.net/ 106 | gnu_software_URL=http://www.gnu.org/software 107 | 108 | program_details () 109 | { 110 | case $1 in 111 | aclocal|automake) 112 | echo "The '$1' program is part of the GNU Automake package:" 113 | echo "<$gnu_software_URL/automake>" 114 | echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:" 115 | echo "<$gnu_software_URL/autoconf>" 116 | echo "<$gnu_software_URL/m4/>" 117 | echo "<$perl_URL>" 118 | ;; 119 | autoconf|autom4te|autoheader) 120 | echo "The '$1' program is part of the GNU Autoconf package:" 121 | echo "<$gnu_software_URL/autoconf/>" 122 | echo "It also requires GNU m4 and Perl in order to run:" 123 | echo "<$gnu_software_URL/m4/>" 124 | echo "<$perl_URL>" 125 | ;; 126 | esac 127 | } 128 | 129 | give_advice () 130 | { 131 | # Normalize program name to check for. 132 | normalized_program=`echo "$1" | sed ' 133 | s/^gnu-//; t 134 | s/^gnu//; t 135 | s/^g//; t'` 136 | 137 | printf '%s\n' "'$1' is $msg." 138 | 139 | configure_deps="'configure.ac' or m4 files included by 'configure.ac'" 140 | case $normalized_program in 141 | autoconf*) 142 | echo "You should only need it if you modified 'configure.ac'," 143 | echo "or m4 files included by it." 144 | program_details 'autoconf' 145 | ;; 146 | autoheader*) 147 | echo "You should only need it if you modified 'acconfig.h' or" 148 | echo "$configure_deps." 
149 | program_details 'autoheader' 150 | ;; 151 | automake*) 152 | echo "You should only need it if you modified 'Makefile.am' or" 153 | echo "$configure_deps." 154 | program_details 'automake' 155 | ;; 156 | aclocal*) 157 | echo "You should only need it if you modified 'acinclude.m4' or" 158 | echo "$configure_deps." 159 | program_details 'aclocal' 160 | ;; 161 | autom4te*) 162 | echo "You might have modified some maintainer files that require" 163 | echo "the 'autom4te' program to be rebuilt." 164 | program_details 'autom4te' 165 | ;; 166 | bison*|yacc*) 167 | echo "You should only need it if you modified a '.y' file." 168 | echo "You may want to install the GNU Bison package:" 169 | echo "<$gnu_software_URL/bison/>" 170 | ;; 171 | lex*|flex*) 172 | echo "You should only need it if you modified a '.l' file." 173 | echo "You may want to install the Fast Lexical Analyzer package:" 174 | echo "<$flex_URL>" 175 | ;; 176 | help2man*) 177 | echo "You should only need it if you modified a dependency" \ 178 | "of a man page." 179 | echo "You may want to install the GNU Help2man package:" 180 | echo "<$gnu_software_URL/help2man/>" 181 | ;; 182 | makeinfo*) 183 | echo "You should only need it if you modified a '.texi' file, or" 184 | echo "any other file indirectly affecting the aspect of the manual." 185 | echo "You might want to install the Texinfo package:" 186 | echo "<$gnu_software_URL/texinfo/>" 187 | echo "The spurious makeinfo call might also be the consequence of" 188 | echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might" 189 | echo "want to install GNU make:" 190 | echo "<$gnu_software_URL/make/>" 191 | ;; 192 | *) 193 | echo "You might have modified some files without having the proper" 194 | echo "tools for further handling them. Check the 'README' file, it" 195 | echo "often tells you about the needed prerequisites for installing" 196 | echo "this package. 
You may also peek at any GNU archive site, in" 197 | echo "case some other package contains this missing '$1' program." 198 | ;; 199 | esac 200 | } 201 | 202 | give_advice "$1" | sed -e '1s/^/WARNING: /' \ 203 | -e '2,$s/^/ /' >&2 204 | 205 | # Propagate the correct exit status (expected to be 127 for a program 206 | # not found, 63 for a program that failed due to version mismatch). 207 | exit $st 208 | 209 | # Local variables: 210 | # eval: (add-hook 'write-file-hooks 'time-stamp) 211 | # time-stamp-start: "scriptversion=" 212 | # time-stamp-format: "%:y-%02m-%02d.%02H" 213 | # time-stamp-time-zone: "UTC" 214 | # time-stamp-end: "; # UTC" 215 | # End: 216 | -------------------------------------------------------------------------------- /src/neon/gf_w4_neon.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * Copyright (c) 2014: Janne Grunau 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * - Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 15 | * - Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in 17 | * the documentation and/or other materials provided with the 18 | * distribution. 19 | * 20 | * - Neither the name of the University of Tennessee nor the names of its 21 | * contributors may be used to endorse or promote products derived 22 | * from this software without specific prior written permission. 
/*
 * Multiplies two w=4 field elements with NEON polynomial multiplication.
 *
 * a4 and b4 are broadcast to every lane, multiplied with vmul_p8, and the
 * product bits above the low nibble are folded back using the low five bits
 * of the field's primitive polynomial.  Only lane 0 of the result is
 * returned.
 */
static
gf_val_32_t
gf_w4_neon_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
{
  gf_val_32_t rv = 0;
  poly8x8_t       result, prim_poly;
  poly8x8_t       a, b, w;
  uint8x8_t       v;
  gf_internal_t * h = gf->scratch;

  a =  vdup_n_p8 (a4);
  b =  vdup_n_p8 (b4);

  prim_poly = vdup_n_p8 ((uint32_t)(h->prim_poly & 0x1fULL));

  /* Do the initial multiply */
  result = vmul_p8 (a, b);
  /* v holds the part of each product above bit 3; multiplying it by the
     polynomial and XORing performs the (single) reduction step. */
  v = vshr_n_u8 (vreinterpret_u8_p8(result), 4);
  w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
  result = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(result), vreinterpret_u8_p8(w)));

  /* Extracts 32 bit value from result. */
  rv = (gf_val_32_t)vget_lane_u8 (vreinterpret_u8_p8 (result), 0);

  return rv;
}

/*
 * Inner loop of the carry-less region multiply: processes 8 bytes of s8 per
 * iteration until d8 reaches d_end.  Each byte is treated as two 4-bit
 * elements (low nibble "even", high nibble "odd"); both are multiplied by
 * val, reduced, and recombined.  When xor is non-zero the result is XORed
 * into the existing destination bytes, otherwise it overwrites them.
 *
 * The loop advances in steps of 8 with no tail handling, so the caller must
 * supply a region whose length is a multiple of 8 bytes.
 */
static inline void
neon_clm_multiply_region_from_single (gf_t *gf, uint8_t *s8, uint8_t *d8,
                                      gf_val_32_t val, uint8_t *d_end, int xor)
{
  gf_internal_t * h = gf->scratch;
  poly8x8_t       prim_poly;
  poly8x8_t       a, w, even, odd;
  uint8x8_t       b, c, v, mask;

  a         = vdup_n_p8 (val);
  mask      = vdup_n_u8 (0xf);
  prim_poly = vdup_n_p8 ((uint8_t)(h->prim_poly & 0x1fULL));

  while (d8 < d_end) {
    b = vld1_u8 (s8);

    /* Split each source byte into its two nibbles. */
    even = vreinterpret_p8_u8 (vand_u8 (b, mask));
    odd  = vreinterpret_p8_u8 (vshr_n_u8 (b, 4));

    /* c is only loaded (and only read below) in xor mode. */
    if (xor)
        c = vld1_u8 (d8);

    even = vmul_p8 (a, even);
    odd  = vmul_p8 (a, odd);

    /* Reduce the even-nibble products. */
    v = vshr_n_u8 (vreinterpret_u8_p8(even), 4);
    w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
    even = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(even), vreinterpret_u8_p8(w)));

    /* Reduce the odd-nibble products. */
    v = vshr_n_u8 (vreinterpret_u8_p8(odd), 4);
    w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
    odd = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(odd), vreinterpret_u8_p8(w)));

    /* Put the odd results back into the high nibble and merge. */
    v = veor_u8 (vreinterpret_u8_p8 (even), vshl_n_u8 (vreinterpret_u8_p8 (odd), 4));

    if (xor)
      v = veor_u8 (c, v);

    vst1_u8 (d8, v);

    d8 += 8;
    s8 += 8;
  }
}


/*
 * multiply_region entry point for the carry-less NEON path.
 *
 * val == 0 and val == 1 are delegated to gf_multby_zero / gf_multby_one.
 * Otherwise the region is set up with gf_set_region_data (last argument 16)
 * and the span between rd.s_start/rd.d_start and rd.d_top is handled by the
 * NEON loop, bracketed by the initial/final region alignment helpers.
 * NOTE(review): presumably those helpers process the unaligned head and
 * tail of the buffer -- confirm against gf_int.h.
 */
static void
gf_w4_neon_clm_multiply_region_from_single (gf_t *gf, void *src, void *dest,
                                            gf_val_32_t val, int bytes, int xor)
{
  gf_region_data rd;
  uint8_t *s8;
  uint8_t *d8;

  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }

  gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);
  gf_do_initial_region_alignment(&rd);

  s8 = (uint8_t *) rd.s_start;
  d8 = (uint8_t *) rd.d_start;

  /* The helper is called with a literal 1 or 0 in each branch rather than
     with xor itself. */
  if (xor)
    neon_clm_multiply_region_from_single (gf, s8, d8, val, rd.d_top, 1);
  else
    neon_clm_multiply_region_from_single (gf, s8, d8, val, rd.d_top, 0);

  gf_do_final_region_alignment(&rd);
}

/* When ARCH_AARCH64 is not defined, provide vqtbl1q_u8 as two 8-byte
   vtbl2_u8 lookups (low and high halves of v) into a uint8x8x2_t table. */
#ifndef ARCH_AARCH64
#define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)),   \
                                       vtbl2_u8(tbl, vget_high_u8(v)))
#endif

/*
 * Inner loop of the table-driven region multiply: processes 16 bytes of src
 * per iteration until dst reaches d_end.
 *
 * tl is loaded from the 16 bytes at std->mult + (val << GF_FIELD_WIDTH);
 * th is the same table with every entry shifted left by 4.  Each source
 * byte's low nibble is looked up in tl and its high nibble in th, and the
 * two results are XORed to form the output byte.  When xor is non-zero the
 * result is XORed into the existing destination bytes.
 */
static
inline
void
w4_single_table_multiply_region_neon(gf_t *gf, uint8_t *src, uint8_t *dst,
                                     uint8_t * d_end, gf_val_32_t val, int xor)
{
  struct gf_single_table_data *std;
  uint8_t *base;
  uint8x16_t r, va, vh, vl, loset;

#ifdef ARCH_AARCH64
  uint8x16_t th, tl;
#else
  uint8x8x2_t th, tl;
#endif

  std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private;
  base = (uint8_t *) std->mult;
  base += (val << GF_FIELD_WIDTH);

#ifdef ARCH_AARCH64
  tl = vld1q_u8 (base);
  th = vshlq_n_u8 (tl, 4);
#else
  tl.val[0] = vld1_u8 (base);
  tl.val[1] = vld1_u8 (base + 8);
  th.val[0] =  vshl_n_u8 (tl.val[0], 4);
  th.val[1] =  vshl_n_u8 (tl.val[1], 4);
#endif

  loset = vdupq_n_u8(0xf);

  while (dst < d_end) {
      va = vld1q_u8 (src);

      /* Nibble indices for the two table lookups. */
      vh = vshrq_n_u8 (va, 4);
      vl = vandq_u8 (va, loset);

      /* va is reused to hold the old destination bytes in xor mode. */
      if (xor)
        va = vld1q_u8 (dst);

      vh = vqtbl1q_u8 (th, vh);
      vl = vqtbl1q_u8 (tl, vl);

      r = veorq_u8 (vh, vl);

      if (xor)
        r = veorq_u8 (va, r);

      vst1q_u8 (dst, r);

    dst += 16;
    src += 16;
  }
}

/*
 * multiply_region entry point for the single-table NEON path.  Same
 * structure as the carry-less wrapper: trivial multipliers are delegated,
 * then the region between rd.s_start/rd.d_start and rd.d_top is processed
 * by the NEON loop between the initial/final alignment helpers.
 */
static
void
gf_w4_single_table_multiply_region_neon(gf_t *gf, void *src, void *dest,
                                        gf_val_32_t val, int bytes, int xor)
{
  gf_region_data rd;
  uint8_t *sptr, *dptr, *top;

  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }

  gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);
  gf_do_initial_region_alignment(&rd);

  sptr = rd.s_start;
  dptr = rd.d_start;
  top  = rd.d_top;

  /* Called with a literal 1 or 0 in each branch rather than with xor. */
  if (xor)
      w4_single_table_multiply_region_neon(gf, sptr, dptr, top, val, 1);
  else
      w4_single_table_multiply_region_neon(gf, sptr, dptr, top, val, 0);

  gf_do_final_region_alignment(&rd);

}


/* Installs the carry-less NEON multiply and region multiply into gf.
   Always returns 1. */
int gf_w4_neon_cfm_init(gf_t *gf)
{
  // single clm multiplication probably pointless
  gf->multiply.w32 = gf_w4_neon_clm_multiply;
  gf->multiply_region.w32 = gf_w4_neon_clm_multiply_region_from_single;

  return 1;
}

/* Installs the single-table NEON region multiply into gf; the scalar
   multiply pointer is left untouched. */
void gf_w4_neon_single_table_init(gf_t *gf)
{
  gf->multiply_region.w32 = gf_w4_single_table_multiply_region_neon;
}
21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 38 | */ 39 | 40 | /* Jerasure's authors: 41 | 42 | Revision 2.x - 2014: James S. Plank and Kevin M. Greenan 43 | Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. 44 | Revision 1.0 - 2007: James S. 
/*
 * Builds the coding bit-matrix for k data devices and word size w.
 *
 * The result is a zero-initialized, row-major array of 2*w rows by k*w
 * columns of int, in which the following entries are set to 1:
 *   - rows 0 .. w-1:   for every device j, a w x w identity block
 *                      (row i, column j*w + i);
 *   - rows w .. 2w-1:  for every device j, the identity block rotated by j
 *                      (row w+i, column j*w + (i+j) % w), plus, for j > 0,
 *                      one extra bit in row w+y, column j*w + (y+j-1) % w
 *                      where y = (j * ((w-1)/2)) % w.
 *
 * Returns NULL when k > w, when k or w is negative, when the matrix size
 * does not fit in size_t, or when allocation fails.  The matrix is
 * heap-allocated and this function keeps no reference to it, so the caller
 * releases it with free().
 */
int *liberation_coding_bitmatrix(int k, int w)
{
  int *matrix;
  size_t kk, ww, row_len, second_half, cells, max_cells, i, j, y;

  if (k < 0 || w < 0 || k > w) return NULL;

  kk = (size_t) k;
  ww = (size_t) w;

  /* cells = 2*k*w*w must be representable and allocatable; check each
     multiplication by division so nothing wraps. */
  max_cells = (size_t) -1 / sizeof(int);
  if (ww != 0 && (ww > max_cells / ww || kk > max_cells / (ww * ww) / 2)) {
    return NULL;
  }

  row_len = kk * ww;            /* ints per matrix row */
  second_half = row_len * ww;   /* offset of row w */
  cells = 2 * second_half;

  /* calloc zero-fills; ask for at least one element so that an empty
     matrix (k == 0) still yields a valid pointer that can be freed. */
  matrix = calloc(cells != 0 ? cells : 1, sizeof *matrix);
  if (matrix == NULL) return NULL;

  /* Identity blocks (top half) and rotated identity blocks (bottom half). */
  for (i = 0; i < ww; i++) {
    for (j = 0; j < kk; j++) {
      matrix[i*row_len + j*ww + i] = 1;
      matrix[second_half + i*row_len + j*ww + (i + j) % ww] = 1;
    }
  }

  /* One additional bit per device other than the first.  j >= 1 implies
     w >= k >= 2, so the modulo is well defined. */
  for (j = 1; j < kk; j++) {
    y = (j * ((ww - 1) / 2)) % ww;
    matrix[second_half + y*row_len + j*ww + (y + j - 1) % ww] = 1;
  }

  return matrix;
}
matrix[index+3*k*w+1*w+2] = 1; 133 | matrix[index+4*k*w+1*w+6] = 1; 134 | matrix[index+5*k*w+1*w+1] = 1; 135 | matrix[index+6*k*w+1*w+5] = 1; 136 | matrix[index+7*k*w+1*w+4] = 1; 137 | matrix[index+4*k*w+1*w+7] = 1; 138 | 139 | if (k == 2) return matrix; 140 | matrix[index+0*k*w+2*w+6] = 1; 141 | matrix[index+1*k*w+2*w+2] = 1; 142 | matrix[index+2*k*w+2*w+4] = 1; 143 | matrix[index+3*k*w+2*w+0] = 1; 144 | matrix[index+4*k*w+2*w+7] = 1; 145 | matrix[index+5*k*w+2*w+3] = 1; 146 | matrix[index+6*k*w+2*w+1] = 1; 147 | matrix[index+7*k*w+2*w+5] = 1; 148 | matrix[index+1*k*w+2*w+3] = 1; 149 | 150 | if (k == 3) return matrix; 151 | matrix[index+0*k*w+3*w+2] = 1; 152 | matrix[index+1*k*w+3*w+5] = 1; 153 | matrix[index+2*k*w+3*w+7] = 1; 154 | matrix[index+3*k*w+3*w+6] = 1; 155 | matrix[index+4*k*w+3*w+0] = 1; 156 | matrix[index+5*k*w+3*w+3] = 1; 157 | matrix[index+6*k*w+3*w+4] = 1; 158 | matrix[index+7*k*w+3*w+1] = 1; 159 | matrix[index+5*k*w+3*w+4] = 1; 160 | 161 | if (k == 4) return matrix; 162 | matrix[index+0*k*w+4*w+5] = 1; 163 | matrix[index+1*k*w+4*w+6] = 1; 164 | matrix[index+2*k*w+4*w+1] = 1; 165 | matrix[index+3*k*w+4*w+7] = 1; 166 | matrix[index+4*k*w+4*w+2] = 1; 167 | matrix[index+5*k*w+4*w+4] = 1; 168 | matrix[index+6*k*w+4*w+3] = 1; 169 | matrix[index+7*k*w+4*w+0] = 1; 170 | matrix[index+2*k*w+4*w+0] = 1; 171 | 172 | if (k == 5) return matrix; 173 | matrix[index+0*k*w+5*w+1] = 1; 174 | matrix[index+1*k*w+5*w+2] = 1; 175 | matrix[index+2*k*w+5*w+3] = 1; 176 | matrix[index+3*k*w+5*w+4] = 1; 177 | matrix[index+4*k*w+5*w+5] = 1; 178 | matrix[index+5*k*w+5*w+6] = 1; 179 | matrix[index+6*k*w+5*w+7] = 1; 180 | matrix[index+7*k*w+5*w+0] = 1; 181 | matrix[index+7*k*w+5*w+2] = 1; 182 | 183 | if (k == 6) return matrix; 184 | matrix[index+0*k*w+6*w+3] = 1; 185 | matrix[index+1*k*w+6*w+0] = 1; 186 | matrix[index+2*k*w+6*w+6] = 1; 187 | matrix[index+3*k*w+6*w+5] = 1; 188 | matrix[index+4*k*w+6*w+1] = 1; 189 | matrix[index+5*k*w+6*w+7] = 1; 190 | matrix[index+6*k*w+6*w+4] = 
1; 191 | matrix[index+7*k*w+6*w+2] = 1; 192 | matrix[index+6*k*w+6*w+5] = 1; 193 | 194 | if (k == 7) return matrix; 195 | matrix[index+0*k*w+7*w+4] = 1; 196 | matrix[index+1*k*w+7*w+7] = 1; 197 | matrix[index+2*k*w+7*w+1] = 1; 198 | matrix[index+3*k*w+7*w+5] = 1; 199 | matrix[index+4*k*w+7*w+3] = 1; 200 | matrix[index+5*k*w+7*w+2] = 1; 201 | matrix[index+6*k*w+7*w+0] = 1; 202 | matrix[index+7*k*w+7*w+6] = 1; 203 | matrix[index+3*k*w+7*w+1] = 1; 204 | 205 | return matrix; 206 | } 207 | 208 | int *blaum_roth_coding_bitmatrix(int k, int w) 209 | { 210 | int *matrix, i, j, index, l, m, p; 211 | 212 | if (k > w) return NULL ; 213 | 214 | matrix = talloc(int, 2*k*w*w); 215 | if (matrix == NULL) return NULL; 216 | bzero(matrix, sizeof(int)*2*k*w*w); 217 | 218 | /* Set up identity matrices */ 219 | 220 | for(i = 0; i < w; i++) { 221 | index = i*k*w+i; 222 | for (j = 0; j < k; j++) { 223 | matrix[index] = 1; 224 | index += w; 225 | } 226 | } 227 | 228 | /* Set up blaum_roth matrices -- Ignore identity */ 229 | 230 | p = w+1; 231 | for (j = 0; j < k; j++) { 232 | index = k*w*w+j*w; 233 | if (j == 0) { 234 | for (l = 0; l < w; l++) { 235 | matrix[index+l] = 1; 236 | index += k*w; 237 | } 238 | } else { 239 | i = j; 240 | for (l = 1; l <= w; l++) { 241 | if (l != p-i) { 242 | m = l+i; 243 | if (m >= p) m -= p; 244 | m--; 245 | matrix[index+m] = 1; 246 | } else { 247 | matrix[index+i-1] = 1; 248 | if (i%2 == 0) { 249 | m = i/2; 250 | } else { 251 | m = (p/2) + 1 + (i/2); 252 | } 253 | m--; 254 | matrix[index+m] = 1; 255 | } 256 | index += k*w; 257 | } 258 | } 259 | } 260 | 261 | return matrix; 262 | } 263 | -------------------------------------------------------------------------------- /compile: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | # Wrapper for compilers which do not understand '-c -o'. 3 | 4 | scriptversion=2012-10-14.11; # UTC 5 | 6 | # Copyright (C) 1999-2014 Free Software Foundation, Inc. 
7 | # Written by Tom Tromey . 8 | # 9 | # This program is free software; you can redistribute it and/or modify 10 | # it under the terms of the GNU General Public License as published by 11 | # the Free Software Foundation; either version 2, or (at your option) 12 | # any later version. 13 | # 14 | # This program is distributed in the hope that it will be useful, 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 | # GNU General Public License for more details. 18 | # 19 | # You should have received a copy of the GNU General Public License 20 | # along with this program. If not, see . 21 | 22 | # As a special exception to the GNU General Public License, if you 23 | # distribute this file as part of a program that contains a 24 | # configuration script generated by Autoconf, you may include it under 25 | # the same distribution terms that you use for the rest of that program. 26 | 27 | # This file is maintained in Automake, please report 28 | # bugs to or send patches to 29 | # . 30 | 31 | nl=' 32 | ' 33 | 34 | # We need space, tab and new line, in precisely that order. Quoting is 35 | # there to prevent tools from complaining about whitespace usage. 36 | IFS=" "" $nl" 37 | 38 | file_conv= 39 | 40 | # func_file_conv build_file lazy 41 | # Convert a $build file to $host form and store it in $file 42 | # Currently only supports Windows hosts. If the determined conversion 43 | # type is listed in (the comma separated) LAZY, no conversion will 44 | # take place. 
45 | func_file_conv () 46 | { 47 | file=$1 48 | case $file in 49 | / | /[!/]*) # absolute file, and not a UNC file 50 | if test -z "$file_conv"; then 51 | # lazily determine how to convert abs files 52 | case `uname -s` in 53 | MINGW*) 54 | file_conv=mingw 55 | ;; 56 | CYGWIN*) 57 | file_conv=cygwin 58 | ;; 59 | *) 60 | file_conv=wine 61 | ;; 62 | esac 63 | fi 64 | case $file_conv/,$2, in 65 | *,$file_conv,*) 66 | ;; 67 | mingw/*) 68 | file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'` 69 | ;; 70 | cygwin/*) 71 | file=`cygpath -m "$file" || echo "$file"` 72 | ;; 73 | wine/*) 74 | file=`winepath -w "$file" || echo "$file"` 75 | ;; 76 | esac 77 | ;; 78 | esac 79 | } 80 | 81 | # func_cl_dashL linkdir 82 | # Make cl look for libraries in LINKDIR 83 | func_cl_dashL () 84 | { 85 | func_file_conv "$1" 86 | if test -z "$lib_path"; then 87 | lib_path=$file 88 | else 89 | lib_path="$lib_path;$file" 90 | fi 91 | linker_opts="$linker_opts -LIBPATH:$file" 92 | } 93 | 94 | # func_cl_dashl library 95 | # Do a library search-path lookup for cl 96 | func_cl_dashl () 97 | { 98 | lib=$1 99 | found=no 100 | save_IFS=$IFS 101 | IFS=';' 102 | for dir in $lib_path $LIB 103 | do 104 | IFS=$save_IFS 105 | if $shared && test -f "$dir/$lib.dll.lib"; then 106 | found=yes 107 | lib=$dir/$lib.dll.lib 108 | break 109 | fi 110 | if test -f "$dir/$lib.lib"; then 111 | found=yes 112 | lib=$dir/$lib.lib 113 | break 114 | fi 115 | if test -f "$dir/lib$lib.a"; then 116 | found=yes 117 | lib=$dir/lib$lib.a 118 | break 119 | fi 120 | done 121 | IFS=$save_IFS 122 | 123 | if test "$found" != yes; then 124 | lib=$lib.lib 125 | fi 126 | } 127 | 128 | # func_cl_wrapper cl arg... 
129 | # Adjust compile command to suit cl 130 | func_cl_wrapper () 131 | { 132 | # Assume a capable shell 133 | lib_path= 134 | shared=: 135 | linker_opts= 136 | for arg 137 | do 138 | if test -n "$eat"; then 139 | eat= 140 | else 141 | case $1 in 142 | -o) 143 | # configure might choose to run compile as 'compile cc -o foo foo.c'. 144 | eat=1 145 | case $2 in 146 | *.o | *.[oO][bB][jJ]) 147 | func_file_conv "$2" 148 | set x "$@" -Fo"$file" 149 | shift 150 | ;; 151 | *) 152 | func_file_conv "$2" 153 | set x "$@" -Fe"$file" 154 | shift 155 | ;; 156 | esac 157 | ;; 158 | -I) 159 | eat=1 160 | func_file_conv "$2" mingw 161 | set x "$@" -I"$file" 162 | shift 163 | ;; 164 | -I*) 165 | func_file_conv "${1#-I}" mingw 166 | set x "$@" -I"$file" 167 | shift 168 | ;; 169 | -l) 170 | eat=1 171 | func_cl_dashl "$2" 172 | set x "$@" "$lib" 173 | shift 174 | ;; 175 | -l*) 176 | func_cl_dashl "${1#-l}" 177 | set x "$@" "$lib" 178 | shift 179 | ;; 180 | -L) 181 | eat=1 182 | func_cl_dashL "$2" 183 | ;; 184 | -L*) 185 | func_cl_dashL "${1#-L}" 186 | ;; 187 | -static) 188 | shared=false 189 | ;; 190 | -Wl,*) 191 | arg=${1#-Wl,} 192 | save_ifs="$IFS"; IFS=',' 193 | for flag in $arg; do 194 | IFS="$save_ifs" 195 | linker_opts="$linker_opts $flag" 196 | done 197 | IFS="$save_ifs" 198 | ;; 199 | -Xlinker) 200 | eat=1 201 | linker_opts="$linker_opts $2" 202 | ;; 203 | -*) 204 | set x "$@" "$1" 205 | shift 206 | ;; 207 | *.cc | *.CC | *.cxx | *.CXX | *.[cC]++) 208 | func_file_conv "$1" 209 | set x "$@" -Tp"$file" 210 | shift 211 | ;; 212 | *.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO]) 213 | func_file_conv "$1" mingw 214 | set x "$@" "$file" 215 | shift 216 | ;; 217 | *) 218 | set x "$@" "$1" 219 | shift 220 | ;; 221 | esac 222 | fi 223 | shift 224 | done 225 | if test -n "$linker_opts"; then 226 | linker_opts="-link$linker_opts" 227 | fi 228 | exec "$@" $linker_opts 229 | exit 1 230 | } 231 | 232 | eat= 233 | 234 | case $1 in 235 | '') 236 | echo "$0: No command. 
Try '$0 --help' for more information." 1>&2 237 | exit 1; 238 | ;; 239 | -h | --h*) 240 | cat <<\EOF 241 | Usage: compile [--help] [--version] PROGRAM [ARGS] 242 | 243 | Wrapper for compilers which do not understand '-c -o'. 244 | Remove '-o dest.o' from ARGS, run PROGRAM with the remaining 245 | arguments, and rename the output as expected. 246 | 247 | If you are trying to build a whole package this is not the 248 | right script to run: please start by reading the file 'INSTALL'. 249 | 250 | Report bugs to . 251 | EOF 252 | exit $? 253 | ;; 254 | -v | --v*) 255 | echo "compile $scriptversion" 256 | exit $? 257 | ;; 258 | cl | *[/\\]cl | cl.exe | *[/\\]cl.exe ) 259 | func_cl_wrapper "$@" # Doesn't return... 260 | ;; 261 | esac 262 | 263 | ofile= 264 | cfile= 265 | 266 | for arg 267 | do 268 | if test -n "$eat"; then 269 | eat= 270 | else 271 | case $1 in 272 | -o) 273 | # configure might choose to run compile as 'compile cc -o foo foo.c'. 274 | # So we strip '-o arg' only if arg is an object. 275 | eat=1 276 | case $2 in 277 | *.o | *.obj) 278 | ofile=$2 279 | ;; 280 | *) 281 | set x "$@" -o "$2" 282 | shift 283 | ;; 284 | esac 285 | ;; 286 | *.c) 287 | cfile=$1 288 | set x "$@" "$1" 289 | shift 290 | ;; 291 | *) 292 | set x "$@" "$1" 293 | shift 294 | ;; 295 | esac 296 | fi 297 | shift 298 | done 299 | 300 | if test -z "$ofile" || test -z "$cfile"; then 301 | # If no '-o' option was seen then we might have been invoked from a 302 | # pattern rule where we don't need one. That is ok -- this is a 303 | # normal compilation that the losing compiler can handle. If no 304 | # '.c' file was seen then we are probably linking. That is also 305 | # ok. 306 | exec "$@" 307 | fi 308 | 309 | # Name of file we expect compiler to create. 310 | cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'` 311 | 312 | # Create the lock directory. 313 | # Note: use '[/\\:.-]' here to ensure that we don't use the same name 314 | # that we are using for the .o file. 
Also, base the name on the expected 315 | # object file name, since that is what matters with a parallel build. 316 | lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d 317 | while true; do 318 | if mkdir "$lockdir" >/dev/null 2>&1; then 319 | break 320 | fi 321 | sleep 1 322 | done 323 | # FIXME: race condition here if user kills between mkdir and trap. 324 | trap "rmdir '$lockdir'; exit 1" 1 2 15 325 | 326 | # Run the compile. 327 | "$@" 328 | ret=$? 329 | 330 | if test -f "$cofile"; then 331 | test "$cofile" = "$ofile" || mv "$cofile" "$ofile" 332 | elif test -f "${cofile}bj"; then 333 | test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile" 334 | fi 335 | 336 | rmdir "$lockdir" 337 | exit $ret 338 | 339 | # Local Variables: 340 | # mode: shell-script 341 | # sh-indentation: 2 342 | # eval: (add-hook 'write-file-hooks 'time-stamp) 343 | # time-stamp-start: "scriptversion=" 344 | # time-stamp-format: "%:y-%02m-%02d.%02H" 345 | # time-stamp-time-zone: "UTC" 346 | # time-stamp-end: "; # UTC" 347 | # End: 348 | -------------------------------------------------------------------------------- /src/reed_sol.c: -------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2014, James S. Plank and Kevin Greenan 3 | * All rights reserved. 4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 
16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 38 | */ 39 | 40 | /* Jerasure's authors: 41 | 42 | Revision 2.x - 2014: James S. Plank and Kevin M. Greenan 43 | Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. 44 | Revision 1.0 - 2007: James S. 
Plank 45 | */ 46 | 47 | #include 48 | #include 49 | #include 50 | 51 | #include 52 | #include "galois.h" 53 | #include "jerasure.h" 54 | #include "reed_sol.h" 55 | 56 | #define talloc(type, num) (type *) malloc(sizeof(type)*(num)) 57 | 58 | int *reed_sol_r6_coding_matrix(int k, int w) 59 | { 60 | int *matrix; 61 | int i, tmp; 62 | 63 | if (w != 8 && w != 16 && w != 32) return NULL; 64 | 65 | matrix = talloc(int, 2*k); 66 | if (matrix == NULL) return NULL; 67 | 68 | for (i = 0; i < k; i++) matrix[i] = 1; 69 | matrix[k] = 1; 70 | tmp = 1; 71 | for (i = 1; i < k; i++) { 72 | tmp = galois_single_multiply(tmp, 2, w); 73 | matrix[k+i] = tmp; 74 | } 75 | return matrix; 76 | } 77 | 78 | int *reed_sol_vandermonde_coding_matrix(int k, int m, int w) 79 | { 80 | int i, j; 81 | int *vdm, *dist; 82 | 83 | vdm = reed_sol_big_vandermonde_distribution_matrix(k+m, k, w); 84 | if (vdm == NULL) return NULL; 85 | dist = talloc(int, m*k); 86 | if (dist == NULL) { 87 | free(vdm); 88 | return NULL; 89 | } 90 | 91 | i = k*k; 92 | for (j = 0; j < m*k; j++) { 93 | dist[j] = vdm[i]; 94 | i++; 95 | } 96 | free(vdm); 97 | return dist; 98 | } 99 | 100 | static int prim08 = -1; 101 | static gf_t GF08; 102 | 103 | void reed_sol_galois_w08_region_multby_2(char *region, int nbytes) 104 | { 105 | if (prim08 == -1) { 106 | prim08 = galois_single_multiply((1 << 7), 2, 8); 107 | if (!gf_init_hard(&GF08, 8, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT, 108 | prim08, 0, 0, NULL, NULL)) { 109 | fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w08_region_multby_2\n"); 110 | exit(1); 111 | } 112 | } 113 | GF08.multiply_region.w32(&GF08, region, region, 2, nbytes, 0); 114 | } 115 | 116 | static int prim16 = -1; 117 | static gf_t GF16; 118 | 119 | void reed_sol_galois_w16_region_multby_2(char *region, int nbytes) 120 | { 121 | if (prim16 == -1) { 122 | prim16 = galois_single_multiply((1 << 15), 2, 16); 123 | if (!gf_init_hard(&GF16, 16, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, 
GF_DIVIDE_DEFAULT, 124 | prim16, 0, 0, NULL, NULL)) { 125 | fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w16_region_multby_2\n"); 126 | exit(1); 127 | } 128 | } 129 | GF16.multiply_region.w32(&GF16, region, region, 2, nbytes, 0); 130 | } 131 | 132 | static int prim32 = -1; 133 | static gf_t GF32; 134 | 135 | void reed_sol_galois_w32_region_multby_2(char *region, int nbytes) 136 | { 137 | if (prim32 == -1) { 138 | prim32 = galois_single_multiply((1 << 31), 2, 32); 139 | if (!gf_init_hard(&GF32, 32, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT, 140 | prim32, 0, 0, NULL, NULL)) { 141 | fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w32_region_multby_2\n"); 142 | exit(1); 143 | } 144 | } 145 | GF32.multiply_region.w32(&GF32, region, region, 2, nbytes, 0); 146 | } 147 | 148 | int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int size) 149 | { 150 | int i; 151 | 152 | /* First, put the XOR into coding region 0 */ 153 | 154 | memcpy(coding_ptrs[0], data_ptrs[0], size); 155 | 156 | for (i = 1; i < k; i++) galois_region_xor(data_ptrs[i], coding_ptrs[0], size); 157 | 158 | /* Next, put the sum of (2^j)*Dj into coding region 1 */ 159 | 160 | memcpy(coding_ptrs[1], data_ptrs[k-1], size); 161 | 162 | for (i = k-2; i >= 0; i--) { 163 | switch (w) { 164 | case 8: reed_sol_galois_w08_region_multby_2(coding_ptrs[1], size); break; 165 | case 16: reed_sol_galois_w16_region_multby_2(coding_ptrs[1], size); break; 166 | case 32: reed_sol_galois_w32_region_multby_2(coding_ptrs[1], size); break; 167 | default: return 0; 168 | } 169 | 170 | galois_region_xor(data_ptrs[i], coding_ptrs[1], size); 171 | } 172 | return 1; 173 | } 174 | 175 | int *reed_sol_extended_vandermonde_matrix(int rows, int cols, int w) 176 | { 177 | int *vdm; 178 | int i, j, k; 179 | 180 | if (w < 30 && (1 << w) < rows) return NULL; 181 | if (w < 30 && (1 << w) < cols) return NULL; 182 | 183 | vdm = talloc(int, rows*cols); 184 | if (vdm == 
NULL) { return NULL; } 185 | 186 | vdm[0] = 1; 187 | for (j = 1; j < cols; j++) vdm[j] = 0; 188 | if (rows == 1) return vdm; 189 | 190 | i=(rows-1)*cols; 191 | for (j = 0; j < cols-1; j++) vdm[i+j] = 0; 192 | vdm[i+j] = 1; 193 | if (rows == 2) return vdm; 194 | 195 | for (i = 1; i < rows-1; i++) { 196 | k = 1; 197 | for (j = 0; j < cols; j++) { 198 | vdm[i*cols+j] = k; 199 | k = galois_single_multiply(k, i, w); 200 | } 201 | } 202 | return vdm; 203 | } 204 | 205 | int *reed_sol_big_vandermonde_distribution_matrix(int rows, int cols, int w) 206 | { 207 | int *dist; 208 | int i, j, k; 209 | int sindex, srindex, siindex, tmp; 210 | 211 | if (cols >= rows) return NULL; 212 | 213 | dist = reed_sol_extended_vandermonde_matrix(rows, cols, w); 214 | if (dist == NULL) return NULL; 215 | 216 | sindex = 0; 217 | for (i = 1; i < cols; i++) { 218 | sindex += cols; 219 | 220 | /* Find an appropriate row -- where i,i != 0 */ 221 | srindex = sindex+i; 222 | for (j = i; j < rows && dist[srindex] == 0; j++) srindex += cols; 223 | if (j >= rows) { /* This should never happen if rows/w are correct */ 224 | fprintf(stderr, "reed_sol_big_vandermonde_distribution_matrix(%d,%d,%d) - couldn't make matrix\n", 225 | rows, cols, w); 226 | exit(1); 227 | } 228 | 229 | /* If necessary, swap rows */ 230 | if (j != i) { 231 | srindex -= i; 232 | for (k = 0; k < cols; k++) { 233 | tmp = dist[srindex+k]; 234 | dist[srindex+k] = dist[sindex+k]; 235 | dist[sindex+k] = tmp; 236 | } 237 | } 238 | 239 | /* If Element i,i is not equal to 1, multiply the column by 1/i */ 240 | 241 | if (dist[sindex+i] != 1) { 242 | tmp = galois_single_divide(1, dist[sindex+i], w); 243 | srindex = i; 244 | for (j = 0; j < rows; j++) { 245 | dist[srindex] = galois_single_multiply(tmp, dist[srindex], w); 246 | srindex += cols; 247 | } 248 | } 249 | 250 | /* Now, for each element in row i that is not in column 1, you need 251 | to make it zero. Suppose that this is column j, and the element 252 | at i,j = e. 
Then you want to replace all of column j with 253 | (col-j + col-i*e). Note, that in row i, col-i = 1 and col-j = e. 254 | So (e + 1e) = 0, which is indeed what we want. */ 255 | 256 | for (j = 0; j < cols; j++) { 257 | tmp = dist[sindex+j]; 258 | if (j != i && tmp != 0) { 259 | srindex = j; 260 | siindex = i; 261 | for (k = 0; k < rows; k++) { 262 | dist[srindex] = dist[srindex] ^ galois_single_multiply(tmp, dist[siindex], w); 263 | srindex += cols; 264 | siindex += cols; 265 | } 266 | } 267 | } 268 | } 269 | /* We desire to have row k be all ones. To do that, multiply 270 | the entire column j by 1/dist[k,j]. Then row j by 1/dist[j,j]. */ 271 | 272 | sindex = cols*cols; 273 | for (j = 0; j < cols; j++) { 274 | tmp = dist[sindex]; 275 | if (tmp != 1) { 276 | tmp = galois_single_divide(1, tmp, w); 277 | srindex = sindex; 278 | for (i = cols; i < rows; i++) { 279 | dist[srindex] = galois_single_multiply(tmp, dist[srindex], w); 280 | srindex += cols; 281 | } 282 | } 283 | sindex++; 284 | } 285 | 286 | /* Finally, we'd like the first column of each row to be all ones. To 287 | do that, we multiply the row by the inverse of the first element. */ 288 | 289 | sindex = cols*(cols+1); 290 | for (i = cols+1; i < rows; i++) { 291 | tmp = dist[sindex]; 292 | if (tmp != 1) { 293 | tmp = galois_single_divide(1, tmp, w); 294 | for (j = 0; j < cols; j++) dist[sindex+j] = galois_single_multiply(dist[sindex+j], tmp, w); 295 | } 296 | sindex += cols; 297 | } 298 | 299 | return dist; 300 | } 301 | 302 | -------------------------------------------------------------------------------- /src/neon/gf_w32_neon.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 
5 | * 6 | * Copyright (c) 2014: Janne Grunau 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * - Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 15 | * - Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in 17 | * the documentation and/or other materials provided with the 18 | * distribution. 19 | * 20 | * - Neither the name of the University of Tennessee nor the names of its 21 | * contributors may be used to endorse or promote products derived 22 | * from this software without specific prior written permission. 23 | * 24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 31 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 34 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | * POSSIBILITY OF SUCH DAMAGE. 
36 | * 37 | * gf_w32_neon.c 38 | * 39 | * Neon routines for 32-bit Galois fields 40 | * 41 | */ 42 | 43 | 44 | #include "gf_int.h" 45 | #include 46 | #include 47 | #include "gf_w32.h" 48 | 49 | #ifndef ARCH_AARCH64 50 | #define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)), \ 51 | vtbl2_u8(tbl, vget_high_u8(v))) 52 | #endif 53 | 54 | static 55 | void 56 | neon_w32_split_4_32_multiply_region(gf_t *gf, uint32_t *src, uint32_t *dst, 57 | uint32_t *d_end, uint8_t btable[8][4][16], 58 | uint32_t val, int xor, int altmap) 59 | { 60 | int i, j; 61 | #ifdef ARCH_AARCH64 62 | uint8x16_t tables[8][4]; 63 | #else 64 | uint8x8x2_t tables[8][4]; 65 | #endif 66 | uint32x4_t v0, v1, v2, v3, s0, s1, s2, s3; 67 | uint8x16_t p0, p1, p2, p3, si, mask1; 68 | uint16x8x2_t r0, r1; 69 | uint8x16x2_t q0, q1; 70 | 71 | for (i = 0; i < 8; i++) { 72 | for (j = 0; j < 4; j++) { 73 | #ifdef ARCH_AARCH64 74 | tables[i][j] = vld1q_u8(btable[i][j]); 75 | #else 76 | tables[i][j].val[0] = vld1_u8(btable[i][j]); 77 | tables[i][j].val[1] = vld1_u8(btable[i][j] + 8); 78 | #endif 79 | } 80 | } 81 | 82 | mask1 = vdupq_n_u8(0xf); 83 | 84 | while (dst < d_end) { 85 | 86 | v0 = vld1q_u32(src); src += 4; 87 | v1 = vld1q_u32(src); src += 4; 88 | v2 = vld1q_u32(src); src += 4; 89 | v3 = vld1q_u32(src); src += 4; 90 | 91 | if (altmap) { 92 | q0.val[0] = vreinterpretq_u8_u32(v0); 93 | q0.val[1] = vreinterpretq_u8_u32(v1); 94 | q1.val[0] = vreinterpretq_u8_u32(v2); 95 | q1.val[1] = vreinterpretq_u8_u32(v3); 96 | } else { 97 | r0 = vtrnq_u16(vreinterpretq_u16_u32(v0), vreinterpretq_u16_u32(v2)); 98 | r1 = vtrnq_u16(vreinterpretq_u16_u32(v1), vreinterpretq_u16_u32(v3)); 99 | 100 | q0 = vtrnq_u8(vreinterpretq_u8_u16(r0.val[0]), 101 | vreinterpretq_u8_u16(r1.val[0])); 102 | q1 = vtrnq_u8(vreinterpretq_u8_u16(r0.val[1]), 103 | vreinterpretq_u8_u16(r1.val[1])); 104 | } 105 | 106 | si = vandq_u8(q0.val[0], mask1); 107 | p0 = vqtbl1q_u8(tables[0][0], si); 108 | p1 = vqtbl1q_u8(tables[0][1], si); 109 | p2 
= vqtbl1q_u8(tables[0][2], si); 110 | p3 = vqtbl1q_u8(tables[0][3], si); 111 | 112 | si = vshrq_n_u8(q0.val[0], 4); 113 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[1][0], si)); 114 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[1][1], si)); 115 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[1][2], si)); 116 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[1][3], si)); 117 | 118 | si = vandq_u8(q0.val[1], mask1); 119 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[2][0], si)); 120 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[2][1], si)); 121 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[2][2], si)); 122 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[2][3], si)); 123 | 124 | si = vshrq_n_u8(q0.val[1], 4); 125 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[3][0], si)); 126 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[3][1], si)); 127 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[3][2], si)); 128 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[3][3], si)); 129 | 130 | si = vandq_u8(q1.val[0], mask1); 131 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[4][0], si)); 132 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[4][1], si)); 133 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[4][2], si)); 134 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[4][3], si)); 135 | 136 | si = vshrq_n_u8(q1.val[0], 4); 137 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[5][0], si)); 138 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[5][1], si)); 139 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[5][2], si)); 140 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[5][3], si)); 141 | 142 | si = vandq_u8(q1.val[1], mask1); 143 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[6][0], si)); 144 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[6][1], si)); 145 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[6][2], si)); 146 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[6][3], si)); 147 | 148 | si = vshrq_n_u8(q1.val[1], 4); 149 | p0 = veorq_u8(p0, vqtbl1q_u8(tables[7][0], si)); 150 | p1 = veorq_u8(p1, vqtbl1q_u8(tables[7][1], si)); 151 | p2 = veorq_u8(p2, vqtbl1q_u8(tables[7][2], si)); 152 | p3 = veorq_u8(p3, vqtbl1q_u8(tables[7][3], si)); 153 | 154 | if (altmap) { 155 | s0 = vreinterpretq_u32_u8(p0); 156 | s1 = 
vreinterpretq_u32_u8(p1); 157 | s2 = vreinterpretq_u32_u8(p2); 158 | s3 = vreinterpretq_u32_u8(p3); 159 | } else { 160 | q0 = vtrnq_u8(p0, p1); 161 | q1 = vtrnq_u8(p2, p3); 162 | 163 | r0 = vtrnq_u16(vreinterpretq_u16_u8(q0.val[0]), 164 | vreinterpretq_u16_u8(q1.val[0])); 165 | r1 = vtrnq_u16(vreinterpretq_u16_u8(q0.val[1]), 166 | vreinterpretq_u16_u8(q1.val[1])); 167 | 168 | s0 = vreinterpretq_u32_u16(r0.val[0]); 169 | s1 = vreinterpretq_u32_u16(r1.val[0]); 170 | s2 = vreinterpretq_u32_u16(r0.val[1]); 171 | s3 = vreinterpretq_u32_u16(r1.val[1]); 172 | } 173 | 174 | if (xor) { 175 | v0 = vld1q_u32(dst); 176 | v1 = vld1q_u32(dst + 4); 177 | v2 = vld1q_u32(dst + 8); 178 | v3 = vld1q_u32(dst + 12); 179 | s0 = veorq_u32(s0, v0); 180 | s1 = veorq_u32(s1, v1); 181 | s2 = veorq_u32(s2, v2); 182 | s3 = veorq_u32(s3, v3); 183 | } 184 | 185 | vst1q_u32(dst, s0); 186 | vst1q_u32(dst + 4, s1); 187 | vst1q_u32(dst + 8, s2); 188 | vst1q_u32(dst + 12, s3); 189 | 190 | dst += 16; 191 | } 192 | } 193 | 194 | static 195 | inline 196 | void 197 | neon_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor, int altmap) 198 | { 199 | gf_internal_t *h; 200 | int i, j, k; 201 | uint32_t pp, v, *s32, *d32, *top, tmp_table[16]; 202 | uint8_t btable[8][4][16]; 203 | gf_region_data rd; 204 | 205 | if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } 206 | if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; } 207 | 208 | h = (gf_internal_t *) gf->scratch; 209 | pp = h->prim_poly; 210 | 211 | gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 64); 212 | gf_do_initial_region_alignment(&rd); 213 | 214 | s32 = (uint32_t *) rd.s_start; 215 | d32 = (uint32_t *) rd.d_start; 216 | top = (uint32_t *) rd.d_top; 217 | 218 | v = val; 219 | for (i = 0; i < 8; i++) { 220 | tmp_table[0] = 0; 221 | for (j = 1; j < 16; j <<= 1) { 222 | for (k = 0; k < j; k++) { 223 | tmp_table[k^j] = (v ^ tmp_table[k]); 224 | } 225 | v = (v & GF_FIRST_BIT) ? 
((v << 1) ^ pp) : (v << 1); 226 | } 227 | for (j = 0; j < 4; j++) { 228 | for (k = 0; k < 16; k++) { 229 | btable[i][j][k] = (uint8_t) tmp_table[k]; 230 | tmp_table[k] >>= 8; 231 | } 232 | } 233 | } 234 | 235 | if (xor) 236 | neon_w32_split_4_32_multiply_region(gf, s32, d32, top, btable, val, 1, altmap); 237 | else 238 | neon_w32_split_4_32_multiply_region(gf, s32, d32, top, btable, val, 0, altmap); 239 | 240 | gf_do_final_region_alignment(&rd); 241 | } 242 | 243 | static 244 | void 245 | gf_w32_split_4_32_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest, 246 | gf_val_32_t val, int bytes, int xor) 247 | { 248 | neon_w32_split_4_32_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0); 249 | } 250 | 251 | static 252 | void 253 | gf_w32_split_4_32_lazy_altmap_multiply_region_neon(gf_t *gf, void *src, 254 | void *dest, gf_val_32_t val, 255 | int bytes, int xor) 256 | { 257 | neon_w32_split_4_32_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1); 258 | } 259 | 260 | void gf_w32_neon_split_init(gf_t *gf) 261 | { 262 | gf_internal_t *h = (gf_internal_t *) gf->scratch; 263 | 264 | if (h->region_type & GF_REGION_ALTMAP) 265 | gf->multiply_region.w32 = gf_w32_split_4_32_lazy_altmap_multiply_region_neon; 266 | else 267 | gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region_neon; 268 | 269 | } 270 | -------------------------------------------------------------------------------- /include/gf_int.h: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * gf_int.h 7 | * 8 | * Internal code for Galois field routines. This is not meant for 9 | * users to include, but for the internal GF files to use. 
10 | */ 11 | 12 | #pragma once 13 | 14 | #include "gf_complete.h" 15 | 16 | #include 17 | 18 | extern void timer_start (double *t); 19 | extern double timer_split (const double *t); 20 | extern void galois_fill_random (void *buf, int len, unsigned int seed); 21 | 22 | typedef struct { 23 | int mult_type; 24 | int region_type; 25 | int divide_type; 26 | int w; 27 | uint64_t prim_poly; 28 | int free_me; 29 | int arg1; 30 | int arg2; 31 | gf_t *base_gf; 32 | void *private; 33 | } gf_internal_t; 34 | 35 | extern int gf_w4_init (gf_t *gf); 36 | extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 37 | 38 | extern int gf_w8_init (gf_t *gf); 39 | extern int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 40 | 41 | extern int gf_w16_init (gf_t *gf); 42 | extern int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 43 | 44 | extern int gf_w32_init (gf_t *gf); 45 | extern int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 46 | 47 | extern int gf_w64_init (gf_t *gf); 48 | extern int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 49 | 50 | extern int gf_w128_init (gf_t *gf); 51 | extern int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2); 52 | 53 | extern int gf_wgen_init (gf_t *gf); 54 | extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2); 55 | 56 | void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor); 57 | gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index); 58 | 59 | extern void gf_alignment_error(char *s, int a); 60 | 61 | extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp); 62 | 63 | /* This returns the correct default for prim_poly when base is used as the base 64 | field for 
COMPOSITE. It returns 0 if we don't have a default prim_poly. */ 65 | 66 | extern uint64_t gf_composite_get_default_poly(gf_t *base); 67 | 68 | /* This structure lets you define a region multiply. It helps because you can handle 69 | unaligned portions of the data with the procedures below, which really cleans 70 | up the code. */ 71 | 72 | typedef struct { 73 | gf_t *gf; 74 | void *src; 75 | void *dest; 76 | int bytes; 77 | uint64_t val; 78 | int xor; 79 | int align; /* The number of bytes to which to align. */ 80 | void *s_start; /* The start and the top of the aligned region. */ 81 | void *d_start; 82 | void *s_top; 83 | void *d_top; 84 | } gf_region_data; 85 | 86 | /* This lets you set up one of these in one call. It also sets the start/top pointers. */ 87 | 88 | void gf_set_region_data(gf_region_data *rd, 89 | gf_t *gf, 90 | void *src, 91 | void *dest, 92 | int bytes, 93 | uint64_t val, 94 | int xor, 95 | int align); 96 | 97 | /* This performs gf->multiply.32() on all of the unaligned bytes in the beginning of the region */ 98 | 99 | extern void gf_do_initial_region_alignment(gf_region_data *rd); 100 | 101 | /* This performs gf->multiply.32() on all of the unaligned bytes in the end of the region */ 102 | 103 | extern void gf_do_final_region_alignment(gf_region_data *rd); 104 | 105 | extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base); 106 | 107 | extern void gf_multby_zero(void *dest, int bytes, int xor); 108 | extern void gf_multby_one(void *src, void *dest, int bytes, int xor); 109 | 110 | typedef enum {GF_E_MDEFDIV, /* Dev != Default && Mult == Default */ 111 | GF_E_MDEFREG, /* Reg != Default && Mult == Default */ 112 | GF_E_MDEFARG, /* Args != Default && Mult == Default */ 113 | GF_E_DIVCOMP, /* Mult == Composite && Div != Default */ 114 | GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */ 115 | GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */ 116 | GF_E_SIMD_NO, /* Reg == SIMD && Reg == NOSIMD */ 117 | GF_E_CAUCHYB, /* 
Reg == CAUCHY && Other Reg */ 118 | GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/ 119 | GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */ 120 | GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */ 121 | GF_E_MATRIXW, /* Div == MATRIX && w > 32 */ 122 | GF_E_BAD___W, /* Illegal w */ 123 | GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */ 124 | GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */ 125 | GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */ 126 | GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */ 127 | GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */ 128 | GF_E_QUAD__W, /* Reg == QUAD && w != 4 */ 129 | GF_E_QUAD__J, /* Reg == QUAD && other Reg */ 130 | GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/ 131 | GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */ 132 | GF_E_SSESHIF, /* Mult == Shift && Reg == SIMD|NOSIMD */ 133 | GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */ 134 | GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SIMD|NOSIMD */ 135 | GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */ 136 | GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */ 137 | GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */ 138 | GF_E_LOGBADW, /* Mult == LOGx, w too big*/ 139 | GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */ 140 | GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */ 141 | GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */ 142 | GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */ 143 | GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */ 144 | GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */ 145 | GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */ 146 | GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */ 147 | GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */ 148 | GF_E_GR_AR_W, /* Mult == GROUP, either arg > w */ 149 | GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */ 150 | GF_E_TABLE_W, /* Mult == TABLE, w too big */ 151 | GF_E_TAB_SSE, /* Mult == TABLE, SIMD|NOSIMD only apply to w == 
4 */ 152 | GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */ 153 | GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */ 154 | GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */ 155 | GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */ 156 | GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */ 157 | GF_E_SP128_A, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */ 158 | GF_E_SP128_S, /* Mult == SPLIT, w=128, SSE only with 4/128 */ 159 | GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128) */ 160 | GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */ 161 | GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */ 162 | GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */ 163 | GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */ 164 | GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */ 165 | GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */ 166 | GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */ 167 | GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */ 168 | GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */ 169 | GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */ 170 | GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */ 171 | GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */ 172 | GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */ 173 | GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */ 174 | GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */ 175 | GF_E_COMP_SS, /* Mult == COMP, SIMD|NOSIMD */ 176 | GF_E_COMP__W, /* Mult == COMP, Bad w. */ 177 | GF_E_UNKFLAG, /* Unknown flag in create_from.... */ 178 | GF_E_UNKNOWN, /* Unknown mult_type. */ 179 | GF_E_UNK_REG, /* Unknown region_type. */ 180 | GF_E_UNK_DIV, /* Unknown divide_type. */ 181 | GF_E_CFM___W, /* Mult == CFM, Bad w. */ 182 | GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */ 183 | GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. 
*/ 184 | GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */ 185 | GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */ 186 | GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */ 187 | GF_E_FEWARGS, /* Too few args in argc/argv. */ 188 | GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */ 189 | GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */ 190 | GF_E_COMPXPP, /* Can't derive a default pp for composite field. */ 191 | GF_E_BASE__W, /* Composite -- Base field is the wrong size. */ 192 | GF_E_TWOMULT, /* In create_from... two -m's. */ 193 | GF_E_TWO_DIV, /* In create_from... two -d's. */ 194 | GF_E_POLYSPC, /* Bad numbera after -p. */ 195 | GF_E_SPLITAR, /* Ran out of arguments in SPLIT */ 196 | GF_E_SPLITNU, /* Arguments not integers in SPLIT. */ 197 | GF_E_GROUPAR, /* Ran out of arguments in GROUP */ 198 | GF_E_GROUPNU, /* Arguments not integers in GROUP. */ 199 | GF_E_DEFAULT } gf_error_type_t; 200 | 201 | -------------------------------------------------------------------------------- /src/neon/gf_w8_neon.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * Copyright (c) 2014: Janne Grunau 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * - Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 
14 | * 15 | * - Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in 17 | * the documentation and/or other materials provided with the 18 | * distribution. 19 | * 20 | * - Neither the name of the University of Tennessee nor the names of its 21 | * contributors may be used to endorse or promote products derived 22 | * from this software without specific prior written permission. 23 | * 24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 31 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 34 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | * POSSIBILITY OF SUCH DAMAGE. 36 | * 37 | * gf_w8_neon.c 38 | * 39 | * Neon optimized routines for 8-bit Galois fields 40 | * 41 | */ 42 | 43 | #include "gf_int.h" 44 | #include "gf_w8.h" 45 | #include 46 | #include 47 | 48 | /* ARM NEON reducing macro for the carry free multiplication 49 | * vmull_p8 is the carryless multiply operation. Here vshrn_n_u16 shifts 50 | * the result to the right by 1 byte. This allows us to multiply 51 | * the prim_poly by the leading bits of the result. We then xor the result 52 | * of that operation back with the result. 
*/ 53 | #define NEON_CFM_REDUCE(v, w, result, prim_poly, initial) \ 54 | do { \ 55 | if (initial) \ 56 | v = vshrn_n_u16 (vreinterpretq_u16_p16(result), 8); \ 57 | else \ 58 | v = veor_u8 (v, vshrn_n_u16 (vreinterpretq_u16_p16(result), 8)); \ 59 | w = vmull_p8 (prim_poly, vreinterpret_p8_u8(v)); \ 60 | result = vreinterpretq_p16_u16 (veorq_u16 (vreinterpretq_u16_p16(result), vreinterpretq_u16_p16(w))); \ 61 | } while (0) 62 | 63 | static 64 | inline 65 | gf_val_32_t 66 | gf_w8_neon_clm_multiply_x (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8, int x) 67 | { 68 | gf_val_32_t rv = 0; 69 | poly8x8_t a, b; 70 | uint8x8_t v; 71 | poly16x8_t result; 72 | poly8x8_t prim_poly; 73 | poly16x8_t w; 74 | gf_internal_t * h = gf->scratch; 75 | 76 | a = vdup_n_p8 (a8); 77 | b = vdup_n_p8 (b8); 78 | 79 | prim_poly = vdup_n_p8 ((uint32_t)(h->prim_poly & 0x1ffULL)); 80 | 81 | /* Do the initial multiply */ 82 | result = vmull_p8 (a, b); 83 | 84 | /* Ben: Do prim_poly reduction twice. We are guaranteed that we will only 85 | have to do the reduction at most twice, because (w-2)/z == 2. Where 86 | z is equal to the number of zeros after the leading 1 */ 87 | NEON_CFM_REDUCE (v, w, result, prim_poly, 1); 88 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 89 | if (x >= 3) { 90 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 91 | } 92 | if (x >= 4) { 93 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 94 | } 95 | /* Extracts 32 bit value from result. 
*/ 96 | rv = (gf_val_32_t)vget_lane_u8 (vmovn_u16 (vreinterpretq_u16_p16 (result)), 0); 97 | 98 | return rv; 99 | } 100 | 101 | #define CLM_MULTIPLY(x) \ 102 | static gf_val_32_t gf_w8_neon_clm_multiply_ ## x (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) \ 103 | {\ 104 | return gf_w8_neon_clm_multiply_x (gf, a8, b8, x);\ 105 | } 106 | 107 | CLM_MULTIPLY(2) 108 | CLM_MULTIPLY(3) 109 | CLM_MULTIPLY(4) 110 | 111 | static inline void 112 | neon_clm_multiply_region_from_single_x(gf_t *gf, uint8_t *s8, uint8_t *d8, 113 | gf_val_32_t val, uint8_t *d_end, 114 | int xor, int x) 115 | { 116 | gf_internal_t * h = gf->scratch; 117 | poly8x8_t a, b; 118 | uint8x8_t c, v; 119 | poly16x8_t result; 120 | poly8x8_t prim_poly; 121 | poly16x8_t w; 122 | 123 | a = vdup_n_p8 (val); 124 | prim_poly = vdup_n_p8 ((uint8_t)(h->prim_poly & 0xffULL)); 125 | 126 | while (d8 < d_end) { 127 | b = vld1_p8 ((poly8_t *) s8); 128 | 129 | if (xor) 130 | c = vld1_u8 (d8); 131 | 132 | result = vmull_p8 (a, b); 133 | 134 | NEON_CFM_REDUCE(v, w, result, prim_poly, 1); 135 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 136 | if (x >= 3) { 137 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 138 | } 139 | if (x >= 4) { 140 | NEON_CFM_REDUCE (v, w, result, prim_poly, 0); 141 | } 142 | v = vmovn_u16 (vreinterpretq_u16_p16 (result)); 143 | if (xor) 144 | v = veor_u8 (c, v); 145 | 146 | vst1_u8 (d8, v); 147 | 148 | d8 += 8; 149 | s8 += 8; 150 | } 151 | } 152 | 153 | #define CLM_MULT_REGION(x) \ 154 | static void \ 155 | gf_w8_neon_clm_multiply_region_from_single_ ## x (gf_t *gf, void *src, \ 156 | void *dest, \ 157 | gf_val_32_t val, int bytes, \ 158 | int xor) \ 159 | { \ 160 | gf_region_data rd; \ 161 | uint8_t *s8; \ 162 | uint8_t *d8; \ 163 | \ 164 | if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } \ 165 | if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; } \ 166 | \ 167 | gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); \ 168 | gf_do_initial_region_alignment(&rd); \ 169 | 
s8 = (uint8_t *) rd.s_start; \ 170 | d8 = (uint8_t *) rd.d_start; \ 171 | \ 172 | if (xor) \ 173 | neon_clm_multiply_region_from_single_x (gf, s8, d8, val, rd.d_top, 1, x); \ 174 | else \ 175 | neon_clm_multiply_region_from_single_x (gf, s8, d8, val, rd.d_top, 0, x);\ 176 | gf_do_final_region_alignment(&rd); \ 177 | } 178 | 179 | CLM_MULT_REGION(2) 180 | CLM_MULT_REGION(3) 181 | CLM_MULT_REGION(4) 182 | 183 | 184 | int gf_w8_neon_cfm_init(gf_t *gf) 185 | { 186 | gf_internal_t *h; 187 | 188 | h = (gf_internal_t *) gf->scratch; 189 | 190 | if ((0xe0 & h->prim_poly) == 0){ 191 | gf->multiply.w32 = gf_w8_neon_clm_multiply_2; 192 | gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_2; 193 | }else if ((0xc0 & h->prim_poly) == 0){ 194 | gf->multiply.w32 = gf_w8_neon_clm_multiply_3; 195 | gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_3; 196 | }else if ((0x80 & h->prim_poly) == 0){ 197 | gf->multiply.w32 = gf_w8_neon_clm_multiply_4; 198 | gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_4; 199 | }else{ 200 | return 0; 201 | } 202 | return 1; 203 | } 204 | 205 | #ifndef ARCH_AARCH64 206 | #define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)), \ 207 | vtbl2_u8(tbl, vget_high_u8(v))) 208 | #endif 209 | 210 | static 211 | void 212 | gf_w8_split_multiply_region_neon(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) 213 | { 214 | uint8_t *bh, *bl, *sptr, *dptr; 215 | uint8x16_t r, va, vh, vl, loset; 216 | #ifdef ARCH_AARCH64 217 | uint8x16_t mth, mtl; 218 | #else 219 | uint8x8x2_t mth, mtl; 220 | #endif 221 | struct gf_w8_half_table_data *htd; 222 | gf_region_data rd; 223 | 224 | if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } 225 | if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; } 226 | 227 | htd = (struct gf_w8_half_table_data *) ((gf_internal_t *) (gf->scratch))->private; 228 | 229 | gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16); 230 | 
gf_do_initial_region_alignment(&rd); 231 | 232 | bh = (uint8_t *) htd->high; 233 | bh += (val << 4); 234 | bl = (uint8_t *) htd->low; 235 | bl += (val << 4); 236 | 237 | sptr = rd.s_start; 238 | dptr = rd.d_start; 239 | 240 | #ifdef ARCH_AARCH64 241 | mth = vld1q_u8 (bh); 242 | mtl = vld1q_u8 (bl); 243 | #else 244 | mth.val[0] = vld1_u8 (bh); 245 | mtl.val[0] = vld1_u8 (bl); 246 | mth.val[1] = vld1_u8 (bh + 8); 247 | mtl.val[1] = vld1_u8 (bl + 8); 248 | #endif 249 | 250 | loset = vdupq_n_u8(0xf); 251 | 252 | if (xor) { 253 | while (sptr < (uint8_t *) rd.s_top) { 254 | va = vld1q_u8 (sptr); 255 | 256 | vh = vshrq_n_u8 (va, 4); 257 | vl = vandq_u8 (va, loset); 258 | va = vld1q_u8 (dptr); 259 | 260 | vh = vqtbl1q_u8 (mth, vh); 261 | vl = vqtbl1q_u8 (mtl, vl); 262 | 263 | r = veorq_u8 (vh, vl); 264 | 265 | vst1q_u8 (dptr, veorq_u8 (va, r)); 266 | 267 | dptr += 16; 268 | sptr += 16; 269 | } 270 | } else { 271 | while (sptr < (uint8_t *) rd.s_top) { 272 | va = vld1q_u8 (sptr); 273 | 274 | vh = vshrq_n_u8 (va, 4); 275 | vl = vandq_u8 (va, loset); 276 | #ifdef ARCH_AARCH64 277 | vh = vqtbl1q_u8 (mth, vh); 278 | vl = vqtbl1q_u8 (mtl, vl); 279 | #else 280 | vh = vcombine_u8 (vtbl2_u8 (mth, vget_low_u8 (vh)), 281 | vtbl2_u8 (mth, vget_high_u8 (vh))); 282 | vl = vcombine_u8 (vtbl2_u8 (mtl, vget_low_u8 (vl)), 283 | vtbl2_u8 (mtl, vget_high_u8 (vl))); 284 | #endif 285 | 286 | r = veorq_u8 (vh, vl); 287 | 288 | vst1q_u8(dptr, r); 289 | 290 | dptr += 16; 291 | sptr += 16; 292 | } 293 | } 294 | 295 | gf_do_final_region_alignment(&rd); 296 | } 297 | 298 | 299 | void gf_w8_neon_split_init(gf_t *gf) 300 | { 301 | gf->multiply_region.w32 = gf_w8_split_multiply_region_neon; 302 | } 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LRC-Erasure-Code 2 | 3 | LRC(Local Reconstruction Codes) Erasure Code based on Reed-Solomon with Vandermonde matrix. 
4 | 5 | 6 | 7 | **Table of Contents** *generated with [DocToc](https://github.com/thlorenz/doctoc)* 8 | 9 | - [Status](#status) 10 | - [Description](#description) 11 | - [LRC parameters and the differences from original Erasure Code](#lrc-parameters-and-the-differences-from-original-erasure-code) 12 | - [Synopsis](#synopsis) 13 | - [Install](#install) 14 | - [API](#api) 15 | - [lrc_init_n](#lrc_init_n) 16 | - [lrc_destroy](#lrc_destroy) 17 | - [lrc_encode](#lrc_encode) 18 | - [lrc_decode](#lrc_decode) 19 | - [lrc_get_source](#lrc_get_source) 20 | - [lrc_buf_init](#lrc_buf_init) 21 | - [lrc_buf_destroy](#lrc_buf_destroy) 22 | - [Analysis](#analysis) 23 | - [Reliability](#reliability) 24 | - [IO bandwidth](#io-bandwidth) 25 | - [TODO](#todo) 26 | - [Author](#author) 27 | - [Copyright and License](#copyright-and-license) 28 | 29 | 30 | 31 | # Status 32 | 33 | This library is considered production ready. 34 | 35 | And it is the core EC implementation in [open.sinastorage.com](http://open.sinastorage.com), which has been protecting dozens of PB user data. 36 | 37 | # Description 38 | 39 | LRC(Local Reconstruction Codes) Erasure Code supplies almost the same functionality and reliability as original Erasure Code does. 40 | And at the same time it reduces reconstruction IO consumption by 50% or more. 41 | 42 | Erasure Code Algorithm makes it possible to achieve as high reliability(11 9s) 43 | as 3-copy replication provides, with highly reduced storage overhead(130% against 300%). 44 | 45 | But one of the problems with Erasure Code is the high IO consumption during 46 | data reconstruction. 47 | Normally to reconstruct `1` chunk it is required to read `n` chunks. 48 | 49 | LRC is a trade-off between storage cost and IO cost. 
50 | 51 | With several additional **local** `Coding` chunks calculated from subsets of `Data` 52 | chunks, average IO consumption for reconstruction would be reduced to 53 | `1 / number_of_local_sets`(normally 10% ~ 50%), at the cost of only about 10%(depends on LRC policy) more space used. 54 | 55 | ## LRC parameters and the differences from original Erasure Code 56 | 57 | For a collection there are 5 data chunks in it. 58 | To create LRC Erasure Code with: 59 | 60 | * 2 local EC codes; 61 | * 2 global EC codes; 62 | 63 | LRC should be initialized with: 64 | 65 | `lrc_init_n(lrc, 2, (uint8_t[]){3, 2}, 4)` 66 | 67 | Here in this example, the first local EC code will be created from the 1st 3 68 | data chunks, and the 2nd local EC code will be created from the last 2 data 69 | chunks. 70 | 71 | 4 is the total number of codes, which includes: 72 | * 2 of them are local EC codes, for data[0, 1, 2] and data[3, 4] respectively. 73 | * 2 additional global EC codes. 74 | 75 | The encoding matrix for this LRC parameter is: 76 | 77 | ``` 78 | 1 1 1 0 0 79 | 0 0 0 1 1 80 | 1 2 4 8 16 81 | 1 3 9 27 81 82 | ``` 83 | 84 | LRC-EC `(2,2)+4` is identical to original EC `5+3`, except that it splits the first row 85 | into 2 rows(which makes it possible to use less data/code chunks to reconstruct one). 
The original EC `5+3` encoding matrix is:

```
1  1  1  1  1
1  2  4  8 16
1  3  9 27 81
```

If you prefer to use original EC `5+3` like above, `lrc` can be initialized with:

`lrc_init_n(lrc, 1, (uint8_t[]){5}, 3)`

# Synopsis

```c
#include "lrc.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv) {

  int        size = 16;
  lrc_t     *lrc  = &(lrc_t) {0};
  lrc_buf_t *buf  = &(lrc_buf_t) {0};

  if (lrc_init_n(lrc, 2, (uint8_t[]) {2, 2}, 3) != 0) {
    exit(-1);
  }

  if (lrc_buf_init(buf, lrc, size) != 0) {
    exit(-1);
  }

  strcpy(buf->data[0], "hello");
  strcpy(buf->data[1], "world");
  strcpy(buf->data[2], "lrc");
  strcpy(buf->data[3], "ec");

  if (lrc_encode(lrc, buf) != 0) {
    exit(-1);
  }

  strcpy(buf->data[0], "*");

  printf("damaged: %s %s %s %s\n", buf->data[0], buf->data[1], buf->data[2], buf->data[3]);

  int8_t erased[2 + 2 + 3] = {
    1, 0,
    0, 0,
    0, 0, 0};

  if (lrc_decode(lrc, buf, erased) != 0) {
    exit(-1);
  }

  printf("reconstructed: %s %s %s %s\n", buf->data[0], buf->data[1], buf->data[2], buf->data[3]);

  lrc_destroy(lrc);
  lrc_buf_destroy(buf);

  return 0;
}
```

# Install

```shell
./configure
make
sudo make install

# run a test
cd test
gcc example.c -o example -llrc
./example
```

# API

## lrc_init_n

`int lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_arr, int m)`

Initializes LRC descriptor `lrc`.

Parameters:

*   `lrc`
    Pointer to a struct `lrc_t`. A `lrc_t` describes the parameters LRC uses to
    generate codes.

*   `n_local`
    Specifies the number of local EC to create.

*   `local_arr`
    An array of length `n_local` holding the number of data chunks in each local EC.

*   `m`
    Specifies the total number of codes. It must be equal to or greater than `n_local`.
    Thus there are `n_local` local EC codes and `m - n_local + 1` global EC codes,
    because the first global EC code can be calculated by `local-code-1 ^ local-code-2 ^ ...`

Returns:

*   `0`
    If Success.

*   `LRC_INIT_TWICE`
    If `lrc` is already initialized.

*   `LRC_INVALID_M`
    If `m` is less than `n_local`.

*   `LRC_OUT_OF_MEMORY`
    If any `malloc()` fails during initializing.

## lrc_destroy

`void lrc_destroy(lrc_t *lrc);`

Free memory allocated by `lrc_init_n()`. It does not free `*lrc` itself.

## lrc_encode

`int lrc_encode(lrc_t *lrc, lrc_buf_t *lrc_buf);`

Generate `m`(from `lrc_init_n()`) code chunks from all `k` data chunks. `k = sum(local_arr)`.
`lrc_buf_t` is the container of all data chunks and code chunks. It must be
initialized with `lrc_buf_init()` before use.

After `lrc_encode()`, your program should save `lrc_buf->data[0..k-1]` and
`lrc_buf->code[0..m-1]` on persistent storage for later reconstruction.

Returns:

*   `0`
    If Success.

*   `LRC_OUT_OF_MEMORY`
    If any `malloc()` fails during initializing.

## lrc_decode

`int lrc_decode(lrc_t *lrc, lrc_buf_t *lrc_buf, int8_t *erased);`

Reconstruct lost data and code chunks from existing data and code.

If too many data or code chunks are lost, reconstruction fails and
`LRC_UNRECOVERABLE` is returned.

Parameters:

*   `lrc_buf`
    Specifies data/code buffer for reconstruction and the buffer to store
    reconstructed data/code.

*   `erased`
    Specifies which data / code chunks are missing and need to be reconstructed.
    It is an array of length `k + m`.
    An array element `erased[i]` with value `1` means that chunk `i` is missing:
    for `i < k` it is the data chunk `lrc_buf->data[i]`, otherwise it is the
    code chunk `lrc_buf->code[i-k]`.

Returns:

*   `0`
    If Success.

*   `LRC_OUT_OF_MEMORY`
    If any `malloc()` fails during decoding.

*   `LRC_UNRECOVERABLE`
    If there is not enough data / code to reconstruct the missing ones.

## lrc_get_source

`int lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source);`

If LRC is used(`n_local` passed to `lrc_init_n()` is greater than 1), not
always all data/code are required.
This function calculates which data/code is required.

For example if LRC parameter is `2, 2, 3`, and `erased = {1, 0, 0, 0, 0, 0, 0}`
which means only 0-th data is missing, `source` will be filled in with: `{0, 1, 0, 0, 1, 0, 0}`
which means only `data[1], code[0]` are required to reconstruct the missing
`data[0]`.

Parameters:

*   `erased`
    Specifies missing data/code. There must be at least `k+m` 0/1 elements in
    `erased`.

*   `source`
    Specifies where to store the indexes of source data/code for reconstruction.
    There must be at least `k+m` available bytes in `source`.

Returns:

*   `0`
    If Success.

*   `LRC_UNRECOVERABLE`
    If there is not enough data / code to reconstruct the missing ones.

## lrc_buf_init

`int lrc_buf_init(lrc_buf_t *lrc_buf, lrc_t *lrc, int64_t chunk_size);`

Allocate memory that will be used during reconstruction,
which includes: `k+m` byte arrays and a matrix for reconstruction.

Parameters:

*   `lrc`
    Specifies LRC parameters. It must have been initialized by `lrc_init_n()` first.

*   `chunk_size`
    Specifies the size for each of `k+m` data/code buffers.
    Internally, actual memory allocated is 16 byte aligned in order to utilize SIMD
    instructions.

Returns:

*   `0`
    If Success.

*   `LRC_INIT_TWICE`
    If `lrc` is already initialized.

*   `LRC_OUT_OF_MEMORY`
    If any `malloc()` fails during initializing.

## lrc_buf_destroy

`void lrc_buf_destroy(lrc_buf_t *lrc_buf);`

Free memory allocated by `lrc_buf_init()`.
It does not free `lrc_buf`.


This is a specialized Erasure Code implementation for storage service.
Which matrix is chosen does not matter,
because most CPU cycles are usually spent on the matrix multiplication that decodes lost data,
not on finding the inverse matrix.

In this implementation Vandermonde matrix is used.

# Analysis

## Reliability

*   If `k`(number of data chunks) is not very large, reliability of Erasure Code(LRC-EC or EC) with `m` codes is similar to n-copy replication with `m+1` copies.

*   LRC-EC can always reconstruct `m - n_local + 1` data loss. In a `(6,6)+4`
    LRC-EC, 3 data loss is always reconstructible.

*   LRC-EC with `m` codes can not always reconstruct `m` data loss.
    In a `(6,6)+4` LRC, there are 1820 different combinations but only 1568 of
    them can be reconstructed(87%).

## IO bandwidth

In calculation, each TB of storage requires
`k * 0.13G` IO throughput(both for network and disk drive) each day
to reconstruct lost data,
where `k` is the number of members in an Erasure Code group.

# TODO

*   Another local code that covers all global codes.
357 | 358 | # Author 359 | 360 | Zhang Yanpo (张炎泼) 361 | 362 | # Copyright and License 363 | 364 | The MIT License (MIT) 365 | 366 | Copyright (c) 2015 Zhang Yanpo (张炎泼) 367 | -------------------------------------------------------------------------------- /src/galois.c: -------------------------------------------------------------------------------- 1 | /* * 2 | * Copyright (c) 2014, James S. Plank and Kevin Greenan 3 | * All rights reserved. 4 | * 5 | * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure 6 | * Coding Techniques 7 | * 8 | * Revision 2.0: Galois Field backend now links to GF-Complete 9 | * 10 | * Redistribution and use in source and binary forms, with or without 11 | * modification, are permitted provided that the following conditions 12 | * are met: 13 | * 14 | * - Redistributions of source code must retain the above copyright 15 | * notice, this list of conditions and the following disclaimer. 16 | * 17 | * - Redistributions in binary form must reproduce the above copyright 18 | * notice, this list of conditions and the following disclaimer in 19 | * the documentation and/or other materials provided with the 20 | * distribution. 21 | * 22 | * - Neither the name of the University of Tennessee nor the names of its 23 | * contributors may be used to endorse or promote products derived 24 | * from this software without specific prior written permission. 25 | * 26 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 30 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 31 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 32 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 33 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 34 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 36 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 | * POSSIBILITY OF SUCH DAMAGE. 38 | */ 39 | 40 | /* Jerasure's authors: 41 | 42 | Revision 2.x - 2014: James S. Plank and Kevin M. Greenan 43 | Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman. 44 | Revision 1.0 - 2007: James S. Plank 45 | */ 46 | 47 | #include 48 | #include 49 | #include 50 | #include 51 | 52 | #include "galois.h" 53 | 54 | #define MAX_GF_INSTANCES 64 55 | gf_t *gfp_array[MAX_GF_INSTANCES] = { 0 }; 56 | int gfp_is_composite[MAX_GF_INSTANCES] = { 0 }; 57 | 58 | gf_t *galois_get_field_ptr(int w) 59 | { 60 | if (gfp_array[w] != NULL) { 61 | return gfp_array[w]; 62 | } 63 | 64 | return NULL; 65 | } 66 | 67 | gf_t* galois_init_field(int w, 68 | int mult_type, 69 | int region_type, 70 | int divide_type, 71 | uint64_t prim_poly, 72 | int arg1, 73 | int arg2) 74 | { 75 | int scratch_size; 76 | void *scratch_memory; 77 | gf_t *gfp; 78 | 79 | if (w <= 0 || w > 32) { 80 | fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w); 81 | exit(1); 82 | } 83 | 84 | gfp = (gf_t *) malloc(sizeof(gf_t)); 85 | if (!gfp) { 86 | fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w); 87 | exit(1); 88 | } 89 | 90 | scratch_size = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2); 91 | if (!scratch_size) { 92 | fprintf(stderr, "ERROR -- cannot get scratch size for base field w=%d\n", w); 93 | exit(1); 94 | } 95 | 96 | scratch_memory = malloc(scratch_size); 
97 | if (!scratch_memory) { 98 | fprintf(stderr, "ERROR -- cannot get scratch memory for base field w=%d\n", w); 99 | exit(1); 100 | } 101 | 102 | if(!gf_init_hard(gfp, 103 | w, 104 | mult_type, 105 | region_type, 106 | divide_type, 107 | prim_poly, 108 | arg1, 109 | arg2, 110 | NULL, 111 | scratch_memory)) 112 | { 113 | fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w); 114 | exit(1); 115 | } 116 | 117 | gfp_is_composite[w] = 0; 118 | return gfp; 119 | } 120 | 121 | gf_t* galois_init_composite_field(int w, 122 | int region_type, 123 | int divide_type, 124 | int degree, 125 | gf_t* base_gf) 126 | { 127 | int scratch_size; 128 | void *scratch_memory; 129 | gf_t *gfp; 130 | 131 | if (w <= 0 || w > 32) { 132 | fprintf(stderr, "ERROR -- cannot init composite field for w=%d\n", w); 133 | exit(1); 134 | } 135 | 136 | gfp = (gf_t *) malloc(sizeof(gf_t)); 137 | if (!gfp) { 138 | fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w); 139 | exit(1); 140 | } 141 | 142 | scratch_size = gf_scratch_size(w, GF_MULT_COMPOSITE, region_type, divide_type, degree, 0); 143 | if (!scratch_size) { 144 | fprintf(stderr, "ERROR -- cannot get scratch size for composite field w=%d\n", w); 145 | exit(1); 146 | } 147 | 148 | scratch_memory = malloc(scratch_size); 149 | if (!scratch_memory) { 150 | fprintf(stderr, "ERROR -- cannot get scratch memory for composite field w=%d\n", w); 151 | exit(1); 152 | } 153 | 154 | if(!gf_init_hard(gfp, 155 | w, 156 | GF_MULT_COMPOSITE, 157 | region_type, 158 | divide_type, 159 | 0, 160 | degree, 161 | 0, 162 | base_gf, 163 | scratch_memory)) 164 | { 165 | fprintf(stderr, "ERROR -- cannot init default composite field for w=%d\n", w); 166 | exit(1); 167 | } 168 | gfp_is_composite[w] = 1; 169 | return gfp; 170 | } 171 | 172 | static void galois_init_default_field(int w) 173 | { 174 | if (w <= 0 || w > 32) { 175 | fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w); 176 | exit(1); 177 | } 178 
| 179 | if (gfp_array[w] == NULL) { 180 | gfp_array[w] = (gf_t*)malloc(sizeof(gf_t)); 181 | if (gfp_array[w] == NULL) { 182 | fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w); 183 | exit(1); 184 | } 185 | } 186 | 187 | if (!gf_init_easy(gfp_array[w], w)) { 188 | fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w); 189 | exit(1); 190 | } 191 | } 192 | 193 | 194 | static int is_valid_gf(gf_t *gf, int w) 195 | { 196 | // TODO: I assume we may eventually 197 | // want to do w=64 and 128, so w 198 | // will be needed to perform this check 199 | (void)w; 200 | 201 | if (gf == NULL) { 202 | return 0; 203 | } 204 | if (gf->multiply.w32 == NULL) { 205 | return 0; 206 | } 207 | if (gf->multiply_region.w32 == NULL) { 208 | return 0; 209 | } 210 | if (gf->divide.w32 == NULL) { 211 | return 0; 212 | } 213 | if (gf->inverse.w32 == NULL) { 214 | return 0; 215 | } 216 | if (gf->extract_word.w32 == NULL) { 217 | return 0; 218 | } 219 | 220 | return 1; 221 | } 222 | 223 | void galois_change_technique(gf_t *gf, int w) 224 | { 225 | if (w <= 0 || w > 32) { 226 | fprintf(stderr, "ERROR -- cannot support Galois field for w=%d\n", w); 227 | exit(1); 228 | } 229 | 230 | if (!is_valid_gf(gf, w)) { 231 | fprintf(stderr, "ERROR -- overriding with invalid Galois field for w=%d\n", w); 232 | exit(1); 233 | } 234 | 235 | if (gfp_array[w] != NULL) { 236 | gf_free(gfp_array[w], gfp_is_composite[w]); 237 | } 238 | 239 | gfp_array[w] = gf; 240 | } 241 | 242 | int galois_single_multiply(int x, int y, int w) 243 | { 244 | if (x == 0 || y == 0) return 0; 245 | 246 | if (gfp_array[w] == NULL) { 247 | galois_init_default_field(w); 248 | } 249 | 250 | if (w <= 32) { 251 | return gfp_array[w]->multiply.w32(gfp_array[w], x, y); 252 | } else { 253 | fprintf(stderr, "ERROR -- Galois field not implemented for w=%d\n", w); 254 | raise(SIGSEGV); 255 | return 0; 256 | } 257 | } 258 | 259 | int galois_single_divide(int x, int y, int w) 260 | { 261 | if (x == 0) 
return 0; 262 | if (y == 0) return -1; 263 | 264 | if (gfp_array[w] == NULL) { 265 | galois_init_default_field(w); 266 | } 267 | 268 | if (w <= 32) { 269 | return gfp_array[w]->divide.w32(gfp_array[w], x, y); 270 | } else { 271 | fprintf(stderr, "ERROR -- Galois field not implemented for w=%d\n", w); 272 | raise(SIGSEGV); 273 | return 0; 274 | } 275 | } 276 | 277 | void galois_w08_region_multiply(char *region, /* Region to multiply */ 278 | int multby, /* Number to multiply by */ 279 | int nbytes, /* Number of bytes in region */ 280 | char *r2, /* If r2 != NULL, products go here */ 281 | int add) 282 | { 283 | if (gfp_array[8] == NULL) { 284 | galois_init_default_field(8); 285 | } 286 | gfp_array[8]->multiply_region.w32(gfp_array[8], region, r2, multby, nbytes, add); 287 | } 288 | 289 | void galois_w16_region_multiply(char *region, /* Region to multiply */ 290 | int multby, /* Number to multiply by */ 291 | int nbytes, /* Number of bytes in region */ 292 | char *r2, /* If r2 != NULL, products go here */ 293 | int add) 294 | { 295 | if (gfp_array[16] == NULL) { 296 | galois_init_default_field(16); 297 | } 298 | gfp_array[16]->multiply_region.w32(gfp_array[16], region, r2, multby, nbytes, add); 299 | } 300 | 301 | 302 | void galois_w32_region_multiply(char *region, /* Region to multiply */ 303 | int multby, /* Number to multiply by */ 304 | int nbytes, /* Number of bytes in region */ 305 | char *r2, /* If r2 != NULL, products go here */ 306 | int add) 307 | { 308 | if (gfp_array[32] == NULL) { 309 | galois_init_default_field(32); 310 | } 311 | gfp_array[32]->multiply_region.w32(gfp_array[32], region, r2, multby, nbytes, add); 312 | } 313 | 314 | void galois_w8_region_xor(void *src, void *dest, int nbytes) 315 | { 316 | if (gfp_array[8] == NULL) { 317 | galois_init_default_field(8); 318 | } 319 | gfp_array[8]->multiply_region.w32(gfp_array[32], src, dest, 1, nbytes, 1); 320 | } 321 | 322 | void galois_w16_region_xor(void *src, void *dest, int nbytes) 323 | { 324 | if 
(gfp_array[16] == NULL) { 325 | galois_init_default_field(16); 326 | } 327 | gfp_array[16]->multiply_region.w32(gfp_array[16], src, dest, 1, nbytes, 1); 328 | } 329 | 330 | void galois_w32_region_xor(void *src, void *dest, int nbytes) 331 | { 332 | if (gfp_array[32] == NULL) { 333 | galois_init_default_field(32); 334 | } 335 | gfp_array[32]->multiply_region.w32(gfp_array[32], src, dest, 1, nbytes, 1); 336 | } 337 | 338 | void galois_region_xor(char *src, char *dest, int nbytes) 339 | { 340 | if (nbytes >= 16) { 341 | galois_w32_region_xor(src, dest, nbytes); 342 | } else { 343 | int i = 0; 344 | for (i = 0; i < nbytes; i++) { 345 | *dest ^= *src; 346 | dest++; 347 | src++; 348 | } 349 | } 350 | } 351 | 352 | int galois_inverse(int y, int w) 353 | { 354 | if (y == 0) return -1; 355 | return galois_single_divide(1, y, w); 356 | } 357 | -------------------------------------------------------------------------------- /src/neon/gf_w64_neon.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * Copyright (c) 2014: Janne Grunau 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * - Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 15 | * - Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in 17 | * the documentation and/or other materials provided with the 18 | * distribution. 
19 | * 20 | * - Neither the name of the University of Tennessee nor the names of its 21 | * contributors may be used to endorse or promote products derived 22 | * from this software without specific prior written permission. 23 | * 24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 28 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 31 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 34 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | * POSSIBILITY OF SUCH DAMAGE. 
36 | * 37 | * gf_w64_neon.c 38 | * 39 | * Neon routines for 64-bit Galois fields 40 | * 41 | */ 42 | 43 | #include "gf_int.h" 44 | #include 45 | #include 46 | #include "gf_w64.h" 47 | 48 | 49 | #ifndef ARCH_AARCH64 50 | #define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)), \ 51 | vtbl2_u8(tbl, vget_high_u8(v))) 52 | #endif 53 | 54 | static 55 | inline 56 | void 57 | neon_w64_split_4_lazy_altmap_multiply_region(gf_t *gf, uint64_t *src, 58 | uint64_t *dst, uint64_t *d_end, 59 | uint64_t val, int xor) 60 | { 61 | unsigned i, j, k; 62 | uint8_t btable[16]; 63 | #ifdef ARCH_AARCH64 64 | uint8x16_t tables[16][8]; 65 | #else 66 | uint8x8x2_t tables[16][8]; 67 | #endif 68 | uint8x16_t p[8], mask1, si; 69 | 70 | gf_internal_t *h = (gf_internal_t *) gf->scratch; 71 | struct gf_split_4_64_lazy_data *ld = (struct gf_split_4_64_lazy_data *) h->private; 72 | 73 | for (i = 0; i < 16; i++) { 74 | for (j = 0; j < 8; j++) { 75 | for (k = 0; k < 16; k++) { 76 | btable[k] = (uint8_t) ld->tables[i][k]; 77 | ld->tables[i][k] >>= 8; 78 | } 79 | #ifdef ARCH_AARCH64 80 | tables[i][j] = vld1q_u8(btable); 81 | #else 82 | tables[i][j].val[0] = vld1_u8(btable); 83 | tables[i][j].val[1] = vld1_u8(btable + 8); 84 | #endif 85 | } 86 | } 87 | 88 | mask1 = vdupq_n_u8(0xf); 89 | 90 | while (dst < d_end) { 91 | 92 | if (xor) { 93 | for (i = 0; i < 8; i++) 94 | p[i] = vld1q_u8((uint8_t *) (dst + i * 2)); 95 | } else { 96 | for (i = 0; i < 8; i++) 97 | p[i] = vdupq_n_u8(0); 98 | } 99 | 100 | i = 0; 101 | for (k = 0; k < 8; k++) { 102 | uint8x16_t v0 = vld1q_u8((uint8_t *) src); 103 | src += 2; 104 | 105 | si = vandq_u8(v0, mask1); 106 | for (j = 0; j < 8; j++) { 107 | p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si)); 108 | } 109 | i++; 110 | si = vshrq_n_u8(v0, 4); 111 | for (j = 0; j < 8; j++) { 112 | p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si)); 113 | } 114 | i++; 115 | 116 | } 117 | for (i = 0; i < 8; i++) { 118 | vst1q_u8((uint8_t *) dst, p[i]); 119 | dst += 2; 120 | } 121 
| } 122 | } 123 | 124 | static 125 | inline 126 | void 127 | neon_w64_split_4_lazy_multiply_region(gf_t *gf, uint64_t *src, uint64_t *dst, 128 | uint64_t *d_end, uint64_t val, int xor) 129 | { 130 | unsigned i, j, k; 131 | uint8_t btable[16]; 132 | #ifdef ARCH_AARCH64 133 | uint8x16_t tables[16][8]; 134 | #else 135 | uint8x8x2_t tables[16][8]; 136 | #endif 137 | uint8x16_t p[8], mask1, si; 138 | uint64x2_t st[8]; 139 | uint32x4x2_t s32[4]; 140 | uint16x8x2_t s16[4]; 141 | uint8x16x2_t s8[4]; 142 | 143 | gf_internal_t *h = (gf_internal_t *) gf->scratch; 144 | struct gf_split_4_64_lazy_data *ld = (struct gf_split_4_64_lazy_data *) h->private; 145 | 146 | for (i = 0; i < 16; i++) { 147 | for (j = 0; j < 8; j++) { 148 | for (k = 0; k < 16; k++) { 149 | btable[k] = (uint8_t) ld->tables[i][k]; 150 | ld->tables[i][k] >>= 8; 151 | } 152 | #ifdef ARCH_AARCH64 153 | tables[i][j] = vld1q_u8(btable); 154 | #else 155 | tables[i][j].val[0] = vld1_u8(btable); 156 | tables[i][j].val[1] = vld1_u8(btable + 8); 157 | #endif 158 | } 159 | } 160 | 161 | mask1 = vdupq_n_u8(0xf); 162 | 163 | while (dst < d_end) { 164 | 165 | for (k = 0; k < 8; k++) { 166 | st[k] = vld1q_u64(src); 167 | src += 2; 168 | p[k] = vdupq_n_u8(0); 169 | } 170 | 171 | s32[0] = vuzpq_u32(vreinterpretq_u32_u64(st[0]), 172 | vreinterpretq_u32_u64(st[1])); 173 | s32[1] = vuzpq_u32(vreinterpretq_u32_u64(st[2]), 174 | vreinterpretq_u32_u64(st[3])); 175 | s32[2] = vuzpq_u32(vreinterpretq_u32_u64(st[4]), 176 | vreinterpretq_u32_u64(st[5])); 177 | s32[3] = vuzpq_u32(vreinterpretq_u32_u64(st[6]), 178 | vreinterpretq_u32_u64(st[7])); 179 | 180 | s16[0] = vuzpq_u16(vreinterpretq_u16_u32(s32[0].val[0]), 181 | vreinterpretq_u16_u32(s32[1].val[0])); 182 | s16[1] = vuzpq_u16(vreinterpretq_u16_u32(s32[2].val[0]), 183 | vreinterpretq_u16_u32(s32[3].val[0])); 184 | s16[2] = vuzpq_u16(vreinterpretq_u16_u32(s32[0].val[1]), 185 | vreinterpretq_u16_u32(s32[1].val[1])); 186 | s16[3] = vuzpq_u16(vreinterpretq_u16_u32(s32[2].val[1]), 187 
| vreinterpretq_u16_u32(s32[3].val[1])); 188 | 189 | s8[0] = vuzpq_u8(vreinterpretq_u8_u16(s16[0].val[0]), 190 | vreinterpretq_u8_u16(s16[1].val[0])); 191 | s8[1] = vuzpq_u8(vreinterpretq_u8_u16(s16[0].val[1]), 192 | vreinterpretq_u8_u16(s16[1].val[1])); 193 | s8[2] = vuzpq_u8(vreinterpretq_u8_u16(s16[2].val[0]), 194 | vreinterpretq_u8_u16(s16[3].val[0])); 195 | s8[3] = vuzpq_u8(vreinterpretq_u8_u16(s16[2].val[1]), 196 | vreinterpretq_u8_u16(s16[3].val[1])); 197 | 198 | i = 0; 199 | for (k = 0; k < 8; k++) { 200 | si = vandq_u8(s8[k >> 1].val[k & 1], mask1); 201 | for (j = 0; j < 8; j++) { 202 | p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si)); 203 | } 204 | i++; 205 | si = vshrq_n_u8(s8[k >> 1].val[k & 1], 4); 206 | for (j = 0; j < 8; j++) { 207 | p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si)); 208 | } 209 | i++; 210 | } 211 | 212 | s8[0] = vzipq_u8(p[0], p[1]); 213 | s8[1] = vzipq_u8(p[2], p[3]); 214 | s8[2] = vzipq_u8(p[4], p[5]); 215 | s8[3] = vzipq_u8(p[6], p[7]); 216 | 217 | s16[0] = vzipq_u16(vreinterpretq_u16_u8(s8[0].val[0]), 218 | vreinterpretq_u16_u8(s8[1].val[0])); 219 | s16[1] = vzipq_u16(vreinterpretq_u16_u8(s8[2].val[0]), 220 | vreinterpretq_u16_u8(s8[3].val[0])); 221 | s16[2] = vzipq_u16(vreinterpretq_u16_u8(s8[0].val[1]), 222 | vreinterpretq_u16_u8(s8[1].val[1])); 223 | s16[3] = vzipq_u16(vreinterpretq_u16_u8(s8[2].val[1]), 224 | vreinterpretq_u16_u8(s8[3].val[1])); 225 | 226 | s32[0] = vzipq_u32(vreinterpretq_u32_u16(s16[0].val[0]), 227 | vreinterpretq_u32_u16(s16[1].val[0])); 228 | s32[1] = vzipq_u32(vreinterpretq_u32_u16(s16[0].val[1]), 229 | vreinterpretq_u32_u16(s16[1].val[1])); 230 | s32[2] = vzipq_u32(vreinterpretq_u32_u16(s16[2].val[0]), 231 | vreinterpretq_u32_u16(s16[3].val[0])); 232 | s32[3] = vzipq_u32(vreinterpretq_u32_u16(s16[2].val[1]), 233 | vreinterpretq_u32_u16(s16[3].val[1])); 234 | 235 | for (k = 0; k < 8; k ++) { 236 | st[k] = vreinterpretq_u64_u32(s32[k >> 1].val[k & 1]); 237 | } 238 | 239 | if (xor) { 240 | for (i = 
0; i < 8; i++) { 241 | uint64x2_t t1 = vld1q_u64(dst); 242 | vst1q_u64(dst, veorq_u64(st[i], t1)); 243 | dst += 2; 244 | } 245 | } else { 246 | for (i = 0; i < 8; i++) { 247 | vst1q_u64(dst, st[i]); 248 | dst += 2; 249 | } 250 | } 251 | 252 | } 253 | } 254 | 255 | static 256 | void 257 | gf_w64_neon_split_4_lazy_multiply_region(gf_t *gf, void *src, void *dest, 258 | uint64_t val, int bytes, int xor, 259 | int altmap) 260 | { 261 | gf_internal_t *h; 262 | int i, j, k; 263 | uint64_t pp, v, *s64, *d64, *top; 264 | struct gf_split_4_64_lazy_data *ld; 265 | gf_region_data rd; 266 | 267 | if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } 268 | if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; } 269 | 270 | gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 128); 271 | gf_do_initial_region_alignment(&rd); 272 | 273 | s64 = (uint64_t *) rd.s_start; 274 | d64 = (uint64_t *) rd.d_start; 275 | top = (uint64_t *) rd.d_top; 276 | 277 | h = (gf_internal_t *) gf->scratch; 278 | pp = h->prim_poly; 279 | ld = (struct gf_split_4_64_lazy_data *) h->private; 280 | 281 | v = val; 282 | for (i = 0; i < 16; i++) { 283 | ld->tables[i][0] = 0; 284 | for (j = 1; j < 16; j <<= 1) { 285 | for (k = 0; k < j; k++) { 286 | ld->tables[i][k^j] = (v ^ ld->tables[i][k]); 287 | } 288 | v = (v & GF_FIRST_BIT) ? 
((v << 1) ^ pp) : (v << 1); 289 | } 290 | } 291 | 292 | if (altmap) { 293 | if (xor) 294 | neon_w64_split_4_lazy_altmap_multiply_region(gf, s64, d64, top, val, 1); 295 | else 296 | neon_w64_split_4_lazy_altmap_multiply_region(gf, s64, d64, top, val, 0); 297 | } else { 298 | if (xor) 299 | neon_w64_split_4_lazy_multiply_region(gf, s64, d64, top, val, 1); 300 | else 301 | neon_w64_split_4_lazy_multiply_region(gf, s64, d64, top, val, 0); 302 | } 303 | 304 | gf_do_final_region_alignment(&rd); 305 | } 306 | 307 | static 308 | void 309 | gf_w64_split_4_64_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest, 310 | uint64_t val, int bytes, int xor) 311 | { 312 | gf_w64_neon_split_4_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0); 313 | } 314 | 315 | static 316 | void 317 | gf_w64_split_4_64_lazy_altmap_multiply_region_neon(gf_t *gf, void *src, 318 | void *dest, uint64_t val, 319 | int bytes, int xor) 320 | { 321 | gf_w64_neon_split_4_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1); 322 | } 323 | 324 | void gf_w64_neon_split_init(gf_t *gf) 325 | { 326 | gf_internal_t *h = (gf_internal_t *) gf->scratch; 327 | 328 | if (h->region_type & GF_REGION_ALTMAP) 329 | gf->multiply_region.w64 = gf_w64_split_4_64_lazy_altmap_multiply_region_neon; 330 | else 331 | gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region_neon; 332 | 333 | } 334 | -------------------------------------------------------------------------------- /src/lrc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * The MIT License (MIT) 3 | * 4 | * Copyright (c) 2015 Zhang Yanpo (张炎泼) 5 | */ 6 | 7 | #include "lrc.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | int lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_k_arr, int m) { 16 | 17 | int ret = 0; 18 | 19 | if (lrc->inited_ == 1) { 20 | return LRC_INIT_TWICE; 21 | } 22 | 23 | if (m < n_local) { 24 | return LRC_INVALID_M; 25 | } 26 | 27 | bzero(lrc, 
sizeof(*lrc)); 28 | 29 | lrc->n_local = n_local; 30 | 31 | lrc->locals = malloc(sizeof(*lrc->locals) * lrc->n_local); 32 | if (lrc->locals == NULL) { 33 | ret = LRC_OUT_OF_MEMORY; 34 | goto exit; 35 | } 36 | 37 | lrc->k = 0; 38 | lrc->m = m; 39 | 40 | for (int i = 0; i < n_local; i++) { 41 | 42 | lrc->locals[i].start = lrc->k; 43 | lrc->locals[i].len = local_k_arr[i]; 44 | 45 | lrc->k += local_k_arr[i]; 46 | } 47 | 48 | lrc->n = lrc->k + lrc->m; 49 | 50 | /* matrix */ 51 | lrc->matrix = lrc_make_matrix(lrc); 52 | if (lrc->matrix == NULL) { 53 | ret = LRC_OUT_OF_MEMORY; 54 | goto exit; 55 | } 56 | 57 | /* An error index that indicates all codes are damaged */ 58 | lrc->code_erased = calloc(sizeof(lrc->code_erased[0]), lrc->n); 59 | if (lrc->code_erased == NULL) { 60 | ret = LRC_OUT_OF_MEMORY; 61 | goto exit; 62 | } 63 | 64 | for (int i = 0; i < lrc->m; i++) { 65 | lrc->code_erased[lrc->k + i] = 1; 66 | } 67 | 68 | lrc->inited_ = 1; 69 | 70 | exit: 71 | 72 | if (ret != 0) { 73 | free(lrc->code_erased); 74 | free(lrc->locals); 75 | free(lrc->matrix); 76 | } 77 | 78 | return ret; 79 | } 80 | 81 | void lrc_destroy(lrc_t *lrc) { 82 | 83 | if (lrc->inited_ == 0) { 84 | return; 85 | } 86 | 87 | free(lrc->code_erased); 88 | free(lrc->locals); 89 | free(lrc->matrix); 90 | 91 | bzero(lrc, sizeof(*lrc)); 92 | } 93 | 94 | int lrc_encode(lrc_t *lrc, lrc_buf_t *lb) { 95 | return lrc_decode(lrc, lb, lrc->code_erased); 96 | } 97 | 98 | int lrc_decode(lrc_t *lrc, lrc_buf_t *lb, int8_t *erased) { 99 | 100 | int ret = 0; 101 | lrc_decoder_t *dec = &(lrc_decoder_t) {0}; 102 | 103 | ret = lrc_decoder_init(dec, lrc, lb, erased); 104 | if (ret != 0) { 105 | goto exit; 106 | } 107 | 108 | ret = lrc_decoder_decode(dec); 109 | 110 | exit: 111 | 112 | lrc_decoder_destroy(dec); 113 | 114 | return ret; 115 | } 116 | 117 | int lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source) { 118 | 119 | /* we need at least as many equations as erased chunks */ 120 | 121 | int n_erased = 
lrc_count_erased(lrc->n, erased); 122 | int ret = 0; 123 | 124 | for (int i = 0; i < lrc->n_local; i++) { 125 | 126 | int n = lrc_get_n_locally_erased(lrc, i, erased); 127 | if (n == 0) { 128 | continue; 129 | } 130 | 131 | n_erased--; 132 | 133 | /* local data for reconstruction */ 134 | lrc_local_t *l = &lrc->locals[i]; 135 | 136 | for (int j = l->start; j < l->start + l->len; j++) { 137 | source[j] = erased[j] == 0; 138 | } 139 | 140 | /* local code for reconstruction */ 141 | int j = lrc->k + i; 142 | source[j] = erased[j] == 0; 143 | 144 | } 145 | 146 | if (n_erased > 0) { 147 | 148 | for (int i = 0; i < lrc->k; i++) { 149 | source[i] = (erased[i] == 0); 150 | } 151 | 152 | for (int i = lrc->k + lrc->n_local; i < lrc->n; i++) { 153 | 154 | source[i] = (erased[i] == 0); 155 | 156 | n_erased--; 157 | 158 | if (n_erased == 0) { 159 | break; 160 | } 161 | } 162 | } 163 | 164 | if (n_erased > 0) { 165 | ret = LRC_UNRECOVERABLE; 166 | goto exit; 167 | } 168 | 169 | lrc_debug_sources(lrc->n, source); 170 | 171 | exit: 172 | 173 | return ret; 174 | } 175 | 176 | int *lrc_make_matrix(lrc_t *lrc) { 177 | /* 178 | * LRC Erasure Code: 179 | * d0 d1 d2 d3 d4 d5 180 | * ------- ------- ------- 181 | * c1.1 c1.2 c1.3 182 | * 183 | * c1.1 c1.2 c1.3 are codes calculated from a sub set. 
184 | * We have c1.1 ^ c1.2 ^ c1.3 = c1, 185 | * because the coefficients of the first row of a Vandermonde matrix are always 1 186 | * 187 | * k = 6, n_local = 3, m = 5 188 | * 189 | * | 1 1 0 0 0 0 | | d1 | | c1.1 | | 190 | * | 0 0 1 1 0 0 | | d2 | | c1.2 | | ^= c1 191 | * | 0 0 0 0 1 1 | X | d3 | = | c1.3 | | 192 | * | 1 2 4 8 * * | | d4 | | c2 | 193 | * | 1 3 9 * * * | | d5 | | c3 | 194 | */ 195 | 196 | int k = lrc->k; 197 | int m = lrc->m; 198 | int *matrix = NULL; 199 | int *lrc_matrix = NULL; 200 | 201 | /* one row more than the number of non-local codes is requested: row 0 is not copied below */ matrix = reed_sol_vandermonde_coding_matrix(k, m - lrc->n_local + 1, 8); 202 | if (matrix == NULL) { 203 | goto exit; 204 | } 205 | 206 | lrc_matrix = malloc(sizeof(int) * k * lrc->m); 207 | if (lrc_matrix == NULL) { 208 | goto exit; 209 | } 210 | 211 | bzero(lrc_matrix, sizeof(int) * k * lrc->m); 212 | 213 | /* rows [0, n_local): coefficient 1 for every data chunk of local group i */ for (int i = 0; i < lrc->n_local; i++) { 214 | 215 | lrc_local_t *l = &lrc->locals[i]; 216 | 217 | for (int j = 0; j < l->len; j++) { 218 | lrc_matrix[i * k + l->start + j] = 1; 219 | } 220 | 221 | } 222 | 223 | /* rows [n_local, m): copies of Vandermonde rows 1 .. m - n_local */ for (int i = 0; i < m - lrc->n_local; i++) { 224 | for (int j = 0; j < k; j++) { 225 | lrc_matrix[(lrc->n_local + i)*k + j] = matrix[(i + 1) * k + j]; 226 | } 227 | } 228 | 229 | exit: 230 | /* matrix is freed on every path; lrc_matrix is NULL if either allocation failed */ free(matrix); 231 | return lrc_matrix; 232 | } 233 | 234 | /* Counts the entries equal to 1 in erased[] over the data range of local group idx_local, plus one if the group's own code entry erased[k + idx_local] is non-zero. */ int lrc_get_n_locally_erased(lrc_t *lrc, int idx_local, int8_t *erased) { 235 | 236 | int start = lrc->locals[idx_local].start; 237 | int end = start + lrc->locals[idx_local].len; 238 | int n_damaged = 0; 239 | 240 | /* data in this region is damaged or its code is damaged */ 241 | for (int i = start; i < end; i++) { 242 | if (erased[i] == 1) { 243 | n_damaged++; 244 | } 245 | } 246 | 247 | if (erased[lrc->k + idx_local]) { 248 | n_damaged++; 249 | } 250 | 251 | return n_damaged; 252 | } 253 | 254 | /* Returns how many of the first n entries of erased[] are non-zero. */ int lrc_count_erased(int n, int8_t *erased) { 255 | 256 | int en = 0; 257 | 258 | for (int i = 0; i < n; i++) { 259 | if (erased[i]) { 260 | en++; 261 | } 262 | } 263 | 264 | return en; 265 | } 266 | 267 | void
lrc_debug_buf_line_(lrc_buf_t *lb, int n) { 268 | 269 | char *b; 270 | (void)b; 271 | 272 | dd("#%04d:", n); 273 | if (n < 0 || n >= lb->chunk_size) { 274 | dlog("--\n"); 275 | return; 276 | } 277 | 278 | for (int i = 0; i < lb->n_data; i++) { 279 | b = lb->data[i]; 280 | if (b[n] == 0) { 281 | dlog(" . "); 282 | } else { 283 | dlog("%02x ", (unsigned char)b[n]); 284 | } 285 | } 286 | 287 | dlog("| "); 288 | 289 | for (int i = 0; i < lb->n_code; i++) { 290 | b = lb->code[i]; 291 | if (b[n] == 0) { 292 | dlog(" . "); 293 | } else { 294 | dlog("%02x ", (unsigned char)b[n]); 295 | } 296 | } 297 | 298 | dlog("\n"); 299 | } 300 | 301 | void lrc_debug_matrix_(int *matrix, int row, int col) { 302 | 303 | dd("matrix:"); 304 | 305 | for (int i = 0; i < row; i++) { 306 | 307 | for (int j = 0; j < col; j++) { 308 | 309 | int e = matrix[i * col + j]; 310 | if (e == 0) { 311 | dlog(" . "); 312 | } else { 313 | dlog("%02x ", e); 314 | } 315 | } 316 | dlog("\n"); 317 | } 318 | } 319 | 320 | void lrc_debug_sources_(int n, int8_t *source) { 321 | 322 | dd("source:"); 323 | 324 | for (int i = 0; i < n; i++) { 325 | 326 | int8_t e = source[i]; 327 | 328 | if (e == 0) { 329 | dlog(" . 
"); 330 | } else { 331 | dlog("%02x ", e); 332 | } 333 | } 334 | 335 | dlog("\n"); 336 | } 337 | 338 | /* lrc_buf_t */ 339 | 340 | int lrc_buf_init(lrc_buf_t *lb, lrc_t *lrc, int64_t chunk_size) { 341 | 342 | int ret = 0; 343 | 344 | if (lb->inited_ == 1) { 345 | return LRC_INIT_TWICE; 346 | } 347 | 348 | bzero(lb, sizeof(*lb)); 349 | 350 | lb->n_data = lrc->k; 351 | lb->n_code = lrc->m; 352 | lb->n = lb->n_data + lb->n_code; 353 | 354 | lb->chunk_size = chunk_size; 355 | lb->aligned_chunk_size = lrc_align_16(chunk_size); 356 | 357 | ret = posix_memalign((void **)&lb->buf, 16, 358 | lb->aligned_chunk_size * lb->n); 359 | if (ret != 0) { 360 | goto exit; 361 | } 362 | 363 | for (int i = 0; i < lb->n; i++) { 364 | lb->data[i] = lb->buf + lb->aligned_chunk_size * i; 365 | } 366 | 367 | lb->code = &lb->data[lb->n_data]; 368 | 369 | lb->buf_owned = 1; 370 | lb->inited_ = 1; 371 | 372 | exit: 373 | 374 | if (ret != 0) { 375 | free(lb->buf); 376 | } 377 | 378 | return ret; 379 | } 380 | 381 | void lrc_buf_destroy(lrc_buf_t *lb) { 382 | 383 | if (lb == NULL || lb->inited_ == 0) { 384 | return; 385 | } 386 | 387 | if (lb->buf_owned == 1) { 388 | free(lb->buf); 389 | } 390 | 391 | bzero(lb, sizeof(*lb)); 392 | } 393 | 394 | int lrc_buf_shadow(lrc_buf_t *lb, lrc_buf_t *src) { 395 | *lb = *src; 396 | lb->code = &lb->data[lb->n_data]; 397 | lb->buf_owned = 0; 398 | return 0; 399 | } 400 | 401 | /* lrc decoder */ 402 | 403 | int lrc_decoder_init(lrc_decoder_t *dec, lrc_t *lrc, lrc_buf_t *lb, int8_t *erased) { 404 | 405 | /* 406 | * To a certain pattern of data loss, a specific matrix specific is required 407 | * to be created for decoding. 408 | * Matrix rows that do not cover lost data is removed. 409 | * 410 | * Because jerasure uses only first n_of_damaged row of the encoding matrix 411 | * to decode. It is not enough if lrc is used. 
412 | * 413 | * For example with a encoding matrix 3*5: 414 | * 1 1 1 0 0 415 | * 0 0 0 1 1 416 | * 1 2 4 8 16 417 | * If data [0], [1], [2] are lost, row[0] contributes nothing to decode. 418 | */ 419 | 420 | int k = lrc->k; 421 | int ret = 0; 422 | 423 | if (dec->inited_ == 1) { 424 | return LRC_INIT_TWICE; 425 | } 426 | 427 | bzero(dec, sizeof(*dec)); 428 | 429 | dec->lrc = lrc; 430 | 431 | ret = lrc_buf_shadow(&dec->buf, lb); 432 | if (ret != 0) { 433 | goto exit; 434 | } 435 | 436 | ret = lrc_get_source(lrc, erased, dec->source); 437 | if (ret != 0) { 438 | goto exit; 439 | } 440 | 441 | /* only copy erased data. erased code will be remapped */ 442 | for (int i = 0; i < lrc->k; i++) { 443 | dec->erased[i] = erased[i]; 444 | } 445 | 446 | dec->decode_matrix = malloc(sizeof(int) * lrc->m * k); 447 | if (dec->decode_matrix == NULL) { 448 | ret = LRC_OUT_OF_MEMORY; 449 | goto exit; 450 | } 451 | 452 | int to = k; 453 | for (int i = lrc->k; i < lrc->n; i++) { 454 | if (dec->source[i] == 1 || erased[i] == 1) { 455 | dd("decoder map: %d -> %d", i, to); 456 | dec->buf.code[to - k] = lb->code[i - k]; 457 | dec->erased[to] = erased[i]; 458 | memcpy(&dec->decode_matrix[(to - k) * k], &lrc->matrix[(i - k) * k], sizeof(lrc->matrix[0]) * k); 459 | to++; 460 | } 461 | } 462 | 463 | dec->buf.n_code = to - k; 464 | dec->buf.n = dec->buf.n_data + dec->buf.n_code; 465 | dec->inited_ = 1; 466 | 467 | dd("\ndecoder inited:"); 468 | lrc_debug_matrix(dec->decode_matrix, to - k, k); 469 | lrc_debug_sources(dec->lrc->n, dec->source); 470 | 471 | exit: 472 | 473 | if (ret != 0) { 474 | free(dec->decode_matrix); 475 | } 476 | 477 | return ret; 478 | } 479 | 480 | void lrc_decoder_destroy(lrc_decoder_t *dec) { 481 | 482 | if (dec == NULL || dec->inited_ == 0) { 483 | return; 484 | } 485 | 486 | free(dec->decode_matrix); 487 | 488 | bzero(dec, sizeof(*dec)); 489 | } 490 | 491 | int lrc_decoder_decode(lrc_decoder_t *dec) { 492 | 493 | lrc_buf_t *lb = &dec->buf; 494 | int erasures[512] = 
{0}; 495 | int start = 0; 496 | 497 | for (int i = 0; i < dec->lrc->n; i++) { 498 | 499 | if (dec->erased[i] == 1) { 500 | 501 | erasures[start] = i; 502 | dd("erasures: %d", i); 503 | start++; 504 | } 505 | } 506 | erasures[start] = -1; 507 | 508 | return jerasure_matrix_decode(lb->n_data, lb->n_code, 8, dec->decode_matrix, 0, 509 | erasures, lb->data, lb->code, 510 | lb->chunk_size); 511 | } 512 | 513 | // vim:sw=2:fdl=0 514 | -------------------------------------------------------------------------------- /src/neon/gf_w16_neon.c: -------------------------------------------------------------------------------- 1 | /* 2 | * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic 3 | * James S. Plank, Ethan L. Miller, Kevin M. Greenan, 4 | * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride. 5 | * 6 | * Copyright (c) 2014: Janne Grunau 7 | * 8 | * Redistribution and use in source and binary forms, with or without 9 | * modification, are permitted provided that the following conditions 10 | * are met: 11 | * 12 | * - Redistributions of source code must retain the above copyright 13 | * notice, this list of conditions and the following disclaimer. 14 | * 15 | * - Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in 17 | * the documentation and/or other materials provided with the 18 | * distribution. 19 | * 20 | * - Neither the name of the University of Tennessee nor the names of its 21 | * contributors may be used to endorse or promote products derived 22 | * from this software without specific prior written permission. 23 | * 24 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 28 | * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 31 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 34 | * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 35 | * POSSIBILITY OF SUCH DAMAGE. 36 | * 37 | * 38 | * gf_w16_neon.c 39 | * 40 | * Neon routines for 16-bit Galois fields 41 | * 42 | */ 43 | 44 | #include "gf_int.h" 45 | #include 46 | #include 47 | #include "gf_w16.h" 48 | 49 | #ifdef ARCH_AARCH64 50 | static 51 | inline 52 | void 53 | neon_w16_split_4_multiply_region(gf_t *gf, uint16_t *src, uint16_t *dst, 54 | uint16_t *d_end, uint8_t *tbl, 55 | gf_val_32_t val, int xor) 56 | { 57 | unsigned i; 58 | uint8_t *high = tbl + 4 * 16; 59 | uint16x8_t va0, va1, r0, r1; 60 | uint8x16_t loset, rl, rh; 61 | uint8x16x2_t va; 62 | 63 | uint8x16_t tbl_h[4], tbl_l[4]; 64 | for (i = 0; i < 4; i++) { 65 | tbl_l[i] = vld1q_u8(tbl + i*16); 66 | tbl_h[i] = vld1q_u8(high + i*16); 67 | } 68 | 69 | loset = vdupq_n_u8(0xf); 70 | 71 | while (dst < d_end) { 72 | va0 = vld1q_u16(src); 73 | va1 = vld1q_u16(src + 8); 74 | 75 | va = vtrnq_u8(vreinterpretq_u8_u16(va0), vreinterpretq_u8_u16(va1)); 76 | 77 | rl = vqtbl1q_u8(tbl_l[0], vandq_u8(va.val[0], loset)); 78 | rh = vqtbl1q_u8(tbl_h[0], vandq_u8(va.val[0], loset)); 79 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[2], vandq_u8(va.val[1], loset))); 80 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[2], vandq_u8(va.val[1], loset))); 81 | 82 | va.val[0] = vshrq_n_u8(va.val[0], 4); 83 | va.val[1] = vshrq_n_u8(va.val[1], 4); 84 | 85 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[1], va.val[0])); 86 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[1], va.val[0])); 87 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[3], 
va.val[1])); 88 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[3], va.val[1])); 89 | 90 | va = vtrnq_u8(rl, rh); 91 | r0 = vreinterpretq_u16_u8(va.val[0]); 92 | r1 = vreinterpretq_u16_u8(va.val[1]); 93 | 94 | if (xor) { 95 | va0 = vld1q_u16(dst); 96 | va1 = vld1q_u16(dst + 8); 97 | r0 = veorq_u16(r0, va0); 98 | r1 = veorq_u16(r1, va1); 99 | } 100 | vst1q_u16(dst, r0); 101 | vst1q_u16(dst + 8, r1); 102 | 103 | src += 16; 104 | dst += 16; 105 | } 106 | } 107 | 108 | static 109 | inline 110 | void 111 | neon_w16_split_4_altmap_multiply_region(gf_t *gf, uint8_t *src, 112 | uint8_t *dst, uint8_t *d_end, 113 | uint8_t *tbl, gf_val_32_t val, 114 | int xor) 115 | { 116 | unsigned i; 117 | uint8_t *high = tbl + 4 * 16; 118 | uint8x16_t vh, vl, rh, rl; 119 | uint8x16_t loset; 120 | 121 | uint8x16_t tbl_h[4], tbl_l[4]; 122 | for (i = 0; i < 4; i++) { 123 | tbl_l[i] = vld1q_u8(tbl + i*16); 124 | tbl_h[i] = vld1q_u8(high + i*16); 125 | } 126 | 127 | loset = vdupq_n_u8(0xf); 128 | 129 | while (dst < d_end) { 130 | vh = vld1q_u8(src); 131 | vl = vld1q_u8(src + 16); 132 | 133 | rl = vqtbl1q_u8(tbl_l[0], vandq_u8(vl, loset)); 134 | rh = vqtbl1q_u8(tbl_h[0], vandq_u8(vl, loset)); 135 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[2], vandq_u8(vh, loset))); 136 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[2], vandq_u8(vh, loset))); 137 | 138 | vl = vshrq_n_u8(vl, 4); 139 | vh = vshrq_n_u8(vh, 4); 140 | 141 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[1], vl)); 142 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[1], vl)); 143 | rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[3], vh)); 144 | rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[3], vh)); 145 | 146 | if (xor) { 147 | vh = vld1q_u8(dst); 148 | vl = vld1q_u8(dst + 16); 149 | rh = veorq_u8(rh, vh); 150 | rl = veorq_u8(rl, vl); 151 | } 152 | vst1q_u8(dst, rh); 153 | vst1q_u8(dst + 16, rl); 154 | 155 | src += 32; 156 | dst += 32; 157 | } 158 | } 159 | 160 | #else /* ARCH_AARCH64 */ 161 | 162 | static 163 | inline 164 | void 165 | neon_w16_split_4_multiply_region(gf_t *gf, uint16_t *src, uint16_t *dst, 
166 | uint16_t *d_end, uint8_t *tbl, 167 | gf_val_32_t val, int xor) 168 | { 169 | unsigned i; 170 | uint8_t *high = tbl + 4 * 16; 171 | uint16x8_t va, r; 172 | uint8x8_t loset, vb, vc, rl, rh; 173 | 174 | uint8x8x2_t tbl_h[4], tbl_l[4]; 175 | for (i = 0; i < 4; i++) { 176 | tbl_l[i].val[0] = vld1_u8(tbl + i*16); 177 | tbl_l[i].val[1] = vld1_u8(tbl + i*16 + 8); 178 | tbl_h[i].val[0] = vld1_u8(high + i*16); 179 | tbl_h[i].val[1] = vld1_u8(high + i*16 + 8); 180 | } 181 | 182 | loset = vdup_n_u8(0xf); 183 | 184 | while (dst < d_end) { 185 | va = vld1q_u16(src); 186 | 187 | vb = vmovn_u16(va); 188 | vc = vshrn_n_u16(va, 8); 189 | 190 | rl = vtbl2_u8(tbl_l[0], vand_u8(vb, loset)); 191 | rh = vtbl2_u8(tbl_h[0], vand_u8(vb, loset)); 192 | vb = vshr_n_u8(vb, 4); 193 | rl = veor_u8(rl, vtbl2_u8(tbl_l[2], vand_u8(vc, loset))); 194 | rh = veor_u8(rh, vtbl2_u8(tbl_h[2], vand_u8(vc, loset))); 195 | vc = vshr_n_u8(vc, 4); 196 | rl = veor_u8(rl, vtbl2_u8(tbl_l[1], vb)); 197 | rh = veor_u8(rh, vtbl2_u8(tbl_h[1], vb)); 198 | rl = veor_u8(rl, vtbl2_u8(tbl_l[3], vc)); 199 | rh = veor_u8(rh, vtbl2_u8(tbl_h[3], vc)); 200 | 201 | r = vmovl_u8(rl); 202 | r = vorrq_u16(r, vshll_n_u8(rh, 8)); 203 | 204 | if (xor) { 205 | va = vld1q_u16(dst); 206 | r = veorq_u16(r, va); 207 | } 208 | vst1q_u16(dst, r); 209 | 210 | src += 8; 211 | dst += 8; 212 | } 213 | } 214 | 215 | static 216 | inline 217 | void 218 | neon_w16_split_4_altmap_multiply_region(gf_t *gf, uint8_t *src, 219 | uint8_t *dst, uint8_t *d_end, 220 | uint8_t *tbl, gf_val_32_t val, 221 | int xor) 222 | { 223 | unsigned i; 224 | uint8_t *high = tbl + 4 * 16; 225 | uint8x8_t vh0, vh1, vl0, vl1, r0, r1, r2, r3; 226 | uint8x8_t loset; 227 | 228 | uint8x8x2_t tbl_h[4], tbl_l[4]; 229 | for (i = 0; i < 4; i++) { 230 | tbl_l[i].val[0] = vld1_u8(tbl + i*16); 231 | tbl_l[i].val[1] = vld1_u8(tbl + i*16 + 8); 232 | tbl_h[i].val[0] = vld1_u8(high + i*16); 233 | tbl_h[i].val[1] = vld1_u8(high + i*16 + 8); 234 | } 235 | 236 | loset = vdup_n_u8(0xf); 
237 | 238 | while (dst < d_end) { 239 | vh0 = vld1_u8(src); 240 | vh1 = vld1_u8(src + 8); 241 | vl0 = vld1_u8(src + 16); 242 | vl1 = vld1_u8(src + 24); 243 | 244 | r0 = vtbl2_u8(tbl_l[0], vand_u8(vh0, loset)); 245 | r1 = vtbl2_u8(tbl_h[0], vand_u8(vh1, loset)); 246 | r2 = vtbl2_u8(tbl_l[2], vand_u8(vl0, loset)); 247 | r3 = vtbl2_u8(tbl_h[2], vand_u8(vl1, loset)); 248 | 249 | vh0 = vshr_n_u8(vh0, 4); 250 | vh1 = vshr_n_u8(vh1, 4); 251 | vl0 = vshr_n_u8(vl0, 4); 252 | vl1 = vshr_n_u8(vl1, 4); 253 | 254 | r0 = veor_u8(r0, vtbl2_u8(tbl_l[1], vh0)); 255 | r1 = veor_u8(r1, vtbl2_u8(tbl_h[1], vh1)); 256 | r2 = veor_u8(r2, vtbl2_u8(tbl_l[3], vl0)); 257 | r3 = veor_u8(r3, vtbl2_u8(tbl_h[3], vl1)); 258 | 259 | if (xor) { 260 | vh0 = vld1_u8(dst); 261 | vh1 = vld1_u8(dst + 8); 262 | vl0 = vld1_u8(dst + 16); 263 | vl1 = vld1_u8(dst + 24); 264 | r0 = veor_u8(r0, vh0); 265 | r1 = veor_u8(r1, vh1); 266 | r2 = veor_u8(r2, vl0); 267 | r3 = veor_u8(r3, vl1); 268 | } 269 | vst1_u8(dst, r0); 270 | vst1_u8(dst + 8, r1); 271 | vst1_u8(dst + 16, r2); 272 | vst1_u8(dst + 24, r3); 273 | 274 | src += 32; 275 | dst += 32; 276 | } 277 | } 278 | #endif /* ARCH_AARCH64 */ 279 | 280 | static 281 | inline 282 | void 283 | neon_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest, 284 | gf_val_32_t val, int bytes, int xor, 285 | int altmap) 286 | { 287 | gf_region_data rd; 288 | unsigned i, j; 289 | uint64_t c, prod; 290 | uint8_t tbl[2 * 4 * 16]; 291 | uint8_t *high = tbl + 4 * 16; 292 | 293 | if (val == 0) { gf_multby_zero(dest, bytes, xor); return; } 294 | if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; } 295 | 296 | for (i = 0; i < 4; i++) { 297 | for (j = 0; j < 16; j++) { 298 | c = (j << (i*4)); 299 | prod = gf->multiply.w32(gf, c, val); 300 | tbl[i*16 + j] = prod & 0xff; 301 | high[i*16 + j] = prod >> 8; 302 | } 303 | } 304 | 305 | gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32); 306 | gf_do_initial_region_alignment(&rd); 307 | 308 | if (altmap) { 
309 | uint8_t *s8 = rd.s_start; 310 | uint8_t *d8 = rd.d_start; 311 | uint8_t *end8 = rd.d_top; 312 | if (xor) 313 | neon_w16_split_4_altmap_multiply_region(gf, s8, d8, end8, tbl, val, 1); 314 | else 315 | neon_w16_split_4_altmap_multiply_region(gf, s8, d8, end8, tbl, val, 0); 316 | } else { 317 | uint16_t *s16 = rd.s_start; 318 | uint16_t *d16 = rd.d_start; 319 | uint16_t *end16 = rd.d_top; 320 | if (xor) 321 | neon_w16_split_4_multiply_region(gf, s16, d16, end16, tbl, val, 1); 322 | else 323 | neon_w16_split_4_multiply_region(gf, s16, d16, end16, tbl, val, 0); 324 | } 325 | 326 | gf_do_final_region_alignment(&rd); 327 | } 328 | 329 | static 330 | void 331 | gf_w16_split_4_16_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest, 332 | gf_val_32_t val, int bytes, int xor) 333 | { 334 | neon_w16_split_4_16_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0); 335 | } 336 | 337 | static 338 | void 339 | gf_w16_split_4_16_lazy_altmap_multiply_region_neon(gf_t *gf, void *src, 340 | void *dest, 341 | gf_val_32_t val, int bytes, 342 | int xor) 343 | { 344 | neon_w16_split_4_16_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1); 345 | } 346 | 347 | 348 | void gf_w16_neon_split_init(gf_t *gf) 349 | { 350 | gf_internal_t *h = (gf_internal_t *) gf->scratch; 351 | 352 | if (h->region_type & GF_REGION_ALTMAP) 353 | gf->multiply_region.w32 = gf_w16_split_4_16_lazy_altmap_multiply_region_neon; 354 | else 355 | gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region_neon; 356 | } 357 | -------------------------------------------------------------------------------- /m4/ax_ext.m4: -------------------------------------------------------------------------------- 1 | # 2 | # Updated by KMG to support -DINTEL_SSE for GF-Complete 3 | # 4 | # =========================================================================== 5 | # http://www.gnu.org/software/autoconf-archive/ax_ext.html 6 | # =========================================================================== 7 | # 
# SYNOPSIS
#
#   AX_EXT
#
# DESCRIPTION
#
#   Find supported SIMD extensions by requesting cpuid. When an SIMD
#   extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
#   compiler supports it. For example, if "sse2" is available, then "-msse2"
#   is added to SIMD_FLAGS.
#
#   This macro calls:
#
#     AC_SUBST(SIMD_FLAGS)
#
#   And defines:
#
#     HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_SSSE3 / HAVE_SSE4.1 / HAVE_SSE4.2 / HAVE_AVX
#
#   As implemented below, the two SSE4 symbols are spelled HAVE_SSE4_1 and
#   HAVE_SSE4_2, and the macro additionally defines HAVE_PCLMULDQ,
#   HAVE_ALTIVEC, HAVE_NEON, HAVE_ARM_CRYPT_EXT and HAVE_ARCH_AARCH64.
#   Besides the -m switches it also appends preprocessor switches to
#   SIMD_FLAGS: -DINTEL_SSE, -DINTEL_SSE2, -DINTEL_SSE3, -DINTEL_SSSE3,
#   -DINTEL_SSE4, -DINTEL_SSE4_PCLMUL, -DARM_NEON, -DARM_CRYPT and
#   -DARCH_AARCH64.
#
# LICENSE
#
#   Copyright (c) 2007 Christophe Tournayre
#   Copyright (c) 2013 Michael Petch
#
#   Copying and distribution of this file, with or without modification, are
#   permitted in any medium without royalty provided the copyright notice
#   and this notice are preserved. This file is offered as-is, without any
#   warranty.

#serial 12

AC_DEFUN([AX_EXT],
[
  AC_REQUIRE([AC_CANONICAL_HOST])

  # Select the detection strategy from the canonical host CPU name.
  case $host_cpu in
    aarch64*)
      AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
      SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"

      # NEON and the cryptographic extension are not probed yet. Both cache
      # variables are simply set to yes.
      AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
        [
          # TODO: detect / cross-compile
          ax_cv_have_neon_ext=yes
        ])
      AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
        [
          # TODO: detect / cross-compile
          ax_cv_have_arm_crypt_ext=yes
        ])

      if test "$ax_cv_have_arm_crypt_ext" = yes; then
        AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
      fi

      if test "$ax_cv_have_neon_ext" = yes; then
        AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
      fi

      # Pick the -march value matching the available extensions. The flags
      # are added only when the compiler accepts that -march value.
      if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
          SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
      elif test "$ax_cv_have_arm_crypt_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
          SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
      elif test "$ax_cv_have_neon_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
          SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
      fi
  ;;

    arm*)
      # As on AArch64 the NEON result is hard-coded to yes rather than probed.
      AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
        [
          # TODO: detect / cross-compile
          ax_cv_have_neon_ext=yes
        ])

      if test "$ax_cv_have_neon_ext" = yes; then
        AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
        AX_CHECK_COMPILE_FLAG(-mfpu=neon,
          SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
      fi
  ;;

    powerpc*)
      # Altivec is detected through the hw.optional.altivec sysctl key. When
      # the key is missing or not 1 the cache variable is left unset.
      AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
        [
          if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
            if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
              ax_cv_have_altivec_ext=yes
            fi
          fi
        ])

      if test "$ax_cv_have_altivec_ext" = yes; then
        AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
        AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
      fi
  ;;


    i[[3456]]86*|x86_64*|amd64*)

      AC_REQUIRE([AX_GCC_X86_CPUID])
      AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])

      # Fields 3 and 4 of the colon separated result for CPUID 0x00000001
      # are used as ecx and edx. Each check below tests one bit of them.
      AX_GCC_X86_CPUID(0x00000001)
      ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
      edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`

      AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
      [
        ax_cv_have_mmx_ext=no
        if test "$((0x$edx>>23&0x01))" = 1; then
          ax_cv_have_mmx_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
      [
        ax_cv_have_sse_ext=no
        if test "$((0x$edx>>25&0x01))" = 1; then
          ax_cv_have_sse_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
      [
        ax_cv_have_sse2_ext=no
        if test "$((0x$edx>>26&0x01))" = 1; then
          ax_cv_have_sse2_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
      [
        ax_cv_have_sse3_ext=no
        if test "$((0x$ecx&0x01))" = 1; then
          ax_cv_have_sse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
      [
        ax_cv_have_pclmuldq_ext=no
        if test "$((0x$ecx>>1&0x01))" = 1; then
          ax_cv_have_pclmuldq_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
      [
        ax_cv_have_ssse3_ext=no
        if test "$((0x$ecx>>9&0x01))" = 1; then
          ax_cv_have_ssse3_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
      [
        ax_cv_have_sse41_ext=no
        if test "$((0x$ecx>>19&0x01))" = 1; then
          ax_cv_have_sse41_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
      [
        ax_cv_have_sse42_ext=no
        if test "$((0x$ecx>>20&0x01))" = 1; then
          ax_cv_have_sse42_ext=yes
        fi
      ])

      AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
      [
        ax_cv_have_avx_cpu_ext=no
        if test "$((0x$ecx>>28&0x01))" = 1; then
          ax_cv_have_avx_cpu_ext=yes
        fi
      ])

      # AVX is enabled only when the operating system check passes as well.
      # That check needs ecx bit 27 set and both bits of mask 0x6 set in the
      # first field of the xgetbv result. An unknown xgetbv result counts
      # as zero.
      if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
        AX_GCC_X86_AVX_XGETBV(0x00000000)

        xgetbv_eax="0"
        if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
          xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
        fi

        AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
        [
          ax_cv_have_avx_ext=no

          if test "$((0x$ecx>>27&0x01))" = 1; then
            if test "$((0x$xgetbv_eax&0x6))" = 6; then
              ax_cv_have_avx_ext=yes
            fi
          fi
        ])
        if test x"$ax_cv_have_avx_ext" = x"no"; then
          AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
        fi
      fi

      # For every extension reported above, add its compiler switch and
      # define its symbol only when the compiler accepts the switch.
      # Otherwise a warning is printed.
      if test "$ax_cv_have_mmx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
        if test x"$ax_cv_support_mmx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mmmx"
          AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
        else
          AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
        if test x"$ax_cv_support_sse_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
          AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
        else
          AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse2_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
        if test x"$ax_cv_support_sse2_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
          AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
        else
          AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
        if test x"$ax_cv_support_sse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
          AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_pclmuldq_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
        if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
          AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Muliplication])
        else
          AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_ssse3_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
        if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
          AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
        else
          AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      # Both SSE4 levels add the same -DINTEL_SSE4 switch.
      if test "$ax_cv_have_sse41_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
        if test x"$ax_cv_support_sse41_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
          AC_DEFINE(HAVE_SSE4_1,,[Support SSSE4.1 (Streaming SIMD Extensions 4.1) instructions])
        else
          AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_sse42_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
        if test x"$ax_cv_support_sse42_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
          AC_DEFINE(HAVE_SSE4_2,,[Support SSSE4.2 (Streaming SIMD Extensions 4.2) instructions])
        else
          AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
        fi
      fi

      if test "$ax_cv_have_avx_ext" = yes; then
        AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
        if test x"$ax_cv_support_avx_ext" = x"yes"; then
          SIMD_FLAGS="$SIMD_FLAGS -mavx"
          AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
        else
          AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
        fi
      fi

  ;;
  esac

  AC_SUBST(SIMD_FLAGS)
])

--------------------------------------------------------------------------------