├── Makefile.am
├── test
    ├── Makefile.am
    └── example.c
├── .gitignore
├── include
    ├── gf_method.h
    ├── gf_rand.h
    ├── gf_w64.h
    ├── gf_w16.h
    ├── gf_w32.h
    ├── gf_w4.h
    ├── liberation.h
    ├── cauchy.h
    ├── gf_general.h
    ├── reed_sol.h
    ├── gf_w8.h
    ├── lrc.h
    ├── galois.h
    ├── gf_complete.h
    └── gf_int.h
├── m4
    ├── ltversion.m4
    ├── ax_check_compile_flag.m4
    ├── ax_gcc_x86_cpuid.m4
    ├── ax_gcc_x86_avx_xgetbv.m4
    ├── ltsugar.m4
    ├── lt~obsolete.m4
    └── ax_ext.m4
├── src
    ├── Makefile.am
    ├── gf_rand.c
    ├── gf_method.c
    ├── neon
    │   ├── gf_w4_neon.c
    │   ├── gf_w32_neon.c
    │   ├── gf_w8_neon.c
    │   ├── gf_w64_neon.c
    │   └── gf_w16_neon.c
    ├── liberation.c
    ├── reed_sol.c
    ├── galois.c
    └── lrc.c
├── LICENSE
├── configure.ac
├── missing
├── compile
└── README.md


/Makefile.am:
--------------------------------------------------------------------------------
1 | ACLOCAL_AMFLAGS = -I m4
2 | # src is listed before test because the test programs link against ../src/liblrc.la.
3 | SUBDIRS = src test
4 | include_HEADERS = include/lrc.h
5 | 


--------------------------------------------------------------------------------
/test/Makefile.am:
--------------------------------------------------------------------------------
 1 | # GF-Complete 'test' AM file
 2 | 
 3 | AM_CPPFLAGS = -I$(top_srcdir)/include
 4 | AM_CFLAGS = -O2 $(SIMD_FLAGS) -fPIC
 5 | # noinst_: both programs are built but never installed; each links the in-tree liblrc.la.
 6 | noinst_PROGRAMS = test_lrc example
 7 | 
 8 | test_lrc_SOURCES = test_lrc.c
 9 | test_lrc_LDADD = ../src/liblrc.la
10 | 
11 | example_SOURCES = example.c
12 | example_LDADD = ../src/liblrc.la
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Object files
 2 | *.o
 3 | *.ko
 4 | *.obj
 5 | *.elf
 6 | 
 7 | # Precompiled Headers
 8 | *.gch
 9 | *.pch
10 | 
11 | # Libraries
12 | *.lib
13 | *.a
14 | *.la
15 | *.lo
16 | 
17 | # Shared objects (inc. Windows DLLs)
18 | *.dll
19 | *.so
20 | *.so.*
21 | *.dylib
22 | 
23 | # Executables
24 | *.exe
25 | *.out
26 | *.app
27 | *.i*86
28 | *.x86_64
29 | *.hex
30 | 
31 | # Debug files
32 | *.dSYM/
33 | 


--------------------------------------------------------------------------------
/include/gf_method.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_method.h
 7 |  *
 8 |  * Parses argv to figure out the flags and arguments.  Creates the gf.
 9 |  */
10 | 
11 | #pragma once
12 | 
13 | #include "gf_complete.h"
14 | 
15 | /* Parses argv starting at "starting".  
16 |    
17 |    Returns 0 on failure.
18 |    On success, it returns one past the last argument it read in argv. */
19 | 
20 | extern int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting);
21 | 


--------------------------------------------------------------------------------
/m4/ltversion.m4:
--------------------------------------------------------------------------------
 1 | # ltversion.m4 -- version numbers			-*- Autoconf -*-
 2 | #
 3 | #   Copyright (C) 2004, 2011-2015 Free Software Foundation, Inc.
 4 | #   Written by Scott James Remnant, 2004
 5 | #
 6 | # This file is free software; the Free Software Foundation gives
 7 | # unlimited permission to copy and/or distribute it, with or without
 8 | # modifications, as long as this notice is preserved.
 9 | 
10 | # @configure_input@
11 | 
12 | # serial 4179 ltversion.m4
13 | # This file is part of GNU Libtool
14 | 
15 | m4_define([LT_PACKAGE_VERSION], [2.4.6])
16 | m4_define([LT_PACKAGE_REVISION], [2.4.6])
17 | 
18 | AC_DEFUN([LTVERSION_VERSION],
19 | [macro_version='2.4.6'
20 | macro_revision='2.4.6'
21 | _LT_DECL(, macro_version, 0, [Which release of libtool.m4 was used?])
22 | _LT_DECL(, macro_revision, 0)
23 | ])
24 | 


--------------------------------------------------------------------------------
/include/gf_rand.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_rand.h
 7 |  *
 8 |  * Random number generation, using the "Mother of All" random number generator.  */
 9 | 
10 | #pragma once
11 | #include <stdint.h>
12 | #include <stdio.h>
13 | #include <stdlib.h>
14 | 
15 | /* Generator API.  Call MOA_Seed() first: the state words start out as all zeros. */
16 | uint32_t MOA_Random_32(void);                 /* advances the generator, returns 32 bits */
17 | uint64_t MOA_Random_64(void);                 /* two 32-bit draws; the first is the high half */
18 | void     MOA_Random_128(uint64_t *x);         /* stores two 64-bit draws in x[0] and x[1] */
19 | uint32_t MOA_Random_W(int w, int zero_ok);    /* draw limited to w bits when w < 32; redraws on 0 unless zero_ok */
20 | void MOA_Fill_Random_Region (void *reg, int size);   /* reg should be aligned to 4 bytes, but
21 |                                                         size can be anything. */
22 | void     MOA_Seed(uint32_t seed);             /* re-initializes all five state words from seed */
23 | 


--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | AM_CPPFLAGS = -I$(top_srcdir)/include
 2 | AM_CFLAGS = -O2 $(SIMD_FLAGS) -fPIC
 3 | # NOTE(review): aclocal reads ACLOCAL_AMFLAGS from the top-level Makefile.am only; this copy looks redundant -- confirm.
 4 | ACLOCAL_AMFLAGS = -I m4
 5 | 
 6 | lib_LTLIBRARIES = liblrc.la
 7 | liblrc_la_SOURCES = \
 8 |                   gf.c  \
 9 |                   gf_method.c  \
10 |                   gf_wgen.c  \
11 |                   gf_w4.c  \
12 |                   gf_w8.c  \
13 |                   gf_w16.c  \
14 |                   gf_w32.c  \
15 |                   gf_w64.c  \
16 |                   gf_w128.c  \
17 |                   gf_rand.c  \
18 |                   gf_general.c  \
19 |                   cauchy.c \
20 |                   galois.c \
21 |                   liberation.c \
22 |                   reed_sol.c \
23 |                   jerasure.c \
24 |                   lrc.c
25 | # The NEON kernels are added only when configure enabled the HAVE_NEON conditional.
26 | if HAVE_NEON
27 | liblrc_la_SOURCES += neon/gf_w4_neon.c  \
28 |                     neon/gf_w8_neon.c  \
29 |                     neon/gf_w16_neon.c \
30 |                     neon/gf_w32_neon.c \
31 |                     neon/gf_w64_neon.c
32 | endif
33 | 
34 | # vim:expandtab
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 张炎泼
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/include/gf_w64.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_w64.h
 7 |  *
 8 |  * Defines and data structures for 64-bit Galois fields
 9 |  */
10 | 
11 | #ifndef GF_COMPLETE_GF_W64_H
12 | #define GF_COMPLETE_GF_W64_H
13 | 
14 | #include <stdint.h>
15 | 
16 | #define GF_FIELD_WIDTH (64)
17 | #define GF_FIRST_BIT (1ULL << 63)
18 | /* GF_BASE_FIELD_*: the same constants for a field of half the width (32 bits). */
19 | #define GF_BASE_FIELD_WIDTH (32)
20 | #define GF_BASE_FIELD_SIZE       (1ULL << GF_BASE_FIELD_WIDTH)
21 | #define GF_BASE_FIELD_GROUP_SIZE  (GF_BASE_FIELD_SIZE-1)   /* parenthesized so it expands as a single operand */
22 | 
23 | struct gf_w64_group_data {
24 |     uint64_t *reduce;
25 |     uint64_t *shift;
26 |     uint64_t *memory;
27 | };
28 | /* 16 x 16 entries -- presumably one table per 4-bit slice of a 64-bit operand (16 * 4 = 64). */
29 | struct gf_split_4_64_lazy_data {
30 |     uint64_t      tables[16][16];
31 |     uint64_t      last_value;
32 | };
33 | /* 8 x 256 entries -- presumably one table per byte of a 64-bit operand. */
34 | struct gf_split_8_64_lazy_data {
35 |     uint64_t      tables[8][(1<<8)];
36 |     uint64_t      last_value;
37 | };
38 | /* 4 x 65536 uint64_t entries: 2 MiB of tables per instance. */
39 | struct gf_split_16_64_lazy_data {
40 |     uint64_t      tables[4][(1<<16)];
41 |     uint64_t      last_value;
42 | };
43 | /* 15 x 256 x 256 uint64_t entries: 7.5 MiB per instance. */
44 | struct gf_split_8_8_data {
45 |     uint64_t      tables[15][256][256];
46 | };
47 | /* NOTE(review): gf_t is not declared by this header (only <stdint.h> is included); includers must declare it first. */
48 | void gf_w64_neon_split_init(gf_t *gf);
49 | 
50 | #endif /* GF_COMPLETE_GF_W64_H */
51 | 


--------------------------------------------------------------------------------
/test/example.c:
--------------------------------------------------------------------------------
 1 | #include "lrc.h"
 2 | 
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <string.h>
 6 | 
 7 | /* gcc example.c -llrc */
 8 | 
 9 | int main(int argc, char **argv) {
10 |   /* Demo flow: set up the codec and buffers, fill four data chunks, encode, dump, damage data[0], decode. */
11 |   int k, m, i;
12 |   int        size = 8;
13 |   lrc_t     *lrc  = &(lrc_t) {0};
14 |   lrc_buf_t *buf  = &(lrc_buf_t) {0};
15 |   /* NOTE(review): presumably 2 local groups of 2 data chunks each and 3 code chunks -- confirm against lrc.h. */
16 |   if (lrc_init_n(lrc, 2, (uint8_t[]) {2, 2}, 3) != 0) {
17 |     exit(-1);
18 |   }
19 | 
20 |   if (lrc_buf_init(buf, lrc, size) != 0) {
21 |     exit(-1);
22 |   }
23 |   /* Every string below needs at most 6 bytes with its NUL; assumes each chunk holds at least size (8) bytes. */
24 |   strcpy(buf->data[0], "hello");
25 |   strcpy(buf->data[1], "world");
26 |   strcpy(buf->data[2], "lrc");
27 |   strcpy(buf->data[3], "ec");
28 | 
29 |   if (lrc_encode(lrc, buf) != 0) {
30 |     exit(-1);
31 |   }
32 |   /* Hex-dump the first size bytes of every data chunk, then of every code chunk. */
33 |   for (k = 0; k < lrc->k; k++) {
34 |     printf("data[%d]: ", k);
35 |     for (i = 0; i < size; i++) {
36 |       printf("%02x ", (uint8_t)buf->data[k][i]);
37 |     }
38 |     printf("\n");
39 |   }
40 | 
41 |   for (m = 0; m < lrc->m; m++) {
42 |     printf("code[%d]: ", m);
43 |     for (i = 0; i < size; i++) {
44 |       printf("%02x ", (uint8_t)buf->code[m][i]);
45 |     }
46 |     printf("\n");
47 |   }
48 |   /* One flag per chunk (2 + 2 + 3 = 7); only erased[0] is set, the unlisted last entry defaults to 0. */
49 |   int8_t erased[2 + 2 + 3] = {1, 0, 0, 0, 0, 0};
50 |   /* Overwrite data[0] so the chunk flagged in erased[0] really is damaged before decoding. */
51 |   strcpy(buf->data[0], "*");
52 | 
53 |   printf("damaged: %s %s %s %s\n",
54 |          buf->data[0], buf->data[1], buf->data[2], buf->data[3]);
55 | 
56 |   if (lrc_decode(lrc, buf, erased) != 0) {
57 |     exit(-1);
58 |   }
59 | 
60 |   printf("reconstructed: %s %s %s %s\n",
61 |          buf->data[0], buf->data[1], buf->data[2], buf->data[3]);
62 |   /* NOTE(review): the exit(-1) paths above leave without reaching these destroy calls. */
63 |   lrc_destroy(lrc);
64 |   lrc_buf_destroy(buf);
65 | 
66 |   return 0;
67 | }
68 | 
69 | // vim:sw=2:fdl=0
70 | 


--------------------------------------------------------------------------------
/include/gf_w16.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_w16.h
 7 |  *
 8 |  * Defines and data structures for 16-bit Galois fields
 9 |  */
10 | 
11 | #ifndef GF_COMPLETE_GF_W16_H
12 | #define GF_COMPLETE_GF_W16_H
13 | 
14 | #include <stdint.h>
15 | 
16 | #define GF_FIELD_WIDTH (16)
17 | #define GF_FIELD_SIZE (1 << GF_FIELD_WIDTH)
18 | #define GF_MULT_GROUP_SIZE (GF_FIELD_SIZE-1)   /* parenthesized so it expands as a single operand */
19 | /* GF_BASE_FIELD_*: the same constants for a field of half the width (8 bits). */
20 | #define GF_BASE_FIELD_WIDTH (8)
21 | #define GF_BASE_FIELD_SIZE       (1 << GF_BASE_FIELD_WIDTH)
22 | 
23 | struct gf_w16_logtable_data {
24 |     uint16_t      log_tbl[GF_FIELD_SIZE];
25 |     uint16_t      antilog_tbl[GF_FIELD_SIZE * 2];
26 |     uint16_t      inv_tbl[GF_FIELD_SIZE];
27 |     uint16_t      *d_antilog;
28 | };
29 | /* Differs from the struct above: log_tbl is int and the antilog storage holds 4 * GF_FIELD_SIZE entries. */
30 | struct gf_w16_zero_logtable_data {
31 |     int           log_tbl[GF_FIELD_SIZE];
32 |     uint16_t      _antilog_tbl[GF_FIELD_SIZE * 4];
33 |     uint16_t      *antilog_tbl;   /* presumably points into _antilog_tbl -- confirm at the init site */
34 |     uint16_t      inv_tbl[GF_FIELD_SIZE];
35 | };
36 | /* Same members as gf_w16_logtable_data plus one extra GF_FIELD_SIZE-entry lazytable. */
37 | struct gf_w16_lazytable_data {
38 |     uint16_t      log_tbl[GF_FIELD_SIZE];
39 |     uint16_t      antilog_tbl[GF_FIELD_SIZE * 2];
40 |     uint16_t      inv_tbl[GF_FIELD_SIZE];
41 |     uint16_t      *d_antilog;
42 |     uint16_t      lazytable[GF_FIELD_SIZE];
43 | };
44 | 
45 | struct gf_w16_bytwo_data {
46 |     uint64_t prim_poly;
47 |     uint64_t mask1;
48 |     uint64_t mask2;
49 | };
50 | /* 3 x 256 x 256 uint16_t entries: 384 KiB per instance. */
51 | struct gf_w16_split_8_8_data {
52 |     uint16_t      tables[3][256][256];
53 | };
54 | 
55 | struct gf_w16_group_4_4_data {
56 |     uint16_t reduce[16];
57 |     uint16_t shift[16];
58 | };
59 | 
60 | struct gf_w16_composite_data {
61 |   uint8_t *mult_table;
62 | };
63 | /* NOTE(review): gf_t is not declared by this header (only <stdint.h> is included); includers must declare it first. */
64 | void gf_w16_neon_split_init(gf_t *gf);
65 | 
66 | #endif /* GF_COMPLETE_GF_W16_H */
67 | 


--------------------------------------------------------------------------------
/include/gf_w32.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_w32.h
 7 |  *
 8 |  * Defines and data structures for 32-bit Galois fields
 9 |  */
10 | 
11 | #ifndef GF_COMPLETE_GF_W32_H
12 | #define GF_COMPLETE_GF_W32_H
13 | 
14 | #include <stdint.h>
15 | 
16 | #define GF_FIELD_WIDTH (32)
17 | #define GF_FIRST_BIT (1 << 31)
18 | /* NOTE(review): 1 << 31 shifts into the sign bit of int; 1U << 31 may be safer -- check 64-bit uses first. */
19 | #define GF_BASE_FIELD_WIDTH (16)
20 | #define GF_BASE_FIELD_SIZE       (1 << GF_BASE_FIELD_WIDTH)
21 | #define GF_BASE_FIELD_GROUP_SIZE  (GF_BASE_FIELD_SIZE-1)   /* parenthesized so it expands as a single operand */
22 | #define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1)   /* reads h->prim_poly from the expanding scope */
23 | 
24 | struct gf_split_2_32_lazy_data {
25 |     uint32_t      tables[16][4];   /* 16 x 4 entries (16 * 2 bits = 32) */
26 |     uint32_t      last_value;
27 | };
28 | /* tables alone is 7 x 256 x 256 uint32_t entries: 1.75 MiB per instance. */
29 | struct gf_w32_split_8_8_data {
30 |     uint32_t      tables[7][256][256];
31 |     uint32_t      region_tables[4][256];
32 |     uint32_t      last_value;
33 | };
34 | 
35 | struct gf_w32_group_data {
36 |     uint32_t *reduce;
37 |     uint32_t *shift;
38 |     int      tshift;
39 |     uint64_t rmask;
40 |     uint32_t *memory;
41 | };
42 | /* 2 x 65536 uint32_t entries: 512 KiB of tables per instance. */
43 | struct gf_split_16_32_lazy_data {
44 |     uint32_t      tables[2][(1<<16)];
45 |     uint32_t      last_value;
46 | };
47 | /* 4 x 256 entries -- presumably one table per byte of a 32-bit operand. */
48 | struct gf_split_8_32_lazy_data {
49 |     uint32_t      tables[4][256];
50 |     uint32_t      last_value;
51 | };
52 | /* 8 x 16 entries -- presumably one table per 4-bit slice (8 * 4 = 32). */
53 | struct gf_split_4_32_lazy_data {
54 |     uint32_t      tables[8][16];
55 |     uint32_t      last_value;
56 | };
57 | 
58 | struct gf_w32_bytwo_data {
59 |     uint64_t prim_poly;
60 |     uint64_t mask1;
61 |     uint64_t mask2;
62 | };
63 | 
64 | struct gf_w32_composite_data {
65 |   uint16_t *log;
66 |   uint16_t *alog;
67 | };
68 | /* NOTE(review): gf_t is not declared by this header (only <stdint.h> is included); includers must declare it first. */
69 | void gf_w32_neon_split_init(gf_t *gf);
70 | 
71 | #endif /* GF_COMPLETE_GF_W32_H */
72 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
 1 | # gf-complete autoconf template
 2 | 
 3 | # FIXME - add project url as the last argument
 4 | AC_INIT(lrc-ec, 1.0)
 5 | 
 6 | # Override default CFLAGS
 7 | : ${CFLAGS="-std=gnu99 -Wall -Wpointer-arith -O2 -g"}
 8 | 
 9 | AC_PREREQ([2.61])
10 | 
11 | AM_INIT_AUTOMAKE([no-dependencies foreign parallel-tests subdir-objects])
12 | LT_INIT # libtool
13 | 
14 | AC_CONFIG_MACRO_DIRS([m4])
15 | 
16 | # This prevents './configure; make' from trying to run autotools.
17 | AM_MAINTAINER_MODE([disable])
18 | 
19 | dnl AM_PROG_CC_C_O was needed for per-target flags before Automake 1.14; AC_PROG_CC now performs that check itself.
20 | AC_PROG_CC
21 | 
22 | # Check for functions to provide aligned memory
23 | #
24 | AC_CHECK_FUNCS([posix_memalign],
25 |  [found_memalign=yes; break])
26 | 
27 | AS_IF([test "x$found_memalign" != "xyes"], [AC_MSG_WARN([No function for aligned memory allocation found])])
28 | 
29 | AX_EXT()
30 | 
31 | AC_ARG_ENABLE([neon],
32 |               AS_HELP_STRING([--disable-neon], [Build without NEON optimizations]))
33 | # Unless NEON was disabled, probe for arm_neon.h with SIMD_FLAGS appended to CPPFLAGS; restore CPPFLAGS on failure.
34 | AS_IF([test "x$enable_neon" != "xno"],
35 |       [noneon_CPPFLAGS=$CPPFLAGS
36 |        CPPFLAGS="$CPPFLAGS $SIMD_FLAGS"
37 |        AC_CHECK_HEADER([arm_neon.h],
38 |                        [have_neon=yes],
39 |                        [have_neon=no
40 |                         CPPFLAGS=$noneon_CPPFLAGS])],
41 |       [have_neon=no
42 |        AS_IF([test "x$ax_cv_have_neon_ext" = "xyes"],
43 |              [SIMD_FLAGS=""])
44 |       ])
45 | # An explicit --enable-neon that could not be satisfied is fatal; otherwise NEON is simply left off.
46 | AS_IF([test "x$have_neon" = "xno"],
47 |       [AS_IF([test "x$enable_neon" = "xyes"],
48 |              [AC_MSG_ERROR([neon requested but arm_neon.h not found])])
49 |       ])
50 | AM_CONDITIONAL([HAVE_NEON], [test "x$have_neon" = "xyes"])
51 | # --disable-sse empties SIMD_FLAGS, so the Makefiles add no SIMD compiler flags through AM_CFLAGS.
52 | AC_ARG_ENABLE([sse],
53 |               AS_HELP_STRING([--disable-sse], [Build without SSE optimizations]),
54 |               [if   test "x$enableval" = "xno" ; then
55 |                 SIMD_FLAGS=""
56 |                 echo "DISABLED SSE!!!"
57 |               fi]
58 | )
59 | 
60 | AC_CONFIG_FILES([Makefile
61 |                  src/Makefile
62 |                  test/Makefile
63 |                  ])
64 | AC_OUTPUT
65 | 


--------------------------------------------------------------------------------
/include/gf_w4.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_w4.h
 7 |  *
 8 |  * Defines and data structures for 4-bit Galois fields
 9 |  */
10 | 
11 | #ifndef GF_COMPLETE_GF_W4_H
12 | #define GF_COMPLETE_GF_W4_H
13 | 
14 | #include <stdint.h>
15 | 
16 | #define GF_FIELD_WIDTH      4
17 | #define GF_DOUBLE_WIDTH     (GF_FIELD_WIDTH*2)
18 | #define GF_FIELD_SIZE       (1 << GF_FIELD_WIDTH)
19 | #define GF_MULT_GROUP_SIZE       (GF_FIELD_SIZE-1)
20 | 
21 | /* ------------------------------------------------------------
22 |    JSP: Each implementation has its own data, which is allocated
23 |    at one time as part of the handle. For that reason, it
24 |    shouldn't be hierarchical -- i.e. one should be able to
25 |    allocate it with one call to malloc. */
26 | 
27 | struct gf_logtable_data {
28 |     uint8_t      log_tbl[GF_FIELD_SIZE];
29 |     uint8_t      antilog_tbl[GF_FIELD_SIZE * 2];
30 |     uint8_t      *antilog_tbl_div;
31 | };
32 | /* Two complete 16 x 16 byte tables (256 bytes each), one named mult and one named div. */
33 | struct gf_single_table_data {
34 |     uint8_t      mult[GF_FIELD_SIZE][GF_FIELD_SIZE];
35 |     uint8_t      div[GF_FIELD_SIZE][GF_FIELD_SIZE];
36 | };
37 | /* mult grows to 16 rows of 256 entries here; div stays 16 x 16. */
38 | struct gf_double_table_data {
39 |     uint8_t      div[GF_FIELD_SIZE][GF_FIELD_SIZE];
40 |     uint8_t      mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE];
41 | };
42 | struct gf_quad_table_data {
43 |     uint8_t      div[GF_FIELD_SIZE][GF_FIELD_SIZE];
44 |     uint16_t     mult[GF_FIELD_SIZE][(1<<16)];   /* 16 x 65536 uint16_t = 2 MiB */
45 | };
46 | /* Keeps a single 65536-entry uint16_t mult table (128 KiB) next to 16 x 16 smult and div tables. */
47 | struct gf_quad_table_lazy_data {
48 |     uint8_t      div[GF_FIELD_SIZE][GF_FIELD_SIZE];
49 |     uint8_t      smult[GF_FIELD_SIZE][GF_FIELD_SIZE];
50 |     uint16_t     mult[(1 << 16)];
51 | };
52 | 
53 | struct gf_bytwo_data {
54 |     uint64_t prim_poly;
55 |     uint64_t mask1;
56 |     uint64_t mask2;
57 | };
58 | /* NOTE(review): gf_t is not declared by this header (only <stdint.h> is included); includers must declare it first. */
59 | // ARM NEON init functions
60 | int gf_w4_neon_cfm_init(gf_t *gf);
61 | void gf_w4_neon_single_table_init(gf_t *gf);
62 | 
63 | #endif /* GF_COMPLETE_GF_W4_H */
64 | 


--------------------------------------------------------------------------------
/src/gf_rand.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_rand.c -- Random number generator.
 7 |  */
 8 | 
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <stdint.h>
12 | #include "gf_rand.h"
13 | 
14 | /* Lifted the "Mother of All" random number generator from http://www.agner.org/random/ */
15 | 
16 | static uint32_t MOA_X[5];
17 | /* Advances the generator one step and returns the new MOA_X[0]; takes no arguments. */
18 | uint32_t MOA_Random_32(void) {
19 |   uint64_t sum;   /* 64 bits wide so the part above bit 31 can be kept as the next carry */
20 |   sum = (uint64_t)2111111111UL * (uint64_t)MOA_X[3] +
21 |      (uint64_t)1492 * (uint64_t)(MOA_X[2]) +
22 |      (uint64_t)1776 * (uint64_t)(MOA_X[1]) +
23 |      (uint64_t)5115 * (uint64_t)(MOA_X[0]) +
24 |      (uint64_t)MOA_X[4];
25 |   MOA_X[3] = MOA_X[2];  MOA_X[2] = MOA_X[1];  MOA_X[1] = MOA_X[0];   /* age the three newest outputs by one slot */
26 |   MOA_X[4] = (uint32_t)(sum >> 32);   /* high half: carry for the next call */
27 |   MOA_X[0] = (uint32_t)sum;           /* low half: the value returned */
28 |   return MOA_X[0];
29 | }
30 | /* Builds a 64-bit value from two 32-bit draws; the first draw becomes the high half. */
31 | uint64_t MOA_Random_64(void) {
32 |   uint64_t sum;
33 | 
34 |   sum = MOA_Random_32();
35 |   sum <<= 32;
36 |   sum |= MOA_Random_32();
37 |   return sum;
38 | }
39 | /* Stores two consecutive 64-bit draws in x[0] and x[1]; x must point to at least two elements. */
40 | void MOA_Random_128(uint64_t *x) {
41 |   x[0] = MOA_Random_64();
42 |   x[1] = MOA_Random_64();
43 |   return;
44 | }
45 | /* Returns a draw limited to w bits when w < 32 (a full 32-bit draw otherwise); redraws while it is 0 unless zero_ok. */
46 | uint32_t MOA_Random_W(int w, int zero_ok)
47 | {
48 |   uint32_t b;
49 | 
50 |   do {
51 |     b = MOA_Random_32();
52 |     if (w == 31) b &= 0x7fffffff;   /* masked separately: 1 << 31 does not fit in a signed int */
53 |     if (w < 31)  b %= (1 << w);
54 |   } while (!zero_ok && b == 0);     /* NOTE(review): never terminates for w == 0 with zero_ok == 0 */
55 |   return b;
56 | }
57 | /* Derives the five state words from seed, then discards the first 19 outputs. */
58 | void MOA_Seed(uint32_t seed) {
59 |   int i;
60 |   uint32_t s = seed;
61 |   for (i = 0; i < 5; i++) {
62 |     s = s * 29943829 - 1;   /* result wraps modulo 2^32 when stored back into s */
63 |     MOA_X[i] = s;
64 |   }
65 |   for (i=0; i<19; i++) MOA_Random_32();
66 | }
67 | 
68 | /* Fills size bytes at reg: whole 32-bit words first, then any remaining 1-3 bytes one at a time. */
69 | void MOA_Fill_Random_Region (void *reg, int size)
70 | {
71 |   uint32_t *r32;
72 |   uint8_t *r8;
73 |   int i;
74 | 
75 |   r32 = (uint32_t *) reg;
76 |   r8 = (uint8_t *) reg;
77 |   for (i = 0; i < size/4; i++) r32[i] = MOA_Random_32();
78 |   for (i *= 4; i < size; i++) r8[i] = MOA_Random_W(8, 1);   /* i *= 4 turns the word count into a byte offset */
79 | }
80 | 
81 | 


--------------------------------------------------------------------------------
/include/liberation.h:
--------------------------------------------------------------------------------
 1 | /* *
 2 |  * Copyright (c) 2013, James S. Plank and Kevin Greenan
 3 |  * All rights reserved.
 4 |  *
 5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
 6 |  * Coding Techniques
 7 |  *
 8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
 9 |  *
10 |  * Redistribution and use in source and binary forms, with or without
11 |  * modification, are permitted provided that the following conditions
12 |  * are met:
13 |  *
14 |  *  - Redistributions of source code must retain the above copyright
15 |  *    notice, this list of conditions and the following disclaimer.
16 |  *
17 |  *  - Redistributions in binary form must reproduce the above copyright
18 |  *    notice, this list of conditions and the following disclaimer in
19 |  *    the documentation and/or other materials provided with the
20 |  *    distribution.
21 |  *
22 |  *  - Neither the name of the University of Tennessee nor the names of its
23 |  *    contributors may be used to endorse or promote products derived
24 |  *    from this software without specific prior written permission.
25 |  *
26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 |  * POSSIBILITY OF SUCH DAMAGE.
38 |  */
39 | 
40 | 
41 | #ifndef _LIBERATION   /* include guard; the matching #define on the next line makes it take effect */
42 | #define _LIBERATION
43 | extern int *liberation_coding_bitmatrix(int k, int w);
44 | extern int *liber8tion_coding_bitmatrix(int k);
45 | extern int *blaum_roth_coding_bitmatrix(int k, int w);
46 | 
47 | #endif /* _LIBERATION */
48 | 


--------------------------------------------------------------------------------
/include/cauchy.h:
--------------------------------------------------------------------------------
 1 | /* *
 2 |  * Copyright (c) 2013, James S. Plank and Kevin Greenan
 3 |  * All rights reserved.
 4 |  *
 5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
 6 |  * Coding Techniques
 7 |  *
 8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
 9 |  *
10 |  * Redistribution and use in source and binary forms, with or without
11 |  * modification, are permitted provided that the following conditions
12 |  * are met:
13 |  *
14 |  *  - Redistributions of source code must retain the above copyright
15 |  *    notice, this list of conditions and the following disclaimer.
16 |  *
17 |  *  - Redistributions in binary form must reproduce the above copyright
18 |  *    notice, this list of conditions and the following disclaimer in
19 |  *    the documentation and/or other materials provided with the
20 |  *    distribution.
21 |  *
22 |  *  - Neither the name of the University of Tennessee nor the names of its
23 |  *    contributors may be used to endorse or promote products derived
24 |  *    from this software without specific prior written permission.
25 |  *
26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 |  * POSSIBILITY OF SUCH DAMAGE.
38 |  */
39 | 
40 | 
41 | extern int *cauchy_original_coding_matrix(int k, int m, int w);
42 | extern int *cauchy_xy_coding_matrix(int k, int m, int w, int *x, int *y);
43 | extern void cauchy_improve_coding_matrix(int k, int m, int w, int *matrix);
44 | extern int *cauchy_good_general_coding_matrix(int k, int m, int w);
45 | extern int cauchy_n_ones(int n, int w);
46 | 


--------------------------------------------------------------------------------
/include/gf_general.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 5 |  *
 6 |  * gf_general.h
 7 |  *
 8 |  * This file has helper routines for doing basic GF operations with any
 9 |  * legal value of w.  The problem is that w <= 32, w=64 and w=128 all have
10 |  * different data types, which is a pain.  The procedures in this file try
11 |  * to alleviate that pain.  They are used in gf_unit and gf_time.
12 |  */
13 | 
14 | #pragma once
15 | 
16 | #include <stdio.h>
17 | #include <getopt.h>
18 | #include <stdint.h>
19 | #include <string.h>
20 | #include <stdlib.h>
21 | #include <time.h>
22 | 
23 | #include "gf_complete.h"
24 | 
25 | typedef union {
26 |   uint32_t w32;
27 |   uint64_t w64;
28 |   uint64_t w128[2];
29 | } gf_general_t;
30 | 
31 | void gf_general_set_zero(gf_general_t *v, int w);
32 | void gf_general_set_one(gf_general_t *v, int w);
33 | void gf_general_set_two(gf_general_t *v, int w);
34 | 
35 | int gf_general_is_zero(gf_general_t *v, int w);
36 | int gf_general_is_one(gf_general_t *v, int w);
37 | int gf_general_are_equal(gf_general_t *v1, gf_general_t *v2, int w);
38 | 
39 | void gf_general_val_to_s(gf_general_t *v, int w, char *s, int hex);
40 | int  gf_general_s_to_val(gf_general_t *v, int w, char *s, int hex);
41 | 
42 | void gf_general_set_random(gf_general_t *v, int w, int zero_ok);
43 | 
44 | void gf_general_add(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
45 | void gf_general_multiply(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
46 | void gf_general_divide(gf_t *gf, gf_general_t *a, gf_general_t *b, gf_general_t *c);
47 | void gf_general_inverse(gf_t *gf, gf_general_t *a, gf_general_t *b);
48 | 
49 | void gf_general_do_region_multiply(gf_t *gf, gf_general_t *a, 
50 |                                    void *ra, void *rb, 
51 |                                    int bytes, int xor);
52 | 
53 | void gf_general_do_region_check(gf_t *gf, gf_general_t *a, 
54 |                                 void *orig_a, void *orig_target, void *final_target, 
55 |                                 int bytes, int xor);
56 | 
57 | 
58 | /* The "which" argument is 'M', 'D' or 'I' for multiply, divide or inverse. */
59 | 
60 | void gf_general_set_up_single_timing_test(int w, void *ra, void *rb, int size);
61 | int  gf_general_do_single_timing_test(gf_t *gf, void *ra, void *rb, int size, char which);
62 | 


--------------------------------------------------------------------------------
/include/reed_sol.h:
--------------------------------------------------------------------------------
 1 | /* *
 2 |  * Copyright (c) 2013, James S. Plank and Kevin Greenan
 3 |  * All rights reserved.
 4 |  *
 5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
 6 |  * Coding Techniques
 7 |  *
 8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
 9 |  *
10 |  * Redistribution and use in source and binary forms, with or without
11 |  * modification, are permitted provided that the following conditions
12 |  * are met:
13 |  *
14 |  *  - Redistributions of source code must retain the above copyright
15 |  *    notice, this list of conditions and the following disclaimer.
16 |  *
17 |  *  - Redistributions in binary form must reproduce the above copyright
18 |  *    notice, this list of conditions and the following disclaimer in
19 |  *    the documentation and/or other materials provided with the
20 |  *    distribution.
21 |  *
22 |  *  - Neither the name of the University of Tennessee nor the names of its
23 |  *    contributors may be used to endorse or promote products derived
24 |  *    from this software without specific prior written permission.
25 |  *
26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 |  * POSSIBILITY OF SUCH DAMAGE.
38 |  */
39 | 
40 | 
41 | extern int *reed_sol_vandermonde_coding_matrix(int k, int m, int w);
42 | extern int *reed_sol_extended_vandermonde_matrix(int rows, int cols, int w);
43 | extern int *reed_sol_big_vandermonde_distribution_matrix(int rows, int cols, int w);
44 | 
45 | extern int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int size);
46 | extern int *reed_sol_r6_coding_matrix(int k, int w);
47 | 
48 | extern void reed_sol_galois_w08_region_multby_2(char *region, int nbytes);
49 | extern void reed_sol_galois_w16_region_multby_2(char *region, int nbytes);
50 | extern void reed_sol_galois_w32_region_multby_2(char *region, int nbytes);
51 | 


--------------------------------------------------------------------------------
/include/gf_w8.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * gf_w8.h
  7 |  *
  8 |  * Defines and data structures for 8-bit Galois fields
  9 |  */
 10 | 
 11 | #ifndef GF_COMPLETE_GF_W8_H
 12 | #define GF_COMPLETE_GF_W8_H
 13 | 
 14 | #include "gf_int.h"
 15 | #include <stdint.h>
 16 | 
 17 | #define GF_FIELD_WIDTH (8)
 18 | #define GF_FIELD_SIZE       (1 << GF_FIELD_WIDTH)
 19 | #define GF_HALF_SIZE       (1 << (GF_FIELD_WIDTH/2))
 20 | #define GF_MULT_GROUP_SIZE       (GF_FIELD_SIZE-1)  /* GF_FIELD_SIZE - 1; parenthesized so it expands safely inside larger expressions */
 21 | 
 22 | #define GF_BASE_FIELD_WIDTH (4)
 23 | #define GF_BASE_FIELD_SIZE       (1 << GF_BASE_FIELD_WIDTH)
 24 | 
 25 | struct gf_w8_logtable_data {
 26 |     uint8_t         log_tbl[GF_FIELD_SIZE];
 27 |     uint8_t         antilog_tbl[GF_FIELD_SIZE * 2];
 28 |     uint8_t         inv_tbl[GF_FIELD_SIZE];
 29 | };
 30 | 
 31 | struct gf_w8_logzero_table_data {
 32 |     short           log_tbl[GF_FIELD_SIZE];  /* Make this signed, so that we can divide easily */
 33 |     uint8_t         antilog_tbl[512+512+1];
 34 |     uint8_t         *div_tbl;
 35 |     uint8_t         *inv_tbl;
 36 | };
 37 | 
 38 | struct gf_w8_logzero_small_table_data {
 39 |     short           log_tbl[GF_FIELD_SIZE];  /* Make this signed, so that we can divide easily */
 40 |     uint8_t         antilog_tbl[255*3];
 41 |     uint8_t         inv_tbl[GF_FIELD_SIZE];
 42 |     uint8_t         *div_tbl;
 43 | };
 44 | 
 45 | struct gf_w8_composite_data {
 46 |   uint8_t *mult_table;
 47 | };
 48 | 
 49 | /* Don't change the order of these relative to gf_w8_half_table_data */
 50 | 
 51 | struct gf_w8_default_data {
 52 |   uint8_t     high[GF_FIELD_SIZE][GF_HALF_SIZE];      /* same type and position as gf_w8_half_table_data.high */
 53 |   uint8_t     low[GF_FIELD_SIZE][GF_HALF_SIZE];       /* same type and position as gf_w8_half_table_data.low */
 54 |   uint8_t     divtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; /* same shape as gf_w8_single_table_data.divtable */
 55 |   uint8_t     multtable[GF_FIELD_SIZE][GF_FIELD_SIZE]; /* same shape as gf_w8_single_table_data.multtable */
 56 | };
 57 | 
 58 | struct gf_w8_half_table_data {
 59 |   uint8_t     high[GF_FIELD_SIZE][GF_HALF_SIZE];
 60 |   uint8_t     low[GF_FIELD_SIZE][GF_HALF_SIZE];
 61 | };
 62 | 
 63 | struct gf_w8_single_table_data {
 64 |   uint8_t     divtable[GF_FIELD_SIZE][GF_FIELD_SIZE];
 65 |   uint8_t     multtable[GF_FIELD_SIZE][GF_FIELD_SIZE];
 66 | };
 67 | 
 68 | struct gf_w8_double_table_data {
 69 |     uint8_t         div[GF_FIELD_SIZE][GF_FIELD_SIZE];
 70 |     uint16_t        mult[GF_FIELD_SIZE][GF_FIELD_SIZE*GF_FIELD_SIZE];
 71 | };
 72 | 
 73 | struct gf_w8_double_table_lazy_data {
 74 |     uint8_t         div[GF_FIELD_SIZE][GF_FIELD_SIZE];
 75 |     uint8_t         smult[GF_FIELD_SIZE][GF_FIELD_SIZE];
 76 |     uint16_t        mult[GF_FIELD_SIZE*GF_FIELD_SIZE];
 77 | };
 78 | 
 79 | struct gf_w4_logtable_data {
 80 |     uint8_t         log_tbl[GF_BASE_FIELD_SIZE];
 81 |     uint8_t         antilog_tbl[GF_BASE_FIELD_SIZE * 2];
 82 |     uint8_t         *antilog_tbl_div;
 83 | };
 84 | 
 85 | struct gf_w4_single_table_data {
 86 |     uint8_t         div[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE];
 87 |     uint8_t         mult[GF_BASE_FIELD_SIZE][GF_BASE_FIELD_SIZE];
 88 | };
 89 | 
 90 | struct gf_w8_bytwo_data {
 91 |     uint64_t prim_poly;
 92 |     uint64_t mask1;
 93 |     uint64_t mask2;
 94 | };
 95 | 
 96 | int gf_w8_neon_cfm_init(gf_t *gf);
 97 | void gf_w8_neon_split_init(gf_t *gf);
 98 | 
 99 | #endif /* GF_COMPLETE_GF_W8_H */
100 | 


--------------------------------------------------------------------------------
/m4/ax_check_compile_flag.m4:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | #   http://www.gnu.org/software/autoconf-archive/ax_check_compile_flag.html
 3 | # ===========================================================================
 4 | #
 5 | # SYNOPSIS
 6 | #
 7 | #   AX_CHECK_COMPILE_FLAG(FLAG, [ACTION-SUCCESS], [ACTION-FAILURE], [EXTRA-FLAGS])
 8 | #
 9 | # DESCRIPTION
10 | #
11 | #   Check whether the given FLAG works with the current language's compiler
12 | #   or gives an error.  (Warnings, however, are ignored)
13 | #
14 | #   ACTION-SUCCESS/ACTION-FAILURE are shell commands to execute on
15 | #   success/failure.
16 | #
17 | #   If EXTRA-FLAGS is defined, it is added to the current language's default
18 | #   flags (e.g. CFLAGS) when the check is done.  The check is thus made with
19 | #   the flags: "CFLAGS EXTRA-FLAGS FLAG".  This can for example be used to
20 | #   force the compiler to issue an error when a bad flag is given.
21 | #
22 | #   NOTE: Implementation based on AX_CFLAGS_GCC_OPTION. Please keep this
23 | #   macro in sync with AX_CHECK_{PREPROC,LINK}_FLAG.
24 | #
25 | # LICENSE
26 | #
27 | #   Copyright (c) 2008 Guido U. Draheim <guidod@gmx.de>
28 | #   Copyright (c) 2011 Maarten Bosmans <mkbosmans@gmail.com>
29 | #
30 | #   This program is free software: you can redistribute it and/or modify it
31 | #   under the terms of the GNU General Public License as published by the
32 | #   Free Software Foundation, either version 3 of the License, or (at your
33 | #   option) any later version.
34 | #
35 | #   This program is distributed in the hope that it will be useful, but
36 | #   WITHOUT ANY WARRANTY; without even the implied warranty of
37 | #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
38 | #   Public License for more details.
39 | #
40 | #   You should have received a copy of the GNU General Public License along
41 | #   with this program. If not, see <http://www.gnu.org/licenses/>.
42 | #
43 | #   As a special exception, the respective Autoconf Macro's copyright owner
44 | #   gives unlimited permission to copy, distribute and modify the configure
45 | #   scripts that are the output of Autoconf when processing the Macro. You
46 | #   need not follow the terms of the GNU General Public License when using
47 | #   or distributing such scripts, even though portions of the text of the
48 | #   Macro appear in them. The GNU General Public License (GPL) does govern
49 | #   all other use of the material that constitutes the Autoconf Macro.
50 | #
51 | #   This special exception to the GPL applies to versions of the Autoconf
52 | #   Macro released by the Autoconf Archive. When you make and distribute a
53 | #   modified version of the Autoconf Macro, you may extend this special
54 | #   exception to the GPL to apply to your modified version as well.
55 | 
56 | #serial 2
57 | 
58 | AC_DEFUN([AX_CHECK_COMPILE_FLAG],
59 | [AC_PREREQ(2.59)dnl for _AC_LANG_PREFIX
60 | AS_VAR_PUSHDEF([CACHEVAR],[ax_cv_check_[]_AC_LANG_ABBREV[]flags_$4_$1])dnl
61 | AC_CACHE_CHECK([whether _AC_LANG compiler accepts $1], CACHEVAR, [
62 |   ax_check_save_flags=$[]_AC_LANG_PREFIX[]FLAGS
63 |   _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $4 $1"
64 |   AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
65 |     [AS_VAR_SET(CACHEVAR,[yes])],
66 |     [AS_VAR_SET(CACHEVAR,[no])])
67 |   _AC_LANG_PREFIX[]FLAGS=$ax_check_save_flags])
68 | AS_IF([test x"AS_VAR_GET(CACHEVAR)" = xyes],
69 |   [m4_default([$2], :)],
70 |   [m4_default([$3], :)])
71 | AS_VAR_POPDEF([CACHEVAR])dnl
72 | ])dnl AX_CHECK_COMPILE_FLAG
73 | 


--------------------------------------------------------------------------------
/m4/ax_gcc_x86_cpuid.m4:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | #     http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_cpuid.html
 3 | # ===========================================================================
 4 | #
 5 | # SYNOPSIS
 6 | #
 7 | #   AX_GCC_X86_CPUID(OP)
 8 | #
 9 | # DESCRIPTION
10 | #
11 | #   On Pentium and later x86 processors, with gcc or a compiler that has a
12 | #   compatible syntax for inline assembly instructions, run a small program
13 | #   that executes the cpuid instruction with input OP. This can be used to
14 | #   detect the CPU type.
15 | #
16 | #   On output, the values of the eax, ebx, ecx, and edx registers are stored
17 | #   as hexadecimal strings as "eax:ebx:ecx:edx" in the cache variable
18 | #   ax_cv_gcc_x86_cpuid_OP.
19 | #
20 | #   If the cpuid instruction fails (because you are running a
21 | #   cross-compiler, or because you are not using gcc, or because you are on
22 | #   a processor that doesn't have this instruction), ax_cv_gcc_x86_cpuid_OP
23 | #   is set to the string "unknown".
24 | #
25 | #   This macro mainly exists to be used in AX_GCC_ARCHFLAG.
26 | #
27 | # LICENSE
28 | #
29 | #   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
30 | #   Copyright (c) 2008 Matteo Frigo
31 | #
32 | #   This program is free software: you can redistribute it and/or modify it
33 | #   under the terms of the GNU General Public License as published by the
34 | #   Free Software Foundation, either version 3 of the License, or (at your
35 | #   option) any later version.
36 | #
37 | #   This program is distributed in the hope that it will be useful, but
38 | #   WITHOUT ANY WARRANTY; without even the implied warranty of
39 | #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
40 | #   Public License for more details.
41 | #
42 | #   You should have received a copy of the GNU General Public License along
43 | #   with this program. If not, see <http://www.gnu.org/licenses/>.
44 | #
45 | #   As a special exception, the respective Autoconf Macro's copyright owner
46 | #   gives unlimited permission to copy, distribute and modify the configure
47 | #   scripts that are the output of Autoconf when processing the Macro. You
48 | #   need not follow the terms of the GNU General Public License when using
49 | #   or distributing such scripts, even though portions of the text of the
50 | #   Macro appear in them. The GNU General Public License (GPL) does govern
51 | #   all other use of the material that constitutes the Autoconf Macro.
52 | #
53 | #   This special exception to the GPL applies to versions of the Autoconf
54 | #   Macro released by the Autoconf Archive. When you make and distribute a
55 | #   modified version of the Autoconf Macro, you may extend this special
56 | #   exception to the GPL to apply to your modified version as well.
57 | 
58 | #serial 7
59 | 
60 | AC_DEFUN([AX_GCC_X86_CPUID],
61 | [AC_REQUIRE([AC_PROG_CC])
62 | AC_LANG_PUSH([C])
63 | AC_CACHE_CHECK(for x86 cpuid $1 output, ax_cv_gcc_x86_cpuid_$1,
64 |  [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
65 |      int op = $1, eax, ebx, ecx, edx;
66 |      FILE *f;
67 |       __asm__("cpuid"
68 |         : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
69 |         : "a" (op));
70 |      f = fopen("conftest_cpuid", "w"); if (!f) return 1;
71 |      fprintf(f, "%x:%x:%x:%x\n", eax, ebx, ecx, edx);
72 |      fclose(f);
73 |      return 0;
74 | ])],
75 |      [ax_cv_gcc_x86_cpuid_$1=`cat conftest_cpuid`; rm -f conftest_cpuid],
76 |      [ax_cv_gcc_x86_cpuid_$1=unknown; rm -f conftest_cpuid],
77 |      [ax_cv_gcc_x86_cpuid_$1=unknown])])
78 | AC_LANG_POP([C])
79 | ])
80 | 


--------------------------------------------------------------------------------
/m4/ax_gcc_x86_avx_xgetbv.m4:
--------------------------------------------------------------------------------
 1 | # ===========================================================================
 2 | #   http://www.gnu.org/software/autoconf-archive/ax_gcc_x86_avx_xgetbv.html
 3 | # ===========================================================================
 4 | #
 5 | # SYNOPSIS
 6 | #
 7 | #   AX_GCC_X86_AVX_XGETBV(OP)
 8 | #
 9 | # DESCRIPTION
10 | #
11 | #   On later x86 processors with AVX SIMD support, with gcc or a compiler
12 | #   that has a compatible syntax for inline assembly instructions, run a
13 | #   small program that executes the xgetbv instruction with input OP. This
14 | #   can be used to detect if the OS supports AVX instruction usage.
15 | #
16 | #   On output, the values of the eax and edx registers are stored as
17 | #   hexadecimal strings as "eax:edx" in the cache variable
18 | #   ax_cv_gcc_x86_avx_xgetbv_OP.
19 | #
20 | #   If the xgetbv instruction fails (because you are running a
21 | #   cross-compiler, or because you are not using gcc, or because you are on
22 | #   a processor that doesn't have this instruction),
23 | #   ax_cv_gcc_x86_avx_xgetbv_OP is set to the string "unknown".
24 | #
25 | #   This macro mainly exists to be used in AX_EXT.
26 | #
27 | # LICENSE
28 | #
29 | #   Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
30 | #
31 | #   This program is free software: you can redistribute it and/or modify it
32 | #   under the terms of the GNU General Public License as published by the
33 | #   Free Software Foundation, either version 3 of the License, or (at your
34 | #   option) any later version.
35 | #
36 | #   This program is distributed in the hope that it will be useful, but
37 | #   WITHOUT ANY WARRANTY; without even the implied warranty of
38 | #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
39 | #   Public License for more details.
40 | #
41 | #   You should have received a copy of the GNU General Public License along
42 | #   with this program. If not, see <http://www.gnu.org/licenses/>.
43 | #
44 | #   As a special exception, the respective Autoconf Macro's copyright owner
45 | #   gives unlimited permission to copy, distribute and modify the configure
46 | #   scripts that are the output of Autoconf when processing the Macro. You
47 | #   need not follow the terms of the GNU General Public License when using
48 | #   or distributing such scripts, even though portions of the text of the
49 | #   Macro appear in them. The GNU General Public License (GPL) does govern
50 | #   all other use of the material that constitutes the Autoconf Macro.
51 | #
52 | #   This special exception to the GPL applies to versions of the Autoconf
53 | #   Macro released by the Autoconf Archive. When you make and distribute a
54 | #   modified version of the Autoconf Macro, you may extend this special
55 | #   exception to the GPL to apply to your modified version as well.
56 | 
57 | #serial 1
58 | 
59 | AC_DEFUN([AX_GCC_X86_AVX_XGETBV],
60 | [AC_REQUIRE([AC_PROG_CC])
61 | AC_LANG_PUSH([C])
62 | AC_CACHE_CHECK(for x86-AVX xgetbv $1 output, ax_cv_gcc_x86_avx_xgetbv_$1,
63 |  [AC_RUN_IFELSE([AC_LANG_PROGRAM([#include <stdio.h>], [
64 |      int op = $1, eax, edx;
65 |      FILE *f;
66 |       /* Opcodes for xgetbv */
67 |       __asm__(".byte 0x0f, 0x01, 0xd0"
68 |         : "=a" (eax), "=d" (edx)
69 |         : "c" (op));
70 |      f = fopen("conftest_xgetbv", "w"); if (!f) return 1;
71 |      fprintf(f, "%x:%x\n", eax, edx);
72 |      fclose(f);
73 |      return 0;
74 | ])],
75 |      [ax_cv_gcc_x86_avx_xgetbv_$1=`cat conftest_xgetbv`; rm -f conftest_xgetbv],
76 |      [ax_cv_gcc_x86_avx_xgetbv_$1=unknown; rm -f conftest_xgetbv],
77 |      [ax_cv_gcc_x86_avx_xgetbv_$1=unknown])])
78 | AC_LANG_POP([C])
79 | ])
80 | 


--------------------------------------------------------------------------------
/include/lrc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * The MIT License (MIT)
  3 |  *
  4 |  * Copyright (c) 2015 Zhang Yanpo (张炎泼) <drdr.xp@gmail.com>
  5 |  */
  6 | 
  7 | #ifndef LRC_EC_LRC_
  8 | #define LRC_EC_LRC_
  9 | 
 10 | #include <stdint.h>
 11 | 
 12 | /* #define LRC_DEBUG 1 */
 13 | 
 14 | #define LRC_OUT_OF_MEMORY (-1)
 15 | #define LRC_UNRECOVERABLE (-2)
 16 | #define LRC_INIT_TWICE    (-3)
 17 | #define LRC_INVALID_M     (-4)
 18 | 
 19 | #ifdef LRC_DEBUG
 20 | #   define dd( _fmt, ... )   fprintf( stderr, _fmt "\n", ##__VA_ARGS__)
 21 | #   define dlog( _fmt, ... ) fprintf( stderr, _fmt,      ##__VA_ARGS__)
 22 | #   define lrc_debug_buf_line(...) lrc_debug_buf_line_( __VA_ARGS__ )
 23 | #   define lrc_debug_matrix(...) lrc_debug_matrix_( __VA_ARGS__ )
 24 | #   define lrc_debug_sources(...) lrc_debug_sources_( __VA_ARGS__ )
 25 | #else
 26 | #   define dd( _fmt, ... )
 27 | #   define dlog( _fmt, ... )
 28 | #   define lrc_debug_buf_line(...)
 29 | #   define lrc_debug_matrix(...)
 30 | #   define lrc_debug_sources(...)
 31 | #endif /* LRC_DEBUG */
 32 | 
 33 | #define lrc_align_16(val) ((((val) - 1) / 16 + 1) * 16)  /* rounds val (>= 1) up to a multiple of 16; argument parenthesized so expressions such as (1 << 6) expand correctly */
 34 | 
 35 | #define _lrc_concat(a, b) a ## b
 36 | 
 37 | #define _lrc_n_arr_k(...) (sizeof((uint8_t[]){__VA_ARGS__}) / sizeof(uint8_t)), \
 38 |   (uint8_t[]){__VA_ARGS__}
 39 | 
 40 | /* k_param is in form: 'k(2, 3, 4)'
 41 |  *
 42 |  * expansion of this macro:
 43 |  *
 44 |  *      lrc_init(lrc, k(2, 3), 2)
 45 |  * ->   lrc_init_n(lrc, _lrc_concat(_lrc_n_arr_, k(2, 3)), 2)
 46 |  * ->   lrc_init_n(lrc, _lrc_n_arr_ ## k(2, 3), 2)
 47 |  * ->   lrc_init_n(lrc, _lrc_n_arr_k(2, 3), 2)
 48 |  * ->   lrc_init_n(lrc, (sizeof((uint8_t[]){2, 3}) / sizeof(uint8_t)), (uint8_t[]){2, 3}, 2)
 49 |  * ->   lrc_init_n(lrc, 2, (uint8_t[]){2, 3}, 2)
 50 |  */
 51 | #define lrc_init(lrc, k_param, m) \
 52 |   lrc_init_n((lrc), _lrc_concat(_lrc_n_arr_, k_param), (m))
 53 | 
 54 | 
 55 | extern int *reed_sol_vandermonde_coding_matrix(int k, int m, int w);
 56 | extern int jerasure_matrix_decode(int k, int m, int w,
 57 |                                   int *matrix, int row_k_ones, int *erasures,
 58 |                                   char **data_ptrs, char **coding_ptrs, int size);
 59 | 
 60 | typedef struct {
 61 |   uint8_t start;
 62 |   uint8_t len;
 63 | } lrc_local_t;
 64 | 
 65 | typedef struct {
 66 | 
 67 |   int       n_data;
 68 |   int       n_code;
 69 |   int       n;                    /* n_data + n_code */
 70 | 
 71 |   char     *data[512];
 72 |   char    **code;
 73 | 
 74 |   int64_t   chunk_size;
 75 |   int64_t   aligned_chunk_size;
 76 |   char     *buf;
 77 | 
 78 |   int8_t    buf_owned;
 79 |   int8_t    inited_;
 80 | 
 81 | } lrc_buf_t;
 82 | 
 83 | typedef struct {
 84 | 
 85 |   int          k;             /* nr of data                                  */
 86 |   int          m;             /* nr of code of original reed-solomon ec      */
 87 |   int          n;             /* total number of chunks: k + m               */
 88 | 
 89 |   int          n_local;       /* nr of local EC                              */
 90 |   lrc_local_t *locals;        /* start index and nr of elts of each local EC */
 91 | 
 92 |   int         *matrix;        /* encoding matrix, m * k                      */
 93 |   int8_t      *code_erased;   /* for encode                                  */
 94 | 
 95 |   int8_t       inited_;
 96 | 
 97 | } lrc_t;
 98 | 
 99 | typedef struct {
100 | 
101 |   lrc_t     *lrc;
102 |   lrc_buf_t  buf;
103 | 
104 |   int8_t     erased[512];     /* array of index of erased data/code                        */
105 |   int8_t     source[512];     /* data/code indexes that are required to reconstruct        */
106 |   int       *decode_matrix;   /* matrix with damaged data-row/unnecessary code-row removed */
107 | 
108 |   int8_t     inited_;
109 | 
110 | } lrc_decoder_t;
111 | 
112 | int  lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_k_arr, int m);
113 | void lrc_destroy(lrc_t *lrc);
114 | int  lrc_encode(lrc_t *lrc, lrc_buf_t *lb);
115 | int  lrc_decode(lrc_t *lrc, lrc_buf_t *lb, int8_t *erased);
116 | int  lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source);
117 | 
118 | int *lrc_make_matrix(lrc_t *lrc);
119 | int  lrc_get_n_locally_erased(lrc_t *lrc, int idx_local, int8_t *erased);
120 | int  lrc_count_erased(int n, int8_t *erased);
121 | 
122 | void lrc_debug_buf_line_(lrc_buf_t *lb, int n);
123 | void lrc_debug_matrix_(int *matrix, int row, int col);
124 | void lrc_debug_sources_(int n, int8_t *source);
125 | 
126 | int  lrc_buf_init(lrc_buf_t *lb, lrc_t *lrc, int64_t chunk_size);
127 | void lrc_buf_destroy(lrc_buf_t *lb);
128 | int  lrc_buf_shadow(lrc_buf_t *lb, lrc_buf_t *src);
129 | 
130 | int  lrc_decoder_init(lrc_decoder_t *dec, lrc_t *lrc, lrc_buf_t *lb, int8_t *erased);
131 | void lrc_decoder_destroy(lrc_decoder_t *dec);
132 | int  lrc_decoder_decode(lrc_decoder_t *dec);
133 | 
134 | #endif /* LRC_EC_LRC_ */
135 | // vim:sw=2:fdl=1
136 | 


--------------------------------------------------------------------------------
/m4/ltsugar.m4:
--------------------------------------------------------------------------------
  1 | # ltsugar.m4 -- libtool m4 base layer.                         -*-Autoconf-*-
  2 | #
  3 | # Copyright (C) 2004-2005, 2007-2008, 2011-2015 Free Software
  4 | # Foundation, Inc.
  5 | # Written by Gary V. Vaughan, 2004
  6 | #
  7 | # This file is free software; the Free Software Foundation gives
  8 | # unlimited permission to copy and/or distribute it, with or without
  9 | # modifications, as long as this notice is preserved.
 10 | 
 11 | # serial 6 ltsugar.m4
 12 | 
 13 | # This is to help aclocal find these macros, as it can't see m4_define.
 14 | AC_DEFUN([LTSUGAR_VERSION], [m4_if([0.1])])
 15 | 
 16 | 
 17 | # lt_join(SEP, ARG1, [ARG2...])
 18 | # -----------------------------
 19 | # Produce ARG1SEPARG2...SEPARGn, omitting [] arguments and their
 20 | # associated separator.
 21 | # Needed until we can rely on m4_join from Autoconf 2.62, since all earlier
 22 | # versions in m4sugar had bugs.
 23 | m4_define([lt_join],
 24 | [m4_if([$#], [1], [],
 25 |        [$#], [2], [[$2]],
 26 |        [m4_if([$2], [], [], [[$2]_])$0([$1], m4_shift(m4_shift($@)))])])
 27 | m4_define([_lt_join],
 28 | [m4_if([$#$2], [2], [],
 29 |        [m4_if([$2], [], [], [[$1$2]])$0([$1], m4_shift(m4_shift($@)))])])
 30 | 
 31 | 
 32 | # lt_car(LIST)
 33 | # lt_cdr(LIST)
 34 | # ------------
 35 | # Manipulate m4 lists.
 36 | # These macros are necessary as long as we still need to support
 37 | # Autoconf-2.59, which quotes differently.
 38 | m4_define([lt_car], [[$1]])
 39 | m4_define([lt_cdr],
 40 | [m4_if([$#], 0, [m4_fatal([$0: cannot be called without arguments])],
 41 |        [$#], 1, [],
 42 |        [m4_dquote(m4_shift($@))])])
 43 | m4_define([lt_unquote], $1)
 44 | 
 45 | 
 46 | # lt_append(MACRO-NAME, STRING, [SEPARATOR])
 47 | # ------------------------------------------
 48 | # Redefine MACRO-NAME to hold its former content plus 'SEPARATOR''STRING'.
 49 | # Note that neither SEPARATOR nor STRING are expanded; they are appended
 50 | # to MACRO-NAME as is (leaving the expansion for when MACRO-NAME is invoked).
 51 | # No SEPARATOR is output if MACRO-NAME was previously undefined (different
 52 | # than defined and empty).
 53 | #
 54 | # This macro is needed until we can rely on Autoconf 2.62, since earlier
 55 | # versions of m4sugar mistakenly expanded SEPARATOR but not STRING.
 56 | m4_define([lt_append],
 57 | [m4_define([$1],
 58 | 	   m4_ifdef([$1], [m4_defn([$1])[$3]])[$2])])
 59 | 
 60 | 
 61 | 
 62 | # lt_combine(SEP, PREFIX-LIST, INFIX, SUFFIX1, [SUFFIX2...])
 63 | # ----------------------------------------------------------
 64 | # Produce a SEP delimited list of all paired combinations of elements of
 65 | # PREFIX-LIST with SUFFIX1 through SUFFIXn.  Each element of the list
 66 | # has the form PREFIXmINFIXSUFFIXn.
 67 | # Needed until we can rely on m4_combine added in Autoconf 2.62.
 68 | m4_define([lt_combine],
 69 | [m4_if(m4_eval([$# > 3]), [1],
 70 |        [m4_pushdef([_Lt_sep], [m4_define([_Lt_sep], m4_defn([lt_car]))])]]dnl
 71 | [[m4_foreach([_Lt_prefix], [$2],
 72 | 	     [m4_foreach([_Lt_suffix],
 73 | 		]m4_dquote(m4_dquote(m4_shift(m4_shift(m4_shift($@)))))[,
 74 | 	[_Lt_sep([$1])[]m4_defn([_Lt_prefix])[$3]m4_defn([_Lt_suffix])])])])])
 75 | 
 76 | 
 77 | # lt_if_append_uniq(MACRO-NAME, VARNAME, [SEPARATOR], [UNIQ], [NOT-UNIQ])
 78 | # -----------------------------------------------------------------------
 79 | # Iff MACRO-NAME does not yet contain VARNAME, then append it (delimited
 80 | # by SEPARATOR if supplied) and expand UNIQ, else NOT-UNIQ.
 81 | m4_define([lt_if_append_uniq],
 82 | [m4_ifdef([$1],
 83 | 	  [m4_if(m4_index([$3]m4_defn([$1])[$3], [$3$2$3]), [-1],
 84 | 		 [lt_append([$1], [$2], [$3])$4],
 85 | 		 [$5])],
 86 | 	  [lt_append([$1], [$2], [$3])$4])])
 87 | 
 88 | 
 89 | # lt_dict_add(DICT, KEY, VALUE)
 90 | # -----------------------------
 91 | m4_define([lt_dict_add],
 92 | [m4_define([$1($2)], [$3])])
 93 | 
 94 | 
 95 | # lt_dict_add_subkey(DICT, KEY, SUBKEY, VALUE)
 96 | # --------------------------------------------
 97 | m4_define([lt_dict_add_subkey],
 98 | [m4_define([$1($2:$3)], [$4])])
 99 | 
100 | 
101 | # lt_dict_fetch(DICT, KEY, [SUBKEY])
102 | # ----------------------------------
103 | m4_define([lt_dict_fetch],
104 | [m4_ifval([$3],
105 | 	m4_ifdef([$1($2:$3)], [m4_defn([$1($2:$3)])]),
106 |     m4_ifdef([$1($2)], [m4_defn([$1($2)])]))])
107 | 
108 | 
109 | # lt_if_dict_fetch(DICT, KEY, [SUBKEY], VALUE, IF-TRUE, [IF-FALSE])
110 | # -----------------------------------------------------------------
111 | m4_define([lt_if_dict_fetch],
112 | [m4_if(lt_dict_fetch([$1], [$2], [$3]), [$4],
113 | 	[$5],
114 |     [$6])])
115 | 
116 | 
117 | # lt_dict_filter(DICT, [SUBKEY], VALUE, [SEPARATOR], KEY, [...])
118 | # --------------------------------------------------------------
119 | m4_define([lt_dict_filter],
120 | [m4_if([$5], [], [],
121 |   [lt_join(m4_quote(m4_default([$4], [[, ]])),
122 |            lt_unquote(m4_split(m4_normalize(m4_foreach(_Lt_key, lt_car([m4_shiftn(4, $@)]),
123 | 		      [lt_if_dict_fetch([$1], _Lt_key, [$2], [$3], [_Lt_key ])])))))])[]dnl
124 | ])
125 | 


--------------------------------------------------------------------------------
/include/galois.h:
--------------------------------------------------------------------------------
  1 | /* *
  2 |  * Copyright (c) 2013, James S. Plank and Kevin Greenan
  3 |  * All rights reserved.
  4 |  *
  5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
  6 |  * Coding Techniques
  7 |  *
  8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
  9 |  *
 10 |  * Redistribution and use in source and binary forms, with or without
 11 |  * modification, are permitted provided that the following conditions
 12 |  * are met:
 13 |  *
 14 |  *  - Redistributions of source code must retain the above copyright
 15 |  *    notice, this list of conditions and the following disclaimer.
 16 |  *
 17 |  *  - Redistributions in binary form must reproduce the above copyright
 18 |  *    notice, this list of conditions and the following disclaimer in
 19 |  *    the documentation and/or other materials provided with the
 20 |  *    distribution.
 21 |  *
 22 |  *  - Neither the name of the University of Tennessee nor the names of its
 23 |  *    contributors may be used to endorse or promote products derived
 24 |  *    from this software without specific prior written permission.
 25 |  *
 26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 |  * POSSIBILITY OF SUCH DAMAGE.
 38 |  */
 39 | 
 40 | 
 41 | #ifndef _GALOIS_H
 42 | #define _GALOIS_H
 43 | 
 44 | #include <stdio.h>
 45 | #include <stdlib.h>
 46 | #include <gf_complete.h>
 47 | 
 48 | extern void galois_change_technique(gf_t *gf, int w);
 49 | 
 50 | extern int galois_single_multiply(int a, int b, int w);
 51 | extern int galois_single_divide(int a, int b, int w);
 52 | extern int galois_inverse(int x, int w);
 53 | 
 54 | void galois_region_xor(           char *src,         /* Source Region */
 55 |                                   char *dest,        /* Dest Region (holds result) */
 56 |                                   int nbytes);      /* Number of bytes in region */
 57 | 
 58 | /* These multiply regions in w=8, w=16 and w=32.  They are much faster
 59 |    than calling galois_single_multiply.  The regions must be long word aligned. */
 60 | 
 61 | void galois_w08_region_multiply(char *region,       /* Region to multiply */
 62 |                                   int multby,       /* Number to multiply by */
 63 |                                   int nbytes,       /* Number of bytes in region */
 64 |                                   char *r2,         /* If r2 != NULL, products go here.
 65 |                                                        Otherwise region is overwritten */
 66 |                                   int add);         /* If (r2 != NULL && add) the product is XOR'd with r2 */
 67 | 
 68 | void galois_w16_region_multiply(char *region,       /* Region to multiply */
 69 |                                   int multby,       /* Number to multiply by */
 70 |                                   int nbytes,       /* Number of bytes in region */
 71 |                                   char *r2,         /* If r2 != NULL, products go here.
 72 |                                                        Otherwise region is overwritten */
 73 |                                   int add);         /* If (r2 != NULL && add) the product is XOR'd with r2 */
 74 | 
 75 | void galois_w32_region_multiply(char *region,       /* Region to multiply */
 76 |                                   int multby,       /* Number to multiply by */
 77 |                                   int nbytes,       /* Number of bytes in region */
 78 |                                   char *r2,         /* If r2 != NULL, products go here.
 79 |                                                        Otherwise region is overwritten */
 80 |                                   int add);         /* If (r2 != NULL && add) the product is XOR'd with r2 */
 81 | 
 82 | gf_t* galois_init_field(int w,
 83 |                              int mult_type,
 84 |                              int region_type,
 85 |                              int divide_type,
 86 |                              uint64_t prim_poly,
 87 |                              int arg1,
 88 |                              int arg2);
 89 | 
 90 | gf_t* galois_init_composite_field(int w,            /* Presumably the word size of the composite field -- confirm in galois.c */
 91 |                                 int region_type,      /* Presumably an OR of GF_REGION_* bits (gf_complete.h) -- confirm */
 92 |                                 int divide_type,      /* Presumably a gf_division_type_t value (gf_complete.h) -- confirm */
 93 |                                 int degree,           /* NOTE(review): looks like the extension degree over base_gf -- confirm */
 94 |                                 gf_t* base_gf);       /* Field the composite field is built on; ownership not visible here */
 95 | 
 96 | gf_t * galois_get_field_ptr(int w);  /* Presumably returns the gf_t in use for word size w -- confirm in galois.c */
 97 | 
 98 | 
 99 | #endif
100 | 


--------------------------------------------------------------------------------
/m4/lt~obsolete.m4:
--------------------------------------------------------------------------------
  1 | # lt~obsolete.m4 -- aclocal satisfying obsolete definitions.    -*-Autoconf-*-
  2 | #
  3 | #   Copyright (C) 2004-2005, 2007, 2009, 2011-2015 Free Software
  4 | #   Foundation, Inc.
  5 | #   Written by Scott James Remnant, 2004.
  6 | #
  7 | # This file is free software; the Free Software Foundation gives
  8 | # unlimited permission to copy and/or distribute it, with or without
  9 | # modifications, as long as this notice is preserved.
 10 | 
 11 | # serial 5 lt~obsolete.m4
 12 | 
 13 | # These exist entirely to fool aclocal when bootstrapping libtool.
 14 | #
 15 | # In the past libtool.m4 has provided macros via AC_DEFUN (or AU_DEFUN),
 16 | # which have later been changed to m4_define as they aren't part of the
 17 | # exported API, or moved to Autoconf or Automake where they belong.
 18 | #
 19 | # The trouble is, aclocal is a bit thick.  It'll see the old AC_DEFUN
 20 | # in /usr/share/aclocal/libtool.m4 and remember it, then when it sees us
 21 | # using a macro with the same name in our local m4/libtool.m4 it'll
 22 | # pull the old libtool.m4 in (it doesn't see our shiny new m4_define
 23 | # and doesn't know about Autoconf macros at all.)
 24 | #
 25 | # So we provide this file, which has a silly filename so it's always
 26 | # included after everything else.  This provides aclocal with the
 27 | # AC_DEFUNs it wants, but when m4 processes it, it doesn't do anything
 28 | # because those macros already exist, or will be overwritten later.
 29 | # We use AC_DEFUN over AU_DEFUN for compatibility with aclocal-1.6.
 30 | #
 31 | # Anytime we withdraw an AC_DEFUN or AU_DEFUN, remember to add it here.
 32 | # Yes, that means every name once taken will need to remain here until
 33 | # we give up compatibility with versions before 1.7, at which point
 34 | # we need to keep only those names which we still refer to.
 35 | 
 36 | # This is to help aclocal find these macros, as it can't see m4_define.
 37 | AC_DEFUN([LTOBSOLETE_VERSION], [m4_if([1])])
 38 | 
 39 | m4_ifndef([AC_LIBTOOL_LINKER_OPTION],	[AC_DEFUN([AC_LIBTOOL_LINKER_OPTION])])
 40 | m4_ifndef([AC_PROG_EGREP],		[AC_DEFUN([AC_PROG_EGREP])])
 41 | m4_ifndef([_LT_AC_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH])])
 42 | m4_ifndef([_LT_AC_SHELL_INIT],		[AC_DEFUN([_LT_AC_SHELL_INIT])])
 43 | m4_ifndef([_LT_AC_SYS_LIBPATH_AIX],	[AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX])])
 44 | m4_ifndef([_LT_PROG_LTMAIN],		[AC_DEFUN([_LT_PROG_LTMAIN])])
 45 | m4_ifndef([_LT_AC_TAGVAR],		[AC_DEFUN([_LT_AC_TAGVAR])])
 46 | m4_ifndef([AC_LTDL_ENABLE_INSTALL],	[AC_DEFUN([AC_LTDL_ENABLE_INSTALL])])
 47 | m4_ifndef([AC_LTDL_PREOPEN],		[AC_DEFUN([AC_LTDL_PREOPEN])])
 48 | m4_ifndef([_LT_AC_SYS_COMPILER],	[AC_DEFUN([_LT_AC_SYS_COMPILER])])
 49 | m4_ifndef([_LT_AC_LOCK],		[AC_DEFUN([_LT_AC_LOCK])])
 50 | m4_ifndef([AC_LIBTOOL_SYS_OLD_ARCHIVE],	[AC_DEFUN([AC_LIBTOOL_SYS_OLD_ARCHIVE])])
 51 | m4_ifndef([_LT_AC_TRY_DLOPEN_SELF],	[AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF])])
 52 | m4_ifndef([AC_LIBTOOL_PROG_CC_C_O],	[AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O])])
 53 | m4_ifndef([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], [AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS])])
 54 | m4_ifndef([AC_LIBTOOL_OBJDIR],		[AC_DEFUN([AC_LIBTOOL_OBJDIR])])
 55 | m4_ifndef([AC_LTDL_OBJDIR],		[AC_DEFUN([AC_LTDL_OBJDIR])])
 56 | m4_ifndef([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], [AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH])])
 57 | m4_ifndef([AC_LIBTOOL_SYS_LIB_STRIP],	[AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP])])
 58 | m4_ifndef([AC_PATH_MAGIC],		[AC_DEFUN([AC_PATH_MAGIC])])
 59 | m4_ifndef([AC_PROG_LD_GNU],		[AC_DEFUN([AC_PROG_LD_GNU])])
 60 | m4_ifndef([AC_PROG_LD_RELOAD_FLAG],	[AC_DEFUN([AC_PROG_LD_RELOAD_FLAG])])
 61 | m4_ifndef([AC_DEPLIBS_CHECK_METHOD],	[AC_DEFUN([AC_DEPLIBS_CHECK_METHOD])])
 62 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI])])
 63 | m4_ifndef([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], [AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])])
 64 | m4_ifndef([AC_LIBTOOL_PROG_COMPILER_PIC], [AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC])])
 65 | m4_ifndef([AC_LIBTOOL_PROG_LD_SHLIBS],	[AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS])])
 66 | m4_ifndef([AC_LIBTOOL_POSTDEP_PREDEP],	[AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP])])
 67 | m4_ifndef([LT_AC_PROG_EGREP],		[AC_DEFUN([LT_AC_PROG_EGREP])])
 68 | m4_ifndef([LT_AC_PROG_SED],		[AC_DEFUN([LT_AC_PROG_SED])])
 69 | m4_ifndef([_LT_CC_BASENAME],		[AC_DEFUN([_LT_CC_BASENAME])])
 70 | m4_ifndef([_LT_COMPILER_BOILERPLATE],	[AC_DEFUN([_LT_COMPILER_BOILERPLATE])])
 71 | m4_ifndef([_LT_LINKER_BOILERPLATE],	[AC_DEFUN([_LT_LINKER_BOILERPLATE])])
 72 | m4_ifndef([_AC_PROG_LIBTOOL],		[AC_DEFUN([_AC_PROG_LIBTOOL])])
 73 | m4_ifndef([AC_LIBTOOL_SETUP],		[AC_DEFUN([AC_LIBTOOL_SETUP])])
 74 | m4_ifndef([_LT_AC_CHECK_DLFCN],		[AC_DEFUN([_LT_AC_CHECK_DLFCN])])
 75 | m4_ifndef([AC_LIBTOOL_SYS_DYNAMIC_LINKER],	[AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER])])
 76 | m4_ifndef([_LT_AC_TAGCONFIG],		[AC_DEFUN([_LT_AC_TAGCONFIG])])
 77 | m4_ifndef([AC_DISABLE_FAST_INSTALL],	[AC_DEFUN([AC_DISABLE_FAST_INSTALL])])
 78 | m4_ifndef([_LT_AC_LANG_CXX],		[AC_DEFUN([_LT_AC_LANG_CXX])])
 79 | m4_ifndef([_LT_AC_LANG_F77],		[AC_DEFUN([_LT_AC_LANG_F77])])
 80 | m4_ifndef([_LT_AC_LANG_GCJ],		[AC_DEFUN([_LT_AC_LANG_GCJ])])
 81 | m4_ifndef([AC_LIBTOOL_LANG_C_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG])])
 82 | m4_ifndef([_LT_AC_LANG_C_CONFIG],	[AC_DEFUN([_LT_AC_LANG_C_CONFIG])])
 83 | m4_ifndef([AC_LIBTOOL_LANG_CXX_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG])])
 84 | m4_ifndef([_LT_AC_LANG_CXX_CONFIG],	[AC_DEFUN([_LT_AC_LANG_CXX_CONFIG])])
 85 | m4_ifndef([AC_LIBTOOL_LANG_F77_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_F77_CONFIG])])
 86 | m4_ifndef([_LT_AC_LANG_F77_CONFIG],	[AC_DEFUN([_LT_AC_LANG_F77_CONFIG])])
 87 | m4_ifndef([AC_LIBTOOL_LANG_GCJ_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_GCJ_CONFIG])])
 88 | m4_ifndef([_LT_AC_LANG_GCJ_CONFIG],	[AC_DEFUN([_LT_AC_LANG_GCJ_CONFIG])])
 89 | m4_ifndef([AC_LIBTOOL_LANG_RC_CONFIG],	[AC_DEFUN([AC_LIBTOOL_LANG_RC_CONFIG])])
 90 | m4_ifndef([_LT_AC_LANG_RC_CONFIG],	[AC_DEFUN([_LT_AC_LANG_RC_CONFIG])])
 91 | m4_ifndef([AC_LIBTOOL_CONFIG],		[AC_DEFUN([AC_LIBTOOL_CONFIG])])
 92 | m4_ifndef([_LT_AC_FILE_LTDLL_C],	[AC_DEFUN([_LT_AC_FILE_LTDLL_C])])
 93 | m4_ifndef([_LT_REQUIRED_DARWIN_CHECKS],	[AC_DEFUN([_LT_REQUIRED_DARWIN_CHECKS])])
 94 | m4_ifndef([_LT_AC_PROG_CXXCPP],		[AC_DEFUN([_LT_AC_PROG_CXXCPP])])
 95 | m4_ifndef([_LT_PREPARE_SED_QUOTE_VARS],	[AC_DEFUN([_LT_PREPARE_SED_QUOTE_VARS])])
 96 | m4_ifndef([_LT_PROG_ECHO_BACKSLASH],	[AC_DEFUN([_LT_PROG_ECHO_BACKSLASH])])
 97 | m4_ifndef([_LT_PROG_F77],		[AC_DEFUN([_LT_PROG_F77])])
 98 | m4_ifndef([_LT_PROG_FC],		[AC_DEFUN([_LT_PROG_FC])])
 99 | m4_ifndef([_LT_PROG_CXX],		[AC_DEFUN([_LT_PROG_CXX])])
100 | 


--------------------------------------------------------------------------------
/src/gf_method.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * gf_method.c
  7 |  *
  8 |  * Parses argv to figure out the mult_type and arguments.  Returns the gf.
  9 |  */
 10 | 
 11 | #include <stdio.h>
 12 | #include <stdint.h>
 13 | #include <string.h>
 14 | #include <stdlib.h>
 15 | #include <time.h>
 16 | 
 17 | #include "gf_complete.h"
 18 | #include "gf_int.h"
 19 | #include "gf_method.h"
 20 | 
 21 | /* One entry of a keyword table: an argv spelling and the value it selects. */
 22 | typedef struct { const char *name; int value; } gf_method_keyword_t;
 23 | 
 24 | /* Looks `word` up in `table`, whose last entry has a NULL name.
 25 |    Returns the matching value, or -1 when the keyword is not listed (every
 26 |    value stored in the tables below is non-negative). */
 27 | static int gf_method_lookup(const gf_method_keyword_t *table, const char *word)
 28 | {
 29 |   for (; table->name != NULL; table++) {
 30 |     if (strcmp(table->name, word) == 0) return table->value;
 31 |   }
 32 |   return -1;
 33 | }
 34 | 
 35 | /* Releases a COMPOSITE base field: gf_free() releases its scratch memory
 36 |    (see gf_complete.h) and free() releases the gf_t that was malloc'd by
 37 |    create_gf_from_argv().  A NULL base is ignored. */
 38 | static void gf_method_release(gf_t *base)
 39 | {
 40 |   if (base == NULL) return;
 41 |   gf_free(base, 1);
 42 |   free(base);
 43 | }
 44 | 
 45 | /* Shared error exit: drops the base field (if any), stores err in _gf_errno
 46 |    and returns 0, the failure value of create_gf_from_argv(). */
 47 | static int gf_method_fail(gf_t *base, int err)
 48 | {
 49 |   gf_method_release(base);
 50 |   _gf_errno = err;
 51 |   return 0;
 52 | }
 53 | 
 54 | /*
 55 |  * Builds a gf_t from a method specification found in argv.
 56 |  *
 57 |  * Parsing starts at argv[starting] and consumes flags until a lone "-":
 58 |  *   -m METHOD   multiplication technique; GROUP and SPLIT take two integers,
 59 |  *               COMPOSITE takes one integer followed by a nested method
 60 |  *               specification (itself ended by "-") for the base field,
 61 |  *               which is created with width w / that integer.
 62 |  *   -r REGION   region option; may be repeated, the bits are OR'd together.
 63 |  *   -p POLY     primitive polynomial, read as hexadecimal.
 64 |  *   -d DIVIDE   division technique (EUCLID or MATRIX).
 65 |  *
 66 |  * gf       - caller-provided gf_t that gf_init_hard() fills in.
 67 |  * w        - field width passed through to gf_init_hard().
 68 |  * argc     - number of entries in argv.
 69 |  * argv     - argument vector.
 70 |  * starting - index of the first argument to examine.
 71 |  *
 72 |  * Returns the index just past the terminating "-" on success.  Returns 0 on
 73 |  * failure; _gf_errno is set here for parse errors and is not touched here
 74 |  * when gf_init_hard() or malloc() fails.
 75 |  *
 76 |  * Compared with the previous version, every flag now verifies that its value
 77 |  * is actually present before reading it (argv[argc] used to be handed to
 78 |  * strcmp/sscanf), sscanf results are compared against 1 so that an empty
 79 |  * string (EOF) is rejected, a non-positive COMPOSITE degree no longer
 80 |  * divides by zero, and all error exits release the base field.
 81 |  */
 82 | int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
 83 | {
 84 |   static const gf_method_keyword_t mult_names[] = {
 85 |     { "SHIFT", GF_MULT_SHIFT }, { "CARRY_FREE", GF_MULT_CARRY_FREE },
 86 |     { "CARRY_FREE_GK", GF_MULT_CARRY_FREE_GK }, { "GROUP", GF_MULT_GROUP },
 87 |     { "BYTWO_p", GF_MULT_BYTWO_p }, { "BYTWO_b", GF_MULT_BYTWO_b },
 88 |     { "TABLE", GF_MULT_TABLE }, { "LOG", GF_MULT_LOG_TABLE },
 89 |     { "LOG_ZERO", GF_MULT_LOG_ZERO }, { "LOG_ZERO_EXT", GF_MULT_LOG_ZERO_EXT },
 90 |     { "SPLIT", GF_MULT_SPLIT_TABLE }, { "COMPOSITE", GF_MULT_COMPOSITE },
 91 |     { NULL, 0 }
 92 |   };
 93 |   static const gf_method_keyword_t region_names[] = {
 94 |     { "DOUBLE", GF_REGION_DOUBLE_TABLE }, { "QUAD", GF_REGION_QUAD_TABLE },
 95 |     { "LAZY", GF_REGION_LAZY }, { "SIMD", GF_REGION_SIMD },
 96 |     { "NOSIMD", GF_REGION_NOSIMD }, { "SSE", GF_REGION_SIMD },
 97 |     { "NOSSE", GF_REGION_NOSIMD }, { "CAUCHY", GF_REGION_CAUCHY },
 98 |     { "ALTMAP", GF_REGION_ALTMAP }, { NULL, 0 }
 99 |   };
100 |   static const gf_method_keyword_t divide_names[] = {
101 |     { "EUCLID", GF_DIVIDE_EUCLID }, { "MATRIX", GF_DIVIDE_MATRIX }, { NULL, 0 }
102 |   };
103 |   int mult_type = GF_MULT_DEFAULT;
104 |   int region_type = GF_REGION_DEFAULT;
105 |   int divide_type = GF_DIVIDE_DEFAULT;
106 |   int arg1 = 0, arg2 = 0, value;
107 |   unsigned long long poly;
108 |   uint64_t prim_poly = 0;
109 |   gf_t *base = NULL;
110 |   const char *flag;
111 | 
112 |   while (starting < argc) {
113 |     flag = argv[starting++];
114 | 
115 |     if (strcmp(flag, "-") == 0) {
116 |       /* End of the specification: build the field with what was collected. */
117 |       if (gf_init_hard(gf, w, mult_type, region_type, divide_type,
118 |                        prim_poly, arg1, arg2, base, NULL) == 0) {
119 |         gf_method_release(base);
120 |         return 0;
121 |       }
122 |       return starting;
123 |     }
124 | 
125 |     if (strcmp(flag, "-m") == 0) {
126 |       if (mult_type != GF_MULT_DEFAULT) return gf_method_fail(base, GF_E_TWOMULT);
127 |       if (starting >= argc) return gf_method_fail(base, GF_E_FEWARGS);
128 |       value = gf_method_lookup(mult_names, argv[starting]);
129 |       if (value < 0) return gf_method_fail(base, GF_E_UNKNOWN);
130 |       mult_type = value;
131 |       if (value == GF_MULT_GROUP || value == GF_MULT_SPLIT_TABLE) {
132 |         /* Both methods are followed by two integer arguments. */
133 |         if (argc < starting + 3) {
134 |           return gf_method_fail(base, value == GF_MULT_GROUP ? GF_E_GROUPAR : GF_E_SPLITAR);
135 |         }
136 |         if (sscanf(argv[starting+1], "%d", &arg1) != 1 ||
137 |             sscanf(argv[starting+2], "%d", &arg2) != 1) {
138 |           return gf_method_fail(base, value == GF_MULT_GROUP ? GF_E_GROUPNU : GF_E_SPLITNU);
139 |         }
140 |         starting += 3;
141 |       } else if (value == GF_MULT_COMPOSITE) {
142 |         if (argc < starting + 2) return gf_method_fail(base, GF_E_FEWARGS);
143 |         /* arg1 divides w below, so zero or negative degrees are rejected. */
144 |         if (sscanf(argv[starting+1], "%d", &arg1) != 1 || arg1 <= 0) {
145 |           return gf_method_fail(base, GF_E_COMP_A2);
146 |         }
147 |         base = (gf_t *) malloc(sizeof(gf_t));
148 |         if (base == NULL) return 0;  /* out of memory; _gf_errno is left untouched */
149 |         starting = create_gf_from_argv(base, w/arg1, argc, argv, starting + 2);
150 |         if (starting == 0) {
151 |           /* The nested call reported the error; as before, only the struct is freed. */
152 |           free(base);
153 |           return 0;
154 |         }
155 |       } else {
156 |         starting++;
157 |       }
158 |       continue;
159 |     }
160 | 
161 |     if (strcmp(flag, "-r") == 0) {
162 |       if (starting >= argc) return gf_method_fail(base, GF_E_FEWARGS);
163 |       value = gf_method_lookup(region_names, argv[starting++]);
164 |       if (value < 0) return gf_method_fail(base, GF_E_UNK_REG);
165 |       region_type |= value;
166 |       continue;
167 |     }
168 | 
169 |     if (strcmp(flag, "-p") == 0) {
170 |       if (starting >= argc) return gf_method_fail(base, GF_E_FEWARGS);
171 |       /* Scan into a real unsigned long long instead of casting &prim_poly. */
172 |       if (sscanf(argv[starting++], "%llx", &poly) != 1) {
173 |         return gf_method_fail(base, GF_E_POLYSPC);
174 |       }
175 |       prim_poly = (uint64_t) poly;
176 |       continue;
177 |     }
178 | 
179 |     if (strcmp(flag, "-d") == 0) {
180 |       if (divide_type != GF_DIVIDE_DEFAULT) return gf_method_fail(base, GF_E_TWO_DIV);
181 |       if (starting >= argc) return gf_method_fail(base, GF_E_FEWARGS);
182 |       value = gf_method_lookup(divide_names, argv[starting++]);
183 |       if (value < 0) return gf_method_fail(base, GF_E_UNK_DIV);
184 |       divide_type = value;
185 |       continue;
186 |     }
187 | 
188 |     return gf_method_fail(base, GF_E_UNKFLAG);
189 |   }
190 | 
191 |   /* Ran out of arguments before the terminating "-". */
192 |   return gf_method_fail(base, GF_E_FEWARGS);
193 | }
194 | 


--------------------------------------------------------------------------------
/include/gf_complete.h:
--------------------------------------------------------------------------------
  1 | /* 
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * gf_complete.h
  7 |  *
  8 |  * The main include file for gf_complete. 
  9 |  */
 10 | 
 11 | #ifndef _GF_COMPLETE_H_
 12 | #define _GF_COMPLETE_H_
 13 | #include <stdint.h>
 14 | 
 15 | #ifdef INTEL_SSE4
 16 |   #ifdef __SSE4_2__
 17 |     #include <nmmintrin.h>
 18 |   #endif
 19 |   #ifdef __SSE4_1__
 20 |     #include <smmintrin.h>
 21 |   #endif
 22 | #endif
 23 | 
 24 | #ifdef INTEL_SSSE3
 25 |   #include <tmmintrin.h>
 26 | #endif
 27 | 
 28 | #ifdef INTEL_SSE2
 29 |   #include <emmintrin.h>
 30 | #endif
 31 | 
 32 | #ifdef INTEL_SSE4_PCLMUL
 33 |   #include <wmmintrin.h>
 34 | #endif
 35 | 
 36 | #if defined(ARM_NEON)
 37 |   #include <arm_neon.h>
 38 | #endif
 39 | 
 40 | 
 41 | /* These are the different ways to perform multiplication.
 42 |    Not all are implemented for all values of w.
 43 |    See the paper for an explanation of how they work. */
 44 | 
 45 | typedef enum {GF_MULT_DEFAULT,
 46 |               GF_MULT_SHIFT,
 47 |               GF_MULT_CARRY_FREE,
 48 |               GF_MULT_CARRY_FREE_GK,
 49 |               GF_MULT_GROUP,
 50 |               GF_MULT_BYTWO_p,
 51 |               GF_MULT_BYTWO_b,
 52 |               GF_MULT_TABLE,
 53 |               GF_MULT_LOG_TABLE,
 54 |               GF_MULT_LOG_ZERO,
 55 |               GF_MULT_LOG_ZERO_EXT,
 56 |               GF_MULT_SPLIT_TABLE,
 57 |               GF_MULT_COMPOSITE } gf_mult_type_t;
 58 | 
 59 | /* These are the different ways to optimize region 
 60 |    operations.  They are bits because you can compose them.
 61 |    Certain optimizations only apply to certain gf_mult_type_t's.  
 62 |    Again, please see documentation for how to use these */
 63 |    
 64 | #define GF_REGION_DEFAULT      (0x0)
 65 | #define GF_REGION_DOUBLE_TABLE (0x1)
 66 | #define GF_REGION_QUAD_TABLE   (0x2)
 67 | #define GF_REGION_LAZY         (0x4)
 68 | #define GF_REGION_SIMD         (0x8)
 69 | #define GF_REGION_SSE          (0x8)
 70 | #define GF_REGION_NOSIMD       (0x10)
 71 | #define GF_REGION_NOSSE        (0x10)
 72 | #define GF_REGION_ALTMAP       (0x20)
 73 | #define GF_REGION_CAUCHY       (0x40)
 74 | 
 75 | typedef uint32_t gf_region_type_t;
 76 | 
 77 | /* These are different ways to implement division.
 78 |    Once again, it's best to use "DEFAULT".  However,
 79 |    there are times when you may want to experiment
 80 |    with the others. */
 81 | 
 82 | typedef enum { GF_DIVIDE_DEFAULT,
 83 |                GF_DIVIDE_MATRIX,
 84 |                GF_DIVIDE_EUCLID } gf_division_type_t;
 85 | 
 86 | /* We support w=4,8,16,32,64 and 128 with their own data types and
 87 |    operations for multiplication, division, etc.  We also support
 88 |    a "gen" type so that you can do general gf arithmetic for any 
 89 |    value of w from 1 to 32.  You can perform a "region" operation
 90 |    on these if you use "CAUCHY" as the mapping. 
 91 |  */
 92 | 
 93 | typedef uint32_t    gf_val_32_t;
 94 | typedef uint64_t    gf_val_64_t;
 95 | typedef uint64_t   *gf_val_128_t;
 96 | 
 97 | extern int _gf_errno;
 98 | extern void gf_error();
 99 | 
100 | typedef struct gf *GFP;
101 | 
102 | typedef union gf_func_a_b {
103 |     gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a,  gf_val_32_t b);
104 |     gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a,  gf_val_64_t b);
105 |     void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b, gf_val_128_t c);
106 | } gf_func_a_b;
107 |   
108 | typedef union {
109 |   gf_val_32_t  (*w32) (GFP gf, gf_val_32_t a);
110 |   gf_val_64_t  (*w64) (GFP gf, gf_val_64_t a);
111 |   void         (*w128)(GFP gf, gf_val_128_t a, gf_val_128_t b);
112 | } gf_func_a;
113 |   
114 | typedef union {
115 |   void  (*w32) (GFP gf, void *src, void *dest, gf_val_32_t val,  int bytes, int add);
116 |   void  (*w64) (GFP gf, void *src, void *dest, gf_val_64_t val,  int bytes, int add);
117 |   void  (*w128)(GFP gf, void *src, void *dest, gf_val_128_t val, int bytes, int add);
118 | } gf_region;
119 | 
120 | typedef union {
121 |   gf_val_32_t  (*w32) (GFP gf, void *start, int bytes, int index);
122 |   gf_val_64_t  (*w64) (GFP gf, void *start, int bytes, int index);
123 |   void         (*w128)(GFP gf, void *start, int bytes, int index, gf_val_128_t rv);
124 | } gf_extract;
125 | 
126 | typedef struct gf {
127 |   gf_func_a_b    multiply;
128 |   gf_func_a_b    divide;
129 |   gf_func_a      inverse;
130 |   gf_region      multiply_region;
131 |   gf_extract     extract_word;
132 |   void           *scratch;
133 | } gf_t;
134 |     
135 | /* Initializes the GF to defaults.  Pass it a pointer to a gf_t.
136 |    Returns 0 on failure, 1 on success. */
137 | 
138 | extern int gf_init_easy(GFP gf, int w);
139 | 
140 | /* Initializes the GF changing the defaults.
141 |    Returns 0 on failure, 1 on success.
142 |    Pass it a pointer to a gf_t.
143 |    For mult_type and divide_type, use a value of gf_mult_type_t and gf_division_type_t, respectively.
144 |    For region_type, OR together the GF_REGION_xxx's defined above.  
145 |    Use 0 as prim_poly for defaults.  Otherwise, the leading 1 is optional.
146 |    Use NULL for scratch_memory to have init_hard allocate memory.  Otherwise,
147 |    use gf_scratch_size() to determine how big scratch_memory has to be.
148 |  */
149 | 
150 | extern int gf_init_hard(GFP gf, 
151 |                         int w, 
152 |                         int mult_type, 
153 |                         int region_type, 
154 |                         int divide_type, 
155 |                         uint64_t prim_poly,
156 |                         int arg1, 
157 |                         int arg2,
158 |                         GFP base_gf,
159 |                         void *scratch_memory);
160 | 
161 | /* Determines the size for scratch_memory.  
162 |    Returns 0 on failure and non-zero on success. */
163 | 
164 | extern int gf_scratch_size(int w, 
165 |                            int mult_type, 
166 |                            int region_type, 
167 |                            int divide_type, 
168 |                            int arg1, 
169 |                            int arg2);
170 | 
171 | /* This reports the gf_scratch_size of a gf_t that has already been created */
172 | 
173 | extern int gf_size(GFP gf);
174 | 
175 | /* Frees scratch memory if gf_init_easy/gf_init_hard called malloc.
176 |    If recursive = 1, then it calls itself recursively on base_gf. */
177 | 
178 | extern int gf_free(GFP gf, int recursive);
179 | 
180 | /* This is support for inline single multiplications and divisions.
181 |    I know it's yucky, but if you've got to be fast, you've got to be fast.
182 |    We support inlining for w=4, w=8 and w=16.  
183 | 
184 |    To use inline multiplication and division with w=4 or 8, you should use the 
185 |    default gf_t, or one with a single table.  Otherwise, gf_w4/8_get_mult_table()
186 |    will return NULL. Similarly, with w=16, the gf_t must be LOG */
187 | 
188 | uint8_t *gf_w4_get_mult_table(GFP gf);
189 | uint8_t *gf_w4_get_div_table(GFP gf);
190 | 
191 | #define GF_W4_INLINE_MULTDIV(table, a, b) (table[((a)<<4)|(b)])
192 | 
193 | uint8_t *gf_w8_get_mult_table(GFP gf);
194 | uint8_t *gf_w8_get_div_table(GFP gf);
195 | 
196 | #define GF_W8_INLINE_MULTDIV(table, a, b) (table[(((uint32_t) (a))<<8)|(b)])
197 | 
198 | uint16_t *gf_w16_get_log_table(GFP gf);
199 | uint16_t *gf_w16_get_mult_alog_table(GFP gf);
200 | uint16_t *gf_w16_get_div_alog_table(GFP gf);
201 | /* Inline w=16 multiply/divide through the log/antilog tables; both expand to one fully parenthesised expression (0 if a or b is 0). */
202 | #define GF_W16_INLINE_MULT(log, alog, a, b) (((a) == 0 || (b) == 0) ? 0 : ((alog)[(uint32_t)((log)[(a)])+(uint32_t)((log)[(b)])]))
203 | #define GF_W16_INLINE_DIV(log, alog, a, b) (((a) == 0 || (b) == 0) ? 0 : ((alog)[(int)((log)[(a)])-(int)((log)[(b)])]))
204 | #endif
205 | 


--------------------------------------------------------------------------------
/missing:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # Common wrapper for a few potentially missing GNU programs.
  3 | 
  4 | scriptversion=2013-10-28.13; # UTC
  5 | 
  6 | # Copyright (C) 1996-2014 Free Software Foundation, Inc.
  7 | # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
  8 | 
  9 | # This program is free software; you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation; either version 2, or (at your option)
 12 | # any later version.
 13 | 
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | 
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | # As a special exception to the GNU General Public License, if you
 23 | # distribute this file as part of a program that contains a
 24 | # configuration script generated by Autoconf, you may include it under
 25 | # the same distribution terms that you use for the rest of that program.
 26 | 
 27 | if test $# -eq 0; then
 28 |   echo 1>&2 "Try '$0 --help' for more information"
 29 |   exit 1
 30 | fi
 31 | 
 32 | case $1 in
 33 | 
 34 |   --is-lightweight)
 35 |     # Used by our autoconf macros to check whether the available missing
 36 |     # script is modern enough.
 37 |     exit 0
 38 |     ;;
 39 | 
 40 |   --run)
 41 |     # Back-compat with the calling convention used by older automake.
 42 |     shift
 43 |     ;;
 44 | 
 45 |   -h|--h|--he|--hel|--help)
 46 |     echo "\
 47 | $0 [OPTION]... PROGRAM [ARGUMENT]...
 48 | 
 49 | Run 'PROGRAM [ARGUMENT]...', returning a proper advice when this fails due
 50 | to PROGRAM being missing or too old.
 51 | 
 52 | Options:
 53 |   -h, --help      display this help and exit
 54 |   -v, --version   output version information and exit
 55 | 
 56 | Supported PROGRAM values:
 57 |   aclocal   autoconf  autoheader   autom4te  automake  makeinfo
 58 |   bison     yacc      flex         lex       help2man
 59 | 
 60 | Version suffixes to PROGRAM as well as the prefixes 'gnu-', 'gnu', and
 61 | 'g' are ignored when checking the name.
 62 | 
 63 | Send bug reports to <bug-automake@gnu.org>."
 64 |     exit $?
 65 |     ;;
 66 | 
 67 |   -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
 68 |     echo "missing $scriptversion (GNU Automake)"
 69 |     exit $?
 70 |     ;;
 71 | 
 72 |   -*)
 73 |     echo 1>&2 "$0: unknown '$1' option"
 74 |     echo 1>&2 "Try '$0 --help' for more information"
 75 |     exit 1
 76 |     ;;
 77 | 
 78 | esac
 79 | 
 80 | # Run the given program, remember its exit status.
 81 | "$@"; st=$?
 82 | 
 83 | # If it succeeded, we are done.
 84 | test $st -eq 0 && exit 0
 85 | 
 86 | # Also exit now if it failed (or wasn't found), and '--version' was
 87 | # passed; such an option is passed most likely to detect whether the
 88 | # program is present and works.
 89 | case $2 in --version|--help) exit $st;; esac
 90 | 
 91 | # Exit code 63 means version mismatch.  This often happens when the user
 92 | # tries to use an ancient version of a tool on a file that requires a
 93 | # minimum version.
 94 | if test $st -eq 63; then
 95 |   msg="probably too old"
 96 | elif test $st -eq 127; then
 97 |   # Program was missing.
 98 |   msg="missing on your system"
 99 | else
100 |   # Program was found and executed, but failed.  Give up.
101 |   exit $st
102 | fi
103 | 
104 | perl_URL=http://www.perl.org/
105 | flex_URL=http://flex.sourceforge.net/
106 | gnu_software_URL=http://www.gnu.org/software
107 | 
108 | program_details ()  # $1: tool name; prints the package it belongs to and what that package needs (nothing for unlisted names)
109 | {
110 |   case $1 in
111 |     aclocal|automake)
112 |       echo "The '$1' program is part of the GNU Automake package:"
113 |       echo "<$gnu_software_URL/automake>"
114 |       echo "It also requires GNU Autoconf, GNU m4 and Perl in order to run:"
115 |       echo "<$gnu_software_URL/autoconf>"
116 |       echo "<$gnu_software_URL/m4/>"
117 |       echo "<$perl_URL>"
118 |       ;;
119 |     autoconf|autom4te|autoheader)
120 |       echo "The '$1' program is part of the GNU Autoconf package:"
121 |       echo "<$gnu_software_URL/autoconf/>"
122 |       echo "It also requires GNU m4 and Perl in order to run:"
123 |       echo "<$gnu_software_URL/m4/>"
124 |       echo "<$perl_URL>"
125 |       ;;
126 |   esac
127 | }
128 | 
129 | give_advice ()  # $1: program that failed; prints "'$1' is $msg." followed by hints specific to that tool
130 | {
131 |   # Normalize the program name for the case below: strip one leading 'gnu-', 'gnu' or 'g'.
132 |   normalized_program=`echo "$1" | sed '
133 |     s/^gnu-//; t
134 |     s/^gnu//; t
135 |     s/^g//; t'`
136 | 
137 |   printf '%s\n' "'$1' is $msg."
138 | 
139 |   configure_deps="'configure.ac' or m4 files included by 'configure.ac'"
140 |   case $normalized_program in
141 |     autoconf*)
142 |       echo "You should only need it if you modified 'configure.ac',"
143 |       echo "or m4 files included by it."
144 |       program_details 'autoconf'
145 |       ;;
146 |     autoheader*)
147 |       echo "You should only need it if you modified 'acconfig.h' or"
148 |       echo "$configure_deps."
149 |       program_details 'autoheader'
150 |       ;;
151 |     automake*)
152 |       echo "You should only need it if you modified 'Makefile.am' or"
153 |       echo "$configure_deps."
154 |       program_details 'automake'
155 |       ;;
156 |     aclocal*)
157 |       echo "You should only need it if you modified 'acinclude.m4' or"
158 |       echo "$configure_deps."
159 |       program_details 'aclocal'
160 |       ;;
161 |    autom4te*)
162 |       echo "You might have modified some maintainer files that require"
163 |       echo "the 'autom4te' program to be rebuilt."
164 |       program_details 'autom4te'
165 |       ;;
166 |     bison*|yacc*)
167 |       echo "You should only need it if you modified a '.y' file."
168 |       echo "You may want to install the GNU Bison package:"
169 |       echo "<$gnu_software_URL/bison/>"
170 |       ;;
171 |     lex*|flex*)
172 |       echo "You should only need it if you modified a '.l' file."
173 |       echo "You may want to install the Fast Lexical Analyzer package:"
174 |       echo "<$flex_URL>"
175 |       ;;
176 |     help2man*)
177 |       echo "You should only need it if you modified a dependency" \
178 |            "of a man page."
179 |       echo "You may want to install the GNU Help2man package:"
180 |       echo "<$gnu_software_URL/help2man/>"
181 |     ;;
182 |     makeinfo*)
183 |       echo "You should only need it if you modified a '.texi' file, or"
184 |       echo "any other file indirectly affecting the aspect of the manual."
185 |       echo "You might want to install the Texinfo package:"
186 |       echo "<$gnu_software_URL/texinfo/>"
187 |       echo "The spurious makeinfo call might also be the consequence of"
188 |       echo "using a buggy 'make' (AIX, DU, IRIX), in which case you might"
189 |       echo "want to install GNU make:"
190 |       echo "<$gnu_software_URL/make/>"
191 |       ;;
192 |     *)
193 |       echo "You might have modified some files without having the proper"
194 |       echo "tools for further handling them.  Check the 'README' file, it"
195 |       echo "often tells you about the needed prerequisites for installing"
196 |       echo "this package.  You may also peek at any GNU archive site, in"
197 |       echo "case some other package contains this missing '$1' program."
198 |       ;;
199 |   esac
200 | }
201 | 
202 | give_advice "$1" | sed -e '1s/^/WARNING: /' \
203 |                        -e '2,$s/^/         /' >&2
204 | 
205 | # Propagate the correct exit status (expected to be 127 for a program
206 | # not found, 63 for a program that failed due to version mismatch).
207 | exit $st
208 | 
209 | # Local variables:
210 | # eval: (add-hook 'write-file-hooks 'time-stamp)
211 | # time-stamp-start: "scriptversion="
212 | # time-stamp-format: "%:y-%02m-%02d.%02H"
213 | # time-stamp-time-zone: "UTC"
214 | # time-stamp-end: "; # UTC"
215 | # End:
216 | 


--------------------------------------------------------------------------------
/src/neon/gf_w4_neon.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * Copyright (c) 2014: Janne Grunau <j@jannau.net>
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  - Redistributions of source code must retain the above copyright
 13 |  *     notice, this list of conditions and the following disclaimer.
 14 |  *
 15 |  *  - Redistributions in binary form must reproduce the above copyright
 16 |  *    notice, this list of conditions and the following disclaimer in
 17 |  *    the documentation and/or other materials provided with the
 18 |  *    distribution.
 19 |  *
 20 |  *  - Neither the name of the University of Tennessee nor the names of its
 21 |  *    contributors may be used to endorse or promote products derived
 22 |  *    from this software without specific prior written permission.
 23 |  *
 24 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 25 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 26 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 27 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 28 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 29 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 30 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 31 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 32 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 33 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 34 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 |  * POSSIBILITY OF SUCH DAMAGE.
 36 |  *
 37 |  * gf_w4_neon.c
 38 |  *
 39 |  * Neon routines for 4-bit Galois fields
 40 |  *
 41 |  */
 42 | 
 43 | #include "gf_int.h"
 44 | #include <stdio.h>
 45 | #include <stdlib.h>
 46 | #include "gf_w4.h"
 47 | 
 48 | static
 49 | gf_val_32_t
 50 | gf_w4_neon_clm_multiply (gf_t *gf, gf_val_32_t a4, gf_val_32_t b4)
 51 | {
 52 |   gf_val_32_t rv = 0;
 53 |   poly8x8_t       result, prim_poly;
 54 |   poly8x8_t       a, b, w;
 55 |   uint8x8_t       v;
 56 |   gf_internal_t * h = gf->scratch;
 57 | 
 58 |   a =  vdup_n_p8 (a4);
 59 |   b =  vdup_n_p8 (b4);
 60 | 
 61 |   prim_poly = vdup_n_p8 ((uint32_t)(h->prim_poly & 0x1fULL));
 62 | 
 63 |   /* Do the initial multiply */
 64 |   result = vmul_p8 (a, b);
 65 |   v = vshr_n_u8 (vreinterpret_u8_p8(result), 4);
 66 |   w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
 67 |   result = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(result), vreinterpret_u8_p8(w)));
 68 | 
 69 |   /* Extracts 32 bit value from result. */
 70 |   rv = (gf_val_32_t)vget_lane_u8 (vreinterpret_u8_p8 (result), 0);
 71 | 
 72 |   return rv;
 73 | }
 74 | 
 75 | static inline void
 76 | neon_clm_multiply_region_from_single (gf_t *gf, uint8_t *s8, uint8_t *d8,
 77 |                                       gf_val_32_t val, uint8_t *d_end, int xor)
 78 | {
 79 |   gf_internal_t * h = gf->scratch;
 80 |   poly8x8_t       prim_poly;
 81 |   poly8x8_t       a, w, even, odd;
 82 |   uint8x8_t       b, c, v, mask;
 83 | 
 84 |   a         = vdup_n_p8 (val);
 85 |   mask      = vdup_n_u8 (0xf);
 86 |   prim_poly = vdup_n_p8 ((uint8_t)(h->prim_poly & 0x1fULL));
 87 | 
 88 |   while (d8 < d_end) {
 89 |     b = vld1_u8 (s8);
 90 | 
 91 |     even = vreinterpret_p8_u8 (vand_u8 (b, mask));
 92 |     odd  = vreinterpret_p8_u8 (vshr_n_u8 (b, 4));
 93 | 
 94 |     if (xor)
 95 |         c = vld1_u8 (d8);
 96 | 
 97 |     even = vmul_p8 (a, even);
 98 |     odd  = vmul_p8 (a, odd);
 99 | 
100 |     v = vshr_n_u8 (vreinterpret_u8_p8(even), 4);
101 |     w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
102 |     even = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(even), vreinterpret_u8_p8(w)));
103 | 
104 |     v = vshr_n_u8 (vreinterpret_u8_p8(odd), 4);
105 |     w = vmul_p8 (prim_poly, vreinterpret_p8_u8(v));
106 |     odd = vreinterpret_p8_u8 (veor_u8 (vreinterpret_u8_p8(odd), vreinterpret_u8_p8(w)));
107 | 
108 |     v = veor_u8 (vreinterpret_u8_p8 (even), vshl_n_u8 (vreinterpret_u8_p8 (odd), 4));
109 | 
110 |     if (xor)
111 |       v = veor_u8 (c, v);
112 | 
113 |     vst1_u8 (d8, v);
114 | 
115 |     d8 += 8;
116 |     s8 += 8;
117 |   }
118 | }
119 | 
120 | 
121 | static void
122 | gf_w4_neon_clm_multiply_region_from_single (gf_t *gf, void *src, void *dest,
123 |                                             gf_val_32_t val, int bytes, int xor)
124 | {
125 |   gf_region_data rd;
126 |   uint8_t *s8;
127 |   uint8_t *d8;
128 | 
129 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
130 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
131 | 
132 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);
133 |   gf_do_initial_region_alignment(&rd);
134 | 
135 |   s8 = (uint8_t *) rd.s_start;
136 |   d8 = (uint8_t *) rd.d_start;
137 | 
138 |   if (xor)
139 |     neon_clm_multiply_region_from_single (gf, s8, d8, val, rd.d_top, 1);
140 |   else
141 |     neon_clm_multiply_region_from_single (gf, s8, d8, val, rd.d_top, 0);
142 | 
143 |   gf_do_final_region_alignment(&rd);
144 | }
145 | 
/*
 * When ARCH_AARCH64 is not defined, provide vqtbl1q_u8 as a macro built from
 * two vtbl2_u8 lookups (one per 8-byte half of v) joined with vcombine_u8,
 * so the table-lookup code below can use one name in both configurations.
 * Here tbl is a uint8x8x2_t rather than a uint8x16_t.  The argument v is
 * expanded twice, so it should be free of side effects.
 */
#ifndef ARCH_AARCH64
#define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)),   \
                                       vtbl2_u8(tbl, vget_high_u8(v)))
#endif
150 | 
151 | static
152 | inline
153 | void
154 | w4_single_table_multiply_region_neon(gf_t *gf, uint8_t *src, uint8_t *dst,
155 |                                      uint8_t * d_end, gf_val_32_t val, int xor)
156 | {
157 |   struct gf_single_table_data *std;
158 |   uint8_t *base;
159 |   uint8x16_t r, va, vh, vl, loset;
160 | 
161 | #ifdef ARCH_AARCH64
162 |   uint8x16_t th, tl;
163 | #else
164 |   uint8x8x2_t th, tl;
165 | #endif
166 | 
167 |   std = (struct gf_single_table_data *) ((gf_internal_t *) (gf->scratch))->private;
168 |   base = (uint8_t *) std->mult;
169 |   base += (val << GF_FIELD_WIDTH);
170 | 
171 | #ifdef ARCH_AARCH64
172 |   tl = vld1q_u8 (base);
173 |   th = vshlq_n_u8 (tl, 4);
174 | #else
175 |   tl.val[0] = vld1_u8 (base);
176 |   tl.val[1] = vld1_u8 (base + 8);
177 |   th.val[0] =  vshl_n_u8 (tl.val[0], 4);
178 |   th.val[1] =  vshl_n_u8 (tl.val[1], 4);
179 | #endif
180 | 
181 |   loset = vdupq_n_u8(0xf);
182 | 
183 |   while (dst < d_end) {
184 |       va = vld1q_u8 (src);
185 | 
186 |       vh = vshrq_n_u8 (va, 4);
187 |       vl = vandq_u8 (va, loset);
188 | 
189 |       if (xor)
190 |         va = vld1q_u8 (dst);
191 | 
192 |       vh = vqtbl1q_u8 (th, vh);
193 |       vl = vqtbl1q_u8 (tl, vl);
194 | 
195 |       r = veorq_u8 (vh, vl);
196 | 
197 |       if (xor)
198 |         r = veorq_u8 (va, r);
199 | 
200 |       vst1q_u8 (dst, r);
201 | 
202 |     dst += 16;
203 |     src += 16;
204 |   }
205 | }
206 | 
207 | static
208 | void
209 | gf_w4_single_table_multiply_region_neon(gf_t *gf, void *src, void *dest,
210 |                                         gf_val_32_t val, int bytes, int xor)
211 | {
212 |   gf_region_data rd;
213 |   uint8_t *sptr, *dptr, *top;
214 | 
215 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
216 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
217 | 
218 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);
219 |   gf_do_initial_region_alignment(&rd);
220 | 
221 |   sptr = rd.s_start;
222 |   dptr = rd.d_start;
223 |   top  = rd.d_top;
224 | 
225 |   if (xor)
226 |       w4_single_table_multiply_region_neon(gf, sptr, dptr, top, val, 1);
227 |   else
228 |       w4_single_table_multiply_region_neon(gf, sptr, dptr, top, val, 0);
229 | 
230 |   gf_do_final_region_alignment(&rd);
231 | 
232 | }
233 | 
234 | 
235 | int gf_w4_neon_cfm_init(gf_t *gf)
236 | {
237 |   // single clm multiplication probably pointless
238 |   gf->multiply.w32 = gf_w4_neon_clm_multiply;
239 |   gf->multiply_region.w32 = gf_w4_neon_clm_multiply_region_from_single;
240 | 
241 |   return 1;
242 | }
243 | 
244 | void gf_w4_neon_single_table_init(gf_t *gf)
245 | {
246 |   gf->multiply_region.w32 = gf_w4_single_table_multiply_region_neon;
247 | }
248 | 


--------------------------------------------------------------------------------
/src/liberation.c:
--------------------------------------------------------------------------------
  1 | /* *
  2 |  * Copyright (c) 2014, James S. Plank and Kevin Greenan
  3 |  * All rights reserved.
  4 |  *
  5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
  6 |  * Coding Techniques
  7 |  *
  8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
  9 |  *
 10 |  * Redistribution and use in source and binary forms, with or without
 11 |  * modification, are permitted provided that the following conditions
 12 |  * are met:
 13 |  *
 14 |  *  - Redistributions of source code must retain the above copyright
 15 |  *    notice, this list of conditions and the following disclaimer.
 16 |  *
 17 |  *  - Redistributions in binary form must reproduce the above copyright
 18 |  *    notice, this list of conditions and the following disclaimer in
 19 |  *    the documentation and/or other materials provided with the
 20 |  *    distribution.
 21 |  *
 22 |  *  - Neither the name of the University of Tennessee nor the names of its
 23 |  *    contributors may be used to endorse or promote products derived
 24 |  *    from this software without specific prior written permission.
 25 |  *
 26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 |  * POSSIBILITY OF SUCH DAMAGE.
 38 |  */
 39 | 
 40 | /* Jerasure's authors:
 41 | 
 42 |    Revision 2.x - 2014: James S. Plank and Kevin M. Greenan
 43 |    Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman.
 44 |    Revision 1.0 - 2007: James S. Plank
 45 |  */
 46 | 
 47 | #include <stdio.h>
 48 | #include <stdlib.h>
 49 | #include <string.h>
 50 | 
 51 | #include "galois.h"
 52 | #include "jerasure.h"
 53 | #include "liberation.h"
 54 | 
/* Allocate room for num objects of the given type: expands to a malloc()
 * call cast to (type *).  The memory is not initialised. */
#define talloc(type, num) (type *) malloc(sizeof(type)*(num))
 56 | 
 57 | int *liberation_coding_bitmatrix(int k, int w)
 58 | {
 59 |   int *matrix, i, j, index;
 60 | 
 61 |   if (k > w) return NULL;
 62 |   matrix = talloc(int, 2*k*w*w);
 63 |   if (matrix == NULL) return NULL;
 64 |   bzero(matrix, sizeof(int)*2*k*w*w);
 65 |   
 66 |   /* Set up identity matrices */
 67 | 
 68 |   for(i = 0; i < w; i++) {
 69 |     index = i*k*w+i;
 70 |     for (j = 0; j < k; j++) {
 71 |       matrix[index] = 1;
 72 |       index += w;
 73 |     }
 74 |   }
 75 | 
 76 |   /* Set up liberation matrices */
 77 | 
 78 |   for (j = 0; j < k; j++) {
 79 |     index = k*w*w+j*w;
 80 |     for (i = 0; i < w; i++) {
 81 |       matrix[index+(j+i)%w] = 1;
 82 |       index += (k*w);
 83 |     }
 84 |     if (j > 0) {
 85 |       i = (j*((w-1)/2))%w;
 86 |       matrix[k*w*w+j*w+i*k*w+(i+j-1)%w] = 1;
 87 |     }
 88 |   }
 89 |   return matrix;
 90 | }
 91 |   
 92 | 
 93 | int *liber8tion_coding_bitmatrix(int k)
 94 | {
 95 |   int *matrix, i, j, index;
 96 |   int w;
 97 | 
 98 |   w = 8;
 99 |   if (k > w) return NULL;
100 |   matrix = talloc(int, 2*k*w*w);
101 |   if (matrix == NULL) return NULL;
102 |   bzero(matrix, sizeof(int)*2*k*w*w);
103 |   
104 |   /* Set up identity matrices */
105 | 
106 |   for(i = 0; i < w; i++) {
107 |     index = i*k*w+i;
108 |     for (j = 0; j < k; j++) {
109 |       matrix[index] = 1;
110 |       index += w;
111 |     }
112 |   }
113 | 
114 |   /* Set up liber8tion matrices */
115 | 
116 |   index = k*w*w;
117 | 
118 |   if (k == 0) return matrix;
119 |   matrix[index+0*k*w+0*w+0] = 1;
120 |   matrix[index+1*k*w+0*w+1] = 1;
121 |   matrix[index+2*k*w+0*w+2] = 1;
122 |   matrix[index+3*k*w+0*w+3] = 1;
123 |   matrix[index+4*k*w+0*w+4] = 1;
124 |   matrix[index+5*k*w+0*w+5] = 1;
125 |   matrix[index+6*k*w+0*w+6] = 1;
126 |   matrix[index+7*k*w+0*w+7] = 1;
127 | 
128 |   if (k == 1) return matrix;
129 |   matrix[index+0*k*w+1*w+7] = 1;
130 |   matrix[index+1*k*w+1*w+3] = 1;
131 |   matrix[index+2*k*w+1*w+0] = 1;
132 |   matrix[index+3*k*w+1*w+2] = 1;
133 |   matrix[index+4*k*w+1*w+6] = 1;
134 |   matrix[index+5*k*w+1*w+1] = 1;
135 |   matrix[index+6*k*w+1*w+5] = 1;
136 |   matrix[index+7*k*w+1*w+4] = 1;
137 |   matrix[index+4*k*w+1*w+7] = 1;
138 | 
139 |   if (k == 2) return matrix;
140 |   matrix[index+0*k*w+2*w+6] = 1;
141 |   matrix[index+1*k*w+2*w+2] = 1;
142 |   matrix[index+2*k*w+2*w+4] = 1;
143 |   matrix[index+3*k*w+2*w+0] = 1;
144 |   matrix[index+4*k*w+2*w+7] = 1;
145 |   matrix[index+5*k*w+2*w+3] = 1;
146 |   matrix[index+6*k*w+2*w+1] = 1;
147 |   matrix[index+7*k*w+2*w+5] = 1;
148 |   matrix[index+1*k*w+2*w+3] = 1;
149 | 
150 |   if (k == 3) return matrix;
151 |   matrix[index+0*k*w+3*w+2] = 1;
152 |   matrix[index+1*k*w+3*w+5] = 1;
153 |   matrix[index+2*k*w+3*w+7] = 1;
154 |   matrix[index+3*k*w+3*w+6] = 1;
155 |   matrix[index+4*k*w+3*w+0] = 1;
156 |   matrix[index+5*k*w+3*w+3] = 1;
157 |   matrix[index+6*k*w+3*w+4] = 1;
158 |   matrix[index+7*k*w+3*w+1] = 1;
159 |   matrix[index+5*k*w+3*w+4] = 1;
160 | 
161 |   if (k == 4) return matrix;
162 |   matrix[index+0*k*w+4*w+5] = 1;
163 |   matrix[index+1*k*w+4*w+6] = 1;
164 |   matrix[index+2*k*w+4*w+1] = 1;
165 |   matrix[index+3*k*w+4*w+7] = 1;
166 |   matrix[index+4*k*w+4*w+2] = 1;
167 |   matrix[index+5*k*w+4*w+4] = 1;
168 |   matrix[index+6*k*w+4*w+3] = 1;
169 |   matrix[index+7*k*w+4*w+0] = 1;
170 |   matrix[index+2*k*w+4*w+0] = 1;
171 | 
172 |   if (k == 5) return matrix;
173 |   matrix[index+0*k*w+5*w+1] = 1;
174 |   matrix[index+1*k*w+5*w+2] = 1;
175 |   matrix[index+2*k*w+5*w+3] = 1;
176 |   matrix[index+3*k*w+5*w+4] = 1;
177 |   matrix[index+4*k*w+5*w+5] = 1;
178 |   matrix[index+5*k*w+5*w+6] = 1;
179 |   matrix[index+6*k*w+5*w+7] = 1;
180 |   matrix[index+7*k*w+5*w+0] = 1;
181 |   matrix[index+7*k*w+5*w+2] = 1;
182 | 
183 |   if (k == 6) return matrix;
184 |   matrix[index+0*k*w+6*w+3] = 1;
185 |   matrix[index+1*k*w+6*w+0] = 1;
186 |   matrix[index+2*k*w+6*w+6] = 1;
187 |   matrix[index+3*k*w+6*w+5] = 1;
188 |   matrix[index+4*k*w+6*w+1] = 1;
189 |   matrix[index+5*k*w+6*w+7] = 1;
190 |   matrix[index+6*k*w+6*w+4] = 1;
191 |   matrix[index+7*k*w+6*w+2] = 1;
192 |   matrix[index+6*k*w+6*w+5] = 1;
193 | 
194 |   if (k == 7) return matrix;
195 |   matrix[index+0*k*w+7*w+4] = 1;
196 |   matrix[index+1*k*w+7*w+7] = 1;
197 |   matrix[index+2*k*w+7*w+1] = 1;
198 |   matrix[index+3*k*w+7*w+5] = 1;
199 |   matrix[index+4*k*w+7*w+3] = 1;
200 |   matrix[index+5*k*w+7*w+2] = 1;
201 |   matrix[index+6*k*w+7*w+0] = 1;
202 |   matrix[index+7*k*w+7*w+6] = 1;
203 |   matrix[index+3*k*w+7*w+1] = 1;
204 | 
205 |   return matrix;
206 | }
207 |   
/*
 * Build the Blaum-Roth coding bit-matrix for k data devices and word size w.
 *
 * The result is a freshly allocated 0/1 matrix of 2*w rows by k*w columns,
 * stored row-major (row stride k*w).  The top w rows hold k identity
 * blocks; the bottom w rows hold, for data column j, an identity block
 * (j == 0) or a block derived from arithmetic modulo p = w+1 (j > 0).
 *
 * Returns NULL when k is negative, when k > w, or when allocation fails.
 * The caller owns the returned block and releases it with free().
 * NOTE(review): Blaum-Roth codes are normally specified for prime w+1;
 * that is not verified here -- confirm callers check it.
 */
int *blaum_roth_coding_bitmatrix(int k, int w)
{
  int *matrix, i, j, index, l, m, p;

  if (k < 0 || k > w) return NULL ;

  /* calloc both zero-fills the matrix and checks the size product. */
  matrix = (int *) calloc((size_t) 2 * k * w * w, sizeof(int));
  if (matrix == NULL) return NULL;

  /* Set up identity matrices */

  for(i = 0; i < w; i++) {
    index = i*k*w+i;
    for (j = 0; j < k; j++) {
      matrix[index] = 1;
      index += w;
    }
  }

  /* Set up blaum_roth matrices -- Ignore identity */

  p = w+1;
  for (j = 0; j < k; j++) {
    index = k*w*w+j*w;
    if (j == 0) {
      /* First block is a plain identity. */
      for (l = 0; l < w; l++) {
        matrix[index+l] = 1;
        index += k*w;
      }
    } else {
      i = j;
      /* l is the 1-based row number within the block. */
      for (l = 1; l <= w; l++) {
        if (l != p-i) {
          /* Ordinary row: one bit at column ((l+i) mod p) - 1. */
          m = l+i;
          if (m >= p) m -= p;
          m--;
          matrix[index+m] = 1;
        } else {
          /* Row p-i: (l+i) mod p would be 0, so this row gets two bits. */
          matrix[index+i-1] = 1;
          if (i%2 == 0) {
            m = i/2;
          } else {
            m = (p/2) + 1 + (i/2);
          }
          m--;
          matrix[index+m] = 1;
        }
        index += k*w;
      }
    }
  }

  return matrix;
}
263 | 


--------------------------------------------------------------------------------
/compile:
--------------------------------------------------------------------------------
  1 | #! /bin/sh
  2 | # Wrapper for compilers which do not understand '-c -o'.
  3 | 
  4 | scriptversion=2012-10-14.11; # UTC
  5 | 
  6 | # Copyright (C) 1999-2014 Free Software Foundation, Inc.
  7 | # Written by Tom Tromey <tromey@cygnus.com>.
  8 | #
  9 | # This program is free software; you can redistribute it and/or modify
 10 | # it under the terms of the GNU General Public License as published by
 11 | # the Free Software Foundation; either version 2, or (at your option)
 12 | # any later version.
 13 | #
 14 | # This program is distributed in the hope that it will be useful,
 15 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 | # GNU General Public License for more details.
 18 | #
 19 | # You should have received a copy of the GNU General Public License
 20 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 21 | 
 22 | # As a special exception to the GNU General Public License, if you
 23 | # distribute this file as part of a program that contains a
 24 | # configuration script generated by Autoconf, you may include it under
 25 | # the same distribution terms that you use for the rest of that program.
 26 | 
 27 | # This file is maintained in Automake, please report
 28 | # bugs to <bug-automake@gnu.org> or send patches to
 29 | # <automake-patches@gnu.org>.
 30 | 
 31 | nl='
 32 | '
 33 | 
 34 | # We need space, tab and new line, in precisely that order.  Quoting is
 35 | # there to prevent tools from complaining about whitespace usage.
 36 | IFS=" ""	$nl"
 37 | 
 38 | file_conv=
 39 | 
# func_file_conv build_file lazy
# Convert a $build file to $host form and store it in $file
# Currently only supports Windows hosts. If the determined conversion
# type is listed in (the comma separated) LAZY, no conversion will
# take place.
#
# The result is always left in the global $file.  Names that do not match
# the absolute-path pattern below (relative names, and names starting with
# two slashes) are stored unchanged.  The conversion type is determined
# once from `uname -s` and cached in the global $file_conv.
func_file_conv ()
{
  file=$1
  case $file in
    / | /[!/]*) # absolute file, and not a UNC file
      if test -z "$file_conv"; then
	# lazily determine how to convert abs files
	case `uname -s` in
	  MINGW*)
	    file_conv=mingw
	    ;;
	  CYGWIN*)
	    file_conv=cygwin
	    ;;
	  *)
	    # Every other system falls back to the wine conversion (winepath).
	    file_conv=wine
	    ;;
	esac
      fi
      # Build "<type>/,<LAZY>," so one pattern can test whether the
      # conversion type appears in the comma-separated LAZY list ($2).
      case $file_conv/,$2, in
	*,$file_conv,*)
	  # Conversion type is listed in LAZY: leave $file untouched.
	  ;;
	mingw/*)
	  file=`cmd //C echo "$file " | sed -e 's/"\(.*\) " *$/\1/'`
	  ;;
	cygwin/*)
	  # Keep the unconverted name if cygpath fails.
	  file=`cygpath -m "$file" || echo "$file"`
	  ;;
	wine/*)
	  # Keep the unconverted name if winepath fails.
	  file=`winepath -w "$file" || echo "$file"`
	  ;;
      esac
      ;;
  esac
}
 80 | 
 81 | # func_cl_dashL linkdir
 82 | # Make cl look for libraries in LINKDIR
 83 | func_cl_dashL ()
 84 | {
 85 |   func_file_conv "$1"
 86 |   if test -z "$lib_path"; then
 87 |     lib_path=$file
 88 |   else
 89 |     lib_path="$lib_path;$file"
 90 |   fi
 91 |   linker_opts="$linker_opts -LIBPATH:$file"
 92 | }
 93 | 
 94 | # func_cl_dashl library
 95 | # Do a library search-path lookup for cl
 96 | func_cl_dashl ()
 97 | {
 98 |   lib=$1
 99 |   found=no
100 |   save_IFS=$IFS
101 |   IFS=';'
102 |   for dir in $lib_path $LIB
103 |   do
104 |     IFS=$save_IFS
105 |     if $shared && test -f "$dir/$lib.dll.lib"; then
106 |       found=yes
107 |       lib=$dir/$lib.dll.lib
108 |       break
109 |     fi
110 |     if test -f "$dir/$lib.lib"; then
111 |       found=yes
112 |       lib=$dir/$lib.lib
113 |       break
114 |     fi
115 |     if test -f "$dir/lib$lib.a"; then
116 |       found=yes
117 |       lib=$dir/lib$lib.a
118 |       break
119 |     fi
120 |   done
121 |   IFS=$save_IFS
122 | 
123 |   if test "$found" != yes; then
124 |     lib=$lib.lib
125 |   fi
126 | }
127 | 
# func_cl_wrapper cl arg...
# Adjust compile command to suit cl
#
# The positional parameters are rewritten in place.  'for arg' walks the
# original argument list while $1 is always the argument currently being
# examined: each pass ends with a 'shift' that drops it, and any
# replacement is appended to the end with 'set x "$@" NEW; shift' (the
# dummy 'x' is removed again by that shift).  When the loop
# finishes "$@" holds the translated command.  Setting $eat makes the next
# pass discard its argument, which is how option values ('-o FILE',
# '-I DIR', ...) are consumed.  Linker options are collected separately in
# $linker_opts and passed after a single '-link'.  This function does not
# return: it ends in 'exec'.
func_cl_wrapper ()
{
  # Assume a capable shell
  lib_path=
  shared=:
  linker_opts=
  for arg
  do
    if test -n "$eat"; then
      # This argument was already consumed as the value of the previous one.
      eat=
    else
      case $1 in
	-o)
	  # configure might choose to run compile as 'compile cc -o foo foo.c'.
	  eat=1
	  case $2 in
	    *.o | *.[oO][bB][jJ])
	      # Object output: cl spells this -Fo<file>.
	      func_file_conv "$2"
	      set x "$@" -Fo"$file"
	      shift
	      ;;
	    *)
	      # Any other output name is passed as -Fe<file>.
	      func_file_conv "$2"
	      set x "$@" -Fe"$file"
	      shift
	      ;;
	  esac
	  ;;
	-I)
	  eat=1
	  func_file_conv "$2" mingw
	  set x "$@" -I"$file"
	  shift
	  ;;
	-I*)
	  func_file_conv "${1#-I}" mingw
	  set x "$@" -I"$file"
	  shift
	  ;;
	-l)
	  eat=1
	  func_cl_dashl "$2"
	  set x "$@" "$lib"
	  shift
	  ;;
	-l*)
	  func_cl_dashl "${1#-l}"
	  set x "$@" "$lib"
	  shift
	  ;;
	-L)
	  # Nothing is appended here: the directory only goes into
	  # $lib_path and $linker_opts.
	  eat=1
	  func_cl_dashL "$2"
	  ;;
	-L*)
	  func_cl_dashL "${1#-L}"
	  ;;
	-static)
	  # Stops func_cl_dashl from preferring LIBRARY.dll.lib.
	  shared=false
	  ;;
	-Wl,*)
	  # Split the comma-separated list and add each item to the linker
	  # options.
	  arg=${1#-Wl,}
	  save_ifs="$IFS"; IFS=','
	  for flag in $arg; do
	    IFS="$save_ifs"
	    linker_opts="$linker_opts $flag"
	  done
	  IFS="$save_ifs"
	  ;;
	-Xlinker)
	  eat=1
	  linker_opts="$linker_opts $2"
	  ;;
	-*)
	  # Any other option is passed through unchanged.
	  set x "$@" "$1"
	  shift
	  ;;
	*.cc | *.CC | *.cxx | *.CXX | *.[cC]++)
	  # These source suffixes are passed with a -Tp prefix.
	  func_file_conv "$1"
	  set x "$@" -Tp"$file"
	  shift
	  ;;
	*.c | *.cpp | *.CPP | *.lib | *.LIB | *.Lib | *.OBJ | *.obj | *.[oO])
	  func_file_conv "$1" mingw
	  set x "$@" "$file"
	  shift
	  ;;
	*)
	  set x "$@" "$1"
	  shift
	  ;;
      esac
    fi
    shift
  done
  if test -n "$linker_opts"; then
    # $linker_opts starts with a space, so this yields '-link opt...'.
    linker_opts="-link$linker_opts"
  fi
  exec "$@" $linker_opts
  # Only reached if exec itself fails.
  exit 1
}
231 | 
# Flag shared by the argument loops: when non-empty, the next argument has
# already been consumed as the value of the previous option.
eat=

# Dispatch on the first argument: no command, help, version, or a cl
# compiler (which gets the dedicated wrapper).  Any other value falls
# through to the generic '-c -o' handling below.
case $1 in
  '')
     echo "$0: No command.  Try '$0 --help' for more information." 1>&2
     exit 1;
     ;;
  -h | --h*)
    cat <<\EOF
Usage: compile [--help] [--version] PROGRAM [ARGS]

Wrapper for compilers which do not understand '-c -o'.
Remove '-o dest.o' from ARGS, run PROGRAM with the remaining
arguments, and rename the output as expected.

If you are trying to build a whole package this is not the
right script to run: please start by reading the file 'INSTALL'.

Report bugs to <bug-automake@gnu.org>.
EOF
    exit $?
    ;;
  -v | --v*)
    echo "compile $scriptversion"
    exit $?
    ;;
  cl | *[/\\]cl | cl.exe | *[/\\]cl.exe )
    func_cl_wrapper "$@"      # Doesn't return...
    ;;
esac
262 | 
# Generic path: emulate '-c -o OBJECT' for compilers that cannot do it.
# $ofile receives the requested object name and $cfile the .c source.
ofile=
cfile=

# Rebuild the command line in place.  'for arg' walks the original
# arguments while $1 is the one being examined; each pass ends with
# 'shift', and arguments that are kept are re-appended with
# 'set x "$@" ARG; shift'.  After the loop "$@" is the command to run,
# without any '-o OBJECT' pair.
for arg
do
  if test -n "$eat"; then
    # Value of the preceding '-o': already handled, just drop it.
    eat=
  else
    case $1 in
      -o)
	# configure might choose to run compile as 'compile cc -o foo foo.c'.
	# So we strip '-o arg' only if arg is an object.
	eat=1
	case $2 in
	  *.o | *.obj)
	    ofile=$2
	    ;;
	  *)
	    set x "$@" -o "$2"
	    shift
	    ;;
	esac
	;;
      *.c)
	# Remember the source file; it stays on the command line.
	cfile=$1
	set x "$@" "$1"
	shift
	;;
      *)
	set x "$@" "$1"
	shift
	;;
    esac
  fi
  shift
done

if test -z "$ofile" || test -z "$cfile"; then
  # If no '-o' option was seen then we might have been invoked from a
  # pattern rule where we don't need one.  That is ok -- this is a
  # normal compilation that the losing compiler can handle.  If no
  # '.c' file was seen then we are probably linking.  That is also
  # ok.
  exec "$@"
fi

# Name of file we expect compiler to create.
# (Directory part and any drive letter are stripped, and .c becomes .o.)
cofile=`echo "$cfile" | sed 's|^.*[\\/]||; s|^[a-zA-Z]:||; s/\.c$/.o/'`

# Create the lock directory.
# Note: use '[/\\:.-]' here to ensure that we don't use the same name
# that we are using for the .o file.  Also, base the name on the expected
# object file name, since that is what matters with a parallel build.
lockdir=`echo "$cofile" | sed -e 's|[/\\:.-]|_|g'`.d
# mkdir either creates the directory or fails, so it serves as the lock;
# retry once per second until it succeeds.
while true; do
  if mkdir "$lockdir" >/dev/null 2>&1; then
    break
  fi
  sleep 1
done
# FIXME: race condition here if user kills between mkdir and trap.
# Remove the lock if we are interrupted by signal 1, 2 or 15.
trap "rmdir '$lockdir'; exit 1" 1 2 15

# Run the compile.
"$@"
ret=$?

# Move the object the compiler produced to the requested name.  The second
# branch handles a compiler that wrote FILE.obj ("${cofile}bj") instead of
# FILE.o.
if test -f "$cofile"; then
  test "$cofile" = "$ofile" || mv "$cofile" "$ofile"
elif test -f "${cofile}bj"; then
  test "${cofile}bj" = "$ofile" || mv "${cofile}bj" "$ofile"
fi

# Release the lock and report the compiler's own exit status.
rmdir "$lockdir"
exit $ret
338 | 
339 | # Local Variables:
340 | # mode: shell-script
341 | # sh-indentation: 2
342 | # eval: (add-hook 'write-file-hooks 'time-stamp)
343 | # time-stamp-start: "scriptversion="
344 | # time-stamp-format: "%:y-%02m-%02d.%02H"
345 | # time-stamp-time-zone: "UTC"
346 | # time-stamp-end: "; # UTC"
347 | # End:
348 | 


--------------------------------------------------------------------------------
/src/reed_sol.c:
--------------------------------------------------------------------------------
  1 | /* *
  2 |  * Copyright (c) 2014, James S. Plank and Kevin Greenan
  3 |  * All rights reserved.
  4 |  *
  5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
  6 |  * Coding Techniques
  7 |  *
  8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
  9 |  *
 10 |  * Redistribution and use in source and binary forms, with or without
 11 |  * modification, are permitted provided that the following conditions
 12 |  * are met:
 13 |  *
 14 |  *  - Redistributions of source code must retain the above copyright
 15 |  *    notice, this list of conditions and the following disclaimer.
 16 |  *
 17 |  *  - Redistributions in binary form must reproduce the above copyright
 18 |  *    notice, this list of conditions and the following disclaimer in
 19 |  *    the documentation and/or other materials provided with the
 20 |  *    distribution.
 21 |  *
 22 |  *  - Neither the name of the University of Tennessee nor the names of its
 23 |  *    contributors may be used to endorse or promote products derived
 24 |  *    from this software without specific prior written permission.
 25 |  *
 26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 |  * POSSIBILITY OF SUCH DAMAGE.
 38 |  */
 39 | 
 40 | /* Jerasure's authors:
 41 | 
 42 |    Revision 2.x - 2014: James S. Plank and Kevin M. Greenan
 43 |    Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman.
 44 |    Revision 1.0 - 2007: James S. Plank
 45 |  */
 46 | 
 47 | #include <stdio.h>
 48 | #include <stdlib.h>
 49 | #include <string.h>
 50 | 
 51 | #include <gf_complete.h>
 52 | #include "galois.h"
 53 | #include "jerasure.h"
 54 | #include "reed_sol.h"
 55 | 
/* Allocate room for num objects of the given type: expands to a malloc()
 * call cast to (type *).  The memory is not initialised. */
#define talloc(type, num) (type *) malloc(sizeof(type)*(num))
 57 | 
 58 | int *reed_sol_r6_coding_matrix(int k, int w)
 59 | {
 60 |   int *matrix;
 61 |   int i, tmp;
 62 | 
 63 |   if (w != 8 && w != 16 && w != 32) return NULL;
 64 | 
 65 |   matrix = talloc(int, 2*k);
 66 |   if (matrix == NULL) return NULL;
 67 | 
 68 |   for (i = 0; i < k; i++) matrix[i] = 1;
 69 |   matrix[k] = 1;
 70 |   tmp = 1;
 71 |   for (i = 1; i < k; i++) {
 72 |     tmp = galois_single_multiply(tmp, 2, w);
 73 |     matrix[k+i] = tmp;
 74 |   }
 75 |   return matrix;
 76 | }
 77 | 
 78 | int *reed_sol_vandermonde_coding_matrix(int k, int m, int w)
 79 | {
 80 |   int i, j;
 81 |   int *vdm, *dist;
 82 | 
 83 |   vdm = reed_sol_big_vandermonde_distribution_matrix(k+m, k, w);
 84 |   if (vdm == NULL) return NULL;
 85 |   dist = talloc(int, m*k);
 86 |   if (dist == NULL) {
 87 |     free(vdm);
 88 |     return NULL;
 89 |   }
 90 | 
 91 |   i = k*k;
 92 |   for (j = 0; j < m*k; j++) {
 93 |     dist[j] = vdm[i];
 94 |     i++;
 95 |   }
 96 |   free(vdm);
 97 |   return dist;
 98 | }
 99 | 
100 | static int prim08 = -1;
101 | static gf_t GF08;
102 | 
103 | void reed_sol_galois_w08_region_multby_2(char *region, int nbytes)
104 | {
105 |   if (prim08 == -1) {
106 |     prim08 = galois_single_multiply((1 << 7), 2, 8);
107 |     if (!gf_init_hard(&GF08, 8, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
108 |                       prim08, 0, 0, NULL, NULL)) {
109 |       fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w08_region_multby_2\n");
110 |       exit(1);
111 |     }
112 |   }
113 |   GF08.multiply_region.w32(&GF08, region, region, 2, nbytes, 0);
114 | }
115 | 
116 | static int prim16 = -1;
117 | static gf_t GF16;
118 | 
119 | void reed_sol_galois_w16_region_multby_2(char *region, int nbytes)
120 | {
121 |   if (prim16 == -1) {
122 |     prim16 = galois_single_multiply((1 << 15), 2, 16);
123 |     if (!gf_init_hard(&GF16, 16, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
124 |                       prim16, 0, 0, NULL, NULL)) {
125 |       fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w16_region_multby_2\n");
126 |       exit(1);
127 |     }
128 |   }
129 |   GF16.multiply_region.w32(&GF16, region, region, 2, nbytes, 0);
130 | }
131 | 
132 | static int prim32 = -1;
133 | static gf_t GF32;
134 | 
135 | void reed_sol_galois_w32_region_multby_2(char *region, int nbytes)
136 | {
137 |   if (prim32 == -1) {
138 |     prim32 = galois_single_multiply((1 << 31), 2, 32);
139 |     if (!gf_init_hard(&GF32, 32, GF_MULT_BYTWO_b, GF_REGION_DEFAULT, GF_DIVIDE_DEFAULT,
140 |                       prim32, 0, 0, NULL, NULL)) {
141 |       fprintf(stderr, "Error: Can't initialize the GF for reed_sol_galois_w32_region_multby_2\n");
142 |       exit(1);
143 |     }
144 |   }
145 |   GF32.multiply_region.w32(&GF32, region, region, 2, nbytes, 0);
146 | }
147 | 
/*
 * RAID-6 encode: fill coding_ptrs[0] and coding_ptrs[1] from the k data
 * regions in data_ptrs, each size bytes long.
 *
 * coding_ptrs[0] receives the XOR of all data regions.  coding_ptrs[1] is
 * built by starting from data_ptrs[k-1] and, for each earlier region,
 * multiplying the accumulator by 2 and XORing that region in.
 *
 * Returns 1 on success and 0 when k < 1 or when w is not 8, 16 or 32.
 * NOTE(review): w is only examined inside the second loop, so with k == 1
 * an unsupported w is not detected, and with k >= 2 both coding regions
 * have already been written when 0 is returned.
 */
int reed_sol_r6_encode(int k, int w, char **data_ptrs, char **coding_ptrs, int size)
{
  int i;

  /* Without at least one data region, data_ptrs[0] and data_ptrs[k-1]
     below would be read out of bounds. */
  if (k < 1) return 0;

  /* First, put the XOR into coding region 0 */

  memcpy(coding_ptrs[0], data_ptrs[0], size);

  for (i = 1; i < k; i++) galois_region_xor(data_ptrs[i], coding_ptrs[0], size);

  /* Next, put the sum of (2^j)*Dj into coding region 1 */

  memcpy(coding_ptrs[1], data_ptrs[k-1], size);

  for (i = k-2; i >= 0; i--) {
    switch (w) {
      case 8:  reed_sol_galois_w08_region_multby_2(coding_ptrs[1], size); break;
      case 16: reed_sol_galois_w16_region_multby_2(coding_ptrs[1], size); break;
      case 32: reed_sol_galois_w32_region_multby_2(coding_ptrs[1], size); break;
      default: return 0;
    }

    galois_region_xor(data_ptrs[i], coding_ptrs[1], size);
  }
  return 1;
}
174 | /*
    |  * Build a rows x cols extended Vandermonde matrix over GF(2^w), stored
    |  * row-major in a freshly allocated int array.
    |  *
    |  *   row 0        : 1 0 0 ... 0
    |  *   row i        : i^0 i^1 ... i^(cols-1)   for 0 < i < rows-1
    |  *   row rows-1   : 0 0 ... 0 1
    |  *
    |  * Returns NULL when rows or cols is less than 1, when w < 30 and 2^w is
    |  * smaller than rows or cols, or when the allocation fails.  The matrix
    |  * comes from talloc(); presumably the caller releases it with free().
    |  */
175 | int *reed_sol_extended_vandermonde_matrix(int rows, int cols, int w)
176 | {
177 |   int *vdm;
178 |   int i, j, k;
179 | 
    |   /* An empty matrix has no element 0; without this check the writes below
    |      would land outside the allocation. */
    |   if (rows < 1 || cols < 1) return NULL;
    | 
180 |   if (w < 30 && (1 << w) < rows) return NULL;
181 |   if (w < 30 && (1 << w) < cols) return NULL;
182 | 
183 |   vdm = talloc(int, rows*cols);
184 |   if (vdm == NULL) { return NULL; }
185 |   
    |   /* First row: a 1 followed by zeros. */
186 |   vdm[0] = 1;
187 |   for (j = 1; j < cols; j++) vdm[j] = 0;
188 |   if (rows == 1) return vdm;
189 | 
    |   /* Last row: zeros followed by a 1 (j is cols-1 when the loop ends). */
190 |   i=(rows-1)*cols;
191 |   for (j = 0; j < cols-1; j++) vdm[i+j] = 0;
192 |   vdm[i+j] = 1;
193 |   if (rows == 2) return vdm;
194 | 
    |   /* Middle rows: successive powers of the row index. */
195 |   for (i = 1; i < rows-1; i++) {
196 |     k = 1;
197 |     for (j = 0; j < cols; j++) {
198 |       vdm[i*cols+j] = k;
199 |       k = galois_single_multiply(k, i, w);
200 |     }
201 |   }
202 |   return vdm;
203 | }
204 | /*
    |  * Turn the extended Vandermonde matrix into a distribution matrix.
    |  *
    |  * Starting from reed_sol_extended_vandermonde_matrix(rows, cols, w), rows
    |  * 1 .. cols-1 are reduced in turn (row swap if needed, then column
    |  * operations) so that row i holds a 1 in column i and zeros elsewhere.
    |  * Afterwards the part of each column from row `cols` downward is scaled so
    |  * that row `cols` is all ones, and every later row is scaled so that its
    |  * first element is 1.
    |  *
    |  * Returns the matrix, or NULL when cols >= rows or the extended matrix
    |  * could not be built.  If no usable pivot row is found, a message is
    |  * printed to stderr and the process exits with status 1.
    |  */
205 | int *reed_sol_big_vandermonde_distribution_matrix(int rows, int cols, int w)
206 | {
207 |   int *dist;
208 |   int i, j, k;
209 |   int sindex, srindex, siindex, tmp;
210 | 
211 |   if (cols >= rows) return NULL;
212 |   
213 |   dist = reed_sol_extended_vandermonde_matrix(rows, cols, w);
214 |   if (dist == NULL) return NULL;
215 | 
216 |   sindex = 0;  /* flat index of the start of row i; advanced by cols each pass */
217 |   for (i = 1; i < cols; i++) {
218 |     sindex += cols;
219 | 
220 |     /* Find an appropriate row -- where i,i != 0 */
221 |     srindex = sindex+i;
222 |     for (j = i; j < rows && dist[srindex] == 0; j++) srindex += cols;
223 |     if (j >= rows) {   /* This should never happen if rows/w are correct */
224 |       fprintf(stderr, "reed_sol_big_vandermonde_distribution_matrix(%d,%d,%d) - couldn't make matrix\n", 
225 |              rows, cols, w);
226 |       exit(1);
227 |     }
228 |  
229 |     /* If necessary, swap rows */
230 |     if (j != i) {
231 |       srindex -= i;  /* back to the first element of row j */
232 |       for (k = 0; k < cols; k++) {
233 |         tmp = dist[srindex+k];
234 |         dist[srindex+k] = dist[sindex+k];
235 |         dist[sindex+k] = tmp;
236 |       }
237 |     }
238 |   
239 |     /* If Element i,i is not equal to 1, multiply column i by the inverse of element i,i */
240 | 
241 |     if (dist[sindex+i] != 1) {
242 |       tmp = galois_single_divide(1, dist[sindex+i], w);
243 |       srindex = i;
244 |       for (j = 0; j < rows; j++) {
245 |         dist[srindex] = galois_single_multiply(tmp, dist[srindex], w);
246 |         srindex += cols;
247 |       }
248 |     }
249 |  
250 |     /* Now, for each element in row i that is not in column i, you need
251 |        to make it zero.  Suppose that this is column j, and the element
252 |        at i,j = e.  Then you want to replace all of column j with 
253 |        (col-j + col-i*e).   Note, that in row i, col-i = 1 and col-j = e.
254 |        So (e + 1e) = 0, which is indeed what we want. */
255 | 
256 |     for (j = 0; j < cols; j++) {
257 |       tmp = dist[sindex+j];
258 |       if (j != i && tmp != 0) {
259 |         srindex = j;  /* walks down column j */
260 |         siindex = i;  /* walks down column i */
261 |         for (k = 0; k < rows; k++) {
262 |           dist[srindex] = dist[srindex] ^ galois_single_multiply(tmp, dist[siindex], w);
263 |           srindex += cols;
264 |           siindex += cols;
265 |         }
266 |       }
267 |     }
268 |   }
269 |   /* We desire to have row `cols` be all ones.  To do that, multiply
270 |      column j, from row `cols` downward, by 1/dist[cols,j]. */
271 | 
272 |   sindex = cols*cols;
273 |   for (j = 0; j < cols; j++) {
274 |     tmp = dist[sindex];
275 |     if (tmp != 1) { 
276 |       tmp = galois_single_divide(1, tmp, w);
277 |       srindex = sindex;
278 |       for (i = cols; i < rows; i++) {
279 |         dist[srindex] = galois_single_multiply(tmp, dist[srindex], w);
280 |         srindex += cols;
281 |       }
282 |     }
283 |     sindex++;
284 |   }
285 | 
286 |   /* Finally, we'd like the first column of each remaining row (cols+1 and
287 |      below) to be one.  To do that, we multiply the row by the inverse of
    |      its first element. */
288 | 
289 |   sindex = cols*(cols+1);
290 |   for (i = cols+1; i < rows; i++) {
291 |     tmp = dist[sindex];
292 |     if (tmp != 1) { 
293 |       tmp = galois_single_divide(1, tmp, w);
294 |       for (j = 0; j < cols; j++) dist[sindex+j] = galois_single_multiply(dist[sindex+j], tmp, w);
295 |     }
296 |     sindex += cols;
297 |   }
298 | 
299 |   return dist;
300 | }
301 | 
302 | 


--------------------------------------------------------------------------------
/src/neon/gf_w32_neon.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * Copyright (c) 2014: Janne Grunau <j@jannau.net>
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  - Redistributions of source code must retain the above copyright
 13 |  *     notice, this list of conditions and the following disclaimer.
 14 |  *
 15 |  *  - Redistributions in binary form must reproduce the above copyright
 16 |  *    notice, this list of conditions and the following disclaimer in
 17 |  *    the documentation and/or other materials provided with the
 18 |  *    distribution.
 19 |  *
 20 |  *  - Neither the name of the University of Tennessee nor the names of its
 21 |  *    contributors may be used to endorse or promote products derived
 22 |  *    from this software without specific prior written permission.
 23 |  *
 24 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 25 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 26 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 27 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 28 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 29 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 30 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 31 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 32 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 33 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 34 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 |  * POSSIBILITY OF SUCH DAMAGE.
 36 |  *
 37 |  * gf_w32_neon.c
 38 |  *
 39 |  * Neon routines for 32-bit Galois fields
 40 |  *
 41 |  */
 42 | 
 43 | 
 44 | #include "gf_int.h"
 45 | #include <stdio.h>
 46 | #include <stdlib.h>
 47 | #include "gf_w32.h"
 48 | /* When ARCH_AARCH64 is not defined, provide vqtbl1q_u8 as a macro built from
    |    two vtbl2_u8 lookups, one on the low half and one on the high half of the
    |    index vector.  In this configuration `tbl` is a uint8x8x2_t (see the
    |    `tables` declaration below). */
 49 | #ifndef ARCH_AARCH64
 50 | #define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)),   \
 51 |                                        vtbl2_u8(tbl, vget_high_u8(v)))
 52 | #endif
 53 | /*
    |  * NEON inner loop of the w = 32 SPLIT 4/32 region multiply.
    |  *
    |  *   src, dst - word pointers; each iteration consumes and produces 16 words
    |  *   d_end    - loop runs while dst < d_end
    |  *   btable   - btable[n][b][k]: 16-entry byte lookup tables, indexed by
    |  *              nibble position n (0..7) and output byte plane b (0..3)
    |  *   xor      - non-zero: XOR the products into dst instead of overwriting
    |  *   altmap   - non-zero: the four loaded vectors are used directly as the
    |  *              four byte planes; zero: they are transposed on the way in
    |  *              and transposed back on the way out
    |  *
    |  * `gf` and `val` are not referenced in the body.
    |  */
 54 | static
 55 | void
 56 | neon_w32_split_4_32_multiply_region(gf_t *gf, uint32_t *src, uint32_t *dst,
 57 |                                     uint32_t *d_end, uint8_t btable[8][4][16],
 58 |                                     uint32_t val, int xor, int altmap)
 59 | {
 60 |   int i, j;
 61 | #ifdef ARCH_AARCH64
 62 |   uint8x16_t tables[8][4];
 63 | #else
 64 |   uint8x8x2_t tables[8][4];
 65 | #endif
 66 |   uint32x4_t v0, v1, v2, v3, s0, s1, s2, s3;
 67 |   uint8x16_t p0, p1, p2, p3, si, mask1;
 68 |   uint16x8x2_t r0, r1;
 69 |   uint8x16x2_t q0, q1;
 70 | 
 71 |   for (i = 0; i < 8; i++) {  /* copy all 32 lookup tables into vector variables */
 72 |     for (j = 0; j < 4; j++) {
 73 | #ifdef ARCH_AARCH64
 74 |       tables[i][j] = vld1q_u8(btable[i][j]);
 75 | #else
 76 |       tables[i][j].val[0] = vld1_u8(btable[i][j]);
 77 |       tables[i][j].val[1] = vld1_u8(btable[i][j] + 8);
 78 | #endif
 79 |     }
 80 |   }
 81 | 
 82 |   mask1 = vdupq_n_u8(0xf);  /* selects the low nibble of every byte */
 83 | 
 84 |   while (dst < d_end) {
 85 | 
 86 |       v0 = vld1q_u32(src); src += 4;
 87 |       v1 = vld1q_u32(src); src += 4;
 88 |       v2 = vld1q_u32(src); src += 4;
 89 |       v3 = vld1q_u32(src); src += 4;
 90 | 
 91 |       if (altmap) {
 92 |           q0.val[0] = vreinterpretq_u8_u32(v0);
 93 |           q0.val[1] = vreinterpretq_u8_u32(v1);
 94 |           q1.val[0] = vreinterpretq_u8_u32(v2);
 95 |           q1.val[1] = vreinterpretq_u8_u32(v3);
 96 |       } else {
    |           /* Two transpose passes (16-bit lanes, then 8-bit lanes);
    |              presumably this gathers each byte position of the words into
    |              its own vector -- TODO confirm the exact lane order. */
 97 |           r0 = vtrnq_u16(vreinterpretq_u16_u32(v0), vreinterpretq_u16_u32(v2));
 98 |           r1 = vtrnq_u16(vreinterpretq_u16_u32(v1), vreinterpretq_u16_u32(v3));
 99 | 
100 |           q0 = vtrnq_u8(vreinterpretq_u8_u16(r0.val[0]),
101 |                         vreinterpretq_u8_u16(r1.val[0]));
102 |           q1 = vtrnq_u8(vreinterpretq_u8_u16(r0.val[1]),
103 |                         vreinterpretq_u8_u16(r1.val[1]));
104 |       }
105 | 
    |       /* Eight rounds follow, one per nibble (low then high nibble of each
    |          of the four byte planes).  Each round looks the nibble up in that
    |          nibble's four tables and XORs the results into p0..p3. */
106 |       si = vandq_u8(q0.val[0], mask1);
107 |       p0 = vqtbl1q_u8(tables[0][0], si);
108 |       p1 = vqtbl1q_u8(tables[0][1], si);
109 |       p2 = vqtbl1q_u8(tables[0][2], si);
110 |       p3 = vqtbl1q_u8(tables[0][3], si);
111 | 
112 |       si = vshrq_n_u8(q0.val[0], 4);
113 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[1][0], si));
114 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[1][1], si));
115 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[1][2], si));
116 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[1][3], si));
117 | 
118 |       si = vandq_u8(q0.val[1], mask1);
119 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[2][0], si));
120 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[2][1], si));
121 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[2][2], si));
122 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[2][3], si));
123 | 
124 |       si = vshrq_n_u8(q0.val[1], 4);
125 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[3][0], si));
126 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[3][1], si));
127 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[3][2], si));
128 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[3][3], si));
129 | 
130 |       si = vandq_u8(q1.val[0], mask1);
131 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[4][0], si));
132 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[4][1], si));
133 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[4][2], si));
134 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[4][3], si));
135 | 
136 |       si = vshrq_n_u8(q1.val[0], 4);
137 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[5][0], si));
138 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[5][1], si));
139 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[5][2], si));
140 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[5][3], si));
141 | 
142 |       si = vandq_u8(q1.val[1], mask1);
143 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[6][0], si));
144 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[6][1], si));
145 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[6][2], si));
146 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[6][3], si));
147 | 
148 |       si = vshrq_n_u8(q1.val[1], 4);
149 |       p0 = veorq_u8(p0, vqtbl1q_u8(tables[7][0], si));
150 |       p1 = veorq_u8(p1, vqtbl1q_u8(tables[7][1], si));
151 |       p2 = veorq_u8(p2, vqtbl1q_u8(tables[7][2], si));
152 |       p3 = veorq_u8(p3, vqtbl1q_u8(tables[7][3], si));
153 | 
154 |       if (altmap) {
155 |           s0 = vreinterpretq_u32_u8(p0);
156 |           s1 = vreinterpretq_u32_u8(p1);
157 |           s2 = vreinterpretq_u32_u8(p2);
158 |           s3 = vreinterpretq_u32_u8(p3);
159 |       } else {
    |           /* Reverse of the input transpose: 8-bit pass first, then 16-bit. */
160 |           q0 = vtrnq_u8(p0, p1);
161 |           q1 = vtrnq_u8(p2, p3);
162 | 
163 |           r0 = vtrnq_u16(vreinterpretq_u16_u8(q0.val[0]),
164 |                          vreinterpretq_u16_u8(q1.val[0]));
165 |           r1 = vtrnq_u16(vreinterpretq_u16_u8(q0.val[1]),
166 |                          vreinterpretq_u16_u8(q1.val[1]));
167 | 
168 |           s0 = vreinterpretq_u32_u16(r0.val[0]);
169 |           s1 = vreinterpretq_u32_u16(r1.val[0]);
170 |           s2 = vreinterpretq_u32_u16(r0.val[1]);
171 |           s3 = vreinterpretq_u32_u16(r1.val[1]);
172 |       }
173 | 
174 |       if (xor) {  /* accumulate into the existing destination contents */
175 |           v0 = vld1q_u32(dst);
176 |           v1 = vld1q_u32(dst + 4);
177 |           v2 = vld1q_u32(dst + 8);
178 |           v3 = vld1q_u32(dst + 12);
179 |           s0 = veorq_u32(s0, v0);
180 |           s1 = veorq_u32(s1, v1);
181 |           s2 = veorq_u32(s2, v2);
182 |           s3 = veorq_u32(s3, v3);
183 |       }
184 | 
185 |       vst1q_u32(dst,      s0);
186 |       vst1q_u32(dst + 4,  s1);
187 |       vst1q_u32(dst + 8,  s2);
188 |       vst1q_u32(dst + 12, s3);
189 | 
190 |       dst += 16;
191 |   }
192 | }
193 | /*
    |  * Region multiply for w = 32, SPLIT 4/32, with tables built per call.
    |  *
    |  *   src, dest - source and destination regions of `bytes` bytes
    |  *   val       - multiplier; 0 and 1 are delegated to gf_multby_zero() and
    |  *               gf_multby_one() respectively
    |  *   xor       - non-zero to XOR the product into dest
    |  *   altmap    - forwarded unchanged to the NEON inner loop
    |  *
    |  * The region is described with a 64-byte alignment; the initial and final
    |  * alignment helpers are called around the NEON loop, which runs from
    |  * rd.s_start / rd.d_start up to rd.d_top.
    |  */
194 | static
195 | inline
196 | void
197 | neon_w32_split_4_32_lazy_multiply_region(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor, int altmap)
198 | {
199 |   gf_internal_t *h;
200 |   int i, j, k;
201 |   uint32_t pp, v, *s32, *d32, *top, tmp_table[16];
202 |   uint8_t btable[8][4][16];
203 |   gf_region_data rd;
204 | 
205 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
206 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
207 | 
208 |   h = (gf_internal_t *) gf->scratch;
209 |   pp = h->prim_poly;
210 | 
211 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 64);
212 |   gf_do_initial_region_alignment(&rd);
213 | 
214 |   s32 = (uint32_t *) rd.s_start;
215 |   d32 = (uint32_t *) rd.d_start;
216 |   top = (uint32_t *) rd.d_top;
217 | 
    |   /* Build the tables.  For nibble position i, tmp_table[k] is assembled by
    |      XOR from successive doublings of v, so it holds val times
    |      (k << 4*i) in the field; each entry is then split into its four
    |      bytes, byte j going to btable[i][j][k]. */
218 |   v = val;
219 |   for (i = 0; i < 8; i++) {
220 |     tmp_table[0] = 0;
221 |     for (j = 1; j < 16; j <<= 1) {
222 |       for (k = 0; k < j; k++) {
223 |         tmp_table[k^j] = (v ^ tmp_table[k]);
224 |       }
225 |       v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1);  /* v *= 2, reducing by pp when the GF_FIRST_BIT bit is set (presumably the top bit -- defined outside this file) */
226 |     }
227 |     for (j = 0; j < 4; j++) {
228 |       for (k = 0; k < 16; k++) {
229 |         btable[i][j][k] = (uint8_t) tmp_table[k];
230 |         tmp_table[k] >>= 8;
231 |       }
232 |     }
233 |   }
234 | 
    |   /* NOTE(review): both branches differ only in passing a literal 1 or 0 for
    |      xor; presumably this helps the compiler specialize the worker -- confirm. */
235 |   if (xor)
236 |     neon_w32_split_4_32_multiply_region(gf, s32, d32, top, btable, val, 1, altmap);
237 |   else
238 |     neon_w32_split_4_32_multiply_region(gf, s32, d32, top, btable, val, 0, altmap);
239 | 
240 |   gf_do_final_region_alignment(&rd);
241 | }
242 | /* Region-multiply entry point for the standard layout: forwards to
    |    neon_w32_split_4_32_lazy_multiply_region() with altmap = 0. */
243 | static
244 | void
245 | gf_w32_split_4_32_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest,
246 |                                             gf_val_32_t val, int bytes, int xor)
247 | {
248 |   neon_w32_split_4_32_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0);
249 | }
250 | /* Region-multiply entry point for the ALTMAP layout: forwards to
    |    neon_w32_split_4_32_lazy_multiply_region() with altmap = 1. */
251 | static
252 | void
253 | gf_w32_split_4_32_lazy_altmap_multiply_region_neon(gf_t *gf, void *src,
254 |                                                    void *dest, gf_val_32_t val,
255 |                                                    int bytes, int xor)
256 | {
257 |   neon_w32_split_4_32_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1);
258 | }
259 | /* Install the NEON SPLIT 4/32 region multiplier into gf->multiply_region.w32.
    |    The ALTMAP variant is chosen when GF_REGION_ALTMAP is set in the
    |    region_type stored in gf->scratch; otherwise the standard variant. */
260 | void gf_w32_neon_split_init(gf_t *gf)
261 | {
262 |   gf_internal_t *h = (gf_internal_t *) gf->scratch;
263 | 
264 |   if (h->region_type & GF_REGION_ALTMAP)
265 |       gf->multiply_region.w32 = gf_w32_split_4_32_lazy_altmap_multiply_region_neon;
266 |   else
267 |       gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region_neon;
268 | 
269 | }
270 | 


--------------------------------------------------------------------------------
/include/gf_int.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * gf_int.h
  7 |  *
  8 |  * Internal code for Galois field routines.  This is not meant for 
  9 |  * users to include, but for the internal GF files to use. 
 10 |  */
 11 | 
 12 | #pragma once
 13 | 
 14 | #include "gf_complete.h"
 15 | 
 16 | #include <string.h>
 17 | /* Timing and buffer-fill helpers defined outside this header.
    |    NOTE(review): presumably timer_start() records a start time in *t,
    |    timer_split() returns the time elapsed since it, and
    |    galois_fill_random() fills `len` bytes of `buf` from `seed` -- inferred
    |    from the names and signatures only; confirm against the definitions. */
 18 | extern void     timer_start (double *t);
 19 | extern double   timer_split (const double *t);
 20 | extern void     galois_fill_random (void *buf, int len, unsigned int seed);
 21 | /* Internal per-field state.  The NEON code in this project reaches it by
    |    casting gf->scratch to (gf_internal_t *). */
 22 | typedef struct {
 23 |   int mult_type;       /* presumably one of the GF_MULT_* selectors -- confirm */
 24 |   int region_type;     /* bit flags; tested against GF_REGION_ALTMAP by the NEON init code */
 25 |   int divide_type;     /* presumably one of the GF_DIVIDE_* selectors -- confirm */
 26 |   int w;               /* presumably the field's word size in bits -- confirm */
 27 |   uint64_t prim_poly;  /* polynomial used for reduction by the multiply routines */
 28 |   int free_me;         /* NOTE(review): meaning not visible in this header */
 29 |   int arg1;            /* method-specific arguments (see the GF_E_ARG1SET / */
 30 |   int arg2;            /*   GF_E_ARG2SET conditions below) */
 31 |   gf_t *base_gf;       /* presumably the base field for COMPOSITE -- confirm */
 32 |   void *private;       /* opaque, implementation-specific data */
 33 | } gf_internal_t;
 34 | /* Per-word-size entry points.  Each width has an init routine that takes the
    |    gf_t, and a scratch-size query that takes the method selectors and
    |    arguments (the generic "wgen" query also takes w).
    |    NOTE(review): return-value conventions are not visible in this header. */
 35 | extern int gf_w4_init (gf_t *gf);
 36 | extern int gf_w4_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 37 | 
 38 | extern int gf_w8_init (gf_t *gf);
 39 | extern int gf_w8_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 40 | 
 41 | extern int gf_w16_init (gf_t *gf);
 42 | extern int gf_w16_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 43 | 
 44 | extern int gf_w32_init (gf_t *gf);
 45 | extern int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 46 | 
 47 | extern int gf_w64_init (gf_t *gf);
 48 | extern int gf_w64_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 49 | 
 50 | extern int gf_w128_init (gf_t *gf);
 51 | extern int gf_w128_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2);
 52 | 
 53 | extern int gf_wgen_init (gf_t *gf);
 54 | extern int gf_wgen_scratch_size(int w, int mult_type, int region_type, int divide_type, int arg1, int arg2);
 55 | 
 56 | void gf_wgen_cauchy_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor);
 57 | gf_val_32_t gf_wgen_extract_word(gf_t *gf, void *start, int bytes, int index);
 58 | 
 59 | extern void gf_alignment_error(char *s, int a);
 60 | 
 61 | extern uint32_t gf_bitmatrix_inverse(uint32_t y, int w, uint32_t pp);
 62 | 
 63 | /* This returns the correct default for prim_poly when base is used as the base
 64 |    field for COMPOSITE.  It returns 0 if we don't have a default prim_poly. */
 65 | 
 66 | extern uint64_t gf_composite_get_default_poly(gf_t *base);
 67 | 
 68 | /* This structure lets you define a region multiply.  It helps because you can handle
 69 |    unaligned portions of the data with the procedures below, which really cleans
 70 |    up the code.  The first seven fields mirror the arguments of
    |    gf_set_region_data(); the last four are the start/top pointers it sets. */
 71 | 
 72 | typedef struct {
 73 |   gf_t *gf;
 74 |   void *src;
 75 |   void *dest;
 76 |   int bytes;
 77 |   uint64_t val;
 78 |   int xor;
 79 |   int align;           /* The number of bytes to which to align. */
 80 |   void *s_start;       /* The start and the top of the aligned region. */
 81 |   void *d_start;
 82 |   void *s_top;
 83 |   void *d_top;         /* Used as the loop bound by the NEON w32 region code. */
 84 | } gf_region_data;
 85 | 
 86 | /* This lets you set up one of these in one call. It also sets the start/top pointers. */
 87 | 
 88 | void gf_set_region_data(gf_region_data *rd,
 89 |                         gf_t *gf,
 90 |                         void *src,
 91 |                         void *dest,
 92 |                         int bytes,
 93 |                         uint64_t val,
 94 |                         int xor,
 95 |                         int align);
 96 | 
 97 | /* This performs gf->multiply.32() on all of the unaligned bytes in the beginning of the region */
 98 | 
 99 | extern void gf_do_initial_region_alignment(gf_region_data *rd);
100 | 
101 | /* This performs gf->multiply.32() on all of the unaligned bytes in the end of the region */
102 | 
103 | extern void gf_do_final_region_alignment(gf_region_data *rd);
104 | 
105 | extern void gf_two_byte_region_table_multiply(gf_region_data *rd, uint16_t *base);
106 | /* Region shortcuts; the NEON w32 region code calls these when the multiplier
    |    is 0 and 1 respectively. */
107 | extern void gf_multby_zero(void *dest, int bytes, int xor);
108 | extern void gf_multby_one(void *src, void *dest, int bytes, int xor);
109 | /* Error codes.  The comment beside each code gives the condition it stands
    |    for. */
110 | typedef enum {GF_E_MDEFDIV, /* Div != Default && Mult == Default */
111 |               GF_E_MDEFREG, /* Reg != Default && Mult == Default */
112 |               GF_E_MDEFARG, /* Args != Default && Mult == Default */
113 |               GF_E_DIVCOMP, /* Mult == Composite && Div != Default */
114 |               GF_E_CAUCOMP, /* Mult == Composite && Reg == CAUCHY */
115 |               GF_E_DOUQUAD, /* Reg == DOUBLE && Reg == QUAD */
116 |               GF_E_SIMD_NO, /* Reg == SIMD && Reg == NOSIMD */
117 |               GF_E_CAUCHYB, /* Reg == CAUCHY && Other Reg */
118 |               GF_E_CAUGT32, /* Reg == CAUCHY && w > 32*/
119 |               GF_E_ARG1SET, /* Arg1 != 0 && Mult \notin COMPOSITE/SPLIT/GROUP */
120 |               GF_E_ARG2SET, /* Arg2 != 0 && Mult \notin SPLIT/GROUP */
121 |               GF_E_MATRIXW, /* Div == MATRIX && w > 32 */
122 |               GF_E_BAD___W, /* Illegal w */
123 |               GF_E_DOUBLET, /* Reg == DOUBLE && Mult != TABLE */
124 |               GF_E_DOUBLEW, /* Reg == DOUBLE && w \notin {4,8} */
125 |               GF_E_DOUBLEJ, /* Reg == DOUBLE && other Reg */
126 |               GF_E_DOUBLEL, /* Reg == DOUBLE & LAZY but w = 4 */
127 |               GF_E_QUAD__T, /* Reg == QUAD && Mult != TABLE */
128 |               GF_E_QUAD__W, /* Reg == QUAD && w != 4 */
129 |               GF_E_QUAD__J, /* Reg == QUAD && other Reg */
130 |               GF_E_LAZY__X, /* Reg == LAZY && not DOUBLE or QUAD*/
131 |               GF_E_ALTSHIF, /* Mult == Shift && Reg == ALTMAP */
132 |               GF_E_SSESHIF, /* Mult == Shift && Reg == SIMD|NOSIMD */
133 |               GF_E_ALT_CFM, /* Mult == CARRY_FREE && Reg == ALTMAP */
134 |               GF_E_SSE_CFM, /* Mult == CARRY_FREE && Reg == SIMD|NOSIMD */
135 |               GF_E_PCLMULX, /* Mult == Carry_Free && No PCLMUL */
136 |               GF_E_ALT_BY2, /* Mult == Bytwo_x && Reg == ALTMAP */
137 |               GF_E_BY2_SSE, /* Mult == Bytwo_x && Reg == SSE && No SSE2 */
138 |               GF_E_LOGBADW, /* Mult == LOGx, w too big*/
139 |               GF_E_LOG___J, /* Mult == LOGx, && Reg == SSE|ALTMAP|NOSSE */
140 |               GF_E_ZERBADW, /* Mult == LOG_ZERO, w \notin {8,16} */
141 |               GF_E_ZEXBADW, /* Mult == LOG_ZERO_EXT, w != 8 */
142 |               GF_E_LOGPOLY, /* Mult == LOG & poly not primitive */
143 |               GF_E_GR_ARGX, /* Mult == GROUP, Bad arg1/2 */
144 |               GF_E_GR_W_48, /* Mult == GROUP, w \in { 4, 8 } */
145 |               GF_E_GR_W_16, /* Mult == GROUP, w == 16, arg1 != 4 || arg2 != 4 */
146 |               GF_E_GR_128A, /* Mult == GROUP, w == 128, bad args */
147 |               GF_E_GR_A_27, /* Mult == GROUP, either arg > 27 */
148 |               GF_E_GR_AR_W, /* Mult == GROUP, either arg > w  */
149 |               GF_E_GR____J, /* Mult == GROUP, Reg == SSE|ALTMAP|NOSSE */
150 |               GF_E_TABLE_W, /* Mult == TABLE, w too big */
151 |               GF_E_TAB_SSE, /* Mult == TABLE, SIMD|NOSIMD only apply to w == 4 */
152 |               GF_E_TABSSE3, /* Mult == TABLE, Need SSSE3 for SSE */
153 |               GF_E_TAB_ALT, /* Mult == TABLE, Reg == ALTMAP */
154 |               GF_E_SP128AR, /* Mult == SPLIT, w=128, Bad arg1/arg2 */
155 |               GF_E_SP128AL, /* Mult == SPLIT, w=128, SSE requires ALTMAP */
156 |               GF_E_SP128AS, /* Mult == SPLIT, w=128, ALTMAP requires SSE */
157 |               GF_E_SP128_A, /* Mult == SPLIT, w=128, ALTMAP only with 4/128 */
158 |               GF_E_SP128_S, /* Mult == SPLIT, w=128, SSE only with 4/128 */
159 |               GF_E_SPLIT_W, /* Mult == SPLIT, Bad w (8, 16, 32, 64, 128)  */
160 |               GF_E_SP_16AR, /* Mult == SPLIT, w=16, Bad arg1/arg2 */
161 |               GF_E_SP_16_A, /* Mult == SPLIT, w=16, ALTMAP only with 4/16 */
162 |               GF_E_SP_16_S, /* Mult == SPLIT, w=16, SSE only with 4/16 */
163 |               GF_E_SP_32AR, /* Mult == SPLIT, w=32, Bad arg1/arg2 */
164 |               GF_E_SP_32AS, /* Mult == SPLIT, w=32, ALTMAP requires SSE */
165 |               GF_E_SP_32_A, /* Mult == SPLIT, w=32, ALTMAP only with 4/32 */
166 |               GF_E_SP_32_S, /* Mult == SPLIT, w=32, SSE only with 4/32 */
167 |               GF_E_SP_64AR, /* Mult == SPLIT, w=64, Bad arg1/arg2 */
168 |               GF_E_SP_64AS, /* Mult == SPLIT, w=64, ALTMAP requires SSE */
169 |               GF_E_SP_64_A, /* Mult == SPLIT, w=64, ALTMAP only with 4/64 */
170 |               GF_E_SP_64_S, /* Mult == SPLIT, w=64, SSE only with 4/64 */
171 |               GF_E_SP_8_AR, /* Mult == SPLIT, w=8, Bad arg1/arg2 */
172 |               GF_E_SP_8__A, /* Mult == SPLIT, w=8, no ALTMAP */
173 |               GF_E_SP_SSE3, /* Mult == SPLIT, Need SSSE3 for SSE */
174 |               GF_E_COMP_A2, /* Mult == COMP, arg1 must be = 2 */
175 |               GF_E_COMP_SS, /* Mult == COMP, SIMD|NOSIMD */
176 |               GF_E_COMP__W, /* Mult == COMP, Bad w. */
177 |               GF_E_UNKFLAG, /* Unknown flag in create_from.... */
178 |               GF_E_UNKNOWN, /* Unknown mult_type. */
179 |               GF_E_UNK_REG, /* Unknown region_type. */
180 |               GF_E_UNK_DIV, /* Unknown divide_type. */
181 |               GF_E_CFM___W, /* Mult == CFM,  Bad w. */
182 |               GF_E_CFM4POL, /* Mult == CFM & Prim Poly has high bits set. */
183 |               GF_E_CFM8POL, /* Mult == CFM & Prim Poly has high bits set. */
184 |               GF_E_CF16POL, /* Mult == CFM & Prim Poly has high bits set. */
185 |               GF_E_CF32POL, /* Mult == CFM & Prim Poly has high bits set. */
186 |               GF_E_CF64POL, /* Mult == CFM & Prim Poly has high bits set. */
187 |               GF_E_FEWARGS, /* Too few args in argc/argv. */
188 |               GF_E_BADPOLY, /* Bad primitive polynomial -- too many bits set. */
189 |               GF_E_COMP_PP, /* Bad primitive polynomial -- bigger than sub-field. */
190 |               GF_E_COMPXPP, /* Can't derive a default pp for composite field. */
191 |               GF_E_BASE__W, /* Composite -- Base field is the wrong size. */
192 |               GF_E_TWOMULT, /* In create_from... two -m's. */
193 |               GF_E_TWO_DIV, /* In create_from... two -d's. */
194 |               GF_E_POLYSPC, /* Bad number after -p. */
195 |               GF_E_SPLITAR, /* Ran out of arguments in SPLIT */
196 |               GF_E_SPLITNU, /* Arguments not integers in SPLIT. */
197 |               GF_E_GROUPAR, /* Ran out of arguments in GROUP */
198 |               GF_E_GROUPNU, /* Arguments not integers in GROUP. */
199 |               GF_E_DEFAULT } gf_error_type_t;
200 | 
201 | 


--------------------------------------------------------------------------------
/src/neon/gf_w8_neon.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * Copyright (c) 2014: Janne Grunau <j@jannau.net>
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  - Redistributions of source code must retain the above copyright
 13 |  *     notice, this list of conditions and the following disclaimer.
 14 |  *
 15 |  *  - Redistributions in binary form must reproduce the above copyright
 16 |  *    notice, this list of conditions and the following disclaimer in
 17 |  *    the documentation and/or other materials provided with the
 18 |  *    distribution.
 19 |  *
 20 |  *  - Neither the name of the University of Tennessee nor the names of its
 21 |  *    contributors may be used to endorse or promote products derived
 22 |  *    from this software without specific prior written permission.
 23 |  *
 24 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 25 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 26 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 27 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 28 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 29 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 30 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 31 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 32 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 33 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 34 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 |  * POSSIBILITY OF SUCH DAMAGE.
 36 |  *
 37 |  * gf_w8_neon.c
 38 |  *
 39 |  * Neon optimized routines for 8-bit Galois fields
 40 |  *
 41 |  */
 42 | 
 43 | #include "gf_int.h"
 44 | #include "gf_w8.h"
 45 | #include <stdio.h>
 46 | #include <stdlib.h>
 47 | 
 48 | /* ARM NEON reducing macro for the carry free multiplication
 49 |  *   vmull_p8 is the carryless multiply operation. Here vshrn_n_u16 shifts
 50 |  *   the result to the right by 1 byte. This allows us to multiply
 51 |  *   the prim_poly by the leading bits of the result. We then xor the result
 52 |  *   of that operation back with the result.
    |  *
    |  *   v, w      - scratch vectors supplied by the caller
    |  *   result    - 16-bit-lane product, updated in place
    |  *   prim_poly - reduction polynomial, replicated in every 8-bit lane
    |  *   initial   - non-zero: v is overwritten with the high bytes of result;
    |  *               zero: the high bytes are XORed into the existing v */
 53 | #define NEON_CFM_REDUCE(v, w, result, prim_poly, initial)               \
 54 |   do {								        \
 55 |     if (initial)                                                        \
 56 |       v = vshrn_n_u16 (vreinterpretq_u16_p16(result), 8);               \
 57 |     else                                                                \
 58 |       v = veor_u8 (v, vshrn_n_u16 (vreinterpretq_u16_p16(result), 8));  \
 59 |     w = vmull_p8 (prim_poly, vreinterpret_p8_u8(v));                    \
 60 |     result = vreinterpretq_p16_u16 (veorq_u16 (vreinterpretq_u16_p16(result), vreinterpretq_u16_p16(w))); \
 61 |   } while (0)
 62 | /*
    |  * Multiply a8 by b8 with one carry-less multiply followed by x reduction
    |  * passes (two are always run; a third when x >= 3, a fourth when x >= 4).
    |  * The operands are replicated across all lanes, but only lane 0 of the
    |  * narrowed result is returned.
    |  */
 63 | static
 64 | inline
 65 | gf_val_32_t
 66 | gf_w8_neon_clm_multiply_x (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8, int x)
 67 | {
 68 |   gf_val_32_t rv = 0;
 69 |   poly8x8_t       a, b;
 70 |   uint8x8_t       v;
 71 |   poly16x8_t      result;
 72 |   poly8x8_t       prim_poly;
 73 |   poly16x8_t      w;
 74 |   gf_internal_t * h = gf->scratch;
 75 | 
 76 |   a =  vdup_n_p8 (a8);
 77 |   b =  vdup_n_p8 (b8);
 78 | 
    |   /* NOTE(review): the mask keeps 9 bits, but vdup_n_p8 fills 8-bit lanes;
    |      the region routine later in this file masks with 0xff -- confirm the
    |      two are meant to agree. */
 79 |   prim_poly = vdup_n_p8 ((uint32_t)(h->prim_poly & 0x1ffULL));
 80 | 
 81 |   /* Do the initial multiply */
 82 |   result = vmull_p8 (a, b);
 83 | 
 84 |   /* Ben: Do prim_poly reduction twice. We are guaranteed that we will only
 85 |      have to do the reduction at most twice, because (w-2)/z == 2. Where
 86 |      z is equal to the number of zeros after the leading 1 */
 87 |   NEON_CFM_REDUCE (v, w, result, prim_poly, 1);
 88 |   NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
 89 |   if (x >= 3) {
 90 |     NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
 91 |   }
 92 |   if (x >= 4) {
 93 |     NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
 94 |   }
 95 |   /* Narrow each 16-bit lane to 8 bits and return lane 0 as the product. */
 96 |   rv = (gf_val_32_t)vget_lane_u8 (vmovn_u16 (vreinterpretq_u16_p16 (result)), 0);
 97 | 
 98 |   return rv;
 99 | }
100 | /* Generates gf_w8_neon_clm_multiply_<x>, a three-argument wrapper that calls
    |    gf_w8_neon_clm_multiply_x() with the reduction count fixed to x.
    |    Instantiated below for x = 2, 3 and 4. */
101 | #define CLM_MULTIPLY(x) \
102 | static gf_val_32_t gf_w8_neon_clm_multiply_ ## x (gf_t *gf, gf_val_32_t a8, gf_val_32_t b8) \
103 | {\
104 |     return gf_w8_neon_clm_multiply_x (gf, a8, b8, x);\
105 | }
106 | 
107 | CLM_MULTIPLY(2)
108 | CLM_MULTIPLY(3)
109 | CLM_MULTIPLY(4)
110 | 
111 | static inline void
112 | neon_clm_multiply_region_from_single_x(gf_t *gf, uint8_t *s8, uint8_t *d8,
113 |                                        gf_val_32_t val, uint8_t *d_end,
114 |                                        int xor, int x)
115 | {
116 |   gf_internal_t * h = gf->scratch;
117 |   poly8x8_t       a, b;
118 |   uint8x8_t       c, v;
119 |   poly16x8_t      result;
120 |   poly8x8_t       prim_poly;
121 |   poly16x8_t      w;
122 | 
123 |   a         = vdup_n_p8 (val);
124 |   prim_poly = vdup_n_p8 ((uint8_t)(h->prim_poly & 0xffULL));
125 | 
126 |   while (d8 < d_end) {
127 |     b = vld1_p8 ((poly8_t *) s8);
128 | 
129 |     if (xor)
130 |         c = vld1_u8 (d8);
131 | 
132 |     result = vmull_p8 (a, b);
133 | 
134 |     NEON_CFM_REDUCE(v, w, result, prim_poly, 1);
135 |     NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
136 |     if (x >= 3) {
137 |       NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
138 |     }
139 |     if (x >= 4) {
140 |       NEON_CFM_REDUCE (v, w, result, prim_poly, 0);
141 |     }
142 |     v = vmovn_u16 (vreinterpretq_u16_p16 (result));
143 |     if (xor)
144 |       v = veor_u8 (c, v);
145 | 
146 |     vst1_u8 (d8, v);
147 | 
148 |     d8 += 8;
149 |     s8 += 8;
150 |   }
151 | }
152 | 
153 | #define CLM_MULT_REGION(x)                                              \
154 | static void                                                             \
155 | gf_w8_neon_clm_multiply_region_from_single_ ## x (gf_t *gf, void *src,  \
156 |                                                   void *dest,           \
157 |                                                   gf_val_32_t val, int bytes, \
158 |                                                   int xor)              \
159 | {                                                                       \
160 |   gf_region_data rd;                                                    \
161 |   uint8_t *s8;                                                          \
162 |   uint8_t *d8;                                                          \
163 |                                                                         \
164 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }           \
165 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }       \
166 |                                                                         \
167 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);          \
168 |   gf_do_initial_region_alignment(&rd);                                  \
169 |   s8 = (uint8_t *) rd.s_start;                                          \
170 |   d8 = (uint8_t *) rd.d_start;                                          \
171 |                                                                         \
172 |   if (xor)                                                              \
173 |     neon_clm_multiply_region_from_single_x (gf, s8, d8, val, rd.d_top, 1, x); \
174 |   else                                                                  \
175 |     neon_clm_multiply_region_from_single_x (gf, s8, d8, val, rd.d_top, 0, x);\
176 |   gf_do_final_region_alignment(&rd);                                    \
177 | }
178 | 
179 | CLM_MULT_REGION(2)
180 | CLM_MULT_REGION(3)
181 | CLM_MULT_REGION(4)
182 | 
183 | 
184 | int gf_w8_neon_cfm_init(gf_t *gf)
185 | {
186 |   gf_internal_t *h;
187 | 
188 |   h = (gf_internal_t *) gf->scratch;
189 | 
190 |   if ((0xe0 & h->prim_poly) == 0){
191 |     gf->multiply.w32 = gf_w8_neon_clm_multiply_2;
192 |     gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_2;
193 |   }else if ((0xc0 & h->prim_poly) == 0){
194 |     gf->multiply.w32 = gf_w8_neon_clm_multiply_3;
195 |     gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_3;
196 |   }else if ((0x80 & h->prim_poly) == 0){
197 |     gf->multiply.w32 = gf_w8_neon_clm_multiply_4;
198 |     gf->multiply_region.w32 = gf_w8_neon_clm_multiply_region_from_single_4;
199 |   }else{
200 |     return 0;
201 |   }
202 |   return 1;
203 | }
204 | 
/*
 * When ARCH_AARCH64 is not defined, supply vqtbl1q_u8 as a macro: the low
 * and high halves of the index vector are looked up separately with
 * vtbl2_u8 and the two 8-byte results are joined again.
 */
#ifndef ARCH_AARCH64
#define vqtbl1q_u8(table, idx)                        \
  vcombine_u8(vtbl2_u8(table, vget_low_u8(idx)),      \
              vtbl2_u8(table, vget_high_u8(idx)))
#endif
209 | 
210 | static
211 | void
212 | gf_w8_split_multiply_region_neon(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
213 | {
214 |   uint8_t *bh, *bl, *sptr, *dptr;
215 |   uint8x16_t r, va, vh, vl, loset;
216 | #ifdef ARCH_AARCH64
217 |   uint8x16_t mth, mtl;
218 | #else
219 |   uint8x8x2_t mth, mtl;
220 | #endif
221 |   struct gf_w8_half_table_data *htd;
222 |   gf_region_data rd;
223 | 
224 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
225 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
226 | 
227 |   htd = (struct gf_w8_half_table_data *) ((gf_internal_t *) (gf->scratch))->private;
228 | 
229 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 16);
230 |   gf_do_initial_region_alignment(&rd);
231 | 
232 |   bh = (uint8_t *) htd->high;
233 |   bh += (val << 4);
234 |   bl = (uint8_t *) htd->low;
235 |   bl += (val << 4);
236 | 
237 |   sptr = rd.s_start;
238 |   dptr = rd.d_start;
239 | 
240 | #ifdef ARCH_AARCH64
241 |   mth = vld1q_u8 (bh);
242 |   mtl = vld1q_u8 (bl);
243 | #else
244 |   mth.val[0] = vld1_u8 (bh);
245 |   mtl.val[0] = vld1_u8 (bl);
246 |   mth.val[1] = vld1_u8 (bh + 8);
247 |   mtl.val[1] = vld1_u8 (bl + 8);
248 | #endif
249 | 
250 |   loset = vdupq_n_u8(0xf);
251 | 
252 |   if (xor) {
253 |     while (sptr < (uint8_t *) rd.s_top) {
254 |       va = vld1q_u8 (sptr);
255 | 
256 |       vh = vshrq_n_u8 (va, 4);
257 |       vl = vandq_u8 (va, loset);
258 |       va = vld1q_u8 (dptr);
259 | 
260 |       vh = vqtbl1q_u8 (mth, vh);
261 |       vl = vqtbl1q_u8 (mtl, vl);
262 | 
263 |       r = veorq_u8 (vh, vl);
264 | 
265 |       vst1q_u8 (dptr, veorq_u8 (va, r));
266 | 
267 |       dptr += 16;
268 |       sptr += 16;
269 |     }
270 |   } else {
271 |     while (sptr < (uint8_t *) rd.s_top) {
272 |       va = vld1q_u8 (sptr);
273 | 
274 |       vh = vshrq_n_u8 (va, 4);
275 |       vl = vandq_u8 (va, loset);
276 | #ifdef ARCH_AARCH64
277 |       vh = vqtbl1q_u8 (mth, vh);
278 |       vl = vqtbl1q_u8 (mtl, vl);
279 | #else
280 |       vh = vcombine_u8 (vtbl2_u8 (mth, vget_low_u8 (vh)),
281 | 			vtbl2_u8 (mth, vget_high_u8 (vh)));
282 |       vl = vcombine_u8 (vtbl2_u8 (mtl, vget_low_u8 (vl)),
283 | 			vtbl2_u8 (mtl, vget_high_u8 (vl)));
284 | #endif
285 | 
286 |       r = veorq_u8 (vh, vl);
287 | 
288 |       vst1q_u8(dptr, r);
289 | 
290 |       dptr += 16;
291 |       sptr += 16;
292 |     }
293 |   }
294 | 
295 |   gf_do_final_region_alignment(&rd);
296 | }
297 | 
298 | 
299 | void gf_w8_neon_split_init(gf_t *gf)
300 | {
301 |   gf->multiply_region.w32 = gf_w8_split_multiply_region_neon;
302 | }
303 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # LRC-Erasure-Code
  2 | 
  3 | LRC(Local Reconstruction Codes) Erasure Code based on Reed-Solomon with Vandermonde matrix.
  4 | 
  5 | <!-- START doctoc generated TOC please keep comment here to allow auto update -->
  6 | <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
  7 | **Table of Contents**  *generated with [DocToc](https://github.com/thlorenz/doctoc)*
  8 | 
  9 | - [Status](#status)
 10 | - [Description](#description)
 11 |   - [LRC parameters and the differences from original Erasure Code](#lrc-parameters-and-the-differences-from-original-erasure-code)
 12 | - [Synopsis](#synopsis)
 13 | - [Install](#install)
 14 | - [API](#api)
 15 |   - [lrc_init_n](#lrc_init_n)
 16 |   - [lrc_destroy](#lrc_destroy)
 17 |   - [lrc_encode](#lrc_encode)
 18 |   - [lrc_decode](#lrc_decode)
 19 |   - [lrc_get_source](#lrc_get_source)
 20 |   - [lrc_buf_init](#lrc_buf_init)
 21 |   - [lrc_buf_destroy](#lrc_buf_destroy)
 22 | - [Analysis](#analysis)
 23 |   - [Reliability](#reliability)
 24 |   - [IO bandwidth](#io-bandwidth)
 25 | - [TODO](#todo)
 26 | - [Author](#author)
 27 | - [Copyright and License](#copyright-and-license)
 28 | 
 29 | <!-- END doctoc generated TOC please keep comment here to allow auto update -->
 30 | 
 31 | # Status
 32 | 
 33 | This library is considered production ready.
 34 | 
 35 | And it is the core EC implementation in [open.sinastorage.com](http://open.sinastorage.com), which has been protecting dozens of PB user data.
 36 | 
 37 | # Description
 38 | 
 39 | LRC(Local Reconstruction Codes) Erasure Code supplies almost the same functionality and reliability as original Erasure Code does.
 40 | And at the same time it reduces reconstruction IO consumption by 50% or more.
 41 | 
 42 | Erasure Code Algorithm makes it possible to achieve as high reliability(11 9s)
 43 | as 3-copy replication provides, with highly reduced storage overhead(130% against 300%).
 44 | 
 45 | But one of the problems with Erasure Code is the high IO consumption during
 46 | data reconstruction.
 47 | Normally to reconstruct `1` chunk it is required to read `n` chunks.
 48 | 
 49 | LRC is a trade-off between storage cost and IO cost.
 50 | 
 51 | With several additional **local** `Coding` chunks calculated from subsets of `Data`
 52 | chunks, average IO consumption for reconstruction would be reduced to
 53 | `1 / number_of_local_sets`(normally 10% ~ 50%), at the cost of only about 10%(depends on LRC policy) more space used.
 54 | 
 55 | ## LRC parameters and the differences from original Erasure Code
 56 | 
 57 | Suppose a collection contains 5 data chunks.
 58 | To create LRC Erasure Code with:
 59 | 
 60 | *   2 local EC codes;
 61 | *   2 global EC codes;
 62 | 
 63 | LRC should be initialized with:
 64 | 
 65 | `lrc_init_n(lrc, 2, (uint8_t[]){3, 2}, 4)`
 66 | 
 67 | Here in this example, the first local EC code will be created from the 1st 3
 68 | data chunks, and the 2nd local EC code will be created from the last 2 data
 69 | chunks.
 70 | 
 71 | 4 is the total number of codes, which includes:
 72 | *   2 of them are local EC codes, for data[0, 1, 2] and data[3, 4] respectively.
 73 | *   2 additional global EC codes.
 74 | 
 75 | The encoding matrix for this LRC parameter is:
 76 | 
 77 | ```
 78 | 1  1  1  0  0
 79 | 0  0  0  1  1
 80 | 1  2  4  8 16
 81 | 1  3  9 27 81
 82 | ```
 83 | 
 84 | LRC-EC `(3,2)+4` is identical to original EC `5+3`, except that it splits the first row
 85 | into 2 rows (which makes it possible to use fewer data/code chunks to reconstruct one).
 86 | The original EC `5+3` encoding matrix is:
 87 | 
 88 | ```
 89 | 1  1  1  1  1
 90 | 1  2  4  8 16
 91 | 1  3  9 27 81
 92 | ```
 93 | 
 94 | If you prefer to use original EC `5+3` like above, `lrc` can be initialized with:
 95 | 
 96 | `lrc_init_n(lrc, 1, (uint8_t[]){5}, 3)`
 97 | 
 98 | # Synopsis
 99 | 
100 | ```c
101 | #include "lrc.h"
102 | 
103 | #include <stdio.h>
104 | #include <stdlib.h>
105 | #include <string.h>
106 | 
107 | int main(int argc, char **argv) {
108 | 
109 |     int        size = 16;
110 |     lrc_t     *lrc  = &(lrc_t) {0};
111 |     lrc_buf_t *buf  = &(lrc_buf_t) {0};
112 | 
113 |     if (lrc_init_n(lrc, 2, (uint8_t[]) {2, 2}, 3) != 0) {
114 |         exit(-1);
115 |     }
116 | 
117 |     if (lrc_buf_init(buf, lrc, size) != 0) {
118 |         exit(-1);
119 |     }
120 | 
121 |     strcpy(buf->data[0], "hello");
122 |     strcpy(buf->data[1], "world");
123 |     strcpy(buf->data[2], "lrc");
124 |     strcpy(buf->data[3], "ec");
125 | 
126 |     if (lrc_encode(lrc, buf) != 0) {
127 |         exit(-1);
128 |     }
129 | 
130 |     strcpy(buf->data[0], "*");
131 | 
132 |     printf("damaged: %s %s %s %s\n", buf->data[0], buf->data[1], buf->data[2], buf->data[3]);
133 | 
134 |     int8_t erased[2 + 2 + 3] = {
135 |         1, 0,
136 |         0, 0,
137 |         0, 0, 0};
138 | 
139 |     if (lrc_decode(lrc, buf, erased) != 0) {
140 |         exit(-1);
141 |     }
142 | 
143 |     printf("reconstructed: %s %s %s %s\n", buf->data[0], buf->data[1], buf->data[2], buf->data[3]);
144 | 
145 |     lrc_destroy(lrc);
146 |     lrc_buf_destroy(buf);
147 | 
148 |     return 0;
149 | }
150 | ```
151 | 
152 | # Install
153 | 
154 | ```shell
155 | ./configure
156 | make
157 | sudo make install
158 | 
159 | # run a test
160 | cd test
161 | gcc example.c -o example -llrc
162 | ./example
163 | ```
164 | 
165 | # API
166 | 
167 | ## lrc_init_n
168 | 
169 | `int lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_arr, int m)`
170 | 
171 | Initializes LRC descriptor `lrc`.
172 | 
173 | Parameters:
174 | 
175 | * `lrc`
176 | Pointer to a struct `lrc_t`. An `lrc_t` describes the parameters LRC uses to
177 | generate codes.
178 | 
179 | * `n_local`
180 | Specify the number of local EC to create.
181 | 
182 | * `local_arr`
183 | An array of length `n_local` of number of data chunks in each local EC.
184 | 
185 | * `m`
186 | Specifies the total number of codes. It must be equal to or greater than `n_local`.
187 | Thus there are `n_local` local EC codes and `m - n_local + 1` global EC codes,
188 | because the first global EC code can be calculated by `local-code-1 ^ local-code-2 ^ ...`.
189 | 
190 | Returns:
191 | 
192 | * `0`
193 | If Success.
194 | 
195 | * `LRC_INIT_TWICE`
196 | If `lrc` is already initialized.
197 | 
198 | * `LRC_INVALID_M`
199 | If `m` is less than `n_local`.
200 | 
201 | * `LRC_OUT_OF_MEMORY`
202 | If any `malloc()` fails during initializing.
203 | 
204 | ## lrc_destroy
205 | 
206 | `void lrc_destroy(lrc_t *lrc);`
207 | 
208 | Free memory allocated by `lrc_init_n()`. It does not free `*lrc` itself.
209 | 
210 | ## lrc_encode
211 | 
212 | `int lrc_encode(lrc_t *lrc, lrc_buf_t *lrc_buf);`
213 | 
214 | Generate `m`(from `lrc_init_n()`) code chunks from all `k` data chunks. `k = sum(local_arr)`.
215 | `lrc_buf_t` is the container of all data chunks and code chunks. It must be
216 | initialized with `lrc_buf_init()` before use.
217 | 
218 | After `lrc_encode()`, your program should save `lrc_buf->data[0..k-1]` and
219 | `lrc_buf->code[0..m-1]` on persistent storage for later reconstruction.
220 | 
221 | Returns:
222 | 
223 | * `0`
224 | If Success.
225 | 
226 | * `LRC_OUT_OF_MEMORY`
227 | If any `malloc()` fails during encoding.
228 | 
229 | ## lrc_decode
230 | 
231 | `int lrc_decode(lrc_t *lrc, lrc_buf_t *lrc_buf, int8_t *erased);`
232 | 
233 | Reconstruct lost data and code chunks from existing data and code.
234 | 
235 | If too many data or code chunks are lost, reconstruction fails with `LRC_UNRECOVERABLE`.
236 | 
237 | Parameters:
238 | 
239 | * `lrc_buf`
240 | Specifies data/code buffer for reconstruction and the buffer to store
241 | reconstructed data/code.
242 | 
243 | * `erased`
244 | Specifies which data / code are missing that needs to reconstruct.
245 | It is an array of length `k + m`.
246 | Array element `erased[i]` value `1` means the data(`i<k`) or code(`k<=i<k+m`) is missing,
247 | `0` means the data/code is present in `lrc_buf->data[i]` or `lrc_buf->code[i-k]`.
248 | 
249 | Returns:
250 | 
251 | * `0`
252 | If Success.
253 | 
254 | * `LRC_OUT_OF_MEMORY`
255 | If any `malloc()` fails during decoding.
256 | 
257 | * `LRC_UNRECOVERABLE`
258 | If there is not enough data / code to reconstruct the missing ones.
259 | 
260 | ## lrc_get_source
261 | 
262 | `int lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source);`
263 | 
264 | If LRC is used(`n_local` passed to `lrc_init_n()` is greater than 1), not
265 | always all data/code are required.
266 | This function calculates which data/code is required.
267 | 
268 | For example if LRC parameter is `2, 2, 3`, and `erased = {1, 0, 0, 0, 0, 0, 0}`
269 | which means only 0-th data is missing, `source` will be filled in with: `{0, 1, 0, 0, 1, 0, 0}`
270 | which means only `data[1], code[0]` are required to reconstruct the missing
271 | `data[0]`.
272 | 
273 | Parameters:
274 | 
275 | * `erased`
276 | Specifies missing data/code. There must be at least `k+m` 0/1 elements in
277 | `erased`.
278 | 
279 | * `source`
280 | Specifies where to store the indexes of source data/code for reconstruction.
281 | There must be at least `k+m` available bytes in `source`.
282 | 
283 | Returns:
284 | 
285 | * `0`
286 | If Success.
287 | 
288 | * `LRC_UNRECOVERABLE`
289 | If there is not enough data / code to reconstruct the missing ones.
290 | 
291 | ## lrc_buf_init
292 | 
293 | `int  lrc_buf_init(lrc_buf_t *lrc_buf, lrc_t *lrc, int64_t chunk_size);`
294 | 
295 | Allocate memory that will be used during reconstruction,
296 | which includes: `k+m` byte arrays and a matrix for reconstruction.
297 | 
298 | Parameters:
299 | 
300 | * `lrc`
301 | Specifies LRC parameters. It must have been initialized by `lrc_init_n()` first.
302 | 
303 | * `chunk_size`
304 | Specifies the size for each of `k+m` data/code buffers.
305 | Internally, actual memory allocated is 16 byte aligned in order to utilize SIMD
306 | instructions.
307 | 
308 | Returns:
309 | 
310 | * `0`
311 | If Success.
312 | 
313 | * `LRC_INIT_TWICE`
314 | If `lrc_buf` is already initialized.
315 | 
316 | * `LRC_OUT_OF_MEMORY`
317 | If any `malloc()` fails during initializing.
318 | 
319 | ## lrc_buf_destroy
320 | 
321 | `void lrc_buf_destroy(lrc_buf_t *lrc_buf);`
322 | 
323 | Free memory allocated by `lrc_buf_init()`.
324 | It does not free `lrc_buf`.
325 | 
326 | 
327 | This is a specialized Erasure Code implementation for storage service.
328 | What matrix to choose does not matter,
329 | because usually most CPU cycles are spent on matrix multiplication to decode lost data,
330 | but not on finding the inverse matrix.
331 | 
332 | In this implementation Vandermonde matrix is used.
333 | 
334 | # Analysis
335 | 
336 | ## Reliability
337 | 
338 | *   If `k`(number of data chunks) is not very large, reliability of Erasure Code(LRC-EC or EC) with `m` code is similar to that of n-copy replication with `m+1` copies.
339 | 
340 | *   LRC-EC can always reconstruct `m - n_local + 1` data loss. In a `(6,6)+4`
341 | LRC-EC, 3 data loss is always reconstructible.
342 | 
343 | *   LRC-EC with `m` codes can not always reconstruct `m` data loss.
344 | In a `(6,6)+4` LRC, there are 1820 different combinations but only 1568 of
345 | them can be reconstructed(87%).
346 | 
347 | ## IO bandwidth
348 | 
349 | In calculation, each TB of storage requires
350 | `k * 0.13G` IO throughput(both for network and disk drive) each day
351 | to reconstruct lost data.
352 | Where `k` is the number of members in an Erasure Code group.
353 | 
354 | # TODO
355 | 
356 | *   Another local code that covers all global codes.
357 | 
358 | # Author
359 | 
360 | Zhang Yanpo (张炎泼) <drdr.xp@gmail.com>
361 | 
362 | # Copyright and License
363 | 
364 | The MIT License (MIT)
365 | 
366 | Copyright (c) 2015 Zhang Yanpo (张炎泼) <drdr.xp@gmail.com>
367 | 


--------------------------------------------------------------------------------
/src/galois.c:
--------------------------------------------------------------------------------
  1 | /* *
  2 |  * Copyright (c) 2014, James S. Plank and Kevin Greenan
  3 |  * All rights reserved.
  4 |  *
  5 |  * Jerasure - A C/C++ Library for a Variety of Reed-Solomon and RAID-6 Erasure
  6 |  * Coding Techniques
  7 |  *
  8 |  * Revision 2.0: Galois Field backend now links to GF-Complete
  9 |  *
 10 |  * Redistribution and use in source and binary forms, with or without
 11 |  * modification, are permitted provided that the following conditions
 12 |  * are met:
 13 |  *
 14 |  *  - Redistributions of source code must retain the above copyright
 15 |  *    notice, this list of conditions and the following disclaimer.
 16 |  *
 17 |  *  - Redistributions in binary form must reproduce the above copyright
 18 |  *    notice, this list of conditions and the following disclaimer in
 19 |  *    the documentation and/or other materials provided with the
 20 |  *    distribution.
 21 |  *
 22 |  *  - Neither the name of the University of Tennessee nor the names of its
 23 |  *    contributors may be used to endorse or promote products derived
 24 |  *    from this software without specific prior written permission.
 25 |  *
 26 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 27 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 28 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 29 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 30 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 31 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 32 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 33 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 34 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 35 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 36 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 37 |  * POSSIBILITY OF SUCH DAMAGE.
 38 |  */
 39 | 
 40 | /* Jerasure's authors:
 41 | 
 42 |    Revision 2.x - 2014: James S. Plank and Kevin M. Greenan
 43 |    Revision 1.2 - 2008: James S. Plank, Scott Simmerman and Catherine D. Schuman.
 44 |    Revision 1.0 - 2007: James S. Plank
 45 |  */
 46 | 
 47 | #include <stdio.h>
 48 | #include <stdlib.h>
 49 | #include <string.h>
 50 | #include <signal.h>
 51 | 
 52 | #include "galois.h"
 53 | 
 54 | #define MAX_GF_INSTANCES 64
 55 | gf_t *gfp_array[MAX_GF_INSTANCES] = { 0 };
 56 | int  gfp_is_composite[MAX_GF_INSTANCES] = { 0 };
 57 | 
 58 | gf_t *galois_get_field_ptr(int w)
 59 | {
 60 |   if (gfp_array[w] != NULL) {
 61 |     return gfp_array[w];
 62 |   }
 63 | 
 64 |   return NULL;
 65 | }
 66 | 
 67 | gf_t* galois_init_field(int w,
 68 |                         int mult_type,
 69 |                         int region_type,
 70 |                         int divide_type,
 71 |                         uint64_t prim_poly,
 72 |                         int arg1,
 73 |                         int arg2)
 74 | {
 75 |   int scratch_size;
 76 |   void *scratch_memory;
 77 |   gf_t *gfp;
 78 | 
 79 |   if (w <= 0 || w > 32) {
 80 |     fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w);
 81 |     exit(1);
 82 |   }
 83 | 
 84 |   gfp = (gf_t *) malloc(sizeof(gf_t));
 85 |   if (!gfp) {
 86 |     fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w);
 87 |     exit(1);
 88 |   }
 89 | 
 90 |   scratch_size = gf_scratch_size(w, mult_type, region_type, divide_type, arg1, arg2);
 91 |   if (!scratch_size) {
 92 |     fprintf(stderr, "ERROR -- cannot get scratch size for base field w=%d\n", w);
 93 |     exit(1);
 94 |   }
 95 | 
 96 |   scratch_memory = malloc(scratch_size);
 97 |   if (!scratch_memory) {
 98 |     fprintf(stderr, "ERROR -- cannot get scratch memory for base field w=%d\n", w);
 99 |     exit(1);
100 |   }
101 | 
102 |   if(!gf_init_hard(gfp,
103 |                    w, 
104 |                    mult_type, 
105 |                    region_type, 
106 |                    divide_type, 
107 |                    prim_poly, 
108 |                    arg1, 
109 |                    arg2, 
110 |                    NULL, 
111 |                    scratch_memory))
112 |   {
113 |     fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w);
114 |     exit(1);
115 |   }
116 | 
117 |   gfp_is_composite[w] = 0;
118 |   return gfp;
119 | }
120 | 
121 | gf_t* galois_init_composite_field(int w,
122 |                                 int region_type,
123 |                                 int divide_type,
124 |                                 int degree,
125 |                                 gf_t* base_gf)
126 | {
127 |   int scratch_size;
128 |   void *scratch_memory;
129 |   gf_t *gfp;
130 |   
131 |   if (w <= 0 || w > 32) {
132 |     fprintf(stderr, "ERROR -- cannot init composite field for w=%d\n", w);
133 |     exit(1);
134 |   }
135 |   
136 |   gfp = (gf_t *) malloc(sizeof(gf_t));
137 |   if (!gfp) {
138 |     fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w);
139 |     exit(1);
140 |   }
141 | 
142 |   scratch_size = gf_scratch_size(w, GF_MULT_COMPOSITE, region_type, divide_type, degree, 0);
143 |   if (!scratch_size) {
144 |     fprintf(stderr, "ERROR -- cannot get scratch size for composite field w=%d\n", w);
145 |     exit(1);
146 |   }
147 | 
148 |   scratch_memory = malloc(scratch_size);
149 |   if (!scratch_memory) {
150 |     fprintf(stderr, "ERROR -- cannot get scratch memory for composite field w=%d\n", w);
151 |     exit(1);
152 |   }
153 | 
154 |   if(!gf_init_hard(gfp,
155 |                    w,
156 |                    GF_MULT_COMPOSITE,
157 |                    region_type,
158 |                    divide_type,
159 |                    0, 
160 |                    degree, 
161 |                    0, 
162 |                    base_gf,
163 |                    scratch_memory))
164 |   {
165 |     fprintf(stderr, "ERROR -- cannot init default composite field for w=%d\n", w);
166 |     exit(1);
167 |   }
168 |   gfp_is_composite[w] = 1;
169 |   return gfp;
170 | }
171 | 
172 | static void galois_init_default_field(int w)
173 | {
174 |   if (w <= 0 || w > 32) {
175 |     fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w);
176 |     exit(1);
177 |   }
178 | 
179 |   if (gfp_array[w] == NULL) {
180 |     gfp_array[w] = (gf_t*)malloc(sizeof(gf_t));
181 |     if (gfp_array[w] == NULL) {
182 |       fprintf(stderr, "ERROR -- cannot allocate memory for Galois field w=%d\n", w);
183 |       exit(1);
184 |     }
185 |   }
186 | 
187 |   if (!gf_init_easy(gfp_array[w], w)) {
188 |     fprintf(stderr, "ERROR -- cannot init default Galois field for w=%d\n", w);
189 |     exit(1);
190 |   }
191 | }
192 | 
193 | 
194 | static int is_valid_gf(gf_t *gf, int w)
195 | {
196 |   // TODO: I assume we may eventually
197 |   // want to do w=64 and 128, so w
198 |   // will be needed to perform this check
199 |   (void)w;
200 | 
201 |   if (gf == NULL) {
202 |     return 0;
203 |   }
204 |   if (gf->multiply.w32 == NULL) {
205 |     return 0;
206 |   }
207 |   if (gf->multiply_region.w32 == NULL) {
208 |     return 0;
209 |   }
210 |   if (gf->divide.w32 == NULL) {
211 |     return 0;
212 |   }
213 |   if (gf->inverse.w32 == NULL) {
214 |     return 0;
215 |   }
216 |   if (gf->extract_word.w32 == NULL) {
217 |     return 0;
218 |   }
219 | 
220 |   return 1;
221 | }
222 | 
223 | void galois_change_technique(gf_t *gf, int w)
224 | {
225 |   if (w <= 0 || w > 32) {
226 |     fprintf(stderr, "ERROR -- cannot support Galois field for w=%d\n", w);
227 |     exit(1);
228 |   }
229 | 
230 |   if (!is_valid_gf(gf, w)) {
231 |     fprintf(stderr, "ERROR -- overriding with invalid Galois field for w=%d\n", w);
232 |     exit(1);
233 |   }
234 | 
235 |   if (gfp_array[w] != NULL) {
236 |     gf_free(gfp_array[w], gfp_is_composite[w]);
237 |   }
238 | 
239 |   gfp_array[w] = gf;
240 | }
241 | 
242 | int galois_single_multiply(int x, int y, int w)
243 | {
244 |   if (x == 0 || y == 0) return 0;
245 |   
246 |   if (gfp_array[w] == NULL) {
247 |     galois_init_default_field(w);
248 |   }
249 | 
250 |   if (w <= 32) {
251 |     return gfp_array[w]->multiply.w32(gfp_array[w], x, y);
252 |   } else {
253 |     fprintf(stderr, "ERROR -- Galois field not implemented for w=%d\n", w);
254 |     raise(SIGSEGV);
255 |     return 0;
256 |   }
257 | }
258 | 
259 | int galois_single_divide(int x, int y, int w)
260 | {
261 |   if (x == 0) return 0;
262 |   if (y == 0) return -1;
263 | 
264 |   if (gfp_array[w] == NULL) {
265 |     galois_init_default_field(w);
266 |   }
267 | 
268 |   if (w <= 32) {
269 |     return gfp_array[w]->divide.w32(gfp_array[w], x, y);
270 |   } else {
271 |     fprintf(stderr, "ERROR -- Galois field not implemented for w=%d\n", w);
272 |     raise(SIGSEGV);
273 |     return 0;
274 |   }
275 | }
276 | 
277 | void galois_w08_region_multiply(char *region,      /* Region to multiply */
278 |                                   int multby,       /* Number to multiply by */
279 |                                   int nbytes,        /* Number of bytes in region */
280 |                                   char *r2,          /* If r2 != NULL, products go here */
281 |                                   int add)
282 | {
283 |   if (gfp_array[8] == NULL) {
284 |     galois_init_default_field(8);
285 |   }
286 |   gfp_array[8]->multiply_region.w32(gfp_array[8], region, r2, multby, nbytes, add);
287 | }
288 | 
289 | void galois_w16_region_multiply(char *region,      /* Region to multiply */
290 |                                   int multby,       /* Number to multiply by */
291 |                                   int nbytes,        /* Number of bytes in region */
292 |                                   char *r2,          /* If r2 != NULL, products go here */
293 |                                   int add)
294 | {
295 |   if (gfp_array[16] == NULL) {
296 |     galois_init_default_field(16);
297 |   }
298 |   gfp_array[16]->multiply_region.w32(gfp_array[16], region, r2, multby, nbytes, add);
299 | }
300 | 
301 | 
302 | void galois_w32_region_multiply(char *region,      /* Region to multiply */
303 |                                   int multby,       /* Number to multiply by */
304 |                                   int nbytes,        /* Number of bytes in region */
305 |                                   char *r2,          /* If r2 != NULL, products go here */
306 |                                   int add)
307 | {
308 |   if (gfp_array[32] == NULL) {
309 |     galois_init_default_field(32);
310 |   }
311 |   gfp_array[32]->multiply_region.w32(gfp_array[32], region, r2, multby, nbytes, add);
312 | }
313 | 
314 | void galois_w8_region_xor(void *src, void *dest, int nbytes)
315 | {
316 |   if (gfp_array[8] == NULL) {
317 |     galois_init_default_field(8);
318 |   }
319 |   gfp_array[8]->multiply_region.w32(gfp_array[32], src, dest, 1, nbytes, 1);
320 | }
321 | 
322 | void galois_w16_region_xor(void *src, void *dest, int nbytes)
323 | {
324 |   if (gfp_array[16] == NULL) {
325 |     galois_init_default_field(16);
326 |   }
327 |   gfp_array[16]->multiply_region.w32(gfp_array[16], src, dest, 1, nbytes, 1);
328 | }
329 | 
330 | void galois_w32_region_xor(void *src, void *dest, int nbytes)
331 | {
332 |   if (gfp_array[32] == NULL) {
333 |     galois_init_default_field(32);
334 |   }
335 |   gfp_array[32]->multiply_region.w32(gfp_array[32], src, dest, 1, nbytes, 1);
336 | }
337 | 
/*
 * XOR nbytes of src into dest.
 *
 * Regions of 16 bytes or more are delegated to galois_w32_region_xor();
 * shorter ones are XORed one byte at a time here.
 */
void galois_region_xor(char *src, char *dest, int nbytes)
{
  int i;

  if (nbytes >= 16) {
    galois_w32_region_xor(src, dest, nbytes);
    return;
  }

  for (i = 0; i < nbytes; i++) {
    dest[i] ^= src[i];
  }
}
351 | 
/*
 * Multiplicative inverse of y in GF(2^w), computed as 1 / y with
 * galois_single_divide().  Returns -1 when y is 0.
 */
int galois_inverse(int y, int w)
{
  return (y == 0) ? -1 : galois_single_divide(1, y, w);
}
357 | 


--------------------------------------------------------------------------------
/src/neon/gf_w64_neon.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * Copyright (c) 2014: Janne Grunau <j@jannau.net>
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  - Redistributions of source code must retain the above copyright
 13 |  *     notice, this list of conditions and the following disclaimer.
 14 |  *
 15 |  *  - Redistributions in binary form must reproduce the above copyright
 16 |  *    notice, this list of conditions and the following disclaimer in
 17 |  *    the documentation and/or other materials provided with the
 18 |  *    distribution.
 19 |  *
 20 |  *  - Neither the name of the University of Tennessee nor the names of its
 21 |  *    contributors may be used to endorse or promote products derived
 22 |  *    from this software without specific prior written permission.
 23 |  *
 24 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 25 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 26 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 27 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 28 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 29 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 30 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 31 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 32 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 33 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 34 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 |  * POSSIBILITY OF SUCH DAMAGE.
 36 |  *
 37 |  * gf_w64_neon.c
 38 |  *
 39 |  * Neon routines for 64-bit Galois fields
 40 |  *
 41 |  */
 42 | 
 43 | #include "gf_int.h"
 44 | #include <stdio.h>
 45 | #include <stdlib.h>
 46 | #include "gf_w64.h"
 47 | 
 48 | 
/*
 * Fallback for builds where ARCH_AARCH64 is not defined: emulate the 16-byte
 * table lookup vqtbl1q_u8 with two vtbl2_u8 lookups, one per 8-byte half of
 * the index vector `v`, and recombine the halves.  In this configuration
 * `tbl` is passed straight to vtbl2_u8, so it has to be a uint8x8x2_t.
 */
#ifndef ARCH_AARCH64
#define vqtbl1q_u8(tbl, v) vcombine_u8(vtbl2_u8(tbl, vget_low_u8(v)),   \
                                       vtbl2_u8(tbl, vget_high_u8(v)))
#endif
 53 | 
/*
 * NEON kernel for the w=64 "split 4, lazy" region multiply on data in the
 * alternate mapping (NOTE(review): the layout itself is not defined in this
 * file; presumably each 16-byte vector of a 128-byte group carries one byte
 * position of 16 words -- confirm against the altmap definition).
 *
 * gf    - field handle; gf->scratch is read as a gf_internal_t whose
 *         `private` member holds the gf_split_4_64_lazy_data tables.
 * src   - input region; advanced by 128 bytes per pass of the main loop.
 * dst   - output region; advanced by 128 bytes per pass of the main loop.
 * d_end - the main loop runs while dst < d_end.
 * val   - not referenced in this function.
 * xor   - nonzero: products are XORed onto the existing dst contents;
 *         zero: dst is overwritten.
 *
 * Side effect: while building the byte tables every ld->tables[i][k] entry is
 * shifted right by 8 bits eight times, so the tables in the field's private
 * data are consumed by this call.
 */
static
inline
void
neon_w64_split_4_lazy_altmap_multiply_region(gf_t *gf, uint64_t *src,
                                             uint64_t *dst, uint64_t *d_end,
                                             uint64_t val, int xor)
{
  unsigned i, j, k;
  uint8_t btable[16];
#ifdef ARCH_AARCH64
  uint8x16_t tables[16][8];
#else
  uint8x8x2_t tables[16][8];
#endif
  uint8x16_t p[8], mask1, si;

  gf_internal_t *h = (gf_internal_t *) gf->scratch;
  struct gf_split_4_64_lazy_data *ld = (struct gf_split_4_64_lazy_data *) h->private;

  /*
   * tables[i][j] is a 16-entry byte table holding byte j (least significant
   * first) of every entry of ld->tables[i].  The low byte is peeled off and
   * the entry shifted down, once per j.
   */
  for (i = 0; i < 16; i++) {
    for (j = 0; j < 8; j++) {
      for (k = 0; k < 16; k++) {
        btable[k] = (uint8_t) ld->tables[i][k];
        ld->tables[i][k] >>= 8;
      }
#ifdef ARCH_AARCH64
      tables[i][j] = vld1q_u8(btable);
#else
      tables[i][j].val[0] = vld1_u8(btable);
      tables[i][j].val[1] = vld1_u8(btable + 8);
#endif
    }
  }

  /* selects the low nibble of every byte */
  mask1 = vdupq_n_u8(0xf);

  while (dst < d_end) {

    /* p[j] accumulates output vector j: seeded from dst when xor-ing, else 0 */
    if (xor) {
      for (i = 0; i < 8; i++)
        p[i] = vld1q_u8((uint8_t *) (dst + i * 2));
    } else {
      for (i = 0; i < 8; i++)
        p[i] = vdupq_n_u8(0);
    }

    /*
     * Source vector k is looked up through tables[2k] with its low nibbles
     * and through tables[2k+1] with its high nibbles; i walks 0..15.
     */
    i = 0;
    for (k = 0; k < 8; k++) {
      uint8x16_t v0 = vld1q_u8((uint8_t *) src);
      src += 2;

      si = vandq_u8(v0, mask1);
      for (j = 0; j < 8; j++) {
        p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si));
      }
      i++;
      si = vshrq_n_u8(v0, 4);
      for (j = 0; j < 8; j++) {
        p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si));
      }
      i++;

    }
    /* write the eight result vectors back, 16 bytes each */
    for (i = 0; i < 8; i++) {
      vst1q_u8((uint8_t *) dst, p[i]);
      dst += 2;
    }
  }
}
123 | 
/*
 * NEON kernel for the w=64 "split 4, lazy" region multiply on data in the
 * standard (non-altmap) layout.
 *
 * Each pass of the main loop loads eight 16-byte vectors (sixteen 64-bit
 * words) from src, de-interleaves them with three rounds of vuzp (32-, 16-
 * and 8-bit lanes), runs the same nibble table lookups as the altmap kernel,
 * re-interleaves the result with three rounds of vzip and stores 128 bytes
 * to dst.
 * NOTE(review): the unzip network presumably gathers one byte position of
 * all sixteen words into each vector -- confirm the exact lane order before
 * relying on it.
 *
 * gf    - field handle; gf->scratch is read as a gf_internal_t whose
 *         `private` member holds the gf_split_4_64_lazy_data tables.
 * src   - input region; advanced by 128 bytes per pass.
 * dst   - output region; advanced by 128 bytes per pass.
 * d_end - the main loop runs while dst < d_end.
 * val   - not referenced in this function.
 * xor   - nonzero: products are XORed onto the existing dst contents;
 *         zero: dst is overwritten.
 *
 * Side effect: while building the byte tables every ld->tables[i][k] entry is
 * shifted right by 8 bits eight times, so the tables in the field's private
 * data are consumed by this call.
 */
static
inline
void
neon_w64_split_4_lazy_multiply_region(gf_t *gf, uint64_t *src, uint64_t *dst,
                                      uint64_t *d_end, uint64_t val, int xor)
{
  unsigned i, j, k;
  uint8_t btable[16];
#ifdef ARCH_AARCH64
  uint8x16_t tables[16][8];
#else
  uint8x8x2_t tables[16][8];
#endif
  uint8x16_t p[8], mask1, si;
  uint64x2_t st[8];
  uint32x4x2_t s32[4];
  uint16x8x2_t s16[4];
  uint8x16x2_t s8[4];

  gf_internal_t *h = (gf_internal_t *) gf->scratch;
  struct gf_split_4_64_lazy_data *ld = (struct gf_split_4_64_lazy_data *) h->private;

  /*
   * tables[i][j] is a 16-entry byte table holding byte j (least significant
   * first) of every entry of ld->tables[i].  The low byte is peeled off and
   * the entry shifted down, once per j.
   */
  for (i = 0; i < 16; i++) {
    for (j = 0; j < 8; j++) {
      for (k = 0; k < 16; k++) {
        btable[k] = (uint8_t) ld->tables[i][k];
        ld->tables[i][k] >>= 8;
      }
#ifdef ARCH_AARCH64
      tables[i][j] = vld1q_u8(btable);
#else
      tables[i][j].val[0] = vld1_u8(btable);
      tables[i][j].val[1] = vld1_u8(btable + 8);
#endif
    }
  }

  /* selects the low nibble of every byte */
  mask1 = vdupq_n_u8(0xf);

  while (dst < d_end) {

    /* load 8 x 16 bytes of input and clear the eight accumulators */
    for (k = 0; k < 8; k++) {
      st[k]  = vld1q_u64(src);
      src += 2;
      p[k] = vdupq_n_u8(0);
    }

    /* de-interleave, round 1: 32-bit lanes of neighbouring vector pairs */
    s32[0] = vuzpq_u32(vreinterpretq_u32_u64(st[0]),
                       vreinterpretq_u32_u64(st[1]));
    s32[1] = vuzpq_u32(vreinterpretq_u32_u64(st[2]),
                       vreinterpretq_u32_u64(st[3]));
    s32[2] = vuzpq_u32(vreinterpretq_u32_u64(st[4]),
                       vreinterpretq_u32_u64(st[5]));
    s32[3] = vuzpq_u32(vreinterpretq_u32_u64(st[6]),
                       vreinterpretq_u32_u64(st[7]));

    /* de-interleave, round 2: 16-bit lanes */
    s16[0] = vuzpq_u16(vreinterpretq_u16_u32(s32[0].val[0]),
                       vreinterpretq_u16_u32(s32[1].val[0]));
    s16[1] = vuzpq_u16(vreinterpretq_u16_u32(s32[2].val[0]),
                       vreinterpretq_u16_u32(s32[3].val[0]));
    s16[2] = vuzpq_u16(vreinterpretq_u16_u32(s32[0].val[1]),
                       vreinterpretq_u16_u32(s32[1].val[1]));
    s16[3] = vuzpq_u16(vreinterpretq_u16_u32(s32[2].val[1]),
                       vreinterpretq_u16_u32(s32[3].val[1]));

    /* de-interleave, round 3: 8-bit lanes */
    s8[0]  = vuzpq_u8(vreinterpretq_u8_u16(s16[0].val[0]),
                      vreinterpretq_u8_u16(s16[1].val[0]));
    s8[1]  = vuzpq_u8(vreinterpretq_u8_u16(s16[0].val[1]),
                      vreinterpretq_u8_u16(s16[1].val[1]));
    s8[2]  = vuzpq_u8(vreinterpretq_u8_u16(s16[2].val[0]),
                      vreinterpretq_u8_u16(s16[3].val[0]));
    s8[3]  = vuzpq_u8(vreinterpretq_u8_u16(s16[2].val[1]),
                      vreinterpretq_u8_u16(s16[3].val[1]));

    /*
     * Input vector k (s8[k/2].val[k%2]) is looked up through tables[2k] with
     * its low nibbles and through tables[2k+1] with its high nibbles; every
     * lookup contributes to all eight accumulators p[0..7].
     */
    i = 0;
    for (k = 0; k < 8; k++) {
      si = vandq_u8(s8[k >> 1].val[k & 1], mask1);
      for (j = 0; j < 8; j++) {
        p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si));
      }
      i++;
      si = vshrq_n_u8(s8[k >> 1].val[k & 1], 4);
      for (j = 0; j < 8; j++) {
        p[j] = veorq_u8(p[j], vqtbl1q_u8(tables[i][j], si));
      }
      i++;
    }

    /* re-interleave, round 1: 8-bit lanes */
    s8[0]  = vzipq_u8(p[0], p[1]);
    s8[1]  = vzipq_u8(p[2], p[3]);
    s8[2]  = vzipq_u8(p[4], p[5]);
    s8[3]  = vzipq_u8(p[6], p[7]);

    /* re-interleave, round 2: 16-bit lanes */
    s16[0] = vzipq_u16(vreinterpretq_u16_u8(s8[0].val[0]),
                       vreinterpretq_u16_u8(s8[1].val[0]));
    s16[1] = vzipq_u16(vreinterpretq_u16_u8(s8[2].val[0]),
                       vreinterpretq_u16_u8(s8[3].val[0]));
    s16[2] = vzipq_u16(vreinterpretq_u16_u8(s8[0].val[1]),
                       vreinterpretq_u16_u8(s8[1].val[1]));
    s16[3] = vzipq_u16(vreinterpretq_u16_u8(s8[2].val[1]),
                       vreinterpretq_u16_u8(s8[3].val[1]));

    /* re-interleave, round 3: 32-bit lanes */
    s32[0] = vzipq_u32(vreinterpretq_u32_u16(s16[0].val[0]),
                       vreinterpretq_u32_u16(s16[1].val[0]));
    s32[1] = vzipq_u32(vreinterpretq_u32_u16(s16[0].val[1]),
                       vreinterpretq_u32_u16(s16[1].val[1]));
    s32[2] = vzipq_u32(vreinterpretq_u32_u16(s16[2].val[0]),
                       vreinterpretq_u32_u16(s16[3].val[0]));
    s32[3] = vzipq_u32(vreinterpretq_u32_u16(s16[2].val[1]),
                       vreinterpretq_u32_u16(s16[3].val[1]));

    /* view the zipped results as 64-bit lanes again, in output order */
    for (k = 0; k < 8; k ++) {
        st[k] = vreinterpretq_u64_u32(s32[k >> 1].val[k & 1]);
    }

    /* either accumulate onto the existing destination or overwrite it */
    if (xor) {
      for (i = 0; i < 8; i++) {
        uint64x2_t t1 = vld1q_u64(dst);
        vst1q_u64(dst, veorq_u64(st[i], t1));
        dst += 2;
      }
    } else {
      for (i = 0; i < 8; i++) {
        vst1q_u64(dst, st[i]);
        dst += 2;
      }
    }

  }
}
254 | 
255 | static
256 | void
257 | gf_w64_neon_split_4_lazy_multiply_region(gf_t *gf, void *src, void *dest,
258 |                                          uint64_t val, int bytes, int xor,
259 |                                          int altmap)
260 | {
261 |   gf_internal_t *h;
262 |   int i, j, k;
263 |   uint64_t pp, v, *s64, *d64, *top;
264 |   struct gf_split_4_64_lazy_data *ld;
265 |   gf_region_data rd;
266 | 
267 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
268 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
269 | 
270 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 128);
271 |   gf_do_initial_region_alignment(&rd);
272 | 
273 |   s64 = (uint64_t *) rd.s_start;
274 |   d64 = (uint64_t *) rd.d_start;
275 |   top = (uint64_t *) rd.d_top;
276 | 
277 |   h = (gf_internal_t *) gf->scratch;
278 |   pp = h->prim_poly;
279 |   ld = (struct gf_split_4_64_lazy_data *) h->private;
280 | 
281 |   v = val;
282 |   for (i = 0; i < 16; i++) {
283 |     ld->tables[i][0] = 0;
284 |     for (j = 1; j < 16; j <<= 1) {
285 |       for (k = 0; k < j; k++) {
286 |         ld->tables[i][k^j] = (v ^ ld->tables[i][k]);
287 |       }
288 |       v = (v & GF_FIRST_BIT) ? ((v << 1) ^ pp) : (v << 1);
289 |     }
290 |   }
291 | 
292 |   if (altmap) {
293 |     if (xor)
294 |       neon_w64_split_4_lazy_altmap_multiply_region(gf, s64, d64, top, val, 1);
295 |     else
296 |       neon_w64_split_4_lazy_altmap_multiply_region(gf, s64, d64, top, val, 0);
297 |   } else {
298 |     if (xor)
299 |       neon_w64_split_4_lazy_multiply_region(gf, s64, d64, top, val, 1);
300 |     else
301 |       neon_w64_split_4_lazy_multiply_region(gf, s64, d64, top, val, 0);
302 |   }
303 | 
304 |   gf_do_final_region_alignment(&rd);
305 | }
306 | 
307 | static
308 | void
309 | gf_w64_split_4_64_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest,
310 |                                             uint64_t val, int bytes, int xor)
311 | {
312 |   gf_w64_neon_split_4_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0);
313 | }
314 | 
315 | static
316 | void
317 | gf_w64_split_4_64_lazy_altmap_multiply_region_neon(gf_t *gf, void *src,
318 |                                                    void *dest, uint64_t val,
319 |                                                    int bytes, int xor)
320 | {
321 |   gf_w64_neon_split_4_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1);
322 | }
323 | 
324 | void gf_w64_neon_split_init(gf_t *gf)
325 | {
326 |   gf_internal_t *h = (gf_internal_t *) gf->scratch;
327 | 
328 |   if (h->region_type & GF_REGION_ALTMAP)
329 |       gf->multiply_region.w64 = gf_w64_split_4_64_lazy_altmap_multiply_region_neon;
330 |   else
331 |       gf->multiply_region.w64 = gf_w64_split_4_64_lazy_multiply_region_neon;
332 | 
333 | }
334 | 


--------------------------------------------------------------------------------
/src/lrc.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * The MIT License (MIT)
  3 |  *
  4 |  * Copyright (c) 2015 Zhang Yanpo (张炎泼) <drdr.xp@gmail.com>
  5 |  */
  6 | 
  7 | #include "lrc.h"
  8 | 
  9 | #include <stdint.h>
 10 | #include <stdio.h>
 11 | #include <stdlib.h>
 12 | #include <string.h>
 13 | #include <unistd.h>
 14 | 
 15 | int lrc_init_n(lrc_t *lrc, int n_local, uint8_t *local_k_arr, int m) {
 16 | 
 17 |   int ret = 0;
 18 | 
 19 |   if (lrc->inited_ == 1) {
 20 |     return LRC_INIT_TWICE;
 21 |   }
 22 | 
 23 |   if (m < n_local) {
 24 |     return LRC_INVALID_M;
 25 |   }
 26 | 
 27 |   bzero(lrc, sizeof(*lrc));
 28 | 
 29 |   lrc->n_local = n_local;
 30 | 
 31 |   lrc->locals = malloc(sizeof(*lrc->locals) * lrc->n_local);
 32 |   if (lrc->locals == NULL) {
 33 |     ret = LRC_OUT_OF_MEMORY;
 34 |     goto exit;
 35 |   }
 36 | 
 37 |   lrc->k = 0;
 38 |   lrc->m = m;
 39 | 
 40 |   for (int i = 0; i < n_local; i++) {
 41 | 
 42 |     lrc->locals[i].start = lrc->k;
 43 |     lrc->locals[i].len = local_k_arr[i];
 44 | 
 45 |     lrc->k += local_k_arr[i];
 46 |   }
 47 | 
 48 |   lrc->n = lrc->k + lrc->m;
 49 | 
 50 |   /* matrix */
 51 |   lrc->matrix = lrc_make_matrix(lrc);
 52 |   if (lrc->matrix == NULL) {
 53 |     ret = LRC_OUT_OF_MEMORY;
 54 |     goto exit;
 55 |   }
 56 | 
 57 |   /* An error index that indicates all codes are damaged */
 58 |   lrc->code_erased = calloc(sizeof(lrc->code_erased[0]), lrc->n);
 59 |   if (lrc->code_erased == NULL) {
 60 |     ret = LRC_OUT_OF_MEMORY;
 61 |     goto exit;
 62 |   }
 63 | 
 64 |   for (int i = 0; i < lrc->m; i++) {
 65 |     lrc->code_erased[lrc->k + i] = 1;
 66 |   }
 67 | 
 68 |   lrc->inited_ = 1;
 69 | 
 70 | exit:
 71 | 
 72 |   if (ret != 0) {
 73 |     free(lrc->code_erased);
 74 |     free(lrc->locals);
 75 |     free(lrc->matrix);
 76 |   }
 77 | 
 78 |   return ret;
 79 | }
 80 | 
 81 | void lrc_destroy(lrc_t *lrc) {
 82 | 
 83 |   if (lrc->inited_ == 0) {
 84 |     return;
 85 |   }
 86 | 
 87 |   free(lrc->code_erased);
 88 |   free(lrc->locals);
 89 |   free(lrc->matrix);
 90 | 
 91 |   bzero(lrc, sizeof(*lrc));
 92 | }
 93 | 
 94 | int lrc_encode(lrc_t *lrc, lrc_buf_t *lb) {
 95 |   return lrc_decode(lrc, lb, lrc->code_erased);
 96 | }
 97 | 
 98 | int lrc_decode(lrc_t *lrc, lrc_buf_t *lb, int8_t *erased) {
 99 | 
100 |   int ret = 0;
101 |   lrc_decoder_t *dec = &(lrc_decoder_t) {0};
102 | 
103 |   ret = lrc_decoder_init(dec, lrc, lb, erased);
104 |   if (ret != 0) {
105 |     goto exit;
106 |   }
107 | 
108 |   ret = lrc_decoder_decode(dec);
109 | 
110 | exit:
111 | 
112 |   lrc_decoder_destroy(dec);
113 | 
114 |   return ret;
115 | }
116 | 
117 | int lrc_get_source(lrc_t *lrc, int8_t *erased, int8_t *source) {
118 | 
119 |   /* we need at least as many equations as erased chunks  */
120 | 
121 |   int n_erased = lrc_count_erased(lrc->n, erased);
122 |   int ret = 0;
123 | 
124 |   for (int i = 0; i < lrc->n_local; i++) {
125 | 
126 |     int n = lrc_get_n_locally_erased(lrc, i, erased);
127 |     if (n == 0) {
128 |       continue;
129 |     }
130 | 
131 |     n_erased--;
132 | 
133 |     /* local data for reconstruction */
134 |     lrc_local_t *l = &lrc->locals[i];
135 | 
136 |     for (int j = l->start; j < l->start + l->len; j++) {
137 |       source[j] = erased[j] == 0;
138 |     }
139 | 
140 |     /* local code for reconstruction */
141 |     int j = lrc->k + i;
142 |     source[j] = erased[j] == 0;
143 | 
144 |   }
145 | 
146 |   if (n_erased > 0) {
147 | 
148 |     for (int i = 0; i < lrc->k; i++) {
149 |       source[i] = (erased[i] == 0);
150 |     }
151 | 
152 |     for (int i = lrc->k + lrc->n_local; i < lrc->n; i++) {
153 | 
154 |       source[i] = (erased[i] == 0);
155 | 
156 |       n_erased--;
157 | 
158 |       if (n_erased == 0) {
159 |         break;
160 |       }
161 |     }
162 |   }
163 | 
164 |   if (n_erased > 0) {
165 |     ret = LRC_UNRECOVERABLE;
166 |     goto exit;
167 |   }
168 | 
169 |   lrc_debug_sources(lrc->n, source);
170 | 
171 | exit:
172 | 
173 |   return ret;
174 | }
175 | 
176 | int *lrc_make_matrix(lrc_t *lrc) {
177 |   /*
178 |    * LRC Erasure Code:
179 |    *  d0 d1   d2 d3   d4 d5
180 |    * ------- ------- -------
181 |    *   c1.1    c1.2    c1.3
182 |    *
183 |    * c1.1 c1.2 c1.3 are codes calculated from a sub set.
184 |    * We have c1.1 ^ c1.2 ^ c.13 = c1
185 |    * Because coefficient of the row 1 in a Vandermonde Matrix are always 1
186 |    *
187 |    *  k = 6, n_local = 3, m = 5
188 |    *
189 |    *  | 1 1 0 0 0 0 |     | d1 |     | c1.1 | |
190 |    *  | 0 0 1 1 0 0 |     | d2 |     | c1.2 | | ^= c1
191 |    *  | 0 0 0 0 1 1 |  X  | d3 |  =  | c1.3 | |
192 |    *  | 1 2 4 8 * * |     | d4 |     | c2   |
193 |    *  | 1 3 9 * * * |     | d5 |     | c3   |
194 |    */
195 | 
196 |   int k = lrc->k;
197 |   int m = lrc->m;
198 |   int *matrix = NULL;
199 |   int *lrc_matrix = NULL;
200 | 
201 |   matrix = reed_sol_vandermonde_coding_matrix(k, m - lrc->n_local + 1, 8);
202 |   if (matrix == NULL) {
203 |     goto exit;
204 |   }
205 | 
206 |   lrc_matrix = malloc(sizeof(int) * k * lrc->m);
207 |   if (lrc_matrix == NULL) {
208 |     goto exit;
209 |   }
210 | 
211 |   bzero(lrc_matrix, sizeof(int) * k * lrc->m);
212 | 
213 |   for (int i = 0; i < lrc->n_local; i++) {
214 | 
215 |     lrc_local_t *l = &lrc->locals[i];
216 | 
217 |     for (int j = 0; j < l->len; j++) {
218 |       lrc_matrix[i * k + l->start + j] = 1;
219 |     }
220 | 
221 |   }
222 | 
223 |   for (int i = 0; i < m - lrc->n_local; i++) {
224 |     for (int j = 0; j < k; j++) {
225 |       lrc_matrix[(lrc->n_local + i)*k + j] = matrix[(i + 1) * k + j];
226 |     }
227 |   }
228 | 
229 | exit:
230 |   free(matrix);
231 |   return lrc_matrix;
232 | }
233 | 
234 | int lrc_get_n_locally_erased(lrc_t *lrc, int idx_local, int8_t *erased) {
235 | 
236 |   int start = lrc->locals[idx_local].start;
237 |   int end = start + lrc->locals[idx_local].len;
238 |   int n_damaged = 0;
239 | 
240 |   /* data in this region is damaged or its code is damaged */
241 |   for (int i = start; i < end; i++) {
242 |     if (erased[i] == 1) {
243 |       n_damaged++;
244 |     }
245 |   }
246 | 
247 |   if (erased[lrc->k + idx_local]) {
248 |     n_damaged++;
249 |   }
250 | 
251 |   return n_damaged;
252 | }
253 | 
/*
 * Number of nonzero flags among erased[0 .. n-1]; 0 when n <= 0.
 */
int lrc_count_erased(int n, int8_t *erased) {

  int total = 0;

  for (int left = n; left > 0; left--) {
    total += (erased[left - 1] != 0);
  }

  return total;
}
266 | 
267 | void lrc_debug_buf_line_(lrc_buf_t *lb, int n) {
268 | 
269 |   char *b;
270 |   (void)b;
271 | 
272 |   dd("#%04d:", n);
273 |   if (n < 0 || n >= lb->chunk_size) {
274 |     dlog("--\n");
275 |     return;
276 |   }
277 | 
278 |   for (int i = 0; i < lb->n_data; i++) {
279 |     b = lb->data[i];
280 |     if (b[n] == 0) {
281 |       dlog(" . ");
282 |     } else {
283 |       dlog("%02x ", (unsigned char)b[n]);
284 |     }
285 |   }
286 | 
287 |   dlog("| ");
288 | 
289 |   for (int i = 0; i < lb->n_code; i++) {
290 |     b = lb->code[i];
291 |     if (b[n] == 0) {
292 |       dlog(" . ");
293 |     } else {
294 |       dlog("%02x ", (unsigned char)b[n]);
295 |     }
296 |   }
297 | 
298 |   dlog("\n");
299 | }
300 | 
/*
 * Debug helper: print a row-major `row` x `col` matrix, one matrix row per
 * output line.  Zero entries are shown as " . ", others in hex.
 */
void lrc_debug_matrix_(int *matrix, int row, int col) {

  dd("matrix:");

  for (int r = 0; r < row; r++) {

    int *line = &matrix[r * col];

    for (int c = 0; c < col; c++) {

      int cell = line[c];
      if (cell != 0) {
        dlog("%02x ", cell);
      } else {
        dlog(" . ");
      }
    }
    dlog("\n");
  }
}
319 | 
/*
 * Debug helper: print the `n` entries of a source index on one line.
 * Zero entries are shown as " . ", others in hex.
 */
void lrc_debug_sources_(int n, int8_t *source) {

  dd("source:");

  for (int idx = 0; idx < n; idx++) {

    int8_t flag = source[idx];

    if (flag != 0) {
      dlog("%02x ", flag);
    } else {
      dlog(" . ");
    }
  }

  dlog("\n");
}
337 | 
338 | /* lrc_buf_t */
339 | 
340 | int lrc_buf_init(lrc_buf_t *lb, lrc_t *lrc, int64_t chunk_size) {
341 | 
342 |   int ret = 0;
343 | 
344 |   if (lb->inited_ == 1) {
345 |     return LRC_INIT_TWICE;
346 |   }
347 | 
348 |   bzero(lb, sizeof(*lb));
349 | 
350 |   lb->n_data = lrc->k;
351 |   lb->n_code = lrc->m;
352 |   lb->n = lb->n_data + lb->n_code;
353 | 
354 |   lb->chunk_size = chunk_size;
355 |   lb->aligned_chunk_size = lrc_align_16(chunk_size);
356 | 
357 |   ret = posix_memalign((void **)&lb->buf, 16,
358 |                        lb->aligned_chunk_size * lb->n);
359 |   if (ret != 0) {
360 |     goto exit;
361 |   }
362 | 
363 |   for (int i = 0; i < lb->n; i++) {
364 |     lb->data[i] = lb->buf + lb->aligned_chunk_size * i;
365 |   }
366 | 
367 |   lb->code = &lb->data[lb->n_data];
368 | 
369 |   lb->buf_owned = 1;
370 |   lb->inited_ = 1;
371 | 
372 | exit:
373 | 
374 |   if (ret != 0) {
375 |     free(lb->buf);
376 |   }
377 | 
378 |   return ret;
379 | }
380 | 
381 | void lrc_buf_destroy(lrc_buf_t *lb) {
382 | 
383 |   if (lb == NULL || lb->inited_ == 0) {
384 |     return;
385 |   }
386 | 
387 |   if (lb->buf_owned == 1) {
388 |     free(lb->buf);
389 |   }
390 | 
391 |   bzero(lb, sizeof(*lb));
392 | }
393 | 
394 | int lrc_buf_shadow(lrc_buf_t *lb, lrc_buf_t *src) {
395 |   *lb = *src;
396 |   lb->code = &lb->data[lb->n_data];
397 |   lb->buf_owned = 0;
398 |   return 0;
399 | }
400 | 
401 | /* lrc decoder */
402 | 
403 | int lrc_decoder_init(lrc_decoder_t *dec, lrc_t *lrc, lrc_buf_t *lb, int8_t *erased) {
404 | 
405 |   /*
406 |    * To a certain pattern of data loss, a specific matrix specific is required
407 |    * to be created for decoding.
408 |    * Matrix rows that do not cover lost data is removed.
409 |    *
410 |    * Because jerasure uses only first n_of_damaged row of the encoding matrix
411 |    * to decode. It is not enough if lrc is used.
412 |    *
413 |    * For example with a encoding matrix 3*5:
414 |    *   1 1 1 0 0
415 |    *   0 0 0 1 1
416 |    *   1 2 4 8 16
417 |    * If data [0], [1], [2] are lost, row[0] contributes nothing to decode.
418 |    */
419 | 
420 |   int k = lrc->k;
421 |   int ret = 0;
422 | 
423 |   if (dec->inited_ == 1) {
424 |     return LRC_INIT_TWICE;
425 |   }
426 | 
427 |   bzero(dec, sizeof(*dec));
428 | 
429 |   dec->lrc = lrc;
430 | 
431 |   ret = lrc_buf_shadow(&dec->buf, lb);
432 |   if (ret != 0) {
433 |     goto exit;
434 |   }
435 | 
436 |   ret = lrc_get_source(lrc, erased, dec->source);
437 |   if (ret != 0) {
438 |     goto exit;
439 |   }
440 | 
441 |   /* only copy erased data. erased code will be remapped */
442 |   for (int i = 0; i < lrc->k; i++) {
443 |     dec->erased[i] = erased[i];
444 |   }
445 | 
446 |   dec->decode_matrix = malloc(sizeof(int) * lrc->m * k);
447 |   if (dec->decode_matrix == NULL) {
448 |     ret = LRC_OUT_OF_MEMORY;
449 |     goto exit;
450 |   }
451 | 
452 |   int to = k;
453 |   for (int i = lrc->k; i < lrc->n; i++) {
454 |     if (dec->source[i] == 1 || erased[i] == 1) {
455 |       dd("decoder map: %d -> %d", i, to);
456 |       dec->buf.code[to - k] = lb->code[i - k];
457 |       dec->erased[to] = erased[i];
458 |       memcpy(&dec->decode_matrix[(to - k) * k], &lrc->matrix[(i - k) * k], sizeof(lrc->matrix[0]) * k);
459 |       to++;
460 |     }
461 |   }
462 | 
463 |   dec->buf.n_code = to - k;
464 |   dec->buf.n = dec->buf.n_data + dec->buf.n_code;
465 |   dec->inited_ = 1;
466 | 
467 |   dd("\ndecoder inited:");
468 |   lrc_debug_matrix(dec->decode_matrix, to - k, k);
469 |   lrc_debug_sources(dec->lrc->n, dec->source);
470 | 
471 | exit:
472 | 
473 |   if (ret != 0) {
474 |     free(dec->decode_matrix);
475 |   }
476 | 
477 |   return ret;
478 | }
479 | 
480 | void lrc_decoder_destroy(lrc_decoder_t *dec) {
481 | 
482 |   if (dec == NULL || dec->inited_ == 0) {
483 |     return;
484 |   }
485 | 
486 |   free(dec->decode_matrix);
487 | 
488 |   bzero(dec, sizeof(*dec));
489 | }
490 | 
491 | int lrc_decoder_decode(lrc_decoder_t *dec) {
492 | 
493 |   lrc_buf_t *lb = &dec->buf;
494 |   int erasures[512] = {0};
495 |   int start = 0;
496 | 
497 |   for (int i = 0; i < dec->lrc->n; i++) {
498 | 
499 |     if (dec->erased[i] == 1) {
500 | 
501 |       erasures[start] = i;
502 |       dd("erasures: %d", i);
503 |       start++;
504 |     }
505 |   }
506 |   erasures[start] = -1;
507 | 
508 |   return jerasure_matrix_decode(lb->n_data, lb->n_code, 8, dec->decode_matrix, 0,
509 |                                 erasures, lb->data, lb->code,
510 |                                 lb->chunk_size);
511 | }
512 | 
513 | // vim:sw=2:fdl=0
514 | 


--------------------------------------------------------------------------------
/src/neon/gf_w16_neon.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
  3 |  * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
  4 |  * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
  5 |  *
  6 |  * Copyright (c) 2014: Janne Grunau <j@jannau.net>
  7 |  *
  8 |  * Redistribution and use in source and binary forms, with or without
  9 |  * modification, are permitted provided that the following conditions
 10 |  * are met:
 11 |  *
 12 |  *  - Redistributions of source code must retain the above copyright
 13 |  *     notice, this list of conditions and the following disclaimer.
 14 |  *
 15 |  *  - Redistributions in binary form must reproduce the above copyright
 16 |  *    notice, this list of conditions and the following disclaimer in
 17 |  *    the documentation and/or other materials provided with the
 18 |  *    distribution.
 19 |  *
 20 |  *  - Neither the name of the University of Tennessee nor the names of its
 21 |  *    contributors may be used to endorse or promote products derived
 22 |  *    from this software without specific prior written permission.
 23 |  *
 24 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 25 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 26 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 27 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 28 |  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 29 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 30 |  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 31 |  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 32 |  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 33 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
 34 |  * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 35 |  * POSSIBILITY OF SUCH DAMAGE.
 36 |  *
 37 |  *
 38 |  * gf_w16_neon.c
 39 |  *
 40 |  * Neon routines for 16-bit Galois fields
 41 |  *
 42 |  */
 43 | 
 44 | #include "gf_int.h"
 45 | #include <stdio.h>
 46 | #include <stdlib.h>
 47 | #include "gf_w16.h"
 48 | 
 49 | #ifdef ARCH_AARCH64
/*
 * AArch64 NEON kernel for the w=16 split-4 region multiply, standard layout.
 * Each pass of the loop handles sixteen 16-bit words (32 bytes).
 *
 * gf    - not referenced in this function.
 * src   - input words; advanced by 16 words per pass.
 * dst   - output words; advanced by 16 words per pass.
 * d_end - the loop runs while dst < d_end.
 * tbl   - eight 16-byte lookup tables: tbl[0..63] are read as tbl_l[0..3],
 *         tbl[64..127] as tbl_h[0..3].  The results of the tbl_l lookups
 *         and of the tbl_h lookups are interleaved byte by byte into the
 *         output, so they supply alternating bytes of the product
 *         (NOTE(review): presumably low and high byte of each 16-bit
 *         product -- confirm against the code that fills tbl).
 * val   - not referenced in this function.
 * xor   - nonzero: products are XORed onto the existing dst contents;
 *         zero: dst is overwritten.
 */
static
inline
void
neon_w16_split_4_multiply_region(gf_t *gf, uint16_t *src, uint16_t *dst,
                                 uint16_t *d_end, uint8_t *tbl,
                                 gf_val_32_t val, int xor)
{
  unsigned i;
  uint8_t *high = tbl + 4 * 16;
  uint16x8_t va0, va1, r0, r1;
  uint8x16_t loset, rl, rh;
  uint8x16x2_t va;

  /* keep all eight tables in vector registers */
  uint8x16_t tbl_h[4], tbl_l[4];
  for (i = 0; i < 4; i++) {
      tbl_l[i] = vld1q_u8(tbl + i*16);
      tbl_h[i] = vld1q_u8(high + i*16);
  }

  /* selects the low nibble of every byte */
  loset = vdupq_n_u8(0xf);

  while (dst < d_end) {
      va0 = vld1q_u16(src);
      va1 = vld1q_u16(src + 8);

      /* byte transpose of the two input vectors into va.val[0] / va.val[1] */
      va = vtrnq_u8(vreinterpretq_u8_u16(va0), vreinterpretq_u8_u16(va1));

      /* low nibbles: tables 0 (from val[0]) and 2 (from val[1]) */
      rl = vqtbl1q_u8(tbl_l[0], vandq_u8(va.val[0], loset));
      rh = vqtbl1q_u8(tbl_h[0], vandq_u8(va.val[0], loset));
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[2], vandq_u8(va.val[1], loset)));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[2], vandq_u8(va.val[1], loset)));

      va.val[0] = vshrq_n_u8(va.val[0], 4);
      va.val[1] = vshrq_n_u8(va.val[1], 4);

      /* high nibbles: tables 1 (from val[0]) and 3 (from val[1]) */
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[1], va.val[0]));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[1], va.val[0]));
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[3], va.val[1]));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[3], va.val[1]));

      /* transpose the two result byte vectors back into 16-bit words */
      va = vtrnq_u8(rl, rh);
      r0 = vreinterpretq_u16_u8(va.val[0]);
      r1 = vreinterpretq_u16_u8(va.val[1]);

      if (xor) {
          va0 = vld1q_u16(dst);
          va1 = vld1q_u16(dst + 8);
          r0 = veorq_u16(r0, va0);
          r1 = veorq_u16(r1, va1);
      }
      vst1q_u16(dst, r0);
      vst1q_u16(dst + 8, r1);

      src += 16;
      dst += 16;
  }
}
107 | 
/*
 * AArch64 NEON kernel for the w=16 split-4 region multiply on data in the
 * alternate mapping.  Each pass of the loop handles 32 bytes: the first 16
 * are loaded as `vh`, the next 16 as `vl`, and the results are stored the
 * same way (rh first, then rl).
 * NOTE(review): presumably vh holds the high bytes and vl the low bytes of
 * sixteen words -- confirm against the altmap definition.
 *
 * gf    - not referenced in this function.
 * src   - input bytes; advanced by 32 per pass.
 * dst   - output bytes; advanced by 32 per pass.
 * d_end - the loop runs while dst < d_end.
 * tbl   - eight 16-byte lookup tables: tbl[0..63] are read as tbl_l[0..3],
 *         tbl[64..127] as tbl_h[0..3].
 * val   - not referenced in this function.
 * xor   - nonzero: products are XORed onto the existing dst contents;
 *         zero: dst is overwritten.
 */
static
inline
void
neon_w16_split_4_altmap_multiply_region(gf_t *gf, uint8_t *src,
                                        uint8_t *dst, uint8_t *d_end,
                                        uint8_t *tbl, gf_val_32_t val,
                                        int xor)
{
  unsigned i;
  uint8_t *high = tbl + 4 * 16;
  uint8x16_t vh, vl, rh, rl;
  uint8x16_t loset;

  /* keep all eight tables in vector registers */
  uint8x16_t tbl_h[4], tbl_l[4];
  for (i = 0; i < 4; i++) {
      tbl_l[i] = vld1q_u8(tbl + i*16);
      tbl_h[i] = vld1q_u8(high + i*16);
  }

  /* selects the low nibble of every byte */
  loset = vdupq_n_u8(0xf);

  while (dst < d_end) {
      vh = vld1q_u8(src);
      vl = vld1q_u8(src + 16);

      /* low nibbles: tables 0 (from vl) and 2 (from vh) */
      rl = vqtbl1q_u8(tbl_l[0], vandq_u8(vl, loset));
      rh = vqtbl1q_u8(tbl_h[0], vandq_u8(vl, loset));
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[2], vandq_u8(vh, loset)));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[2], vandq_u8(vh, loset)));

      vl = vshrq_n_u8(vl, 4);
      vh = vshrq_n_u8(vh, 4);

      /* high nibbles: tables 1 (from vl) and 3 (from vh) */
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[1], vl));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[1], vl));
      rl = veorq_u8(rl, vqtbl1q_u8(tbl_l[3], vh));
      rh = veorq_u8(rh, vqtbl1q_u8(tbl_h[3], vh));

      if (xor) {
          vh = vld1q_u8(dst);
          vl = vld1q_u8(dst + 16);
          rh = veorq_u8(rh, vh);
          rl = veorq_u8(rl, vl);
      }
      vst1q_u8(dst, rh);
      vst1q_u8(dst + 16, rl);

      src += 32;
      dst += 32;
  }
}
159 | 
160 | #else /* ARCH_AARCH64 */
161 | 
/*
 * 32-bit ARM NEON kernel for the w=16 split-4 region multiply, standard
 * layout.  Each pass of the loop handles eight 16-bit words (16 bytes).
 *
 * gf    - not referenced in this function.
 * src   - input words; advanced by 8 words per pass.
 * dst   - output words; advanced by 8 words per pass.
 * d_end - the loop runs while dst < d_end.
 * tbl   - eight 16-byte lookup tables: tbl[0..63] are read as tbl_l[0..3]
 *         (they produce the low byte of each product), tbl[64..127] as
 *         tbl_h[0..3] (they produce the high byte).  Each table is held as a
 *         uint8x8x2_t because vtbl2_u8 takes the table in two 8-byte halves.
 * val   - not referenced in this function.
 * xor   - nonzero: products are XORed onto the existing dst contents;
 *         zero: dst is overwritten.
 */
static
inline
void
neon_w16_split_4_multiply_region(gf_t *gf, uint16_t *src, uint16_t *dst,
                                 uint16_t *d_end, uint8_t *tbl,
                                 gf_val_32_t val, int xor)
{
  unsigned i;
  uint8_t *high = tbl + 4 * 16;
  uint16x8_t va, r;
  uint8x8_t loset, vb, vc, rl, rh;

  uint8x8x2_t tbl_h[4], tbl_l[4];
  for (i = 0; i < 4; i++) {
      tbl_l[i].val[0] = vld1_u8(tbl + i*16);
      tbl_l[i].val[1] = vld1_u8(tbl + i*16 + 8);
      tbl_h[i].val[0] = vld1_u8(high + i*16);
      tbl_h[i].val[1] = vld1_u8(high + i*16 + 8);
  }

  /* selects the low nibble of every byte */
  loset = vdup_n_u8(0xf);

  while (dst < d_end) {
      va = vld1q_u16(src);

      /* vb = low byte of every word, vc = high byte of every word */
      vb = vmovn_u16(va);
      vc = vshrn_n_u16(va, 8);

      /* nibble 0 (vb low) -> tables 0, nibble 2 (vc low) -> tables 2 */
      rl = vtbl2_u8(tbl_l[0], vand_u8(vb, loset));
      rh = vtbl2_u8(tbl_h[0], vand_u8(vb, loset));
      vb = vshr_n_u8(vb, 4);
      rl = veor_u8(rl, vtbl2_u8(tbl_l[2], vand_u8(vc, loset)));
      rh = veor_u8(rh, vtbl2_u8(tbl_h[2], vand_u8(vc, loset)));
      vc = vshr_n_u8(vc, 4);
      /* nibble 1 (vb high) -> tables 1, nibble 3 (vc high) -> tables 3 */
      rl = veor_u8(rl, vtbl2_u8(tbl_l[1], vb));
      rh = veor_u8(rh, vtbl2_u8(tbl_h[1], vb));
      rl = veor_u8(rl, vtbl2_u8(tbl_l[3], vc));
      rh = veor_u8(rh, vtbl2_u8(tbl_h[3], vc));

      /* rebuild 16-bit words: rl in the low byte, rh in the high byte */
      r  = vmovl_u8(rl);
      r  = vorrq_u16(r, vshll_n_u8(rh, 8));

      if (xor) {
          va = vld1q_u16(dst);
          r = veorq_u16(r, va);
      }
      vst1q_u16(dst, r);

      src += 8;
      dst += 8;
  }
}
214 | 
215 | /*
216 |  * ARMv7 NEON kernel for the ALTMAP layout.  In every 32-byte group bytes
217 |  * 0-15 hold the high bytes of 16 words and bytes 16-31 their low bytes (the
218 |  * order the AArch64 variant writes its results in).  Runs until dst reaches
219 |  * d_end; when xor is set the products are XORed into dst instead of
220 |  * overwriting it.  gf and val are not used here.
221 |  *
222 |  * Each product byte depends on all four nibbles of its source word, so both
223 |  * output planes combine tables 0-1 (indexed by the low-byte plane) with
224 |  * tables 2-3 (indexed by the high-byte plane), as the AArch64 variant does.
225 |  */
226 | static inline void
227 | neon_w16_split_4_altmap_multiply_region(gf_t *gf, uint8_t *src,
228 |                                         uint8_t *dst, uint8_t *d_end,
229 |                                         uint8_t *tbl, gf_val_32_t val,
230 |                                         int xor)
231 | {
232 |   unsigned i;
233 |   uint8_t *high = tbl + 4 * 16;
234 |   uint8x8_t vh, vl, rh, rl;
235 |   uint8x8_t loset;
236 | 
237 |   uint8x8x2_t tbl_h[4], tbl_l[4];
238 |   for (i = 0; i < 4; i++) {
239 |       tbl_l[i].val[0] = vld1_u8(tbl + i*16);
240 |       tbl_l[i].val[1] = vld1_u8(tbl + i*16 + 8);
241 |       tbl_h[i].val[0] = vld1_u8(high + i*16);
242 |       tbl_h[i].val[1] = vld1_u8(high + i*16 + 8);
243 |   }
244 | 
245 |   loset = vdup_n_u8(0xf);
246 | 
247 |   while (dst < d_end) {
248 |       /* 64-bit registers hold 8 bytes, so each 16-byte plane takes 2 passes. */
249 |       for (i = 0; i < 16; i += 8) {
250 |           vh = vld1_u8(src + i);
251 |           vl = vld1_u8(src + 16 + i);
252 | 
253 |           rl = vtbl2_u8(tbl_l[0], vand_u8(vl, loset));
254 |           rh = vtbl2_u8(tbl_h[0], vand_u8(vl, loset));
255 |           rl = veor_u8(rl, vtbl2_u8(tbl_l[2], vand_u8(vh, loset)));
256 |           rh = veor_u8(rh, vtbl2_u8(tbl_h[2], vand_u8(vh, loset)));
257 | 
258 |           vl = vshr_n_u8(vl, 4);
259 |           vh = vshr_n_u8(vh, 4);
260 | 
261 |           rl = veor_u8(rl, vtbl2_u8(tbl_l[1], vl));
262 |           rh = veor_u8(rh, vtbl2_u8(tbl_h[1], vl));
263 |           rl = veor_u8(rl, vtbl2_u8(tbl_l[3], vh));
264 |           rh = veor_u8(rh, vtbl2_u8(tbl_h[3], vh));
265 | 
266 |           if (xor) {
267 |               rh = veor_u8(rh, vld1_u8(dst + i));
268 |               rl = veor_u8(rl, vld1_u8(dst + 16 + i));
269 |           }
270 |           vst1_u8(dst + i,      rh);
271 |           vst1_u8(dst + 16 + i, rl);
272 |       }
273 | 
274 |       src += 32;
275 |       dst += 32;
276 |   }
277 | }
278 | #endif /* ARCH_AARCH64 */
279 | 
280 | /*
281 |  * Shared driver for the SPLIT 4,16 NEON region multiply: short-circuits
282 |  * val 0 and 1, builds the 4-bit split tables for val, then runs the kernel
283 |  * selected by altmap between the initial and final region-alignment calls.
284 |  */
285 | static inline void
286 | neon_w16_split_4_16_lazy_multiply_region(gf_t *gf, void *src, void *dest,
287 |                                          gf_val_32_t val, int bytes, int xor,
288 |                                          int altmap)
289 | {
290 |   uint8_t tbl[2 * 4 * 16];
291 |   uint8_t *tbl_hi = tbl + 4 * 16;
292 |   gf_region_data rd;
293 |   unsigned nib, digit;
294 |   uint64_t operand, product;
295 | 
296 |   if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
297 |   if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
298 | 
299 |   /* Entry nib*16 + digit: low / high byte of val times (digit << 4*nib). */
300 |   for (nib = 0; nib < 4; nib++) {
301 |     for (digit = 0; digit < 16; digit++) {
302 |       operand = (digit << (nib*4));
303 |       product = gf->multiply.w32(gf, operand, val);
304 |       tbl[nib*16 + digit]    = product & 0xff;
305 |       tbl_hi[nib*16 + digit] = product >> 8;
306 |     }
307 |   }
308 | 
309 |   gf_set_region_data(&rd, gf, src, dest, bytes, val, xor, 32);
310 |   gf_do_initial_region_alignment(&rd);
311 |   /* xor is handed over as a literal 1 or 0 in each of the calls below. */
312 |   if (!altmap) {
313 |     uint16_t *s16 = rd.s_start, *d16 = rd.d_start, *top16 = rd.d_top;
314 |     if (xor)
315 |       neon_w16_split_4_multiply_region(gf, s16, d16, top16, tbl, val, 1);
316 |     else
317 |       neon_w16_split_4_multiply_region(gf, s16, d16, top16, tbl, val, 0);
318 |   } else {
319 |     uint8_t *s8 = rd.s_start, *d8 = rd.d_start, *top8 = rd.d_top;
320 |     if (xor)
321 |       neon_w16_split_4_altmap_multiply_region(gf, s8, d8, top8, tbl, val, 1);
322 |     else
323 |       neon_w16_split_4_altmap_multiply_region(gf, s8, d8, top8, tbl, val, 0);
324 |   }
325 | 
326 |   gf_do_final_region_alignment(&rd);
327 | }
328 | 
329 | /* Region-multiply entry point for the standard layout (altmap = 0). */
330 | static void
331 | gf_w16_split_4_16_lazy_multiply_region_neon(gf_t *gf, void *src, void *dest,
332 |                                             gf_val_32_t val, int bytes, int xor)
333 | {
334 |   neon_w16_split_4_16_lazy_multiply_region(gf, src, dest, val, bytes, xor, 0);
335 | }
336 | 
337 | /* Region-multiply entry point for the ALTMAP layout: forwards to the shared
338 |  * driver with altmap = 1. */
339 | static void
340 | gf_w16_split_4_16_lazy_altmap_multiply_region_neon(gf_t *gf, void *src,
341 |                                                    void *dest, gf_val_32_t val,
342 |                                                    int bytes, int xor)
343 | {
344 |   neon_w16_split_4_16_lazy_multiply_region(gf, src, dest, val, bytes, xor, 1);
345 | }
346 | 
347 | 
348 | /* Point gf->multiply_region.w32 at the NEON variant matching region_type. */
349 | void gf_w16_neon_split_init(gf_t *gf)
350 | {
351 |   gf_internal_t *h = (gf_internal_t *) gf->scratch;
352 | 
353 |   gf->multiply_region.w32 = (h->region_type & GF_REGION_ALTMAP)
354 |     ? gf_w16_split_4_16_lazy_altmap_multiply_region_neon
355 |     : gf_w16_split_4_16_lazy_multiply_region_neon;
356 | }
357 | 


--------------------------------------------------------------------------------
/m4/ax_ext.m4:
--------------------------------------------------------------------------------
  1 | #
  2 | # Updated by KMG to support -DINTEL_SSE for GF-Complete
  3 | #
  4 | # ===========================================================================
  5 | #          http://www.gnu.org/software/autoconf-archive/ax_ext.html
  6 | # ===========================================================================
  7 | #
  8 | # SYNOPSIS
  9 | #
 10 | #   AX_EXT
 11 | #
 12 | # DESCRIPTION
 13 | #
 14 | #   Find supported SIMD extensions by requesting cpuid. When an SIMD
 15 | #   extension is found, the -m"simdextensionname" is added to SIMD_FLAGS if
 16 | #   compiler supports it. For example, if "sse2" is available, then "-msse2"
 17 | #   is added to SIMD_FLAGS.
 18 | #
 19 | #   This macro calls:
 20 | #
 21 | #     AC_SUBST(SIMD_FLAGS)
 22 | #
 23 | #   And defines:
 24 | #
 25 | #     HAVE_MMX / HAVE_SSE / HAVE_SSE2 / HAVE_SSE3 / HAVE_PCLMULDQ / HAVE_SSSE3 / HAVE_SSE4_1 / HAVE_SSE4_2 / HAVE_AVX
 26 | #
 27 | # LICENSE
 28 | #
 29 | #   Copyright (c) 2007 Christophe Tournayre <turn3r@users.sourceforge.net>
 30 | #   Copyright (c) 2013 Michael Petch <mpetch@capp-sysware.com>
 31 | #
 32 | #   Copying and distribution of this file, with or without modification, are
 33 | #   permitted in any medium without royalty provided the copyright notice
 34 | #   and this notice are preserved. This file is offered as-is, without any
 35 | #   warranty.
 36 | 
 37 | #serial 12
 38 | 
 39 | AC_DEFUN([AX_EXT],
 40 | [
 41 |   AC_REQUIRE([AC_CANONICAL_HOST])
 42 |   # Each host CPU family below appends its flags and -D switches to SIMD_FLAGS.
 43 |   case $host_cpu in
 44 |     aarch64*)
 45 |       AC_DEFINE(HAVE_ARCH_AARCH64,,[targeting AArch64])
 46 |       SIMD_FLAGS="$SIMD_FLAGS -DARCH_AARCH64"
 47 | 
 48 |       AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
 49 |           [
 50 |             # TODO: detect / cross-compile
 51 |             ax_cv_have_neon_ext=yes
 52 |           ])
 53 |       AC_CACHE_CHECK([whether cryptographic extension is supported], [ax_cv_have_arm_crypt_ext],
 54 |           [
 55 |             # TODO: detect / cross-compile
 56 |             ax_cv_have_arm_crypt_ext=yes
 57 |           ])
 58 | 
 59 |       if test "$ax_cv_have_arm_crypt_ext" = yes; then
 60 |         AC_DEFINE(HAVE_ARM_CRYPT_EXT,,[Support ARM cryptographic extension])
 61 |       fi
 62 | 
 63 |       if test "$ax_cv_have_neon_ext" = yes; then
 64 |         AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
 65 |       fi
 66 | 
 67 |       if test "$ax_cv_have_arm_crypt_ext" = yes && test "$ax_cv_have_neon_ext" = yes; then
 68 |           AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd+crypto,
 69 |                                 SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd+crypto -DARM_CRYPT -DARM_NEON", [])
 70 |       elif test "$ax_cv_have_arm_crypt_ext" = yes; then
 71 |           AX_CHECK_COMPILE_FLAG(-march=armv8-a+crypto,
 72 |                                 SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+crypto -DARM_CRYPT", [])
 73 |       elif test "$ax_cv_have_neon_ext" = yes; then
 74 |           AX_CHECK_COMPILE_FLAG(-march=armv8-a+simd,
 75 |                                 SIMD_FLAGS="$SIMD_FLAGS -march=armv8-a+simd -DARM_NEON", [])
 76 |       fi
 77 |     ;;
 78 | 
 79 |     arm*)
 80 |       AC_CACHE_CHECK([whether NEON is supported], [ax_cv_have_neon_ext],
 81 |           [
 82 |             # TODO: detect / cross-compile
 83 |             ax_cv_have_neon_ext=yes
 84 |           ])
 85 | 
 86 |       if test "$ax_cv_have_neon_ext" = yes; then
 87 |         AC_DEFINE(HAVE_NEON,,[Support NEON instructions])
 88 |         AX_CHECK_COMPILE_FLAG(-mfpu=neon,
 89 |                                 SIMD_FLAGS="$SIMD_FLAGS -mfpu=neon -DARM_NEON", [])
 90 |       fi
 91 |     ;;
 92 | 
 93 |     powerpc*)
 94 |       AC_CACHE_CHECK([whether altivec is supported], [ax_cv_have_altivec_ext],
 95 |           [
 96 |             ax_cv_have_altivec_ext=no
 97 |             if test `/usr/sbin/sysctl -a 2>/dev/null| grep -c hw.optional.altivec` != 0; then
 98 |                 if test `/usr/sbin/sysctl -n hw.optional.altivec` = 1; then
 99 |                   ax_cv_have_altivec_ext=yes
100 |                 fi
101 |             fi
102 |           ])
103 | 
104 |           if test "$ax_cv_have_altivec_ext" = yes; then
105 |             AC_DEFINE(HAVE_ALTIVEC,,[Support Altivec instructions])
106 |             AX_CHECK_COMPILE_FLAG(-faltivec, SIMD_FLAGS="$SIMD_FLAGS -faltivec", [])
107 |           fi
108 |     ;;
109 | 
110 |     i[[3456]]86*|x86_64*|amd64*)
111 |       AC_REQUIRE([AX_GCC_X86_CPUID])
112 |       AC_REQUIRE([AX_GCC_X86_AVX_XGETBV])
113 |       AX_GCC_X86_CPUID(0x00000001)
114 |       # The cpuid probe yields unknown when it cannot run (cross builds); use 0 then.
115 |       ecx=0
116 |       edx=0
117 |       if test x"$ax_cv_gcc_x86_cpuid_0x00000001" != x"unknown"; then
118 |         ecx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 3`
119 |         edx=`echo $ax_cv_gcc_x86_cpuid_0x00000001 | cut -d ":" -f 4`
120 |       fi
121 | 
122 |       AC_CACHE_CHECK([whether mmx is supported], [ax_cv_have_mmx_ext],
123 |       [
124 |         ax_cv_have_mmx_ext=no
125 |         if test "$((0x$edx>>23&0x01))" = 1; then
126 |           ax_cv_have_mmx_ext=yes
127 |         fi
128 |       ])
129 | 
130 |       AC_CACHE_CHECK([whether sse is supported], [ax_cv_have_sse_ext],
131 |       [
132 |         ax_cv_have_sse_ext=no
133 |         if test "$((0x$edx>>25&0x01))" = 1; then
134 |           ax_cv_have_sse_ext=yes
135 |         fi
136 |       ])
137 | 
138 |       AC_CACHE_CHECK([whether sse2 is supported], [ax_cv_have_sse2_ext],
139 |       [
140 |         ax_cv_have_sse2_ext=no
141 |         if test "$((0x$edx>>26&0x01))" = 1; then
142 |           ax_cv_have_sse2_ext=yes
143 |         fi
144 |       ])
145 | 
146 |       AC_CACHE_CHECK([whether sse3 is supported], [ax_cv_have_sse3_ext],
147 |       [
148 |         ax_cv_have_sse3_ext=no
149 |         if test "$((0x$ecx&0x01))" = 1; then
150 |           ax_cv_have_sse3_ext=yes
151 |         fi
152 |       ])
153 | 
154 |       AC_CACHE_CHECK([whether pclmuldq is supported], [ax_cv_have_pclmuldq_ext],
155 |       [
156 |         ax_cv_have_pclmuldq_ext=no
157 |         if test "$((0x$ecx>>1&0x01))" = 1; then
158 |           ax_cv_have_pclmuldq_ext=yes
159 |         fi
160 |       ])
161 | 
162 |       AC_CACHE_CHECK([whether ssse3 is supported], [ax_cv_have_ssse3_ext],
163 |       [
164 |         ax_cv_have_ssse3_ext=no
165 |         if test "$((0x$ecx>>9&0x01))" = 1; then
166 |           ax_cv_have_ssse3_ext=yes
167 |         fi
168 |       ])
169 | 
170 |       AC_CACHE_CHECK([whether sse4.1 is supported], [ax_cv_have_sse41_ext],
171 |       [
172 |         ax_cv_have_sse41_ext=no
173 |         if test "$((0x$ecx>>19&0x01))" = 1; then
174 |           ax_cv_have_sse41_ext=yes
175 |         fi
176 |       ])
177 | 
178 |       AC_CACHE_CHECK([whether sse4.2 is supported], [ax_cv_have_sse42_ext],
179 |       [
180 |         ax_cv_have_sse42_ext=no
181 |         if test "$((0x$ecx>>20&0x01))" = 1; then
182 |           ax_cv_have_sse42_ext=yes
183 |         fi
184 |       ])
185 | 
186 |       AC_CACHE_CHECK([whether avx is supported by processor], [ax_cv_have_avx_cpu_ext],
187 |       [
188 |         ax_cv_have_avx_cpu_ext=no
189 |         if test "$((0x$ecx>>28&0x01))" = 1; then
190 |           ax_cv_have_avx_cpu_ext=yes
191 |         fi
192 |       ])
193 | 
194 |       if test x"$ax_cv_have_avx_cpu_ext" = x"yes"; then
195 |         AX_GCC_X86_AVX_XGETBV(0x00000000)
196 |         xgetbv_eax="0"
197 |         if test x"$ax_cv_gcc_x86_avx_xgetbv_0x00000000" != x"unknown"; then
198 |           xgetbv_eax=`echo $ax_cv_gcc_x86_avx_xgetbv_0x00000000 | cut -d ":" -f 1`
199 |         fi
200 | 
201 |         AC_CACHE_CHECK([whether avx is supported by operating system], [ax_cv_have_avx_ext],
202 |         [
203 |           ax_cv_have_avx_ext=no
204 |           if test "$((0x$ecx>>27&0x01))" = 1; then
205 |             if test "$((0x$xgetbv_eax&0x6))" = 6; then
206 |               ax_cv_have_avx_ext=yes
207 |             fi
208 |           fi
209 |         ])
210 |         if test x"$ax_cv_have_avx_ext" = x"no"; then
211 |           AC_MSG_WARN([Your processor supports AVX, but your operating system doesn't])
212 |         fi
213 |       fi
214 | 
215 |       if test "$ax_cv_have_mmx_ext" = yes; then
216 |         AX_CHECK_COMPILE_FLAG(-mmmx, ax_cv_support_mmx_ext=yes, [])
217 |         if test x"$ax_cv_support_mmx_ext" = x"yes"; then
218 |           SIMD_FLAGS="$SIMD_FLAGS -mmmx"
219 |           AC_DEFINE(HAVE_MMX,,[Support mmx instructions])
220 |         else
221 |           AC_MSG_WARN([Your processor supports mmx instructions but not your compiler, can you try another compiler?])
222 |         fi
223 |       fi
224 | 
225 |       if test "$ax_cv_have_sse_ext" = yes; then
226 |         AX_CHECK_COMPILE_FLAG(-msse, ax_cv_support_sse_ext=yes, [])
227 |         if test x"$ax_cv_support_sse_ext" = x"yes"; then
228 |           SIMD_FLAGS="$SIMD_FLAGS -msse -DINTEL_SSE"
229 |           AC_DEFINE(HAVE_SSE,,[Support SSE (Streaming SIMD Extensions) instructions])
230 |         else
231 |           AC_MSG_WARN([Your processor supports sse instructions but not your compiler, can you try another compiler?])
232 |         fi
233 |       fi
234 | 
235 |       if test "$ax_cv_have_sse2_ext" = yes; then
236 |         AX_CHECK_COMPILE_FLAG(-msse2, ax_cv_support_sse2_ext=yes, [])
237 |         if test x"$ax_cv_support_sse2_ext" = x"yes"; then
238 |           SIMD_FLAGS="$SIMD_FLAGS -msse2 -DINTEL_SSE2"
239 |           AC_DEFINE(HAVE_SSE2,,[Support SSE2 (Streaming SIMD Extensions 2) instructions])
240 |         else
241 |           AC_MSG_WARN([Your processor supports sse2 instructions but not your compiler, can you try another compiler?])
242 |         fi
243 |       fi
244 | 
245 |       if test "$ax_cv_have_sse3_ext" = yes; then
246 |         AX_CHECK_COMPILE_FLAG(-msse3, ax_cv_support_sse3_ext=yes, [])
247 |         if test x"$ax_cv_support_sse3_ext" = x"yes"; then
248 |           SIMD_FLAGS="$SIMD_FLAGS -msse3 -DINTEL_SSE3"
249 |           AC_DEFINE(HAVE_SSE3,,[Support SSE3 (Streaming SIMD Extensions 3) instructions])
250 |         else
251 |           AC_MSG_WARN([Your processor supports sse3 instructions but not your compiler, can you try another compiler?])
252 |         fi
253 |       fi
254 | 
255 |       if test "$ax_cv_have_pclmuldq_ext" = yes; then
256 |         AX_CHECK_COMPILE_FLAG(-mpclmul, ax_cv_support_pclmuldq_ext=yes, [])
257 |         if test x"$ax_cv_support_pclmuldq_ext" = x"yes"; then
258 |           SIMD_FLAGS="$SIMD_FLAGS -mpclmul -DINTEL_SSE4_PCLMUL"
259 |           AC_DEFINE(HAVE_PCLMULDQ,,[Support (PCLMULDQ) Carry-Free Multiplication])
260 |         else
261 |           AC_MSG_WARN([Your processor supports pclmuldq instructions but not your compiler, can you try another compiler?])
262 |         fi
263 |       fi
264 | 
265 |       if test "$ax_cv_have_ssse3_ext" = yes; then
266 |         AX_CHECK_COMPILE_FLAG(-mssse3, ax_cv_support_ssse3_ext=yes, [])
267 |         if test x"$ax_cv_support_ssse3_ext" = x"yes"; then
268 |           SIMD_FLAGS="$SIMD_FLAGS -mssse3 -DINTEL_SSSE3"
269 |           AC_DEFINE(HAVE_SSSE3,,[Support SSSE3 (Supplemental Streaming SIMD Extensions 3) instructions])
270 |         else
271 |           AC_MSG_WARN([Your processor supports ssse3 instructions but not your compiler, can you try another compiler?])
272 |         fi
273 |       fi
274 | 
275 |       if test "$ax_cv_have_sse41_ext" = yes; then
276 |         AX_CHECK_COMPILE_FLAG(-msse4.1, ax_cv_support_sse41_ext=yes, [])
277 |         if test x"$ax_cv_support_sse41_ext" = x"yes"; then
278 |           SIMD_FLAGS="$SIMD_FLAGS -msse4.1 -DINTEL_SSE4"
279 |           AC_DEFINE(HAVE_SSE4_1,,[Support SSE4.1 (Streaming SIMD Extensions 4.1) instructions])
280 |         else
281 |           AC_MSG_WARN([Your processor supports sse4.1 instructions but not your compiler, can you try another compiler?])
282 |         fi
283 |       fi
284 | 
285 |       if test "$ax_cv_have_sse42_ext" = yes; then
286 |         AX_CHECK_COMPILE_FLAG(-msse4.2, ax_cv_support_sse42_ext=yes, [])
287 |         if test x"$ax_cv_support_sse42_ext" = x"yes"; then
288 |           SIMD_FLAGS="$SIMD_FLAGS -msse4.2 -DINTEL_SSE4"
289 |           AC_DEFINE(HAVE_SSE4_2,,[Support SSE4.2 (Streaming SIMD Extensions 4.2) instructions])
290 |         else
291 |           AC_MSG_WARN([Your processor supports sse4.2 instructions but not your compiler, can you try another compiler?])
292 |         fi
293 |       fi
294 | 
295 |       if test "$ax_cv_have_avx_ext" = yes; then
296 |         AX_CHECK_COMPILE_FLAG(-mavx, ax_cv_support_avx_ext=yes, [])
297 |         if test x"$ax_cv_support_avx_ext" = x"yes"; then
298 |           SIMD_FLAGS="$SIMD_FLAGS -mavx"
299 |           AC_DEFINE(HAVE_AVX,,[Support AVX (Advanced Vector Extensions) instructions])
300 |         else
301 |           AC_MSG_WARN([Your processor supports avx instructions but not your compiler, can you try another compiler?])
302 |         fi
303 |       fi
304 |   ;;
305 |   esac
306 | 
307 |   AC_SUBST(SIMD_FLAGS)
308 | ])
309 | 


--------------------------------------------------------------------------------