├── README.md ├── memcpy_ve16 ├── data.h ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── memcpy_ve32 ├── data.h ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── memcpy_ve64 ├── data.h ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── vadd_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vadd_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vand_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vlse8_v ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── vmax_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vmax_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vmin_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vmin_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vor_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vsub_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vsub_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vxor_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── memcpy_ve8 ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h ├── main.c └── data.h ├── vlse16_v ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h ├── main.c └── data.h ├── vlse32_v ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── vlse64_v ├── link.ld ├── Makefile ├── test.S ├── test.ld ├── util.h └── main.c ├── vmaxu_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vmaxu_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vminu_vv ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vminu_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S ├── vrsub_vx ├── link.ld ├── Makefile ├── test.ld ├── util.h └── test.S └── LICENSE /README.md: -------------------------------------------------------------------------------- 1 | # 
riscv-vector-tests 2 | -------------------------------------------------------------------------------- /memcpy_ve16/data.h: -------------------------------------------------------------------------------- 1 | ../memcpy_ve8/data.h -------------------------------------------------------------------------------- /memcpy_ve32/data.h: -------------------------------------------------------------------------------- 1 | ../memcpy_ve8/data.h -------------------------------------------------------------------------------- /memcpy_ve64/data.h: -------------------------------------------------------------------------------- 1 | ../memcpy_ve8/data.h -------------------------------------------------------------------------------- /vadd_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vadd_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vand_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . 
= ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vlse8_v/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmax_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmax_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmin_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . 
= 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmin_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vor_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vsub_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . 
= ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vsub_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vxor_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /memcpy_ve16/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /memcpy_ve32/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . 
= ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /memcpy_ve64/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /memcpy_ve8/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vlse16_v/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vlse32_v/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . 
= 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vlse64_v/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmaxu_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vmaxu_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . 
= ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vminu_vv/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vminu_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . = ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /vrsub_vx/link.ld: -------------------------------------------------------------------------------- 1 | OUTPUT_ARCH( "riscv" ) 2 | ENTRY(_start) 3 | 4 | SECTIONS 5 | { 6 | . = 0x80000000; 7 | .text.init : { *(.text.init) } 8 | . = ALIGN(0x1000); 9 | .tohost : { *(.tohost) } 10 | . = ALIGN(0x1000); 11 | .text : { *(.text) } 12 | . 
= ALIGN(0x1000); 13 | .data : { *(.data) } 14 | .bss : { *(.bss) } 15 | _end = .; 16 | } 17 | 18 | -------------------------------------------------------------------------------- /memcpy_ve8/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vlse16_v/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vlse32_v/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | 
PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vlse64_v/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vlse8_v/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /memcpy_ve16/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static 
-mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /memcpy_ve32/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /memcpy_ve64/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.c test.S 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | 
-------------------------------------------------------------------------------- /vor_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vadd_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vadd_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc 
$(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vand_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmax_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmax_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf 
-nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmaxu_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmaxu_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmin_vv/Makefile: 
-------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vmin_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vminu_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib 
riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vminu_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vrsub_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vsub_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S 
data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vsub_vx/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vxor_vv/Makefile: -------------------------------------------------------------------------------- 1 | all: test.elf 2 | 3 | CFLAGS = -march=rv64gv -DPREALLOCATE=1 -static -mcmodel=medany -std=gnu99 -O2 \ 4 | -ffast-math -fno-common -fno-builtin-printf -nostdlib -nostartfiles -lm -lgcc -T test.ld 5 | 6 | test.elf: crt.S syscalls.c main.cpp test.S data.h 7 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-gcc $(CFLAGS) $^ -o $@ 8 | PATH=${HOME}/riscv-rvv0.9/bin LD_LIBRARY_PATH=${HOME}/riscv-rvv0.9/lib riscv64-unknown-elf-objdump -D $@ > $@.dmp 9 | 10 | clean: 11 | $(RM) -rf test.elf.dmp test.elf 12 | -------------------------------------------------------------------------------- /vlse8_v/test.S: -------------------------------------------------------------------------------- 1 | .global copy_data_ustride_vec 2 | # void 
copy_data_ustride_vec(int8_t *dest_data, int8_t *source_data, int8_t stride, int data_num); 3 | # a0=dest, a1=src, a2=stride (byte stride passed to vlse8.v), a3=data_num 4 | # 5 | copy_data_ustride_vec: 6 | _loop: 7 | vsetvli t0, a3, e8,m1 # t0 = vl, number of 8-bit elements handled this pass 8 | slli t2, t0, 0 # t2 = vl << 0: destination byte step for 8-bit elements 9 | mul t1, t0, a2 # t1 = vl * stride: source byte step 10 | vlse8.v v1, (a1), a2 # Strided load of 8-bit elements 11 | add a1, a1, t1 # Bump source pointer 12 | sub a3, a3, t2 # Decrement count (t2 == vl here) 13 | vse8.v v1, (a0) # Unit-stride store of 8-bit elements 14 | add a0, a0, t2 # Bump destination pointer 15 | bnez a3, _loop # Any more? 16 | ret # Return 17 | -------------------------------------------------------------------------------- /vlse16_v/test.S: -------------------------------------------------------------------------------- 1 | .global copy_data_ustride_vec 2 | # void copy_data_ustride_vec(int16_t *dest_data, int16_t *source_data, int16_t stride, int data_num); 3 | # a0=dest, a1=src, a2=stride (byte stride passed to vlse16.v), a3=data_num 4 | # 5 | copy_data_ustride_vec: 6 | _loop: 7 | vsetvli t0, a3, e16,m1 # t0 = vl, number of 16-bit elements handled this pass 8 | slli t2, t0, 1 # t2 = vl * 2: destination byte step for 16-bit elements 9 | mul t1, t0, a2 # t1 = vl * stride: source byte step 10 | vlse16.v v1, (a1), a2 # Strided load of 16-bit elements 11 | add a1, a1, t1 # Bump source pointer 12 | sub a3, a3, t0 # Decrement count 13 | vse16.v v1, (a0) # Unit-stride store of 16-bit elements 14 | add a0, a0, t2 # Bump destination pointer 15 | bnez a3, _loop # Any more? 
16 | ret # Return 17 | -------------------------------------------------------------------------------- /vlse32_v/test.S: -------------------------------------------------------------------------------- 1 | .global copy_data_ustride_vec 2 | # void copy_data_ustride_vec(int32_t *dest_data, int32_t *source_data, int32_t stride, int data_num); 3 | # a0=dest, a1=src, a2=stride (byte stride passed to vlse32.v), a3=data_num 4 | # 5 | copy_data_ustride_vec: 6 | _loop: 7 | vsetvli t0, a3, e32,m1 # t0 = vl, number of 32-bit elements handled this pass 8 | slli t2, t0, 2 # t2 = vl * 4: destination byte step for 32-bit elements 9 | mul t1, t0, a2 # t1 = vl * stride: source byte step 10 | vlse32.v v1, (a1), a2 # Strided load of 32-bit elements 11 | add a1, a1, t1 # Bump source pointer 12 | sub a3, a3, t0 # Decrement count 13 | vse32.v v1, (a0) # Unit-stride store of 32-bit elements 14 | add a0, a0, t2 # Bump destination pointer 15 | bnez a3, _loop # Any more? 16 | ret # Return 17 | -------------------------------------------------------------------------------- /vlse64_v/test.S: -------------------------------------------------------------------------------- 1 | .global copy_data_ustride_vec 2 | # void copy_data_ustride_vec(int64_t *dest_data, int64_t *source_data, int64_t stride, int data_num); 3 | # a0=dest, a1=src, a2=stride (byte stride passed to vlse64.v), a3=data_num 4 | # 5 | copy_data_ustride_vec: 6 | _loop: 7 | vsetvli t0, a3, e64,m1 # t0 = vl, number of 64-bit elements handled this pass 8 | slli t2, t0, 3 # t2 = vl * 8: destination byte step for 64-bit elements 9 | mul t1, t0, a2 # t1 = vl * stride: source byte step 10 | vlse64.v v1, (a1), a2 # Strided load of 64-bit elements 11 | add a1, a1, t1 # Bump source pointer 12 | sub a3, a3, t0 # Decrement count 13 | vse64.v v1, (a0) # Unit-stride store of 64-bit elements 14 | add a0, a0, t2 # Bump destination pointer 15 | bnez a3, _loop # Any more? 
16 | ret # Return 17 | -------------------------------------------------------------------------------- /memcpy_ve8/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global copy_data_vec 3 | # void copy_data_vec(int8_t *dest_data, int8_t *source_data, int data_num); 4 | # a0=dest, a1=src, a2=n (number of 8-bit elements still to copy) 5 | # 6 | copy_data_vec: 7 | mv a3, a0 # Copy destination 8 | .loop: 9 | vsetvli t0, a2, e8,m1 # t0 = vl for this pass; AVL is the remaining count in a2 (not the dest pointer in a3) 10 | vle8.v v0, (a1) # Load vl 8-bit elements 11 | add a1, a1, t0 # Bump source pointer 12 | sub a2, a2, t0 # Decrement count 13 | vse8.v v0, (a3) # Store vl 8-bit elements 14 | add a3, a3, t0 # Bump destination pointer 15 | bnez a2, .loop # Any more? 16 | ret # Return 17 | 18 | 19 | .global copy_data_mask_vec 20 | # void copy_data_mask_vec(int8_t *dest_data, int8_t *source_data, int8_t *mask, int data_num); 21 | # a0=dest, a1=src, a2=mask, a3=data_num 22 | # 23 | copy_data_mask_vec: 24 | li t1, 8 # Mask bytes consumed per pass = VLEN(512) / SEW(8) / 8 bits per byte (hard-coded for VLEN=512) 25 | _loop: 26 | vsetvli t0, t1, e8,m1 # t0 = number of mask bytes to load 27 | vle8.v v0, (a2) # Load the next mask bits into v0 28 | add a2, a2, t0 # Bump mask pointer 29 | 30 | vsetvli t0, a3, e8,m1 # t0 = vl, number of 8-bit elements handled this pass 31 | vle8.v v1, (a1), v0.t # Masked load of 8-bit elements 32 | add a1, a1, t0 # Bump source pointer 33 | sub a3, a3, t0 # Decrement count 34 | vse8.v v1, (a0), v0.t # Masked store of 8-bit elements 35 | add a0, a0, t0 # Bump destination pointer 36 | bnez a3, _loop # Any more? 
37 | ret # Return 38 | -------------------------------------------------------------------------------- /memcpy_ve32/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global copy_data_vec 3 | # memcpy-style copy of n 32-bit elements (n counts elements, not bytes) 4 | # a0=dest, a1=src, a2=n 5 | # 6 | copy_data_vec: 7 | mv a3, a0 # Copy destination 8 | _loop: 9 | vsetvli t0, a2, e32,m1 # t0 = vl for this pass; AVL is the remaining count in a2 (not the dest pointer in a3) 10 | slli t1, t0, 2 # t1 = vl * 4: byte step for 32-bit elements 11 | vle32.v v0, (a1) # Load vl 32-bit elements 12 | add a1, a1, t1 # Bump source pointer 13 | sub a2, a2, t0 # Decrement count 14 | vse32.v v0, (a3) # Store vl 32-bit elements 15 | add a3, a3, t1 # Bump destination pointer 16 | bnez a2, _loop # Any more? 17 | ret # Return 18 | 19 | 20 | .global copy_data_mask_vec 21 | # void copy_data_mask_vec(int16_t *dest_data, int16_t *source_data, int16_t *mask, int data_num); 22 | # a0=dest, a1=src, a2=mask, a3=data_num (elements are accessed as 32-bit) 23 | # 24 | copy_data_mask_vec: 25 | li t1, 2 # Mask bytes consumed per pass = VLEN(512) / SEW(32) / 8 bits per byte (hard-coded for VLEN=512) 26 | _loop_mask: 27 | vsetvli t0, t1, e8,m1 # t0 = number of mask bytes to load 28 | vle8.v v0, (a2) # Load the next mask bits into v0 29 | add a2, a2, t0 # Bump mask pointer 30 | 31 | vsetvli t0, a3, e32,m1 # t0 = vl, number of 32-bit elements handled this pass 32 | slli t2, t0, 2 # t2 = vl * 4: byte step for 32-bit elements 33 | vle32.v v1, (a1), v0.t # Masked load of 32-bit elements 34 | add a1, a1, t2 # Bump source pointer 35 | sub a3, a3, t0 # Decrement count 36 | vse32.v v1, (a0), v0.t # Masked store of 32-bit elements 37 | add a0, a0, t2 # Bump destination pointer 38 | bnez a3, _loop_mask # Any more? 
39 | ret # Return 40 | -------------------------------------------------------------------------------- /memcpy_ve16/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global copy_data_vec 3 | # memcpy-style copy of n 16-bit elements (n counts elements, not bytes) 4 | # a0=dest, a1=src, a2=n 5 | # 6 | copy_data_vec: 7 | mv a3, a0 # Copy destination 8 | _loop: 9 | vsetvli t0, a2, e16,m1 # t0 = vl for this pass; AVL is the remaining count in a2 (not the dest pointer in a3) 10 | slli t1, t0, 1 # t1 = vl * 2: byte step for 16-bit elements 11 | vle16.v v0, (a1) # Load vl 16-bit elements 12 | add a1, a1, t1 # Bump source pointer 13 | sub a2, a2, t0 # Decrement count 14 | vse16.v v0, (a3) # Store vl 16-bit elements 15 | add a3, a3, t1 # Bump destination pointer 16 | bnez a2, _loop # Any more? 17 | ret # Return 18 | 19 | 20 | .global copy_data_mask_vec 21 | # void copy_data_mask_vec(int16_t *dest_data, int16_t *source_data, int16_t *mask, int data_num); 22 | # a0=dest, a1=src, a2=mask, a3=data_num 23 | # 24 | copy_data_mask_vec: 25 | li t1, 4 # Mask bytes consumed per pass = VLEN(512) / SEW(16) / 8 bits per byte (hard-coded for VLEN=512) 26 | _loop_mask: 27 | vsetvli t0, t1, e8,m1 # t0 = number of mask bytes to load 28 | vle8.v v0, (a2) # Load the next mask bits into v0 29 | add a2, a2, t0 # Bump mask pointer 30 | 31 | vsetvli t0, a3, e16,m1 # t0 = vl, number of 16-bit elements handled this pass 32 | slli t2, t0, 1 # t2 = vl * 2: byte step for 16-bit elements 33 | vle16.v v1, (a1), v0.t # Masked load of 16-bit elements 34 | add a1, a1, t2 # Bump source pointer 35 | sub a3, a3, t0 # Decrement count 36 | vse16.v v1, (a0), v0.t # Masked store of 16-bit elements 37 | add a0, a0, t2 # Bump destination pointer 38 | bnez a3, _loop_mask # Any more? 
39 | ret # Return 40 | -------------------------------------------------------------------------------- /memcpy_ve64/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global copy_data_vec 3 | # memcpy-style copy of n 64-bit elements (n counts elements, not bytes) 4 | # a0=dest, a1=src, a2=n 5 | # 6 | copy_data_vec: 7 | mv a3, a0 # Copy destination 8 | _loop: 9 | vsetvli t0, a2, e64,m1 # t0 = vl for this pass; AVL is the remaining count in a2 (not the dest pointer in a3) 10 | slli t1, t0, 3 # t1 = vl * 8: byte step for 64-bit elements 11 | vle64.v v0, (a1) # Load vl 64-bit elements 12 | add a1, a1, t1 # Bump source pointer 13 | sub a2, a2, t0 # Decrement count 14 | vse64.v v0, (a3) # Store vl 64-bit elements 15 | add a3, a3, t1 # Bump destination pointer 16 | bnez a2, _loop # Any more? 17 | ret # Return 18 | 19 | 20 | .global copy_data_mask_vec 21 | # void copy_data_mask_vec(int64_t *dest_data, int64_t *source_data, int64_t *mask, int data_num); 22 | # a0=dest, a1=src, a2=mask, a3=data_num 23 | # 24 | copy_data_mask_vec: 25 | li t1, 1 # Mask bytes consumed per pass = VLEN(512) / SEW(64) / 8 bits per byte (hard-coded for VLEN=512) 26 | _loop_mask: 27 | vsetvli t0, t1, e8,m1 # t0 = number of mask bytes to load 28 | vle8.v v0, (a2) # Load the next mask bits into v0 29 | add a2, a2, t0 # Bump mask pointer 30 | 31 | vsetvli t0, a3, e64,m1 # t0 = vl, number of 64-bit elements handled this pass 32 | slli t2, t0, 3 # t2 = vl * 8: byte step for 64-bit elements 33 | vle64.v v1, (a1), v0.t # Masked load of 64-bit elements 34 | add a1, a1, t2 # Bump source pointer 35 | sub a3, a3, t0 # Decrement count 36 | vse64.v v1, (a0), v0.t # Masked store of 64-bit elements 37 | add a0, a0, t2 # Bump destination pointer 38 | bnez a3, _loop_mask # Any more? 39 | ret # Return 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2020, msyksphinz 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /vadd_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vadd_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vand_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vlse8_v/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmax_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmax_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmin_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmin_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vor_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vsub_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vsub_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vxor_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /memcpy_ve16/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /memcpy_ve32/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /memcpy_ve64/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /memcpy_ve8/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vlse16_v/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vlse32_v/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vlse64_v/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmaxu_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vmaxu_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vminu_vv/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vminu_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vrsub_vx/test.ld: -------------------------------------------------------------------------------- 1 | /*======================================================================*/ 2 | /* Proxy kernel linker script */ 3 | /*======================================================================*/ 4 | /* This is the linker script used when building the proxy kernel. 
*/ 5 | 6 | /*----------------------------------------------------------------------*/ 7 | /* Setup */ 8 | /*----------------------------------------------------------------------*/ 9 | 10 | /* The OUTPUT_ARCH command specifies the machine architecture where the 11 | argument is one of the names used in the BFD library. More 12 | specifically one of the entries in bfd/cpu-mips.c */ 13 | 14 | OUTPUT_ARCH( "riscv" ) 15 | ENTRY(_start) 16 | 17 | /*----------------------------------------------------------------------*/ 18 | /* Sections */ 19 | /*----------------------------------------------------------------------*/ 20 | 21 | SECTIONS 22 | { 23 | 24 | /* text: test code section, linked starting at 0x80000000 */ 25 | . = 0x80000000; 26 | .text.init : { *(.text.init) } 27 | 28 | . = ALIGN(0x1000); 29 | .tohost : { *(.tohost) } 30 | 31 | . = ALIGN(0x1000); 32 | .text : { *(.text) } 33 | 34 | /* data segment */ 35 | .data : { *(.data) } 36 | 37 | .sdata : { 38 | __global_pointer$ = . + 0x800; 39 | *(.srodata.cst16) *(.srodata.cst8) *(.srodata.cst4) *(.srodata.cst2) *(.srodata*) 40 | *(.sdata .sdata.* .gnu.linkonce.s.*) 41 | } 42 | 43 | /* bss segment */ 44 | .sbss : { 45 | *(.sbss .sbss.* .gnu.linkonce.sb.*) 46 | *(.scommon) 47 | } 48 | .bss : { *(.bss) } 49 | 50 | /* thread-local data segment */ 51 | .tdata : 52 | { 53 | _tdata_begin = .; 54 | *(.tdata) 55 | _tdata_end = .; 56 | } 57 | .tbss : 58 | { 59 | *(.tbss) 60 | _tbss_end = .; 61 | } 62 | 63 | /* End of uninitialized data segment */ 64 | _end = .; 65 | } 66 | 67 | -------------------------------------------------------------------------------- /vadd_vv/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 

#ifndef __UTIL_H
#define __UTIL_H

// Defined outside this header; only declared here.
extern void setStats(int enable);

// Provides uint64_t and uintptr_t, which lfsr() and insn_len() use.
#include <stdint.h>

// One-argument, statement-form compile-time assertion: when cond is false
// both case labels are 0 and the compiler rejects the duplicate case value.
// Any earlier definition (C11 <assert.h> supplies one) is removed first so
// that this form is the one in effect without a redefinition diagnostic.
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

// Compares the first n ints of test against verify.
// Returns 0 when every element matches, otherwise the 1-based index of the
// first mismatch.  A non-positive n compares nothing and returns 0.
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the loop above stops one short, so check the last element here.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Same contract as verify(), for doubles compared with ==.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: element i mismatched (i+1); otherwise element i+1 did (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Spin barrier for ncores callers.  Each caller flips its thread-local
// threadsense; the caller that brings count to ncores resets count and
// publishes its threadsense through sense, and every other caller spins
// until sense equals its own threadsense.
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

// One shift step: XORs bits 0 and 1 of x, shifts x right by one and places
// the XOR result at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

// Length in bytes (2 or 4) of the instruction whose first halfword is at pc.
// In the RISC-V encoding only low bits 0b11 mark a 32-bit instruction; 0b00,
// 0b01 and 0b10 are all 16-bit compressed encodings, so testing for "any low
// bit set" would misreport two of the three compressed quadrants as 4 bytes.
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
// Runs code and prints cycle / instruction statistics for it.  Expands to
// uses of read_csr, printf and a variable named cid, all of which must be
// visible at the point of use (read_csr presumably comes from encoding.h --
// confirm).  iter is parenthesized so that an expression argument such as
// n+1 divides as a whole.
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/* --------------------------------------------------------------------------------
 * /vadd_vx/util.h:
 * -------------------------------------------------------------------------------- */
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

// Defined outside this header; only declared here.
extern void setStats(int enable);

// Provides uint64_t and uintptr_t, which lfsr() and insn_len() use.
#include <stdint.h>

// One-argument, statement-form compile-time assertion: when cond is false
// both case labels are 0 and the compiler rejects the duplicate case value.
// Any earlier definition (C11 <assert.h> supplies one) is removed first so
// that this form is the one in effect without a redefinition diagnostic.
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

// Compares the first n ints of test against verify.
// Returns 0 when every element matches, otherwise the 1-based index of the
// first mismatch.  A non-positive n compares nothing and returns 0.
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the loop above stops one short, so check the last element here.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Same contract as verify(), for doubles compared with ==.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: element i mismatched (i+1); otherwise element i+1 did (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Spin barrier for ncores callers.  Each caller flips its thread-local
// threadsense; the caller that brings count to ncores resets count and
// publishes its threadsense through sense, and every other caller spins
// until sense equals its own threadsense.
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

// One shift step: XORs bits 0 and 1 of x, shifts x right by one and places
// the XOR result at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

// Length in bytes (2 or 4) of the instruction whose first halfword is at pc.
// In the RISC-V encoding only low bits 0b11 mark a 32-bit instruction; 0b00,
// 0b01 and 0b10 are all 16-bit compressed encodings, so testing for "any low
// bit set" would misreport two of the three compressed quadrants as 4 bytes.
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
// Runs code and prints cycle / instruction statistics for it.  Expands to
// uses of read_csr, printf and a variable named cid, all of which must be
// visible at the point of use (read_csr presumably comes from encoding.h --
// confirm).  iter is parenthesized so that an expression argument such as
// n+1 divides as a whole.
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/* --------------------------------------------------------------------------------
 * /vand_vv/util.h:
 * -------------------------------------------------------------------------------- */
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

// Defined outside this header; only declared here.
extern void setStats(int enable);

// Provides uint64_t and uintptr_t, which lfsr() and insn_len() use.
#include <stdint.h>

// One-argument, statement-form compile-time assertion: when cond is false
// both case labels are 0 and the compiler rejects the duplicate case value.
// Any earlier definition (C11 <assert.h> supplies one) is removed first so
// that this form is the one in effect without a redefinition diagnostic.
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

// Compares the first n ints of test against verify.
// Returns 0 when every element matches, otherwise the 1-based index of the
// first mismatch.  A non-positive n compares nothing and returns 0.
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the loop above stops one short, so check the last element here.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Same contract as verify(), for doubles compared with ==.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: element i mismatched (i+1); otherwise element i+1 did (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Spin barrier for ncores callers.  Each caller flips its thread-local
// threadsense; the caller that brings count to ncores resets count and
// publishes its threadsense through sense, and every other caller spins
// until sense equals its own threadsense.
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

// One shift step: XORs bits 0 and 1 of x, shifts x right by one and places
// the XOR result at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

// Length in bytes (2 or 4) of the instruction whose first halfword is at pc.
// In the RISC-V encoding only low bits 0b11 mark a 32-bit instruction; 0b00,
// 0b01 and 0b10 are all 16-bit compressed encodings, so testing for "any low
// bit set" would misreport two of the three compressed quadrants as 4 bytes.
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
// Runs code and prints cycle / instruction statistics for it.  Expands to
// uses of read_csr, printf and a variable named cid, all of which must be
// visible at the point of use (read_csr presumably comes from encoding.h --
// confirm).  iter is parenthesized so that an expression argument such as
// n+1 divides as a whole.
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/* --------------------------------------------------------------------------------
 * /vlse16_v/util.h:
 * -------------------------------------------------------------------------------- */
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

// Defined outside this header; only declared here.
extern void setStats(int enable);

// Provides uint64_t and uintptr_t, which lfsr() and insn_len() use.
#include <stdint.h>

// One-argument, statement-form compile-time assertion: when cond is false
// both case labels are 0 and the compiler rejects the duplicate case value.
// Any earlier definition (C11 <assert.h> supplies one) is removed first so
// that this form is the one in effect without a redefinition diagnostic.
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

// Compares the first n ints of test against verify.
// Returns 0 when every element matches, otherwise the 1-based index of the
// first mismatch.  A non-positive n compares nothing and returns 0.
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the loop above stops one short, so check the last element here.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Same contract as verify(), for doubles compared with ==.
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  if (n <= 0)
    return 0;
  // Unrolled by two for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: element i mismatched (i+1); otherwise element i+1 did (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

// Spin barrier for ncores callers.  Each caller flips its thread-local
// threadsense; the caller that brings count to ncores resets count and
// publishes its threadsense through sense, and every other caller spins
// until sense equals its own threadsense.
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

// One shift step: XORs bits 0 and 1 of x, shifts x right by one and places
// the XOR result at bit 62.
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

// Length in bytes (2 or 4) of the instruction whose first halfword is at pc.
// In the RISC-V encoding only low bits 0b11 mark a 32-bit instruction; 0b00,
// 0b01 and 0b10 are all 16-bit compressed encodings, so testing for "any low
// bit set" would misreport two of the three compressed quadrants as 4 bytes.
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)
// Runs code and prints cycle / instruction statistics for it.  Expands to
// uses of read_csr, printf and a variable named cid, all of which must be
// visible at the point of use (read_csr presumably comes from encoding.h --
// confirm).  iter is parenthesized so that an expression argument such as
// n+1 divides as a whole.
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/* --------------------------------------------------------------------------------
 * /vlse32_v/util.h:
 * -------------------------------------------------------------------------------- */
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vlse64_v/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vlse8_v/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmax_vv/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmax_vx/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmaxu_vv/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmaxu_vx/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmin_vv/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vmin_vx/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vminu_vv/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vminu_vx/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vor_vv/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.

#ifndef __UTIL_H
#define __UTIL_H

/* Declared here, defined elsewhere; presumably toggles statistics collection. */
extern void setStats(int enable);

#include <stdint.h>

/*
 * Compile-time assertion for use inside a function body: a false condition
 * produces two `case 0` labels, which does not compile. Any earlier
 * definition (for example from <assert.h>) is dropped first.
 */
#undef static_assert
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; }

/*
 * Compares the first n ints of test against verify.
 * Returns 0 when all match, otherwise the 1-based index of the first mismatch.
 */
static int verify(int n, const volatile int* test, const int* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    int t0 = test[i], t1 = test[i+1];
    int v0 = verify[i], v1 = verify[i+1];
    if (t0 != v0) return i+1;
    if (t1 != v1) return i+2;
  }
  // Odd n: the pairwise loop leaves the last element unchecked.
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Same contract as verify(), for doubles compared with exact ==.
 */
static int verifyDouble(int n, const volatile double* test, const double* verify)
{
  int i;
  // Unrolled for faster verification
  for (i = 0; i < n/2*2; i+=2)
  {
    double t0 = test[i], t1 = test[i+1];
    double v0 = verify[i], v1 = verify[i+1];
    int eq1 = t0 == v0, eq2 = t1 == v1;
    // eq1 == 0: the first of the pair differs (i+1); otherwise the second (i+2).
    if (!(eq1 & eq2)) return i+1+eq1;
  }
  if (n % 2 != 0 && test[n-1] != verify[n-1])
    return n;
  return 0;
}

/*
 * Spin barrier: a caller returns once ncores callers have entered.
 * Each caller flips its thread-local sense; the last arrival resets the
 * counter and publishes the new sense, releasing the callers that spin on it.
 */
static void __attribute__((noinline)) barrier(int ncores)
{
  static volatile int sense;
  static volatile int count;
  static __thread int threadsense;

  __sync_synchronize();

  threadsense = !threadsense;
  if (__sync_fetch_and_add(&count, 1) == ncores-1)
  {
    count = 0;
    sense = threadsense;
  }
  else while(sense != threadsense)
    ;

  __sync_synchronize();
}

/*
 * One step of a right-shifting linear-feedback shift register: the XOR of
 * bits 0 and 1 of x is fed back into bit 62.
 */
static uint64_t lfsr(uint64_t x)
{
  uint64_t bit = (x ^ (x >> 1)) & 1;
  return (x >> 1) | (bit << 62);
}

/*
 * Length in bytes of the RISC-V instruction at address pc: 4 when the two
 * low bits of its first halfword are 11, otherwise 2 (compressed).
 * Encodings longer than 32 bits are not distinguished from 32-bit ones.
 */
static uintptr_t insn_len(uintptr_t pc)
{
  return (*(unsigned short*)pc & 3) == 3 ? 4 : 2;
}

#ifdef __riscv
#include "encoding.h"
#endif

#define stringify_1(s) #s
#define stringify(s) stringify_1(s)

/*
 * Runs code and, when cid == 0, prints the elapsed mcycle count, cycles per
 * iteration and cycles per retired instruction (one decimal digit each).
 * read_csr, cid and printf must be available where the macro is expanded.
 */
#define stats(code, iter) do { \
    unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \
    code; \
    _c += read_csr(mcycle), _i += read_csr(minstret); \
    if (cid == 0) \
      printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
             stringify(code), _c, _c/(iter), 10*_c/(iter)%10, _c/_i, 10*_c/_i%10); \
  } while(0)

#endif //__UTIL_H

/*
--------------------------------------------------------------------------------
/vrsub_vx/util.h:
--------------------------------------------------------------------------------
*/
// See LICENSE for license details.
2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return (*(unsigned short*)pc & 3) ? 
4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /vsub_vv/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int 
threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /vsub_vx/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 
2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 
4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /vxor_vv/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int 
threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /memcpy_ve16/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 
2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 
4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /memcpy_ve32/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int 
threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /memcpy_ve64/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 
2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 
4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /memcpy_ve8/util.h: -------------------------------------------------------------------------------- 1 | // See LICENSE for license details. 2 | 3 | #ifndef __UTIL_H 4 | #define __UTIL_H 5 | 6 | extern void setStats(int enable); 7 | 8 | #include <stdint.h> 9 | 10 | #define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } 11 | // Returns 0 when the first n ints of test and verify match, otherwise the 1-based index of the first mismatch. 12 | static int verify(int n, const volatile int* test, const int* verify) 13 | { 14 | int i; 15 | // Unrolled for faster verification 16 | for (i = 0; i < n/2*2; i+=2) 17 | { 18 | int t0 = test[i], t1 = test[i+1]; 19 | int v0 = verify[i], v1 = verify[i+1]; 20 | if (t0 != v0) return i+1; 21 | if (t1 != v1) return i+2; 22 | } 23 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 24 | return n; 25 | return 0; 26 | } 27 | // Returns 0 when the first n doubles of test and verify compare equal, otherwise the 1-based index of the first mismatch. 28 | static int verifyDouble(int n, const volatile double* test, const double* verify) 29 | { 30 | int i; 31 | // Unrolled for faster verification 32 | for (i = 0; i < n/2*2; i+=2) 33 | { 34 | double t0 = test[i], t1 = test[i+1]; 35 | double v0 = verify[i], v1 = verify[i+1]; 36 | int eq1 = t0 == v0, eq2 = t1 == v1; 37 | if (!(eq1 & eq2)) return i+1+eq1; 38 | } 39 | if (n % 2 != 0 && test[n-1] != verify[n-1]) 40 | return n; 41 | return 0; 42 | } 43 | // Spin barrier: the last of ncores arrivals resets count and publishes the new sense; earlier arrivals spin until they see it. 44 | static void __attribute__((noinline)) barrier(int ncores) 45 | { 46 | static volatile int sense; 47 | static volatile int count; 48 | static __thread int 
threadsense; 49 | 50 | __sync_synchronize(); 51 | 52 | threadsense = !threadsense; 53 | if (__sync_fetch_and_add(&count, 1) == ncores-1) 54 | { 55 | count = 0; 56 | sense = threadsense; 57 | } 58 | else while(sense != threadsense) 59 | ; 60 | 61 | __sync_synchronize(); 62 | } 63 | // One step of a right-shifting LFSR: the XOR of bits 0 and 1 of x is fed back in at bit 62. 64 | static uint64_t lfsr(uint64_t x) 65 | { 66 | uint64_t bit = (x ^ (x >> 1)) & 1; 67 | return (x >> 1) | (bit << 62); 68 | } 69 | // Length in bytes of the instruction at pc: 4 when its low two bits are 11, otherwise 2 (compressed). 70 | static uintptr_t insn_len(uintptr_t pc) 71 | { 72 | return ((*(unsigned short*)pc & 3) == 3) ? 4 : 2; 73 | } 74 | 75 | #ifdef __riscv 76 | #include "encoding.h" 77 | #endif 78 | // stats(code, iter): runs code between two reads of mcycle/minstret and, when cid == 0, prints total cycles, cycles per iteration and CPI. 79 | #define stringify_1(s) #s 80 | #define stringify(s) stringify_1(s) 81 | #define stats(code, iter) do { \ 82 | unsigned long _c = -read_csr(mcycle), _i = -read_csr(minstret); \ 83 | code; \ 84 | _c += read_csr(mcycle), _i += read_csr(minstret); \ 85 | if (cid == 0) \ 86 | printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ 87 | stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ 88 | } while(0) 89 | 90 | #endif //__UTIL_H 91 | -------------------------------------------------------------------------------- /vadd_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global add_data_vec_8 3 | # void add_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = src2[i] + src1 for i in 0..n-1 6 | add_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vadd.vx v2, v1, a1 # Vector Add 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 
17 | ret # Return 18 | 19 | .text 20 | .global add_data_vec_16 21 | # void add_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = src2[i] + src1 for i in 0..n-1 24 | add_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vadd.vx v2, v1, a1 # Vector Add 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 35 | ret # Return 36 | 37 | 38 | .text 39 | .global add_data_vec_32 40 | # void add_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = src2[i] + src1 for i in 0..n-1 43 | add_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vadd.vx v2, v1, a1 # Vector Add 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 54 | ret # Return 55 | 56 | 57 | .text 58 | .global add_data_vec_64 59 | # void add_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = src2[i] + src1 for i in 0..n-1 62 | add_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vadd.vx v2, v1, a1 # Vector Add 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 
73 | ret # Return 74 | -------------------------------------------------------------------------------- /vmax_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global max_data_vec_8 3 | # void max_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = max(src2[i], src1), signed, for i in 0..n-1 6 | max_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vmax.vx v2, v1, a1 # Vector Max 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 17 | ret # Return 18 | 19 | .text 20 | .global max_data_vec_16 21 | # void max_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = max(src2[i], src1), signed, for i in 0..n-1 24 | max_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vmax.vx v2, v1, a1 # Vector Max 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 35 | ret # Return 36 | 37 | 38 | .text 39 | .global max_data_vec_32 40 | # void max_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = max(src2[i], src1), signed, for i in 0..n-1 43 | max_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vmax.vx v2, v1, a1 # Vector Max 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 
54 | ret # Return 55 | 56 | 57 | .text 58 | .global max_data_vec_64 59 | # void max_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = max(src2[i], src1), signed, for i in 0..n-1 62 | max_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vmax.vx v2, v1, a1 # Vector Max 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 73 | ret # Return 74 | -------------------------------------------------------------------------------- /vmin_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global min_data_vec_8 3 | # void min_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = min(src2[i], src1), signed, for i in 0..n-1 6 | min_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vmin.vx v2, v1, a1 # Vector Min 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 17 | ret # Return 18 | 19 | .text 20 | .global min_data_vec_16 21 | # void min_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = min(src2[i], src1), signed, for i in 0..n-1 24 | min_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vmin.vx v2, v1, a1 # Vector Min 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 
35 | ret # Return 36 | 37 | 38 | .text 39 | .global min_data_vec_32 40 | # void min_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = min(src2[i], src1), signed, for i in 0..n-1 43 | min_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vmin.vx v2, v1, a1 # Vector Min 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 54 | ret # Return 55 | 56 | 57 | .text 58 | .global min_data_vec_64 59 | # void min_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = min(src2[i], src1), signed, for i in 0..n-1 62 | min_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vmin.vx v2, v1, a1 # Vector Min 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 73 | ret # Return 74 | -------------------------------------------------------------------------------- /vsub_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global sub_data_vec_8 3 | # void sub_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = src2[i] - src1 for i in 0..n-1 6 | sub_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vsub.vx v2, v1, a1 # Vector Sub 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 
17 | ret # Return 18 | 19 | .text 20 | .global sub_data_vec_16 21 | # void sub_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = src2[i] - src1 for i in 0..n-1 24 | sub_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vsub.vx v2, v1, a1 # Vector Sub 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 35 | ret # Return 36 | 37 | 38 | .text 39 | .global sub_data_vec_32 40 | # void sub_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = src2[i] - src1 for i in 0..n-1 43 | sub_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vsub.vx v2, v1, a1 # Vector Sub 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 54 | ret # Return 55 | 56 | 57 | .text 58 | .global sub_data_vec_64 59 | # void sub_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = src2[i] - src1 for i in 0..n-1 62 | sub_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vsub.vx v2, v1, a1 # Vector Sub 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 
73 | ret # Return 74 | -------------------------------------------------------------------------------- /vmaxu_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global maxu_data_vec_8 3 | # void maxu_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = max(src2[i], src1), unsigned, for i in 0..n-1 6 | maxu_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vmaxu.vx v2, v1, a1 # Vector Max (unsigned) 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 17 | ret # Return 18 | 19 | .text 20 | .global maxu_data_vec_16 21 | # void maxu_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = max(src2[i], src1), unsigned, for i in 0..n-1 24 | maxu_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vmaxu.vx v2, v1, a1 # Vector Max (unsigned) 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 35 | ret # Return 36 | 37 | 38 | .text 39 | .global maxu_data_vec_32 40 | # void maxu_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = max(src2[i], src1), unsigned, for i in 0..n-1 43 | maxu_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vmaxu.vx v2, v1, a1 # Vector Max (unsigned) 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 
54 | ret # Return 55 | 56 | 57 | .text 58 | .global maxu_data_vec_64 59 | # void maxu_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = max(src2[i], src1), unsigned, for i in 0..n-1 62 | maxu_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vmaxu.vx v2, v1, a1 # Vector Max (unsigned) 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 73 | ret # Return 74 | -------------------------------------------------------------------------------- /vminu_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global minu_data_vec_8 3 | # void minu_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = min(src2[i], src1), unsigned, for i in 0..n-1 6 | minu_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vminu.vx v2, v1, a1 # Vector Min (unsigned) 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 17 | ret # Return 18 | 19 | .text 20 | .global minu_data_vec_16 21 | # void minu_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # dest[i] = min(src2[i], src1), unsigned, for i in 0..n-1 24 | minu_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # Byte step = vl * 2 28 | vle16.v v1, (a2) # Load bytes 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vminu.vx v2, v1, a1 # Vector Min (unsigned) 32 | vse16.v v2, (a0) # Store bytes 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 
35 | ret # Return 36 | 37 | 38 | .text 39 | .global minu_data_vec_32 40 | # void minu_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # dest[i] = min(src2[i], src1), unsigned, for i in 0..n-1 43 | minu_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # Byte step = vl * 4 47 | vle32.v v1, (a2) # Load bytes 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vminu.vx v2, v1, a1 # Vector Min (unsigned) 51 | vse32.v v2, (a0) # Store bytes 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 54 | ret # Return 55 | 56 | 57 | .text 58 | .global minu_data_vec_64 59 | # void minu_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # dest[i] = min(src2[i], src1), unsigned, for i in 0..n-1 62 | minu_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # Byte step = vl * 8 66 | vle64.v v1, (a2) # Load bytes 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vminu.vx v2, v1, a1 # Vector Min (unsigned) 70 | vse64.v v2, (a0) # Store bytes 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 73 | ret # Return 74 | -------------------------------------------------------------------------------- /vrsub_vx/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global rsub_data_vec_8 3 | # void rsub_data_vec_8(int8_t *dest_data, int8_t src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = src1 - src2[i] for i in 0..n-1 6 | rsub_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v1, (a2) # Load bytes 11 | add a2, a2, t0 # Bump pointer 12 | sub a3, a3, t0 # Decrement count 13 | vrsub.vx v2, v1, a1 # Vector Reverse Sub 14 | vse8.v v2, (a0) # Store bytes 15 | add a0, a0, t0 # Bump pointer 16 | bnez a3, .loop_8 # Any more? 
17 | ret # Return 18 | 19 | .text 20 | .global rsub_data_vec_16 21 | # void rsub_data_vec_16(int16_t *dest_data, int16_t src1, int16_t *src2, int data_num); 22 | # a0=dest, a1=src1, a2=src2, a3=n 23 | # 24 | rsub_data_vec_16: 25 | .loop_16: 26 | vsetvli t0, a3, e16,m1 # Vectors of 16b 27 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 28 | vle16.v v1, (a2) # Load 16b elements 29 | add a2, a2, t1 # Bump pointer 30 | sub a3, a3, t0 # Decrement count 31 | vrsub.vx v2, v1, a1 # Vector Reverse Sub: a1 - v1 32 | vse16.v v2, (a0) # Store 16b elements 33 | add a0, a0, t1 # Bump pointer 34 | bnez a3, .loop_16 # Any more? 35 | ret # Return 36 | 37 | 38 | .text 39 | .global rsub_data_vec_32 40 | # void rsub_data_vec_32(int32_t *dest_data, int32_t src1, int32_t *src2, int data_num); 41 | # a0=dest, a1=src1, a2=src2, a3=n 42 | # 43 | rsub_data_vec_32: 44 | .loop_32: 45 | vsetvli t0, a3, e32,m1 # Vectors of 32b 46 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 47 | vle32.v v1, (a2) # Load 32b elements 48 | add a2, a2, t1 # Bump pointer 49 | sub a3, a3, t0 # Decrement count 50 | vrsub.vx v2, v1, a1 # Vector Reverse Sub: a1 - v1 51 | vse32.v v2, (a0) # Store 32b elements 52 | add a0, a0, t1 # Bump pointer 53 | bnez a3, .loop_32 # Any more? 54 | ret # Return 55 | 56 | 57 | .text 58 | .global rsub_data_vec_64 59 | # void rsub_data_vec_64(int64_t *dest_data, int64_t src1, int64_t *src2, int data_num); 60 | # a0=dest, a1=src1, a2=src2, a3=n 61 | # 62 | rsub_data_vec_64: 63 | .loop_64: 64 | vsetvli t0, a3, e64,m1 # Vectors of 64b 65 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 66 | vle64.v v1, (a2) # Load 64b elements 67 | add a2, a2, t1 # Bump pointer 68 | sub a3, a3, t0 # Decrement count 69 | vrsub.vx v2, v1, a1 # Vector Reverse Sub: a1 - v1 70 | vse64.v v2, (a0) # Store 64b elements 71 | add a0, a0, t1 # Bump pointer 72 | bnez a3, .loop_64 # Any more? 
73 | ret # Return 74 | -------------------------------------------------------------------------------- /vlse8_v/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_data_ustride_vec(int8_t *dest_data, int8_t *source_data, int stride, int data_num); 4 | 5 | #include "data.h" 6 | 7 | int32_t vec_data[DATA_NUM] = {0}; 8 | int32_t scalar_data[DATA_NUM] = {0}; 9 | /* Clear both result buffers. */ 10 | void format_array() 11 | { 12 | for (int i = 0; i < DATA_NUM; i++) { 13 | vec_data[i] = 0; 14 | scalar_data[i] = 0; 15 | } 16 | } 17 | 18 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 19 | int check_data (const int64_t *vec_data, const int64_t *scalar_data, const int data_num) 20 | { 21 | for(int i = 0; i < data_num; i++) { 22 | if(vec_data[i] != scalar_data[i]) { 23 | return i + 1; 24 | } 25 | } 26 | return 0; 27 | } 28 | 29 | /* Scalar reference: plain element-by-element copy. */ 30 | void copy_data_scalar(int8_t *dest_data, int8_t *source_data, const int data_num) 31 | { 32 | for (int i = 0; i < data_num; i++) { 33 | dest_data[i] = source_data[i]; 34 | } 35 | } 36 | 37 | /* Scalar reference: dest_data[i] = source_data[i*stride] (stride counted in elements). */ 38 | void copy_data_ustride_scalar(int8_t *dest_data, int8_t *source_data, int8_t stride, const int data_num) 39 | { 40 | for (int i = 0; i < data_num; i++) { 41 | dest_data[i] = source_data[i*stride]; 42 | } 43 | } 44 | 45 | 46 | int test_ustride(int stride); 47 | int test_ustride_minus(int stride); 48 | /* Exit code: 0 on success, else (check_data result << 8) | test id. */ 49 | int main() 50 | { 51 | int result = 0; 52 | if ((result = test_ustride(1)) != 0) { 53 | return (result << 8) | 1; 54 | } 55 | if ((result = test_ustride(2)) != 0) { 56 | return (result << 8) | 2; 57 | } 58 | if ((result = test_ustride(3)) != 0) { 59 | return (result << 8) | 3; 60 | } 61 | if ((result = test_ustride_minus(-1)) != 0) { 62 | return (result << 8) | 4; 63 | } 64 | if ((result = test_ustride_minus(-2)) != 0) { 65 | return (result << 8) | 5; 66 | } 67 | if ((result = test_ustride_minus(-3)) != 0) { 68 | return (result << 8) | 6; 69 | } 70 | 71 | return 0; 72 | } 73 | 74 | /* Strided copy from the start of source_data: vector vs scalar reference. */ 75 | int test_ustride(int stride) 76 | { 77 | format_array(); 78 | 79 | const 
int data_num = (DATA_NUM / 4) * sizeof(int32_t) / sizeof(int8_t); 80 | copy_data_ustride_vec ((int8_t *)vec_data, (int8_t *)source_data, stride, data_num); 81 | copy_data_ustride_scalar ((int8_t *)scalar_data, (int8_t *)source_data, stride, data_num); 82 | 83 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 84 | } 85 | 86 | /* Strided copy starting near the end of source_data (called with negative strides). */ 87 | int test_ustride_minus(int stride) 88 | { 89 | format_array(); 90 | 91 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / sizeof(int8_t); 92 | copy_data_ustride_vec ((int8_t *)vec_data, (int8_t *)(source_data+DATA_NUM-1), stride, data_num); 93 | copy_data_ustride_scalar ((int8_t *)scalar_data, (int8_t *)(source_data+DATA_NUM-1), stride, data_num); 94 | 95 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 96 | } 97 | -------------------------------------------------------------------------------- /memcpy_ve8/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_data_vec(int8_t *dest_data, int8_t *source_data, int data_num); 4 | void copy_data_mask_vec(int8_t *dest_data, int8_t *source_data, int8_t *mask, int data_num); 5 | 6 | #include "data.h" 7 | 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Clear both result buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 20 | int check_data (const int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Scalar reference: plain element-by-element copy. */ 31 | void copy_data_scalar(int8_t *dest_data, int8_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference: element i is copied when bit i of mask is set, otherwise 0 is stored. */ 39 | void copy_data_mask_scalar(int8_t 
*dest_data, int8_t *source_data, int8_t *mask, const int data_num) 40 | { 41 | for (int i = 0; i < data_num; i++) { 42 | dest_data[i] = ((mask[i/8] >> (i%8)) & 0x1) ? source_data[i] : 0; 43 | } 44 | } 45 | 46 | 47 | int test_0(); 48 | int test_vl(); 49 | int test_mask(); 50 | /* Exit code 10, 20 or 30 identifies the failing test; 0 means all passed. */ 51 | int main() 52 | { 53 | if (test_0() != 0) { 54 | return 10; 55 | } 56 | if (test_vl() != 0) { 57 | return 20; 58 | } 59 | if (test_mask() != 0) { 60 | return 30; 61 | } 62 | return 0; 63 | } 64 | 65 | /* Full-length copy: vector result must equal the scalar reference. */ 66 | int test_0() 67 | { 68 | format_array(); 69 | 70 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int8_t); 71 | copy_data_vec ((int8_t *)vec_data, (int8_t *)source_data, data_num); 72 | copy_data_scalar((int8_t *)scalar_data, (int8_t *)source_data, data_num); 73 | 74 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 75 | } 76 | /* Copy 10 fewer elements; the whole buffers are still compared. */ 77 | int test_vl() 78 | { 79 | format_array(); 80 | 81 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int8_t) - 10; 82 | copy_data_vec ((int8_t *)vec_data, (int8_t *)source_data, data_num); 83 | copy_data_scalar((int8_t *)scalar_data, (int8_t *)source_data, data_num); 84 | 85 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 86 | } 87 | /* Masked copy: vector result must equal the scalar reference. */ 88 | int test_mask() 89 | { 90 | format_array(); 91 | 92 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int8_t); 93 | copy_data_mask_vec((int8_t *)vec_data, (int8_t *)source_data, (int8_t *)mask_data, data_num); 94 | copy_data_mask_scalar ((int8_t *)scalar_data, (int8_t *)source_data, (int8_t *)mask_data, data_num); 95 | 96 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 97 | } 98 | -------------------------------------------------------------------------------- /vlse64_v/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void 
copy_data_ustride_vec(int64_t *dest_data, int64_t *source_data, int stride, int data_num); 5 | 6 | #include "data.h" 7 | 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Clear both result buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 20 | int check_data (const int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Scalar reference: plain element-by-element copy. */ 31 | void copy_data_scalar(int64_t *dest_data, int64_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference: dest_data[i] = source_data[i*stride] (stride counted in elements). */ 39 | void copy_data_ustride_scalar(int64_t *dest_data, int64_t *source_data, int64_t stride, const int data_num) 40 | { 41 | for (int i = 0; i < data_num; i++) { 42 | dest_data[i] = source_data[i*stride]; 43 | } 44 | } 45 | 46 | 47 | int test_ustride(int stride); 48 | int test_ustride_minus(int stride); 49 | /* Exit code: 0 on success, else (check_data result << 8) | test id. */ 50 | int main() 51 | { 52 | int result = 0; 53 | if ((result = test_ustride(8)) != 0) { 54 | return (result << 8) | 1; 55 | } 56 | if ((result = test_ustride_minus(-8)) != 0) { 57 | return (result << 8) | 4; 58 | } 59 | 60 | return 0; 61 | } 62 | 63 | /* Strided copy from the start of source_data: the vector routine gets the raw stride, the scalar reference gets it divided by the element size. */ 64 | int test_ustride(int stride) 65 | { 66 | format_array(); 67 | const size_t target_size = sizeof(int64_t); 68 | /* target_size is unsigned: cast it before dividing stride so the division stays signed. */ 69 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 70 | copy_data_ustride_vec ((int64_t *)vec_data, 71 | (int64_t *)source_data, 72 | stride, data_num); 73 | copy_data_ustride_scalar ((int64_t *)scalar_data, 74 | (int64_t *)source_data, 75 | stride / (int)target_size, data_num); 76 | 77 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 78 | } 79 | 80 | /* Strided copy starting near the end of source_data (called with a negative stride). */ 81 | int test_ustride_minus(int stride) 82 | { 83 | format_array(); 84 | const size_t 
target_size = sizeof(int64_t); 85 | /* target_size is unsigned: cast it before dividing the negative stride, otherwise -8 is converted to a huge unsigned value first. */ 86 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 87 | copy_data_ustride_vec ((int64_t *)vec_data, 88 | (int64_t *)(source_data+DATA_NUM-2), 89 | stride, data_num); 90 | copy_data_ustride_scalar ((int64_t *)scalar_data, 91 | (int64_t *)(source_data+DATA_NUM-2), 92 | stride / (int)target_size, data_num); 93 | 94 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 95 | } 96 | -------------------------------------------------------------------------------- /memcpy_ve16/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_data_vec(int16_t *dest_data, int16_t *source_data, int data_num); 4 | void copy_data_mask_vec(int16_t *dest_data, int16_t *source_data, int16_t *mask, int data_num); 5 | 6 | #include "data.h" 7 | 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Clear both result buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 20 | int check_data (int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Scalar reference: plain element-by-element copy. */ 31 | void copy_data_scalar(int16_t *dest_data, int16_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference: element i is copied when bit i of mask is set, otherwise 0 is stored. */ 39 | void copy_data_mask_scalar(int16_t *dest_data, int16_t *source_data, int16_t *mask, const int data_num) 40 | { 41 | const int elem_size = sizeof(int16_t) * 8; 42 | for (int i = 0; i < data_num; i++) { 43 | dest_data[i] = ((mask[i/elem_size] >> (i%elem_size)) & 0x1) ? 
source_data[i] : 0; 44 | } 45 | } 46 | 47 | 48 | int test_0(); 49 | int test_vl(); 50 | int test_mask(); 51 | /* Exit code 10, 20 or 30 identifies the failing test; 0 means all passed. */ 52 | int main() 53 | { 54 | if (test_0() != 0) { 55 | return 10; 56 | } 57 | if (test_vl() != 0) { 58 | return 20; 59 | } 60 | if (test_mask() != 0) { 61 | return 30; 62 | } 63 | return 0; 64 | } 65 | 66 | /* Full-length copy: vector result must equal the scalar reference. */ 67 | int test_0() 68 | { 69 | format_array(); 70 | 71 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int16_t); 72 | copy_data_vec ((int16_t *)vec_data, (int16_t *)source_data, data_num); 73 | copy_data_scalar((int16_t *)scalar_data, (int16_t *)source_data, data_num); 74 | 75 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 76 | } 77 | /* Copy 10 fewer elements; the whole buffers are still compared. */ 78 | int test_vl() 79 | { 80 | format_array(); 81 | 82 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int16_t) - 10; 83 | copy_data_vec ((int16_t *)vec_data, (int16_t *)source_data, data_num); 84 | copy_data_scalar((int16_t *)scalar_data, (int16_t *)source_data, data_num); 85 | 86 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 87 | } 88 | /* Masked copy: vector result must equal the scalar reference. */ 89 | int test_mask() 90 | { 91 | format_array(); 92 | 93 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int16_t); 94 | copy_data_mask_vec((int16_t *)vec_data, (int16_t *)source_data, (int16_t *)mask_data, data_num); 95 | copy_data_mask_scalar ((int16_t *)scalar_data, (int16_t *)source_data, (int16_t *)mask_data, data_num); 96 | 97 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 98 | } 99 | -------------------------------------------------------------------------------- /memcpy_ve64/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_data_vec(int64_t *dest_data, int64_t *source_data, int data_num); 4 | void copy_data_mask_vec(int64_t *dest_data, int64_t *source_data, int64_t *mask, int data_num); 5 | 
6 | #include "data.h" 7 | 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Clear both result buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 20 | int check_data (int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Scalar reference: plain element-by-element copy. */ 31 | void copy_data_scalar(int64_t *dest_data, int64_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference: element i is copied when bit i of mask is set, otherwise 0 is stored. */ 39 | void copy_data_mask_scalar(int64_t *dest_data, int64_t *source_data, int64_t *mask, const int data_num) 40 | { 41 | const int elem_size = sizeof(int64_t) * 8; 42 | for (int i = 0; i < data_num; i++) { 43 | dest_data[i] = ((mask[i/elem_size] >> (i%elem_size)) & 0x1) ? source_data[i] : 0; 44 | } 45 | } 46 | 47 | 48 | int test_0(); 49 | int test_vl(); 50 | int test_mask(); 51 | /* Exit code 10, 20 or 30 identifies the failing test; 0 means all passed. */ 52 | int main() 53 | { 54 | if (test_0() != 0) { 55 | return 10; 56 | } 57 | if (test_vl() != 0) { 58 | return 20; 59 | } 60 | if (test_mask() != 0) { 61 | return 30; 62 | } 63 | return 0; 64 | } 65 | 66 | /* Full-length copy: vector result must equal the scalar reference. */ 67 | int test_0() 68 | { 69 | format_array(); 70 | 71 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int64_t); 72 | copy_data_vec ((int64_t *)vec_data, (int64_t *)source_data, data_num); 73 | copy_data_scalar((int64_t *)scalar_data, (int64_t *)source_data, data_num); 74 | 75 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 76 | } 77 | /* Copy 10 fewer elements; the whole buffers are still compared. */ 78 | int test_vl() 79 | { 80 | format_array(); 81 | 82 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int64_t) - 10; 83 | copy_data_vec ((int64_t *)vec_data, (int64_t *)source_data, data_num); 84 | copy_data_scalar((int64_t *)scalar_data, (int64_t *)source_data, data_num); 85 | 86 | return 
check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 87 | } 88 | /* Masked copy: vector result must equal the scalar reference. */ 89 | int test_mask() 90 | { 91 | format_array(); 92 | 93 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int64_t); 94 | copy_data_mask_vec((int64_t *)vec_data, (int64_t *)source_data, (int64_t *)mask_data, data_num); 95 | copy_data_mask_scalar ((int64_t *)scalar_data, (int64_t *)source_data, (int64_t *)mask_data, data_num); 96 | 97 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 98 | } 99 | -------------------------------------------------------------------------------- /memcpy_ve32/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void copy_data_vec(int32_t *dest_data, int32_t *source_data, int data_num); 4 | void copy_data_mask_vec(int32_t *dest_data, int32_t *source_data, int32_t *mask, int data_num); 5 | 6 | #include "data.h" 7 | 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Clear both result buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Returns 0 if the first data_num 64-bit words match, else the 1-based index of the first mismatch. */ 20 | int check_data (int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Scalar reference: plain element-by-element copy. */ 31 | void copy_data_scalar(int32_t *dest_data, int32_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference: element i is copied when bit i of mask is set, otherwise 0 is stored. */ 39 | void copy_data_mask_scalar(int32_t *dest_data, int32_t *source_data, int32_t *mask, const int data_num) 40 | { 41 | const int elem_size = sizeof(int32_t) * 8; 42 | for (int i = 0; i < data_num; i++) { 43 | dest_data[i] = ((mask[i/elem_size] >> (i%elem_size)) & 0x1) ? 
source_data[i] : 0; 44 | } 45 | } 46 | 47 | 48 | int test_0(); 49 | int test_vl(); 50 | int test_mask(); 51 | /* Exit code 10, 20 or 30 identifies the failing test; 0 means all passed. */ 52 | int main() 53 | { 54 | if (test_0() != 0) { 55 | return 10; 56 | } 57 | if (test_vl() != 0) { 58 | return 20; 59 | } 60 | if (test_mask() != 0) { 61 | return 30; 62 | } 63 | return 0; 64 | } 65 | 66 | /* Full-length copy: vector result must equal the scalar reference. */ 67 | int test_0() 68 | { 69 | format_array(); 70 | 71 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int32_t); 72 | copy_data_vec ((int32_t *)vec_data, (int32_t *)source_data, data_num); 73 | copy_data_scalar((int32_t *)scalar_data, (int32_t *)source_data, data_num); 74 | 75 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 76 | } 77 | /* Copy 10 fewer elements; the whole buffers are still compared. */ 78 | int test_vl() 79 | { 80 | format_array(); 81 | 82 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int32_t) - 10; 83 | copy_data_vec ((int32_t *)vec_data, (int32_t *)source_data, data_num); 84 | copy_data_scalar((int32_t *)scalar_data, (int32_t *)source_data, data_num); 85 | 86 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 87 | } 88 | /* Masked copy: vector result must equal the scalar reference. */ 89 | int test_mask() 90 | { 91 | format_array(); 92 | 93 | const int data_num = DATA_NUM * sizeof(int32_t) / sizeof(int32_t); 94 | copy_data_mask_vec ((int32_t *)vec_data, (int32_t *)source_data, (int32_t *)mask_data, data_num); 95 | copy_data_mask_scalar ((int32_t *)scalar_data, (int32_t *)source_data, (int32_t *)mask_data, data_num); 96 | 97 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 98 | } 99 | -------------------------------------------------------------------------------- /vor_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global or_data_vec_8 3 | # void or_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # 6 | or_data_vec_8: 7 | // mv a3, a0 # 
Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v0, (a1) # Load bytes 11 | add a1, a1, t0 # Bump pointer 12 | vle8.v v1, (a2) # Load bytes 13 | add a2, a2, t0 # Bump pointer 14 | sub a3, a3, t0 # Decrement count 15 | vor.vv v2, v1, v0 # Vector Or 16 | vse8.v v2, (a0) # Store bytes 17 | add a0, a0, t0 # Bump pointer 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global or_data_vec_16 23 | # void or_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # 26 | or_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # Vectors of 16b 29 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 30 | vle16.v v0, (a1) # Load 16b elements 31 | add a1, a1, t1 # Bump pointer 32 | vle16.v v1, (a2) # Load 16b elements 33 | add a2, a2, t1 # Bump pointer 34 | sub a3, a3, t0 # Decrement count 35 | vor.vv v2, v1, v0 # Vector Or 36 | vse16.v v2, (a0) # Store 16b elements 37 | add a0, a0, t1 # Bump pointer 38 | bnez a3, .loop_16 # Any more? 39 | ret # Return 40 | 41 | 42 | .text 43 | .global or_data_vec_32 44 | # void or_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # 47 | or_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # Vectors of 32b 50 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 51 | vle32.v v0, (a1) # Load 32b elements 52 | add a1, a1, t1 # Bump pointer 53 | vle32.v v1, (a2) # Load 32b elements 54 | add a2, a2, t1 # Bump pointer 55 | sub a3, a3, t0 # Decrement count 56 | vor.vv v2, v1, v0 # Vector Or 57 | vse32.v v2, (a0) # Store 32b elements 58 | add a0, a0, t1 # Bump pointer 59 | bnez a3, .loop_32 # Any more? 
60 | ret # Return 61 | 62 | 63 | .text 64 | .global or_data_vec_64 65 | # void or_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # 68 | or_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # Vectors of 64b 71 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 72 | vle64.v v0, (a1) # Load 64b elements 73 | add a1, a1, t1 # Bump pointer 74 | vle64.v v1, (a2) # Load 64b elements 75 | add a2, a2, t1 # Bump pointer 76 | sub a3, a3, t0 # Decrement count 77 | vor.vv v2, v1, v0 # Vector Or 78 | vse64.v v2, (a0) # Store 64b elements 79 | add a0, a0, t1 # Bump pointer 80 | bnez a3, .loop_64 # Any more? 81 | ret # Return 82 | -------------------------------------------------------------------------------- /vadd_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global add_data_vec_8 3 | # void add_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # 6 | add_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v0, (a1) # Load bytes 11 | add a1, a1, t0 # Bump pointer 12 | vle8.v v1, (a2) # Load bytes 13 | add a2, a2, t0 # Bump pointer 14 | sub a3, a3, t0 # Decrement count 15 | vadd.vv v2, v1, v0 # Vector Add 16 | vse8.v v2, (a0) # Store bytes 17 | add a0, a0, t0 # Bump pointer 18 | bnez a3, .loop_8 # Any more? 
19 | ret # Return 20 | 21 | .text 22 | .global add_data_vec_16 23 | # void add_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # 26 | add_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # Vectors of 16b 29 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 30 | vle16.v v0, (a1) # Load 16b elements 31 | add a1, a1, t1 # Bump pointer 32 | vle16.v v1, (a2) # Load 16b elements 33 | add a2, a2, t1 # Bump pointer 34 | sub a3, a3, t0 # Decrement count 35 | vadd.vv v2, v1, v0 # Vector Add 36 | vse16.v v2, (a0) # Store 16b elements 37 | add a0, a0, t1 # Bump pointer 38 | bnez a3, .loop_16 # Any more? 39 | ret # Return 40 | 41 | 42 | .text 43 | .global add_data_vec_32 44 | # void add_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # 47 | add_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # Vectors of 32b 50 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 51 | vle32.v v0, (a1) # Load 32b elements 52 | add a1, a1, t1 # Bump pointer 53 | vle32.v v1, (a2) # Load 32b elements 54 | add a2, a2, t1 # Bump pointer 55 | sub a3, a3, t0 # Decrement count 56 | vadd.vv v2, v1, v0 # Vector Add 57 | vse32.v v2, (a0) # Store 32b elements 58 | add a0, a0, t1 # Bump pointer 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global add_data_vec_64 65 | # void add_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # 68 | add_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # Vectors of 64b 71 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 72 | vle64.v v0, (a1) # Load 64b elements 73 | add a1, a1, t1 # Bump pointer 74 | vle64.v v1, (a2) # Load 64b elements 75 | add a2, a2, t1 # Bump pointer 76 | sub a3, a3, t0 # Decrement count 77 | vadd.vv v2, v1, v0 # Vector Add 78 | vse64.v v2, (a0) # Store 64b elements 79 | add a0, a0, t1 # Bump pointer 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vand_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global and_data_vec_8 3 | # void and_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # 6 | and_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v0, (a1) # Load bytes 11 | add a1, a1, t0 # Bump pointer 12 | vle8.v v1, (a2) # Load bytes 13 | add a2, a2, t0 # Bump pointer 14 | sub a3, a3, t0 # Decrement count 15 | vand.vv v2, v1, v0 # Vector And 16 | vse8.v v2, (a0) # Store bytes 17 | add a0, a0, t0 # Bump pointer 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global and_data_vec_16 23 | # void and_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # 26 | and_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # Vectors of 16b 29 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 30 | vle16.v v0, (a1) # Load 16b elements 31 | add a1, a1, t1 # Bump pointer 32 | vle16.v v1, (a2) # Load 16b elements 33 | add a2, a2, t1 # Bump pointer 34 | sub a3, a3, t0 # Decrement count 35 | vand.vv v2, v1, v0 # Vector And 36 | vse16.v v2, (a0) # Store 16b elements 37 | add a0, a0, t1 # Bump pointer 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global and_data_vec_32 44 | # void and_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # 47 | and_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # Vectors of 32b 50 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 51 | vle32.v v0, (a1) # Load 32b elements 52 | add a1, a1, t1 # Bump pointer 53 | vle32.v v1, (a2) # Load 32b elements 54 | add a2, a2, t1 # Bump pointer 55 | sub a3, a3, t0 # Decrement count 56 | vand.vv v2, v1, v0 # Vector And 57 | vse32.v v2, (a0) # Store 32b elements 58 | add a0, a0, t1 # Bump pointer 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global and_data_vec_64 65 | # void and_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # 68 | and_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # Vectors of 64b 71 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 72 | vle64.v v0, (a1) # Load 64b elements 73 | add a1, a1, t1 # Bump pointer 74 | vle64.v v1, (a2) # Load 64b elements 75 | add a2, a2, t1 # Bump pointer 76 | sub a3, a3, t0 # Decrement count 77 | vand.vv v2, v1, v0 # Vector And 78 | vse64.v v2, (a0) # Store 64b elements 79 | add a0, a0, t1 # Bump pointer 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vmax_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global max_data_vec_8 3 | # void max_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # 6 | max_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v0, (a1) # Load bytes 11 | add a1, a1, t0 # Bump pointer 12 | vle8.v v1, (a2) # Load bytes 13 | add a2, a2, t0 # Bump pointer 14 | sub a3, a3, t0 # Decrement count 15 | vmax.vv v2, v1, v0 # Vector signed Max 16 | vse8.v v2, (a0) # Store bytes 17 | add a0, a0, t0 # Bump pointer 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global max_data_vec_16 23 | # void max_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # 26 | max_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # Vectors of 16b 29 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 30 | vle16.v v0, (a1) # Load 16b elements 31 | add a1, a1, t1 # Bump pointer 32 | vle16.v v1, (a2) # Load 16b elements 33 | add a2, a2, t1 # Bump pointer 34 | sub a3, a3, t0 # Decrement count 35 | vmax.vv v2, v1, v0 # Vector signed Max 36 | vse16.v v2, (a0) # Store 16b elements 37 | add a0, a0, t1 # Bump pointer 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global max_data_vec_32 44 | # void max_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # 47 | max_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # Vectors of 32b 50 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 51 | vle32.v v0, (a1) # Load 32b elements 52 | add a1, a1, t1 # Bump pointer 53 | vle32.v v1, (a2) # Load 32b elements 54 | add a2, a2, t1 # Bump pointer 55 | sub a3, a3, t0 # Decrement count 56 | vmax.vv v2, v1, v0 # Vector signed Max 57 | vse32.v v2, (a0) # Store 32b elements 58 | add a0, a0, t1 # Bump pointer 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global max_data_vec_64 65 | # void max_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # 68 | max_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # Vectors of 64b 71 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 72 | vle64.v v0, (a1) # Load 64b elements 73 | add a1, a1, t1 # Bump pointer 74 | vle64.v v1, (a2) # Load 64b elements 75 | add a2, a2, t1 # Bump pointer 76 | sub a3, a3, t0 # Decrement count 77 | vmax.vv v2, v1, v0 # Vector signed Max 78 | vse64.v v2, (a0) # Store 64b elements 79 | add a0, a0, t1 # Bump pointer 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vmin_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global min_data_vec_8 3 | # void min_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # 6 | min_data_vec_8: 7 | // mv a3, a0 # Copy destination 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # Vectors of 8b 10 | vle8.v v0, (a1) # Load bytes 11 | add a1, a1, t0 # Bump pointer 12 | vle8.v v1, (a2) # Load bytes 13 | add a2, a2, t0 # Bump pointer 14 | sub a3, a3, t0 # Decrement count 15 | vmin.vv v2, v1, v0 # Vector signed Min 16 | vse8.v v2, (a0) # Store bytes 17 | add a0, a0, t0 # Bump pointer 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global min_data_vec_16 23 | # void min_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # 26 | min_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # Vectors of 16b 29 | slli t1, t0, 1 # t1 = vl * 2 (bytes) 30 | vle16.v v0, (a1) # Load 16b elements 31 | add a1, a1, t1 # Bump pointer 32 | vle16.v v1, (a2) # Load 16b elements 33 | add a2, a2, t1 # Bump pointer 34 | sub a3, a3, t0 # Decrement count 35 | vmin.vv v2, v1, v0 # Vector signed Min 36 | vse16.v v2, (a0) # Store 16b elements 37 | add a0, a0, t1 # Bump pointer 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global min_data_vec_32 44 | # void min_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # 47 | min_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # Vectors of 32b 50 | slli t1, t0, 2 # t1 = vl * 4 (bytes) 51 | vle32.v v0, (a1) # Load 32b elements 52 | add a1, a1, t1 # Bump pointer 53 | vle32.v v1, (a2) # Load 32b elements 54 | add a2, a2, t1 # Bump pointer 55 | sub a3, a3, t0 # Decrement count 56 | vmin.vv v2, v1, v0 # Vector signed Min 57 | vse32.v v2, (a0) # Store 32b elements 58 | add a0, a0, t1 # Bump pointer 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global min_data_vec_64 65 | # void min_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # 68 | min_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # Vectors of 64b 71 | slli t1, t0, 3 # t1 = vl * 8 (bytes) 72 | vle64.v v0, (a1) # Load 64b elements 73 | add a1, a1, t1 # Bump pointer 74 | vle64.v v1, (a2) # Load 64b elements 75 | add a2, a2, t1 # Bump pointer 76 | sub a3, a3, t0 # Decrement count 77 | vmin.vv v2, v1, v0 # Vector signed Min 78 | vse64.v v2, (a0) # Store 64b elements 79 | add a0, a0, t1 # Bump pointer 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vsub_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global sub_data_vec_8 3 | # void sub_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = src2[i] - src1[i] for each of the n elements 6 | sub_data_vec_8: 7 | // Loop: each pass handles t0 (= vl) elements 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # t0 = vl for 8b elements 10 | vle8.v v0, (a1) # Load src1 elements 11 | add a1, a1, t0 # Advance src1 by vl bytes 12 | vle8.v v1, (a2) # Load src2 elements 13 | add a2, a2, t0 # Advance src2 by vl bytes 14 | sub a3, a3, t0 # Decrement remaining count 15 | vsub.vv v2, v1, v0 # v2 = v1 - v0 (src2 - src1) 16 | vse8.v v2, (a0) # Store result elements 17 | add a0, a0, t0 # Advance dest by vl bytes 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global sub_data_vec_16 23 | # void sub_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # dest[i] = src2[i] - src1[i] for each of the n elements 26 | sub_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # t0 = vl for 16b elements 29 | slli t1, t0, 1 # t1 = vl * 2 bytes 30 | vle16.v v0, (a1) # Load src1 elements 31 | add a1, a1, t1 # Advance src1 by t1 bytes 32 | vle16.v v1, (a2) # Load src2 elements 33 | add a2, a2, t1 # Advance src2 by t1 bytes 34 | sub a3, a3, t0 # Decrement remaining count 35 | vsub.vv v2, v1, v0 # v2 = v1 - v0 (src2 - src1) 36 | vse16.v v2, (a0) # Store result elements 37 | add a0, a0, t1 # Advance dest by t1 bytes 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global sub_data_vec_32 44 | # void sub_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # dest[i] = src2[i] - src1[i] for each of the n elements 47 | sub_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # t0 = vl for 32b elements 50 | slli t1, t0, 2 # t1 = vl * 4 bytes 51 | vle32.v v0, (a1) # Load src1 elements 52 | add a1, a1, t1 # Advance src1 by t1 bytes 53 | vle32.v v1, (a2) # Load src2 elements 54 | add a2, a2, t1 # Advance src2 by t1 bytes 55 | sub a3, a3, t0 # Decrement remaining count 56 | vsub.vv v2, v1, v0 # v2 = v1 - v0 (src2 - src1) 57 | vse32.v v2, (a0) # Store result elements 58 | add a0, a0, t1 # Advance dest by t1 bytes 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global sub_data_vec_64 65 | # void sub_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # dest[i] = src2[i] - src1[i] for each of the n elements 68 | sub_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # t0 = vl for 64b elements 71 | slli t1, t0, 3 # t1 = vl * 8 bytes 72 | vle64.v v0, (a1) # Load src1 elements 73 | add a1, a1, t1 # Advance src1 by t1 bytes 74 | vle64.v v1, (a2) # Load src2 elements 75 | add a2, a2, t1 # Advance src2 by t1 bytes 76 | sub a3, a3, t0 # Decrement remaining count 77 | vsub.vv v2, v1, v0 # v2 = v1 - v0 (src2 - src1) 78 | vse64.v v2, (a0) # Store result elements 79 | add a0, a0, t1 # Advance dest by t1 bytes 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vxor_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global xor_data_vec_8 3 | # void xor_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = src1[i] ^ src2[i] for each of the n elements 6 | xor_data_vec_8: 7 | // Loop: each pass handles t0 (= vl) elements 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # t0 = vl for 8b elements 10 | vle8.v v0, (a1) # Load src1 elements 11 | add a1, a1, t0 # Advance src1 by vl bytes 12 | vle8.v v1, (a2) # Load src2 elements 13 | add a2, a2, t0 # Advance src2 by vl bytes 14 | sub a3, a3, t0 # Decrement remaining count 15 | vxor.vv v2, v1, v0 # v2 = v1 ^ v0 16 | vse8.v v2, (a0) # Store result elements 17 | add a0, a0, t0 # Advance dest by vl bytes 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global xor_data_vec_16 23 | # void xor_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # dest[i] = src1[i] ^ src2[i] for each of the n elements 26 | xor_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # t0 = vl for 16b elements 29 | slli t1, t0, 1 # t1 = vl * 2 bytes 30 | vle16.v v0, (a1) # Load src1 elements 31 | add a1, a1, t1 # Advance src1 by t1 bytes 32 | vle16.v v1, (a2) # Load src2 elements 33 | add a2, a2, t1 # Advance src2 by t1 bytes 34 | sub a3, a3, t0 # Decrement remaining count 35 | vxor.vv v2, v1, v0 # v2 = v1 ^ v0 36 | vse16.v v2, (a0) # Store result elements 37 | add a0, a0, t1 # Advance dest by t1 bytes 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global xor_data_vec_32 44 | # void xor_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # dest[i] = src1[i] ^ src2[i] for each of the n elements 47 | xor_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # t0 = vl for 32b elements 50 | slli t1, t0, 2 # t1 = vl * 4 bytes 51 | vle32.v v0, (a1) # Load src1 elements 52 | add a1, a1, t1 # Advance src1 by t1 bytes 53 | vle32.v v1, (a2) # Load src2 elements 54 | add a2, a2, t1 # Advance src2 by t1 bytes 55 | sub a3, a3, t0 # Decrement remaining count 56 | vxor.vv v2, v1, v0 # v2 = v1 ^ v0 57 | vse32.v v2, (a0) # Store result elements 58 | add a0, a0, t1 # Advance dest by t1 bytes 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global xor_data_vec_64 65 | # void xor_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # dest[i] = src1[i] ^ src2[i] for each of the n elements 68 | xor_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # t0 = vl for 64b elements 71 | slli t1, t0, 3 # t1 = vl * 8 bytes 72 | vle64.v v0, (a1) # Load src1 elements 73 | add a1, a1, t1 # Advance src1 by t1 bytes 74 | vle64.v v1, (a2) # Load src2 elements 75 | add a2, a2, t1 # Advance src2 by t1 bytes 76 | sub a3, a3, t0 # Decrement remaining count 77 | vxor.vv v2, v1, v0 # v2 = v1 ^ v0 78 | vse64.v v2, (a0) # Store result elements 79 | add a0, a0, t1 # Advance dest by t1 bytes 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vmaxu_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global maxu_data_vec_8 3 | # void maxu_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = unsigned max(src1[i], src2[i]) for each of the n elements 6 | maxu_data_vec_8: 7 | // Loop: each pass handles t0 (= vl) elements 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # t0 = vl for 8b elements 10 | vle8.v v0, (a1) # Load src1 elements 11 | add a1, a1, t0 # Advance src1 by vl bytes 12 | vle8.v v1, (a2) # Load src2 elements 13 | add a2, a2, t0 # Advance src2 by vl bytes 14 | sub a3, a3, t0 # Decrement remaining count 15 | vmaxu.vv v2, v1, v0 # v2 = max(v1, v0), unsigned 16 | vse8.v v2, (a0) # Store result elements 17 | add a0, a0, t0 # Advance dest by vl bytes 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global maxu_data_vec_16 23 | # void maxu_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # dest[i] = unsigned max(src1[i], src2[i]) for each of the n elements 26 | maxu_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # t0 = vl for 16b elements 29 | slli t1, t0, 1 # t1 = vl * 2 bytes 30 | vle16.v v0, (a1) # Load src1 elements 31 | add a1, a1, t1 # Advance src1 by t1 bytes 32 | vle16.v v1, (a2) # Load src2 elements 33 | add a2, a2, t1 # Advance src2 by t1 bytes 34 | sub a3, a3, t0 # Decrement remaining count 35 | vmaxu.vv v2, v1, v0 # v2 = max(v1, v0), unsigned 36 | vse16.v v2, (a0) # Store result elements 37 | add a0, a0, t1 # Advance dest by t1 bytes 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global maxu_data_vec_32 44 | # void maxu_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # dest[i] = unsigned max(src1[i], src2[i]) for each of the n elements 47 | maxu_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # t0 = vl for 32b elements 50 | slli t1, t0, 2 # t1 = vl * 4 bytes 51 | vle32.v v0, (a1) # Load src1 elements 52 | add a1, a1, t1 # Advance src1 by t1 bytes 53 | vle32.v v1, (a2) # Load src2 elements 54 | add a2, a2, t1 # Advance src2 by t1 bytes 55 | sub a3, a3, t0 # Decrement remaining count 56 | vmaxu.vv v2, v1, v0 # v2 = max(v1, v0), unsigned 57 | vse32.v v2, (a0) # Store result elements 58 | add a0, a0, t1 # Advance dest by t1 bytes 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global maxu_data_vec_64 65 | # void maxu_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # dest[i] = unsigned max(src1[i], src2[i]) for each of the n elements 68 | maxu_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # t0 = vl for 64b elements 71 | slli t1, t0, 3 # t1 = vl * 8 bytes 72 | vle64.v v0, (a1) # Load src1 elements 73 | add a1, a1, t1 # Advance src1 by t1 bytes 74 | vle64.v v1, (a2) # Load src2 elements 75 | add a2, a2, t1 # Advance src2 by t1 bytes 76 | sub a3, a3, t0 # Decrement remaining count 77 | vmaxu.vv v2, v1, v0 # v2 = max(v1, v0), unsigned 78 | vse64.v v2, (a0) # Store result elements 79 | add a0, a0, t1 # Advance dest by t1 bytes 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vminu_vv/test.S: -------------------------------------------------------------------------------- 1 | .text 2 | .global minu_data_vec_8 3 | # void minu_data_vec_8(int8_t *dest_data, int8_t *src1, int8_t *src2, int data_num); 4 | # a0=dest, a1=src1, a2=src2, a3=n 5 | # dest[i] = unsigned min(src1[i], src2[i]) for each of the n elements 6 | minu_data_vec_8: 7 | // Loop: each pass handles t0 (= vl) elements 8 | .loop_8: 9 | vsetvli t0, a3, e8,m1 # t0 = vl for 8b elements 10 | vle8.v v0, (a1) # Load src1 elements 11 | add a1, a1, t0 # Advance src1 by vl bytes 12 | vle8.v v1, (a2) # Load src2 elements 13 | add a2, a2, t0 # Advance src2 by vl bytes 14 | sub a3, a3, t0 # Decrement remaining count 15 | vminu.vv v2, v1, v0 # v2 = min(v1, v0), unsigned 16 | vse8.v v2, (a0) # Store result elements 17 | add a0, a0, t0 # Advance dest by vl bytes 18 | bnez a3, .loop_8 # Any more? 19 | ret # Return 20 | 21 | .text 22 | .global minu_data_vec_16 23 | # void minu_data_vec_16(int16_t *dest_data, int16_t *src1, int16_t *src2, int data_num); 24 | # a0=dest, a1=src1, a2=src2, a3=n 25 | # dest[i] = unsigned min(src1[i], src2[i]) for each of the n elements 26 | minu_data_vec_16: 27 | .loop_16: 28 | vsetvli t0, a3, e16,m1 # t0 = vl for 16b elements 29 | slli t1, t0, 1 # t1 = vl * 2 bytes 30 | vle16.v v0, (a1) # Load src1 elements 31 | add a1, a1, t1 # Advance src1 by t1 bytes 32 | vle16.v v1, (a2) # Load src2 elements 33 | add a2, a2, t1 # Advance src2 by t1 bytes 34 | sub a3, a3, t0 # Decrement remaining count 35 | vminu.vv v2, v1, v0 # v2 = min(v1, v0), unsigned 36 | vse16.v v2, (a0) # Store result elements 37 | add a0, a0, t1 # Advance dest by t1 bytes 38 | bnez a3, .loop_16 # Any more? 
39 | ret # Return 40 | 41 | 42 | .text 43 | .global minu_data_vec_32 44 | # void minu_data_vec_32(int32_t *dest_data, int32_t *src1, int32_t *src2, int data_num); 45 | # a0=dest, a1=src1, a2=src2, a3=n 46 | # dest[i] = unsigned min(src1[i], src2[i]) for each of the n elements 47 | minu_data_vec_32: 48 | .loop_32: 49 | vsetvli t0, a3, e32,m1 # t0 = vl for 32b elements 50 | slli t1, t0, 2 # t1 = vl * 4 bytes 51 | vle32.v v0, (a1) # Load src1 elements 52 | add a1, a1, t1 # Advance src1 by t1 bytes 53 | vle32.v v1, (a2) # Load src2 elements 54 | add a2, a2, t1 # Advance src2 by t1 bytes 55 | sub a3, a3, t0 # Decrement remaining count 56 | vminu.vv v2, v1, v0 # v2 = min(v1, v0), unsigned 57 | vse32.v v2, (a0) # Store result elements 58 | add a0, a0, t1 # Advance dest by t1 bytes 59 | bnez a3, .loop_32 # Any more? 60 | ret # Return 61 | 62 | 63 | .text 64 | .global minu_data_vec_64 65 | # void minu_data_vec_64(int64_t *dest_data, int64_t *src1, int64_t *src2, int data_num); 66 | # a0=dest, a1=src1, a2=src2, a3=n 67 | # dest[i] = unsigned min(src1[i], src2[i]) for each of the n elements 68 | minu_data_vec_64: 69 | .loop_64: 70 | vsetvli t0, a3, e64,m1 # t0 = vl for 64b elements 71 | slli t1, t0, 3 # t1 = vl * 8 bytes 72 | vle64.v v0, (a1) # Load src1 elements 73 | add a1, a1, t1 # Advance src1 by t1 bytes 74 | vle64.v v1, (a2) # Load src2 elements 75 | add a2, a2, t1 # Advance src2 by t1 bytes 76 | sub a3, a3, t0 # Decrement remaining count 77 | vminu.vv v2, v1, v0 # v2 = min(v1, v0), unsigned 78 | vse64.v v2, (a0) # Store result elements 79 | add a0, a0, t1 # Advance dest by t1 bytes 80 | bnez a3, .loop_64 # Any more? 
81 | ret # Return 82 | -------------------------------------------------------------------------------- /vlse32_v/main.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | #include <stddef.h> 3 | /* Vector strided copy under test (prototype only; defined outside this file). */ 4 | void copy_data_ustride_vec(int32_t *dest_data, int32_t *source_data, int stride, int data_num); 5 | 6 | #include "data.h" 7 | /* Output buffers for the vector copy and the scalar reference copy. */ 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Zero both output buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Compare data_num 64-bit words; return 0 if all match, else the 1-based index of the first mismatch. */ 20 | int check_data (const int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Contiguous copy of data_num elements. */ 31 | void copy_data_scalar(int32_t *dest_data, int32_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference strided copy: dest_data[i] = source_data[i*stride], stride counted in elements. */ 39 | void copy_data_ustride_scalar(int32_t *dest_data, int32_t *source_data, int32_t stride, const int data_num) 40 | { 41 | for (int i = 0; i < data_num; i++) { 42 | dest_data[i] = source_data[i*stride]; 43 | } 44 | } 45 | 46 | 47 | int test_ustride(int stride); 48 | int test_ustride_minus(int stride); 49 | /* Returns 0 on success, else (check_data result << 8) | test case number. */ 50 | int main() 51 | { 52 | int result = 0; 53 | if ((result = test_ustride(4)) != 0) { 54 | return (result << 8) | 1; 55 | } 56 | if ((result = test_ustride(8)) != 0) { 57 | return (result << 8) | 2; 58 | } 59 | if ((result = test_ustride_minus(-4)) != 0) { 60 | return (result << 8) | 4; 61 | } 62 | if ((result = test_ustride_minus(-8)) != 0) { 63 | return (result << 8) | 5; 64 | } 65 | 66 | return 0; 67 | } 68 | 69 | /* Positive-stride case: run the vector and scalar copies, then compare the two buffers. */ 70 | int test_ustride(int stride) 71 | { 72 | const size_t target_size = sizeof(int32_t); 73 | format_array(); 74 | 75 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 76 | copy_data_ustride_vec((int32_t 
*)vec_data, 77 | (int32_t *)source_data, 78 | stride, data_num); 79 | copy_data_ustride_scalar ((int32_t *)scalar_data, 80 | (int32_t *)source_data, 81 | stride / (int)sizeof(int32_t), data_num); 82 | /* The (int) cast above keeps the byte-to-element stride division signed. */ 83 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 84 | } 85 | 86 | /* Negative-stride case: both copies start at the last source element and walk backwards. */ 87 | int test_ustride_minus(int stride) 88 | { 89 | format_array(); 90 | 91 | const size_t target_size = sizeof(int32_t); 92 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 93 | copy_data_ustride_vec ((int32_t *)vec_data, 94 | (int32_t *)(source_data+DATA_NUM-1), 95 | stride, data_num); 96 | copy_data_ustride_scalar ((int32_t *)scalar_data, 97 | (int32_t *)(source_data+DATA_NUM-1), 98 | stride / (int)sizeof(int32_t), data_num); 99 | /* Signed division: dividing a negative int by a size_t would convert it to a huge unsigned value first. */ 100 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 101 | } 102 | -------------------------------------------------------------------------------- /vlse16_v/main.c: -------------------------------------------------------------------------------- 1 | #include <stdint.h> 2 | #include <stddef.h> 3 | /* Vector strided copy under test (prototype only; defined outside this file). */ 4 | void copy_data_ustride_vec(int16_t *dest_data, int16_t *source_data, int stride, int data_num); 5 | 6 | #include "data.h" 7 | /* Output buffers for the vector copy and the scalar reference copy. */ 8 | int32_t vec_data[DATA_NUM] = {0}; 9 | int32_t scalar_data[DATA_NUM] = {0}; 10 | /* Zero both output buffers. */ 11 | void format_array() 12 | { 13 | for (int i = 0; i < DATA_NUM; i++) { 14 | vec_data[i] = 0; 15 | scalar_data[i] = 0; 16 | } 17 | } 18 | 19 | /* Compare data_num 64-bit words; return 0 if all match, else the 1-based index of the first mismatch. */ 20 | int check_data (const int64_t *vec_data, const int64_t *scalar_data, const int data_num) 21 | { 22 | for(int i = 0; i < data_num; i++) { 23 | if(vec_data[i] != scalar_data[i]) { 24 | return i + 1; 25 | } 26 | } 27 | return 0; 28 | } 29 | 30 | /* Contiguous copy of data_num elements. */ 31 | void copy_data_scalar(int16_t *dest_data, int16_t *source_data, const int data_num) 32 | { 33 | for (int i = 0; i < data_num; i++) { 34 | dest_data[i] = source_data[i]; 35 | } 36 | } 37 | 38 | /* Scalar reference strided copy: dest_data[i] = source_data[i*stride], stride counted in elements. */ 39 | void copy_data_ustride_scalar(int16_t *dest_data, int16_t 
*source_data, int16_t stride, const int data_num) 40 | { 41 | for (int i = 0; i < data_num; i++) { 42 | dest_data[i] = source_data[i*stride]; 43 | } 44 | } 45 | 46 | 47 | int test_ustride(int stride); 48 | int test_ustride_minus(int stride); 49 | /* Returns 0 on success, else (check_data result << 8) | test case number. */ 50 | int main() 51 | { 52 | int result = 0; 53 | if ((result = test_ustride(2)) != 0) { 54 | return (result << 8) | 1; 55 | } 56 | if ((result = test_ustride(4)) != 0) { 57 | return (result << 8) | 2; 58 | } 59 | if ((result = test_ustride(6)) != 0) { 60 | return (result << 8) | 3; 61 | } 62 | if ((result = test_ustride_minus(-2)) != 0) { 63 | return (result << 8) | 4; 64 | } 65 | if ((result = test_ustride_minus(-4)) != 0) { 66 | return (result << 8) | 5; 67 | } 68 | if ((result = test_ustride_minus(-6)) != 0) { 69 | return (result << 8) | 6; 70 | } 71 | 72 | return 0; 73 | } 74 | 75 | /* Positive-stride case: run the vector and scalar copies, then compare the two buffers. */ 76 | int test_ustride(int stride) 77 | { 78 | format_array(); 79 | const size_t target_size = sizeof(int16_t); 80 | /* The scalar copy takes an element stride: byte stride / element size, divided as signed int. */ 81 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 82 | copy_data_ustride_vec ((int16_t *)vec_data, 83 | (int16_t *)source_data, 84 | stride, data_num); 85 | copy_data_ustride_scalar ((int16_t *)scalar_data, 86 | (int16_t *)source_data, 87 | stride / (int)target_size, data_num); 88 | 89 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / sizeof(int64_t)); 90 | } 91 | 92 | /* Negative-stride case: both copies start at the last source word and walk backwards. */ 93 | int test_ustride_minus(int stride) 94 | { 95 | format_array(); 96 | const size_t target_size = sizeof(int16_t); 97 | /* Signed division matters here: a negative int mixed with size_t would be converted to unsigned first. */ 98 | const int data_num = (DATA_NUM / 4) * sizeof(int32_t) / target_size; 99 | copy_data_ustride_vec ((int16_t *)vec_data, 100 | (int16_t *)(source_data+DATA_NUM-1), 101 | stride, data_num); 102 | copy_data_ustride_scalar ((int16_t *)scalar_data, 103 | (int16_t *)(source_data+DATA_NUM-1), 104 | stride / (int)target_size, data_num); 105 | 106 | return check_data((int64_t *)vec_data, (int64_t *)scalar_data, DATA_NUM * sizeof(int32_t) / 
sizeof(int64_t)); 107 | } 108 | -------------------------------------------------------------------------------- /memcpy_ve8/data.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define DATA_NUM 256 4 | 5 | int32_t mask_data[DATA_NUM/32] = { 6 | 0x01234567, 0x89abcdef, 0x11112222, 0x33334444, 7 | 0x55556666, 0x77778888, 0x9999aaaa, 0xbbbbcccc 8 | }; 9 | 10 | 11 | int32_t source_data[DATA_NUM] = { 12 | 845150577, -965358962, -733826200, 224278040, 13 | -1657493282, 1884651197, -1369202253, -379841765, 14 | 767247286, 1718024944, 759098771, -1236415500, 15 | 460355761, 1171477725, -346432824, -303215321, 16 | 565714941, 624152115, -344452799, -1013465950, 17 | -1272607770, -1171761022, 951998449, -36998345, 18 | -914471897, 709861964, -210539865, 1009009779, 19 | -538688058, -1778531594, -699396714, -823118797, 20 | -1648297241, -2121926270, 1517466862, 1021658420, 21 | 1238577922, -1815939784, 1753398281, -759867277, 22 | -16028396, 622572017, -1715960166, 1132347809, 23 | 1777017290, 1386168492, -1496084766, -358265870, 24 | 448237830, 795306506, 908143501, 1557476530, 25 | -1882669463, 1092796523, -2031257413, -1555773029, 26 | -1302500909, -920726685, -1426461739, -1361140622, 27 | 520222723, -389071842, 1382138763, 1088759247, 28 | 1093731105, 1925047966, -1769451399, 1518648831, 29 | 1613231296, -1994577528, -737393311, 738966111, 30 | 1817162107, 495870949, -980359630, -1405807109, 31 | -2007742454, -1761881285, -128771519, -1895405862, 32 | 559773760, -183387373, -190142599, -902791932, 33 | -836838803, -1058243087, -1235670498, -801040554, 34 | -1987496673, 1370846164, -1776444254, 1190247722, 35 | 318418821, 1105589803, 1759358678, -91769151, 36 | 2138760432, 1573796835, -824086695, -805056712, 37 | 1381898821, 565355494, 2131837501, -1202937090, 38 | -1256485559, -1497213917, 2124694637, -603507295, 39 | -293903795, 1125664553, 1767159852, -1267456837, 40 | -661357315, -404155922, 367696568, 
-368856006, 41 | -800229006, -1271186190, -1624674902, 973147957, 42 | 279578680, -789240866, -159810466, -1118623436, 43 | -633826703, -115457815, 508111455, -1751491452, 44 | 301553052, -2044979379, 195526192, 443913498, 45 | 156733816, -1260176354, 826786547, 800994978, 46 | -1850549756, 1894285345, -1977641056, -962236284, 47 | -1480470188, 226551980, 1327565952, 55457195, 48 | -1087159523, 1488800072, -1470554508, -2030062734, 49 | -933841272, -457808705, 253199437, -1164282989, 50 | 1512276620, -223348404, -1305549715, -578132757, 51 | -228159608, -226613390, 2141321242, -1938367055, 52 | -2021335574, 739862560, -275592170, -1857842651, 53 | 1206586507, 1851919195, 1468813467, -722797819, 54 | -390809785, 632134341, 858289338, 1742920731, 55 | -1945045715, 1569698070, -109148104, 373542237, 56 | -398377473, -466435623, 1579985503, -672096266, 57 | -2062323921, -1631182832, 1885558690, 1058504920, 58 | 7906845, 182233337, -506859644, 1248099571, 59 | 1585093376, 26173252, 1831114947, -1406284842, 60 | 2054213854, -1376909123, 211729051, -565762513, 61 | -461644592, 538541677, -1891045481, 1057511104, 62 | -551738917, -195893163, -128136105, 111106852, 63 | -1151834040, 1066936519, 2047965660, -1133353279, 64 | 842937697, 89755761, -1969340709, -1426862250, 65 | 468776469, 1630899612, -722166927, 1183133783, 66 | 1698700290, 1646455279, -580202890, -1328425178, 67 | -371094220, -131491363, 1591382416, 217146116, 68 | -839080209, 1358207911, 691808298, -2092618385, 69 | 305539498, -244436152, 628400348, -1107083434, 70 | 1067254223, 2113473530, -854396720, 1765431315, 71 | -872168674, 738084812, 515310458, -247686862, 72 | 1940962032, 1304650770, 906480350, 1659947879, 73 | 1632875396, 2114340654, 205762117, -1434625092, 74 | -687673392, 1301427614, -1293742789, -100581323, 75 | -712745814, 605822142, -990957746, -100846050 76 | }; 77 | -------------------------------------------------------------------------------- /vlse16_v/data.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define DATA_NUM 256 4 | 5 | int32_t mask_data[DATA_NUM/32] = { 6 | 0x01234567, 0x89abcdef, 0x11112222, 0x33334444, 7 | 0x55556666, 0x77778888, 0x9999aaaa, 0xbbbbcccc 8 | }; 9 | 10 | 11 | int32_t source_data[DATA_NUM] = { 12 | 845150577, -965358962, -733826200, 224278040, 13 | -1657493282, 1884651197, -1369202253, -379841765, 14 | 767247286, 1718024944, 759098771, -1236415500, 15 | 460355761, 1171477725, -346432824, -303215321, 16 | 565714941, 624152115, -344452799, -1013465950, 17 | -1272607770, -1171761022, 951998449, -36998345, 18 | -914471897, 709861964, -210539865, 1009009779, 19 | -538688058, -1778531594, -699396714, -823118797, 20 | -1648297241, -2121926270, 1517466862, 1021658420, 21 | 1238577922, -1815939784, 1753398281, -759867277, 22 | -16028396, 622572017, -1715960166, 1132347809, 23 | 1777017290, 1386168492, -1496084766, -358265870, 24 | 448237830, 795306506, 908143501, 1557476530, 25 | -1882669463, 1092796523, -2031257413, -1555773029, 26 | -1302500909, -920726685, -1426461739, -1361140622, 27 | 520222723, -389071842, 1382138763, 1088759247, 28 | 1093731105, 1925047966, -1769451399, 1518648831, 29 | 1613231296, -1994577528, -737393311, 738966111, 30 | 1817162107, 495870949, -980359630, -1405807109, 31 | -2007742454, -1761881285, -128771519, -1895405862, 32 | 559773760, -183387373, -190142599, -902791932, 33 | -836838803, -1058243087, -1235670498, -801040554, 34 | -1987496673, 1370846164, -1776444254, 1190247722, 35 | 318418821, 1105589803, 1759358678, -91769151, 36 | 2138760432, 1573796835, -824086695, -805056712, 37 | 1381898821, 565355494, 2131837501, -1202937090, 38 | -1256485559, -1497213917, 2124694637, -603507295, 39 | -293903795, 1125664553, 1767159852, -1267456837, 40 | -661357315, -404155922, 367696568, -368856006, 41 | -800229006, -1271186190, -1624674902, 973147957, 42 | 279578680, -789240866, -159810466, -1118623436, 43 | -633826703, 
-115457815, 508111455, -1751491452, 44 | 301553052, -2044979379, 195526192, 443913498, 45 | 156733816, -1260176354, 826786547, 800994978, 46 | -1850549756, 1894285345, -1977641056, -962236284, 47 | -1480470188, 226551980, 1327565952, 55457195, 48 | -1087159523, 1488800072, -1470554508, -2030062734, 49 | -933841272, -457808705, 253199437, -1164282989, 50 | 1512276620, -223348404, -1305549715, -578132757, 51 | -228159608, -226613390, 2141321242, -1938367055, 52 | -2021335574, 739862560, -275592170, -1857842651, 53 | 1206586507, 1851919195, 1468813467, -722797819, 54 | -390809785, 632134341, 858289338, 1742920731, 55 | -1945045715, 1569698070, -109148104, 373542237, 56 | -398377473, -466435623, 1579985503, -672096266, 57 | -2062323921, -1631182832, 1885558690, 1058504920, 58 | 7906845, 182233337, -506859644, 1248099571, 59 | 1585093376, 26173252, 1831114947, -1406284842, 60 | 2054213854, -1376909123, 211729051, -565762513, 61 | -461644592, 538541677, -1891045481, 1057511104, 62 | -551738917, -195893163, -128136105, 111106852, 63 | -1151834040, 1066936519, 2047965660, -1133353279, 64 | 842937697, 89755761, -1969340709, -1426862250, 65 | 468776469, 1630899612, -722166927, 1183133783, 66 | 1698700290, 1646455279, -580202890, -1328425178, 67 | -371094220, -131491363, 1591382416, 217146116, 68 | -839080209, 1358207911, 691808298, -2092618385, 69 | 305539498, -244436152, 628400348, -1107083434, 70 | 1067254223, 2113473530, -854396720, 1765431315, 71 | -872168674, 738084812, 515310458, -247686862, 72 | 1940962032, 1304650770, 906480350, 1659947879, 73 | 1632875396, 2114340654, 205762117, -1434625092, 74 | -687673392, 1301427614, -1293742789, -100581323, 75 | -712745814, 605822142, -990957746, -100846050 76 | }; 77 | --------------------------------------------------------------------------------