├── doc ├── vamo-format.adoc ├── Makefile ├── vtype-format.adoc ├── valu-format.adoc └── vmem-format.adoc ├── README.md ├── vstdlib ├── CMakeLists.txt ├── vstdlib.hpp ├── memset.cpp ├── memmove.cpp ├── memcpy.cpp └── memcpy_backward.cpp ├── tests ├── CMakeLists.txt ├── stdlib.cpp ├── unit_tests.cpp └── vector_examples.cpp ├── CMakeLists.txt ├── include └── riscv │ └── ext │ └── v.hpp └── src └── riscv32 └── v.cpp /doc/vamo-format.adoc: -------------------------------------------------------------------------------- 1 | ---- 2 | Format for Vector AMO Instructions under AMO major opcode 3 | 31 27 26 25 24 20 19 15 14 12 11 7 6 0 4 | amoop |wd| vm | vs2 | rs1 | width | vs3/vd |0101111| VAMO* 5 | 5 1 1 5 5 3 5 7 6 | ---- 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RISC-V V extension simulator 2 | 3 | RISC-V vector extension v0.7 (base) simulator implemented in C++. 4 | 5 | # Requirements 6 | 7 | * CMake >= 3.6 8 | * Boost >= 1.66 9 | 10 | # Building 11 | 12 | ``` 13 | mkdir build 14 | cd build 15 | cmake .. 16 | cmake --build . 
17 | 18 | tests/vector_examples 19 | tests/stdlib 20 | ``` 21 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all docbook clean 2 | 3 | adoc_src:= $(wildcard ./*.adoc) 4 | pdfs:=$(patsubst %.adoc,%.pdf, $(adoc_src)) 5 | xmls:=$(patsubst %.adoc,%.xml, $(adoc_src)) 6 | 7 | $(pdfs):%.pdf:%.adoc 8 | asciidoctor-pdf -o $@ $< 9 | 10 | all: $(pdfs) docbook 11 | 12 | docbook: $(xmls) 13 | 14 | $(xmls):%.xml:%.adoc 15 | asciidoctor -v -b docbook -o $@ $< 16 | 17 | clean: 18 | $(RM) $(pdfs) $(xmls) 19 | -------------------------------------------------------------------------------- /doc/vtype-format.adoc: -------------------------------------------------------------------------------- 1 | 2 | .`vtype` register layout 3 | [cols="2,4,10"] 4 | |=== 5 | | Bits | Name | Description 6 | 7 | | XLEN-1 | vill | Illegal value if set 8 | | XLEN-2:7 | | Reserved (write 0) 9 | | 6:5 | vediv[1:0] | Used by EDIV extension 10 | | 4:2 | vsew[2:0] | Standard element width (SEW) setting 11 | | 1:0 | vlmul[1:0] | Vector register group multiplier (LMUL) setting 12 | |=== 13 | -------------------------------------------------------------------------------- /vstdlib/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | 3 | project(vstdlib) 4 | 5 | add_library(vstdlib STATIC 6 | vstdlib.hpp 7 | memcpy.cpp 8 | memset.cpp 9 | memmove.cpp 10 | memcpy_backward.cpp 11 | ) 12 | set_target_properties(vstdlib PROPERTIES FOLDER "Libs") 13 | target_include_directories(vstdlib PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") 14 | target_link_libraries(vstdlib PUBLIC riscv32) 15 | 16 | target_compile_features(vstdlib PUBLIC ${CXX_FEATURES}) 17 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.0) 2 | 3 | project(Vendetta_tests) 4 | 5 | set(tests 6 | unit_tests 7 | stdlib 8 | vector_examples 9 | ) 10 | 11 | foreach(name ${tests}) 12 | add_executable(${name} "${CMAKE_CURRENT_SOURCE_DIR}/${name}.cpp") 13 | set_target_properties(${name} PROPERTIES FOLDER "tests") 14 | add_test(test_${name} ${name}) 15 | endforeach() 16 | 17 | target_link_libraries(unit_tests riscv32) 18 | target_link_libraries(stdlib riscv32 vstdlib) 19 | target_link_libraries(vector_examples riscv32) 20 | -------------------------------------------------------------------------------- /vstdlib/vstdlib.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file vstdlib.hpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) example 7 | */ 8 | 9 | #ifndef RISCV_VEXAMPLES_HPP_ 10 | #define RISCV_VEXAMPLES_HPP_ 11 | 12 | #include "riscv/ext/v.hpp" 13 | 14 | #include 15 | #include 16 | 17 | namespace rvv { 18 | 19 | void *memset(void *const dest, int c, size_t count); 20 | void *memcpy(void * const dest, void const *src, size_t count); 21 | void *memmove(void *const dest, void const *src, size_t count); 22 | void *memcpy_backward(void *pd, void const *ps, size_t count); 23 | 24 | } // namespace rvv 25 | 26 | #endif // RISCV_VEXAMPLES_HPP_ 27 | -------------------------------------------------------------------------------- /doc/valu-format.adoc: -------------------------------------------------------------------------------- 1 | ---- 2 | Formats for Vector Arithmetic Instructions under OP-V major opcode 3 | 4 | 31 26 25 24 20 19 15 14 12 11 7 6 0 5 | funct6 | vm | vs2 | vs1 | 0 0 0 | vd |1010111| OP-V (OPIVV) 6 | funct6 | vm | vs2 | vs1 | 0 0 1 | vd |1010111| OP-V (OPFVV) 7 | funct6 | vm | vs2 | vs1 | 0 1 0 | vd/rd |1010111| OP-V (OPMVV) 8 | funct6 | vm | vs2 | simm5 | 0 1 1 | 
vd |1010111| OP-V (OPIVI) 9 | funct6 | vm | vs2 | rs1 | 1 0 0 | vd |1010111| OP-V (OPIVX) 10 | funct6 | vm | vs2 | rs1 | 1 0 1 | vd |1010111| OP-V (OPFVF) 11 | funct6 | vm | vs2 | rs1 | 1 1 0 | vd/rd |1010111| OP-V (OPMVX) 12 | 6 1 5 5 3 5 7 13 | ---- 14 | -------------------------------------------------------------------------------- /vstdlib/memset.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file memset.cpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) example 7 | */ 8 | 9 | #include "vstdlib/vstdlib.hpp" 10 | 11 | #include 12 | 13 | namespace rvv { 14 | 15 | using namespace ::riscv::v; 16 | 17 | void* 18 | memset(void *const dest, int c, size_t count) 19 | { 20 | if (0 != count) { 21 | vsetvli(count, vtypei(e8, m8)); 22 | vmv_v_x(v0, c); 23 | 24 | int8_t *pd = static_cast(dest); 25 | do { 26 | size_t const vl = vsetvli(count, vtypei(e8, m8)); 27 | count -= vl; 28 | vsb_v(v0, pd); 29 | pd += vl; 30 | } while (count); 31 | } 32 | 33 | return dest; 34 | } 35 | 36 | } // namespace rvv 37 | -------------------------------------------------------------------------------- /vstdlib/memmove.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file memmove.cpp 3 | @copyright ©2019 Syntacore. 
4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) example 7 | */ 8 | 9 | #include "vstdlib/vstdlib.hpp" 10 | 11 | #include 12 | 13 | namespace rvv { 14 | 15 | using namespace ::riscv::v; 16 | /** @brief Copy count bytes from src to dest, tolerating overlapping ranges; returns dest. */ 17 | void* 18 | memmove(void *const dest, void const *src, size_t count) 19 | { 20 | uint8_t const *ps = static_cast(src); 21 | uint8_t *pd = static_cast(dest); 22 | if (0 != count && pd != ps) { // Nothing to do for an empty or self-to-self move 23 | if (pd < ps || ps + count <= pd) { // dest starts before src, or at/after the end of src: forward copy 24 | return memcpy(dest, src, count); 25 | } else { // dest starts inside [src, src + count): copy from the end 26 | memcpy_backward(pd, ps, count); 27 | return dest; 28 | } 29 | } 30 | 31 | return dest; 32 | } 33 | } // namespace rvv 34 | -------------------------------------------------------------------------------- /vstdlib/memcpy.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file memcpy.cpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) example 7 | */ 8 | 9 | #include "vstdlib/vstdlib.hpp" 10 | 11 | #include 12 | 13 | namespace rvv { 14 | 15 | using namespace ::riscv::v; 16 | /** @brief Copy count bytes from src to dest front to back, vl bytes per iteration; returns dest. */ 17 | void* 18 | memcpy(void * const dest, void const *src, size_t count) 19 | { 20 | if (0 != count) { 21 | int8_t const *ps = static_cast(src); 22 | int8_t *pd = static_cast(dest); 23 | do { 24 | size_t const vl = vsetvli(count, vtypei(e8, m8)); 25 | count -= vl; 26 | vlb_v(v0, ps); 27 | ps += vl; 28 | vsb_v(v0, pd); 29 | pd += vl; 30 | } while (count); 31 | } 32 | 33 | return dest; 34 | } 35 | } // namespace rvv 36 | -------------------------------------------------------------------------------- /doc/vmem-format.adoc: -------------------------------------------------------------------------------- 1 | ---- 2 | Format for Vector Load Instructions under LOAD-FP major opcode 3 | 31 29 28 26 25 24 20 19 15 14 12 11 7 6 0 4 | nf | mop | vm | lumop | rs1 | width | vd |0000111| VL* unit-stride 5 | nf | mop | vm | rs2 | rs1 | width | vd |0000111| VLS* strided 6 | nf | mop | vm | vs2 | 
rs1 | width | vd |0000111| VLX* indexed 7 | 3 3 1 5 5 3 5 7 8 | 9 | Format for Vector Store Instructions under STORE-FP major opcode 10 | 31 29 28 26 25 24 20 19 15 14 12 11 7 6 0 11 | nf | mop | vm | sumop | rs1 | width | vs3 |0100111| VS* unit-stride 12 | nf | mop | vm | rs2 | rs1 | width | vs3 |0100111| VSS* strided 13 | nf | mop | vm | vs2 | rs1 | width | vs3 |0100111| VSX* indexed 14 | 3 3 1 5 5 3 5 7 15 | ---- 16 | -------------------------------------------------------------------------------- /vstdlib/memcpy_backward.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file memcpy_backward.cpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) example 7 | */ 8 | 9 | #include "vstdlib/vstdlib.hpp" 10 | 11 | #include 12 | 13 | namespace rvv { 14 | 15 | using namespace ::riscv::v; 16 | /** @brief Copy count bytes from src to dest starting at the last byte and walking down; returns dest. */ 17 | void* 18 | memcpy_backward(void *const dest, void const *src, size_t count) 19 | { 20 | if (0 != count) { 21 | int8_t const *ps = static_cast(src) + (count - 1); 22 | int8_t *pd = static_cast(dest) + (count - 1); 23 | 24 | static ptrdiff_t const stride = -1; // Negative stride passed to the strided load/store below 25 | do { 26 | size_t const vl = vsetvli(count, vtypei(e8, m8)); 27 | count -= vl; 28 | vlsb_v(v0, ps, stride); 29 | ps -= vl; 30 | vssb_v(v0, pd, stride); 31 | pd -= vl; 32 | } while (count); 33 | } 34 | return dest; 35 | } 36 | } // namespace rvv 37 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.6) 2 | 3 | project(rvv-simulator) 4 | 5 | set(Boost_USE_MULTITHREADED ON) 6 | find_package(Boost 1.66 REQUIRED) # Mandatory: riscv32 links Boost::boost, so stop here if Boost is missing 7 | 8 | enable_testing() 9 | 10 | set_property(GLOBAL PROPERTY USE_FOLDERS ON) 11 | 12 | if (MSVC) 13 | add_definitions( 14 | -D_SCL_SECURE_NO_WARNINGS 15 | -D_USE_MATH_DEFINES 16 | -DNOMINMAX 17 | ) 18 | add_compile_options(/bigobj) 19 | endif(MSVC) 
20 | 21 | set(RVV_ELEN 64 CACHE STRING "The maximum size of a single vector element in bits") 22 | set(RVV_VLEN 256 CACHE STRING "The number of bits in a vector register") # VLEN ≥ ELEN 23 | set(RVV_SLEN 64 CACHE STRING "The striping distance in bits") # VLEN ≥ SLEN ≥ 32 24 | 25 | set(CXX_FEATURES 26 | #cxx_aggregate_default_initializers 27 | cxx_alias_templates 28 | cxx_alignas 29 | cxx_alignof 30 | cxx_attributes 31 | cxx_attribute_deprecated 32 | cxx_auto_type 33 | cxx_binary_literals 34 | cxx_constexpr 35 | cxx_contextual_conversions 36 | #cxx_decltype_incomplete_return_types 37 | cxx_decltype 38 | cxx_decltype_auto 39 | cxx_default_function_template_args 40 | cxx_defaulted_functions 41 | cxx_defaulted_move_initializers 42 | cxx_delegating_constructors 43 | cxx_deleted_functions 44 | cxx_digit_separators 45 | cxx_enum_forward_declarations 46 | cxx_explicit_conversions 47 | cxx_extended_friend_declarations 48 | cxx_extern_templates 49 | cxx_final 50 | cxx_func_identifier 51 | cxx_generalized_initializers 52 | cxx_generic_lambdas 53 | cxx_inheriting_constructors 54 | cxx_inline_namespaces 55 | cxx_lambdas 56 | cxx_lambda_init_captures 57 | cxx_local_type_template_args 58 | cxx_long_long_type 59 | cxx_noexcept 60 | cxx_nonstatic_member_init 61 | cxx_nullptr 62 | cxx_override 63 | cxx_range_for 64 | cxx_raw_string_literals 65 | cxx_reference_qualified_functions 66 | #cxx_relaxed_constexpr 67 | cxx_return_type_deduction 68 | cxx_right_angle_brackets 69 | cxx_rvalue_references 70 | cxx_sizeof_member 71 | cxx_static_assert 72 | cxx_strong_enums 73 | cxx_thread_local 74 | cxx_trailing_return_types 75 | cxx_unicode_literals 76 | cxx_uniform_initialization 77 | cxx_unrestricted_unions 78 | cxx_user_literals 79 | cxx_variable_templates 80 | cxx_variadic_macros 81 | cxx_variadic_templates 82 | cxx_template_template_parameters 83 | ) 84 | 85 | add_library(riscv32 STATIC 86 | src/riscv32/v.cpp 87 | include/riscv/ext/v.hpp 88 | ) 89 | target_compile_definitions(riscv32 PUBLIC 
-DRVV_ELEN=${RVV_ELEN} PUBLIC -DRVV_VLEN=${RVV_VLEN} PUBLIC -DRVV_SLEN=${RVV_SLEN}) 90 | set_target_properties(riscv32 PROPERTIES FOLDER "Libs") 91 | target_include_directories(riscv32 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) 92 | target_link_libraries(riscv32 Boost::boost) 93 | target_compile_features(riscv32 PUBLIC ${CXX_FEATURES}) 94 | 95 | add_subdirectory(vstdlib) 96 | add_subdirectory(tests) 97 | -------------------------------------------------------------------------------- /tests/stdlib.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file stdlib.cpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) standard library tests 7 | */ 8 | 9 | #define BOOST_TEST_MODULE stdlib 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "vstdlib/vstdlib.hpp" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | namespace bdata = boost::unit_test::data; 23 | 24 | namespace { 25 | std::default_random_engine generator; 26 | } // namespace 27 | 28 | BOOST_AUTO_TEST_CASE(test_memcpy) 29 | { 30 | using std::begin; 31 | using std::end; 32 | typedef std::vector buf_type; 33 | buf_type in_buf; 34 | static std::uniform_int_distribution distribution(0, 255); 35 | static auto const gen = []() {return distribution(generator); }; 36 | std::generate_n(std::back_inserter(in_buf), 1024, gen); 37 | buf_type out_buf(in_buf.size()); 38 | rvv::memcpy(&out_buf[0], &in_buf[0], in_buf.size() * sizeof(buf_type::value_type)); 39 | BOOST_TEST(in_buf == out_buf); 40 | } 41 | 42 | BOOST_AUTO_TEST_CASE(test_memcpy_backward) 43 | { 44 | typedef std::vector buf_type; 45 | 46 | static std::uniform_int_distribution distribution(0, 255); 47 | static auto const gen = []() {return distribution(generator); }; 48 | 49 | buf_type in_buf; 50 | using std::begin; 51 | using std::end; 52 | std::generate_n(std::back_inserter(in_buf), 1024, gen); 53 | 54 | buf_type 
out_buf(in_buf.size()); 55 | rvv::memcpy_backward(&out_buf[0], &in_buf[0], in_buf.size() * sizeof(buf_type::value_type)); 56 | BOOST_TEST(in_buf == out_buf); 57 | } 58 | 59 | BOOST_AUTO_TEST_CASE(test_memmove_forward) 60 | { 61 | typedef std::vector buf_type; 62 | static std::uniform_int_distribution distribution(0, 255); 63 | static auto const gen = []() {return distribution(generator); }; 64 | 65 | buf_type ref_buf; 66 | using std::begin; 67 | using std::end; 68 | static size_t const buf_size = 1024; 69 | std::generate_n(std::back_inserter(ref_buf), buf_size, gen); 70 | 71 | buf_type tst_buf = ref_buf; 72 | 73 | std::memmove(&ref_buf[buf_size / 4], &ref_buf[0], buf_size / 2); 74 | rvv::memmove(&tst_buf[buf_size / 4], &tst_buf[0], buf_size / 2); 75 | BOOST_TEST(ref_buf == tst_buf); 76 | } 77 | 78 | BOOST_AUTO_TEST_CASE(test_memmove_backward) 79 | { 80 | static size_t const buf_size = 1024; 81 | typedef std::vector buf_type; 82 | static std::uniform_int_distribution distribution(0, 255); 83 | static auto const gen = []() {return distribution(generator); }; 84 | 85 | buf_type ref_buf; 86 | using std::begin; 87 | using std::end; 88 | std::generate_n(std::back_inserter(ref_buf), buf_size, gen); 89 | 90 | buf_type tst_buf = ref_buf; 91 | 92 | std::memmove(&ref_buf[0], &ref_buf[buf_size / 4], buf_size / 2); 93 | rvv::memmove(&tst_buf[0], &tst_buf[buf_size / 4], buf_size / 2); 94 | BOOST_TEST(ref_buf == tst_buf); 95 | } 96 | 97 | BOOST_DATA_TEST_CASE(test_memset, bdata::xrange(256), val) 98 | { 99 | typedef std::vector buf_type; 100 | buf_type buf(1024); 101 | rvv::memset(&buf[0], val, buf.size() * sizeof(buf_type::value_type)); 102 | 103 | using std::begin; 104 | using std::end; 105 | namespace ph = std::placeholders; 106 | BOOST_TEST(std::all_of(begin(buf), end(buf), std::bind(std::equal_to(), val, ph::_1))); 107 | } 108 | -------------------------------------------------------------------------------- /tests/unit_tests.cpp: 
-------------------------------------------------------------------------------- 1 | /** 2 | @file unit_tests.cpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) per-instruction tests 7 | */ 8 | 9 | #define BOOST_TEST_MODULE basic_ops 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "riscv/ext/v.hpp" 16 | 17 | using namespace ::riscv::v; 18 | 19 | namespace { 20 | std::default_random_engine generator(0); 21 | 22 | template 23 | void* 24 | add(void * const dest, void const *src_a, void const *src_b, size_t count) 25 | { 26 | if (0 != count) { 27 | Ty const *pa = static_cast(src_a); 28 | Ty const *pb = static_cast(src_b); 29 | Ty *pd = static_cast(dest); 30 | 31 | do { 32 | size_t const vl = vsetvl(count, vtype(e32, m8)); 33 | count -= vl; 34 | vlw_v(v0, pa); 35 | pa += vl; 36 | vlw_v(v8, pb); 37 | pb += vl; 38 | vadd_vv(v16, v8, v0); 39 | vsw_v(v16, pd); 40 | pd += vl; 41 | } while (count); 42 | } 43 | 44 | return dest; 45 | } 46 | 47 | template 48 | void* 49 | addx(void * const dest, void const *src_a, int32_t const src_b, size_t count) 50 | { 51 | if (0 != count) { 52 | Ty const *pa = static_cast(src_a); 53 | Ty *pd = static_cast(dest); 54 | 55 | do { 56 | size_t const vl = vsetvl(count, vtype(e32, m8)); 57 | count -= vl; 58 | vlw_v(v0, pa); 59 | pa += vl; 60 | vadd_vx(v16, v0, src_b); 61 | vsw_v(v16, pd); 62 | pd += vl; 63 | } while (count); 64 | } 65 | 66 | return dest; 67 | } 68 | 69 | template 70 | void* 71 | addi(void * const dest, void const *src_a, int16_t const imm, size_t count) 72 | { 73 | if (0 != count) { 74 | Ty const *pa = static_cast(src_a); 75 | Ty *pd = static_cast(dest); 76 | 77 | do { 78 | size_t const vl = vsetvl(count, vtype(e32, m8)); 79 | count -= vl; 80 | vlw_v(v0, pa); 81 | pa += vl; 82 | vadd_vi(v16, v0, imm); 83 | vsw_v(v16, pd); 84 | pd += vl; 85 | } while (count); 86 | } 87 | 88 | return dest; 89 | } 90 | 91 | template 92 | void* 93 | sub(void * const 
dest, void const *src_a, void const *src_b, size_t count) 94 | { 95 | if (0 != count) { 96 | Ty const *pa = static_cast(src_a); 97 | Ty const *pb = static_cast(src_b); 98 | Ty *pd = static_cast(dest); 99 | 100 | do { 101 | size_t const vl = vsetvl(count, vtype(e32, m8)); 102 | count -= vl; 103 | vlw_v(v0, pa); 104 | pa += vl; 105 | vlw_v(v8, pb); 106 | pb += vl; 107 | vsub_vv(v16, v8, v0); 108 | vsw_v(v16, pd); 109 | pd += vl; 110 | } while (count); 111 | } 112 | 113 | return dest; 114 | } 115 | } // namespace 116 | 117 | BOOST_AUTO_TEST_CASE(addition) 118 | { 119 | using std::begin; 120 | using std::end; 121 | typedef std::vector buf_type; 122 | const size_t buf_size = 16; 123 | buf_type in_buf_a; 124 | buf_type in_buf_b; 125 | static std::uniform_int_distribution distribution(0, 255); 126 | static auto const gen = []() {return distribution(generator); }; 127 | std::generate_n(std::back_inserter(in_buf_a), buf_size, gen); 128 | std::generate_n(std::back_inserter(in_buf_b), buf_size, gen); 129 | 130 | buf_type out_buf(in_buf_a.size()); 131 | buf_type ref_buf; 132 | 133 | add(&out_buf[0], &in_buf_a[0], &in_buf_b[0], in_buf_a.size()); 134 | std::transform(in_buf_a.begin(), in_buf_a.end(), in_buf_b.begin(), std::back_inserter(ref_buf), std::plus()); 135 | 136 | BOOST_TEST(ref_buf == out_buf); 137 | } 138 | 139 | BOOST_AUTO_TEST_CASE(addition_scalar) 140 | { 141 | using std::begin; 142 | using std::end; 143 | namespace ph = std::placeholders; 144 | typedef std::vector buf_type; 145 | size_t const buf_size = 16; 146 | int32_t const x = 127; 147 | buf_type in_buf_a; 148 | static std::uniform_int_distribution distribution(0, 255); 149 | static auto const gen = []() {return distribution(generator); }; 150 | std::generate_n(std::back_inserter(in_buf_a), buf_size, gen); 151 | 152 | buf_type out_buf(in_buf_a.size()); 153 | buf_type ref_buf; 154 | 155 | addx(&out_buf[0], &in_buf_a[0], x, in_buf_a.size()); 156 | std::transform(in_buf_a.begin(), in_buf_a.end(), 
std::back_inserter(ref_buf), std::bind(std::plus(), ph::_1, x)); 157 | 158 | BOOST_TEST(ref_buf == out_buf); 159 | } 160 | 161 | BOOST_AUTO_TEST_CASE(addition_immediate) 162 | { 163 | using std::begin; 164 | using std::end; 165 | namespace ph = std::placeholders; 166 | typedef std::vector buf_type; 167 | size_t const buf_size = 16; 168 | int16_t const imm = 127; 169 | buf_type in_buf_a; 170 | static std::uniform_int_distribution distribution(0, 255); 171 | static auto const gen = []() {return distribution(generator); }; 172 | std::generate_n(std::back_inserter(in_buf_a), buf_size, gen); 173 | 174 | buf_type out_buf(in_buf_a.size()); 175 | buf_type ref_buf; 176 | 177 | addi(&out_buf[0], &in_buf_a[0], imm, in_buf_a.size()); 178 | std::transform(in_buf_a.begin(), in_buf_a.end(), std::back_inserter(ref_buf), std::bind(std::plus(), ph::_1, imm)); 179 | 180 | BOOST_TEST(ref_buf == out_buf); 181 | } 182 | 183 | BOOST_AUTO_TEST_CASE(subtraction) 184 | { 185 | using std::begin; 186 | using std::end; 187 | typedef std::vector buf_type; 188 | const size_t buf_size = 32; 189 | buf_type in_buf_a; 190 | buf_type in_buf_b; 191 | static std::uniform_int_distribution distribution(0, 255); 192 | static auto const gen = []() {return distribution(generator); }; 193 | std::generate_n(std::back_inserter(in_buf_a), buf_size, gen); 194 | std::generate_n(std::back_inserter(in_buf_b), buf_size, gen); 195 | 196 | buf_type out_buf(in_buf_a.size()); 197 | buf_type ref_buf; 198 | 199 | sub(&out_buf[0], &in_buf_a[0], &in_buf_b[0], in_buf_a.size()); 200 | std::transform(in_buf_a.begin(), in_buf_a.end(), in_buf_b.begin(), std::back_inserter(ref_buf), std::minus()); 201 | 202 | BOOST_TEST(ref_buf == out_buf); 203 | } 204 | -------------------------------------------------------------------------------- /tests/vector_examples.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file vector_examples.cpp 3 | @copyright ©2019 Syntacore. 
4 | @authors 5 | Grigory Okhotnikov 6 | @brief RISCV-V Vector extension (v0.7) simulator usage examples and tests 7 | */ 8 | 9 | #define BOOST_TEST_MODULE vector_examples 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | #include "riscv/ext/v.hpp" 16 | 17 | using namespace ::riscv::v; 18 | 19 | namespace { 20 | std::default_random_engine generator(0); 21 | 22 | void* 23 | vvaddint32(size_t n, void * const dest, void const *src_a, void const *src_b) 24 | { 25 | if (0 != n) { 26 | int32_t const *pa = static_cast(src_a); 27 | int32_t const *pb = static_cast(src_b); 28 | int32_t *pd = static_cast(dest); 29 | 30 | do { 31 | size_t const vl = vsetvli(n, vtypei(e32, m1)); // Set vector length based on 32-bit vectors 32 | vlw_v(v0, pa); // Get first vector 33 | n -= vl; // Decrement number done 34 | pa += vl; // Bump pointer 35 | vlw_v(v1, pb); // Get second vector 36 | pb += vl; // Bump pointer 37 | vadd_vv(v2, v0, v1); // Sum vectors 38 | vsw_v(v2, pd); // Store result 39 | pd += vl; // Bump pointer 40 | } while (n); // Loop back 41 | } 42 | 43 | return dest; 44 | } 45 | 46 | void 47 | mixed_width(size_t n, void const *a, void * const b, void const *c) 48 | { 49 | if (0 != n) { 50 | int8_t const *pa = static_cast(a); 51 | int32_t *pb = static_cast(b); 52 | int32_t const *pc = static_cast(c); 53 | 54 | do { 55 | size_t vl = vsetvli(n, vtypei(e8, m1)); // Byte vector for predicate calc 56 | vlb_v(v1, pa); // Load a[i] 57 | pa += vl; // Bump pointer 58 | vmsle_vi(v0, v1, 5 - 1); // a[i] < 5? 59 | 60 | vl = vsetvli(n, vtypei(e32, m4)); // Vector of 32-bit values 61 | n -= vl; // Decrement count 62 | vmv_v_i(v4, 1); // Splat immediate to destination 63 | vlw_v(v4, pc, vop_type::masked_in); // Load requested elements of C 64 | pc += vl; // Bump pointer 65 | vsw_v(v4, pb); // Store b[i] 66 | pb += vl; // Bump pointer 67 | } while (n); // Any more? 
68 | } 69 | } 70 | /** Copy n bytes from src to dest with byte vector loads and stores; returns dest. */ 71 | void* 72 | vmemcpy(void * const dest, void const *src, size_t n) 73 | { 74 | if (0 != n) { 75 | int8_t const *ps = static_cast(src); 76 | int8_t *pd = static_cast(dest); 77 | do { 78 | size_t const vl = vsetvli(n, vtypei(e8, m8)); // Vectors of 8b 79 | vlb_v(v0, ps); // Load bytes 80 | ps += vl; // Bump pointer 81 | n -= vl; // Decrement count 82 | vsb_v(v0, pd); // Store bytes 83 | pd += vl; // Bump pointer 84 | } while (n); // Any more? 85 | } 86 | 87 | return dest; 88 | } 89 | /** Element-wise select: z[i] = (x[i] < 5) ? a[i] : b[i] for i in [0, n). */ 90 | void 91 | conditional(size_t n, void const *x, void const *a, void const *b, void * const z) 92 | { 93 | if (0 != n) { 94 | int8_t const *px = static_cast(x); 95 | int16_t const *pa = static_cast(a); 96 | int16_t const *pb = static_cast(b); 97 | int16_t *pz = static_cast(z); 98 | 99 | do { 100 | size_t const vl = vsetvli(n, vtypei(e16)); // Use 16b elements. 101 | vlb_v(v0, px); // Get x[i], sign-extended to 16b 102 | n -= vl; // Decrement element count 103 | px += vl; // x[i] Bump pointer 104 | vmsle_vi(v0, v0, 5 - 1); // Set mask in v0 105 | vlh_v(v1, pa, vop_type::masked_in); // z[i] = a[i] case 106 | vmnot_m(v0, v0); // Invert v0 107 | pa += vl; // a[i] bump pointer 108 | vlh_v(v1, pb, vop_type::masked_in); // z[i] = b[i] case 109 | pb += vl; // b[i] bump pointer 110 | vsh_v(v1, pz); // Store z 111 | pz += vl; // z[i] bump pointer 112 | } while (n); // Any more? 113 | } 114 | } 115 | /** Single-precision y[i] += a * x[i] for i in [0, n). */ 116 | void 117 | saxpy(size_t n, float const a, float const *x, float *y) 118 | { 119 | if (0 != n) { 120 | do { 121 | size_t const vl = vsetvli(n, vtypei(e32, m8)); // 32-bit elements, m8 grouping 122 | vlw_v(v0, reinterpret_cast(x)); // Load x chunk 123 | n -= vl; // Decrement element count 124 | x += vl; // Bump x pointer 125 | vlw_v(v8, reinterpret_cast(y)); // Load y chunk 126 | vfmacc_vf(v8, a, v0); // v8 += a * v0 127 | vsw_v(v8, reinterpret_cast(y)); // Store updated y chunk 128 | y += vl; // Bump y pointer 129 | } while (n); // Any more? 130 | } 131 | } 132 | /** Accumulate A*B into C, 16 rows of C per outer iteration; a trailing block of fewer than 16 rows is not handled (see TODO at the end). */ 133 | void 134 | sgemm(size_t n, size_t m, size_t k, float const *a, size_t lda, float const *b, size_t ldb, float *c, size_t ldc) 135 | { 136 | if ((n == 0) || (m == 0) || (k == 0)) { 137 | return; 138 
| } 139 | 140 | size_t const astride = lda; 141 | size_t const bstride = ldb; 142 | size_t const cstride = ldc; 143 | 144 | while (m >= 16) { // Loop across rows of C blocks 145 | size_t nt = n; // Initialize n counter for next row of C blocks 146 | float const *bnp = b; // Initialize B n-loop pointer to start 147 | float *cnp = c; // Initialize C n-loop pointer 148 | 149 | while (nt) { // Loop across one row of C blocks 150 | size_t const nvl = vsetvli(nt, vtypei(e32)); // 32-bit vectors, LMUL=1 151 | float const *akp = a; // reset pointer into A to beginning 152 | float const *bkp = bnp; // step to next column in B matrix 153 | 154 | // Initialize current C submatrix block from memory 155 | vlw_v(v0, reinterpret_cast(cnp)); 156 | float *ccp = cnp + cstride; 157 | vlw_v(v1, reinterpret_cast(ccp)); 158 | ccp += cstride; 159 | vlw_v(v2, reinterpret_cast(ccp)); 160 | ccp += cstride; 161 | vlw_v(v3, reinterpret_cast(ccp)); 162 | ccp += cstride; 163 | vlw_v(v4, reinterpret_cast(ccp)); 164 | ccp += cstride; 165 | vlw_v(v5, reinterpret_cast(ccp)); 166 | ccp += cstride; 167 | vlw_v(v6, reinterpret_cast(ccp)); 168 | ccp += cstride; 169 | vlw_v(v7, reinterpret_cast(ccp)); 170 | ccp += cstride; 171 | vlw_v(v8, reinterpret_cast(ccp)); 172 | ccp += cstride; 173 | vlw_v(v9, reinterpret_cast(ccp)); 174 | ccp += cstride; 175 | vlw_v(v10, reinterpret_cast(ccp)); 176 | ccp += cstride; 177 | vlw_v(v11, reinterpret_cast(ccp)); 178 | ccp += cstride; 179 | vlw_v(v12, reinterpret_cast(ccp)); 180 | ccp += cstride; 181 | vlw_v(v13, reinterpret_cast(ccp)); 182 | ccp += cstride; 183 | vlw_v(v14, reinterpret_cast(ccp)); 184 | ccp += cstride; 185 | vlw_v(v15, reinterpret_cast(ccp)); 186 | 187 | size_t kt = k; // Initialize inner loop counter 188 | 189 | // Inner loop scheduled assuming 4-clock occupancy of vfmacc instruction and single-issue pipeline 190 | // Software pipeline loads 191 | float ft0 = akp[0]; 192 | float const *amp = akp + astride; 193 | float ft1 = amp[0]; 194 | amp += 
astride; 195 | float ft2 = amp[0]; 196 | amp += astride; 197 | float ft3 = amp[0]; 198 | amp += astride; 199 | 200 | // ft4..ft15 are loaded inside the k loop, one row of A per load 201 | 202 | // Get vector from B matrix 203 | vlw_v(v16, reinterpret_cast(bkp)); 204 | 205 | while (kt) { // Loop on inner dimension for current C block 206 | vfmacc_vf(v0, ft0, v16); 207 | bkp += bstride; 208 | float ft4 = amp[0]; 209 | amp += astride; 210 | vfmacc_vf(v1, ft1, v16); 211 | kt -= 1; // Decrement k counter 212 | float ft5 = amp[0]; 213 | amp += astride; 214 | vfmacc_vf(v2, ft2, v16); 215 | float ft6 = amp[0]; 216 | amp += astride; 217 | float ft7 = amp[0]; 218 | vfmacc_vf(v3, ft3, v16); 219 | amp += astride; 220 | float ft8 = amp[0]; 221 | amp += astride; 222 | vfmacc_vf(v4, ft4, v16); 223 | float ft9 = amp[0]; 224 | amp += astride; 225 | vfmacc_vf(v5, ft5, v16); 226 | float ft10 = amp[0]; 227 | amp += astride; 228 | vfmacc_vf(v6, ft6, v16); 229 | float ft11 = amp[0]; 230 | amp += astride; 231 | vfmacc_vf(v7, ft7, v16); 232 | float ft12 = amp[0]; 233 | amp += astride; 234 | vfmacc_vf(v8, ft8, v16); 235 | float ft13 = amp[0]; 236 | amp += astride; 237 | vfmacc_vf(v9, ft9, v16); 238 | float ft14 = amp[0]; 239 | amp += astride; 240 | vfmacc_vf(v10, ft10, v16); 241 | float ft15 = amp[0]; 242 | amp += astride; 243 | akp += 1; // Move to next column of a 244 | vfmacc_vf(v11, ft11, v16); 245 | // Don't load past end of matrix 246 | if (0 != kt) { 247 | ft0 = akp[0]; 248 | amp = akp + astride; 249 | } 250 | vfmacc_vf(v12, ft12, v16); 251 | if (0 != kt) { 252 | ft1 = amp[0]; 253 | amp += astride; 254 | } 255 | vfmacc_vf(v13, ft13, v16); 256 | if (0 != kt) { 257 | ft2 = amp[0]; 258 | amp += astride; 259 | } 260 | vfmacc_vf(v14, ft14, v16); 261 | if (0 != kt) { 262 | ft3 = amp[0]; 263 | amp += astride; 264 | } 265 | vfmacc_vf(v15, ft15, v16); 266 | if (0 != kt) { // Don't load B past its last row either 267 | vlw_v(v16, reinterpret_cast(bkp)); 268 | } 269 | } // k_loop (v15 is fully accumulated inside the loop) 270 | // Save C matrix block back to memory 271 | vsw_v(v0, reinterpret_cast(cnp)); 272 | ccp 
= cnp + cstride; 273 | vsw_v(v1, reinterpret_cast(ccp)); 274 | ccp += cstride; 275 | vsw_v(v2, reinterpret_cast(ccp)); 276 | ccp += cstride; 277 | vsw_v(v3, reinterpret_cast(ccp)); 278 | ccp += cstride; 279 | vsw_v(v4, reinterpret_cast(ccp)); 280 | ccp += cstride; 281 | vsw_v(v5, reinterpret_cast(ccp)); 282 | ccp += cstride; 283 | vsw_v(v6, reinterpret_cast(ccp)); 284 | ccp += cstride; 285 | vsw_v(v7, reinterpret_cast(ccp)); 286 | ccp += cstride; 287 | vsw_v(v8, reinterpret_cast(ccp)); 288 | ccp += cstride; 289 | vsw_v(v9, reinterpret_cast(ccp)); 290 | ccp += cstride; 291 | vsw_v(v10, reinterpret_cast(ccp)); 292 | ccp += cstride; 293 | vsw_v(v11, reinterpret_cast(ccp)); 294 | ccp += cstride; 295 | vsw_v(v12, reinterpret_cast(ccp)); 296 | ccp += cstride; 297 | vsw_v(v13, reinterpret_cast(ccp)); 298 | ccp += cstride; 299 | vsw_v(v14, reinterpret_cast(ccp)); 300 | ccp += cstride; 301 | vsw_v(v15, reinterpret_cast(ccp)); 302 | 303 | // Following tail instructions should be scheduled earlier in free slots during C block save 304 | 305 | // Bump pointers for loop across blocks in one row 306 | cnp += nvl; // Move C block pointer over 307 | bnp += nvl; // Move B block pointer over 308 | nt -= nvl; // Decrement element count in n dimension 309 | } // c_col_loop 310 | 311 | m -= 16; 312 | a += astride * 16; 313 | c += cstride * 16; 314 | } // c_row_loop 315 | // TODO: Handle end of matrix with fewer than 16 rows. 
316 | } 317 | } // namespace 318 | 319 | BOOST_AUTO_TEST_CASE(vector_vector_add_example) 320 | { 321 | using std::begin; 322 | using std::end; 323 | typedef std::vector buf_type; 324 | const size_t buf_size = 128; 325 | buf_type in_buf_a; 326 | buf_type in_buf_b; 327 | static std::uniform_int_distribution distribution(0, 255); 328 | static auto const gen = []() {return distribution(generator); }; 329 | std::generate_n(std::back_inserter(in_buf_a), buf_size, gen); 330 | std::generate_n(std::back_inserter(in_buf_b), buf_size, gen); 331 | 332 | buf_type out_buf(in_buf_a.size()); 333 | buf_type ref_buf; 334 | 335 | vvaddint32(buf_size, &out_buf[0], &in_buf_a[0], &in_buf_b[0]); 336 | std::transform(in_buf_a.begin(), in_buf_a.end(), in_buf_b.begin(), std::back_inserter(ref_buf), std::plus()); 337 | 338 | BOOST_TEST(ref_buf == out_buf); 339 | } 340 | 341 | BOOST_AUTO_TEST_CASE(mixed_width_example) 342 | { 343 | using std::begin; 344 | using std::end; 345 | typedef std::vector buf_type; 346 | const size_t buf_size = 128; 347 | std::vector a; 348 | buf_type b; 349 | buf_type c; 350 | 351 | static std::uniform_int_distribution distribution(0, 255); 352 | static std::uniform_int_distribution distribution_a(0, 10); 353 | static auto const gen = []() { return distribution(generator); }; 354 | static auto const gen_a = []() { return distribution_a(generator); }; 355 | std::generate_n(std::back_inserter(a), buf_size, gen_a); 356 | std::generate_n(std::back_inserter(b), buf_size, gen); 357 | std::generate_n(std::back_inserter(c), buf_size, gen); 358 | 359 | buf_type out_buf(buf_size); 360 | buf_type ref_buf; 361 | 362 | for (size_t i = 0; i < buf_size; ++i) { 363 | ref_buf.push_back(a[i] < 5 ? 
c[i] : 1); 364 | } 365 | 366 | mixed_width(buf_size, &a[0], &b[0], &c[0]); 367 | 368 | BOOST_TEST(ref_buf == b); 369 | } 370 | 371 | BOOST_AUTO_TEST_CASE(memcpy_example) 372 | { 373 | using std::begin; 374 | using std::end; 375 | typedef std::vector buf_type; 376 | const size_t buf_size = 1024; 377 | buf_type in_buf; 378 | static std::uniform_int_distribution distribution(0, 255); 379 | static auto const gen = []() {return distribution(generator); }; 380 | std::generate_n(std::back_inserter(in_buf), buf_size, gen); 381 | 382 | buf_type out_buf(in_buf.size()); 383 | vmemcpy(&out_buf[0], &in_buf[0], in_buf.size() * sizeof(buf_type::value_type)); 384 | 385 | BOOST_TEST(in_buf == out_buf); 386 | } 387 | 388 | BOOST_AUTO_TEST_CASE(conditional_example) 389 | { 390 | using std::begin; 391 | using std::end; 392 | typedef std::vector buf_type; 393 | const size_t buf_size = 128; 394 | buf_type a; 395 | buf_type b; 396 | std::vector x; 397 | static std::uniform_int_distribution distribution(0, 255); 398 | static std::uniform_int_distribution distribution_x(0, 10); 399 | static auto const gen = []() { return distribution(generator); }; 400 | static auto const gen_x = []() { return distribution_x(generator); }; 401 | std::generate_n(std::back_inserter(x), buf_size, gen_x); 402 | std::generate_n(std::back_inserter(a), buf_size, gen); 403 | std::generate_n(std::back_inserter(b), buf_size, gen); 404 | 405 | buf_type out_buf(a.size()); 406 | buf_type ref_buf; 407 | 408 | conditional(buf_size, &x[0], &a[0], &b[0], &out_buf[0]); 409 | 410 | for (size_t i = 0; i < buf_size; ++i) { 411 | ref_buf.push_back(x[i] < 5 ? 
a[i] : b[i]); 412 | } 413 | 414 | BOOST_TEST(ref_buf == out_buf); 415 | } 416 | 417 | BOOST_AUTO_TEST_CASE(saxpy_example) 418 | { 419 | using std::begin; 420 | using std::end; 421 | typedef std::vector buf_type; 422 | const size_t buf_size = 128; 423 | float const a = 2.7f; 424 | 425 | buf_type x; 426 | buf_type y; 427 | 428 | static std::uniform_real_distribution distribution(0, 255); 429 | static auto const gen = []() { return distribution(generator); }; 430 | std::generate_n(std::back_inserter(x), buf_size, gen); 431 | std::generate_n(std::back_inserter(y), buf_size, gen); 432 | 433 | buf_type ref_buf; 434 | for (size_t i = 0; i < buf_size; ++i) { 435 | ref_buf.push_back(a * x[i] + y[i]); 436 | } 437 | 438 | saxpy(buf_size, a, &x[0], &y[0]); 439 | 440 | BOOST_TEST(ref_buf == y); 441 | } 442 | 443 | BOOST_AUTO_TEST_CASE(sgemm_example) 444 | { 445 | using std::begin; 446 | using std::end; 447 | 448 | const size_t M = 16; 449 | const size_t K = 16; 450 | const size_t N = 16; 451 | 452 | typedef std::vector buf_type; 453 | 454 | buf_type a; 455 | buf_type b; 456 | buf_type c; 457 | 458 | static std::uniform_real_distribution distribution(0, 10); 459 | static auto const gen = []() { return distribution(generator); }; 460 | 461 | std::generate_n(std::back_inserter(a), M * K, gen); 462 | std::generate_n(std::back_inserter(b), K * N, gen); 463 | std::generate_n(std::back_inserter(c), M * N, gen); 464 | 465 | buf_type ref_buf(M * N); 466 | 467 | for (size_t i = 0; i < M; ++i) { 468 | for (size_t j = 0; j < N; ++j) { 469 | float sum = c[i * N + j]; 470 | for (size_t k = 0; k < K; k++) 471 | sum += a[i * K + k] * b[k * N + j]; 472 | ref_buf[i * N + j] = sum; 473 | } 474 | } 475 | 476 | sgemm(N, M, K, &a[0], M, &b[0], K, &c[0], M); 477 | 478 | BOOST_TEST(ref_buf == c); 479 | } 480 | -------------------------------------------------------------------------------- /include/riscv/ext/v.hpp: -------------------------------------------------------------------------------- 1 | 
/** 2 | @file v.hpp 3 | @copyright ©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) 7 | */ 8 | 9 | #ifndef RISCV_EXT_V_HPP_ 10 | #define RISCV_EXT_V_HPP_ 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #define PASTE_(OP1, OP2) OP1 ## OP2 18 | #define CONCAT_(OP1, OP2) PASTE_(OP1, OP2) 19 | 20 | namespace riscv { 21 | 22 | typedef int64_t xreg_type; 23 | 24 | class Invalid_instruction 25 | : public std::invalid_argument 26 | { 27 | protected: 28 | Invalid_instruction(std::string const& msg) 29 | : std::invalid_argument(std::string("exception: invalid instruction: ") + msg) 30 | {} 31 | 32 | }; 33 | 34 | namespace v { 35 | inline namespace spec_0_7 { 36 | 37 | enum class vop_type : uint8_t 38 | { 39 | thread_all = 0b0, 40 | masked_in = 0b1, 41 | }; 42 | 43 | enum vreg_no : uint8_t 44 | { 45 | v0 = 0, 46 | v1 = 1, 47 | v2 = 2, 48 | v3 = 3, 49 | v4 = 4, 50 | v5 = 5, 51 | v6 = 6, 52 | v7 = 7, 53 | v8 = 8, 54 | v9 = 9, 55 | v10 = 10, 56 | v11 = 11, 57 | v12 = 12, 58 | v13 = 13, 59 | v14 = 14, 60 | v15 = 15, 61 | v16 = 16, 62 | v17 = 17, 63 | v18 = 18, 64 | v19 = 19, 65 | v20 = 20, 66 | v21 = 21, 67 | v22 = 22, 68 | v23 = 23, 69 | v24 = 24, 70 | v25 = 25, 71 | v26 = 26, 72 | v27 = 27, 73 | v28 = 28, 74 | v29 = 29, 75 | v30 = 30, 76 | v31 = 31 77 | }; 78 | 79 | enum vreg_ew : uint8_t 80 | { 81 | e8 = 0b00, 82 | e16 = 0b01, 83 | e32 = 0b10, 84 | e64 = 0b11, 85 | e128 = 0b100 86 | }; 87 | 88 | enum vreg_mul : uint8_t 89 | { 90 | m1 = 0b00, 91 | m2 = 0b01, 92 | m4 = 0b10, 93 | m8 = 0b11 94 | }; 95 | 96 | inline size_t 97 | vtype(vreg_ew ew, vreg_mul mul = m1, size_t vill = 0) 98 | { 99 | return vill << (sizeof(xreg_type) - 1) | ew << 2 | mul; 100 | } 101 | 102 | inline int16_t 103 | vtypei(vreg_ew ew, vreg_mul mul = m1) 104 | { 105 | return (ew << 2) | mul; 106 | } 107 | 108 | namespace implementation { 109 | 110 | typedef float float32_t; 111 | typedef double float64_t; 112 | 113 | #if 0 114 | class 
Bad_element_size 115 | : public Invalid_instruction 116 | { 117 | public: 118 | Bad_element_size(size_t el_size) 119 | : Invalid_instruction(std::string("RVV config bad element size: ") + std::to_string(el_size)) 120 | {} 121 | }; 122 | #endif 123 | 124 | class State_not_configured 125 | : public Invalid_instruction 126 | { 127 | public: 128 | State_not_configured() 129 | : Invalid_instruction(std::string("Illegal configuration (vill is set)")) 130 | {} 131 | }; 132 | 133 | class Register_out_of_config_range 134 | : public Invalid_instruction 135 | { 136 | public: 137 | Register_out_of_config_range(vreg_no reg) 138 | : Invalid_instruction(std::string("RVV register is out of config range : ") + std::to_string(static_cast(reg))) 139 | {} 140 | }; 141 | 142 | class Load_wider_value_to_narrowed_element 143 | : public Invalid_instruction 144 | { 145 | public: 146 | Load_wider_value_to_narrowed_element(size_t value_size, size_t el_size) 147 | : Invalid_instruction(std::string("RVV load wider value (size=") + std::to_string(value_size) + ") to narrowed element (size" + std::to_string(el_size) + ")") 148 | {} 149 | }; 150 | 151 | class Instruction_undefined_for_element_size 152 | : public Invalid_instruction 153 | { 154 | public: 155 | Instruction_undefined_for_element_size(size_t el_size) 156 | : Invalid_instruction(std::string("Instruction undefined for element size ") + std::to_string(el_size)) 157 | {} 158 | }; 159 | 160 | class V_unit; 161 | class State; 162 | 163 | template 164 | struct Size_traits; 165 | 166 | template 167 | class Loader 168 | { 169 | private: 170 | Loader(Loader const&) = delete; 171 | Loader& operator = (Loader const&) = delete; 172 | 173 | protected: 174 | Loader() = default; 175 | virtual ~Loader() = default; 176 | 177 | protected: 178 | static Memory_type 179 | to_element(bool const &_val) 180 | { 181 | return Memory_type(!!_val); 182 | } 183 | 184 | template 185 | static typename std::enable_if< 186 | (sizeof(Value_type) > 
sizeof(Memory_type)), 187 | Memory_type 188 | >::type 189 | to_element(Value_type const &) 190 | { 191 | throw Load_wider_value_to_narrowed_element(sizeof(Value_type), sizeof(Memory_type)); 192 | } 193 | 194 | template 195 | static typename std::enable_if< 196 | sizeof(Memory_type) == sizeof(Value_type), 197 | Memory_type 198 | >::type 199 | to_element(Value_type const &_val) 200 | { 201 | return reinterpret_cast(_val); 202 | } 203 | 204 | template 205 | static typename std::enable_if< 206 | (sizeof(Value_type) < sizeof(Memory_type)) && 207 | std::is_integral::value && 208 | std::is_unsigned::value, 209 | Memory_type 210 | >::type 211 | to_element(Value_type const &_val) 212 | { 213 | return static_cast(static_cast::type>(_val)); 214 | } 215 | 216 | template 217 | static typename std::enable_if< 218 | (sizeof(Value_type) < sizeof(Memory_type)) && 219 | std::is_integral::value && 220 | std::is_signed::value, 221 | Memory_type 222 | >::type 223 | to_element(Value_type const &_val) 224 | { 225 | return static_cast(_val); 226 | } 227 | 228 | template 229 | static typename std::enable_if< 230 | (sizeof(Value_type) < sizeof(Memory_type)) && 231 | std::is_floating_point::value, 232 | Memory_type 233 | >::type 234 | to_element(Value_type const &_val) 235 | { 236 | typedef typename std::make_unsigned::type uel_type; 237 | // static uel_type const NaN_box = ~uel_type(0) << (CHAR_BIT * sizeof(Value_type)); 238 | 239 | typedef typename Size_traits::uint_type uval_type; 240 | // return static_cast(NaN_box | uel_type(reinterpret_cast(_val))); 241 | return static_cast(uel_type(reinterpret_cast(_val))); 242 | } 243 | 244 | public: 245 | virtual void operator()(V_unit& vu, vreg_no vd, Memory_type const* rs1, ptrdiff_t rs2, vop_type mode) = 0; 246 | virtual void operator()(V_unit& vu, vreg_no vd, Memory_type const* rs1, vreg_no idx, vop_type mode) = 0; 247 | }; 248 | 249 | template 250 | class Saver 251 | { 252 | private: 253 | Saver(Saver const&) = delete; 254 | Saver& operator = 
(Saver const&) = delete; 255 | 256 | protected: 257 | Saver() = default; 258 | virtual ~Saver() = default; 259 | 260 | public: 261 | virtual void operator()(V_unit& vu, vreg_no vs1, Memory_type* rs1, ptrdiff_t rs2, vop_type mode) const = 0; 262 | virtual void operator()(V_unit& vu, vreg_no vs1, Memory_type* rs1, vreg_no idx, vop_type mode) const = 0; 263 | }; 264 | 265 | class Operations 266 | { 267 | private: 268 | Operations(Operations const&) = delete; 269 | Operations& operator = (Operations const&) = delete; 270 | 271 | protected: 272 | Operations() = default; 273 | virtual ~Operations() = default; 274 | 275 | public: 276 | virtual operator Loader& () = 0; 277 | virtual operator Loader& () = 0; 278 | virtual operator Loader& () = 0; 279 | virtual operator Loader& () = 0; 280 | 281 | virtual operator Loader& () = 0; 282 | virtual operator Loader& () = 0; 283 | virtual operator Loader& () = 0; 284 | 285 | virtual operator Saver& () = 0; 286 | virtual operator Saver& () = 0; 287 | virtual operator Saver& () = 0; 288 | virtual operator Saver& () = 0; 289 | 290 | virtual void vadd_vv(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) = 0; 291 | virtual void vadd_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) = 0; 292 | virtual void vadd_vi(vreg_no vd, vreg_no vs2, int16_t imm, vop_type mode = vop_type::thread_all) = 0; 293 | 294 | virtual void vsub_vv(vreg_no vd, vreg_no vs1, vreg_no vs2, vop_type mode = vop_type::thread_all) = 0; 295 | virtual void vsub_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) = 0; 296 | 297 | virtual void vmsle_vv(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) = 0; 298 | virtual void vmsle_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) = 0; 299 | virtual void vmsle_vi(vreg_no vd, vreg_no vs2, int16_t imm, vop_type mode = vop_type::thread_all) = 0; 300 | 301 | virtual void 
vmand_mm(vreg_no vd, vreg_no vs2, vreg_no vs1) = 0; 302 | virtual void vmnand_mm(vreg_no vd, vreg_no vs2, vreg_no vs1) = 0; 303 | 304 | virtual void vmnot_m(vreg_no vd, vreg_no vs1) = 0; 305 | 306 | virtual void vmv_v_v(vreg_no vd, vreg_no vs1) = 0; 307 | virtual void vmv_v_x(vreg_no vd, xreg_type rs1) = 0; 308 | virtual void vmv_v_i(vreg_no vd, int16_t imm) = 0; 309 | #if 0 310 | virtual void vaddw(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 311 | virtual void vsubw(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 312 | 313 | virtual void vsll(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 314 | virtual void vsra(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 315 | virtual void vsrl(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 316 | 317 | virtual void vand(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 318 | virtual void vor(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 319 | virtual void vxor(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 320 | 321 | virtual void vseq(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 322 | virtual void vslt(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 323 | virtual void vsge(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 324 | virtual void vsltu(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 325 | virtual void vsgeu(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 326 | 327 | virtual void vaddi(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 328 | virtual void vslli(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 329 | virtual void vsrli(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 330 | virtual void vsrai(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 331 | virtual void vandi(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 332 | virtual void vori(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 333 | virtual void vxori(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 334 | virtual void vaddwi(vreg_no vd, vreg_no vs1, int16_t imm) = 0; 335 | 336 | virtual void vmul(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 337 | virtual void vmulh(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 338 | virtual void vmulhsu(vreg_no vd, 
vreg_no vs1, vreg_no vs2) = 0; 339 | virtual void vmulhu(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 340 | 341 | virtual void vdiv(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 342 | virtual void vdivu(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 343 | virtual void vrem(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 344 | virtual void vremu(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 345 | 346 | virtual void vfadd_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 347 | virtual void vfadd_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 348 | virtual void vfsub_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 349 | virtual void vfsub_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 350 | virtual void vfmul_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 351 | virtual void vfmul_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 352 | virtual void vfdiv_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 353 | virtual void vfdiv_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 354 | 355 | virtual void vfsgnj_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 356 | virtual void vfsgnj_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 357 | virtual void vfsgnjn_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 358 | virtual void vfsgnjn_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 359 | virtual void vfsgnjx_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 360 | virtual void vfsgnjx_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 361 | 362 | virtual void vfmadd_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 363 | virtual void vfmadd_d(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 364 | virtual void vfmsub_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 365 | virtual void vfmsub_d(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 366 | virtual void vfmaddwdn_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 367 | virtual void vfmsubwdn_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) = 0; 368 | 369 | virtual void vfmin_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 370 | virtual void 
vfmin_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 371 | virtual void vfmax_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 372 | virtual void vfmax_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 373 | virtual void vfeq_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 374 | virtual void vfeq_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 375 | virtual void vflt_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 376 | virtual void vflt_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 377 | virtual void vfle_w(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 378 | virtual void vfle_d(vreg_no vd, vreg_no vs1, vreg_no vs2) = 0; 379 | virtual void vfsqrt_w(vreg_no vd, vreg_no vs1) = 0; 380 | virtual void vfsqrt_d(vreg_no vd, vreg_no vs1) = 0; 381 | 382 | virtual void vinsx(vreg_no vd, int32_t value, size_t idx) = 0; 383 | virtual void vmiota(vreg_no vd) = 0; 384 | #endif 385 | }; 386 | 387 | class Float_operations 388 | { 389 | private: 390 | Float_operations(Float_operations const&) = delete; 391 | Float_operations& operator = (Float_operations const&) = delete; 392 | 393 | protected: 394 | Float_operations() = default; 395 | virtual ~Float_operations() = default; 396 | 397 | public: 398 | virtual void vfmacc_vf(vreg_no vd, float rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) = 0; 399 | }; 400 | 401 | #ifndef RVV_ELEN 402 | #define RVV_ELEN 64 403 | #endif 404 | 405 | #ifndef RVV_VLEN 406 | #define RVV_VLEN 256 407 | #endif 408 | 409 | #ifndef RVV_SLEN 410 | #define RVV_SLEN 64 411 | #endif 412 | 413 | class V_unit 414 | { 415 | private: 416 | V_unit(V_unit const&) = delete; 417 | V_unit& operator=(V_unit const&) = delete; 418 | 419 | protected: 420 | V_unit() = default; 421 | virtual ~V_unit() = default; 422 | 423 | public: 424 | static size_t const ELEN = RVV_ELEN; 425 | static size_t const VLEN = RVV_VLEN; 426 | static size_t const SLEN = RVV_SLEN; 427 | static size_t const NREGS = 32; 428 | 429 | static V_unit& instance(); 430 | 431 | virtual Operations& get_op_performer()const = 0; 
432 | virtual Float_operations& get_fop_performer()const = 0; 433 | }; 434 | 435 | template 436 | inline void 437 | load(vreg_no vd, Ty const* rs1, ptrdiff_t rs2 = sizeof(Ty), vop_type mode = vop_type::thread_all) 438 | { 439 | static_cast&>(static_cast(V_unit::instance().get_op_performer()))(V_unit::instance(), vd, rs1, rs2, mode); 440 | } 441 | 442 | template 443 | inline void 444 | load(vreg_no vd, Ty const* rs1, vreg_no vs1, vop_type mode = vop_type::thread_all) 445 | { 446 | static_cast&>(static_cast(V_unit::instance().get_op_performer()))(V_unit::instance(), vd, rs1, vs1, mode); 447 | } 448 | 449 | template 450 | inline void 451 | save(vreg_no vs1, Ty* rs1, ptrdiff_t rs2 = sizeof(Ty), vop_type mode = vop_type::thread_all) 452 | { 453 | static_cast&>(static_cast(V_unit::instance().get_op_performer()))(V_unit::instance(), vs1, rs1, rs2, mode); 454 | } 455 | 456 | template 457 | inline void 458 | save(vreg_no vs1, Ty* rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) 459 | { 460 | static_cast&>(static_cast(V_unit::instance().get_op_performer()))(V_unit::instance(), vs1, rs1, vs2, mode); 461 | } 462 | 463 | } // namespace implementation 464 | 465 | size_t 466 | vsetvl(size_t, size_t); 467 | 468 | size_t 469 | vsetvli(size_t, int16_t); 470 | 471 | #define DEF_B_D(INNER_DEF_) INNER_DEF_(b,8) INNER_DEF_(h,16) INNER_DEF_(w,32) 472 | 473 | /// Load constant-stride instructions 474 | ///@{ 475 | 476 | #define DEF_LOAD_CONSTANT_STRIDE_(NAME,TYPE) \ 477 | inline void CONCAT_(NAME,_v)(vreg_no vd, TYPE const* rs1, ptrdiff_t rs2, vop_type mode = vop_type::thread_all) \ 478 | { \ 479 | using namespace implementation; \ 480 | load(vd, rs1, rs2, mode); \ 481 | } 482 | 483 | /// Load as signed integer (sign extended) 484 | ///@{ 485 | 486 | #define DEF_LOAD_INT_CONSTANT_STRIDE_(CHR,NUM) DEF_LOAD_CONSTANT_STRIDE_(CONCAT_(vls,CHR), CONCAT_(int,CONCAT_(NUM,_t))) 487 | DEF_B_D(DEF_LOAD_INT_CONSTANT_STRIDE_) 488 | #undef DEF_LOAD_INT_CONSTANT_STRIDE_ 489 | ///@} 490 | 491 | 
/// Load as unsigned integer (zero extended) 492 | ///@{ 493 | #define DEF_LOAD_UNSIGNED_CONSTANT_STRIDE_(CHR,NUM) DEF_LOAD_CONSTANT_STRIDE_(CONCAT_(CONCAT_(vls,CHR),u), CONCAT_(CONCAT_(uint,NUM),_t)) 494 | DEF_B_D(DEF_LOAD_UNSIGNED_CONSTANT_STRIDE_) 495 | #undef DEF_LOAD_UNSIGNED_CONSTANT_STRIDE_ 496 | ///@} 497 | 498 | #undef DEF_LOAD_CONSTANT_STRIDE_ 499 | 500 | ///@} 501 | 502 | /// Load unit-stride instructions 503 | ///@{ 504 | 505 | #define DEF_LOAD_UNIT_STRIDE_(NAME,TYPE) \ 506 | inline void CONCAT_(NAME,_v)(vreg_no vd, TYPE const rs1[], vop_type mode = vop_type::thread_all) \ 507 | { \ 508 | using namespace implementation; \ 509 | load(vd, rs1, sizeof(TYPE), mode); \ 510 | } 511 | 512 | /// Load as signed integer (sign extended) 513 | ///@{ 514 | #define DEF_LOAD_INT_UNIT_STRIDE_(CHR,NUM) DEF_LOAD_UNIT_STRIDE_(CONCAT_(vl,CHR), CONCAT_(CONCAT_(int,NUM),_t)) 515 | DEF_B_D(DEF_LOAD_INT_UNIT_STRIDE_) 516 | #undef DEF_LOAD_INT_UNIT_STRIDE_ 517 | ///@} 518 | 519 | /// Load as unsigned integer (zero extended) 520 | ///@{ 521 | #define DEF_LOAD_UNSIGNED_UNIT_STRIDE_(CHR,NUM) DEF_LOAD_UNIT_STRIDE_(CONCAT_(CONCAT_(vl,CHR),u), CONCAT_(CONCAT_(uint,NUM),_t)) 522 | DEF_B_D(DEF_LOAD_UNSIGNED_UNIT_STRIDE_) 523 | #undef DEF_LOAD_UNSIGNED_UNIT_STRIDE_ 524 | ///@} 525 | 526 | #undef DEF_LOAD_UNIT_STRIDE_ 527 | ///@} 528 | 529 | /// Load indexed (scatter-gather) 530 | ///@{ 531 | #define DEF_LOAD_INDEXED_(NAME,TYPE) \ 532 | inline void CONCAT_(NAME,_v)(vreg_no vd, TYPE const* rs1, vreg_no vs1, vop_type mode = vop_type::thread_all) \ 533 | { \ 534 | using namespace implementation; \ 535 | load(vd, rs1, vs1, mode); \ 536 | } 537 | 538 | /// Load as signed integer (sign extended) 539 | ///@{ 540 | #define DEF_LOAD_INT_INDEXED_(CHR,NUM) DEF_LOAD_INDEXED_(CONCAT_(vlx,CHR), CONCAT_(CONCAT_(int,NUM),_t)) 541 | DEF_B_D(DEF_LOAD_INT_INDEXED_) 542 | #undef DEF_LOAD_INT_INDEXED_ 543 | ///@} 544 | 545 | /// Load as unsigned integer (zero extended) 546 | ///@{ 547 | #define 
DEF_LOAD_UNSIGNED_INDEXED_(CHR,NUM) DEF_LOAD_INDEXED_(CONCAT_(vlx,CONCAT_(CHR,u)), CONCAT_(CONCAT_(uint,NUM),_t)) 548 | DEF_B_D(DEF_LOAD_UNSIGNED_INDEXED_) 549 | #undef DEF_LOAD_UNSIGNED_INDEXED_ 550 | ///@} 551 | 552 | ///@} 553 | 554 | /// Constant-stride store instructions 555 | ///@{ 556 | #define DEF_SAVE_CONSTANT_STRIDE_(NAME,TYPE) \ 557 | inline void CONCAT_(NAME,_v)(vreg_no vs1, TYPE* rs1, ptrdiff_t rs2, vop_type mode = vop_type::thread_all) \ 558 | { \ 559 | using namespace implementation; \ 560 | save(vs1, rs1, rs2, mode); \ 561 | } 562 | 563 | #define DEF_SAVE_INT_CONSTANT_STRIDE_(CHR,NUM) DEF_SAVE_CONSTANT_STRIDE_(CONCAT_(vss,CHR), CONCAT_(CONCAT_(int,NUM),_t)) 564 | DEF_B_D(DEF_SAVE_INT_CONSTANT_STRIDE_) 565 | #undef DEF_SAVE_INT_CONSTANT_STRIDE_ 566 | #undef DEF_SAVE_CONSTANT_STRIDE_ 567 | ///@} 568 | 569 | /// Store unit-stride instructions 570 | ///@{ 571 | #define DEF_SAVE_UNIT_STRIDE_(NAME,TYPE) \ 572 | inline void CONCAT_(NAME,_v)(vreg_no vs1, TYPE rs1[], vop_type mode = vop_type::thread_all) \ 573 | { \ 574 | using namespace implementation; \ 575 | save(vs1, rs1, sizeof(TYPE), mode); \ 576 | } 577 | 578 | #define DEF_SAVE_INT_UNIT_STRIDE_(CHR,NUM) DEF_SAVE_UNIT_STRIDE_(CONCAT_(vs,CHR), CONCAT_(CONCAT_(int,NUM),_t)) 579 | DEF_B_D(DEF_SAVE_INT_UNIT_STRIDE_) 580 | #undef DEF_SAVE_INT_UNIT_STRIDE_ 581 | #undef DEF_SAVE_UNIT_STRIDE_ 582 | ///@} 583 | 584 | /// indexed-ordered store (scatter) instructions 585 | ///@{ 586 | #define DEF_SAVE_INDEXED_(NAME,TYPE) \ 587 | inline void CONCAT_(NAME,_v)(vreg_no vs1, TYPE* rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) \ 588 | { \ 589 | using namespace implementation; \ 590 | save(vs1, rs1, vs2, mode); \ 591 | } 592 | 593 | #define DEF_SAVE_INT_INDEXED_(CHR,NUM) DEF_SAVE_INDEXED_(CONCAT_(vsx,CHR), CONCAT_(CONCAT_(int,NUM),_t)) 594 | DEF_B_D(DEF_SAVE_INT_INDEXED_) 595 | #undef DEF_SAVE_INT_INDEXED_ 596 | ///@} 597 | 598 | #undef DEF_SAVE_INDEXED_ 599 | 600 | #undef DEF_B_D 601 | #undef DEF_B_W 602 | 
603 | #define DEF_BIN_OP_VV(NAM) \ 604 | inline void CONCAT_(NAM,_vv)(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) \ 605 | { \ 606 | using namespace implementation; \ 607 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_vv)(vd, vs2, vs1, mode); \ 608 | } 609 | 610 | #define DEF_BIN_OP_VX(NAM) \ 611 | inline void CONCAT_(NAM,_vx)(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) \ 612 | { \ 613 | using namespace implementation; \ 614 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_vx)(vd, vs2, rs1, mode); \ 615 | } 616 | 617 | #define DEF_BIN_OP_VI(NAM) \ 618 | inline void CONCAT_(NAM,_vi)(vreg_no vd, vreg_no vs2, int16_t imm, vop_type mode = vop_type::thread_all) \ 619 | { \ 620 | using namespace implementation; \ 621 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_vi)(vd, vs2, imm, mode); \ 622 | } 623 | 624 | #define DEF_BIN_OP_VXI(nam) \ 625 | DEF_BIN_OP_VV(nam) \ 626 | DEF_BIN_OP_VX(nam) \ 627 | DEF_BIN_OP_VI(nam) 628 | 629 | DEF_BIN_OP_VXI(vadd) 630 | 631 | DEF_BIN_OP_VV(vsub) 632 | DEF_BIN_OP_VX(vsub) 633 | 634 | // DEF_BIN_OP_VXI(vmsle) 635 | DEF_BIN_OP_VI(vmsle) 636 | 637 | #define DEF_BIN_OP_MM(NAM) \ 638 | inline void CONCAT_(NAM,_mm)(vreg_no vd, vreg_no vs2, vreg_no vs1) \ 639 | { \ 640 | using namespace implementation; \ 641 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_mm)(vd, vs2, vs1); \ 642 | } 643 | 644 | #define DEF_BIN_OP_M(NAM) \ 645 | inline void CONCAT_(NAM,_m)(vreg_no vd, vreg_no vs1, vop_type mode = vop_type::thread_all) \ 646 | { \ 647 | using namespace implementation; \ 648 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_m)(vd, vs1, mode); \ 649 | } 650 | 651 | #define DEF_BIN_OP_M_NO_MODE(NAM) \ 652 | inline void CONCAT_(NAM,_m)(vreg_no vd, vreg_no vs1) \ 653 | { \ 654 | using namespace implementation; \ 655 | static_cast(V_unit::instance().get_op_performer()).CONCAT_(NAM,_m)(vd, vs1); \ 656 
| } 657 | 658 | DEF_BIN_OP_MM(vmand) 659 | DEF_BIN_OP_MM(vmnand) 660 | DEF_BIN_OP_M_NO_MODE(vmnot) 661 | 662 | #undef DEF_BIN_OP_M_NO_MODE 663 | #undef DEF_BIN_OP_MM 664 | #undef DEF_BIN_OP_M 665 | 666 | #if 0 667 | DEF_BIN_OP(vsll) 668 | DEF_BIN_OP(vsra) 669 | DEF_BIN_OP(vsrl) 670 | DEF_BIN_OP(vand) 671 | DEF_BIN_OP(vor) 672 | DEF_BIN_OP(vxor) 673 | 674 | DEF_BIN_OP(vseq) 675 | DEF_BIN_OP(vslt) 676 | DEF_BIN_OP(vsge) 677 | DEF_BIN_OP(vsltu) 678 | DEF_BIN_OP(vsgeu) 679 | #endif 680 | 681 | #define DEF_BIN_IMM_OP(NAM) \ 682 | inline void NAM(vreg_no vd, vreg_no vs1, int16_t imm, vop_type mode = vop_type::thread_all) \ 683 | { \ 684 | using namespace implementation; \ 685 | static_cast(V_unit::instance().get_op_performer()).NAM(vd, vs1, imm, mode); \ 686 | } 687 | 688 | #if 0 689 | DEF_BIN_IMM_OP(vaddi) 690 | DEF_BIN_IMM_OP(vslli) 691 | DEF_BIN_IMM_OP(vsrli) 692 | DEF_BIN_IMM_OP(vsrai) 693 | DEF_BIN_IMM_OP(vandi) 694 | DEF_BIN_IMM_OP(vori) 695 | DEF_BIN_IMM_OP(vxori) 696 | 697 | /// 32-bit operations 698 | ///@{ 699 | DEF_BIN_OP(vaddw) 700 | DEF_BIN_OP(vsubw) 701 | DEF_BIN_IMM_OP(vaddwi) 702 | ///@} 703 | 704 | DEF_BIN_OP(vmul) 705 | DEF_BIN_OP(vmulh) 706 | DEF_BIN_OP(vmulhsu) 707 | DEF_BIN_OP(vmulhu) 708 | 709 | DEF_BIN_OP(vdiv) 710 | DEF_BIN_OP(vdivu) 711 | DEF_BIN_OP(vrem) 712 | DEF_BIN_OP(vremu) 713 | #endif 714 | 715 | inline void vmv_v_v(vreg_no vd, vreg_no vs1) 716 | { 717 | using namespace implementation; 718 | static_cast(V_unit::instance().get_op_performer()).vmv_v_v(vd, vs1); 719 | } 720 | 721 | inline void vmv_v_x(vreg_no vd, xreg_type rs1) 722 | { 723 | using namespace implementation; 724 | static_cast(V_unit::instance().get_op_performer()).vmv_v_x(vd, rs1); 725 | } 726 | 727 | inline void vmv_v_i(vreg_no vd, int16_t imm) 728 | { 729 | using namespace implementation; 730 | static_cast(V_unit::instance().get_op_performer()).vmv_v_i(vd, imm); 731 | } 732 | 733 | #if 0 734 | template inline void vmulwdn(); 735 | 736 | /// Integer reduction operations 737 | 
///@{ 738 | template inline void vredsum(); 739 | 740 | template inline void vredmax(); 741 | template inline void vredmaxu(); 742 | 743 | template inline void vredmin(); 744 | template inline void vredminu(); 745 | ///@} 746 | #endif 747 | 748 | // void vfmacc_vf(vreg_no vd, float rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) = 0; 749 | 750 | #define DEF_BIN_OP_VF(NAM) \ 751 | inline void CONCAT_(NAM,_vf)(vreg_no vd, float rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) \ 752 | { \ 753 | using namespace implementation; \ 754 | static_cast(V_unit::instance().get_fop_performer()).CONCAT_(NAM,_vf)(vd, rs1, vs2, mode); \ 755 | } 756 | 757 | DEF_BIN_OP_VF(vfmacc) 758 | 759 | #if 0 760 | DEF_BIN_OP(vfadd_w) 761 | DEF_BIN_OP(vfadd_d) 762 | 763 | DEF_BIN_OP(vfsub_w) 764 | DEF_BIN_OP(vfsub_d) 765 | 766 | DEF_BIN_OP(vfmul_w) 767 | DEF_BIN_OP(vfmul_d) 768 | 769 | DEF_BIN_OP(vfdiv_w) 770 | DEF_BIN_OP(vfdiv_d) 771 | 772 | DEF_BIN_OP(vfsgnj_w) 773 | DEF_BIN_OP(vfsgnj_d) 774 | 775 | DEF_BIN_OP(vfsgnjn_w) 776 | DEF_BIN_OP(vfsgnjn_d) 777 | 778 | DEF_BIN_OP(vfsgnjx_w) 779 | DEF_BIN_OP(vfsgnjx_d) 780 | 781 | DEF_BIN_OP(vfmin_w) 782 | DEF_BIN_OP(vfmin_d) 783 | 784 | DEF_BIN_OP(vfmax_w) 785 | DEF_BIN_OP(vfmax_d) 786 | 787 | DEF_BIN_OP(vfeq_w) 788 | DEF_BIN_OP(vfeq_d) 789 | 790 | DEF_BIN_OP(vflt_w) 791 | DEF_BIN_OP(vflt_d) 792 | 793 | DEF_BIN_OP(vfle_w) 794 | DEF_BIN_OP(vfle_d) 795 | #endif 796 | 797 | #define DEF_UNARY_OP_V(NAM) \ 798 | template inline void CONCAT_(NAM, _v)() \ 799 | { \ 800 | using namespace implementation; \ 801 | static_cast(V_unit::instance().get_op_performer()).NAM(vd, vs1); \ 802 | } 803 | 804 | #if 0 805 | DEF_UNARY_OP_V(vfsqrt) 806 | #endif 807 | 808 | #if 0 809 | template inline void vfclass_v(); 810 | #endif 811 | 812 | #if 0 813 | /// Floating-point reduction operations 814 | ///@{ 815 | template inline void vfredosum_v(); 816 | template inline void vfredsum_v(); 817 | template inline void vfredmax_v(); 818 | template inline void 
vfredmin_v(); 819 | ///@} 820 | #endif 821 | 822 | /// Vector floating-point fused multiply-add 823 | ///@{ 824 | #define DEF_3_OP(NAM) \ 825 | inline void NAM(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3, vop_type mode = vop_type::thread_all) \ 826 | { \ 827 | using namespace implementation; \ 828 | static_cast(V_unit::instance().get_op_performer()).NAM(vd, vs1, vs2, vs3, mode); \ 829 | } 830 | 831 | #if 0 832 | DEF_3_OP(vfmadd_vv) 833 | DEF_3_OP(vfmadd_vf) 834 | 835 | DEF_3_OP(vfmsub_vv) 836 | DEF_3_OP(vfmsub_vf) 837 | #endif 838 | 839 | #undef DEF_3_OP 840 | ///@} 841 | 842 | #if 0 843 | /// Convert integer to narrower integer 844 | ///@{ 845 | //template void vcvt(vreg_no vd, vreg_no vs1); 846 | ///@} 847 | 848 | /// Convert integer to float 849 | ///@{ 850 | template void vfcvt(vreg_no vd, vreg_no vs1); 851 | ///@} 852 | 853 | /// Move to/from floating-point (f) registers. 854 | ///@{ 855 | template void vfmv(vreg_no vd, from fs1); 856 | template to vfmv(vreg_no vs1); 857 | ///@} 858 | #endif 859 | 860 | #if 0 861 | template inline void vmiota() 862 | { 863 | using namespace implementation; 864 | static_cast&>(V_unit::instance()).vmiota(vd); 865 | } 866 | #endif 867 | 868 | #if 0 869 | template inline void vinsx(int32_t value, size_t idx = 0) 870 | { 871 | using namespace implementation; 872 | static_cast&>(V_unit::instance()).vinsx(vd, value, idx); 873 | } 874 | #endif 875 | 876 | #undef DEF_UNARY_OP 877 | #undef DEF_BIN_IMM_OP 878 | #undef DEF_BIN_OP_VV 879 | #undef DEF_BIN_OP_VX 880 | #undef DEF_BIN_OP_VI 881 | #undef DEF_BIN_OP_VXI 882 | #undef DEF_BIN_OP_VF 883 | 884 | } // namespace spec_0_7 885 | } // namespace v 886 | } // namespace riscv 887 | 888 | #undef PASTE_ 889 | #undef CONCAT_ 890 | 891 | #endif // RISCV_EXT_V_HPP_ 892 | -------------------------------------------------------------------------------- /src/riscv32/v.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | @file v.cpp 3 | @copyright 
©2019 Syntacore. 4 | @authors 5 | Grigory Okhotnikov 6 | @brief Vector extension simulator (v0.7) 7 | */ 8 | 9 | #include "riscv/ext/v.hpp" 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #ifdef _MSC_VER 23 | #pragma warning( disable : 4250) 24 | #endif 25 | 26 | namespace riscv { 27 | namespace v { 28 | namespace spec_0_7 { 29 | namespace implementation { 30 | namespace { 31 | 32 | template 33 | struct Size_traits; 34 | 35 | template<> 36 | struct Size_traits<1u> 37 | { 38 | typedef int8_t int_type; 39 | typedef uint8_t uint_type; 40 | }; 41 | 42 | template<> 43 | struct Size_traits<2u> 44 | { 45 | typedef int16_t int_type; 46 | typedef uint16_t uint_type; 47 | }; 48 | 49 | template<> 50 | struct Size_traits<4u> 51 | { 52 | typedef int32_t int_type; 53 | typedef uint32_t uint_type; 54 | typedef float32_t float_type; 55 | }; 56 | 57 | template<> 58 | struct Size_traits<8u> 59 | { 60 | typedef int64_t int_type; 61 | typedef uint64_t uint_type; 62 | typedef float64_t float_type; 63 | }; 64 | 65 | template 66 | inline 67 | typename std::enable_if::value, Ty>::type 68 | fsgnj(Ty const &s1, Ty const &s2) 69 | { 70 | typedef Size_traits size_traits; 71 | typedef typename size_traits::uint_type uint_type; 72 | typedef typename size_traits::int_type int_type; 73 | static uint_type const mask1 = uint_type((std::numeric_limits::max)()); 74 | auto const p1 = reinterpret_cast(s1); 75 | auto const p2 = reinterpret_cast(s2); 76 | auto const res = (mask1 & p1) | (~mask1 & p2); 77 | auto const fres = reinterpret_cast(res); 78 | return fres; 79 | } 80 | 81 | template 82 | inline 83 | typename std::enable_if::value, Ty>::type 84 | fsgnjn(Ty const &s1, Ty const &s2) 85 | { 86 | typedef Size_traits size_traits; 87 | typedef typename size_traits::uint_type uint_type; 88 | typedef typename size_traits::int_type int_type; 89 | static uint_type const mask1 = 
uint_type((std::numeric_limits::max)()); 90 | auto const p1 = reinterpret_cast(s1); 91 | auto const p2 = reinterpret_cast(s2); 92 | auto const res = (mask1 & p1) | (~mask1 & ~p2); 93 | auto const fres = reinterpret_cast(res); 94 | return fres; 95 | } 96 | 97 | template 98 | inline 99 | typename std::enable_if::value, Ty>::type 100 | fsgnjx(Ty const &s1, Ty const &s2) 101 | { 102 | typedef Size_traits size_traits; 103 | typedef typename size_traits::uint_type uint_type; 104 | typedef typename size_traits::int_type int_type; 105 | static uint_type const mask1 = uint_type((std::numeric_limits::max)()); 106 | auto const p1 = reinterpret_cast(s1); 107 | auto const p2 = reinterpret_cast(s2); 108 | auto const res = (mask1 & p1) ^(~mask1 & p2); 109 | auto const fres = reinterpret_cast(res); 110 | return fres; 111 | } 112 | 113 | template 114 | inline 115 | typename std::enable_if< 116 | std::is_integral::value && 117 | std::is_signed::value && 118 | std::is_integral::int_type>::value, 119 | Ty>::type 120 | mulh(Ty const &x, Ty const &y) 121 | { 122 | typedef typename Size_traits<2 * sizeof(Ty)>::int_type dbl_type; 123 | return static_cast((dbl_type(x) * dbl_type(y)) >> (CHAR_BIT * sizeof(Ty))); 124 | } 125 | 126 | template 127 | inline 128 | typename std::enable_if< 129 | sizeof(Ty1) == sizeof(Ty2) && 130 | std::is_integral::value && 131 | std::is_signed::value && 132 | std::is_integral::value && 133 | std::is_unsigned::value && 134 | std::is_integral::int_type>::value, 135 | Ty1>::type 136 | mulhsu(Ty1 const &x, Ty2 const &y) 137 | { 138 | typedef typename Size_traits<2 * sizeof(Ty1)>::int_type idbl_type; 139 | typedef typename Size_traits<2 * sizeof(Ty2)>::uint_type udbl_type; 140 | return static_cast((idbl_type(x) * idbl_type(udbl_type(y))) >> (CHAR_BIT * sizeof(Ty1))); 141 | } 142 | 143 | template 144 | inline 145 | typename std::enable_if< 146 | std::is_integral::value && 147 | std::is_unsigned::value && 148 | std::is_integral::uint_type>::value, 149 | Ty>::type 150 | 
mulhu(Ty const &x, Ty const &y) 151 | { 152 | typedef typename Size_traits<2 * sizeof(Ty)>::uint_type udbl_type; 153 | return static_cast((udbl_type(x) * udbl_type(y)) >> (CHAR_BIT * sizeof(Ty))); 154 | } 155 | 156 | inline int64_t 157 | mulh(int64_t const &, int64_t const &) 158 | { 159 | throw Instruction_undefined_for_element_size(sizeof(int64_t)); 160 | } 161 | 162 | inline int64_t 163 | mulhsu(int64_t const &, int64_t const &) 164 | { 165 | throw Instruction_undefined_for_element_size(sizeof(int64_t)); 166 | } 167 | 168 | inline int64_t 169 | mulhu(int64_t const &, int64_t const &) 170 | { 171 | throw Instruction_undefined_for_element_size(sizeof(int64_t)); 172 | } 173 | 174 | class Impl_base 175 | : virtual public V_unit 176 | { 177 | public: 178 | virtual size_t setvl(size_t vl) = 0; 179 | virtual size_t setvstart(size_t vstart) = 0; 180 | virtual void setill(bool ill) = 0; 181 | virtual void setew(size_t ew) = 0; 182 | virtual void setmul(size_t mul) = 0; 183 | #if 0 184 | virtual void set_mask_reg(vreg_no) = 0; 185 | #endif 186 | }; 187 | } // namespace 188 | 189 | class State 190 | : virtual public Impl_base 191 | { 192 | public: 193 | virtual size_t vl()const = 0; 194 | virtual size_t vstart()const = 0; 195 | virtual size_t vlmax()const = 0; 196 | virtual char* elt_ptr(vreg_no _reg, size_t _ind) = 0; 197 | virtual char const* elt_ptr(vreg_no _reg, size_t _ind)const = 0; 198 | virtual size_t sew()const = 0; 199 | virtual size_t lmul()const = 0; 200 | virtual bool is_ill()const = 0; 201 | virtual Operations& get_op_performer()const = 0; 202 | virtual Float_operations& get_fop_performer()const = 0; 203 | virtual bool is_enabled(size_t i)const = 0; 204 | virtual bool get_mask(vreg_no _reg, size_t _ind)const = 0; 205 | virtual void set_mask(vreg_no _reg, size_t _ind, bool value) = 0; 206 | protected: 207 | virtual bool is_valid_reg(vreg_no _reg)const = 0; 208 | virtual bool mask_bit(size_t i)const = 0; 209 | }; 210 | 211 | namespace { 212 | template 213 | 
class Bad_load 214 | : public Loader 215 | { 216 | static_assert(sizeof(Element_type) < sizeof(Memory_type), "Bad type"); 217 | 218 | void operator()(V_unit &st, vreg_no, Memory_type const *, ptrdiff_t, vop_type mode) final 219 | { 220 | throw Load_wider_value_to_narrowed_element(sizeof(Memory_type), sizeof(Element_type)); 221 | } 222 | 223 | void operator()(V_unit &st, vreg_no, Memory_type const *, vreg_no, vop_type mode) final 224 | { 225 | throw Load_wider_value_to_narrowed_element(sizeof(Memory_type), sizeof(Element_type)); 226 | } 227 | }; 228 | 229 | template 230 | class Good_load; 231 | 232 | template 233 | using Loader_impl = 234 | typename std::conditional< 235 | (sizeof(Element_type) < sizeof(Memory_type)), 236 | Bad_load, 237 | Good_load 238 | >::type; 239 | 240 | template 241 | class Operations_impl; 242 | 243 | template 244 | class Operations_essentials; 245 | 246 | template 247 | class Good_load 248 | : public Loader 249 | { 250 | static_assert(sizeof(Element_type) >= sizeof(Memory_type), "Bad type"); 251 | 252 | using Loader::to_element; 253 | 254 | void operator()(V_unit& vu, vreg_no vd, Memory_type const *rs1, ptrdiff_t rs2, vop_type mode) final 255 | { 256 | State& st = dynamic_cast(vu); 257 | auto p = reinterpret_cast(rs1); 258 | auto const len = st.vl(); 259 | 260 | for (size_t i = 0; i < len; ++i, p += rs2) { 261 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 262 | Element_type *const addr = reinterpret_cast(st.elt_ptr(vd, i)); 263 | *addr = static_cast(*reinterpret_cast(p)); 264 | } 265 | } 266 | } 267 | 268 | void operator()(V_unit& vu, vreg_no vd, Memory_type const *rs1, vreg_no idx, vop_type mode) final 269 | { 270 | State& st = dynamic_cast(vu); 271 | auto const p = reinterpret_cast(rs1); 272 | auto const len = st.vl(); 273 | 274 | for (size_t i = 0; i < len; ++i) { 275 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 276 | Memory_type *const addr = 
reinterpret_cast(st.elt_ptr(vd, i)); 277 | size_t const stride = *reinterpret_cast(st.elt_ptr(idx, i)); 278 | *addr = to_element(*reinterpret_cast(p + stride)); 279 | } 280 | } 281 | } 282 | }; 283 | 284 | template 285 | class Saver_impl 286 | : protected Saver 287 | { 288 | void operator()(V_unit& vu, vreg_no vs1, Memory_type *rs1, ptrdiff_t rs2, vop_type mode) const final 289 | { 290 | State& st = dynamic_cast(vu); 291 | auto p = reinterpret_cast(rs1); 292 | auto const len = st.vl(); 293 | 294 | for (size_t i = 0; i < len; ++i, p += rs2) { 295 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 296 | Memory_type *const addr = reinterpret_cast(p); 297 | *addr = *reinterpret_cast(st.elt_ptr(vs1, i)); 298 | } 299 | } 300 | } 301 | 302 | void operator()(V_unit& vu, vreg_no vs1, Memory_type *rs1, vreg_no idx, vop_type mode) const final 303 | { 304 | State& st = dynamic_cast(vu); 305 | auto const p = reinterpret_cast(rs1); 306 | auto const len = st.vl(); 307 | 308 | for (size_t i = 0; i < len; ++i) { 309 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 310 | size_t const stride = *reinterpret_cast(st.elt_ptr(idx, i)); 311 | Memory_type *const addr = reinterpret_cast(p + stride); 312 | *addr = *reinterpret_cast(st.elt_ptr(vs1, i)); 313 | } 314 | } 315 | } 316 | }; 317 | 318 | template 319 | class Get_loader 320 | : virtual Operations 321 | , Loader_impl 322 | { 323 | operator Loader &() final 324 | { 325 | return static_cast &>(*this); 326 | } 327 | }; 328 | 329 | template 330 | class Get_saver 331 | : virtual Operations 332 | , Saver_impl 333 | { 334 | operator Saver &() final 335 | { 336 | return static_cast &>(*this); 337 | } 338 | }; 339 | 340 | template 341 | class Get_mem_IO 342 | : virtual Operations 343 | 344 | , Get_loader 345 | , Get_loader 346 | , Get_loader 347 | , Get_loader 348 | 349 | , Get_loader 350 | , Get_loader 351 | , Get_loader 352 | 353 | , Get_saver 354 | , Get_saver 355 
| , Get_saver 356 | , Get_saver 357 | { 358 | }; 359 | 360 | template 361 | class Non_scalar_operations_essentials 362 | { 363 | protected: 364 | template 365 | typename std::enable_if, Func>::value, void>::type 366 | iterate(V_unit& vu, Func &&func, vreg_no vd) 367 | { 368 | State& st = dynamic_cast(vu); 369 | 370 | if (st.is_ill()) { 371 | throw State_not_configured(); 372 | } 373 | 374 | auto const vstart = st.vstart(); 375 | auto const vl = st.vl(); 376 | 377 | for (size_t i = vstart; i < vl; ++i) { 378 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 379 | *dest_i = func(); 380 | } 381 | 382 | if (vl) { 383 | for (size_t i = vl; i < st.vlmax(); ++i) { 384 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 385 | *dest_i = 0; 386 | } 387 | } 388 | } 389 | 390 | template 391 | typename std::enable_if, Func>::value, void>::type 393 | iterate(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vop_type mode) 394 | { 395 | State& st = dynamic_cast(vu); 396 | 397 | if (st.is_ill()) { 398 | throw State_not_configured(); 399 | } 400 | 401 | auto const vstart = st.vstart(); 402 | auto const vl = st.vl(); 403 | 404 | for (size_t i = vstart; i < vl; ++i) { 405 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 406 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 407 | Element_type const src1_i = *reinterpret_cast(st.elt_ptr(vs1, i)); 408 | *dest_i = func(src1_i); 409 | } 410 | } 411 | 412 | if (vl) { 413 | for (size_t i = vl; i < st.vlmax(); ++i) { 414 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 415 | *dest_i = 0; 416 | } 417 | } 418 | } 419 | 420 | template 421 | typename std::enable_if, Func>::value, void>::type 423 | iterate(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vreg_no vs2, vop_type mode) 424 | { 425 | State& st = dynamic_cast(vu); 426 | 427 | if (st.is_ill()) { 428 | throw State_not_configured(); 429 | } 430 | 431 | auto const vstart = 
st.vstart(); 432 | auto vl = st.vl(); 433 | 434 | for (size_t i = vstart; i < vl; ++i) { 435 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 436 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 437 | Element_type const src1_i = *reinterpret_cast(st.elt_ptr(vs1, i)); 438 | Element_type const src2_i = *reinterpret_cast(st.elt_ptr(vs2, i)); 439 | *dest_i = func(src1_i, src2_i); 440 | } 441 | } 442 | 443 | if (vl) { 444 | for (size_t i = vl; i < st.vlmax(); ++i) { 445 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 446 | *dest_i = 0; 447 | } 448 | } 449 | } 450 | 451 | template 452 | typename std::enable_if, Func>::value, void>::type 454 | iterate(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3, vop_type mode) 455 | { 456 | State& st = dynamic_cast(vu); 457 | 458 | if (st.is_ill()) { 459 | throw State_not_configured(); 460 | } 461 | 462 | auto const vstart = st.vstart(); 463 | auto const vl = st.vl(); 464 | 465 | for (size_t i = vstart; i < vl; ++i) { 466 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 467 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 468 | Element_type const src1_i = *reinterpret_cast(st.elt_ptr(vs1, i)); 469 | Element_type const src2_i = *reinterpret_cast(st.elt_ptr(vs2, i)); 470 | Element_type const src3_i = *reinterpret_cast(st.elt_ptr(vs3, i)); 471 | *dest_i = func(src1_i, src2_i, src3_i); 472 | } 473 | } 474 | 475 | if (vl) { 476 | for (size_t i = vl; i < st.vlmax(); ++i) { 477 | Element_type *const dest_i = reinterpret_cast(st.elt_ptr(vd, i)); 478 | *dest_i = 0; 479 | } 480 | } 481 | } 482 | 483 | template 484 | typename std::enable_if, Func>::value, void>::type 485 | iterate_vm(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vop_type mode) 486 | { 487 | State& st = dynamic_cast(vu); 488 | 489 | if (st.is_ill()) { 490 | throw State_not_configured(); 491 | } 492 | 493 | auto 
const vstart = st.vstart(); 494 | auto const vl = st.vl(); 495 | 496 | for (size_t i = vstart; i < vl; ++i) { 497 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 498 | Element_type const src1_i = *reinterpret_cast(st.elt_ptr(vs1, i)); 499 | bool result = func(src1_i); 500 | st.set_mask(v0, i, result); 501 | } 502 | } 503 | 504 | if (vl) { 505 | for (size_t i = vl; i < st.vlmax(); ++i) { 506 | st.set_mask(v0, i, false); 507 | } 508 | } 509 | } 510 | 511 | template 512 | typename std::enable_if, Func>::value, void>::type 514 | iterate_vm(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vreg_no vs2, vop_type mode) 515 | { 516 | State& st = dynamic_cast(vu); 517 | 518 | if (st.is_ill()) { 519 | throw State_not_configured(); 520 | } 521 | 522 | auto const vstart = st.vstart(); 523 | auto const vl = st.vl(); 524 | 525 | for (size_t i = vstart; i < vl; ++i) { 526 | if (mode == vop_type::thread_all || mode == vop_type::masked_in && st.is_enabled(i)) { 527 | Element_type const src1_i = *reinterpret_cast(st.elt_ptr(vs1, i)); 528 | Element_type const src2_i = *reinterpret_cast(st.elt_ptr(vs2, i)); 529 | bool result = func(src1_i, src2_i); 530 | st.set_mask(v0, i, result); 531 | } 532 | } 533 | 534 | if (vl) { 535 | for (size_t i = vl; i < st.vlmax(); ++i) { 536 | st.set_mask(v0, i, false); 537 | } 538 | } 539 | } 540 | 541 | template 542 | typename std::enable_if, Func>::value, void>::type 544 | iterate_mm(V_unit& vu, Func &&func, vreg_no vd, vreg_no vs1, vreg_no vs2) 545 | { 546 | State& st = dynamic_cast(vu); 547 | 548 | if (st.is_ill()) { 549 | throw State_not_configured(); 550 | } 551 | 552 | auto const vstart = st.vstart(); 553 | auto const vl = st.vl(); 554 | 555 | for (size_t i = vstart; i < vl; ++i) { 556 | bool src1_i = st.get_mask(vs1, i); 557 | bool src2_i = st.get_mask(vs2, i); 558 | bool result = func(src1_i, src2_i); 559 | st.set_mask(v0, i, result); 560 | } 561 | 562 | if (vl) { 563 | for (size_t i = vl; i < st.vlmax(); 
++i) { 564 | st.set_mask(v0, i, false); 565 | } 566 | } 567 | } 568 | }; 569 | 570 | 571 | template 572 | class Operations_essentials 573 | : virtual protected Operations 574 | , protected Non_scalar_operations_essentials 575 | { 576 | }; 577 | 578 | template 579 | class Float_operations_essentials 580 | : virtual protected Float_operations 581 | , protected Non_scalar_operations_essentials 582 | { 583 | }; 584 | 585 | template 586 | class Operations_impl 587 | : virtual public Operations 588 | , Get_mem_IO 589 | , Operations_essentials 590 | { 591 | private: 592 | virtual operator Operations &() final 593 | { 594 | return *this; 595 | } 596 | 597 | #if 0 598 | template 599 | static 600 | typename std::enable_if< 601 | (sizeof(Element_type) < sizeof(Ty) || sizeof(Element_type) < sizeof(Ty)) 602 | && std::is_assignable, Func>::value, 603 | std::function 604 | >::type 605 | adapter1(Func &&) 606 | { 607 | throw Load_wider_value_to_narrowed_element((std::max)(sizeof(Ty), sizeof(RTy)), sizeof(Element_type)); 608 | } 609 | 610 | template= sizeof(Ty) && sizeof(Element_type) >= sizeof(RTy)) 613 | && std::is_assignable, Func>::value> 614 | > 615 | static auto adapter1(Func &&func) 616 | { 617 | return 618 | [&func](Element_type const &x)->Element_type { 619 | return 620 | to_element(func( 621 | reinterpret_cast(x) 622 | )); 623 | }; 624 | } 625 | 626 | template 627 | static 628 | typename std::enable_if< 629 | (sizeof(Element_type) < sizeof(Ty) || sizeof(Element_type) < sizeof(RTy)) 630 | && std::is_assignable, Func>::value, 631 | std::function 632 | >::type 633 | adapter2(Func &&) 634 | { 635 | throw Load_wider_value_to_narrowed_element((std::max)(sizeof(Ty), sizeof(RTy)), sizeof(Element_type)); 636 | } 637 | 638 | template= sizeof(Ty) && sizeof(Element_type) >= sizeof(RTy)) 641 | && std::is_assignable, Func>::value> 642 | > 643 | static 644 | auto 645 | adapter2(Func &&func) 646 | { 647 | return 648 | [&func](Element_type const &x, Element_type const &y)->Element_type { 
649 | return 650 | to_element(func( 651 | reinterpret_cast(x), 652 | reinterpret_cast(y)); 653 | }; 654 | } 655 | 656 | template 657 | static 658 | typename std::enable_if< 659 | (sizeof(Element_type) < sizeof(Ty) || sizeof(Element_type) < sizeof(RTy)) 660 | && std::is_assignable, Func>::value, 661 | std::function 662 | >::type 663 | adapter3(Func &&) 664 | { 665 | throw Load_wider_value_to_narrowed_element((std::max)(sizeof(Ty), sizeof(RTy)), sizeof(Element_type)); 666 | } 667 | 668 | template= sizeof(Ty) && sizeof(Element_type) >= sizeof(RTy)) 671 | && std::is_assignable, Func>::value> 672 | > 673 | static 674 | auto 675 | adapter3(Func &&func) 676 | { 677 | return 678 | [&func](Element_type const &x, Element_type const &y, Element_type const &z)->Element_type { 679 | return to_element(func( 680 | reinterpret_cast(x), 681 | reinterpret_cast(y), 682 | reinterpret_cast(z))); 683 | }; 684 | } 685 | #endif 686 | 687 | static Element_type sll(Element_type const &x, Element_type const &y) 688 | { 689 | return x << y; 690 | } 691 | 692 | static Element_type sra(Element_type const &x, Element_type const &y) 693 | { 694 | return x >> y; 695 | } 696 | 697 | static Element_type srl(Element_type const &x, Element_type const &y) 698 | { 699 | return to_element(static_cast::type>(x) >> y); 700 | } 701 | 702 | void 703 | vadd_vv(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) final 704 | { 705 | this->iterate(V_unit::instance(), std::plus(), vd, vs1, vs2, mode); 706 | } 707 | 708 | void 709 | vadd_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) final 710 | { 711 | using namespace std::placeholders; 712 | this->iterate(V_unit::instance(), std::bind(std::plus(), _1, Element_type(rs1)), vd, vs2, mode); 713 | } 714 | 715 | void 716 | vadd_vi(vreg_no vd, vreg_no vs2, int16_t imm, vop_type mode = vop_type::thread_all) final 717 | { 718 | using namespace std::placeholders; 719 | this->iterate(V_unit::instance(), 
std::bind(std::plus(), _1, Element_type(imm)), vd, vs2, mode); 720 | } 721 | 722 | void 723 | vsub_vv(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) final 724 | { 725 | this->iterate(V_unit::instance(), std::minus(), vd, vs1, vs2, mode); 726 | } 727 | 728 | void 729 | vsub_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) final 730 | { 731 | using namespace std::placeholders; 732 | this->iterate(V_unit::instance(), std::bind(std::minus(), _1, Element_type(rs1)), vd, vs2, mode); 733 | } 734 | 735 | void 736 | vmsle_vv(vreg_no vd, vreg_no vs2, vreg_no vs1, vop_type mode = vop_type::thread_all) final 737 | { 738 | this->iterate_vm(V_unit::instance(), 739 | std::less_equal(), 740 | vd, 741 | vs1, 742 | vs2, 743 | mode); 744 | } 745 | 746 | void 747 | vmsle_vx(vreg_no vd, vreg_no vs2, xreg_type rs1, vop_type mode = vop_type::thread_all) final 748 | { 749 | using namespace std::placeholders; 750 | this->iterate_vm(V_unit::instance(), 751 | std::bind(std::less_equal(), _1, Element_type(rs1)), 752 | vd, 753 | vs2, 754 | mode); 755 | } 756 | 757 | void 758 | vmsle_vi(vreg_no vd, vreg_no vs2, int16_t imm, vop_type mode = vop_type::thread_all) final 759 | { 760 | using namespace std::placeholders; 761 | this->iterate_vm(V_unit::instance(), 762 | std::bind(std::less_equal(), _1, Element_type(imm)), 763 | vd, 764 | vs2, 765 | mode); 766 | } 767 | 768 | void 769 | vmand_mm(vreg_no vd, vreg_no vs2, vreg_no vs1) final 770 | { 771 | this->iterate_mm(V_unit::instance(), std::logical_and(), vd, vs1, vs2); 772 | } 773 | 774 | void 775 | vmnand_mm(vreg_no vd, vreg_no vs2, vreg_no vs1) final 776 | { 777 | auto op = [](bool const& x, bool const& y)->bool { 778 | return !(x && y); 779 | }; 780 | this->iterate_mm(V_unit::instance(), op, vd, vs1, vs2); 781 | } 782 | 783 | void 784 | vmnot_m(vreg_no vd, vreg_no vs1) final 785 | { 786 | vmnand_mm(vd, vs1, vs1); 787 | } 788 | 789 | void 790 | vmv_v_v(vreg_no vd, vreg_no vs1) final 791 
| { 792 | auto op = [](Element_type const& x)->Element_type { 793 | return x; 794 | }; 795 | this->iterate(V_unit::instance(), op, vd, vs1, vop_type::thread_all); 796 | } 797 | 798 | void 799 | vmv_v_x(vreg_no vd, xreg_type rs1) final 800 | { 801 | auto op = [&rs1]()->Element_type { 802 | return Element_type(rs1); 803 | }; 804 | this->iterate(V_unit::instance(), op, vd); 805 | } 806 | 807 | void 808 | vmv_v_i(vreg_no vd, int16_t imm) final 809 | { 810 | auto op = [&imm]()->Element_type { 811 | return Element_type(imm); 812 | }; 813 | this->iterate(V_unit::instance(), op, vd); 814 | } 815 | 816 | #if 0 817 | void vsll(vreg_no vd, vreg_no vs1, vreg_no vs2) final 818 | { 819 | this->iterate(sll, vd, vs1, vs2); 820 | } 821 | 822 | void vsra(vreg_no vd, vreg_no vs1, vreg_no vs2) final 823 | { 824 | this->iterate(sra, vd, vs1, vs2); 825 | } 826 | 827 | void vsrl(vreg_no vd, vreg_no vs1, vreg_no vs2) final 828 | { 829 | this->iterate(srl, vd, vs1, vs2); 830 | } 831 | 832 | void vand(vreg_no vd, vreg_no vs1, vreg_no vs2) final 833 | { 834 | this->iterate(std::bit_and(), vd, vs1, vs2); 835 | } 836 | 837 | void vor(vreg_no vd, vreg_no vs1, vreg_no vs2) final 838 | { 839 | this->iterate(std::bit_or(), vd, vs1, vs2); 840 | } 841 | 842 | void vxor(vreg_no vd, vreg_no vs1, vreg_no vs2) final 843 | { 844 | this->iterate(std::bit_xor(), vd, vs1, vs2); 845 | } 846 | 847 | void vmul(vreg_no vd, vreg_no vs1, vreg_no vs2) final 848 | { 849 | this->iterate(std::multiplies(), vd, vs1, vs2); 850 | } 851 | 852 | void vmulh(vreg_no vd, vreg_no vs1, vreg_no vs2) final 853 | { 854 | auto op = [](Element_type const& x, Element_type const& y)->Element_type { 855 | return to_element(mulh(x, y)); 856 | }; 857 | this->iterate(op, vd, vs1, vs2); 858 | } 859 | 860 | void vmulhsu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 861 | { 862 | auto op = [](Element_type const& x, Element_type const& y)->Element_type { 863 | return to_element(mulhsu(x, y)); 864 | }; 865 | this->iterate(op, vd, vs1, vs2); 866 
| } 867 | 868 | void vmulhu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 869 | { 870 | auto op = [](Element_type const& x, Element_type const& y)->Element_type { 871 | return to_element(mulhu(x, y)); 872 | }; 873 | this->iterate(op, vd, vs1, vs2); 874 | } 875 | 876 | void vdiv(vreg_no vd, vreg_no vs1, vreg_no vs2) final 877 | { 878 | this->iterate(std::divides(), vd, vs1, vs2); 879 | } 880 | 881 | void vdivu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 882 | { 883 | static auto const op = [](Element_type x, Element_type y)->Element_type { 884 | typedef typename std::make_unsigned::type uns_type; 885 | return static_cast(x) / static_cast(y); 886 | }; 887 | this->iterate(op, vd, vs1, vs2); 888 | } 889 | 890 | void vrem(vreg_no vd, vreg_no vs1, vreg_no vs2) final 891 | { 892 | this->iterate(std::modulus(), vd, vs1, vs2); 893 | } 894 | 895 | void vremu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 896 | { 897 | static auto const op = [](Element_type x, Element_type y)->Element_type { 898 | typedef typename std::make_unsigned::type uns_type; 899 | return static_cast(x) % static_cast(y); 900 | }; 901 | this->iterate(op, vd, vs1, vs2); 902 | } 903 | 904 | void vseq(vreg_no vd, vreg_no vs1, vreg_no vs2) final 905 | { 906 | this->iterate(adapter2(std::equal_to()), vd, vs1, vs2); 907 | } 908 | 909 | void vslt(vreg_no vd, vreg_no vs1, vreg_no vs2) final 910 | { 911 | this->iterate(adapter2(std::less()), vd, vs1, vs2); 912 | } 913 | 914 | void vsge(vreg_no vd, vreg_no vs1, vreg_no vs2) final 915 | { 916 | this->iterate(adapter2(std::greater_equal()), vd, vs1, vs2); 917 | } 918 | 919 | void vsltu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 920 | { 921 | static auto const op = [](Element_type x, Element_type y)->bool { 922 | typedef typename std::make_unsigned::type uns_type; 923 | return std::less()(static_cast(x), static_cast(y)); 924 | }; 925 | this->iterate(adapter2(op), vd, vs1, vs2); 926 | } 927 | 928 | void vsgeu(vreg_no vd, vreg_no vs1, vreg_no vs2) final 929 | { 930 | 
static auto const op = [](Element_type x, Element_type y)->bool { 931 | typedef typename std::make_unsigned::type uns_type; 932 | return std::greater()(static_cast(x), static_cast(y)); 933 | }; 934 | this->iterate(adapter2(op), vd, vs1, vs2); 935 | } 936 | 937 | void vslli(vreg_no vd, vreg_no vs1, int16_t imm) final 938 | { 939 | using namespace std::placeholders; 940 | this->iterate(std::bind(sll, _1, Element_type(imm)), vd, vs1); 941 | } 942 | 943 | void vsrli(vreg_no vd, vreg_no vs1, int16_t imm) final 944 | { 945 | using namespace std::placeholders; 946 | this->iterate(std::bind(srl, _1, Element_type(imm)), vd, vs1); 947 | } 948 | 949 | void vsrai(vreg_no vd, vreg_no vs1, int16_t imm) final 950 | { 951 | using namespace std::placeholders; 952 | this->iterate(std::bind(sra, _1, Element_type(imm)), vd, vs1); 953 | } 954 | 955 | void vandi(vreg_no vd, vreg_no vs1, int16_t imm) final 956 | { 957 | using namespace std::placeholders; 958 | this->iterate(std::bind(std::bit_and(), _1, Element_type(imm)), 959 | vd, vs1); 960 | } 961 | 962 | void vori(vreg_no vd, vreg_no vs1, int16_t imm) final 963 | { 964 | using namespace std::placeholders; 965 | this->iterate(std::bind(std::bit_or(), _1, Element_type(imm)), 966 | vd, vs1); 967 | } 968 | 969 | void vxori(vreg_no vd, vreg_no vs1, int16_t imm) final 970 | { 971 | using namespace std::placeholders; 972 | this->iterate(std::bind(std::bit_xor(), _1, Element_type(imm)), 973 | vd, vs1); 974 | } 975 | 976 | void vfadd_w(vreg_no vd, vreg_no vs1, vreg_no vs2)final 977 | { 978 | this->iterate(adapter2(std::plus()), vd, vs1, vs2); 979 | } 980 | 981 | void vfadd_d(vreg_no vd, vreg_no vs1, vreg_no vs2)final 982 | { 983 | this->iterate(adapter2(std::plus()), vd, vs1, vs2); 984 | } 985 | 986 | void vfsub_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 987 | { 988 | this->iterate(adapter2(std::minus()), vd, vs1, vs2); 989 | } 990 | 991 | void vfsub_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 992 | { 993 | 
this->iterate(adapter2(std::minus()), vd, vs1, vs2); 994 | } 995 | 996 | void vfmul_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 997 | { 998 | this->iterate(adapter2(std::multiplies()), vd, vs1, vs2); 999 | } 1000 | 1001 | void vfmul_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1002 | { 1003 | this->iterate(adapter2(std::multiplies()), vd, vs1, vs2); 1004 | } 1005 | 1006 | void vfdiv_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1007 | { 1008 | this->iterate(adapter2(std::divides()), vd, vs1, vs2); 1009 | } 1010 | 1011 | void vfdiv_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1012 | { 1013 | this->iterate(adapter2(std::divides()), vd, vs1, vs2); 1014 | } 1015 | 1016 | void vfsgnj_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1017 | { 1018 | this->iterate(adapter2(fsgnj), vd, vs1, vs2); 1019 | } 1020 | 1021 | void vfsgnj_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1022 | { 1023 | this->iterate(adapter2(fsgnj), vd, vs1, vs2); 1024 | } 1025 | 1026 | void vfsgnjn_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1027 | { 1028 | this->iterate(adapter2(fsgnjn), vd, vs1, vs2); 1029 | } 1030 | 1031 | void vfsgnjn_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1032 | { 1033 | this->iterate(adapter2(fsgnjn), vd, vs1, vs2); 1034 | } 1035 | 1036 | void vfsgnjx_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1037 | { 1038 | this->iterate(adapter2(fsgnjx), vd, vs1, vs2); 1039 | } 1040 | 1041 | void vfsgnjx_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1042 | { 1043 | this->iterate(adapter2(fsgnjx), vd, vs1, vs2); 1044 | } 1045 | 1046 | void vfmadd_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1047 | { 1048 | typedef float32_t curr_type; 1049 | static curr_type(*const fn)(curr_type, curr_type, curr_type) = std::fmaf; 1050 | this->iterate(adapter3(fn), vd, vs1, vs2, vs3); 1051 | } 1052 | 1053 | void vfmadd_d(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1054 | { 1055 | typedef float64_t curr_type; 1056 | static curr_type(*const fn)(curr_type, curr_type, 
curr_type) = std::fma; 1057 | this->iterate(adapter3(fn), vd, vs1, vs2, vs3); 1058 | } 1059 | 1060 | void vfmsub_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1061 | { 1062 | typedef float32_t curr_type; 1063 | static auto const op = [](curr_type const & x, curr_type const & y, curr_type const & z)->curr_type { 1064 | return std::fmaf(x, y, -z); 1065 | }; 1066 | this->iterate(adapter3(op), vd, vs1, vs2, vs3); 1067 | } 1068 | 1069 | void vfmsub_d(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1070 | { 1071 | typedef float64_t curr_type; 1072 | static auto const op = [](curr_type const & x, curr_type const & y, curr_type const & z)->curr_type { 1073 | return std::fma(x, y, -z); 1074 | }; 1075 | this->iterate(adapter3(op), vd, vs1, vs2, vs3); 1076 | } 1077 | 1078 | void vfmaddwdn_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1079 | { 1080 | typedef float32_t small_type; 1081 | typedef float32_t big_type; 1082 | static auto const op = [](small_type const & x, small_type const & y, small_type const & z)->big_type { 1083 | return std::fma(big_type(x), big_type(y), big_type(z)); 1084 | }; 1085 | this->iterate(adapter3(op), vd, vs1, vs2, vs3); 1086 | } 1087 | 1088 | void vfmsubwdn_w(vreg_no vd, vreg_no vs1, vreg_no vs2, vreg_no vs3) final 1089 | { 1090 | typedef float32_t small_type; 1091 | typedef float32_t big_type; 1092 | static auto const op = [](small_type const & x, small_type const & y, small_type const & z)->big_type { 1093 | return std::fma(big_type(x), big_type(y), big_type(-z)); 1094 | }; 1095 | this->iterate(adapter3(op), vd, vs1, vs2, vs3); 1096 | } 1097 | 1098 | void vfmin_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1099 | { 1100 | typedef float32_t curr_type; 1101 | static curr_type(*const fn)(curr_type, curr_type) = std::fminf; 1102 | this->iterate(adapter2(fn), vd, vs1, vs2); 1103 | } 1104 | 1105 | void vfmin_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1106 | { 1107 | typedef float64_t curr_type; 1108 | static 
curr_type(*const fn)(curr_type, curr_type) = std::fmin; 1109 | this->iterate(adapter2(fn), vd, vs1, vs2); 1110 | } 1111 | 1112 | void vfmax_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1113 | { 1114 | typedef float32_t curr_type; 1115 | static curr_type(*const fn)(curr_type, curr_type) = std::fmaxf; 1116 | this->iterate(adapter2(fn), vd, vs1, vs2); 1117 | } 1118 | 1119 | void vfmax_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1120 | { 1121 | typedef float64_t curr_type; 1122 | static curr_type(*const fn)(curr_type, curr_type) = std::fmax; 1123 | this->iterate(adapter2(fn), vd, vs1, vs2); 1124 | } 1125 | 1126 | void vfsqrt_w(vreg_no vd, vreg_no vs1) final 1127 | { 1128 | typedef float32_t curr_type; 1129 | static curr_type(*const fn)(curr_type) = std::sqrt; 1130 | this->iterate(adapter1(fn), vd, vs1); 1131 | } 1132 | 1133 | void vfsqrt_d(vreg_no vd, vreg_no vs1) final 1134 | { 1135 | typedef float64_t curr_type; 1136 | static curr_type(*const fn)(curr_type) = std::sqrt; 1137 | this->iterate(adapter1(fn), vd, vs1); 1138 | } 1139 | 1140 | void vfeq_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1141 | { 1142 | typedef float32_t curr_type; 1143 | this->iterate(adapter2(std::equal_to()), vd, vs1, vs2); 1144 | } 1145 | 1146 | void vfeq_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1147 | { 1148 | typedef float64_t curr_type; 1149 | this->iterate(adapter2(std::equal_to()), vd, vs1, vs2); 1150 | } 1151 | 1152 | void vflt_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1153 | { 1154 | typedef float32_t curr_type; 1155 | static bool(*const fn)(curr_type, curr_type) = std::isless; 1156 | this->iterate(adapter2(fn), vd, vs1, vs2); 1157 | } 1158 | 1159 | void vflt_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1160 | { 1161 | typedef float64_t curr_type; 1162 | static bool(*const fn)(curr_type, curr_type) = std::isless; 1163 | this->iterate(adapter2(fn), vd, vs1, vs2); 1164 | } 1165 | 1166 | void vfle_w(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1167 | { 1168 | typedef float32_t 
curr_type; 1169 | static bool(*const fn)(curr_type, curr_type) = std::islessequal; 1170 | this->iterate(adapter2(fn), vd, vs1, vs2); 1171 | } 1172 | 1173 | void vfle_d(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1174 | { 1175 | typedef float64_t curr_type; 1176 | static bool(*const fn)(curr_type, curr_type) = std::islessequal; 1177 | this->iterate(adapter2(fn), vd, vs1, vs2); 1178 | } 1179 | 1180 | void vaddw(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1181 | { 1182 | typedef int32_t curr_type; 1183 | this->iterate(adapter2(std::plus()), vd, vs1, vs2); 1184 | } 1185 | 1186 | void vsubw(vreg_no vd, vreg_no vs1, vreg_no vs2) final 1187 | { 1188 | typedef int32_t curr_type; 1189 | this->iterate(adapter2(std::minus()), vd, vs1, vs2); 1190 | } 1191 | 1192 | void vaddwi(vreg_no vd, vreg_no vs1, int16_t imm) final 1193 | { 1194 | typedef int32_t curr_type; 1195 | using namespace std::placeholders; 1196 | this->iterate(adapter1(std::bind(std::plus(), _1, curr_type(imm))), vd, vs1); 1197 | } 1198 | #endif 1199 | }; 1200 | 1201 | template 1202 | class Float_operations_impl 1203 | : virtual public Float_operations 1204 | , Float_operations_essentials 1205 | { 1206 | private: 1207 | virtual operator Float_operations &() final 1208 | { 1209 | return *this; 1210 | } 1211 | 1212 | void 1213 | vfmacc_vf(vreg_no vd, float rs1, vreg_no vs2, vop_type mode = vop_type::thread_all) final 1214 | { 1215 | auto op = [&rs1](Element_type const& x, Element_type const& y)->Element_type { 1216 | return rs1 * x + y; 1217 | }; 1218 | this->iterate(V_unit::instance(), op, vd, vs2, vd, mode); 1219 | } 1220 | }; 1221 | 1222 | Operations_impl op8; 1223 | Operations_impl op16; 1224 | Operations_impl op32; 1225 | Operations_impl op64; 1226 | 1227 | Float_operations_impl fop32; 1228 | Float_operations_impl fop64; 1229 | 1230 | static thread_local std::unique_ptr p_state; 1231 | 1232 | static inline constexpr size_t 1233 | bits(size_t _value, size_t _offset = 0) 1234 | { 1235 | return 0 == _value ? 
_offset : bits(_value >> 1, _offset + 1); 1236 | } 1237 | } // namespace 1238 | 1239 | class State_impl 1240 | : virtual protected State 1241 | { 1242 | typedef State_impl This_class; 1243 | protected: 1244 | State_impl() 1245 | : m_vstart(0) 1246 | , m_vl(0) 1247 | , m_mul(m1) 1248 | , m_mask_reg(v0) 1249 | { 1250 | } 1251 | 1252 | private: 1253 | size_t setvstart(size_t vstart)final 1254 | { 1255 | return this->m_vstart = vstart; 1256 | } 1257 | 1258 | size_t setvl(size_t vl)final 1259 | { 1260 | return this->m_vl = vl; 1261 | } 1262 | 1263 | void setill(bool ill)final 1264 | { 1265 | this->m_ill = ill; 1266 | } 1267 | 1268 | void setew(size_t ew)final 1269 | { 1270 | switch(ew) { 1271 | case 0b000: 1272 | this->m_op_performer = &op8; 1273 | this->m_fop_performer = nullptr; 1274 | break; 1275 | case 0b001: 1276 | this->m_op_performer = &op16; 1277 | this->m_fop_performer = nullptr; 1278 | break; 1279 | case 0b010: 1280 | this->m_op_performer = &op32; 1281 | this->m_fop_performer = &fop32; 1282 | break; 1283 | case 0b011: 1284 | this->m_op_performer = &op64; 1285 | this->m_fop_performer = &fop64; 1286 | break; 1287 | } 1288 | this->m_ew = ew; 1289 | } 1290 | 1291 | void setmul(size_t mul)final 1292 | { 1293 | this->m_mul = mul; 1294 | } 1295 | 1296 | size_t vstart()const final 1297 | { 1298 | return m_vstart; 1299 | } 1300 | 1301 | size_t vl()const final 1302 | { 1303 | return m_vl; 1304 | } 1305 | 1306 | size_t sew()const final 1307 | { 1308 | return 8 << m_ew; 1309 | } 1310 | 1311 | size_t lmul()const final 1312 | { 1313 | return 1 << m_mul; 1314 | } 1315 | 1316 | bool is_ill()const final 1317 | { 1318 | return m_ill; 1319 | } 1320 | 1321 | size_t vlmax()const final 1322 | { 1323 | return m_mul * implementation::V_unit::VLEN / sew(); 1324 | } 1325 | 1326 | inline bool is_enabled(size_t i)const 1327 | { 1328 | return 0 != this->mask_bit(i); 1329 | } 1330 | 1331 | bool 1332 | is_valid_reg(vreg_no _reg)const final 1333 | { 1334 | return (_reg % lmul()) == 0; 1335 | 
} 1336 | 1337 | char * 1338 | elt_ptr(vreg_no _reg, size_t _ind) final 1339 | { 1340 | if (!is_valid_reg(_reg)) { 1341 | throw Register_out_of_config_range(_reg); 1342 | } 1343 | size_t skip_rows = _reg; 1344 | 1345 | size_t elements_in_stripe = V_unit::SLEN / sew(); 1346 | size_t elements_in_group = elements_in_stripe * lmul(); 1347 | 1348 | size_t num_of_group = _ind / elements_in_group; 1349 | 1350 | size_t row = (_ind % elements_in_group) / elements_in_stripe; 1351 | size_t col = _ind % elements_in_stripe; 1352 | 1353 | size_t bits = (skip_rows + row) * V_unit::VLEN + (num_of_group * elements_in_stripe + col) * sew(); 1354 | return &m_register_file[bits / 8]; 1355 | } 1356 | 1357 | char const * 1358 | elt_ptr(vreg_no _reg, size_t _ind)const final 1359 | { 1360 | return const_cast(this)->elt_ptr(_reg, _ind); 1361 | } 1362 | 1363 | void 1364 | set_mask(vreg_no _reg, size_t _ind, bool value) final 1365 | { 1366 | size_t mlen = sew() / lmul(); 1367 | size_t byte_num = _reg * V_unit::VLEN / 8 + (_ind * mlen) / 8; 1368 | char *byte_ptr = &m_register_file[byte_num]; 1369 | 1370 | // zeroing mlen bits 1371 | size_t bits_left = mlen; 1372 | char *ptr = byte_ptr; 1373 | while (bits_left >= 8) { 1374 | *ptr = 0; 1375 | bits_left -= 8; 1376 | ++ptr; 1377 | } 1378 | if (bits_left) { 1379 | *ptr = *ptr & ~((1 << bits_left) - 1); 1380 | } 1381 | 1382 | // set LSB to the value 1383 | *byte_ptr = *byte_ptr & ~(1u) | !!value; 1384 | } 1385 | 1386 | bool 1387 | get_mask(vreg_no _reg, size_t _ind)const final 1388 | { 1389 | size_t mlen = sew() / lmul(); 1390 | size_t byte_num = _reg * V_unit::VLEN / 8 + (_ind * mlen) / 8; 1391 | char const *byte_ptr = &m_register_file[byte_num]; 1392 | 1393 | return 0 != (*byte_ptr & (1 << (mlen % 8))); 1394 | } 1395 | 1396 | bool 1397 | mask_bit(size_t i)const final 1398 | { 1399 | return get_mask(v0, i); 1400 | } 1401 | 1402 | Operations& 1403 | get_op_performer()const final 1404 | { 1405 | return *m_op_performer; 1406 | } 1407 | 1408 | 
Float_operations& 1409 | get_fop_performer()const final 1410 | { 1411 | return *m_fop_performer; 1412 | } 1413 | 1414 | private: 1415 | std::array m_register_file; 1416 | 1417 | Operations* m_op_performer; 1418 | Float_operations* m_fop_performer; 1419 | 1420 | size_t m_vstart; 1421 | size_t m_vl; 1422 | bool m_ill; 1423 | size_t m_ew; 1424 | size_t m_mul; 1425 | vreg_no m_mask_reg; 1426 | }; 1427 | 1428 | namespace { 1429 | class V_unit_impl 1430 | : State_impl 1431 | { 1432 | V_unit_impl() 1433 | : State_impl() 1434 | {} 1435 | 1436 | public: 1437 | static void 1438 | init() 1439 | { 1440 | p_state.reset(new V_unit_impl()); 1441 | } 1442 | }; 1443 | 1444 | } // namespace 1445 | 1446 | V_unit& 1447 | V_unit::instance() 1448 | { 1449 | return *p_state; 1450 | } 1451 | 1452 | } // namespace implementation 1453 | 1454 | size_t 1455 | vsetvl(size_t _avl, size_t _vtype) 1456 | { 1457 | using implementation::p_state; 1458 | 1459 | if (!p_state) { 1460 | implementation::V_unit_impl::init(); 1461 | } 1462 | 1463 | bool const ill = 0 != ((_vtype >> (sizeof(xreg_type) - 1)) && 0b1); 1464 | size_t mul = 0; 1465 | size_t ew = 0; 1466 | size_t avl = 0; 1467 | if (!ill) { 1468 | mul = _vtype & 0b11; 1469 | ew = (_vtype >> 2) & 0b111; 1470 | size_t lmul = 1 << mul; 1471 | size_t sew = 8 << ew; 1472 | 1473 | if (_avl > 0) { 1474 | size_t vlmax = lmul * implementation::V_unit::VLEN / sew; 1475 | 1476 | if (_avl <= vlmax) { 1477 | avl = _avl; 1478 | } else if (_avl >= 2 * vlmax) { 1479 | avl = vlmax; 1480 | } else { 1481 | avl = (_avl + 1) / 2; 1482 | } 1483 | } 1484 | } 1485 | 1486 | p_state->setill(ill); 1487 | p_state->setmul(mul); 1488 | p_state->setew(ew); 1489 | p_state->setvstart(0); 1490 | return p_state->setvl(avl); 1491 | } 1492 | 1493 | size_t 1494 | vsetvli(size_t _avl, int16_t _vtypei) 1495 | { 1496 | return vsetvl(_avl, size_t(_vtypei)); 1497 | } 1498 | 1499 | #if 0 1500 | // mask is always register 0 1501 | void 1502 | vsetmask(vreg_no _size) 1503 | { 1504 | using 
implementation::p_state; 1505 | 1506 | p_state->set_mask_reg(_size); 1507 | } 1508 | #endif 1509 | 1510 | } // namespace spec_0_7 1511 | } // namespace v 1512 | } // namespace riscv 1513 | --------------------------------------------------------------------------------