├── include └── burst │ ├── detail │ ├── allocator.inl │ ├── vector.inl │ └── memory.inl │ ├── config.h │ ├── memory.h │ ├── allocator.h │ ├── vector.h │ ├── simd │ ├── detail │ │ └── vec_types.h │ └── emmintrin.h │ └── rand_iterator.h ├── test ├── testbench.cpp ├── test2.cpp ├── test1.cpp └── vivado_hls.app ├── src └── burst │ └── memory.cpp ├── LICENSE └── README.md /include/burst/detail/allocator.inl: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. -------------------------------------------------------------------------------- /include/burst/config.h: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 3 | 4 | #pragma once 5 | 6 | #include 7 | 8 | namespace burst 9 | { 10 | namespace config 11 | { 12 | 13 | typedef std::size_t size_type; 14 | typedef std::ptrdiff_t difference_type; 15 | 16 | } // namespace config 17 | } // namespace burst 18 | -------------------------------------------------------------------------------- /test/testbench.cpp: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 
3 | 4 | #include 5 | #include // memset 6 | #include 7 | #include 8 | 9 | extern int test1(volatile uint8_t* a, size_t n); 10 | extern int test2(volatile uint8_t* a, size_t n); 11 | 12 | int main() 13 | { 14 | int a[64]; 15 | memset(a, 0, sizeof(a)); 16 | size_t n = (sizeof(a) / sizeof(int)); 17 | 18 | int result = test1(reinterpret_cast(a), n); 19 | 20 | for (auto i : a) 21 | std::cout << i << ' '; 22 | std::cout << '\n'; 23 | 24 | return result; 25 | } 26 | -------------------------------------------------------------------------------- /src/burst/memory.cpp: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace burst 9 | { 10 | namespace memory 11 | { 12 | 13 | region default_regions[RegionMax]; 14 | 15 | //------------------------------------------------------------------------------------------------- 16 | // Initialize a default memory region 17 | // 18 | 19 | void init(volatile uint8_t* a, region::size_type n, region_id id) 20 | { 21 | assert(a != nullptr); 22 | assert(id < RegionMax); 23 | 24 | default_regions[id] = region(a, n); 25 | } 26 | 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /test/test2.cpp: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 

#include
#include

// HLS top-level function exercising burst::vector.
//
// a: memory handed to burst::memory::init() as the default region
// n: size forwarded to burst::memory::init()
// Always returns 0.
int test2(volatile uint8_t* a, size_t n)
{
#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS
#pragma HLS INTERFACE m_axi depth=512 port=a offset=slave bundle=MASTER_BUS
#pragma HLS INTERFACE s_axilite port=n bundle=CTRL_BUS

    // Bind the default region (Region0) to the buffer passed in.
    burst::memory::init(a, n);

    // Eight elements, then a few writes through each element accessor.
    burst::vector v1({0,1,2,3,4,5,6,7});
    v1.at(1) = 2;
    v1[2] = v1[1] + 1;
    v1.front() = 1;
    v1.back() = 8;

    // Increment the elements at indices 3 .. size-2 through iterators.
    for (auto it = v1.begin() + 3; it != v1.end() - 1; ++it)
    {
        *it += 1;
    }

    return 0;
}

--------------------------------------------------------------------------------
/test/test1.cpp:
--------------------------------------------------------------------------------
// This file is distributed under the MIT license.
// See the LICENSE file for details.

#include
#include

#include

// HLS top-level function running standard algorithms on region memory.
//
// a: memory handed to burst::memory::init() as the default region
// n: size forwarded to burst::memory::init()
// Always returns 0.
int test1(volatile uint8_t* a, size_t n)
{
#pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS
#pragma HLS INTERFACE m_axi depth=256 port=a offset=slave bundle=MASTER_BUS
#pragma HLS INTERFACE s_axilite port=n bundle=CTRL_BUS

    // Bind the default region (Region0) to the buffer passed in.
    burst::memory::init(a, n);

    // Allocate eight elements; a_begin/a_end delimit them like an iterator pair.
    auto a_begin = burst::memory::allocate(8);
    auto a_end = a_begin + 8;

    std::fill(a_begin, a_end, 24);

    // Overwrite the fill pattern with known values.
    int arr[] = { 5, 2, 4, 9, 1, -1, 0, 12 };
    std::copy(arr, arr + 8, a_begin);

    // Mutating algorithms applied directly to the region-backed range.
    std::make_heap(a_begin, a_end);
    std::sort_heap(a_begin, a_end);
    std::swap(a_begin[0], a_begin[1]);
    std::fill(a_begin + 2, a_begin + 6, 23);
    std::rotate(a_begin, a_begin + 2, a_end);

    burst::memory::deallocate(a_begin);

    return 0;
}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
The MIT License (MIT)

Copyright (c) 2016 S.
Zellmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/vivado_hls.app: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | burst 2 | ===== 3 | 4 | A C++ template library for FPGAs on top of Xilinx Vivado HLS 5 | 6 | > **Note that the current version of burst is an early preview. At this stage, the framework, including the API, is likely to undergo frequent changes.** 7 | 8 | This library is currently at a proof-of-concept stage.
It will (hopefully, and soon) help you to port existing C++ code that relies on templates and dynamic memory. 9 | 10 | Why "burst"? 11 | ------------ 12 | 13 | Because of "burst reads" from AXI-Master interfaces, and because it differs from "boost" by only two characters. 14 | 15 | Use burst like this: 16 | -------------------- 17 | 18 | ```C++ 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | int test(volatile uint8_t* a, size_t n) 25 | { 26 | #pragma HLS INTERFACE s_axilite port=return bundle=CTRL_BUS 27 | #pragma HLS INTERFACE m_axi depth=256 port=a offset=slave bundle=MASTER_BUS 28 | #pragma HLS INTERFACE s_axilite port=n bundle=CTRL_BUS 29 | 30 | burst::memory::init(a, n); 31 | 32 | auto a_begin = burst::memory::allocate(8); 33 | auto a_end = a_begin + 8; 34 | 35 | std::fill(a_begin, a_end, 24); 36 | 37 | int arr[] = { 5, 2, 4, 9, 1, -1, 0, 12 }; 38 | std::copy(arr, arr + 8, a_begin); 39 | 40 | std::make_heap(a_begin, a_end); 41 | std::sort_heap(a_begin, a_end); 42 | std::swap(a_begin[0], a_begin[1]); 43 | std::fill(a_begin + 2, a_begin + 6, 23); 44 | std::rotate(a_begin, a_begin + 2, a_end); 45 | 46 | burst::memory::deallocate(a_begin); 47 | 48 | return 0; 49 | } 50 | ``` 51 | 52 | License 53 | ------- 54 | 55 | Burst is licensed under the MIT License (MIT) 56 | -------------------------------------------------------------------------------- /include/burst/memory.h: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 

#pragma once

#include
#include

#include "config.h"
#include "rand_iterator.h"

namespace burst
{
namespace memory
{

//-------------------------------------------------------------------------------------------------
// Wrapper class for memory regions
//
// A region pairs a base pointer (data) with a size (N). Storage inside the
// region is handed out as rand_iterator handles by allocate() and returned
// with deallocate().
//

class region
{
public:

    typedef config::size_type size_type;
    typedef config::difference_type difference_type;

public:

    region();
    // Wrap the memory starting at a; n is the size of the region.
    region(volatile uint8_t* a, size_type n);

    // Allocate room for n elements inside this region and return an iterator
    // to the first one.
    template
    rand_iterator allocate(size_type n);

    // Return storage previously obtained from allocate().
    template
    void deallocate(rand_iterator ptr);

    // True when the region has a non-null base pointer.
    bool valid() const;

    volatile uint8_t* data;     // base address of the region
    volatile size_type N;       // size of the region
};


//-------------------------------------------------------------------------------------------------
// Default memory regions
//
// Identifiers for the slots of default_regions; RegionMax is the slot count
// and not a valid region itself.
//

enum region_id
{
    Region0,
    Region1,
    Region2,
    Region3,
    Region4,
    Region5,
    Region6,
    Region7,
    RegionMax
};

extern region default_regions[RegionMax];


//-------------------------------------------------------------------------------------------------
// Initialize a default memory region
//
// Binds default region id (Region0 unless specified) to the memory at a with
// size n.
//

void init(volatile uint8_t* a, region::size_type n, region_id id = Region0);


//-------------------------------------------------------------------------------------------------
// Allocate/deallocate on one of the default memory regions
//
// Convenience wrappers around region::allocate()/region::deallocate() that
// pick the region by id (Region0 unless specified).
//

template
rand_iterator allocate(region::size_type n, region_id id = Region0);

template
void deallocate(rand_iterator ptr, region_id id = Region0);

} // namespace memory
} // namespace burst

#include "detail/memory.inl"

--------------------------------------------------------------------------------
/include/burst/allocator.h:
--------------------------------------------------------------------------------
// This file is distributed under the MIT license.
// See the LICENSE file for details.

#pragma once

#include "memory.h"
#include "rand_iterator.h"

namespace burst
{

// Allocator-style adapter that serves requests from one of the default burst
// memory regions (memory::default_regions[Id]). Pointers are rand_iterator
// handles rather than raw pointers.
// NOTE(review): the template parameter lists in this header appear to have
// been lost (T and Id are used but not declared here) - confirm against the
// original file.
template
class allocator
{
public:

    typedef T value_type;
    typedef rand_iterator pointer;
    typedef const rand_iterator const_pointer;
    typedef typename rand_iterator::reference reference;
    typedef typename rand_iterator::const_reference const_reference;
    typedef typename rand_iterator::size_type size_type;
    typedef typename rand_iterator::difference_type difference_type;

public:

    allocator() = default;

    // Converting constructor; the allocator carries no state, so there is
    // currently nothing to copy.
    template
    allocator(allocator const& /*rhs*/)
    {
        //TODO
    }

    // Obtain the allocator type for another element type.
    template
    struct rebind
    {
        typedef allocator other;
    };

    // Address-of on an element reference.
    pointer address(reference r) const
    {
        return &r;
    }

    const_pointer address(const_reference r) const
    {
        return &r;
    }

    // Allocate n elements from region Id; the hint is ignored.
    pointer allocate(size_type n, void* /*hint*/ = 0)
    {

        return memory::allocate(n, Id);
    }

    // Give storage back to region Id; the element count is ignored.
    void deallocate(pointer p, size_type /*n*/)
    {
        memory::deallocate(p, Id);
    }

    // Returns the size N of region Id unchanged.
    // NOTE(review): N looks like a byte count while max_size() conventionally
    // reports a number of elements, and the return type is size_t rather than
    // size_type - confirm the intended unit.
    size_t max_size() const
    {
        return memory::default_regions[Id].N;
    }

    // Intentionally empty: no object is constructed in the allocated storage.
    void construct(pointer p, const_reference val)
    {

    }

    // Intentionally empty: no destructor is run.
    void destroy(pointer p)
    {

    }

    // Allocators always compare equal.
    bool operator==(allocator const& rhs) const
    {
        return true;
    }

    bool operator!=(allocator const& rhs) const
    {
        return !(*this == rhs);
    }

private:

};

} // namespace burst

#include "detail/allocator.inl"

--------------------------------------------------------------------------------
/include/burst/vector.h:
-------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 3 | 4 | #include 5 | 6 | #include "allocator.h" 7 | #include "config.h" 8 | #include "memory.h" 9 | #include "rand_iterator.h" 10 | 11 | namespace burst 12 | { 13 | 14 | template > 15 | class vector 16 | { 17 | public: 18 | typedef T value_type; 19 | typedef Alloc allocator_type; 20 | typedef config::size_type size_type; 21 | typedef config::difference_type difference_type; 22 | typedef rand_iterator pointer; 23 | typedef const rand_iterator const_pointer; 24 | typedef typename rand_iterator::reference reference; 25 | typedef typename rand_iterator::const_reference const_reference; 26 | typedef rand_iterator iterator; 27 | typedef const rand_iterator const_iterator; 28 | 29 | public: 30 | 31 | // ---------------------------------------------------- 32 | 33 | vector(); 34 | explicit vector(size_type count); 35 | vector(std::initializer_list init, Alloc const& alloc = Alloc()); 36 | vector(const_iterator first, const_iterator last); 37 | 38 | // Element access ------------------------------------- 39 | 40 | reference at(size_type pos); 41 | const_reference at(size_type pos) const; 42 | reference operator[](size_type pos); 43 | const_reference operator[](size_type pos) const; 44 | reference front(); 45 | const_reference front() const; 46 | reference back(); 47 | const_reference back() const; 48 | 49 | // Iterators ------------------------------------------ 50 | 51 | iterator begin(); 52 | const_iterator begin() const; 53 | const_iterator cbegin() const; 54 | iterator end(); 55 | const_iterator end() const; 56 | const_iterator cend() const; 57 | 58 | // Capacity ------------------------------------------- 59 | 60 | bool empty() const; 61 | size_type size() const; 62 | size_type max_size() const; 63 | void reserve(size_type new_cap); 64 | size_type capacity() const; 65 | void shrink_to_fit(); 66 
| 67 | // Modifiers ------------------------------------------ 68 | 69 | void push_back(T const& value); 70 | 71 | private: 72 | 73 | rand_iterator first_; 74 | size_type size_; 75 | size_type capacity_; 76 | 77 | void grow_by(size_type s); 78 | void shrink_by(size_type s); 79 | }; 80 | 81 | } // namespace burst 82 | 83 | #include "detail/vector.inl" 84 | -------------------------------------------------------------------------------- /include/burst/detail/vector.inl: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 3 | 4 | #include 5 | #include 6 | 7 | namespace burst 8 | { 9 | 10 | template 11 | inline vector::vector() 12 | : size_(0) 13 | , capacity_(0) 14 | { 15 | } 16 | 17 | template 18 | inline vector::vector(typename vector::size_type count) 19 | : size_(count) 20 | , capacity_(0) 21 | { 22 | grow_by(size_); 23 | } 24 | 25 | template 26 | inline vector::vector(std::initializer_list init, Alloc const&) 27 | : size_(0) 28 | , capacity_(0) 29 | { 30 | grow_by(init.size()); 31 | size_ = init.size(); 32 | std::copy(init.begin(), init.end(), first_); 33 | } 34 | 35 | template 36 | inline vector::vector( 37 | typename vector::const_iterator first, 38 | typename vector::const_iterator last 39 | ) 40 | : size_(std::distance(first, last)) 41 | , capacity_(0) 42 | { 43 | grow_by(size_); 44 | std::copy(first, last, first_); 45 | } 46 | 47 | // Iterators ---------------------------------------------- 48 | 49 | template 50 | inline typename vector::iterator vector::begin() 51 | { 52 | return first_; 53 | } 54 | 55 | template 56 | inline typename vector::const_iterator vector::begin() const 57 | { 58 | return first_; 59 | } 60 | 61 | template 62 | inline typename vector::const_iterator vector::cbegin() const 63 | { 64 | return first_; 65 | } 66 | 67 | template 68 | inline typename vector::iterator vector::end() 69 | { 70 | return first_ + size_; 
71 | } 72 | 73 | template 74 | inline typename vector::const_iterator vector::end() const 75 | { 76 | return first_ + size_; 77 | } 78 | 79 | template 80 | inline typename vector::const_iterator vector::cend() const 81 | { 82 | return first_ + size_; 83 | } 84 | 85 | // Element access ----------------------------------------- 86 | 87 | template 88 | inline typename vector::reference vector::at( 89 | typename vector::size_type pos 90 | ) 91 | { 92 | // TODO: emulate "throw std::out_of_range" 93 | return operator[](pos); 94 | } 95 | 96 | template 97 | inline typename vector::const_reference vector::at( 98 | typename vector::size_type pos 99 | ) const 100 | { 101 | // TODO: emulate "throw std::out_of_range" 102 | return operator[](pos); 103 | } 104 | 105 | template 106 | inline typename vector::reference vector::operator[]( 107 | typename vector::size_type pos 108 | ) 109 | { 110 | return *(first_ + pos); 111 | } 112 | 113 | template 114 | inline typename vector::const_reference vector::operator[]( 115 | typename vector::size_type pos 116 | ) const 117 | { 118 | return *(first_ + pos); 119 | } 120 | 121 | template 122 | inline typename vector::reference vector::front() 123 | { 124 | return *first_; 125 | } 126 | 127 | template 128 | inline typename vector::const_reference vector::front() const 129 | { 130 | return *first_; 131 | } 132 | 133 | template 134 | inline typename vector::reference vector::back() 135 | { 136 | return *(first_ + size_ - 1); 137 | } 138 | 139 | template 140 | inline typename vector::const_reference vector::back() const 141 | { 142 | return *(first_ + size_ - 1); 143 | } 144 | 145 | // Capacity ----------------------------------------------- 146 | 147 | template 148 | inline bool vector::empty() const 149 | { 150 | return size_ == 0; 151 | } 152 | 153 | template 154 | inline typename vector::size_type vector::size() const 155 | { 156 | return size_; 157 | } 158 | 159 | template 160 | inline typename vector::size_type vector::max_size() const 161 
| { 162 | return size_type(-1); 163 | } 164 | 165 | template 166 | inline void vector::reserve(typename vector::size_type new_cap) 167 | { 168 | grow_by(new_cap - capacity_); 169 | } 170 | 171 | template 172 | inline typename vector::size_type vector::capacity() const 173 | { 174 | return capacity_; 175 | } 176 | 177 | // Modifiers ---------------------------------------------- 178 | 179 | template 180 | inline void vector::push_back(T const& value) 181 | { 182 | if (capacity_ < size_ + 1) 183 | { 184 | grow_by(1); 185 | } 186 | 187 | *(first_ + size_) = value; 188 | ++size_; 189 | } 190 | 191 | // private ------------------------------------------------ 192 | 193 | template 194 | inline void vector::grow_by(typename vector::size_type s) 195 | { 196 | Alloc alloc; 197 | 198 | if (capacity_ == 0) 199 | { 200 | first_ = alloc.allocate(s); 201 | } 202 | else 203 | { 204 | auto tmp = alloc.allocate(capacity_ + s); 205 | std::copy(first_, first_ + size_, tmp); 206 | alloc.deallocate(first_, capacity_); 207 | first_ = tmp; 208 | } 209 | 210 | capacity_ += s; 211 | } 212 | 213 | template 214 | inline void vector::shrink_by(typename vector::size_type s) 215 | { 216 | Alloc alloc; 217 | first_ = alloc.allocate(s); 218 | } 219 | 220 | } // namespace burst 221 | -------------------------------------------------------------------------------- /include/burst/detail/memory.inl: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 
3 | 4 | #include 5 | #include 6 | #include 7 | #include // memcpy 8 | 9 | #ifndef NDEBUG 10 | #include 11 | #include 12 | #endif 13 | 14 | #ifndef __SYNTHESIS__ 15 | //#include 16 | //#include 17 | #endif 18 | 19 | #include "../rand_iterator.h" 20 | 21 | namespace burst 22 | { 23 | namespace memory 24 | { 25 | 26 | //enum { N = 1024 }; 27 | 28 | #ifndef __SYNTHESIS__ 29 | //static boost::mutex mtx; 30 | #endif 31 | 32 | struct node; 33 | typedef node* node_ptr; 34 | 35 | 36 | // node --------------------------------------------------- 37 | 38 | struct node 39 | { 40 | region::size_type pos; 41 | region::size_type size; 42 | region::size_type pred_pos; 43 | region::size_type allocated; 44 | }; 45 | 46 | inline void make_node( 47 | volatile uint8_t* mem, 48 | region::size_type pos, 49 | region::size_type size, 50 | region::size_type pred_pos, 51 | bool allocated 52 | ) 53 | { 54 | #pragma HLS INLINE 55 | node n; 56 | n.pos = pos; 57 | n.size = size; 58 | n.pred_pos = pred_pos; 59 | n.allocated = allocated; 60 | { 61 | #ifndef __SYNTHESIS__ 62 | // boost::lock_guard l(mtx); 63 | #endif 64 | memcpy((uint8_t*)mem + pos, &n, sizeof(n)); 65 | } 66 | } 67 | 68 | // Order matters! 
69 | inline void merge_nodes( 70 | volatile uint8_t* mem, 71 | node const& n1, 72 | node const& n2, 73 | bool allocated 74 | ) 75 | { 76 | #pragma HLS INLINE 77 | make_node(mem, n1.pos, n1.size + n2.size, n1.pred_pos, allocated); 78 | } 79 | 80 | inline node get_node(volatile uint8_t* mem, region::size_type pos) 81 | { 82 | #pragma HLS INLINE 83 | node n; 84 | { 85 | #ifndef __SYNTHESIS__ 86 | // boost::lock_guard l(mtx); 87 | #endif 88 | memcpy(&n, (uint8_t*)mem + pos, sizeof(n)); 89 | } 90 | return n; 91 | } 92 | 93 | inline node get_node_from_ptr(volatile void const* ptr) 94 | { 95 | #pragma HLS INLINE 96 | node n; 97 | { 98 | #ifndef __SYNTHESIS__ 99 | // boost::lock_guard l(mtx); 100 | #endif 101 | memcpy(&n, (void const*)ptr, sizeof(n)); 102 | } 103 | return n; 104 | } 105 | 106 | inline node prev_node(volatile uint8_t* mem, node const& n) 107 | { 108 | #pragma HLS INLINE 109 | return get_node(mem, n.pred_pos); 110 | } 111 | 112 | inline node next_node(volatile uint8_t* mem, node const& n) 113 | { 114 | #pragma HLS INLINE 115 | region::size_type addr = n.pos + n.size; 116 | return get_node(mem, addr); 117 | } 118 | 119 | inline region::size_type get_data_addr(node const& n) 120 | { 121 | #pragma HLS INLINE 122 | return n.pos + sizeof(n); 123 | } 124 | 125 | inline bool operator!=(node const& n1, node const& n2) 126 | { 127 | return n1.pos != n2.pos || 128 | n1.size != n2.size || 129 | n1.pred_pos != n2.pred_pos || 130 | n1.allocated != n2.allocated 131 | ; 132 | } 133 | 134 | #ifndef NDEBUG 135 | inline std::ostream& operator<<(std::ostream& out, node n) 136 | { 137 | out << '(' << n.pos 138 | << ',' << n.size 139 | << ',' << n.pred_pos 140 | << ',' << n.allocated 141 | << ')'; 142 | return out; 143 | } 144 | #endif 145 | 146 | 147 | // list --------------------------------------------------- 148 | 149 | static bool free_list_initialized = false; 150 | 151 | inline void free_list_init(volatile uint8_t* mem, region::size_type N) 152 | { 153 | #pragma HLS 
INLINE 154 | make_node(mem, 0, N, 0, false); 155 | } 156 | 157 | 158 | inline node free_list_insert_first(volatile uint8_t* mem, region::size_type size) 159 | { 160 | #pragma HLS INLINE 161 | region::size_type addr = 0; 162 | 163 | node n = get_node(mem, addr); 164 | 165 | while (n.size < size || n.allocated) 166 | { 167 | n = next_node(mem, n); 168 | } 169 | 170 | 171 | node n1; 172 | n1.pos = n.pos; 173 | n1.size = size; 174 | n1.pred_pos = n.pred_pos; 175 | n1.allocated = true; 176 | 177 | node n2; 178 | n2.pos = n.pos + size; 179 | n2.size = n.size - size; 180 | n2.pred_pos = n.pos; 181 | n2.allocated = n.allocated; 182 | 183 | make_node(mem, n1.pos, n1.size, n1.pred_pos, n1.allocated); 184 | make_node(mem, n2.pos, n2.size, n2.pred_pos, n2.allocated); 185 | 186 | return n1; 187 | } 188 | 189 | #ifndef NDEBUG 190 | inline void free_list_print(volatile uint8_t* mem, region::size_type N) 191 | { 192 | #ifndef __SYNTHESIS__ 193 | node n = get_node(mem, 0); 194 | std::cout << "n= get_node(0): "< 221 | inline rand_iterator region::allocate(region::size_type n) 222 | { 223 | if (!free_list_initialized) 224 | { 225 | free_list_init(data, N); 226 | free_list_initialized = true; 227 | } 228 | 229 | // Bytes 230 | region::size_type size = n * sizeof(T); 231 | 232 | node nd = free_list_insert_first(data, size + sizeof(node)); 233 | 234 | return rand_iterator( 235 | data, 236 | static_cast(get_data_addr(nd) / sizeof(T)) // TODO! 
237 | ); 238 | } 239 | 240 | template 241 | inline void region::deallocate(rand_iterator ptr) 242 | { 243 | if (!free_list_initialized) 244 | { 245 | free_list_init(data, N); 246 | free_list_initialized = true; 247 | } 248 | 249 | volatile uint8_t* ptr8 = ptr.data() + ptr.pos() * sizeof(T); 250 | node n1 = get_node_from_ptr(ptr8 - sizeof(node)); 251 | node n2 = next_node(data, n1); 252 | 253 | make_node(data, n1.pos, n1.size, n1.pred_pos, false); 254 | 255 | if (!n2.allocated) 256 | merge_nodes(data, n1, n2, false); 257 | 258 | n1 = get_node(data, n1.pos); 259 | node n3 = prev_node(data, n1); 260 | 261 | if (!n3.allocated && n1 != n3) 262 | merge_nodes(data, n3, n1, false); 263 | } 264 | 265 | inline bool region::valid() const 266 | { 267 | return data != nullptr; 268 | } 269 | 270 | 271 | //------------------------------------------------------------------------------------------------- 272 | // Allocate/deallocate on one of the default memory regions 273 | // 274 | 275 | template 276 | inline rand_iterator allocate(region::size_type n, region_id id) 277 | { 278 | assert(id < RegionMax); 279 | 280 | auto reg = default_regions[id]; // Local copy, HLS seems to optimize 281 | // this away otherwise.. 282 | return reg.allocate(n); 283 | } 284 | 285 | template 286 | inline void deallocate(rand_iterator ptr, region_id id) 287 | { 288 | assert(id < RegionMax); 289 | 290 | auto reg = default_regions[id]; 291 | reg.deallocate(ptr); 292 | } 293 | 294 | } // namespace memory 295 | } // namespace burst 296 | -------------------------------------------------------------------------------- /include/burst/simd/detail/vec_types.h: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 

#pragma once

#include
#include
#include

#define FORCEINLINE inline
#define SIMD_LITTLE_ENDIAN 1

namespace detail
{

//-------------------------------------------------------------------------------------------------
// Signed types
//
// Plain-array stand-ins for SIMD vector types. Each struct exposes its lane
// count as N, its lane type as value_type and the lanes themselves as value[].
//

// 8 lanes of int8_t
struct int8x8_t
{
    enum { N = 8 };
    typedef int8_t value_type;
    int8_t value[8];
};

// 16 lanes of int8_t
struct int8x16_t
{
    enum { N = 16 };
    typedef int8_t value_type;
    int8_t value[16];
};

// 8 lanes of int16_t
struct int16x8_t
{
    enum { N = 8 };
    typedef int16_t value_type;
    int16_t value[8];
};

// 2 lanes of int32_t
struct int32x2_t
{
    enum { N = 2 };
    typedef int32_t value_type;
    int32_t value[2];
};

// 4 lanes of int32_t
struct int32x4_t
{
    enum { N = 4 };
    typedef int32_t value_type;
    int32_t value[4];
};

// 2 lanes of int64_t
struct int64x2_t
{
    enum { N = 2 };
    typedef int64_t value_type;
    int64_t value[2];
};


//-------------------------------------------------------------------------------------------------
// Unsigned types
//

// 16 lanes of uint8_t
struct uint8x16_t
{
    enum { N = 16 };
    typedef uint8_t value_type;
    uint8_t value[16];
};

// 8 lanes of uint16_t
struct uint16x8_t
{
    enum { N = 8 };
    typedef uint16_t value_type;
    uint16_t value[8];
};

// 4 lanes of uint32_t
struct uint32x4_t
{
    enum { N = 4 };
    typedef uint32_t value_type;
    uint32_t value[4];
};

// 2 lanes of uint64_t
struct uint64x2_t
{
    enum { N = 2 };
    typedef uint64_t value_type;
    uint64_t value[2];
};


//-------------------------------------------------------------------------------------------------
// swap_sign: trait whose value_type names a vector type
//
// NOTE(review): presumably maps each vector type to its counterpart of the
// opposite signedness; the template argument lists are missing from the
// declarations below - confirm against the original file.
//

template
struct swap_sign;

template <>
struct swap_sign { typedef uint8x16_t value_type; };

template <>
struct swap_sign { typedef int8x16_t value_type; };

| template <> 110 | struct swap_sign { typedef uint16x8_t value_type; }; 111 | 112 | template <> 113 | struct swap_sign { typedef int16x8_t value_type; }; 114 | 115 | template <> 116 | struct swap_sign { typedef uint32x4_t value_type; }; 117 | 118 | template <> 119 | struct swap_sign { typedef int32x4_t value_type; }; 120 | 121 | template <> 122 | struct swap_sign { typedef uint64x2_t value_type; }; 123 | 124 | template <> 125 | struct swap_sign { typedef int64x2_t value_type; }; 126 | 127 | 128 | //------------------------------------------------------------------------------------------------- 129 | // Conversion 130 | // 131 | 132 | template 133 | FORCEINLINE I1 convert_up(I2 const& a) 134 | { 135 | auto stride = I2::N / I1::N; 136 | auto shift = sizeof(typename I2::value_type) * 8; 137 | 138 | I1 result; 139 | typedef typename std::make_unsigned::type UI2; 140 | for (int i = 0; i < I2::N; i += stride) 141 | { 142 | result.value[i / stride] = UI2(a.value[i + stride - 1]); 143 | for (int j = 1; j < stride; ++j) 144 | { 145 | #if SIMD_LITTLE_ENDIAN 146 | result.value[i / stride] <<= shift; 147 | #else 148 | result.value[i / stride] >>= shift; 149 | #endif 150 | result.value[i / stride] |= UI2(a.value[i + stride - j - 1]); 151 | } 152 | } 153 | 154 | return result; 155 | } 156 | 157 | template 158 | FORCEINLINE I1 convert_down(I2 const& a) 159 | { 160 | auto stride = I1::N/ I2::N; 161 | auto shift = sizeof(typename I1::value_type) * 8; 162 | 163 | I1 result; 164 | for (int i = 0; i < I1::N; i += stride) 165 | { 166 | auto val = a.value[i / stride]; 167 | 168 | for (int j = 0; j < stride; ++j) 169 | { 170 | result.value[i + j] = typename I1::value_type(val); 171 | #if SIMD_LITTLE_ENDIAN 172 | val >>= shift; 173 | #else 174 | val <<= shift; 175 | #endif 176 | } 177 | } 178 | return result; 179 | } 180 | 181 | template 182 | FORCEINLINE T1 convert_static(T2 const& a) 183 | { 184 | static_assert(sizeof(T1) == sizeof(T2), "Size mismatch"); 185 | static_assert(T1::N == 
T2::N, "Element count mismatch"); 186 | 187 | T1 result; 188 | 189 | for (int i = 0; i < T1::N; ++i) 190 | { 191 | result.value[i] = static_cast(a.value[i]); 192 | } 193 | 194 | return result; 195 | } 196 | 197 | template 198 | FORCEINLINE I convert(I const& a) 199 | { 200 | return a; 201 | } 202 | 203 | template < 204 | typename I1, 205 | typename I2, 206 | typename = typename std::enable_if::value>::type, 207 | typename = typename std::enable_if<(std::is_signed::value && std::is_signed::value) 208 | || (!std::is_signed::value && !std::is_signed::value) 209 | >::type 210 | > 211 | FORCEINLINE I1 convert(I2 const& a) 212 | { 213 | if (sizeof(typename I1::value_type) < sizeof(typename I2::value_type)) 214 | { 215 | return convert_down(a); 216 | } 217 | else 218 | { 219 | return convert_up(a); 220 | } 221 | } 222 | 223 | template < 224 | typename T1, 225 | typename T2, 226 | typename = typename std::enable_if<(std::is_signed::value && !std::is_signed::value) 227 | || (!std::is_signed::value && std::is_signed::value) 228 | >::type 229 | > 230 | FORCEINLINE T1 convert(T2 const& a) 231 | { 232 | auto tmp = convert_static::value_type>(a); 233 | return convert(tmp); 234 | } 235 | 236 | 237 | //------------------------------------------------------------------------------------------------- 238 | // 239 | // 240 | 241 | template 242 | I signed_saturated_add(I a, I b) 243 | { 244 | I sum = a + b; 245 | if (a >= 0 && b >= 0) 246 | return (a > std::numeric_limits::max() - b) 247 | ? std::numeric_limits::max() 248 | : a + b 249 | ; 250 | else if (a < 0 && b < 0) 251 | return (a < std::numeric_limits::min() - b) 252 | ? std::numeric_limits::min() 253 | : a + b 254 | ; 255 | else 256 | return a + b; 257 | } 258 | 259 | template 260 | UI unsigned_saturated_add(UI a, UI b) 261 | { 262 | return (a > std::numeric_limits::max() - b) 263 | ? 
// Subtracts b from a, clamping the result to the range of I.
//
// I - signed integer type
// a - minuend
// b - subtrahend
//
// Returns a - b when representable in I, otherwise the nearest bound of I.
// The bound checks are phrased as "min + b" / "max + b" so that they cannot
// overflow themselves.
template <typename I>
I signed_saturated_sub(I a, I b)
{
    typedef std::numeric_limits<I> limits;

    if (b > 0)
    {
        // Subtracting a positive value can only underflow, and only when the
        // minuend is already negative.
        if (a < 0 && a < limits::min() + b)
        {
            return limits::min();
        }
    }
    else if (b < 0)
    {
        // Subtracting a negative value can only overflow, and only when the
        // minuend is non-negative. For a == 0 the test below is true exactly
        // when b == min(), whose negation is not representable.
        if (a >= 0 && a > limits::max() + b)
        {
            return limits::max();
        }
    }

    return a - b;
}
stride]), 71 | // &value, 72 | // sizeof(T) 73 | // ); 74 | for (int i = 0; i < stride; ++i) 75 | { 76 | #pragma HLS PIPELINE 77 | uint8_t tmp = (value >> (i * 8)) & 0xFF; 78 | 79 | raw[index * stride + i] = tmp; 80 | } 81 | } 82 | 83 | reference& operator=(reference const& rhs) 84 | { 85 | if (&rhs != this) 86 | { 87 | reset(rhs.value); 88 | index = rhs.index; 89 | raw = rhs.raw; 90 | } 91 | 92 | return *this; 93 | } 94 | 95 | reference& operator=(reference const&& rhs) 96 | { 97 | if (&rhs != this) 98 | { 99 | reset(std::move(rhs.value)); 100 | index = std::move(rhs.index); 101 | raw = std::move(rhs.raw); 102 | } 103 | 104 | return *this; 105 | } 106 | 107 | operator T const&() const 108 | { 109 | return value; 110 | } 111 | 112 | operator T&() 113 | { 114 | return value; 115 | } 116 | 117 | void swap(reference rhs) 118 | { 119 | T value_l = value; 120 | T value_r = rhs.value; 121 | 122 | reset(value_r); 123 | rhs.reset(value_l); 124 | } 125 | 126 | // Manipulate value ----------------------------------- 127 | 128 | reference& operator=(T const& val) 129 | { 130 | reset(val); 131 | return *this; 132 | } 133 | 134 | reference& operator+=(T const& val) 135 | { 136 | reset(value + val); 137 | return *this; 138 | } 139 | 140 | reference& operator-=(T const& val) 141 | { 142 | reset(value - val); 143 | return *this; 144 | } 145 | 146 | reference& operator*=(T const& val) 147 | { 148 | reset(value * val); 149 | return *this; 150 | } 151 | 152 | reference& operator/=(T const& val) 153 | { 154 | reset(value / val); 155 | return *this; 156 | } 157 | 158 | reference& operator%=(T const& val) 159 | { 160 | reset(value % val); 161 | return *this; 162 | } 163 | 164 | reference& operator<<=(T const& val) 165 | { 166 | reset(value << val); 167 | return *this; 168 | } 169 | 170 | reference& operator>>=(T const& val) 171 | { 172 | reset(value >> val); 173 | return *this; 174 | } 175 | 176 | reference& operator&=(T const& val) 177 | { 178 | reset(value & val); 179 | return *this; 
180 | } 181 | 182 | reference& operator^=(T const& val) 183 | { 184 | reset(value ^ val); 185 | return *this; 186 | } 187 | 188 | reference& operator|=(T const& val) 189 | { 190 | reset(value | val); 191 | return *this; 192 | } 193 | }; 194 | 195 | template 196 | struct const_reference 197 | { 198 | typedef config::difference_type difference_type; 199 | typedef T value_type; 200 | 201 | T value; 202 | difference_type index; 203 | volatile uint8_t* raw; 204 | 205 | const_reference() 206 | : value(0) 207 | , index(0) 208 | , raw(0) 209 | { 210 | 211 | } 212 | 213 | operator T const&() const 214 | { 215 | return value; 216 | } 217 | }; 218 | 219 | template 220 | inline void swap(reference a, reference b) 221 | { 222 | a.swap(b); 223 | } 224 | 225 | } // namespace randit 226 | } // namespace detail 227 | 228 | 229 | template 230 | class rand_iterator : public std::iterator 231 | { 232 | static_assert( sizeof(T) >= sizeof(uint8_t), "Size mismatch"); 233 | 234 | public: 235 | typedef config::size_type size_type; 236 | typedef config::difference_type difference_type; 237 | 238 | typedef typename detail::randit::reference reference; 239 | typedef typename detail::randit::const_reference const_reference; 240 | 241 | template 242 | friend rand_iterator operator+(rand_iterator const& a, difference_type i); 243 | 244 | public: 245 | rand_iterator() 246 | : raw_(0) 247 | , pos_(0) 248 | { 249 | } 250 | 251 | explicit rand_iterator(volatile uint8_t* raw) 252 | : raw_(raw) 253 | , pos_(0) 254 | { 255 | } 256 | 257 | rand_iterator(volatile uint8_t* raw, difference_type pos) 258 | : raw_(raw) 259 | , pos_(pos) 260 | { 261 | } 262 | 263 | rand_iterator(rand_iterator const& rhs) 264 | : raw_(rhs.raw_) 265 | , pos_(rhs.pos_) 266 | { 267 | } 268 | 269 | ~rand_iterator() 270 | { 271 | } 272 | 273 | rand_iterator& operator=(rand_iterator const& rhs) 274 | { 275 | if (&rhs != this) 276 | { 277 | raw_ = rhs.raw_; 278 | pos_ = rhs.pos_; 279 | } 280 | 281 | return *this; 282 | } 283 | 284 | 
reference operator[](difference_type n) 285 | { 286 | reference result; 287 | 288 | config::size_type stride = sizeof(T); 289 | 290 | result.raw = raw_; 291 | result.index = pos_ + n; 292 | 293 | // std::memcpy( 294 | // &result.value, 295 | // (uint8_t const*)(&raw_[n * stride]), 296 | // sizeof(T) 297 | // ); 298 | for (int i = 0; i < stride; ++i) 299 | { 300 | #pragma HLS PIPELINE 301 | T val = raw_[(pos_ + n) * stride + i] << (i * 8); 302 | result.value += val; 303 | } 304 | 305 | return result; 306 | } 307 | 308 | const_reference operator[](difference_type n) const 309 | { 310 | reference result; 311 | 312 | config::size_type stride = sizeof(T); 313 | 314 | result.index = pos_ + n; 315 | result.raw = raw_; 316 | 317 | // std::memcpy( 318 | // &result.value, 319 | // (uint8_t const*)(&raw_[n * stride]), 320 | // sizeof(T) 321 | // ); 322 | for (int i = 0; i < stride; ++i) 323 | { 324 | #pragma HLS PIPELINE 325 | T val = raw_[(pos_ + n) * stride + i] << (i * 8); 326 | result.value += val; 327 | } 328 | 329 | return result; 330 | } 331 | 332 | reference operator*() 333 | { 334 | return operator[](0); 335 | } 336 | 337 | const_reference operator*() const 338 | { 339 | return operator[](0); 340 | } 341 | 342 | rand_iterator& operator++() 343 | { 344 | pos_ += 1; 345 | return *this; 346 | } 347 | 348 | rand_iterator& operator--() 349 | { 350 | pos_ -= 1; 351 | return *this; 352 | } 353 | 354 | rand_iterator operator++(int) 355 | { 356 | rand_iterator old = *this; 357 | this->operator++(); 358 | return old; 359 | } 360 | 361 | volatile difference_type& pos() 362 | { 363 | return pos_; 364 | } 365 | 366 | volatile difference_type const& pos() const 367 | { 368 | return pos_; 369 | } 370 | 371 | volatile uint8_t* data() 372 | { 373 | return raw_; 374 | } 375 | 376 | volatile uint8_t const* data() const 377 | { 378 | return raw_; 379 | } 380 | 381 | void swap(rand_iterator rhs) 382 | { 383 | (*this).swap(*rhs); 384 | } 385 | 386 | private: 387 | volatile uint8_t* raw_; 
388 | volatile difference_type pos_; 389 | 390 | }; 391 | 392 | 393 | template 394 | bool operator==(rand_iterator a, rand_iterator b) 395 | { 396 | //TODO: store some hash to avoid pointer comparison 397 | return /*a.data() == b.data() &&*/ a.pos() == b.pos(); 398 | } 399 | 400 | template 401 | bool operator!=(rand_iterator a, rand_iterator b) 402 | { 403 | //TODO: store some hash to avoid pointer comparison 404 | return /*a.data() != b.data() ||*/ a.pos() != b.pos(); 405 | } 406 | 407 | template 408 | rand_iterator operator+(rand_iterator const& a, typename rand_iterator::difference_type n) 409 | { 410 | rand_iterator result(a); 411 | result.pos() = a.pos() + n; 412 | return result; 413 | } 414 | 415 | template 416 | rand_iterator operator-(rand_iterator const& a, typename rand_iterator::difference_type n) 417 | { 418 | rand_iterator result(a); 419 | result.pos() = a.pos() - n; 420 | return result; 421 | } 422 | 423 | template 424 | typename rand_iterator::difference_type operator-(rand_iterator a, rand_iterator b) 425 | { 426 | return a.pos() - b.pos(); 427 | } 428 | 429 | template 430 | rand_iterator& operator+=(rand_iterator& ptr, typename rand_iterator::difference_type n) 431 | { 432 | ptr = ptr + n; 433 | return ptr; 434 | } 435 | 436 | //template 437 | //static_pointer& operator-=( 438 | // static_pointer& ptr, 439 | // typename static_pointer::difference_type n 440 | // ) 441 | //{ 442 | // ptr = ptr - n; 443 | // return ptr; 444 | //} 445 | 446 | } // namespace burst 447 | 448 | 449 | namespace std 450 | { 451 | 452 | template 453 | struct iterator_traits > 454 | { 455 | typedef T value_type; 456 | typedef typename burst::rand_iterator::iterator_category iterator_category; 457 | typedef typename burst::rand_iterator::reference reference; 458 | typedef typename burst::rand_iterator::const_reference const_reference; 459 | typedef typename burst::rand_iterator::difference_type difference_type; 460 | }; 461 | 462 | 463 | template 464 | void 
swap(burst::detail::randit::reference a, burst::detail::randit::reference b) 465 | { 466 | burst::detail::randit::swap(a, b); 467 | } 468 | 469 | } // namespace std 470 | -------------------------------------------------------------------------------- /include/burst/simd/emmintrin.h: -------------------------------------------------------------------------------- 1 | // This file is distributed under the MIT license. 2 | // See the LICENSE file for details. 3 | 4 | #pragma once 5 | 6 | #include 7 | #include 8 | 9 | #include "detail/vec_types.h" 10 | 11 | 12 | //------------------------------------------------------------------------------------------------- 13 | // Types 14 | // 15 | 16 | typedef detail::int32x2_t __m64; 17 | typedef detail::int64x2_t __m128i; 18 | 19 | 20 | //------------------------------------------------------------------------------------------------- 21 | // Load / store 22 | // 23 | 24 | FORCEINLINE __m128i _mm_load_si128(__m128i const* mem_addr) 25 | { 26 | return *mem_addr; 27 | } 28 | 29 | FORCEINLINE void _mm_store_si128(__m128i* mem_addr, __m128i const& a) 30 | { 31 | mem_addr[0] = a; 32 | } 33 | 34 | 35 | //------------------------------------------------------------------------------------------------- 36 | // Set 37 | // 38 | 39 | FORCEINLINE __m128i _mm_set1_epi8(int8_t a) 40 | { 41 | detail::int8x16_t tmp; 42 | for (int i = 0; i < 16; ++i) 43 | { 44 | tmp.value[i] = a; 45 | } 46 | return detail::convert<__m128i>(tmp); 47 | } 48 | 49 | FORCEINLINE __m128i _mm_set1_epi16(int16_t a) 50 | { 51 | detail::int16x8_t tmp; 52 | for (int i = 0; i < 8; ++i) 53 | { 54 | tmp.value[i] = a; 55 | } 56 | return detail::convert<__m128i>(tmp); 57 | } 58 | 59 | FORCEINLINE __m128i _mm_set1_epi32(int32_t a) 60 | { 61 | detail::int32x4_t tmp; 62 | for (int i = 0; i < 4; ++i) 63 | { 64 | tmp.value[i] = a; 65 | } 66 | return detail::convert<__m128i>(tmp); 67 | } 68 | 69 | //FORCEINLINE __m128i _mm_set1_epi64(__m64 const& a) 70 | //{ 71 | // 
detail::int64x2_t tmp; 72 | // for (int i = 0; i < 2; ++i) 73 | // { 74 | // tmp.value[i] = a; 75 | // } 76 | // return detail::convert<__m128i>(tmp); 77 | //} 78 | 79 | FORCEINLINE __m128i _mm_set_epi8( 80 | int8_t a16, int8_t a15, int8_t a14, int8_t a13, 81 | int8_t a12, int8_t a11, int8_t a10, int8_t a9, 82 | int8_t a8, int8_t a7, int8_t a6, int8_t a5, 83 | int8_t a4, int8_t a3, int8_t a2, int8_t a1 84 | ) 85 | { 86 | detail::int8x16_t tmp = { a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16 }; 87 | return detail::convert<__m128i>(tmp); 88 | } 89 | 90 | FORCEINLINE __m128i _mm_set_epi16( 91 | int16_t a8, int16_t a7, int16_t a6, int16_t a5, 92 | int16_t a4, int16_t a3, int16_t a2, int16_t a1 93 | ) 94 | { 95 | detail::int16x8_t tmp = { a1, a2, a3, a4, a5, a6, a7, a8 }; 96 | return detail::convert<__m128i>(tmp); 97 | } 98 | 99 | FORCEINLINE __m128i _mm_set_epi32(int32_t a4, int32_t a3, int32_t a2, int32_t a1) 100 | { 101 | detail::int32x4_t tmp = { a1, a2, a3, a4 }; 102 | return detail::convert<__m128i>(tmp); 103 | } 104 | 105 | //FORCEINLINE __m128i _mm_set_epi64(__m64 const& a2, __m64 const& a1) 106 | //{ 107 | // detail::int64x2_t tmp = { a1, a2 }; 108 | // return detail::convert<__m128i>(tmp); 109 | //} 110 | 111 | 112 | //------------------------------------------------------------------------------------------------- 113 | // Basic arithmetic 114 | // 115 | 116 | FORCEINLINE __m128i _mm_add_epi8(__m128i const& a, __m128i const& b) 117 | { 118 | auto a8 = detail::convert(a); 119 | auto b8 = detail::convert(b); 120 | detail::int8x16_t c8; 121 | 122 | for (int i = 0; i < 16; ++i) 123 | { 124 | c8.value[i] = a8.value[i] + b8.value[i]; 125 | } 126 | 127 | return detail::convert<__m128i>(c8); 128 | } 129 | 130 | FORCEINLINE __m128i _mm_add_epi16(__m128i const& a, __m128i const& b) 131 | { 132 | auto a16 = detail::convert(a); 133 | auto b16 = detail::convert(b); 134 | detail::int16x8_t c16; 135 | 136 | for (int i = 0; i < 8; ++i) 137 | { 138 | 
c16.value[i] = a16.value[i] + b16.value[i]; 139 | } 140 | 141 | return detail::convert<__m128i>(c16); 142 | } 143 | 144 | FORCEINLINE __m128i _mm_add_epi32(__m128i const& a, __m128i const& b) 145 | { 146 | auto a32 = detail::convert(a); 147 | auto b32 = detail::convert(b); 148 | detail::int32x4_t c32; 149 | 150 | for (int i = 0; i < 4; ++i) 151 | { 152 | c32.value[i] = a32.value[i] + b32.value[i]; 153 | } 154 | 155 | return detail::convert<__m128i>(c32); 156 | } 157 | 158 | //FORCEINLINE __m128i _mm_add_epi64(__m128i const& a, __m128i const& b) 159 | //{ 160 | // auto a64 = detail::convert(a); 161 | // auto b64 = detail::convert(b); 162 | // detail::int64x2_t c64; 163 | // 164 | // for (int i = 0; i < 2; ++i) 165 | // { 166 | // c64.value[i] = a64.value[i] + b64.value[i]; 167 | // } 168 | // 169 | // return detail::convert<__m128i>(c64); 170 | //} 171 | 172 | 173 | //------------------------------------------------------------------------------------------------- 174 | // Saturated add/sub 175 | // 176 | 177 | FORCEINLINE __m128i _mm_adds_epi8(__m128i const& a, __m128i const& b) 178 | { 179 | auto a8 = detail::convert(a); 180 | auto b8 = detail::convert(b); 181 | detail::int8x16_t c8; 182 | 183 | for (int i = 0; i < 16; ++i) 184 | { 185 | c8.value[i] = detail::signed_saturated_add(a8.value[i], b8.value[i]); 186 | } 187 | 188 | return detail::convert<__m128i>(c8); 189 | } 190 | 191 | FORCEINLINE __m128i _mm_adds_epi16(__m128i const& a, __m128i const& b) 192 | { 193 | auto a16 = detail::convert(a); 194 | auto b16 = detail::convert(b); 195 | detail::int16x8_t c16; 196 | 197 | for (int i = 0; i < 8; ++i) 198 | { 199 | c16.value[i] = detail::signed_saturated_add(a16.value[i], b16.value[i]); 200 | } 201 | 202 | return detail::convert<__m128i>(c16); 203 | } 204 | 205 | FORCEINLINE __m128i _mm_adds_epi32(__m128i const& a, __m128i const& b) 206 | { 207 | auto a32 = detail::convert(a); 208 | auto b32 = detail::convert(b); 209 | detail::int32x4_t c32; 210 | 211 | for (int i = 
0; i < 4; ++i) 212 | { 213 | c32.value[i] = detail::signed_saturated_add(a32.value[i], b32.value[i]); 214 | } 215 | 216 | return detail::convert<__m128i>(c32); 217 | } 218 | 219 | FORCEINLINE __m128i _mm_adds_epu8(__m128i const& a, __m128i const& b) 220 | { 221 | auto a8 = detail::convert(a); 222 | auto b8 = detail::convert(b); 223 | detail::uint8x16_t c8; 224 | 225 | for (int i = 0; i < 16; ++i) 226 | { 227 | c8.value[i] = detail::unsigned_saturated_add(a8.value[i], b8.value[i]); 228 | } 229 | 230 | return detail::convert<__m128i>(c8); 231 | } 232 | 233 | FORCEINLINE __m128i _mm_adds_epu16(__m128i const& a, __m128i const& b) 234 | { 235 | auto a16 = detail::convert(a); 236 | auto b16 = detail::convert(b); 237 | detail::uint16x8_t c16; 238 | 239 | for (int i = 0; i < 8; ++i) 240 | { 241 | c16.value[i] = detail::unsigned_saturated_add(a16.value[i], b16.value[i]); 242 | } 243 | 244 | return detail::convert<__m128i>(c16); 245 | } 246 | 247 | FORCEINLINE __m128i _mm_subs_epi8(__m128i const& a, __m128i const& b) 248 | { 249 | auto a8 = detail::convert(a); 250 | auto b8 = detail::convert(b); 251 | detail::int8x16_t c8; 252 | 253 | for (int i = 0; i < 16; ++i) 254 | { 255 | c8.value[i] = detail::signed_saturated_sub(a8.value[i], b8.value[i]); 256 | } 257 | 258 | return detail::convert<__m128i>(c8); 259 | } 260 | 261 | FORCEINLINE __m128i _mm_subs_epi16(__m128i const& a, __m128i const& b) 262 | { 263 | auto a16 = detail::convert(a); 264 | auto b16 = detail::convert(b); 265 | detail::int16x8_t c16; 266 | 267 | for (int i = 0; i < 8; ++i) 268 | { 269 | c16.value[i] = detail::signed_saturated_sub(a16.value[i], b16.value[i]); 270 | } 271 | 272 | return detail::convert<__m128i>(c16); 273 | } 274 | 275 | FORCEINLINE __m128i _mm_subs_epi32(__m128i const& a, __m128i const& b) 276 | { 277 | auto a32 = detail::convert(a); 278 | auto b32 = detail::convert(b); 279 | detail::int32x4_t c32; 280 | 281 | for (int i = 0; i < 4; ++i) 282 | { 283 | c32.value[i] = 
detail::signed_saturated_sub(a32.value[i], b32.value[i]); 284 | } 285 | 286 | return detail::convert<__m128i>(c32); 287 | } 288 | 289 | FORCEINLINE __m128i _mm_subs_epu8(__m128i const& a, __m128i const& b) 290 | { 291 | auto a8 = detail::convert(a); 292 | auto b8 = detail::convert(b); 293 | detail::uint8x16_t c8; 294 | 295 | for (int i = 0; i < 16; ++i) 296 | { 297 | c8.value[i] = detail::unsigned_saturated_sub(a8.value[i], b8.value[i]); 298 | } 299 | 300 | return detail::convert<__m128i>(c8); 301 | } 302 | 303 | FORCEINLINE __m128i _mm_subs_epu16(__m128i const& a, __m128i const& b) 304 | { 305 | auto a16 = detail::convert(a); 306 | auto b16 = detail::convert(b); 307 | detail::uint16x8_t c16; 308 | 309 | for (int i = 0; i < 8; ++i) 310 | { 311 | c16.value[i] = detail::unsigned_saturated_sub(a16.value[i], b16.value[i]); 312 | } 313 | 314 | return detail::convert<__m128i>(c16); 315 | } 316 | 317 | 318 | //------------------------------------------------------------------------------------------------- 319 | // Bitwise 320 | // 321 | 322 | FORCEINLINE __m128i _mm_slli_si128(__m128i const& a, int imm) 323 | { 324 | auto a8 = detail::convert(a); 325 | detail::int8x16_t result; 326 | 327 | for (int i = 0; i < 16; ++i) 328 | { 329 | result.value[i] = i - imm >= 0 ? a8.value[i - imm] : 0; 330 | } 331 | 332 | return detail::convert<__m128i>(result); 333 | } 334 | 335 | FORCEINLINE __m128i _mm_srli_si128(__m128i const& a, int imm) 336 | { 337 | auto a8 = detail::convert(a); 338 | detail::int8x16_t result; 339 | 340 | for (int i = 0; i < 16; ++i) 341 | { 342 | result.value[i] = i + imm < 16 ? 
a8.value[i + imm] : 0; 343 | } 344 | 345 | return detail::convert<__m128i>(result); 346 | } 347 | 348 | 349 | //------------------------------------------------------------------------------------------------- 350 | // Comparisons 351 | // 352 | 353 | FORCEINLINE __m128i _mm_cmpeq_epi8(__m128i const& a, __m128i const& b) 354 | { 355 | auto a8 = detail::convert(a); 356 | auto b8 = detail::convert(b); 357 | detail::int8x16_t c8; 358 | 359 | for (int i = 0; i < 16; ++i) 360 | { 361 | c8.value[i] = a8.value[i] == b8.value[i] ? 0xFF : 0x0; 362 | } 363 | 364 | return detail::convert<__m128i>(c8); 365 | } 366 | 367 | FORCEINLINE __m128i _mm_cmpeq_epi16(__m128i const& a, __m128i const& b) 368 | { 369 | auto a16 = detail::convert(a); 370 | auto b16 = detail::convert(b); 371 | detail::int16x8_t c16; 372 | 373 | for (int i = 0; i < 8; ++i) 374 | { 375 | c16.value[i] = a16.value[i] == b16.value[i] ? 0xFFFF : 0x0; 376 | } 377 | 378 | return detail::convert<__m128i>(c16); 379 | } 380 | 381 | FORCEINLINE __m128i _mm_cmpgt_epi8(__m128i const& a, __m128i const& b) 382 | { 383 | auto a8 = detail::convert(a); 384 | auto b8 = detail::convert(b); 385 | detail::int8x16_t c8; 386 | 387 | for (int i = 0; i < 16; ++i) 388 | { 389 | c8.value[i] = a8.value[i] > b8.value[i] ? 0xFF : 0x0; 390 | } 391 | 392 | return detail::convert<__m128i>(c8); 393 | } 394 | 395 | FORCEINLINE __m128i _mm_cmpgt_epi16(__m128i const& a, __m128i const& b) 396 | { 397 | auto a16 = detail::convert(a); 398 | auto b16 = detail::convert(b); 399 | detail::int16x8_t c16; 400 | 401 | for (int i = 0; i < 8; ++i) 402 | { 403 | c16.value[i] = a16.value[i] > b16.value[i] ? 
0xFFFF : 0x0; 404 | } 405 | 406 | return detail::convert<__m128i>(c16); 407 | } 408 | 409 | 410 | //------------------------------------------------------------------------------------------------- 411 | // Compact 412 | // 413 | 414 | int _mm_extract_epi16(__m128i const& a, int imm) 415 | { 416 | assert(imm >= 0 && imm < 8); 417 | auto a16 = detail::convert(a); 418 | return a16.value[imm]; 419 | } 420 | 421 | int _mm_movemask_epi8(__m128i const& a) 422 | { 423 | auto a8 = detail::convert(a); 424 | return 425 | (a8.value[15] >> 7) << 15 | 426 | (a8.value[14] >> 7) << 14 | 427 | (a8.value[13] >> 7) << 13 | 428 | (a8.value[12] >> 7) << 12 | 429 | (a8.value[11] >> 7) << 11 | 430 | (a8.value[10] >> 7) << 10 | 431 | (a8.value[ 9] >> 7) << 9 | 432 | (a8.value[ 8] >> 7) << 8 | 433 | (a8.value[ 7] >> 7) << 7 | 434 | (a8.value[ 6] >> 7) << 6 | 435 | (a8.value[ 5] >> 7) << 5 | 436 | (a8.value[ 4] >> 7) << 4 | 437 | (a8.value[ 3] >> 7) << 3 | 438 | (a8.value[ 2] >> 7) << 2 | 439 | (a8.value[ 1] >> 7) << 1 | 440 | (a8.value[ 0] >> 7); 441 | } 442 | 443 | 444 | //------------------------------------------------------------------------------------------------- 445 | // stdlib-like 446 | // 447 | 448 | FORCEINLINE __m128i _mm_max_epu8(__m128i const& a, __m128i const& b) 449 | { 450 | auto a8 = detail::convert(a); 451 | auto b8 = detail::convert(b); 452 | detail::uint8x16_t c8; 453 | 454 | for (int i = 0; i < 16; ++i) 455 | { 456 | c8.value[i] = a8.value[i] < b8.value[i] 457 | ? b8.value[i] 458 | : a8.value[i] 459 | ; 460 | } 461 | 462 | return detail::convert<__m128i>(c8); 463 | } 464 | 465 | FORCEINLINE __m128i _mm_max_epi16(__m128i const& a, __m128i const& b) 466 | { 467 | auto a16 = detail::convert(a); 468 | auto b16 = detail::convert(b); 469 | detail::int16x8_t c16; 470 | 471 | for (int i = 0; i < 8; ++i) 472 | { 473 | c16.value[i] = a16.value[i] < b16.value[i] 474 | ? 
b16.value[i] 475 | : a16.value[i] 476 | ; 477 | } 478 | 479 | return detail::convert<__m128i>(c16); 480 | } 481 | --------------------------------------------------------------------------------