├── .gitignore
├── src
│   ├── dynvec.rs
│   ├── benching.rs
│   ├── random.rs
│   ├── dimensional.rs
│   ├── sse.rs
│   ├── avx.rs
│   ├── serialize.rs
│   ├── advanced.rs
│   ├── view.rs
│   ├── insights.rs
│   ├── templatemetamath.rs
│   ├── base.rs
│   ├── layout.rs
│   ├── consts.rs
│   ├── types.rs
│   └── lib.rs
├── hooks
│   └── pre-commit
├── rustfmt.toml
├── Cargo.toml
├── benches
│   └── simd.rs
└── README.md

/.gitignore:
--------------------------------------------------------------------------------
/target
Cargo.lock
--------------------------------------------------------------------------------
/src/dynvec.rs:
--------------------------------------------------------------------------------
struct DynVec<'a, T> {
	inner: &'a [T],
	len: usize,
}
--------------------------------------------------------------------------------
/hooks/pre-commit:
--------------------------------------------------------------------------------
#!/bin/bash

set -e

cargo fmt -- --check
cargo test
cargo sync-readme --check
--------------------------------------------------------------------------------
/src/benching.rs:
--------------------------------------------------------------------------------
use crate::{ConstIndex, Stupidity, Vector};

const S: usize = 250;
pub fn add(a: &Vector<f32, S>, b: &Vector<f32, S>) -> Vector<f32, S> { a + b }
pub fn internal_add(a: &Vector<f32, S>, b: &Vector<f32, S>) -> Vector<f32, S> {
	Vector::build_with_fn(|i| {
		let a: &f32 = a.i(i);
		let b: &f32 = b.i(i);
		a + b
	})
}
--------------------------------------------------------------------------------
/src/random.rs:
--------------------------------------------------------------------------------
use crate::{types::Stupidity, Vector};
use rand::{
	distributions::{Distribution, Standard},
	Rng,
};

impl<T, const N: usize> Distribution<Vector<T, N>> for Standard
where
	Standard: Distribution<T>,
{
	fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Vector<T, N> {
		Vector::build_with_fn(|_| rng.gen())
	}
}
--------------------------------------------------------------------------------
/rustfmt.toml:
--------------------------------------------------------------------------------
hard_tabs = true

fn_single_line = true
struct_lit_single_line = true

imports_granularity = "Crate"
use_field_init_shorthand = true

overflow_delimited_expr = true
#blank_lines_lower_bound = 1
format_code_in_doc_comments = true
match_block_trailing_comma = true
newline_style = "Unix"
version = "Two"
normalize_doc_attributes = true
unstable_features = true
--------------------------------------------------------------------------------
/src/dimensional.rs:
--------------------------------------------------------------------------------
#![allow(dead_code)]
use core::marker::PhantomData;

enum One<const N: usize> {}

// would love for this to be an enum, rust issue #32739
struct Multi<T: Dimension, const N: usize> {
	marker: PhantomData<T>,
}

trait Dimension {
	const MUL: usize;
	const DIMS: usize;
}

impl<const N: usize> Dimension for One<N> {
	const MUL: usize = N;
	const DIMS: usize = 1;
}

impl<T: Dimension, const N: usize> Dimension for Multi<T, N> {
	const MUL: usize = N * T::MUL;
	const DIMS: usize = T::DIMS + 1;
}

struct Mathable<T, Dim: Dimension> {
	// cool, so ice fixed, but by removing functionality.
	// I do indeed want to depend on the generic parameter.
	// that is kind of the point.
	inner: [T; Dim::MUL],
	marker: PhantomData<Dim>,
}
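// a sketch of how the encoding is meant to compose (hypothetical usage, the
// module is not wired up anywhere yet): Multi<Multi<One<4>, 3>, 2> reads as
// a 2 x 3 x 4 tensor, so MUL = 24 elements across DIMS = 3 dimensions.
#[cfg(test)]
mod dimension_tests {
	use super::*;
	type Cube = Multi<Multi<One<4>, 3>, 2>;

	#[test]
	fn dims_compose() {
		assert_eq!(<Cube as Dimension>::MUL, 24);
		assert_eq!(<Cube as Dimension>::DIMS, 3);
	}
}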
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "optimath"
version = "0.3.1"
authors = ["djugei
--------------------------------------------------------------------------------
/src/sse.rs:
--------------------------------------------------------------------------------
use crate::Vector;
use core::{
	arch::x86_64::{_mm_add_ps, _mm_loadu_ps, _mm_storeu_ps},
	mem::size_of,
	ops::Add,
};

impl<'a, const N: usize> Add for &'a Vector<f32, N> {
	type Output = Vector<f32, N>;
	fn add(self, other: Self) -> Vector<f32, N> {
		let simd_width = 128 / 8;
		let simd_elements = simd_width / size_of::<f32>();

		let simd_self = self.inner.chunks_exact(simd_elements);
		let remainder_self = simd_self.remainder();

		let simd_other = other.inner.chunks_exact(simd_elements);
		let remainder_other = simd_other.remainder();

		let simd = simd_self
			.zip(simd_other)
			.map(|(s, o)| unsafe {
				let s = _mm_loadu_ps(s.as_ptr());
				let o = _mm_loadu_ps(o.as_ptr());
				let res = _mm_add_ps(s, o);
				let mut dst = [0.; 4];
				// dst is only guaranteed 4-byte aligned, so the unaligned
				// store has to be used, the aligned _mm_store_ps would be
				// undefined behaviour here
				_mm_storeu_ps(dst.as_mut_ptr(), res);
				Vector { inner: dst }
			})
			.flatten();

		let remainder = remainder_self
			.iter()
			.zip(remainder_other)
			.map(|(s, o)| Add::add(s, o));

		simd.chain(remainder).collect()
	}
}
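
// sanity-check sketch that the simd path agrees with plain scalar addition,
// including the remainder lanes of a length that is not a multiple of 4
// (the vector length 7 here is an arbitrary choice for the test)
#[cfg(test)]
#[test]
fn simd_add_matches_scalar() {
	use crate::Stupidity;
	let a: Vector<f32, 7> = Vector::build_with_fn(|i| i as f32);
	let b: Vector<f32, 7> = Vector::build_with_fn(|i| (i * 3) as f32);
	let c = &a + &b;
	for i in 0..7 {
		assert_eq!(c[i], a[i] + b[i]);
	}
}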
--------------------------------------------------------------------------------
/src/avx.rs:
--------------------------------------------------------------------------------
use crate::Vector;
use core::{
	arch::x86_64::{_mm256_add_ps, _mm256_loadu_ps, _mm256_storeu_ps},
	mem::size_of,
	ops::Add,
};

impl<'a, const N: usize> Add for &'a Vector<f32, N> {
	type Output = Vector<f32, N>;
	fn add(self, other: Self) -> Vector<f32, N> {
		let simd_width = 256 / 8;
		let simd_elements = simd_width / size_of::<f32>();

		assert_eq!(simd_elements, 8);

		let simd_self = self.inner.chunks_exact(simd_elements);
		let remainder_self = simd_self.remainder();

		let simd_other = other.inner.chunks_exact(simd_elements);
		let remainder_other = simd_other.remainder();

		let simd = simd_self
			.zip(simd_other)
			.map(|(s, o)| unsafe {
				let s = _mm256_loadu_ps(s.as_ptr());
				let o = _mm256_loadu_ps(o.as_ptr());
				let res = _mm256_add_ps(s, o);
				let mut dst = [0.; 8];
				// unaligned store for the same reason as in sse.rs: dst is
				// not guaranteed to be 32-byte aligned
				_mm256_storeu_ps(dst.as_mut_ptr(), res);
				Vector { inner: dst }
			})
			.flatten();

		let remainder = remainder_self
			.iter()
			.zip(remainder_other)
			.map(|(s, o)| Add::add(s, o));

		simd.chain(remainder).collect()
	}
}
--------------------------------------------------------------------------------
/src/serialize.rs:
--------------------------------------------------------------------------------
use crate::Vector;
use serde::ser::{Serialize, SerializeTuple, Serializer};

impl<T, const N: usize> Serialize for Vector<T, N>
where
	T: Serialize,
{
	fn serialize<S: Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
		let mut el = s.serialize_tuple(N)?;
		for i in self {
			el.serialize_element(i)?;
		}
		el.end()
	}
}

use core::fmt;
use serde::de::{Deserialize, Deserializer, SeqAccess, Visitor};

impl<'de, T, const N: usize> Deserialize<'de> for Vector<T, N>
where
	T: Deserialize<'de>,
{
	fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
		let visitor = ElementVisitor::<T, N>(Default::default());
		d.deserialize_tuple(N, visitor)
	}
}

struct ElementVisitor<T, const N: usize>(core::marker::PhantomData<T>);

impl<'de, T: Deserialize<'de>, const N: usize> Visitor<'de> for ElementVisitor<T, N> {
	type Value = Vector<T, N>;
	fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
		write!(formatter, "a sequence of {} elements", N)
	}
	fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
	where
		A: SeqAccess<'de>,
	{
		//fixme: fail softly/by returning a Result
		use crate::types::Stupidity;
		Ok(Vector::<T, N>::build_with_fn(|_| {
			seq.next_element().unwrap().unwrap()
		}))
	}
}

#[test]
fn ser_de_test() {
	use crate::Matrix;
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();

	let matrix: Matrix<f32, 20, 40> = rng.gen();

	let mut buf: [u8; 20 * 40 * 4] = [0; 20 * 40 * 4];
	bincode::serialize_into(buf.as_mut_slice(), &matrix).unwrap();

	let decoded = bincode::deserialize(&buf[..]).unwrap();
	assert_eq!(matrix, decoded);
}
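
// because Vector serializes as a tuple rather than a seq, the encoding
// carries no length prefix: a Vector<f32, 4> occupies exactly 16 bytes in
// bincode, which is what makes the exact-size buffer above work. a small
// sketch of a test pinning that property down:
#[test]
fn tuple_encoding_is_fixed_size() {
	use crate::types::Stupidity;
	let v: Vector<f32, 4> = Vector::build_with_fn(|i| i as f32);
	assert_eq!(bincode::serialized_size(&v).unwrap(), 16);
}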
--------------------------------------------------------------------------------
/src/advanced.rs:
--------------------------------------------------------------------------------
//! non-element wise operations, like dot product and matrix multiplication
//! as such they need to explicitly be called

use crate::{
	types::{Matrix, Vector},
	view::{TransposedMatrixView, VectorView},
};

impl<T, const M: usize, const N: usize> Matrix<T, M, N> {
	pub fn transpose(&self) -> TransposedMatrixView<'_, T, N, M> {
		TransposedMatrixView { matrix: self }
	}
}

impl<'a, 'b, T: 'a + 'b + Clone + Copy + Default, const M: usize, const N: usize> Matrix<T, M, N>
where
	&'a T: core::ops::Mul<&'b T, Output = T>,
	T: core::iter::Sum,
{
	// todo: move into trait so this can be the default implementation, overridable at another point.
	pub fn matrix_multiply<const O: usize>(
		&'a self,
		other: &'b Matrix<T, N, O>,
	) -> Matrix<T, M, O> {
		//todo: do this without default-initializing
		let mut output = Matrix::default();
		if false {
			return output;
		}
		let sel: TransposedMatrixView<T, N, M> = self.transpose();

		for (row, o) in (0..O).zip(other) {
			let o: &'b Vector<T, N> = o;
			let col = &mut output[row];
			for (column, s) in (0..M).zip(sel) {
				let s: VectorView<T, N, M> = s;
				let field: &mut T = &mut col[column];
				*field = s.dot(o)
			}
		}
		output
	}
}

impl<'a, 'b, T: 'a + 'b, const M: usize, const N: usize> VectorView<'a, T, M, N>
where
	&'a T: core::ops::Mul<&'b T, Output = T>,
	T: core::iter::Sum,
{
	pub fn dot(self, other: &'b Vector<T, M>) -> T { (self * other).into_iter().sum() }
}

impl<'a, 'b, T: 'a + 'b, const M: usize> Vector<T, M>
where
	&'a T: core::ops::Mul<&'b T, Output = T>,
	T: core::iter::Sum,
{
	pub fn dot(&'a self, other: &'b Vector<T, M>) -> T { (self * other).into_iter().sum() }
}

#[test]
fn matrix_multiply() {
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();

	let a: Matrix<f32, 2, 3> = rng.gen();
	let b: Matrix<f32, 3, 4> = rng.gen();

	let _c: Matrix<f32, 2, 4> = a.matrix_multiply(&b);
}
--------------------------------------------------------------------------------
/src/view.rs:
--------------------------------------------------------------------------------
//! views on underlying vectors
//!
//! basically move through the data at different strides and offsets
//! currently only transposed matrices and contained flipped vectors
//!
//! the Index trait sucks hard
use crate::{consts::ConstIterator, types::Matrix};

#[derive(Debug)]
pub struct TransposedMatrixView<'a, T, const M: usize, const N: usize> {
	pub(crate) matrix: &'a Matrix<T, N, M>,
}

impl<'a, T, const M: usize, const N: usize> Copy for TransposedMatrixView<'a, T, M, N> {}
impl<'a, T, const M: usize, const N: usize> Clone for TransposedMatrixView<'a, T, M, N> {
	fn clone(&self) -> Self { *self }
}

impl<'a, T: 'a + Clone, const M: usize, const N: usize> TransposedMatrixView<'a, T, M, N> {
	pub fn materialize(self) -> Matrix<T, M, N> {
		self.into_iter()
			.map(IntoIterator::into_iter)
			.map(Iterator::cloned)
			.map(Iterator::collect)
			.collect()
	}
}

impl<'a, T, const M: usize, const N: usize> IntoIterator for TransposedMatrixView<'a, T, M, N> {
	type Item = VectorView<'a, T, M, N>;
	type IntoIter = ConstIterator<Self::Item, Self, N>;

	fn into_iter(self) -> Self::IntoIter { self.into() }
}

#[derive(Debug)]
pub struct VectorView<'a, T, const M: usize, const N: usize> {
	pub(crate) row: usize,
	pub(crate) matrix: &'a Matrix<T, N, M>,
}

impl<'a, T, const M: usize, const N: usize> Copy for VectorView<'a, T, M, N> {}
impl<'a, T, const M: usize, const N: usize> Clone for VectorView<'a, T, M, N> {
	fn clone(&self) -> Self { *self }
}

impl<'a, T, const M: usize, const N: usize> IntoIterator for VectorView<'a, T, M, N> {
	type Item = &'a T;
	type IntoIter = ConstIterator<&'a T, Self, M>;

	fn into_iter(self) -> Self::IntoIter { self.into() }
}

#[test]
fn transpose_bounds() {
	extern crate std;
	use std::println;
	let a: Matrix<f32, 2, 3> = (0..3)
		.map(|r| (0..2).map(|e| (e + (10 * r)) as f32).collect())
		.collect();
	println!("origin matrix: {:?}", a);
	let a2 = a.transpose().materialize().transpose().materialize();

	assert_eq!(a, a2);
}
--------------------------------------------------------------------------------
/src/insights.rs:
--------------------------------------------------------------------------------
//! Some insight about simd, autovectorization, const generics and specialization gained from building this library
//!
//! i used a lot of experimental parts of rust. this is supposed to give some feedback on how
//! usable i feel they are and where issues remain. This library is very Type Astronaut-y so this
//! feedback is for the bleeding edge and will not represent the average usecase.
//!
//! # SIMD
//! simd is a pain in the butt to deal with. This is true in general and specifically for rust.
//! Luckily autovectorization is quite reliable if you try to play nicely with the compiler
//!
//! packed_simd has more #[doc(hidden)] than i have fingers, especially the core defining trait,
//! SimdArray is private, which is annoying if you want to build a library thats generic over simd.
//!
//! simd is very platform dependent, so you have to choose to either just ask people to
//! "-C target-cpu=native" or you need to provide a way to at runtime switch between
//! implementations.
//!
//! right now you can't build your structs to suit your simd-needs because of compiler
//! bugs/unfinishedness
//!
//! # Autovectorization
//! Autovectorization actually works quite well, at least in my usecase. the problem is
//! reliability. i can't assure a loop actually gets vectorized. it is kinda similar to tail call
//! optimization. a solution would be to have an attribute that forces optimization or fails the
//! compile.
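//!
//! as a concrete illustration: the element-wise operations in this crate are
//! all the shape below (a simplified sketch of what base.rs generates), and
//! rustc reliably turns this into packed simd instructions when the sizes are
//! known and "-C target-cpu=native" is set, no intrinsics needed:
//!
//!     use optimath::Vector;
//!     fn add<const N: usize>(a: &Vector<f32, N>, b: &Vector<f32, N>) -> Vector<f32, N> {
//!         a.into_iter().zip(b).map(|(a, b)| a + b).collect()
//!     }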
//!
//! # Const Generics
//! Seem quite cool to me, but severely limited by the inability to build vectors of sizes
//! calculated at compile time. once you can do that some really nice stuff becomes possible.
//!
//! Limitations are that there is no dedicated place to do const calculations on structs as you
//! can't have associated constants on them. that will especially be annoying if const generics
//! need to come from the same "place/expression" to be considered equal. a workaround is to
//! have a wrapper type that does the calculations and an inner type that stores the results.
//! Right now that leads to the inner struct not being buildable, but i think thats the same bug
//! that also stops calculations in array sizes.
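//!
//! that workaround looks roughly like this sketch (which does not compile
//! today, which is exactly the limitation being described):
//!
//! ```ignore
//! struct Wrapper<const N: usize> {
//! 	// the wrapper does the const calculation once...
//! 	inner: Inner<{ N * 2 }>,
//! }
//! struct Inner<const LEN: usize> {
//! 	// ...and the inner type just stores the result
//! 	data: [f32; LEN],
//! }
//! ```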
//!
//! i feel like there is some higher level concept unexpressed, a type whose size is known, but not
//! necessarily at compile time, kinda similar to TrustedLen.
//!
//! also, slightly related, its not possible to build a completely different struct with different
//! fields based on generics or const generics. maybe thats a good thing though.
//!
//! # specialization
//! really cool concept, have not had too many issues except some weird type inference bugs on
//! (imo) unrelated places, but im not 100% sure those are due to specialization, could also be one
//! of the other 10 nightly features.
//!
//! whats a bit weird is having a trait that has associated types and methods taking or returning
//! that associated type, both being default.
//! because when writing a default implementation you can't assume anything about the type since it
//! could be overridden independently from the function. this is not clear from the error messages
//! though. a solution here is to constrain the associated type by traits in the trait definition
//! and only rely on trait methods. this does not work for return types though.
//!
//! also specialization feels suspiciously like inheritance
//!
//! # Generic Associated Types (GAT)
//! there are multiple places in the library where i think GATs would have been useful, mainly to
//! stop combinatorial explosion of different vector-types and Add/Sub/Mul between them. The
//! compiler would then on-demand be able to instantiate them into the code instead of me having to
//! macro them.
//!
//! so im looking forward to them being implemented so i can play with them a bit
//!
//! # The Index trait
//! sucks hard, cause it requires you to return an actual reference to something stored in the
//! type, can't use it to on the fly generate a view (not 100% true as there is the fat pointer
//! trick but thats not really... documented behaviour). had the Iterator trait made the same
//! decision rust would be in a horrible state right now.
--------------------------------------------------------------------------------
/src/templatemetamath.rs:
--------------------------------------------------------------------------------
//! build calculations in the type system, for better locality of operations
//!
//! instead of calculating the whole result for each step of the operation
//! calculate all steps of the operation for each element of the result.
//!
//! this might lead to less memory bandwidth used, as data gets worked on in one go.
//! might also lead to less cache locality though, as elements from all inputs are used
//! instead of all elements from one (two).
//! cache locality will be slightly mitigated, as operations will (soon (TM)) run on multiple data
//! at once.

//todo: add V type that wraps a Vector and implements virtual add/mul/sub/div, just a type to get
//your calculation started

//todo: add transpose/matrix multiplication, potentially switch from basing this off of ConstIndex
//to ConstIter

use crate::consts::{ConstIndex, ConstIterator};
use core::ops::*;

pub struct VAdd<T, L, R, LT, RT, const N: usize>
where
	// use Borrow maybe?
	// hard to abstract over lifetimes + owned/borrowed
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,
{
	l: L,
	r: R,
	m: core::marker::PhantomData<(T, LT, RT)>,
}

impl<T, L, R, LT, RT, const N: usize> Copy for VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,
{
}

impl<T, L, R, LT, RT, const N: usize> Clone for VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,
{
	fn clone(&self) -> Self { *self }
}

// this is safe because the underlying ConstIndex implementations are guaranteed to be safe
unsafe impl<T, L, R, LT, RT, const N: usize> ConstIndex<T, N> for VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,
{
	#[inline]
	fn i(self, index: usize) -> T {
		let l = self.l.i(index);
		let r = self.r.i(index);
		l + r
	}
}
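
// to make the laziness concrete: for v = VAdd::new(a, b) + c, evaluating
// v.i(3) computes a.i(3) + b.i(3) + c.i(3) on the spot; no intermediate
// vector for a + b is ever materialized. realize() below then just walks
// i(0)..i(N) once and collects the results.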

// this restricts other to const-Index to the same type as self
//
// this is not a necessary restriction, but rust type system does not allow for expressing anything
// more generic due to "unconstrained type parameters"
//
// this might be possible once GAT lands, allowing for stuff like (X,Y) + (Z,) = (X, Y, Z) or
// the like. like for example T + &T which is kinda important...
impl<T, L, R, LT, RT, O, NT, const N: usize> Add<O> for VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,

	O: ConstIndex<T, N> + Copy + Clone,
	T: Add<T, Output = NT>,
{
	type Output = VAdd<NT, Self, O, T, T, N>;
	fn add(self, other: O) -> Self::Output {
		VAdd {
			l: self,
			r: other,
			m: Default::default(),
		}
	}
}

/*
// can't even specialize for vector, cause "downstream crates may implement ConstIndex"
// except im already implementing that in this crate...
use crate::Vector;
impl<'o, T, L, R, LT, RT, NT, const N: usize> Add<&'o Vector<T, N>> for VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,

	T: Add<&'o T, Output = NT>,
{
	type Output = VAdd<NT, Self, &'o Vector<T, N>, T, &'o T, N>;
	fn add(self, other: &'o Vector<T, N>) -> Self::Output {
		VAdd {
			l: self,
			r: other,
			m: Default::default(),
		}
	}
}
*/
//
impl<T, L, R, LT, RT, const N: usize> VAdd<T, L, R, LT, RT, N>
where
	L: ConstIndex<LT, N> + Copy + Clone,
	R: ConstIndex<RT, N> + Copy + Clone,
	LT: Add<RT, Output = T>,
{
	pub fn new(l: L, r: R) -> Self {
		Self {
			l,
			r,
			m: Default::default(),
		}
	}

	pub fn realize(self) -> crate::Vector<T, N> { ConstIterator::from(self).collect() }
}

#[cfg(test)]
pub(crate) const TESTLEN: usize = 777usize;

#[test]
fn calc_chain() {
	use crate::Vector;
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();
	let a: Vector<f32, TESTLEN> = rng.gen();
	let b: Vector<f32, TESTLEN> = rng.gen();
	let c: Vector<f32, TESTLEN> = rng.gen();
	let d: Vector<f32, TESTLEN> = rng.gen();
	let e: Vector<f32, TESTLEN> = rng.gen();

	let ab = VAdd::new(a, b);
	let abc = ab + c;
	let abcd = abc + d;
	let abcde = abcd + e;

	let _res = abcde.realize();
}
--------------------------------------------------------------------------------
/benches/simd.rs:
--------------------------------------------------------------------------------
use core::ops::Add;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use optimath::Vector;
use rand::{thread_rng, Rng};

#[derive(Copy, Clone)]
#[repr(transparent)]
struct Ff32(f32);

impl Add for Ff32 {
	type Output = Ff32;
	fn add(self, other: Self) -> Self { Ff32(self.0 + other.0) }
}

impl<'a, 'b> Add<&'b Ff32> for &'a Ff32 {
	type Output = Ff32;
	fn add(self, other: &'b Ff32) -> Ff32 { Ff32(self.0 + other.0) }
}

const TESTLEN: usize = 250;
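
// Ff32 is a plain newtype around f32 with only the scalar Add impls above,
// so the "f32 scalar" benchmark below goes through the fully generic
// element-wise code path instead of anything the compiler specializes for
// bare f32; that is the comparison being drawn here.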
pub fn add(c: &mut Criterion) {
	let mut rng = thread_rng();
	let a: Vector<f32, TESTLEN> = rng.gen();
	let b: Vector<f32, TESTLEN> = rng.gen();

	// there seem to be two "modes" hit on optimization
	// currently "f32 simd" and "f32 inline" hit the good one
	// and "f32 noabstract" and "f32 scalar" hit the bad one
	// good one takes 180ns, bad one 320ns
	// can't inspect the asm of this specific benchmark though as the compiler just locks up
	// and eats 30 GB ram.
	// asm of benching::add and benching::internal_add seem identical and very well vectorized
	// so im assuming this is a benchmarking problem
	let mut group = c.benchmark_group("addition");
	group.warm_up_time(core::time::Duration::from_millis(200));
	group.measurement_time(core::time::Duration::from_secs(2));
	group.sample_size(250);

	group.bench_function("f32 inline", |bench| bench.iter(|| &a + &b));
	group.bench_function("f32 simd", |bench| {
		bench.iter(|| optimath::benching::add(&a, &b))
	});

	group.bench_function("f32 noabstract", |bench| {
		bench.iter(|| black_box(optimath::benching::internal_add(&a, &b)))
	});

	let a: Vector<f32, TESTLEN> = rng.gen();
	let b: Vector<f32, TESTLEN> = rng.gen();
	let a: Vector<Ff32, TESTLEN> = a.into_iter().map(|f: f32| Ff32(f)).collect();
	let b: Vector<Ff32, TESTLEN> = b.into_iter().map(|f: f32| Ff32(f)).collect();

	group.bench_function("f32 scalar", |bench| bench.iter(|| black_box(&a + &b)));
}

pub fn mul(c: &mut Criterion) {
	let mut rng = thread_rng();
	let mut group = c.benchmark_group("sizes");
	group.warm_up_time(core::time::Duration::from_millis(200));
	group.measurement_time(core::time::Duration::from_secs(1));
	group.sample_size(500);

	let a: Vector<u8, TESTLEN> = rng.gen();
	let b: Vector<u8, TESTLEN> = rng.gen();
	group.bench_function("u8", |bench| bench.iter(|| black_box(&a * &b)));
	group.bench_function("u8 noabstract", |bench| {
		bench.iter(|| {
			for (a, b) in a.into_iter().zip(b) {
				black_box(a * b);
			}
		})
	});

	let a: Vector<u16, TESTLEN> = rng.gen();
	let b: Vector<u16, TESTLEN> = rng.gen();
	group.bench_function("u16", |bench| bench.iter(|| black_box(&a * &b)));
	group.bench_function("u16 noabstract", |bench| {
		bench.iter(|| {
			for (a, b) in a.into_iter().zip(b) {
				black_box(a * b);
			}
		})
	});

	let a: Vector<u32, TESTLEN> = rng.gen();
	let b: Vector<u32, TESTLEN> = rng.gen();
	group.bench_function("u32", |bench| bench.iter(|| black_box(&a * &b)));
	group.bench_function("u32 noabstract", |bench| {
		bench.iter(|| {
			for (a, b) in a.into_iter().zip(b) {
				black_box(a * b);
			}
		})
	});

	let a: Vector<u64, TESTLEN> = rng.gen();
	let b: Vector<u64, TESTLEN> = rng.gen();
	group.bench_function("u64", |bench| bench.iter(|| black_box(&a * &b)));
	group.bench_function("u64 noabstract", |bench| {
		bench.iter(|| {
			for (a, b) in a.into_iter().zip(b) {
				black_box(a * b);
			}
		})
	});

	let a: Vector<u128, TESTLEN> = rng.gen();
	let b: Vector<u128, TESTLEN> = rng.gen();
	group.bench_function("u128", |bench| bench.iter(|| black_box(&a * &b)));
	group.bench_function("u128 noabstract", |bench| {
		bench.iter(|| {
			for (a, b) in a.into_iter().zip(b) {
				black_box(a * b);
			}
		})
	});
}

const BIG: usize = 40_001;

pub fn create(c: &mut Criterion) {
	use core::mem::MaybeUninit;
	let mut group = c.benchmark_group("create");
	group.warm_up_time(core::time::Duration::from_millis(200));
	group.measurement_time(core::time::Duration::from_secs(2));
	group.sample_size(250);

	group.bench_function("uninit", |bench| {
		bench.iter(|| {
			black_box({
				let b: MaybeUninit<[f32; BIG]> = MaybeUninit::uninit();
				b
			})
		})
	});

	group.bench_function("write", |bench| {
		bench.iter(|| {
			black_box({
				let mut b: MaybeUninit<[f32; BIG]> = MaybeUninit::uninit();
				let b_ptr = b.as_mut_ptr() as *mut f32;
				for i in 0..BIG {
					unsafe {
						b_ptr.add(i).write(0.);
					}
				}
				unsafe { b.assume_init() }
			})
		})
	});
}

criterion_group!(sse3, add, mul, create);
criterion_main!(sse3);
--------------------------------------------------------------------------------
/src/base.rs:
--------------------------------------------------------------------------------
use crate::{consts::ConstIndex, types::Vector};
use core::ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Sub, SubAssign};

// reference operations
// need to have the Output = T on the Add for &T, otherwise you get infinite recursion

// im not good with macro hygiene, but forcing the size to be called N and the type to be called T
// seems wrong to me and like it's breaking hygiene..
macro_rules! impl_op {
	( $op:tt, $fn:ident, $basetype:ty, $( $generics:tt), *; $( $cons:tt $constype:ty ), * ) => {
		impl<'a, 'b, $( $generics), *, B: 'b, $( const $cons : $constype), *> $op<B> for $basetype
		where
			&'a T: $op<&'b T, Output = T>,
			B: ConstIndex<&'b T, N> + Copy + Clone,
			T: 'a + 'b,
		{
			type Output = Vector<T, N>;
			default fn $fn(self, other: B) -> Vector<T, N> {
				self.into_iter()
					.enumerate()
					.map(|(i, s)| $op::$fn(s, other.i(i)))
					.collect()
			}
		}
	};
}

macro_rules! maths {
	( $basetype:ty, $( $generics:tt), *; $( const $cons:tt : $constype:ty ), * ) => {
		impl_op!(Add, add, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_op!(Sub, sub, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_op!(Mul, mul, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_op!(Div, div, $basetype, $( $generics), *; $( $cons $constype ), * );
	};
}

// assigning operations
macro_rules! impl_assign_op {
	( $op:tt, $fn:ident, $basetype:ty, $( $generics:tt), *; $( $cons:tt $constype:ty ), * ) => {
		impl<'a, $( $generics), *, $( const $cons : $constype), *> $op <&'a $basetype> for $basetype
		where
			T: $op<&'a T>,
		{
			fn $fn(&mut self, other: &'a $basetype) {
				let iter = self.inner.iter_mut().zip(other);
				for (s, o) in iter {
					$op::$fn(s, o);
				}
			}
		}

		impl<'a, 'b, $( $generics), *, B: 'b, $( const $cons : $constype), *> $op<B> for $basetype
		where
			T: $op<&'b T>,
			B: ConstIndex<&'b T, N> + Copy + Clone,
			T: 'a + 'b,
		{
			default fn $fn(&mut self, other: B) {
				use crate::consts::ConstIteratorMut;
				let iter = ConstIteratorMut::from(self);
				for (i, s) in iter.enumerate() {
					$op::$fn(s, other.i(i));
				}
			}
		}
	};
}

macro_rules! assign_maths {
	( $basetype:ty, $( $generics:tt), *; $( const $cons:tt : $constype:ty ), * ) => {
		impl_assign_op!(AddAssign, add_assign, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_assign_op!(SubAssign, sub_assign, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_assign_op!(MulAssign, mul_assign, $basetype, $( $generics), *; $( $cons $constype ), * );
		impl_assign_op!(DivAssign, div_assign, $basetype, $( $generics), *; $( $cons $constype ), * );
	}
}

maths!(&'a Vector<T, N>, T; const N: usize);

use crate::VectorView;
maths!(VectorView<'a, T, N, M>, T; const N: usize, const M: usize);

assign_maths!(Vector<T, N>, T; const N: usize);
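
// for orientation, maths!(&'a Vector<T, N>, T; const N: usize) expands to
// four impls of roughly this shape, one per operator (sketch of the Add one):
//
// impl<'a, 'b, T, B: 'b, const N: usize> Add<B> for &'a Vector<T, N>
// where
// 	&'a T: Add<&'b T, Output = T>,
// 	B: ConstIndex<&'b T, N> + Copy + Clone,
// 	T: 'a + 'b,
// {
// 	type Output = Vector<T, N>;
// 	default fn add(self, other: B) -> Vector<T, N> {
// 		self.into_iter()
// 			.enumerate()
// 			.map(|(i, s)| s + other.i(i))
// 			.collect()
// 	}
// }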

#[cfg(test)]
pub(crate) const TESTLEN: usize = 777usize;

#[test]
fn default_is_default() {
	let m = Vector::<f32, TESTLEN>::default();
	for i in 0..TESTLEN {
		assert_eq!(m.inner[i], f32::default());
	}
}

#[test]
fn operations() {
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();
	let a: Vector<f32, TESTLEN> = rng.gen();
	let b: Vector<f32, TESTLEN> = rng.gen();

	let add = &a + &b;
	let sub = &a - &b;
	let mul = &a * &b;
	let div = &a / &b;

	for i in 0..TESTLEN {
		assert_eq!(a.inner[i] + b.inner[i], add.inner[i]);
		assert_eq!(a.inner[i] - b.inner[i], sub.inner[i]);
		assert_eq!(a.inner[i] * b.inner[i], mul.inner[i]);
		assert_eq!(a.inner[i] / b.inner[i], div.inner[i]);
	}
}

#[test]
fn assignment_operations() {
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();
	let a: Vector<f32, TESTLEN> = rng.gen();
	let b: Vector<f32, TESTLEN> = rng.gen();

	let mut add = a.clone();
	add += &b;

	let mut sub = a.clone();
	sub -= &b;

	let mut mul = a.clone();
	mul *= &b;

	let mut div = a.clone();
	div /= &b;

	for i in 0..TESTLEN {
		assert_eq!(a.inner[i] + b.inner[i], add.inner[i]);
		assert_eq!(a.inner[i] - b.inner[i], sub.inner[i]);
		assert_eq!(a.inner[i] * b.inner[i], mul.inner[i]);
		assert_eq!(a.inner[i] / b.inner[i], div.inner[i]);
	}
}
--------------------------------------------------------------------------------
/src/layout.rs:
--------------------------------------------------------------------------------
use core::{iter::FromIterator, mem::MaybeUninit};

/// size_of::<Self>() * num == size_of::<Self::Repr>()
pub trait SimdRepr: Sized {
	const NUM: usize;
	type Repr;
	//todo: automatically build this type, dunno how tho
	//also there is a compiler bug with calculations in array sizes specifically
	type Unpacked = [Self; 1];
}

// need to find a way to pack&unpack data into/from the iter
// default trait does not rly work it seems :(
//
// maybe i can do a default impl with 1-1 "casts" for normal types
// and specific stuff for stuff with simd impls? probably
pub trait SimdPack: SimdRepr {
	fn pack(un: Self::Unpacked) -> Self::Repr;
	// fn pack_ref(&self) -> &<Self as SimdRepr>::Repr;

	// fn unpack(pack: <Self as SimdRepr>::Repr) -> Self;
	// fn unpack_ref(pack: &<Self as SimdRepr>::Repr) -> Self;
}

impl<T> SimdRepr for T {
	default type Repr = T;
	default const NUM: usize = 1;
	default type Unpacked = [Self; 1];
}

#[repr(transparent)]
pub struct Vecc<T: SimdRepr, const N: usize> {
	i: Inner<
		T,
		{ N / <T as SimdRepr>::NUM },
		{ N % <T as SimdRepr>::NUM },
		{ (N / <T as SimdRepr>::NUM) * <T as SimdRepr>::NUM },
	>,
}

impl<T: SimdRepr, const N: usize> FromIterator<T> for Vecc<T, N> {
	/// this is todo!() as any implementation i tried leads to ICE
	fn from_iter<I: IntoIterator<Item = T>>(_iter: I) -> Self {
		/* ICE, probably can't resolve the type??
		let inner = FromIterator::from_iter(iter);
		Vecc { i: inner }
		*/
		/*
		let inner = Inner::<
			T,
			{ N / <T as SimdRepr>::NUM },
			{ N % <T as SimdRepr>::NUM },
			{ (N / <T as SimdRepr>::NUM) * <T as SimdRepr>::NUM },
		>::from_iter(iter);
		// can't unify the expressions, so trying to transmute, lul
		// still leads to ice tho
		let inner = unsafe { core::mem::transmute(inner) };
		Vecc { i: inner }
		*/
		/* even more ice! yay
		let inner = Inner::<
			T,
			{ N / <T as SimdRepr>::NUM },
			{ N % <T as SimdRepr>::NUM },
			{ (N / <T as SimdRepr>::NUM) * <T as SimdRepr>::NUM },
		>::from_iter(iter);

		let inner_ptr = &mut inner as *mut _ as *mut Self;

		unsafe { inner_ptr.read() }
		*/
		todo!()
	}
}

pub(crate) struct Inner<T: SimdRepr, const CHUNKS: usize, const SPILL: usize, const IN_BASE: usize> {
	base: [<T as SimdRepr>::Repr; CHUNKS],
	spill: [T; SPILL],
}

impl<T: SimdRepr, const CHUNKS: usize, const SPILL: usize, const IN_BASE: usize> FromIterator<T>
	for Inner<T, CHUNKS, SPILL, IN_BASE>
{
	default fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
		let mut iter = iter.into_iter();

		let mut base = MaybeUninit::uninit();
		let mut spill = MaybeUninit::uninit();

		let base_ptr = &mut base as *mut _ as *mut T;
		for offset in 0..IN_BASE {
			if let Some(element) = iter.next() {
				unsafe {
					base_ptr.add(offset).write(element);
				}
			} else {
				panic!("not enough elements");
			}
		}
		let base = unsafe { base.assume_init() };

		let spill_ptr = &mut spill as *mut _ as *mut T;
		for offset in 0..SPILL {
			if let Some(element) = iter.next() {
				unsafe {
					spill_ptr.add(offset).write(element);
				}
			} else {
				panic!("not enough elements");
			}
		}
		let spill = unsafe { spill.assume_init() };

		iter.next().map(|_| panic!("too many elements"));

		Inner { base, spill }
	}
}

#[test]
fn i32_build() { let a: Vecc<i32, 77> = (0..77).collect(); }

use core::arch::x86_64::{__m128, _mm_loadu_ps};

impl SimdRepr for f32 {
	const NUM: usize = 4;
	type Repr = __m128;
	type Unpacked = [Self; 4];
}
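
// concretely: with NUM = 4 and Repr = __m128, a Vecc<f32, 77> is intended to
// lay out as 19 packed __m128 chunks covering 76 floats, plus a 1-element
// spill array for the remaining float.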

impl<const CHUNKS: usize, const SPILL: usize, const IN_BASE: usize> FromIterator<f32>
	for Inner<f32, CHUNKS, SPILL, IN_BASE>
{
	fn from_iter<I: IntoIterator<Item = f32>>(iter: I) -> Self {
		let mut iter = iter.into_iter();

		let mut base = MaybeUninit::uninit();
		let mut spill = MaybeUninit::uninit();

		let base_ptr = &mut base as *mut _ as *mut __m128;

		for offset in 0..CHUNKS {
			let chunk = [
				iter.next().unwrap(),
				iter.next().unwrap(),
				iter.next().unwrap(),
				iter.next().unwrap(),
			];
			unsafe {
				let e = _mm_loadu_ps(chunk.as_ptr());
				base_ptr.add(offset).write(e);
			}
		}
		let base = unsafe { base.assume_init() };

		let spill_ptr = spill.as_mut_ptr() as *mut f32;
		for offset in 0..SPILL {
			if let Some(element) = iter.next() {
				unsafe {
					spill_ptr.add(offset).write(element);
				}
			} else {
				panic!("not enough elements");
			}
		}
		let spill = unsafe { spill.assume_init() };

		iter.next().map(|_| panic!("too many elements"));

		Inner { base, spill }
	}
}

#[test]
fn f32_build() { let a: Vecc<f32, 77> = (0..77).map(|a| a as f32).collect(); }
--------------------------------------------------------------------------------
/src/consts.rs:
--------------------------------------------------------------------------------
//! This module defines traits that can be implemented by all vector representations
//! operations then only need to be defined between each vector representation and this trait(s)
//! instead of between all combinations of vector representations
//! i.e. only N implementations instead of N*N
//! with GAT i could bring that down to 1

/// implement this on types that can be indexed into that have a size known at compile time
///
/// mutability: just implement this twice, with &E and &mut E as T
///
/// unsafety: calling .i(x) with x < N must successfully return T
///
/// unsafety: .i(x) and .i(y) return different objects when x != y.
/// i.e. they do not alias.
pub unsafe trait ConstIndex<T, const N: usize> {
	fn i(self, index: usize) -> T;
}
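
// a minimal implementor for illustration (hypothetical, not part of the
// crate's api): a "vector" of N ones that never stores anything. the unsafe
// contract holds trivially, every call returns a fresh owned value, so no
// two returned objects alias.
#[cfg(test)]
#[derive(Copy, Clone)]
struct Ones;
#[cfg(test)]
unsafe impl<const N: usize> ConstIndex<u32, N> for Ones {
	fn i(self, _index: usize) -> u32 { 1 }
}
#[cfg(test)]
#[test]
fn ones_sum() {
	// goes through the From impl and ConstIterator defined below
	let iter: ConstIterator<u32, Ones, 5> = Ones.into();
	let s: u32 = iter.sum();
	assert_eq!(s, 5);
}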

use crate::Vector;
unsafe impl<'a, T, const N: usize> ConstIndex<&'a T, N> for &'a Vector<T, N> {
	fn i(self, index: usize) -> &'a T { &self.inner[index] }
}

// lets just hope for the optimizer. only added this for templatemetamath to be usable.
// could think about changing the Add impls so &Vector returning T is accepted
// and only impl ConstIndex for Copy types
unsafe impl<T, const N: usize> ConstIndex<T, N> for Vector<T, N>
where
	T: Copy,
{
	fn i(self, index: usize) -> T { *&self.inner[index] }
}

unsafe impl<'a, T, const N: usize> ConstIndex<&'a mut T, N> for &'a mut Vector<T, N> {
	fn i(self, index: usize) -> &'a mut T { &mut self.inner[index] }
}

use crate::VectorView;
unsafe impl<'a, T, const M: usize, const N: usize> ConstIndex<&'a T, M>
	for VectorView<'a, T, M, N>
{
	fn i(self, index: usize) -> &'a T {
		let row = &self.matrix[index];
		&row[self.row]
	}
}

unsafe impl<'a, T, const M: usize, const N: usize> ConstIndex<VectorView<'a, T, M, N>, N>
	for crate::TransposedMatrixView<'a, T, M, N>
{
	fn i(self, index: usize) -> VectorView<'a, T, M, N> {
		debug_assert!(index < N);
		VectorView {
			row: index,
			matrix: self.matrix,
		}
	}
}

pub struct ConstIterator<T, C: ConstIndex<T, N>, const N: usize> {
	pub(crate) pos: usize,
	pub(crate) content: C,
	pub(crate) marker: core::marker::PhantomData<T>,
}

impl<T, C: ConstIndex<T, N> + Copy, const N: usize> Iterator for ConstIterator<T, C, N> {
	type Item = T;
	fn next(&mut self) -> Option<T> {
		if self.pos < N {
			let ret = self.content.i(self.pos);
			self.pos += 1;
			Some(ret)
		} else {
			None
		}
	}
}

/* im reasonably sure this could be implemented with GAT
 * right now it tells me that T and N are unconstrained, because C is not generic over them
 * this is exactly what GAT provides
impl<T, C, const N: usize> IntoIterator for C
where
	C: ConstIndex<T, N> + Copy,
{
	type Item = T;
	type IntoIter = ConstIterator<T, C, N>;

	fn into_iter(self) -> Self::IntoIter { ConstIterator { pos: 0, content: self, marker: Default::default() } }
*/

impl<T, C, const N: usize> From<C> for ConstIterator<T, C, N>
where
	C: ConstIndex<T, N>,
{
	fn from(content: C) -> Self {
		Self {
			pos: 0,
			content,
			marker: Default::default(),
		}
	}
}

pub struct ConstIteratorMut<'a, T, C, const N: usize> {
	pos: usize,
	content: *mut C,
	marker: core::marker::PhantomData<&'a mut T>,
}

impl<'a, T: 'a, C: 'a, const N: usize> Iterator for ConstIteratorMut<'a, T, C, N>
where
	&'a mut C: ConstIndex<&'a mut T, N>,
{
	type Item = &'a mut T;
	fn next(&mut self) -> Option<&'a mut T> {
		if self.pos < N {
			// this is to work around lifetime issues (but its like legit)
			// we can't just do this the direct way with self.content being &'a mut C
			// because then content.i(x) would return &'a mut T
			// but we need &mut T, i.e. living as long as/not outliving &mut self
			// can't express that concept though cause its an anonymous lifetime
			// and changing that would break the iterator api.
			// the problem that would be occurring is that calling .next() twice and storing
			// the result might return the same reference twice leading to mutable
			// aliasing. we can guarantee that not to happen though, because the unsafe
			// trait ConstIndex provides the method .i(x) which is guaranteed to not alias for
			// different indices. as we increment the index on each iteration we never
			// alias.
			//
			// additionally there are tests that are run with miri just to make sure
			let content: &mut C = unsafe { core::mem::transmute(self.content) };
			let ret = content.i(self.pos);
			self.pos += 1;
			Some(ret)
		} else {
			None
		}
	}
}

impl<'a, T, C: 'a, const N: usize> From<&'a mut C> for ConstIteratorMut<'a, T, C, N>
where
	&'a mut C: ConstIndex<&'a mut T, N>,
{
	fn from(content: &'a mut C) -> Self {
		Self {
			pos: 0,
			content: content as *mut _,
			marker: Default::default(),
		}
	}
}

#[test]
fn const_iter() {
	use crate::Vector;
	use rand::{thread_rng, Rng};
	let mut rng = thread_rng();
	let a: Vector<f32, 40> = rng.gen();
	let iter: ConstIterator<&f32, _, 40> = ConstIterator {
		pos: 0,
		content: &a,
		marker: Default::default(),
	};

	let _s: f32 = iter.sum();
}
--------------------------------------------------------------------------------
/src/types.rs:
--------------------------------------------------------------------------------
//! this module contains some non-math specific methods around Vectors as std-lib arrays are sadly
//! intentionally unusable above the size of 32. When that restriction gets removed most of this
//! module gets obsolete
use core::{
	iter::{FromIterator, IntoIterator},
	mem::MaybeUninit,
	ops::*,
};

/// a const-sized vector of elements, supports all math operations that T does on an
/// element-by-element basis.
///
/// can be iterated over using [.into_iter()](#method.into_iter) on Vector or &Vector
/// can be constructed from iterators using collect().
///
/// is repr(align(16)) for simd
#[repr(align(16))] // todo: choose alignment based on simd-width
#[derive(Copy, Clone)]
pub struct Vector<T, const N: usize> {
	pub(crate) inner: [T; N],
}

/// Matrix is just a type alias for Vector<Vector<T, M>, N>.
///
/// Supports some matrix specific maths operations, namely matrix multiplication and transpose
///
/// A Vector<Vector<Vector<T, M>, N>, O> can also be considered a matrix and as such has those operations
/// defined too.
pub type Matrix<T, const M: usize, const N: usize> = Vector<Vector<T, M>, N>;
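
// note on orientation: Matrix<T, M, N> is a Vector of N rows, each row an
// M-element Vector<T, M>, so indexing reads matrix[row][column] with
// row < N and column < M.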

impl<T, const N: usize> Vector<T, N> {
	pub(crate) fn uninit_inner() -> MaybeUninit<[T; N]> { MaybeUninit::uninit() }
	pub fn ascend(self) -> Vector<Self, 1> { Vector { inner: [self] } }
}

/// now you might be asking: hey djugei, why isn't this function just implemented directly on
/// Vector?
///
/// and that is a very good question!
/// well the answer is that for some reason if i copy this exact code into the types impl and
/// delete the trait i get an infinite recursion error during compilation that i can't explain,
/// feels a lot like spooky action at a distance and that i would consider a compiler bug
pub trait Stupidity<T> {
	//todo: maybe add a try_build function
	fn build_with_fn<F: FnMut(usize) -> T>(f: F) -> Self;
}
impl<T, const N: usize> Stupidity<T> for Vector<T, N> {
	fn build_with_fn<F: FnMut(usize) -> T>(mut f: F) -> Self {
		let mut inner = Self::uninit_inner();
		let base = inner.as_mut_ptr() as *mut T;
		for offset in 0..N {
			let element = f(offset);
			unsafe {
				// can't overshoot cause N is const
				base.add(offset).write(element);
			}
		}

		// has to be initialized at this point because all N elements have been visited
		let inner = unsafe { inner.assume_init() };
		Self { inner }
	}
}

impl<T, const N: usize> Index<usize> for Vector<T, N> {
	type Output = T;
	fn index(&self, index: usize) -> &T { &self.inner[index] }
}

impl<T, const N: usize> IndexMut<usize> for Vector<T, N> {
	fn index_mut(&mut self, index: usize) -> &mut T { &mut self.inner[index] }
}

impl<T, const N: usize> FromIterator<T> for Vector<T, N> {
	fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
		let mut iter = iter.into_iter();
		Self::build_with_fn(|_| iter.next().unwrap())
	}
}
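
// note: collecting from an iterator that yields fewer than N items panics on
// the unwrap above, while surplus items are silently left unconsumed; only
// exactly N elements are ever requested.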

// iter stuff just required cause impls on arrays are limited to 32 elements (for no reason)
pub struct IntoIter<T, const N: usize> {
	pos: usize,
	data: [MaybeUninit<T>; N],
}

impl<T, const N: usize> IntoIter<T, N> {
	// fixme: this is probably rly slow cause of all the copies
	// seems to be optimized out though
	fn new(vector: Vector<T, N>) -> Self {
		let data = unsafe {
			let data =
				core::ptr::read(&vector.inner as *const [T; N] as *const [MaybeUninit<T>; N]);
			core::mem::forget(vector);
			data
		};
		IntoIter { pos: 0, data }
	}
}

impl<T, const N: usize> Iterator for IntoIter<T, N> {
	type Item = T;
	fn next(&mut self) -> Option<T> {
		if self.pos == N {
			None
		} else {
			let out = unsafe { self.data.get_unchecked(self.pos).assume_init_read() };
			self.pos += 1;
			Some(out)
		}
	}
}

impl<T, const N: usize> Drop for IntoIter<T, N> {
	fn drop(&mut self) { for _item in self {} }
}

impl<T, const N: usize> IntoIterator for Vector<T, N> {
	type Item = T;
	type IntoIter = IntoIter<T, N>;

	fn into_iter(self) -> Self::IntoIter { IntoIter::new(self) }
}

use crate::consts::ConstIterator;

impl<'a, T, const N: usize> IntoIterator for &'a Vector<T, N> {
	type Item = &'a T;
	type IntoIter = ConstIterator<&'a T, &'a Vector<T, N>, N>;

	fn into_iter(self) -> Self::IntoIter { self.into() }
}

impl<T: Default, const N: usize> Default for Vector<T, N> {
	fn default() -> Self { Self::build_with_fn(|_| T::default()) }
}

impl<T: PartialEq, const N: usize> PartialEq for Vector<T, N> {
	fn eq(&self, other: &Self) -> bool { self.into_iter().zip(other).all(|(s, o)| s == o) }
}

impl<T: Eq, const N: usize> Eq for Vector<T, N> {}

use core::fmt::Debug;
impl<T: Debug, const N: usize> Debug for Vector<T, N> {
	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::result::Result<(), core::fmt::Error> {
		f.write_str("Vector<")?;
		Debug::fmt(&N, f)?;
		f.write_str(">[")?;
		for i in self {
			i.fmt(f)?;
			f.write_str(", ")?;
		}
		f.write_str("]")?;
		Ok(())
	}
}

use core::fmt::Display;
impl<T: Display, const N: usize> Display for Vector<T, N> {
	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::result::Result<(), core::fmt::Error> {
		if f.alternate() {
			Debug::fmt(self, f)?;
		} else {
			f.write_str("Vector[\n")?;
			for i in self {
				Display::fmt(i, f)?;
				f.write_str(",\n")?;
			}
			f.write_str("]")?;
		}
		Ok(())
	}
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
<!-- cargo-sync-readme start -->

# Optimath

A Linear Algebra library that uses const generics to be no_std and specialization to enable SIMD*.

*simd blocked on compiler bug, autovectorization works well though.

Note: [nalgebra](https://crates.io/crates/nalgebra) now supports const generics
and is more full-featured than this crate.
Maybe it fits your needs better.

## Examples

### Element-wise addition

    use optimath::{Vector, Stupidity};
    use rand::{thread_rng, Rng};
    let mut rng = thread_rng();

    // Vectors can be initialized from an rng,
    let a: Vector<i32, 2000> = rng.gen();
    // from iterators
    let b: Vector<i32, 2000> = (0..2000).collect();
    // with an initializer function
    let c: Vector<i32, 2000> = Vector::build_with_fn(|i| i as i32);
    // or using Default
    let d: Vector<i32, 2000> = Default::default();

    let e = &a + &b;
    let f = &c + &d;
    let h = &e + &f;

### Matrix multiplication

    use optimath::Matrix;
    let a: Matrix<f32, 2, 3> = Default::default();
    let b: Matrix<f32, 3, 4> = Default::default();

    // matrix size is checked at compile time!
    let c: Matrix<f32, 2, 4> = a.matrix_multiply(&b);

## Design

The whole library is built around just one type, [Vector](Vector) representing a Vector of N
elements of type T.

In case T supports some math operation like addition (implements the Add trait) the Vector too
supports that as an element-wise operation. As such a Vector<Vector<T, N>, M> also
supports addition, due to Vector<T, N> being a type that implements Add.

Matrices and Tensors are therefore just Vectors within Vectors (within Vectors)
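
Higher-rank objects follow the same pattern; for example (an illustrative
type, not something the crate names itself), a 2x3x4 stack of matrices is
just another nesting:

    use optimath::Vector;
    type Tensor = Vector<Vector<Vector<f32, 4>, 3>, 2>;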

### no_std

const generics are used to enable Vectors to contain any (fixed) number of elements and
therefore not require allocation on the heap.

### SIMD

Vectors provide generic math operations for any T that implements that operation.
specialization is used to provide optimized implementations for specific T, like for example
floats and integers.

At this moment SIMD support is disabled while we wait for rustc to fix some ICE :).

## Goals

Besides being hopefully useful as a library it is also an exploration of rusts newer advanced
type system features. It is therefore an explicit goal to provide feedback to the developers of
those features. The [insights] module contains some of that.

It is also meant to explore the design space of Linear Algebra libraries that utilize those
features. As such it may serve as inspiration for how bigger linalg libraries might adopt
them.

## Changelog (and future)

### 0.1.0
* A Vector type that can do element-wise maths
* Basic linear algebra operations
* A sturdy design for future improvements

### 0.2.0
* serde support
* rand support

### 0.3.0 (current)
* moved more iterating over to ConstIterator
* add templatemetamaths (building a calculation, then building the result element by element)

### 0.X.0
* [ ] re-architecture a bit so Vectors are generic over containers
* [ ] strided iteration over matrices
* [ ] windows-function

### 0.X.0
* [ ] working SIMD on Vectors (blocked on rust compiler bug(s), but auto-vectorization works
  super well)
* [ ] additional operations on Vectors and Matrices (taking feature requests!)


### 0.X.0
* [ ] interaction with dynamically sized vectors
* [ ] windows-function on dynamically sized vectors

### 0.X.0
* [ ] multi-threading for really large workloads

### 0.X.0
* [ ] full specialized SIMD for sse, avx and avx512
* [ ] full SIMD between Vectors, dynamic Vectors and vector views

### 0.X.0
* [ ] a BLAS compatible interface, including a C-interface. Probably in a different crate based
  on this
* [ ] have 2 additional contributors :) come join the fun and headache about weird compiler bugs
  and pointer offset calculations

### 1.0.0
* [ ] been used/tested in other peoples crates and considered usable


## Ideas section

Currently the crate is built up from vectors, could instead be built "down" from dimensions
see the (private) dimensional module for a sketch of that. its currently blocked on rust not
being able to actually use any calculations for const-generic array sizes.
positive: enable easier iteration/strided iteration as that would just be plain pointer maths.
negative: harder/impossible to express explicit simd.


Automatically build Vectors to be ready for simd and/or multiprocessing. also blocked on
the same rust feature of calculated array sizes. see the (private) layout module for a preview.
im not sure this is necessary though, seeing that with the sizes known at compile time rust
generates very good simd and unrolls.
positive: perfect simd every time on every platform. negative: higher workload, need to take
care for every operation and every platform. negative: transposed and strided iteration gets
harder

For interoperability it would be nice to express things either being sized or unsized.
especially for dimensions like matrix multiplication, U x S(3) * S(3) x U = U x U could be a
common case to self multiply a list with unknown number of entries but known number of features
(this is probably also blocked on the same rust bug, but i did not test yet)

<!-- cargo-sync-readme end -->

# Contributing

Please symlink the hooks to your local .git/hooks/ directory to run some automatic checks before committing.

    ln -s ../../hooks/pre-commit .git/hooks/

Please install rustfmt and cargo-sync-readme so these checks can be run.

    rustup component add rustfmt
    cargo install cargo-sync-readme

Please execute `cargo-sync-readme` when you change the top-level-documentation.
Please run `cargo fmt` whenever you change code. If possible configure your editor to do so for you.
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
#![no_std]
#![allow(incomplete_features)]
#![feature(specialization)]
#![feature(maybe_uninit_extra)]
//#![feature(avx512_target_feature)]
#![feature(array_methods)]

//! # Optimath
//!
//! A Linear Algebra library that uses const generics to be no_std and specialization to enable SIMD*.
//!
//! *simd blocked on compiler bug, autovectorization works well though.
//!
//! Note: [nalgebra](https://crates.io/crates/nalgebra) now supports const generics
//! and is more full-featured than this crate.
//! Maybe it fits your needs better.
//!
//! ## Examples
//!
//! ### Element-wise addition
//!
//!     use optimath::{Vector, Stupidity};
//!     use rand::{thread_rng, Rng};
//!     let mut rng = thread_rng();
//!
//!     // Vectors can be initialized from an rng,
//!     let a: Vector<i32, 2000> = rng.gen();
//!     // from iterators
//!     let b: Vector<i32, 2000> = (0..2000).collect();
//!     // with an initializer function
//!     let c: Vector<i32, 2000> = Vector::build_with_fn(|i| i as i32);
//!     // or using Default
//!     let d: Vector<i32, 2000> = Default::default();
//!
//!     let e = &a + &b;
//!     let f = &c + &d;
//!     let h = &e + &f;
//!
//! ### Matrix multiplication
//!
//!     use optimath::Matrix;
//!     let a: Matrix<f32, 2, 3> = Default::default();
//!     let b: Matrix<f32, 3, 4> = Default::default();
//!
//!     // matrix size is checked at compile time!
//!     let c: Matrix<f32, 2, 4> = a.matrix_multiply(&b);
//!
//! ## Design
//!
//! The whole library is built around just one type, [Vector](Vector) representing a Vector of N
//! elements of type T.
//!
//! In case T supports some math operation like addition (implements the Add trait) the Vector too
//! supports that as an element-wise operation. As such a Vector<Vector<T, N>, M> also
//! supports addition, due to Vector<T, N> being a type that implements Add.
//!
//! Matrices and Tensors are therefore just Vectors within Vectors (within Vectors)
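//!
//! Higher-rank objects follow the same pattern; for example (an illustrative
//! type, not something the crate names itself), a 2x3x4 stack of matrices is
//! just another nesting:
//!
//!     use optimath::Vector;
//!     type Tensor = Vector<Vector<Vector<f32, 4>, 3>, 2>;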
//!
//! ### no_std
//!
//! const generics are used to enable Vectors to contain any (fixed) number of elements and
//! therefore not require allocation on the heap.
//!
//! ### SIMD
//!
//! Vectors provide generic math operations for any T that implements that operation.
//! specialization is used to provide optimized implementations for specific T, like for example
//! floats and integers.
//!
//! At this moment SIMD support is disabled while we wait for rustc to fix some ICE :).
//!
//! ## Goals
//!
//! Besides being hopefully useful as a library it is also an exploration of rusts newer advanced
//! type system features. It is therefore an explicit goal to provide feedback to the developers of
//! those features. The [insights] module contains some of that.
//!
//! It is also meant to explore the design space of Linear Algebra libraries that utilize those
//! features. As such it may serve as inspiration for how bigger linalg libraries might adopt
//! them.
//!
//! ## Changelog (and future)
//!
//! ### 0.1.0
//! * A Vector type that can do element-wise maths
//! * Basic linear algebra operations
//! * A sturdy design for future improvements
//!
//! ### 0.2.0
//! * serde support
//! * rand support
//!
//! ### 0.3.0 (current)
//! * moved more iterating over to ConstIterator
//! * add templatemetamaths (building a calculation, then building the result element by element)
//!
//! ### 0.X.0
//! * [ ] re-architecture a bit so Vectors are generic over containers
//! * [ ] strided iteration over matrices
//! * [ ] windows-function
//!
//! ### 0.X.0
//! * [ ] working SIMD on Vectors (blocked on rust compiler bug(s), but auto-vectorization works
//!   super well)
//! * [ ] additional operations on Vectors and Matrices (taking feature requests!)
//!
//!
//! ### 0.X.0
//! * [ ] interaction with dynamically sized vectors
//! * [ ] windows-function on dynamically sized vectors
//!
//! ### 0.X.0
//! * [ ] multi-threading for really large workloads
//!
//! ### 0.X.0
//! * [ ] full specialized SIMD for sse, avx and avx512
//! * [ ] full SIMD between Vectors, dynamic Vectors and vector views
//!
//! ### 0.X.0
//! * [ ] a BLAS compatible interface, including a C-interface. Probably in a different crate based
//!   on this
//! * [ ] have 2 additional contributors :) come join the fun and headache about weird compiler bugs
//!   and pointer offset calculations
//!
//! ### 1.0.0
//! * [ ] been used/tested in other peoples crates and considered usable
//!
//!
//! ## Ideas section
//!
//! Currently the crate is built up from vectors, could instead be built "down" from dimensions
//! see the (private) dimensional module for a sketch of that. its currently blocked on rust not
//! being able to actually use any calculations for const-generic array sizes.
//! positive: enable easier iteration/strided iteration as that would just be plain pointer maths.
//! negative: harder/impossible to express explicit simd.
//!
//!
//! Automatically build Vectors to be ready for simd and/or multiprocessing. also blocked on
//! the same rust feature of calculated array sizes. see the (private) layout module for a preview.
//! im not sure this is necessary though, seeing that with the sizes known at compile time rust
//! generates very good simd and unrolls.
//! positive: perfect simd every time on every platform. negative: higher workload, need to take
//! care for every operation and every platform. negative: transposed and strided iteration gets
//! harder
//!
//! For interoperability it would be nice to express things either being sized or unsized.
//! especially for dimensions like matrix multiplication, U x S(3) * S(3) x U = U x U could be a
//! common case to self multiply a list with unknown number of entries but known number of features
//! (this is probably also blocked on the same rust bug, but i did not test yet)

//mod dimensional;

pub mod insights;

// turn vector into a transparent wrapper struct that can contain anything
// it can then contain for example: straight data, &[T], Vec<T> or Vector<Vector<T, N>>
// potentially also views of vectors again?
// then implement stuff like matrix-multiply conditionally on const-ness
mod types;
// basic element-wise functions
mod base;
// maths-stuff, dot product, matrix multiply etc
mod advanced;
// a helper trait for compile time known sizes
mod consts;
// views on underlying vectors
mod view;

pub mod templatemetamath;

#[cfg(feature = "serde")]
mod serialize;

#[cfg(feature = "rand")]
mod random;

#[cfg(feature = "alloc")]
mod dynvec;

#[doc(hidden)]
pub mod benching;

/* SIMD is currently a slowdown
 * because loading stuff into simd-format and unloading afterwards is more overhead than speed-up
 * the solution would be to use simd-format as memory-layout but rust currently has some compiler
 * bugs stopping that from happening
#[cfg(all(
	target_arch = "x86_64",
	target_feature = "sse",
	not(target_feature = "avx")
))]
mod sse;

#[cfg(all(target_arch = "x86_64", target_feature = "avx"))]
mod avx;

mod layout;
*/
pub use consts::ConstIndex;
pub use types::{Matrix, Stupidity, Vector};
pub use view::{TransposedMatrixView, VectorView};
// add a type like StaticSizedIterator to make reasoning about dimensions easier/enable
// optimizations
--------------------------------------------------------------------------------