├── Makefile ├── README.md ├── test-suite ├── test-hairy-simd-functions.lisp ├── packages.lisp ├── test-packages.lisp ├── sb-simd-test-suite.asd ├── numbers.lisp ├── test-horizontal-functions.lisp ├── test-arefs.lisp ├── test-suite.lisp └── utilities.lisp ├── code ├── define-types.lisp ├── instruction-sets │ ├── sse3.lisp │ ├── x86-64.lisp │ ├── sse4-2.lisp │ ├── ssse3.lisp │ ├── fma.lisp │ ├── sse.lisp │ ├── sse4-1.lisp │ └── sb-simd.lisp ├── printable.lisp ├── define-ifs.lisp ├── sb-simd.asd ├── missing-instruction.lisp ├── define-rounders.lisp ├── cpu-identification.lisp ├── define-instructions.lisp ├── constants.lisp ├── define-reducers.lisp ├── define-vrefs.lisp ├── define-reffers.lisp ├── define-unequals.lisp ├── define-comparisons.lisp ├── define-associatives.lisp ├── utilities.lisp ├── define-vop-functions.lisp ├── define-scalar-casts.lisp ├── instruction-set-case.lisp ├── define-simd-casts.lisp ├── define-vref-vops.lisp ├── define-modify-macros.lisp ├── instruction-set.lisp ├── define-custom-vops.lisp ├── define-arefs.lisp └── define-instruction-vops.lisp ├── examples ├── simd-sum.lisp ├── simd-dot.lisp └── benchmarks.lisp └── sb-simd.texinfo /Makefile: -------------------------------------------------------------------------------- 1 | SYSTEM=sb-simd 2 | include ../asdf-module.mk 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This repository has been archived because its code has been merged into SBCL. 2 | 3 | You can find it at https://github.com/sbcl/sbcl/tree/master/contrib/sb-simd 4 | -------------------------------------------------------------------------------- /test-suite/test-hairy-simd-functions.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (in-package #:sb-simd-sse) 4 | 5 | (in-package #:sb-simd-sse2) 6 | 7 | (in-package #:sb-simd-avx) 8 | 9 | (in-package #:sb-simd-avx2) 10 | -------------------------------------------------------------------------------- /code/define-types.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-types () 5 | `(progn 6 | ,@(loop for value-record being the hash-values of *value-records* 7 | collect 8 | `(deftype ,(value-record-name value-record) () 9 | ',(value-record-type value-record)))))) 10 | (define-types)) 11 | -------------------------------------------------------------------------------- /test-suite/packages.lisp: -------------------------------------------------------------------------------- 1 | (cl:in-package #:common-lisp-user) 2 | 3 | (defpackage #:sb-simd-test-suite 4 | (:use #:common-lisp #:sb-simd-internals) 5 | (:export 6 | #:run-test-suite 7 | #:define-test 8 | #:is 9 | #:signals 10 | #:all-tests 11 | #:check-package 12 | #:run-tests 13 | #:define-simple-simd-test 14 | #:define-horizontal-test 15 | #:define-aref-test)) 16 | -------------------------------------------------------------------------------- /code/instruction-sets/sse3.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-sse3) 2 | 3 | (define-instruction-set :sse3 4 | (:include :sse2) 5 | (:test (sse3-supported-p)) 6 | (:instructions 7 | (f32.4-hadd #:haddps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse) 8 | (f32.4-hdup #:movshdup (f32.4) (f32.4) :cost 1) 9 | (f32.4-ldup #:movsldup (f32.4) (f32.4) :cost 1) 10 | (f64.2-hadd #:haddpd (f64.2) (f64.2 f64.2) :cost 1 :encoding :sse))) 11 | 12 | -------------------------------------------------------------------------------- /test-suite/test-packages.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (define-test packages) 4 | 5 | (define-test packages 6 | (check-package '#:sb-simd-internals) 7 | (check-package '#:sb-simd) 8 | (check-package '#:sb-simd-x86-64) 9 | (check-package '#:sb-simd-sse) 10 | (check-package '#:sb-simd-sse2) 11 | (check-package '#:sb-simd-sse3) 12 | (check-package '#:sb-simd-ssse3) 13 | (check-package '#:sb-simd-sse4.1) 14 | (check-package '#:sb-simd-sse4.2) 15 | (check-package '#:sb-simd-avx) 16 | (check-package '#:sb-simd-avx2)) 17 | -------------------------------------------------------------------------------- /code/instruction-sets/x86-64.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-x86-64) 2 | 3 | (define-instruction-set :x86-64 4 | (:include :sb-simd) 5 | (:scalars 6 | (imm1 1 (unsigned-byte 1) (:constant (unsigned-byte 1))) 7 | (imm2 2 (unsigned-byte 2) (:constant (unsigned-byte 2))) 8 | (imm3 3 (unsigned-byte 3) (:constant (unsigned-byte 3))) 9 | (imm4 4 (unsigned-byte 4) (:constant (unsigned-byte 4))) 10 | (imm5 5 (unsigned-byte 5) (:constant (unsigned-byte 5))) 11 | (imm6 6 (unsigned-byte 6) (:constant (unsigned-byte 6))) 12 | (imm7 7 (unsigned-byte 7) (:constant (unsigned-byte 7))) 13 | (imm8 8 (unsigned-byte 8) (:constant (unsigned-byte 8))))) 14 | -------------------------------------------------------------------------------- /test-suite/sb-simd-test-suite.asd: -------------------------------------------------------------------------------- 1 | (defsystem "sb-simd-test-suite" 2 | :description "The sb-simd test suite." 3 | :author "Marco Heisig " 4 | :license "MIT" 5 | 6 | :depends-on ("sb-simd") 7 | 8 | :perform 9 | (test-op (o c) (symbol-call '#:sb-simd-test-suite '#:run-test-suite)) 10 | 11 | :serial t 12 | :components 13 | ((:file "packages") 14 | (:file "numbers") 15 | (:file "utilities") 16 | (:file "test-suite") 17 | (:file "test-arefs") 18 | (:file "test-simple-simd-functions") 19 | (:file "test-horizontal-functions") 20 | (:file "test-hairy-simd-functions") 21 | (:file "test-packages"))) 22 | -------------------------------------------------------------------------------- /examples/simd-sum.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-simd-avx) 2 | 3 | (defun simd-sum (array &aux (n (array-total-size array))) 4 | (declare (type (simple-array double-float (*)) array) 5 | (optimize speed (safety 0))) 6 | (do ((index 0 (the (integer 0 #.(- array-total-size-limit 16)) (+ index 16))) 7 | (acc1 (f64.4 0) (f64.4+ acc1 (f64.4-aref array (+ index 0)))) 8 | (acc2 (f64.4 0) (f64.4+ acc2 (f64.4-aref array (+ index 4)))) 9 | (acc3 (f64.4 0) (f64.4+ acc3 (f64.4-aref array (+ index 8)))) 10 | (acc4 (f64.4 0) (f64.4+ acc4 (f64.4-aref array (+ index 12))))) 11 | ((>= index (- n 16)) 12 | (do ((result (f64.4-horizontal+ (f64.4+ acc1 acc2 acc3 acc4)) 13 | (+ result (row-major-aref array index))) 14 | (index index (1+ index))) 15 | ((>= index n) result))))) 16 | -------------------------------------------------------------------------------- /code/printable.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (defclass printable () ()) 4 | 5 | ;;; Gather a plist of all class slots. We use this plist to conveniently 6 | ;;; define PRINT-OBJECT. 7 | (defgeneric printable-slot-plist (printable) 8 | (:method-combination append :most-specific-last)) 9 | 10 | (defmethod printable-slot-plist append (printable) 11 | '()) 12 | 13 | (defmethod print-object ((printable printable) stream) 14 | (print-unreadable-object (printable stream :type t) 15 | (pprint-logical-block (stream (printable-slot-plist printable)) 16 | (loop 17 | (pprint-exit-if-list-exhausted) 18 | (write (pprint-pop) :stream stream) 19 | (pprint-exit-if-list-exhausted) 20 | (write-char #\space stream) 21 | (write (pprint-pop) :stream stream) 22 | (pprint-exit-if-list-exhausted) 23 | (write-char #\space stream) 24 | (pprint-newline :linear stream))))) 25 | -------------------------------------------------------------------------------- /test-suite/numbers.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (defparameter *numbers* 4 | (remove-duplicates 5 | `(,most-positive-double-float 6 | ,most-negative-double-float 7 | ,least-positive-normalized-double-float 8 | ,least-negative-normalized-double-float 9 | ,least-positive-double-float 10 | ,least-negative-double-float 11 | ,most-positive-single-float 12 | ,most-negative-single-float 13 | ,least-positive-normalized-single-float 14 | ,least-negative-normalized-single-float 15 | ,least-positive-single-float 16 | ,least-negative-single-float 17 | ,@(loop for type in '(single-float double-float integer) 18 | append (loop for i from 0 to 64 19 | for n = (expt 2 i) 20 | collect (coerce n type) 21 | collect (coerce (- n) type) 22 | collect (coerce (1- n) type) 23 | collect (coerce (1+ n) type) 24 | collect (coerce (1- (- n)) type) 25 | collect (coerce (1+ (- n)) type)))))) 26 | 27 | (defun numbers-of-type (type) 28 | (remove-if-not (lambda (x) (typep x type)) *numbers*)) 29 | -------------------------------------------------------------------------------- /examples/simd-dot.lisp: -------------------------------------------------------------------------------- 1 | (in-package :sb-simd-avx) 2 | 3 | (defun simd-dot (array1 array2 &aux (n (min (array-total-size array1) (array-total-size array2)))) 4 | (declare (type (simple-array double-float 1) array1 array2) 5 | (optimize speed (safety 0))) 6 | (do ((index 0 (the (integer 0 #.(- array-total-size-limit 16)) (+ index 16))) 7 | (acc1 (f64.4 0) (f64.4+ acc1 (f64.4* (f64.4-row-major-aref array1 (+ index 0)) 8 | (f64.4-row-major-aref array2 (+ index 0))))) 9 | (acc2 (f64.4 0) (f64.4+ acc2 (f64.4* (f64.4-row-major-aref array1 (+ index 4)) 10 | (f64.4-row-major-aref array2 (+ index 4))))) 11 | (acc3 (f64.4 0) (f64.4+ acc3 (f64.4* (f64.4-row-major-aref array1 (+ index 8)) 12 | (f64.4-row-major-aref array2 (+ index 8))))) 13 | (acc4 (f64.4 0) (f64.4+ acc4 (f64.4* (f64.4-row-major-aref array1 (+ index 12)) 14 | (f64.4-row-major-aref array2 (+ index 12)))))) 15 | ((>= index (- n 16)) 16 | (do ((result (f64.4-horizontal+ (f64.4+ acc1 acc2 acc3 acc4)) 17 | (+ result (* (row-major-aref array1 index) 18 | (row-major-aref array2 index)))) 19 | (index index (1+ index))) 20 | ((>= index n) result))))) 21 | -------------------------------------------------------------------------------- /code/instruction-sets/sse4-2.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-sse4.2) 2 | 3 | (define-instruction-set :sse4.2 4 | (:include :sse4.1) 5 | (:test (sse4.2-supported-p)) 6 | (:instructions 7 | ;; u64.2 8 | (two-arg-u64.2>~ #:pcmpgtq (u64.2) (u64.2 u64.2) :cost 3 :encoding :sse) 9 | (two-arg-u64.2> nil (u64.2) (u64.2 u64.2) :cost 3 :encoding :fake-vop) 10 | (two-arg-u64.2>= nil (u64.2) (u64.2 u64.2) :cost 4 :encoding :fake-vop) 11 | (two-arg-u64.2< nil (u64.2) (u64.2 u64.2) :cost 3 :encoding :fake-vop) 12 | (two-arg-u64.2<= nil (u64.2) (u64.2 u64.2) :cost 4 :encoding :fake-vop) 13 | ;; s64.2 14 | (two-arg-s64.2> #:pcmpgtq (u64.2) (s64.2 s64.2) :cost 3 :encoding :sse) 15 | (two-arg-s64.2>= nil (u64.2) (s64.2 s64.2) :cost 4 :encoding :fake-vop) 16 | (two-arg-s64.2< nil (u64.2) (s64.2 s64.2) :cost 3 :encoding :fake-vop) 17 | (two-arg-s64.2<= nil (u64.2) (s64.2 s64.2) :cost 4 :encoding :fake-vop)) 18 | (:comparisons 19 | (u64.2< two-arg-u64.2< u64.2-and +u64-true+) 20 | (u64.2<= two-arg-u64.2<= u64.2-and +u64-true+) 21 | (u64.2> two-arg-u64.2> u64.2-and +u64-true+) 22 | (u64.2>= two-arg-u64.2>= u64.2-and +u64-true+) 23 | (s64.2< two-arg-s64.2< u64.2-and +u64-true+) 24 | (s64.2<= two-arg-s64.2<= u64.2-and +u64-true+) 25 | (s64.2> two-arg-s64.2> u64.2-and +u64-true+) 26 | (s64.2>= two-arg-s64.2>= u64.2-and +u64-true+))) 27 | -------------------------------------------------------------------------------- /code/define-ifs.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-if (if-record-name) 5 | (with-accessors ((name if-record-name) 6 | (blend if-record-blend)) 7 | (find-function-record if-record-name) 8 | (with-accessors ((blend instruction-record-name) 9 | (result-records instruction-record-result-records) 10 | (argument-records instruction-record-argument-records)) blend 11 | (destructuring-bind (a-record b-record mask-record) argument-records 12 | (assert (eq a-record b-record)) 13 | (assert (= (value-record-bits a-record) 14 | (value-record-bits mask-record))) 15 | (destructuring-bind (result-record) result-records 16 | (assert (eq result-record a-record)) 17 | (let ((value-type (value-record-name a-record)) 18 | (mask-type (value-record-name mask-record))) 19 | `(define-inline ,name (mask a b) 20 | (the ,value-type 21 | (,blend (,value-type b) 22 | (,value-type a) 23 | (,mask-type mask)))))))))) 24 | (define-ifs () 25 | `(progn 26 | ,@(loop for if-record in (filter-function-records #'if-record-p) 27 | collect `(define-if ,(if-record-name if-record)))))) 28 | (define-ifs)) 29 | -------------------------------------------------------------------------------- /code/sb-simd.asd: -------------------------------------------------------------------------------- 1 | (defsystem #:sb-simd 2 | :description "A convenient SIMD interface for SBCL." 3 | :author "Marco Heisig " 4 | :license "MIT" 5 | 6 | :in-order-to ((test-op (test-op "sb-simd-test-suite"))) 7 | 8 | :serial t 9 | :components 10 | ((:file "packages") 11 | (:file "constants") 12 | (:file "utilities") 13 | (:file "printable") 14 | (:file "cpu-identification") 15 | (:file "instruction-set") 16 | (:file "instruction-set-case") 17 | (:file "record") 18 | (:file "missing-instruction") 19 | (:module "instruction-sets" 20 | :components 21 | ((:file "sb-simd") 22 | (:file "x86-64") 23 | (:file "sse") 24 | (:file "sse2") 25 | (:file "sse3") 26 | (:file "ssse3") 27 | (:file "sse4-1") 28 | (:file "sse4-2") 29 | (:file "avx") 30 | (:file "avx2") 31 | (:file "fma"))) 32 | (:file "define-types") 33 | (:file "define-instruction-vops") 34 | (:file "define-vref-vops") 35 | (:file "define-custom-vops") 36 | (:file "define-vop-functions") 37 | (:file "define-scalar-casts") 38 | (:file "define-fake-vops") 39 | (:file "define-simd-casts") 40 | (:file "define-instructions") 41 | (:file "define-vrefs") 42 | (:file "define-reffers") 43 | (:file "define-arefs") 44 | (:file "define-ifs") 45 | (:file "define-associatives") 46 | (:file "define-reducers") 47 | (:file "define-comparisons") 48 | (:file "define-unequals") 49 | (:file "define-rounders") 50 | (:file "define-modify-macros"))) 51 | -------------------------------------------------------------------------------- /code/missing-instruction.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; Our library always defines all instruction sets and instructions, but 4 | ;;; usually, only a subset of them are actually available. All the 5 | ;;; remaining instructions are replaced by functions that signal a suitable 6 | ;;; error message. 7 | 8 | (define-condition missing-instruction (error) 9 | ((%record 10 | :initarg :record 11 | :reader missing-instruction-record)) 12 | (:report 13 | (lambda (c s) 14 | (with-accessors ((instruction-set instruction-record-instruction-set) 15 | (instruction-name instruction-record-name)) 16 | (missing-instruction-record c) 17 | (format s "Missing ~S instruction ~S." 18 | (instruction-set-name instruction-set) 19 | instruction-name))))) 20 | 21 | (defun missing-instruction (instruction-record) 22 | (error 'missing-instruction :record instruction-record)) 23 | 24 | (defmacro define-missing-instruction 25 | (name &key (required-arguments '()) (optional-arguments '()) (rest-argument nil)) 26 | (assert (find-function-record name)) 27 | `(defun ,name (,@required-arguments 28 | ,@optional-arguments 29 | ,@(when rest-argument `(&rest ,rest-argument))) 30 | (declare (ignore ,@required-arguments 31 | ,@optional-arguments 32 | ,@(when rest-argument `(,rest-argument)))) 33 | (missing-instruction 34 | (load-time-value 35 | (find-function-record ',name))))) 36 | -------------------------------------------------------------------------------- /code/instruction-sets/ssse3.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-ssse3) 2 | 3 | (define-instruction-set :ssse3 4 | (:include :sse3) 5 | (:test (ssse3-supported-p)) 6 | (:instructions 7 | ;; u16.8 8 | (u16.8-hadd #:phaddw (u16.8) (u16.8 u16.8) :cost 3 :encoding :sse) 9 | (u16.8-hsub #:phsubw (u16.8) (u16.8 u16.8) :cost 3 :encoding :sse) 10 | ;; u32.4 11 | (u32.4-hadd #:phaddd (u32.4) (u32.4 u32.4) :cost 3 :encoding :sse) 12 | (u32.4-hsub #:phsubd (u32.4) (u32.4 u32.4) :cost 3 :encoding :sse) 13 | ;; s8.16 14 | (s8.16-shuffle #:pshufb (s8.16) (s8.16 s8.16) :cost 1 :encoding :sse) 15 | (s8.16-abs #:pabsb (s8.16) (s8.16) :cost 2) 16 | (s8.16-sign #:psignb (s8.16) (s8.16 s8.16) :cost 1 :encoding :sse) 17 | ;; s16.8 18 | (two-arg-s16.8-mulhrs #:pmulhrsw (s16.8) (s16.8 s16.8) :cost 1 :encoding :sse :associative t) 19 | (s16.8-abs #:pabsw (s16.8) (s16.8) :cost 2) 20 | (s16.8-maddubs #:pmaddubsw (s16.8) (u16.8 s16.8) :cost 2 :encoding :sse) 21 | (s16.8-sign #:psignw (s16.8) (s16.8 s16.8) :cost 2 :encoding :sse) 22 | ;; s16.8 23 | (s16.8-hadd #:phaddw (s16.8) (s16.8 s16.8) :cost 3 :encoding :sse) 24 | (s16.8-hsub #:phsubw (s16.8) (s16.8 s16.8) :cost 3 :encoding :sse) 25 | ;; s32.4 26 | (s32.4-abs #:pabsd (s32.4) (s32.4) :cost 2) 27 | (s32.4-sign #:psignd (s32.4) (s32.4 s32.4) :cost 3 :encoding :sse) 28 | (s32.4-hadd #:phaddd (s32.4) (s32.4 s32.4) :cost 3 :encoding :sse) 29 | (s32.4-hsub #:phsubd (s32.4) (s32.4 s32.4) :cost 3 :encoding :sse)) 30 | (:associatives 31 | (s16.8-mulhrs two-arg-s16.8-mulhrs 1))) 32 | -------------------------------------------------------------------------------- /code/define-rounders.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-sse4.1) 2 | 3 | (define-inline f32.4-round (x) 4 | (f32.4-%round x #b00)) 5 | 6 | (define-inline f32.4-floor (x) 7 | (f32.4-%round x #b01)) 8 | 9 | (define-inline f32.4-ceiling (x) 10 | (f32.4-%round x #b10)) 11 | 12 | (define-inline f32.4-truncate (x) 13 | (f32.4-%round x #b11)) 14 | 15 | (define-inline f64.2-round (x) 16 | (f64.2-%round x #b00)) 17 | 18 | (define-inline f64.2-floor (x) 19 | (f64.2-%round x #b01)) 20 | 21 | (define-inline f64.2-ceiling (x) 22 | (f64.2-%round x #b10)) 23 | 24 | (define-inline f64.2-truncate (x) 25 | (f64.2-%round x #b11)) 26 | 27 | (in-package #:sb-simd-avx) 28 | 29 | (define-inline f32.4-round (x) 30 | (f32.4-%round x #b00)) 31 | 32 | (define-inline f32.4-floor (x) 33 | (f32.4-%round x #b01)) 34 | 35 | (define-inline f32.4-ceiling (x) 36 | (f32.4-%round x #b10)) 37 | 38 | (define-inline f32.4-truncate (x) 39 | (f32.4-%round x #b11)) 40 | 41 | (define-inline f64.2-round (x) 42 | (f64.2-%round x #b00)) 43 | 44 | (define-inline f64.2-floor (x) 45 | (f64.2-%round x #b01)) 46 | 47 | (define-inline f64.2-ceiling (x) 48 | (f64.2-%round x #b10)) 49 | 50 | (define-inline f64.2-truncate (x) 51 | (f64.2-%round x #b11)) 52 | 53 | (define-inline f32.8-round (x) 54 | (f32.8-%round x #b00)) 55 | 56 | (define-inline f32.8-floor (x) 57 | (f32.8-%round x #b01)) 58 | 59 | (define-inline f32.8-ceiling (x) 60 | (f32.8-%round x #b10)) 61 | 62 | (define-inline f32.8-truncate (x) 63 | (f32.8-%round x #b11)) 64 | 65 | (define-inline f64.4-round (x) 66 | (f64.4-%round x #b00)) 67 | 68 | (define-inline f64.4-floor (x) 69 | (f64.4-%round x #b01)) 70 | 71 | (define-inline f64.4-ceiling (x) 72 | (f64.4-%round x #b10)) 73 | 74 | (define-inline f64.4-truncate (x) 75 | (f64.4-%round x #b11)) 76 | -------------------------------------------------------------------------------- /code/cpu-identification.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | #+x86-64 4 | (progn 5 | (defun cpuid (eax &optional (ecx 0)) 6 | (declare (type (unsigned-byte 32) eax ecx)) 7 | (sb-vm::%cpu-identification eax ecx)) 8 | 9 | (defun sse-supported-p () 10 | (and (>= (cpuid 0) 1) 11 | (logbitp 25 (nth-value 3 (cpuid 1))))) 12 | 13 | (defun sse2-supported-p () 14 | (and (>= (cpuid 0) 1) 15 | (logbitp 26 (nth-value 3 (cpuid 1))))) 16 | 17 | (defun sse3-supported-p () 18 | (and (>= (cpuid 0) 1) 19 | (logbitp 0 (nth-value 2 (cpuid 1))))) 20 | 21 | (defun ssse3-supported-p () 22 | (and (>= (cpuid 0) 1) 23 | (logbitp 9 (nth-value 2 (cpuid 1))))) 24 | 25 | (defun sse4.1-supported-p () 26 | (and (>= (cpuid 0) 1) 27 | (logbitp 19 (nth-value 2 (cpuid 1))))) 28 | 29 | (defun sse4.2-supported-p () 30 | (and (>= (cpuid 0) 1) 31 | (logbitp 20 (nth-value 2 (cpuid 1))))) 32 | 33 | (defun avx-supported-p () 34 | (and (>= (cpuid 0) 1) 35 | (logbitp 28 (nth-value 2 (cpuid 1))))) 36 | 37 | (defun avx2-supported-p () 38 | (and (>= (cpuid 0) 7) 39 | (logbitp 5 (nth-value 1 (cpuid 7))))) 40 | 41 | (defun fma-supported-p () 42 | (and (>= (cpuid 0) 1) 43 | (logbitp 12 (nth-value 2 (cpuid 1)))))) 44 | 45 | #-x86-64 46 | (progn 47 | (defun sse-supported-p () 48 | nil) 49 | 50 | (defun sse2-supported-p () 51 | nil) 52 | 53 | (defun sse3-supported-p () 54 | nil) 55 | 56 | (defun ssse3-supported-p () 57 | nil) 58 | 59 | (defun sse4.1-supported-p () 60 | nil) 61 | 62 | (defun sse4.2-supported-p () 63 | nil) 64 | 65 | (defun avx-supported-p () 66 | nil) 67 | 68 | (defun avx2-supported-p () 69 | nil) 70 | 71 | (defun fma-supported-p () 72 | nil)) 73 | -------------------------------------------------------------------------------- /code/define-instructions.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-instruction (name) 5 | (with-accessors ((name instruction-record-name) 6 | (vop instruction-record-vop) 7 | (argument-records instruction-record-argument-records) 8 | (encoding instruction-record-encoding) 9 | (instruction-set instruction-record-instruction-set)) 10 | (find-function-record name) 11 | (let ((argument-record-names (mapcar #'record-name argument-records)) 12 | (argument-symbols (prefixed-symbols "ARGUMENT-" (length argument-records)))) 13 | (if (not (instruction-set-available-p instruction-set)) 14 | `(define-missing-instruction ,name 15 | :required-arguments ,argument-symbols) 16 | ;; Define the actual instruction as a wrapper around the VOP 17 | ;; that attempts to cast all arguments to the correct types. 18 | `(define-inline ,name ,argument-symbols 19 | (let ,(loop for argument-symbol in argument-symbols 20 | for type in (mapcar #'value-record-name argument-records) 21 | collect `(,argument-symbol (,type ,argument-symbol))) 22 | (with-primitive-arguments 23 | ,(mapcar #'list argument-symbols argument-record-names) 24 | (,vop ,@argument-symbols)))))))) 25 | (define-instructions () 26 | `(progn 27 | ,@(loop for instruction-record in (filter-function-records #'instruction-record-p) 28 | for name = (instruction-record-name instruction-record) 29 | collect `(define-instruction ,name))))) 30 | (define-instructions)) 31 | -------------------------------------------------------------------------------- /code/constants.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (defconstant most-positive-f32 most-positive-single-float) 4 | (defconstant most-negative-f32 most-negative-single-float) 5 | (defconstant most-positive-f64 most-positive-double-float) 6 | (defconstant most-negative-f64 most-negative-double-float) 7 | 8 | (defconstant most-positive-u1 (1- (expt 2 1))) 9 | (defconstant most-positive-u2 (1- (expt 2 2))) 10 | (defconstant most-positive-u4 (1- (expt 2 4))) 11 | (defconstant most-positive-u8 (1- (expt 2 8))) 12 | (defconstant most-positive-u16 (1- (expt 2 16))) 13 | (defconstant most-positive-u32 (1- (expt 2 32))) 14 | (defconstant most-positive-u64 (1- (expt 2 64))) 15 | 16 | (defconstant +u8-true+ most-positive-u8) 17 | (defconstant +u16-true+ most-positive-u16) 18 | (defconstant +u32-true+ most-positive-u32) 19 | (defconstant +u64-true+ most-positive-u64) 20 | 21 | (defconstant +u8-false+ 0) 22 | (defconstant +u16-false+ 0) 23 | (defconstant +u32-false+ 0) 24 | (defconstant +u64-false+ 0) 25 | 26 | (defconstant most-positive-s8 (1- (expt 2 (1- 8)))) 27 | (defconstant most-positive-s16 (1- (expt 2 (1- 16)))) 28 | (defconstant most-positive-s32 (1- (expt 2 (1- 32)))) 29 | (defconstant most-positive-s64 (1- (expt 2 (1- 64)))) 30 | 31 | (defconstant most-negative-s8 (- (expt 2 (1- 8)))) 32 | (defconstant most-negative-s16 (- (expt 2 (1- 16)))) 33 | (defconstant most-negative-s32 (- (expt 2 (1- 32)))) 34 | (defconstant most-negative-s64 (- (expt 2 (1- 64)))) 35 | 36 | (defconstant +s8-true+ -1) 37 | (defconstant +s16-true+ -1) 38 | (defconstant +s32-true+ -1) 39 | (defconstant +s64-true+ -1) 40 | 41 | (defconstant +s8-false+ 0) 42 | (defconstant +s16-false+ 0) 43 | (defconstant +s32-false+ 0) 44 | (defconstant +s64-false+ 0) 45 | 46 | (defconstant +f32-true+ (sb-kernel:make-single-float +s32-true+)) 47 | (defconstant +f64-true+ (sb-kernel:make-double-float +s32-true+ +u32-true+)) 48 | 49 | (defconstant +f32-false+ (sb-kernel:make-single-float +s32-false+)) 50 | (defconstant +f64-false+ (sb-kernel:make-double-float +s32-false+ +u32-false+)) 51 | -------------------------------------------------------------------------------- /code/define-reducers.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-reducer (reducer-record-name) 5 | (with-accessors ((name reducer-record-name) 6 | (binary-operation reducer-record-binary-operation) 7 | (initial-element reducer-record-initial-element)) 8 | (find-function-record reducer-record-name) 9 | (with-accessors ((binary-operation instruction-record-name) 10 | (result-records instruction-record-result-records) 11 | (argument-records instruction-record-argument-records)) binary-operation 12 | (destructuring-bind ((value-record) (arg1-record arg2-record)) 13 | (list result-records argument-records) 14 | (assert (eq value-record arg1-record)) 15 | (assert (eq value-record arg2-record)) 16 | (let ((type (value-record-name value-record))) 17 | `(progn 18 | (defun ,name (arg &rest more-args) 19 | (if (null more-args) 20 | (,binary-operation (,type ,initial-element) (,type arg)) 21 | (let ((result (,type arg))) 22 | (declare (,type result)) 23 | (loop for arg in more-args 24 | do (setf result (,binary-operation result (,type arg)))) 25 | result))) 26 | (define-compiler-macro ,name (arg &rest more-args) 27 | (cond ((null more-args) 28 | `(,',binary-operation ,',initial-element (,',type ,arg))) 29 | (t (reduce 30 | (lambda (a b) `(,',binary-operation (,',type ,a) (,',type ,b))) 31 | more-args 32 | :initial-value `(,',type ,arg))))))))))) 33 | (define-reducers () 34 | `(progn 35 | ,@(loop for reducer-record in (filter-function-records #'reducer-record-p) 36 | collect `(define-reducer ,(reducer-record-name reducer-record)))))) 37 | (define-reducers)) 38 | -------------------------------------------------------------------------------- /code/define-vrefs.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-vref (name kind) 5 | (with-accessors ((name vref-record-name) 6 | (instruction-set vref-record-instruction-set) 7 | (value-record vref-record-value-record) 8 | (vector-record vref-record-vector-record) 9 | (vop vref-record-vop)) 10 | (find-function-record name) 11 | (let* ((simd-width (value-record-simd-width value-record)) 12 | (element-type 13 | (second 14 | (value-record-type vector-record)))) 15 | (ecase kind 16 | (:load 17 | `(define-inline ,name (array index) 18 | (declare (type (array ,element-type) array) 19 | (index index)) 20 | (multiple-value-bind (vector index) 21 | (sb-kernel:%data-vector-and-index 22 | array 23 | (sb-kernel:check-bound array (- (array-total-size array) ,(1- simd-width)) index)) 24 | (declare (type (simple-array ,element-type (*)) vector)) 25 | (,vop vector index 0)))) 26 | (:store 27 | `(define-inline ,name (value array index) 28 | (declare (type (array ,element-type) array) 29 | (index index)) 30 | (multiple-value-bind (vector index) 31 | (sb-kernel:%data-vector-and-index 32 | array 33 | (sb-kernel:check-bound array (- (array-total-size array) ,(1- simd-width)) index)) 34 | (declare (type (simple-array ,element-type (*)) vector)) 35 | (,vop (,(value-record-name value-record) value) vector index 0)))))))) 36 | (define-vrefs () 37 | `(progn 38 | ,@(loop for load-record in (filter-function-records #'load-record-p) 39 | for name = (load-record-name load-record) 40 | collect `(define-vref ,name :load)) 41 | ,@(loop for store-record in (filter-function-records #'store-record-p) 42 | for name = (store-record-name store-record) 43 | collect `(define-vref ,name :store))))) 44 | (define-vrefs)) 45 | -------------------------------------------------------------------------------- /code/define-reffers.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; In this file, we define reffers for scalars that can be defined in 4 | ;;; terms of built-in Common Lisp functions. 5 | (macrolet 6 | ((define-reffers () 7 | `(progn 8 | ,@(loop for record in (filter-function-records 9 | (lambda (function-record) 10 | (eq (symbol-package 11 | (parse-function-name 12 | (function-record-name function-record))) 13 | (find-package "SB-SIMD")))) 14 | for name = (function-record-name record) 15 | for value-record = (first (function-record-result-records record)) 16 | for type = (value-record-name value-record) 17 | when (aref-record-p record) collect 18 | `(progn 19 | (defun ,name (array &rest subscripts) 20 | (declare (type (array ,type) array)) 21 | (apply #'aref array subscripts)) 22 | (define-compiler-macro ,name (array &rest subscripts) 23 | `(aref (the (array ,',type) ,array) ,@subscripts))) 24 | when (setf-aref-record-p record) collect 25 | `(progn 26 | (defun ,name (value array &rest subscripts) 27 | (declare (type ,type value) 28 | (type (array ,type) array)) 29 | (setf (apply #'aref array subscripts) value)) 30 | (define-compiler-macro ,name (value array &rest subscripts) 31 | (let ((v (gensym "VALUE"))) 32 | `(let ((,v ,value)) 33 | (setf (aref (the (array ,',type) ,array) ,@subscripts) 34 | ,v))))) 35 | when (row-major-aref-record-p record) collect 36 | `(define-inline ,name (array index) 37 | (declare (type (array ,type) array)) 38 | (row-major-aref array index)) 39 | when (setf-row-major-aref-record-p record) collect 40 | `(define-inline ,name (value array index) 41 | (declare (type ,type value) 42 | (type (array ,type) array)) 43 | (setf (row-major-aref array index) value)))))) 44 | (define-reffers)) 45 | -------------------------------------------------------------------------------- /code/instruction-sets/fma.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-fma) 2 | 3 | (define-instruction-set :fma 4 | (:test (fma-supported-p)) 5 | (:include :avx2) 6 | (:instructions 7 | ;; f32 8 | (f32-fmadd #:vfmadd213ss (f32) (f32 f32 f32) :cost 1 :encoding :fma) 9 | (f32-fnmadd #:vfnmadd213ss (f32) (f32 f32 f32) :cost 1 :encoding :fma) 10 | (f32-fmsub #:vfmsub213ss (f32) (f32 f32 f32) :cost 1 :encoding :fma) 11 | ;; f64 12 | (f64-fmadd #:vfmadd213sd (f64) (f64 f64 f64) :cost 1 :encoding :fma) 13 | (f64-fnmadd #:vfnmadd213sd (f64) (f64 f64 f64) :cost 1 :encoding :fma) 14 | (f64-fmsub #:vfmsub213sd (f64) (f64 f64 f64) :cost 1 :encoding :fma) 15 | ;; f32.4 16 | (f32.4-fmadd #:vfmadd213ps (f32.4) (f32.4 f32.4 f32.4) :cost 1 :encoding :fma) 17 | (f32.4-fnmadd #:vfnmadd213ps (f32.4) (f32.4 f32.4 f32.4) :cost 1 :encoding :fma) 18 | (f32.4-fmsub #:vfmsub213ps (f32.4) (f32.4 f32.4 f32.4) :cost 1 :encoding :fma) 19 | (f32.4-fmaddsub #:vfmaddsub213ps (f32.4) (f32.4 f32.4 f32.4) :cost 1 :encoding :fma) 20 | (f32.4-fmsubadd #:vfmsubadd213ps (f32.4) (f32.4 f32.4 f32.4) :cost 1 :encoding :fma) 21 | ;; f32.8 22 | (f32.8-fmadd #:vfmadd213ps (f32.8) (f32.8 f32.8 f32.8) :cost 1 :encoding :fma) 23 | (f32.8-fnmadd #:vfnmadd213ps (f32.8) (f32.8 f32.8 f32.8) :cost 1 :encoding :fma) 24 | (f32.8-fmsub #:vfmsub213ps (f32.8) (f32.8 f32.8 f32.8) :cost 1 :encoding :fma) 25 | (f32.8-fmaddsub #:vfmaddsub213ps (f32.8) (f32.8 f32.8 f32.8) :cost 1 :encoding :fma) 26 | (f32.8-fmsubadd #:vfmsubadd213ps (f32.8) (f32.8 f32.8 f32.8) :cost 1 :encoding :fma) 27 | ;; f64.2 28 | (f64.2-fmadd #:vfmadd213pd (f64.2) (f64.2 f64.2 f64.2) :cost 1 :encoding :fma) 29 | (f64.2-fnmadd #:vfnmadd213pd (f64.2) (f64.2 f64.2 f64.2) :cost 1 :encoding :fma) 30 | (f64.2-fmsub #:vfmsub213pd (f64.2) (f64.2 f64.2 f64.2) :cost 1 :encoding :fma) 31 | (f64.2-fmaddsub #:vfmaddsub213pd (f64.2) (f64.2 f64.2 f64.2) :cost 1 :encoding :fma) 32 | (f64.2-fmsubadd #:vfmsubadd213pd (f64.2) (f64.2 f64.2 f64.2) :cost 1 :encoding :fma) 33 | ;; f64.4 34 | (f64.4-fmadd #:vfmadd213pd (f64.4) (f64.4 f64.4 f64.4) :cost 1 :encoding :fma) 35 | (f64.4-fnmadd #:vfnmadd213pd (f64.4) (f64.4 f64.4 f64.4) :cost 1 :encoding :fma) 36 | (f64.4-fmsub #:vfmsub213pd (f64.4) (f64.4 f64.4 f64.4) :cost 1 :encoding :fma) 37 | (f64.4-fmaddsub #:vfmaddsub213pd (f64.4) (f64.4 f64.4 f64.4) :cost 1 :encoding :fma) 38 | (f64.4-fmsubadd #:vfmsubadd213pd (f64.4) (f64.4 f64.4 f64.4) :cost 1 :encoding :fma))) 39 | -------------------------------------------------------------------------------- /code/define-unequals.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-unequal (unequal-record-name) 5 | (with-accessors ((name unequal-record-name) 6 | (neq unequal-record-neq) 7 | (and unequal-record-and) 8 | (truth unequal-record-truth)) 9 | (find-function-record unequal-record-name) 10 | (with-accessors ((neq instruction-record-name) 11 | (result-records instruction-record-result-records) 12 | (argument-records instruction-record-argument-records)) neq 13 | (destructuring-bind ((result-record) (argument-record other-argument-record)) 14 | (list result-records argument-records) 15 | (assert (eq argument-record other-argument-record)) 16 | (let ((and (function-record-name and)) 17 | (result-type (value-record-name result-record)) 18 | (argument-type (value-record-name argument-record))) 19 | `(progn 20 | (defun ,name (arg &rest more-args) 21 | (let ((args (list* (,argument-type arg) (mapcar #',argument-type more-args))) 22 | (result (,result-type ,truth))) 23 | (declare (,result-type result)) 24 | (loop for (a . rest) on args do 25 | (loop for b in rest do 26 | (setf result (,and result (,neq a b))))) 27 | result)) 28 | (define-compiler-macro ,name (arg &rest more-args) 29 | (if (null more-args) 30 | `(progn (,',argument-type ,arg) (,',result-type ,',truth)) 31 | (let ((bindings 32 | (loop for arg in (list* arg more-args) 33 | collect 34 | (list (gensym "ARG") (list ',argument-type arg))))) 35 | `(let ,bindings 36 | (,',and 37 | ,@(loop for ((a nil) . rest) on bindings 38 | append 39 | (loop for (b nil) in rest 40 | collect `(,',neq ,a ,b)))))))))))))) 41 | (define-unequals () 42 | `(progn 43 | ,@(loop for unequal-record in (filter-function-records #'unequal-record-p) 44 | collect 45 | `(define-unequal ,(unequal-record-name unequal-record)))))) 46 | (define-unequals)) 47 | -------------------------------------------------------------------------------- /code/define-comparisons.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-comparison (comparision-record-name) 5 | (with-accessors ((name comparison-record-name) 6 | (cmp comparison-record-cmp) 7 | (and comparison-record-and) 8 | (truth comparison-record-truth)) 9 | (find-function-record comparision-record-name) 10 | (with-accessors ((cmp instruction-record-name) 11 | (result-records instruction-record-result-records) 12 | (argument-records instruction-record-argument-records)) cmp 13 | (destructuring-bind ((result-record) (argument-record other-argument-record)) 14 | (list result-records argument-records) 15 | (assert (eq argument-record other-argument-record)) 16 | (let ((and (function-record-name and)) 17 | (result-type (value-record-name result-record)) 18 | (argument-type (value-record-name argument-record))) 19 | `(progn 20 | (defun ,name (arg &rest more-args) 21 | (if (null more-args) 22 | (progn (,argument-type arg) (,result-type ,truth)) 23 | (let* ((a (,argument-type arg)) 24 | (b (,argument-type (first more-args))) 25 | (result (,cmp a b))) 26 | (declare (,argument-type a b) 27 | (,result-type result)) 28 | (loop for elt in (rest more-args) 29 | do (shiftf a b (,argument-type elt)) 30 | do (setf result (,and result (,cmp a b)))) 31 | result))) 32 | (define-compiler-macro ,name (arg &rest more-args) 33 | (if (null more-args) 34 | `(progn (,',argument-type ,arg) (,',result-type ,',truth)) 35 | (let ((bindings 36 | (loop for arg in (list* arg more-args) 37 | collect 38 | (list (gensym "ARG") (list ',argument-type arg))))) 39 | `(let ,bindings 40 | (,',and ,@(loop for ((a nil) (b nil) . rest) on bindings 41 | collect `(,',cmp ,a ,b) 42 | until (null rest))))))))))))) 43 | (define-comparisons () 44 | `(progn 45 | ,@(loop for comparison-record in (filter-function-records #'comparison-record-p) 46 | collect `(define-comparison ,(comparison-record-name comparison-record)))))) 47 | (define-comparisons)) 48 | -------------------------------------------------------------------------------- /code/define-associatives.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | (macrolet 4 | ((define-associative (associative-record-name) 5 | (with-accessors ((name associative-record-name) 6 | (binary-operation associative-record-binary-operation) 7 | (identity-element associative-record-identity-element)) 8 | (find-function-record associative-record-name) 9 | (with-accessors ((binary-operation instruction-record-name) 10 | (result-records instruction-record-result-records) 11 | (argument-records instruction-record-argument-records) 12 | (associative instruction-record-associative)) binary-operation 13 | (assert associative) 14 | (destructuring-bind ((value-record) (arg1-record arg2-record)) 15 | (list result-records argument-records) 16 | (assert (eq value-record arg1-record)) 17 | (assert (eq value-record arg2-record)) 18 | (let ((type (value-record-name value-record))) 19 | (if (not identity-element) 20 | `(progn 21 | (defun ,name (arg &rest more-args) 22 | (let ((result (,type arg))) 23 | (declare (,type result)) 24 | (loop for arg in more-args 25 | do (setf result (,binary-operation result (,type arg)))) 26 | result)) 27 | (define-compiler-macro ,name (&whole whole &rest args) 28 | (let ((n (length args))) 29 | (case n 30 | (0 whole) 31 | (1 `(,',type ,(first args))) 32 | (otherwise 33 | `(,',binary-operation 34 | (,',name ,@(subseq args 0 (floor n 2))) 35 | (,',name ,@(subseq args (floor n 2))))))))) 36 | `(progn 37 | (defun ,name (&rest args) 38 | (if (null args) 39 | (,type ,identity-element) 40 | (let ((result (,type (first args)))) 41 | (declare (,type result)) 42 | (loop for arg in (rest args) 43 | do (setf result (,binary-operation result (,type arg)))) 44 | result))) 45 | (define-compiler-macro ,name (&rest args) 46 | (let ((n (length args))) 47 | (case n 48 | (0 `(,',type ,,identity-element)) 49 | (1 `(,',type ,(first args))) 50 | (otherwise 51 | `(,',binary-operation 52 | (,',name ,@(subseq args 0 (floor n 2))) 53 | (,',name ,@(subseq args (floor n 2))))))))))))))) 54 | (define-associatives () 55 | `(progn 56 | ,@(loop for associative-record in (filter-function-records #'associative-record-p) 57 | collect `(define-associative ,(associative-record-name associative-record)))))) 58 | (define-associatives)) 59 | -------------------------------------------------------------------------------- /code/utilities.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; Types 4 | 5 | (deftype type-specifier () 6 | '(or symbol cons)) 7 | 8 | (deftype non-nil-symbol () 9 | '(and symbol (not null))) 10 | 11 | (deftype function-name () 12 | '(or non-nil-symbol (cons (eql setf) (cons non-nil-symbol null)))) 13 | 14 | (deftype index () 15 | '(integer 16 | (#.(- (1- array-total-size-limit))) 17 | (#.(1- array-total-size-limit)))) 18 | 19 | ;;; Functions 20 | 21 | (defun index+ (&rest indices) 22 | (the index (apply #'+ indices))) 23 | 24 | (define-compiler-macro index+ (&rest indices) 25 | `(the index (+ ,@(loop for index in indices collect `(the index ,index))))) 26 | 27 | (defun index- (index &rest more-indices) 28 | (the index (apply #'- index more-indices))) 29 | 30 | (define-compiler-macro index- (index &rest more-indices) 31 | `(the index (- (the index ,index) ,@(loop for index in more-indices collect `(the index ,index))))) 32 | 33 | (defun index* (&rest indices) 34 | (the index (apply #'* indices))) 35 | 36 | (define-compiler-macro index* (&rest indices) 37 | `(the index (* ,@(loop for index in indices collect `(the index ,index))))) 38 | 39 | (defun ensure-package (name) 40 | (or (find-package name) 41 | (make-package name))) 42 | 43 | (defun mksym (package &rest string-designators) 44 | (intern 45 | (apply #'concatenate 'string (mapcar #'string string-designators)) 46 | package)) 47 | 48 | (defun prefixed-symbols (prefix n &optional (package *package*)) 49 | (loop for index below n 50 | collect 51 | (mksym package prefix (format nil "~D" index)))) 52 | 53 | (declaim (notinline touch)) 54 | (defun touch (&rest arguments &aux value) 55 | (declare (ignore arguments)) 56 | (declare (special value)) 57 | (values-list (loop repeat (random 100) collect value))) 58 | 59 | (defun required-argument (initarg) 60 | (error "Required argument: ~S" initarg)) 61 | 62 | (defun macroexpand-all (form &optional env) 63 | (let ((sb-walker::*walk-form-expand-macros-p* t)) 64 | (sb-walker:walk-form form env))) 65 | 66 | (defun ensure-list (x) 67 | (if (listp x) 68 | x 69 | (list x))) 70 | 71 | (defun lambda-expression-p (x) 72 | (and (listp x) 73 | (> (list-length x) 1) 74 | (eq (first x) 'lambda) 75 | (listp (second x)))) 76 | 77 | (defun parse-function-name (function-name) 78 | (typecase function-name 79 | (non-nil-symbol 80 | (values function-name nil)) 81 | ((cons (eql setf) (cons non-nil-symbol null)) 82 | (values (second function-name) t)) 83 | (otherwise 84 | (error "Not a valid function name: ~S" function-name)))) 85 | 86 | ;;; Macros 87 | 88 | (defmacro define-inline (name lambda-list &body body) 89 | `(progn 90 | (declaim (inline ,name)) 91 | (defun ,name ,lambda-list ,@body))) 92 | 93 | (defmacro define-notinline (name lambda-list &body body) 94 | `(progn 95 | (declaim (notinline ,name)) 96 | (defun ,name ,lambda-list ,@body))) 97 | 98 | (defun integer-type-specifier-inclusive-bounds (type-specifier) 99 | (assert (= 2 (length type-specifier))) 100 | (let ((bits (the (integer 1) (second type-specifier)))) 101 | (ecase (first type-specifier) 102 | (unsigned-byte 103 | (values 0 (1- (expt 2 bits)))) 104 | (signed-byte 105 | (values (- (expt 2 (1- bits))) 106 | (1- (expt 2 (1- bits)))))))) 107 | -------------------------------------------------------------------------------- /code/define-vop-functions.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; For constant folding, SBCL needs functions of the same name as the VOP. 4 | ;;; In this file, we define these functions. Because some VOPs cannot 5 | ;;; translate the full range of arguments supported by such a function, 6 | ;;; e.g., because one argument is expected to be a constant, we also need 7 | ;;; some macrology to have each function dispatch only to calls that can be 8 | ;;; translated. 9 | 10 | (defmacro with-primitive-arguments (alist &body body) 11 | ;; Each entry in ALIST is of the form (VARIABLE VALUE-RECORD-NAME). 12 | (if (null alist) 13 | `(progn ,@body) 14 | `(with-primitive-argument ,(first alist) 15 | (with-primitive-arguments ,(rest alist) 16 | ,@body)))) 17 | 18 | (defmacro with-primitive-argument ((symbol value-record) &body body) 19 | (with-accessors ((primitive-type value-record-primitive-type) 20 | (simd-p simd-record-p)) 21 | (find-value-record value-record) 22 | (etypecase primitive-type 23 | ;; Case 1: A symbol denoting a primitive type. 24 | (symbol 25 | (let ((alias (find primitive-type sb-c::*backend-primitive-type-aliases* :key #'car))) 26 | ;; ALIAS is either null or a list of the form (:OR PRIMITIVE-TYPE*) 27 | (if (or (not simd-p) (not alias)) 28 | `(progn ,@body) 29 | `(etypecase ,symbol 30 | ,@(loop for pt in (rest (rest alias)) 31 | for type = (sb-c::primitive-type-specifier (sb-c:primitive-type-or-lose pt)) 32 | collect `(,type ,@body)))))) 33 | ;; Case 2: A list of the form (:CONSTANT TYPE), where TYPE is either 34 | ;; of the form (SIGNED-BYTE N) or (UNSIGNED-BYTE N) for some positive 35 | ;; integer N. 36 | ((cons (eql :constant) (cons type-specifier null)) 37 | (multiple-value-bind (low high) 38 | (integer-type-specifier-inclusive-bounds (second primitive-type)) 39 | `(ecase ,symbol 40 | ,@(loop for value from low to high 41 | collect `(,value (symbol-macrolet ((,symbol ,value)) ,@body))))))))) 42 | 43 | (macrolet 44 | ((define-vop-function (name) 45 | (with-accessors ((name instruction-record-name) 46 | (vop instruction-record-vop) 47 | (argument-records instruction-record-argument-records) 48 | (encoding instruction-record-encoding) 49 | (instruction-set instruction-record-instruction-set)) 50 | (find-function-record name) 51 | (let* ((argument-record-names (mapcar #'record-name argument-records)) 52 | (argument-symbols (prefixed-symbols "ARGUMENT-" (length argument-records)))) 53 | (unless (or (eq encoding :fake-vop) 54 | (not (instruction-set-available-p instruction-set))) 55 | `(defun ,vop (,@argument-symbols) 56 | (declare 57 | ,@(loop for argument-symbol in argument-symbols 58 | for argument-record in argument-records 59 | collect `(type ,(value-record-name argument-record) ,argument-symbol))) 60 | (with-primitive-arguments ,(mapcar #'list argument-symbols argument-record-names) 61 | (,vop ,@argument-symbols))))))) 62 | 63 | (define-vop-functions () 64 | `(progn 65 | ,@(loop for instruction-record in (filter-function-records #'instruction-record-p) 66 | for name = (instruction-record-name instruction-record) 67 | collect `(define-vop-function ,name))))) 68 | (define-vop-functions)) 69 | -------------------------------------------------------------------------------- /code/define-scalar-casts.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; For each value record we define a function of the same name that will 4 | ;;; either suitably convert its argument to that value record's type, or 5 | ;;; signal an error. 6 | 7 | (macrolet 8 | ((define-scalar-cast (scalar-cast-record-name) 9 | (with-accessors ((name scalar-cast-record-name)) 10 | (find-function-record scalar-cast-record-name) 11 | (let ((err (mksym (symbol-package name) "CANNOT-CONVERT-TO-" name))) 12 | `(progn 13 | (define-notinline ,err (x) 14 | (error "Cannot convert ~S to ~S." x ',name)) 15 | (sb-c:defknown ,name (t) (values ,name &optional) 16 | (sb-c:foldable) 17 | :overwrite-fndb-silently t) 18 | (sb-c:deftransform ,name ((x) (,name) *) 19 | 'x) 20 | ,@(case name 21 | (sb-simd:f32 22 | `((sb-c:deftransform ,name ((x) (double-float) *) 23 | '(coerce x 'single-float)))) 24 | (sb-simd:f64 25 | `((sb-c:deftransform ,name ((x) (single-float) *) 26 | '(coerce x 'double-float)))) 27 | (sb-simd-sse:f32 28 | `((sb-c:deftransform ,name ((x) (double-float) *) 29 | '(sb-kernel:%single-float x)) 30 | (sb-c:deftransform ,name ((x) ((signed-byte 64)) *) 31 | '(sb-simd-sse::f32-from-s64 x)))) 32 | (sb-simd-sse2:f64 33 | `((sb-c:deftransform ,name ((x) (single-float) *) 34 | '(sb-simd-sse2::f64-from-f32 x)) 35 | (sb-c:deftransform ,name ((x) ((signed-byte 64)) *) 36 | '(sb-simd-sse2::f64-from-s64 x)))) 37 | (sb-simd-avx:f32 38 | `((sb-c:deftransform ,name ((x) (double-float) *) 39 | '(sb-simd-avx::f32-from-f64 x)) 40 | (sb-c:deftransform ,name ((x) ((signed-byte 64)) *) 41 | '(sb-simd-avx::f32-from-s64 x)))) 42 | (sb-simd-avx:f64 43 | `((sb-c:deftransform ,name ((x) (single-float) *) 44 | '(sb-simd-avx::f64-from-f32 x)) 45 | (sb-c:deftransform ,name ((x) ((signed-byte 64)) *) 46 | '(sb-simd-avx::f64-from-s64 x))))) 47 | (defun ,name (x) 48 | (typecase x 49 | (,name x) 50 | ,@(case name 51 | (sb-simd:f32 52 | `((double-float (coerce x 'single-float)) 53 | (real (coerce x ',name)))) 54 | (sb-simd:f64 55 | `((sb-simd-sse2:f32 (coerce x 'double-float)) 56 | (real (coerce x ',name)))) 57 | (sb-simd-sse:f32 58 | `((double-float (sb-kernel:%single-float x)) 59 | (sb-simd-sse:s64 (sb-simd-sse::%f32-from-s64 x)) 60 | (real (coerce x ',name)))) 61 | (sb-simd-sse2:f64 62 | `((sb-simd-sse2:f32 (sb-simd-sse2::%f64-from-f32 x)) 63 | (sb-simd-sse2:s64 (sb-simd-sse2::%f64-from-s64 x)) 64 | (real (coerce x ',name)))) 65 | (sb-simd-avx:f32 66 | `((sb-simd-avx:f64 (sb-simd-avx::%f32-from-f64 x)) 67 | (sb-simd-avx:s64 (sb-simd-avx::%f32-from-s64 x)) 68 | (real (coerce x ',name)))) 69 | (sb-simd-avx:f64 70 | `((sb-simd-avx:f32 (sb-simd-avx::%f64-from-f32 x)) 71 | (sb-simd-avx:s64 (sb-simd-avx::%f64-from-s64 x)) 72 | (real (coerce x ',name))))) 73 | (otherwise (,err x)))))))) 74 | (define-scalar-casts () 75 | `(progn 76 | ,@(loop for scalar-cast-record in (filter-function-records #'scalar-cast-record-p) 77 | collect `(define-scalar-cast ,(function-record-name scalar-cast-record)))))) 78 | (define-scalar-casts)) 79 | -------------------------------------------------------------------------------- /test-suite/test-horizontal-functions.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (defmacro define-horizontal-test (horizontal-fn scalar-fn) 4 | (let* ((horizontal-record (find-function-record horizontal-fn)) 5 | (scalar-record (find-function-record scalar-fn)) 6 | (argument-record (first (function-record-required-argument-records horizontal-record))) 7 | (result-record (function-record-result-record horizontal-record)) 8 | (width (value-record-simd-width argument-record)) 9 | (args (loop repeat width collect (gensym))) 10 | (pack (third (simd-info (value-record-name argument-record))))) 11 | (assert (eq (function-record-result-record scalar-record) result-record)) 12 | `(define-test ,horizontal-fn 13 | (let ((generator (find-generator ',(value-record-name result-record)))) 14 | (loop repeat 99 do 15 | (let ,(loop for arg in args collect `(,arg (funcall generator))) 16 | (handler-case 17 | (assert (bitwise= (,horizontal-fn (,pack ,@args)) 18 | (,scalar-fn ,@args))) 19 | (floating-point-overflow ()) 20 | (floating-point-overflow ())))))))) 21 | 22 | (in-package #:sb-simd-sse) 23 | 24 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-and f32-and) 25 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-or f32-or) 26 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-xor f32-xor) 27 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-max f32-max) 28 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-min f32-min) 29 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal+ f32+) 30 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal* f32*) 31 | 32 | (in-package #:sb-simd-sse2) 33 | 34 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-and f64-and) 35 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-or f64-or) 36 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-xor f64-xor) 37 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-max f64-max) 38 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-min f64-min) 39 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal+ f64+) 40 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal* f64*) 41 | 42 | (in-package #:sb-simd-avx) 43 | 44 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-and f32-and) 45 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-or f32-or) 46 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-xor f32-xor) 47 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-max f32-max) 48 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal-min f32-min) 49 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal+ f32+) 50 | (sb-simd-test-suite:define-horizontal-test f32.4-horizontal* f32*) 51 | 52 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-and f64-and) 53 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-or f64-or) 54 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-xor f64-xor) 55 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-max f64-max) 56 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal-min f64-min) 57 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal+ f64+) 58 | (sb-simd-test-suite:define-horizontal-test f64.2-horizontal* f64*) 59 | 60 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal-and f32-and) 61 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal-or f32-or) 62 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal-xor f32-xor) 63 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal-max f32-max) 64 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal-min f32-min) 65 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal+ f32+) 66 | (sb-simd-test-suite:define-horizontal-test f32.8-horizontal* f32*) 67 | 68 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal-and f64-and) 69 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal-or f64-or) 70 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal-xor f64-xor) 71 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal-max f64-max) 72 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal-min f64-min) 73 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal+ f64+) 74 | (sb-simd-test-suite:define-horizontal-test f64.4-horizontal* f64*) 75 | 76 | (in-package #:sb-simd-avx2) 77 | -------------------------------------------------------------------------------- /code/instruction-set-case.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; Lisp supports an image-based workflow, so it is entirely possible that 4 | ;;; a program is transferred from one machine to another one that supports 5 | ;;; different instructions. For example, a video game developer might 6 | ;;; create a game on a machine supporting x86 instructions up to AVX2, 7 | ;;; create an image-based executable, and ship it to a customer whose 8 | ;;; machine supports only x86 instructions up to SSE4.1. 9 | ;;; 10 | ;;; This file defines a macro for conditionally executing code based on the 11 | ;;; available instructions. Special care was taken to make this 12 | ;;; conditional selection as efficient as possible. On x86, it should 13 | ;;; compile to a jump table whose index is computed only at load time and 14 | ;;; whenever the image is restarted. 15 | 16 | ;;; For each instruction-set-case macro appearing in source code, we 17 | ;;; register an IDISPATCH structure that tracks the available clauses, and 18 | ;;; the index of the currently active clause. Whenever the Lisp image is 19 | ;;; restarted, we traverse all IDISPATCH structures and recompute the index 20 | ;;; of the currently active clause. 21 | (defparameter *idispatch-table* 22 | (make-hash-table :weakness :key :synchronized t)) 23 | 24 | (defstruct (idispatch 25 | (:constructor %make-idispatch) 26 | (:predicate idispatchp)) 27 | ;; The clauses supplied to the instruction-set-case macro. 28 | (clauses nil :type list :read-only t) 29 | ;; A list of lists of instruction sets - one for each clause. 30 | (isets nil :type list :read-only t) 31 | ;; The index of the currently active clause. 32 | (index 0 :type (unsigned-byte 8))) 33 | 34 | (defun make-idispatch (isets clauses) 35 | (let ((idispatch (%make-idispatch 36 | :clauses clauses 37 | :isets isets))) 38 | (update-idispatch-index idispatch) 39 | (setf (gethash idispatch *idispatch-table*) t) 40 | idispatch)) 41 | 42 | (defun update-idispatch-index (idispatch) 43 | (loop for index from 0 44 | for instruction-sets in (idispatch-isets idispatch) 45 | when (every #'instruction-set-available-p instruction-sets) do 46 | (setf (idispatch-index idispatch) index) 47 | (return-from update-idispatch-index) 48 | finally 49 | (warn "None of the idispatch clauses in ~@ 50 | ~{~S~%~}is available on this machine." 51 | (idispatch-clauses idispatch)) 52 | (setf (idispatch-index idispatch) 53 | (length (idispatch-isets idispatch))))) 54 | 55 | (defun update-idispatch-indices () 56 | (loop for idispatch being the hash-keys of *idispatch-table* do 57 | (update-idispatch-index idispatch))) 58 | 59 | (pushnew 'update-idispatch-indices sb-ext:*init-hooks*) 60 | 61 | (defmacro instruction-set-case (&body clauses) 62 | "Execute the first clause whose instruction sets are available at run 63 | time, or signal an error if no clause could be run. 64 | 65 | Each clause has to start with an instruction set name, or a list of 66 | instruction set names, followed by a list of statements. 67 | 68 | Example: 69 | 70 | (instruction-set-case 71 | (:sse2 (foo)) 72 | (:avx (bar)) 73 | ((:avx2 :fma) (baz))) 74 | " 75 | (sb-int:with-unique-names (idispatch) 76 | (multiple-value-bind (isets bodies) 77 | (parse-instruction-set-case-clauses clauses) 78 | `(let ((,idispatch (load-time-value (make-idispatch ',isets ',clauses)))) 79 | (declare (idispatch ,idispatch)) 80 | (case (idispatch-index ,idispatch) 81 | ,@(loop for iset in isets 82 | for body in bodies 83 | for index from 0 84 | collect `(,index ,@body)) 85 | (,(length clauses) 86 | (idispatch-no-applicable-clause ,idispatch))))))) 87 | 88 | (defun parse-instruction-set-case-clauses (clauses) 89 | (flet ((clause-iset (clause) 90 | (unless (consp clause) 91 | (error "Not a valid instruction-set-case clause: ~S" 92 | clause)) 93 | (let ((head (first clause))) 94 | (if (listp head) 95 | (mapcar #'find-instruction-set head) 96 | (list (find-instruction-set head))))) 97 | (clause-body (clause) 98 | (rest clause))) 99 | (values 100 | (mapcar #'clause-iset clauses) 101 | (mapcar #'clause-body clauses)))) 102 | 103 | (defun idispatch-no-applicable-clause (idispatch) 104 | (error "None of the idispatch clauses in ~@ 105 | ~{~S~%~}is available on this machine." 106 | (idispatch-clauses idispatch))) 107 | -------------------------------------------------------------------------------- /code/define-simd-casts.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; For each SIMD data type X.Y, define two functions: 4 | ;;; 5 | ;;; 1. A function named X.Y that ensures that an object is either of the 6 | ;;; type X.Y, or a scalar that can be broadcast to the type X.Y. 7 | ;;; 8 | ;;; 2. A function named X.Y! that reinterprets the bits of another SIMD 9 | ;;; pack or suitable scalar as an X.Y. If the supplied argument has 10 | ;;; more bits than the target data type, the excess bits are discarded. 11 | ;;; If the supplied argument has less bits than the target data types, 12 | ;;; the remaining bits are set to zero. 13 | 14 | ;;; The pXXX SIMD types are special - we define their 'cast' function 15 | ;;; manually. 16 | (define-inline sb-simd-sse:p128 (x) (the sb-simd-sse:p128 x)) 17 | (define-inline sb-simd-avx:p128 (x) (the sb-simd-avx:p128 x)) 18 | (define-inline sb-simd-avx:p256 (x) (the sb-simd-avx:p256 x)) 19 | 20 | (macrolet 21 | (;; We cannot call known functions directly in the definition of a 22 | ;; cast, only their VOPs. The reason is that each known function 23 | ;; definition uses casts to automatically upgrade its arguments, and 24 | ;; we don't want to end up with a circular dependency. 25 | (call-vop (instruction-record-name &rest arguments) 26 | (with-accessors ((instruction-set instruction-record-instruction-set) 27 | (vop instruction-record-vop)) 28 | (find-function-record instruction-record-name) 29 | (if (instruction-set-available-p instruction-set) 30 | `(,vop ,@arguments) 31 | `(progn 32 | (missing-instruction 33 | (load-time-value 34 | (find-function-record ',instruction-record-name))) 35 | (touch ,@arguments))))) 36 | (define-simd-cast (simd-cast-record-name) 37 | (with-accessors ((name simd-cast-record-name) 38 | (instruction-set simd-cast-record-instruction-set) 39 | (broadcast-record simd-cast-record-broadcast)) 40 | (find-function-record simd-cast-record-name) 41 | (let* ((broadcast (function-record-name broadcast-record)) 42 | (simd-record (function-record-result-record broadcast-record)) 43 | (real-record (simd-record-scalar-record simd-record)) 44 | (simd-type (value-record-name simd-record)) 45 | (real-type (value-record-name real-record)) 46 | (package (instruction-set-package instruction-set)) 47 | (err (mksym package "CANNOT-CONVERT-TO-" name))) 48 | `(progn 49 | (define-notinline ,err (x) 50 | (error "Cannot convert ~S to ~S." x ',name)) 51 | (sb-c:defknown ,name (t) (values ,name &optional) 52 | (sb-c:foldable) 53 | :overwrite-fndb-silently t) 54 | (sb-c:deftransform ,name ((x) (,simd-type) *) 55 | 'x) 56 | (sb-c:deftransform ,name ((x) (real) *) 57 | '(,broadcast (,real-type x))) 58 | (defun ,name (x) 59 | (typecase x 60 | (,simd-type x) 61 | (real (call-vop ,broadcast (,real-type x))) 62 | (otherwise (,err x)))))))) 63 | (define-reinterpret-cast (reinterpret-cast-record) 64 | (with-accessors ((name reinterpret-cast-record-name) 65 | (instruction-set reinterpret-cast-record-instruction-set) 66 | (reinterpreters reinterpret-cast-record-reinterpreters)) 67 | (find-function-record reinterpret-cast-record) 68 | (let* ((package (instruction-set-package instruction-set)) 69 | (err (mksym package "CANNOT-REINTERPRET-AS-" name))) 70 | `(progn 71 | (define-notinline ,err (x) 72 | (error "Cannot reinterpret ~S as ~S." x ',name)) 73 | (defun ,name (x) 74 | (typecase x 75 | ,@(loop for reinterpreter in reinterpreters 76 | for argument-record = (first (function-record-required-argument-records reinterpreter)) 77 | collect 78 | `(,(value-record-name argument-record) 79 | (call-vop ,(function-record-name reinterpreter) x))) 80 | (otherwise (,err x)))))))) 81 | (define-simd-casts () 82 | `(progn 83 | ,@(loop for simd-cast-record in (filter-function-records #'simd-cast-record-p) 84 | collect 85 | `(define-simd-cast ,(simd-cast-record-name simd-cast-record))))) 86 | (define-reinterpret-casts () 87 | `(progn 88 | ,@(loop for reinterpret-cast-record in (filter-function-records #'reinterpret-cast-record-p) 89 | collect 90 | `(define-reinterpret-cast ,(reinterpret-cast-record-name reinterpret-cast-record)))))) 91 | (define-simd-casts) 92 | (define-reinterpret-casts)) 93 | -------------------------------------------------------------------------------- /code/define-vref-vops.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-vm) 2 | 3 | ;;; Both load- and store VOPs are augmented with an auxiliary last argument 4 | ;;; that is a constant addend for the address calculation. This addend is 5 | ;;; zero by default, but we can sometimes transform the code for the index 6 | ;;; calculation such that we have a nonzero addend. We also generate two 7 | ;;; variants of the VOP - one for the general case, and one for the case 8 | ;;; where the index is a compile-time constant. 9 | 10 | (macrolet 11 | ((define-vref-vop (vref-record-name) 12 | (with-accessors ((name sb-simd-internals:vref-record-name) 13 | (vop sb-simd-internals:vref-record-vop) 14 | (vop-c sb-simd-internals:vref-record-vop-c) 15 | (mnemonic sb-simd-internals:vref-record-mnemonic) 16 | (value-record sb-simd-internals:vref-record-value-record) 17 | (vector-record sb-simd-internals:vref-record-vector-record) 18 | (store sb-simd-internals:store-record-p)) 19 | (sb-simd-internals:find-function-record vref-record-name) 20 | (let* ((vector-type (sb-simd-internals:value-record-type vector-record)) 21 | (vector-primitive-type (sb-simd-internals:value-record-primitive-type vector-record)) 22 | (value-scs (sb-simd-internals:value-record-scs value-record)) 23 | (value-type (sb-simd-internals:value-record-type value-record)) 24 | (value-primitive-type (sb-simd-internals:value-record-primitive-type value-record)) 25 | (scalar-record 26 | (etypecase value-record 27 | (sb-simd-internals:simd-record (sb-simd-internals:simd-record-scalar-record value-record)) 28 | (sb-simd-internals:value-record value-record))) 29 | (bits-per-element (sb-simd-internals:value-record-bits scalar-record)) 30 | (bytes-per-element (ceiling bits-per-element 8)) 31 | (displacement 32 | (multiple-value-bind (lo hi) 33 | (displacement-bounds other-pointer-lowtag bits-per-element vector-data-offset) 34 | `(integer ,lo ,hi)))) 35 | (multiple-value-bind (index-scs scale) 36 | (if (>= bytes-per-element (ash 1 n-fixnum-tag-bits)) 37 | (values '(any-reg signed-reg unsigned-reg) `(index-scale ,bytes-per-element index)) 38 | (values '(signed-reg unsigned-reg) bytes-per-element)) 39 | `(progn 40 | (defknown ,vop (,@(when store `(,value-type)) ,vector-type index ,displacement) 41 | (values ,value-type &optional) 42 | (always-translatable) 43 | :overwrite-fndb-silently t) 44 | (define-vop (,vop) 45 | (:translate ,vop) 46 | (:policy :fast-safe) 47 | (:args 48 | ,@(when store `((value :scs ,value-scs :target result))) 49 | (vector :scs (descriptor-reg)) 50 | (index :scs ,index-scs)) 51 | (:info addend) 52 | (:arg-types 53 | ,@(when store `(,value-primitive-type)) 54 | ,vector-primitive-type 55 | positive-fixnum 56 | (:constant ,displacement)) 57 | (:results (result :scs ,value-scs)) 58 | (:result-types ,value-primitive-type) 59 | (:generator 60 | 2 61 | ,@(let ((ea `(ea (+ (* vector-data-offset n-word-bytes) 62 | (* addend ,bytes-per-element) 63 | (- other-pointer-lowtag)) 64 | vector index ,scale))) 65 | (if store 66 | `((inst ,mnemonic ,ea value) 67 | (move result value)) 68 | `((inst ,mnemonic result ,ea)))))) 69 | (define-vop (,vop-c) 70 | (:translate ,vop) 71 | (:policy :fast-safe) 72 | (:args ,@(when store `((value :scs ,value-scs :target result))) 73 | (vector :scs (descriptor-reg))) 74 | (:info index addend) 75 | (:arg-types ,@(when store `(,value-primitive-type)) 76 | ,vector-primitive-type 77 | (:constant low-index) 78 | (:constant ,displacement)) 79 | (:results (result :scs ,value-scs)) 80 | (:result-types ,value-primitive-type) 81 | (:generator 82 | 1 83 | ,@(let ((ea `(ea (+ (* vector-data-offset n-word-bytes) 84 | (* ,bytes-per-element (+ index addend)) 85 | (- other-pointer-lowtag)) 86 | vector))) 87 | (if store 88 | `((inst ,mnemonic ,ea value) 89 | (move result value)) 90 | `((inst ,mnemonic result ,ea))))))))))) 91 | (define-vref-vops () 92 | `(progn 93 | ,@(loop for vref-record 94 | in (sb-simd-internals:filter-available-function-records 95 | #'sb-simd-internals:vref-record-p) 96 | collect `(define-vref-vop ,(sb-simd-internals:vref-record-name vref-record)))))) 97 | (define-vref-vops)) 98 | -------------------------------------------------------------------------------- /test-suite/test-arefs.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (defmacro define-aref-test (aref element-type simd-width &optional (unpacker 'identity)) 4 | (let ((value-symbols (prefixed-symbols "V" simd-width)) 5 | (zero (coerce 0 element-type)) 6 | (one (coerce 1 element-type))) 7 | `(define-test ,aref 8 | ;; Create an array of zeros and successively replace zeros with 9 | ;; ones. After each replacement, check whether a load still 10 | ;; produces the expected result. 11 | (let ((array (make-array '(,simd-width) 12 | :element-type ',element-type 13 | :initial-element ,zero))) 14 | (multiple-value-bind ,value-symbols (,unpacker (,aref array 0)) 15 | ,@(loop for value-symbol in value-symbols 16 | collect `(is (= ,value-symbol ,zero)))) 17 | (loop for index below ,simd-width do 18 | (setf (aref array index) ,one) 19 | (loop for number in (multiple-value-list (,unpacker (,aref array 0))) 20 | for position from 0 21 | do (if (<= position index) 22 | (is (= number ,one)) 23 | (is (= number ,zero)))))) 24 | ;; Create an array with twice as many elements as the width of the 25 | ;; SIMD data type, and whose lower half consists of all zeros and 26 | ;; whose upper half consists of all ones. Check that all valid 27 | ;; loads from this array have the expected state. 28 | (let ((array (make-array '(,(* 2 simd-width)) 29 | :element-type ',element-type 30 | :initial-contents 31 | (append (make-list ,simd-width :initial-element ,zero) 32 | (make-list ,simd-width :initial-element ,one))))) 33 | (loop for index below ,simd-width do 34 | (multiple-value-bind ,value-symbols 35 | (,unpacker (,aref array index)) 36 | ,@(loop for value-symbol in value-symbols 37 | for position from 0 38 | collect `(if (< (+ ,position index) ,simd-width) 39 | (is (= ,value-symbol ,zero)) 40 | (is (= ,value-symbol ,one)))))))))) 41 | 42 | (in-package #:sb-simd) 43 | 44 | (sb-simd-test-suite:define-aref-test u8-aref (unsigned-byte 8) 1) 45 | (sb-simd-test-suite:define-aref-test u16-aref (unsigned-byte 16) 1) 46 | (sb-simd-test-suite:define-aref-test u32-aref (unsigned-byte 32) 1) 47 | (sb-simd-test-suite:define-aref-test u64-aref (unsigned-byte 64) 1) 48 | (sb-simd-test-suite:define-aref-test s8-aref (signed-byte 8) 1) 49 | (sb-simd-test-suite:define-aref-test s16-aref (signed-byte 16) 1) 50 | (sb-simd-test-suite:define-aref-test s32-aref (signed-byte 32) 1) 51 | (sb-simd-test-suite:define-aref-test s64-aref (signed-byte 64) 1) 52 | (sb-simd-test-suite:define-aref-test f32-aref single-float 1) 53 | (sb-simd-test-suite:define-aref-test f64-aref double-float 1) 54 | 55 | (in-package #:sb-simd-sse) 56 | 57 | (sb-simd-test-suite:define-aref-test f32.4-aref single-float 4 f32.4-values) 58 | 59 | (in-package #:sb-simd-sse2) 60 | 61 | (sb-simd-test-suite:define-aref-test f64.2-aref double-float 2 f64.2-values) 62 | (sb-simd-test-suite:define-aref-test u8.16-aref (unsigned-byte 8) 16 u8.16-values) 63 | (sb-simd-test-suite:define-aref-test u16.8-aref (unsigned-byte 16) 8 u16.8-values) 64 | (sb-simd-test-suite:define-aref-test u32.4-aref (unsigned-byte 32) 4 u32.4-values) 65 | (sb-simd-test-suite:define-aref-test u64.2-aref (unsigned-byte 64) 2 u64.2-values) 66 | (sb-simd-test-suite:define-aref-test s8.16-aref (signed-byte 8) 16 s8.16-values) 67 | (sb-simd-test-suite:define-aref-test s16.8-aref (signed-byte 16) 8 s16.8-values) 68 | (sb-simd-test-suite:define-aref-test s32.4-aref (signed-byte 32) 4 s32.4-values) 69 | (sb-simd-test-suite:define-aref-test s64.2-aref (signed-byte 64) 2 s64.2-values) 70 | 71 | (in-package #:sb-simd-avx) 72 | 73 | (sb-simd-test-suite:define-aref-test f32.4-aref single-float 4 f32.4-values) 74 | (sb-simd-test-suite:define-aref-test f32.8-aref single-float 8 f32.8-values) 75 | (sb-simd-test-suite:define-aref-test f64.2-aref double-float 2 f64.2-values) 76 | (sb-simd-test-suite:define-aref-test f64.4-aref double-float 4 f64.4-values) 77 | (sb-simd-test-suite:define-aref-test u8.16-aref (unsigned-byte 8) 16 u8.16-values) 78 | (sb-simd-test-suite:define-aref-test u16.8-aref (unsigned-byte 16) 8 u16.8-values) 79 | (sb-simd-test-suite:define-aref-test u32.4-aref (unsigned-byte 32) 4 u32.4-values) 80 | (sb-simd-test-suite:define-aref-test u64.2-aref (unsigned-byte 64) 2 u64.2-values) 81 | (sb-simd-test-suite:define-aref-test s8.16-aref (signed-byte 8) 16 s8.16-values) 82 | (sb-simd-test-suite:define-aref-test s16.8-aref (signed-byte 16) 8 s16.8-values) 83 | (sb-simd-test-suite:define-aref-test s32.4-aref (signed-byte 32) 4 s32.4-values) 84 | (sb-simd-test-suite:define-aref-test s64.2-aref (signed-byte 64) 2 s64.2-values) 85 | (sb-simd-test-suite:define-aref-test u8.32-aref (unsigned-byte 8) 32 u8.32-values) 86 | (sb-simd-test-suite:define-aref-test u16.16-aref (unsigned-byte 16) 16 u16.16-values) 87 | (sb-simd-test-suite:define-aref-test u32.8-aref (unsigned-byte 32) 8 u32.8-values) 88 | (sb-simd-test-suite:define-aref-test u64.4-aref (unsigned-byte 64) 4 u64.4-values) 89 | (sb-simd-test-suite:define-aref-test s8.32-aref (signed-byte 8) 32 s8.32-values) 90 | (sb-simd-test-suite:define-aref-test s16.16-aref (signed-byte 16) 16 s16.16-values) 91 | (sb-simd-test-suite:define-aref-test s32.8-aref (signed-byte 32) 8 s32.8-values) 92 | (sb-simd-test-suite:define-aref-test s64.4-aref (signed-byte 64) 4 s64.4-values) 93 | -------------------------------------------------------------------------------- /test-suite/test-suite.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | ;; A list of the names of all tests defined with the TEST macro. 4 | (defparameter *tests* '()) 5 | 6 | ;; The value of *RANDOM-STATE* when the test suite encountered the most 7 | ;; recent error, or NIL, if there have been no recent errors. 8 | (defvar *failed-random-state* nil) 9 | 10 | ;; The number of tests that have been run so far. 11 | (defvar *test-count*) 12 | 13 | ;; The number of successful checks that have been performed so far. 14 | (defvar *pass-count*) 15 | 16 | ;; The number of checks that have been performed in the current test. 17 | (defvar *check-count* 0) 18 | 19 | (defun call-with-random-state (thunk) 20 | (let ((*random-state* 21 | (if (not *failed-random-state*) 22 | (load-time-value (make-random-state t)) 23 | *failed-random-state*))) 24 | (setf *failed-random-state* (make-random-state *random-state*)) 25 | (multiple-value-prog1 (funcall thunk) 26 | (setf *failed-random-state* nil)))) 27 | 28 | (defmacro with-random-state (&body body) 29 | `(call-with-random-state (lambda () ,@body))) 30 | 31 | (defun call-with-test-harness (thunk) 32 | (if (and (boundp '*test-count*) 33 | (boundp '*pass-count*)) 34 | (funcall thunk) 35 | (let* ((*test-count* 0) 36 | (*pass-count* 0)) 37 | (with-random-state (funcall thunk)) 38 | (report *test-count* *pass-count*)))) 39 | 40 | (defmacro with-test-harness (&body body) 41 | `(call-with-test-harness (lambda () ,@body))) 42 | 43 | (defun report (test-count pass-count) 44 | (format t "~&Success: ~s test~:p, ~s check~:p.~%" test-count pass-count) 45 | (finish-output)) 46 | 47 | ;;; Ensure that DEFINE-TEST never overwrites function names in other 48 | ;;; packages. 49 | (defun intern-test-name (test-name) 50 | (let ((test-suite-package #.*package*) 51 | (name (symbol-name test-name)) 52 | (package (symbol-package test-name))) 53 | (cond ((eq package test-suite-package) 54 | test-name) 55 | ((eq (nth-value 1 (find-symbol name package)) :external) 56 | (intern (format nil "~A:~A" (package-name package) name) test-suite-package)) 57 | (t 58 | (intern (format nil "~A:~A" (package-name package) name) test-suite-package))))) 59 | 60 | (defmacro define-test (test-name &body body) 61 | "Define a test function and add it to *TESTS*." 62 | (let ((name (intern-test-name test-name))) 63 | `(prog1 ',name 64 | (defun ,name () 65 | (declare (optimize (debug 3) (safety 3))) 66 | (with-test-harness 67 | (enter-test ',name) 68 | ,@body)) 69 | (pushnew ',name *tests*)))) 70 | 71 | (defun enter-test (test-name) 72 | (incf *test-count*) 73 | (setf *check-count* 0) 74 | (format t "~&~A" (string test-name)) 75 | (finish-output)) 76 | 77 | (defun pass () 78 | (incf *pass-count*) 79 | (when (zerop (logand *check-count* (incf *check-count*))) 80 | (write-char #\.)) 81 | (values)) 82 | 83 | (defmacro is (test-form) 84 | "Assert that TEST-FORM evaluates to true." 85 | `(progn 86 | (assert ,test-form) 87 | (pass))) 88 | 89 | (defun expect-condition (expected thunk) 90 | (block nil 91 | (flet ((handler (condition) 92 | (cond ((typep condition expected) 93 | (pass) 94 | (return)) 95 | (t (error "Expected to signal ~s, but got ~s:~%~a" 96 | expected (type-of condition) condition))))) 97 | (handler-bind ((condition #'handler)) 98 | (funcall thunk))) 99 | (error "Expected to signal ~s, but got nothing." expected))) 100 | 101 | (defmacro signals (condition &body body) 102 | "Assert that `body' signals a condition of type `condition'." 103 | `(expect-condition ',condition (lambda () ,@body))) 104 | 105 | (defun all-tests () 106 | *tests*) 107 | 108 | ;;; This function catches particularly hideous bugs - exported symbols with 109 | ;;; no attached definition. In case some exported symbols are 110 | ;;; intentionally without definition, they can be excluded via the SKIP 111 | ;;; argument. 112 | (defun check-package (package &key skip) 113 | (let ((skip (loop for name in skip collect (intern (string name) package)))) 114 | (do-external-symbols (symbol package) 115 | (unless (member symbol skip) 116 | (unless (symbol-defined-p symbol) 117 | (error "~@" 119 | (package-name package) 120 | (symbol-name symbol)))))) 121 | (pass)) 122 | 123 | (defun symbol-defined-p (symbol) 124 | (or (find-class symbol nil) 125 | (boundp symbol) 126 | (fboundp symbol) 127 | (fboundp `(setf ,symbol)) 128 | (macro-function symbol) 129 | (special-operator-p symbol) 130 | (type-specifier-p symbol))) 131 | 132 | (defun type-specifier-p (object) 133 | (ignore-errors (typep 42 object) t)) 134 | 135 | (defun run-tests (&rest tests) 136 | (with-test-harness (mapc #'funcall (shuffle tests))) 137 | (values)) 138 | 139 | (defun run-test-suite () 140 | (format t "== Testing SB-SIMD ==~%") 141 | (format t "The library exports ~D functions." 142 | (length (all-exported-functions))) 143 | (apply #'run-tests (all-tests))) 144 | 145 | (defun all-instruction-sets () 146 | (remove-duplicates 147 | (loop for iset being the hash-values of sb-simd-internals:*instruction-sets* 148 | collect iset))) 149 | 150 | (defun all-exported-functions () 151 | (remove-duplicates 152 | (loop for iset in (all-instruction-sets) 153 | for package = (instruction-set-package iset) 154 | append 155 | (loop for symbol being the external-symbols of package 156 | when (fboundp symbol) 157 | collect symbol)))) 158 | -------------------------------------------------------------------------------- /code/instruction-sets/sse.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-sse) 2 | 3 | (define-instruction-set :sse 4 | (:test (sse-supported-p)) 5 | (:include :x86-64) 6 | (:scalars 7 | (f32 32 single-float #:single-float (#:single-reg))) 8 | (:simd-packs 9 | (p128 nil 128 #:simd-pack (#:int-sse-reg #:double-sse-reg #:single-sse-reg)) 10 | (f32.4 f32 128 #:simd-pack-single (#:single-sse-reg))) 11 | (:simd-casts 12 | (f32.4 f32.4-broadcast)) 13 | (:reinterpret-casts 14 | (f32! f32!-from-p128) 15 | (f32.4! f32.4!-from-f32)) 16 | (:instructions 17 | ;; f32 18 | (f32-from-s64 nil (f32) (s64) :cost 5 :encoding :custom) 19 | (f32!-from-p128 nil (f32) (p128) :cost 1 :encoding :custom :always-translatable nil) 20 | (two-arg-f32-and #:andps (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 21 | (two-arg-f32-or #:orps (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 22 | (two-arg-f32-xor #:xorps (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 23 | (two-arg-f32-max #:maxss (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 24 | (two-arg-f32-min #:minss (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 25 | (two-arg-f32+ #:addss (f32) (f32 f32) :cost 1 :encoding :sse :associative t) 26 | (two-arg-f32- #:subss (f32) (f32 f32) :cost 2 :encoding :sse) 27 | (two-arg-f32* #:mulss (f32) (f32 f32) :cost 2 :encoding :sse :associative t) 28 | (two-arg-f32/ #:divss (f32) (f32 f32) :cost 8 :encoding :sse) 29 | (two-arg-f32= #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:eq) :associative t) 30 | (two-arg-f32/= #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:neq) :associative t) 31 | (two-arg-f32< #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:lt)) 32 | (two-arg-f32<= #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:le)) 33 | (two-arg-f32> #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:nle)) 34 | (two-arg-f32>= #:cmpss (u32) (f32 f32) :cost 4 :encoding :custom :prefix '(:nlt)) 35 | (f32-andc1 #:andnps (f32) (f32 f32) :cost 1 :encoding :sse) 36 | (f32-not nil (f32) (f32) :cost 1 :encoding :fake-vop) 37 | (f32-reciprocal #:rcpss (f32) (f32) :cost 5) 38 | (f32-rsqrt #:rsqrtss (f32) (f32) :cost 5) 39 | (f32-sqrt #:sqrtss (f32) (f32) :cost 15) 40 | ;; f32.4 41 | (f32.4!-from-f32 #:movups (f32.4) (f32) :cost 1 :encoding :move) 42 | (make-f32.4 nil (f32.4) (f32 f32 f32 f32) :cost 1 :encoding :fake-vop) 43 | (f32.4-values nil (f32 f32 f32 f32) (f32.4) :cost 1 :encoding :fake-vop) 44 | (f32.4-broadcast nil (f32.4) (f32) :cost 1 :encoding :fake-vop) 45 | (two-arg-f32.4-and #:andps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse :associative t) 46 | (two-arg-f32.4-or #:orps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse :associative t) 47 | (two-arg-f32.4-xor #:xorps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse :associative t) 48 | (two-arg-f32.4-max #:maxps (f32.4) (f32.4 f32.4) :cost 3 :encoding :sse :associative t) 49 | (two-arg-f32.4-min #:minps (f32.4) (f32.4 f32.4) :cost 3 :encoding :sse :associative t) 50 | (two-arg-f32.4+ #:addps (f32.4) (f32.4 f32.4) :cost 2 :encoding :sse :associative t) 51 | (two-arg-f32.4- #:subps (f32.4) (f32.4 f32.4) :cost 2 :encoding :sse) 52 | (two-arg-f32.4* #:mulps (f32.4) (f32.4 f32.4) :cost 2 :encoding :sse :associative t) 53 | (two-arg-f32.4/ #:divps (f32.4) (f32.4 f32.4) :cost 8 :encoding :sse) 54 | (f32.4-horizontal-and nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 55 | (f32.4-horizontal-or nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 56 | (f32.4-horizontal-xor nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 57 | (f32.4-horizontal-max nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 58 | (f32.4-horizontal-min nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 59 | (f32.4-horizontal+ nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 60 | (f32.4-horizontal* nil (f32) (f32.4) :cost 5 :encoding :fake-vop) 61 | (f32.4-andc1 #:andnps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse) 62 | (f32.4-not nil (f32.4) (f32.4) :cost 1 :encoding :fake-vop) 63 | (f32.4-reciprocal #:rcpps (f32.4) (f32.4) :cost 5) 64 | (f32.4-rsqrt #:rsqrtps (f32.4) (f32.4) :cost 5) 65 | (f32.4-sqrt #:sqrtps (f32.4) (f32.4) :cost 15) 66 | (f32.4-shuffle #:shufps (f32.4) (f32.4 f32.4 sb-simd-x86-64::imm8) :cost 1 :encoding :sse) 67 | (f32.4-unpacklo #:unpcklps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse) 68 | (f32.4-unpackhi #:unpckhps (f32.4) (f32.4 f32.4) :cost 1 :encoding :sse) 69 | (f32.4-movemask #:movmskps (u4) (f32.4) :cost 1)) 70 | (:loads 71 | (f32-load #:movss f32 f32vec f32-array f32-aref f32-row-major-aref) 72 | (f32.4-load #:movups f32.4 f32vec f32-array f32.4-aref f32.4-row-major-aref)) 73 | (:stores 74 | (f32-store #:movss f32 f32vec f32-array f32-aref f32-row-major-aref) 75 | (f32.4-store #:movups f32.4 f32vec f32-array f32.4-aref f32.4-row-major-aref) 76 | (f32.4-ntstore #:movntps f32.4 f32vec f32-array f32.4-non-temporal-aref f32.4-non-temporal-row-major-aref)) 77 | (:associatives 78 | (f32-and two-arg-f32-and +f32-true+) 79 | (f32-or two-arg-f32-or +f32-false+) 80 | (f32-xor two-arg-f32-xor +f32-false+) 81 | (f32-max two-arg-f32-max nil) 82 | (f32-min two-arg-f32-min nil) 83 | (f32+ two-arg-f32+ 0f0) 84 | (f32* two-arg-f32* 1f0) 85 | (f32.4-and two-arg-f32.4-and +f32-true+) 86 | (f32.4-or two-arg-f32.4-or +f32-false+) 87 | (f32.4-xor two-arg-f32.4-xor +f32-false+) 88 | (f32.4-max two-arg-f32.4-max nil) 89 | (f32.4-min two-arg-f32.4-min nil) 90 | (f32.4+ two-arg-f32.4+ 0f0) 91 | (f32.4* two-arg-f32.4* 1f0)) 92 | (:comparisons 93 | (f32= two-arg-f32= u32-and +u32-true+) 94 | (f32< two-arg-f32< u32-and +u32-true+) 95 | (f32<= two-arg-f32<= u32-and +u32-true+) 96 | (f32> two-arg-f32> u32-and +u32-true+) 97 | (f32>= two-arg-f32>= u32-and +u32-true+)) 98 | (:reducers 99 | (f32- two-arg-f32- 0f0) 100 | (f32/ two-arg-f32/ 1f0) 101 | (f32.4- two-arg-f32.4- 0f0) 102 | (f32.4/ two-arg-f32.4/ 1f0)) 103 | (:unequals 104 | (f32/= two-arg-f32/= u32-and +u32-true+))) 105 | 106 | -------------------------------------------------------------------------------- /code/define-modify-macros.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd) 2 | 3 | (define-modify-macro f32-incf (&optional (num 1f0)) two-arg-f32+) 4 | (define-modify-macro f32-decf (&optional (num 1f0)) two-arg-f32-) 5 | 6 | (define-modify-macro f64-incf (&optional (num 1d0)) two-arg-f64+) 7 | (define-modify-macro f64-decf (&optional (num 1d0)) two-arg-f64-) 8 | 9 | (define-modify-macro u8-incf (&optional (num 1)) two-arg-u8+) 10 | (define-modify-macro u8-decf (&optional (num 1)) two-arg-u8-) 11 | 12 | (define-modify-macro u16-incf (&optional (num 1)) two-arg-u16+) 13 | (define-modify-macro u16-decf (&optional (num 1)) two-arg-u16-) 14 | 15 | (define-modify-macro u32-incf (&optional (num 1)) two-arg-u32+) 16 | (define-modify-macro u32-decf (&optional (num 1)) two-arg-u32-) 17 | 18 | (define-modify-macro u64-incf (&optional (num 1)) two-arg-u64+) 19 | (define-modify-macro u64-decf (&optional (num 1)) two-arg-u64-) 20 | 21 | (define-modify-macro s8-incf (&optional (num 1)) two-arg-s8+) 22 | (define-modify-macro s8-decf (&optional (num 1)) two-arg-s8-) 23 | 24 | (define-modify-macro s16-incf (&optional (num 1)) two-arg-s16+) 25 | (define-modify-macro s16-decf (&optional (num 1)) two-arg-s16-) 26 | 27 | (define-modify-macro s32-incf (&optional (num 1)) two-arg-s32+) 28 | (define-modify-macro s32-decf (&optional (num 1)) two-arg-s32-) 29 | 30 | (define-modify-macro s64-incf (&optional (num 1)) two-arg-s64+) 31 | (define-modify-macro s64-decf (&optional (num 1)) two-arg-s64-) 32 | 33 | (in-package #:sb-simd-sse) 34 | 35 | (define-modify-macro f32-incf (&optional (num 1f0)) two-arg-f32+) 36 | (define-modify-macro f32-decf (&optional (num 1f0)) two-arg-f32-) 37 | 38 | (define-modify-macro f32.4-incf (&optional (num 1f0)) two-arg-f32.4+) 39 | (define-modify-macro f32.4-decf (&optional (num 1f0)) two-arg-f32.4-) 40 | 41 | (in-package #:sb-simd-sse2) 42 | 43 | (define-modify-macro f64-incf (&optional (num 1d0)) two-arg-f64+) 44 | (define-modify-macro f64-decf (&optional (num 1d0)) two-arg-f64-) 45 | 46 | (define-modify-macro f64.2-incf (&optional (num 1d0)) two-arg-f64.2+) 47 | (define-modify-macro f64.2-decf (&optional (num 1d0)) two-arg-f64.2-) 48 | 49 | (define-modify-macro u8.16-incf (&optional (num 1)) two-arg-u8.16+) 50 | (define-modify-macro u8.16-decf (&optional (num 1)) two-arg-u8.16-) 51 | 52 | (define-modify-macro u16.8-incf (&optional (num 1)) two-arg-u16.8+) 53 | (define-modify-macro u16.8-decf (&optional (num 1)) two-arg-u16.8-) 54 | 55 | (define-modify-macro u32.4-incf (&optional (num 1)) two-arg-u32.4+) 56 | (define-modify-macro u32.4-decf (&optional (num 1)) two-arg-u32.4-) 57 | 58 | (define-modify-macro u64.2-incf (&optional (num 1)) two-arg-u64.2+) 59 | (define-modify-macro u64.2-decf (&optional (num 1)) two-arg-u64.2-) 60 | 61 | (define-modify-macro s8.16-incf (&optional (num 1)) two-arg-s8.16+) 62 | (define-modify-macro s8.16-decf (&optional (num 1)) two-arg-s8.16-) 63 | 64 | (define-modify-macro s16.8-incf (&optional (num 1)) two-arg-s16.8+) 65 | (define-modify-macro s16.8-decf (&optional (num 1)) two-arg-s16.8-) 66 | 67 | (define-modify-macro s32.4-incf (&optional (num 1)) two-arg-s32.4+) 68 | (define-modify-macro s32.4-decf (&optional (num 1)) two-arg-s32.4-) 69 | 70 | (define-modify-macro s64.2-incf (&optional (num 1)) two-arg-s64.2+) 71 | (define-modify-macro s64.2-decf (&optional (num 1)) two-arg-s64.2-) 72 | 73 | (in-package #:sb-simd-avx) 74 | 75 | (define-modify-macro f32-incf (&optional (num 1f0)) two-arg-f32+) 76 | (define-modify-macro f32-decf (&optional (num 1f0)) two-arg-f32-) 77 | 78 | (define-modify-macro f64-incf (&optional (num 1d0)) two-arg-f64+) 79 | (define-modify-macro f64-decf (&optional (num 1d0)) two-arg-f64-) 80 | 81 | (define-modify-macro f32.4-incf (&optional (num 1f0)) two-arg-f32.4+) 82 | (define-modify-macro f32.4-decf (&optional (num 1f0)) two-arg-f32.4-) 83 | 84 | (define-modify-macro f32.8-incf (&optional (num 1f0)) two-arg-f32.8+) 85 | (define-modify-macro f32.8-decf (&optional (num 1f0)) two-arg-f32.8-) 86 | 87 | (define-modify-macro f64.2-incf (&optional (num 1d0)) two-arg-f64.2+) 88 | (define-modify-macro f64.2-decf (&optional (num 1d0)) two-arg-f64.2-) 89 | 90 | (define-modify-macro f64.4-incf (&optional (num 1d0)) two-arg-f64.4+) 91 | (define-modify-macro f64.4-decf (&optional (num 1d0)) two-arg-f64.4-) 92 | 93 | (define-modify-macro u8.16-incf (&optional (num 1)) two-arg-u8.16+) 94 | (define-modify-macro u8.16-decf (&optional (num 1)) two-arg-u8.16-) 95 | 96 | (define-modify-macro u16.8-incf (&optional (num 1)) two-arg-u16.8+) 97 | (define-modify-macro u16.8-decf (&optional (num 1)) two-arg-u16.8-) 98 | 99 | (define-modify-macro u32.4-incf (&optional (num 1)) two-arg-u32.4+) 100 | (define-modify-macro u32.4-decf (&optional (num 1)) two-arg-u32.4-) 101 | 102 | (define-modify-macro u64.2-incf (&optional (num 1)) two-arg-u64.2+) 103 | (define-modify-macro u64.2-decf (&optional (num 1)) two-arg-u64.2-) 104 | 105 | (define-modify-macro s8.16-incf (&optional (num 1)) two-arg-s8.16+) 106 | (define-modify-macro s8.16-decf (&optional (num 1)) two-arg-s8.16-) 107 | 108 | (define-modify-macro s16.8-incf (&optional (num 1)) two-arg-s16.8+) 109 | (define-modify-macro s16.8-decf (&optional (num 1)) two-arg-s16.8-) 110 | 111 | (define-modify-macro s32.4-incf (&optional (num 1)) two-arg-s32.4+) 112 | (define-modify-macro s32.4-decf (&optional (num 1)) two-arg-s32.4-) 113 | 114 | (define-modify-macro s64.2-incf (&optional (num 1)) two-arg-s64.2+) 115 | (define-modify-macro s64.2-decf (&optional (num 1)) two-arg-s64.2-) 116 | 117 | (in-package #:sb-simd-avx2) 118 | 119 | (define-modify-macro u8.32-incf (&optional (num 1)) two-arg-u8.32+) 120 | (define-modify-macro u8.32-decf (&optional (num 1)) two-arg-u8.32-) 121 | 122 | (define-modify-macro u16.16-incf (&optional (num 1)) two-arg-u16.16+) 123 | (define-modify-macro u16.16-decf (&optional (num 1)) two-arg-u16.16-) 124 | 125 | (define-modify-macro u32.8-incf (&optional (num 1)) two-arg-u32.8+) 126 | (define-modify-macro u32.8-decf (&optional (num 1)) two-arg-u32.8-) 127 | 128 | (define-modify-macro u64.4-incf (&optional (num 1)) two-arg-u64.4+) 129 | (define-modify-macro u64.4-decf (&optional (num 1)) two-arg-u64.4-) 130 | 131 | (define-modify-macro s8.32-incf (&optional (num 1)) two-arg-s8.32+) 132 | (define-modify-macro s8.32-decf (&optional (num 1)) two-arg-s8.32-) 133 | 134 | (define-modify-macro s16.16-incf (&optional (num 1)) two-arg-s16.16+) 135 | (define-modify-macro s16.16-decf (&optional (num 1)) two-arg-s16.16-) 136 | 137 | (define-modify-macro s32.8-incf (&optional (num 1)) two-arg-s32.8+) 138 | (define-modify-macro s32.8-decf (&optional (num 1)) two-arg-s32.8-) 139 | 140 | (define-modify-macro s64.4-incf (&optional (num 1)) two-arg-s64.4+) 141 | (define-modify-macro s64.4-decf (&optional (num 1)) two-arg-s64.4-) 142 | 143 | -------------------------------------------------------------------------------- /code/instruction-set.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;; An instruction set is a description for a set of data types and 4 | ;;; functions in a particular package. 5 | 6 | (defclass instruction-set (printable) 7 | (;; The instruction set's name. 8 | (%name 9 | :type keyword 10 | :initarg :name 11 | :initform (required-argument :name) 12 | :reader instruction-set-name) 13 | ;; The package that holds the instruction set's symbols. 14 | (%package 15 | :type package 16 | :initarg :package 17 | :initform (required-argument :package) 18 | :reader instruction-set-package) 19 | ;; A thunk, returning whether the instruction set is currently available. 20 | ;; Such a run time check is needed in the case where an executable is 21 | ;; created on one machine and run on another machine. In that case, some 22 | ;; of the instructions sets available on the former might not be available 23 | ;; on the latter. 24 | (%test 25 | :type function 26 | :initarg :test :initform (required-argument :test) 27 | :reader instruction-set-test) 28 | ;; A list of instruction sets included by this one. 29 | (%includes 30 | :type list 31 | :initarg :includes 32 | :initform '() 33 | :reader instruction-set-includes) 34 | ;; A hash table, mapping from function records of scalar functions to 35 | ;; lists of function records of SIMD functions. 36 | (%vectorizer-table 37 | :type hash-table 38 | :initform (make-hash-table :test #'eq) 39 | :reader instruction-set-vectorizer-table))) 40 | 41 | (defmethod printable-slot-plist append ((instruction-set instruction-set)) 42 | (list :name (instruction-set-name instruction-set) 43 | :package (instruction-set-package instruction-set))) 44 | 45 | (defun instruction-set-p (x) 46 | (typep x 'instruction-set)) 47 | 48 | (defun instruction-set-available-p (instruction-set) 49 | (funcall (instruction-set-test instruction-set))) 50 | 51 | ;;; Returns a list containing the name of the supplied instruction set, and 52 | ;;; the names of all instruction sets that are directly or indirectly 53 | ;;; included by it. 54 | (defun included-instruction-sets (instruction-set) 55 | (let ((result '())) 56 | (labels ((scan (instruction-set) 57 | (with-accessors ((includes instruction-set-includes)) instruction-set 58 | (unless (member instruction-set result) 59 | (push instruction-set result) 60 | (mapcar #'scan includes))))) 61 | (scan instruction-set) 62 | (nreverse result)))) 63 | 64 | (defun register-vectorizer (X-record X.Y-record) 65 | (assert (scalar-function-record-p X-record)) 66 | (assert (simd-function-record-p X.Y-record)) 67 | (with-accessors ((vectorizer-table instruction-set-vectorizer-table)) 68 | (function-record-instruction-set X.Y-record) 69 | (pushnew X.Y-record (gethash X-record vectorizer-table '())))) 70 | 71 | (defun instruction-set-vectorizers (instruction-set X-record) 72 | (loop for instruction-set in (included-instruction-sets instruction-set) 73 | append 74 | (gethash X-record (instruction-set-vectorizer-table instruction-set) '()))) 75 | 76 | ;;; A hash table, mapping from instruction set names or packages to 77 | ;;; instruction sets. 78 | (defparameter *instruction-sets* (make-hash-table :test #'eq)) 79 | 80 | (defun find-instruction-set (designator &optional (errorp t)) 81 | (or (gethash designator *instruction-sets*) 82 | (when errorp 83 | (typecase designator 84 | (symbol (error "There is no instruction set with the name ~S." designator)) 85 | (package (error "There is not instruction set with the package ~S" designator)) 86 | (otherwise (error "Not a valid instruction set designator: ~S" designator)))))) 87 | 88 | (defmethod make-load-form ((instruction-set instruction-set) &optional env) 89 | (declare (ignore env)) 90 | `(find-instruction-set ',(instruction-set-name instruction-set))) 91 | 92 | (defmethod shared-initialize :after 93 | ((instruction-set instruction-set) slot-names &key &allow-other-keys) 94 | (setf (gethash (instruction-set-name instruction-set) *instruction-sets*) 95 | instruction-set) 96 | (setf (gethash (instruction-set-package instruction-set) *instruction-sets*) 97 | instruction-set) 98 | instruction-set) 99 | 100 | ;;; The currently active instruction set. 101 | (defvar *instruction-set*) 102 | 103 | ;;; Defining Instruction Sets 104 | 105 | (defparameter *instruction-set-options* 106 | '(:include :test :scalars :simd-packs :simd-casts :reinterpret-casts 107 | :instructions :loads :stores :reffers 108 | :associatives :reducers :comparisons :unequals :ifs)) 109 | 110 | (defgeneric decode-record-definition (record-name expr)) 111 | 112 | (defmacro define-instruction-set (name &body options) 113 | ;; Ensure that only valid options are supplied. 114 | (dolist (option options) 115 | (unless (and (listp option) 116 | (member (first option) *instruction-set-options*)) 117 | (error "Not a valid instruction set option:~% ~S" option))) 118 | (flet ((decode (keyword decoder) 119 | (loop for (key . exprs) in options 120 | when (eq key keyword) 121 | append (mapcar decoder exprs))) 122 | (decode-include (expr) 123 | `(find-instruction-set ',expr)) 124 | (record-decoder (record-name) 125 | (lambda (x) 126 | (decode-record-definition record-name x)))) 127 | ;; The macro expansion of an instruction set is a very large expression 128 | ;; that is evaluated exactly once, so compiling it would be a waste of 129 | ;; resources. Instead, we use SBCL's built-in interpreter. 130 | `(let ((sb-ext:*evaluator-mode* :interpret)) 131 | (eval 132 | '(let ((*instruction-set* 133 | (make-instance 'instruction-set 134 | :name ',name 135 | :package ,(if (eq name :sb-simd) 136 | (find-package "SB-SIMD") 137 | (find-package (concatenate 'string "SB-SIMD-" (string name)))) 138 | :test (lambda () (and ,@(decode :test #'identity))) 139 | :includes (list ,@(decode :include #'decode-include))))) 140 | ,@(decode :scalars (record-decoder 'value-record)) 141 | ,@(decode :simd-packs (record-decoder 'simd-record)) 142 | ,@(decode :instructions (record-decoder 'instruction-record)) 143 | ,@(decode :loads (record-decoder 'load-record)) 144 | ,@(decode :stores (record-decoder 'store-record)) 145 | ,@(decode :reffers (record-decoder 'reffer-record)) 146 | ,@(decode :associatives (record-decoder 'associative-record)) 147 | ,@(decode :reducers (record-decoder 'reducer-record)) 148 | ,@(decode :comparisons (record-decoder 'comparison-record)) 149 | ,@(decode :unequals (record-decoder 'unequal-record)) 150 | ,@(decode :ifs (record-decoder 'if-record)) 151 | ,@(decode :simd-casts (record-decoder 'simd-cast-record)) 152 | ,@(decode :reinterpret-casts (record-decoder 'reinterpret-cast-record))))))) 153 | -------------------------------------------------------------------------------- /code/define-custom-vops.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-vm) 2 | 3 | (macrolet 4 | ((define-custom-vop (name &body clauses) 5 | (with-accessors ((name sb-simd-internals:instruction-record-name) 6 | (vop sb-simd-internals:instruction-record-vop) 7 | (argument-records sb-simd-internals:instruction-record-argument-records) 8 | (result-records sb-simd-internals:instruction-record-result-records) 9 | (cost sb-simd-internals:instruction-record-cost) 10 | (encoding sb-simd-internals:instruction-record-encoding)) 11 | (sb-simd-internals:find-function-record name) 12 | (assert (eq encoding :custom)) 13 | (labels ((find-clauses (key) 14 | (remove key clauses :test-not #'eq :key #'first)) 15 | (find-clause (key) 16 | (let ((found (find-clauses key))) 17 | (assert (= 1 (length found))) 18 | (rest (first found))))) 19 | `(sb-c:define-vop (,vop) 20 | (:translate ,vop) 21 | (:policy :fast-safe) 22 | (:arg-types ,@(mapcar #'sb-simd-internals:value-record-primitive-type argument-records)) 23 | (:result-types ,@(mapcar #'sb-simd-internals:value-record-primitive-type result-records)) 24 | (:args 25 | ,@(loop for arg in (find-clause :args) 26 | for argument-record in argument-records 27 | collect `(,@arg :scs ,(sb-simd-internals:value-record-scs argument-record)))) 28 | ,@(find-clauses :info) 29 | ,@(find-clauses :temporary) 30 | (:results 31 | ,@(loop for result in (find-clause :results) 32 | for result-record in result-records 33 | collect `(,@result :scs ,(sb-simd-internals:value-record-scs result-record)))) 34 | (:generator ,cost ,@(find-clause :generator))))))) 35 | ;; SSE 36 | (macrolet ((def (name cmp) 37 | `(define-custom-vop ,name 38 | (:args (a :target tmp) (b)) 39 | (:temporary (:sc single-reg :from (:argument 0)) tmp) 40 | (:results (dst)) 41 | (:generator 42 | (unless (location= a tmp) 43 | (inst xorps tmp tmp) 44 | (inst movss tmp a)) 45 | (inst cmpss ,cmp tmp b) 46 | (inst movq dst tmp))))) 47 | (def sb-simd-sse::two-arg-f32= :eq) 48 | (def sb-simd-sse::two-arg-f32/= :neq) 49 | (def sb-simd-sse::two-arg-f32< :lt) 50 | (def sb-simd-sse::two-arg-f32<= :le) 51 | (def sb-simd-sse::two-arg-f32> :nle) 52 | (def sb-simd-sse::two-arg-f32>= :nlt)) 53 | (define-custom-vop sb-simd-sse::f32-from-s64 54 | (:args (src)) 55 | (:results (dst)) 56 | (:generator 57 | (inst xorps dst dst) 58 | (inst cvtsi2ss dst src))) 59 | (define-custom-vop sb-simd-sse::f32!-from-p128 60 | (:args (src :target dst)) 61 | (:temporary (:sc single-sse-reg :from (:argument 0)) tmp) 62 | (:results (dst)) 63 | (:generator 64 | (move tmp src) 65 | (inst xorps dst dst) 66 | (inst movss dst tmp))) 67 | ;; SSE2 68 | (macrolet ((def (name cmp) 69 | `(define-custom-vop ,name 70 | (:args (a :target tmp) (b)) 71 | (:temporary (:sc single-reg :from (:argument 0)) tmp) 72 | (:results (dst)) 73 | (:generator 74 | (unless (location= a tmp) 75 | (inst xorpd tmp tmp) 76 | (inst movsd tmp a)) 77 | (inst cmpsd ,cmp tmp b) 78 | (inst movq dst tmp))))) 79 | (def sb-simd-sse2::two-arg-f64= :eq) 80 | (def sb-simd-sse2::two-arg-f64/= :neq) 81 | (def sb-simd-sse2::two-arg-f64< :lt) 82 | (def sb-simd-sse2::two-arg-f64<= :le) 83 | (def sb-simd-sse2::two-arg-f64> :nle) 84 | (def sb-simd-sse2::two-arg-f64>= :nlt)) 85 | (define-custom-vop sb-simd-sse2::f64-from-s64 86 | (:args (src)) 87 | (:results (dst)) 88 | (:generator 89 | (inst xorpd dst dst) 90 | (inst cvtsi2sd dst src))) 91 | (define-custom-vop sb-simd-sse2::f64!-from-p128 92 | (:args (src :target tmp)) 93 | (:temporary (:sc double-sse-reg :from (:argument 0)) tmp) 94 | (:results (dst)) 95 | (:generator 96 | (move tmp src) 97 | (inst xorpd dst dst) 98 | (inst movsd dst tmp))) 99 | ;; AVX 100 | (macrolet ((def (name cmp) 101 | `(define-custom-vop ,name 102 | (:args (a :target tmp) (b)) 103 | (:temporary (:sc single-reg :from (:argument 0)) tmp) 104 | (:results (dst)) 105 | (:generator 106 | (unless (location= a tmp) 107 | (inst vxorps tmp tmp tmp)) 108 | (inst vcmpss ,cmp tmp a b) 109 | (inst vmovq dst tmp))))) 110 | (def sb-simd-avx::two-arg-f32= :eq) 111 | (def sb-simd-avx::two-arg-f32/= :neq) 112 | (def sb-simd-avx::two-arg-f32< :lt) 113 | (def sb-simd-avx::two-arg-f32<= :le) 114 | (def sb-simd-avx::two-arg-f32> :nle) 115 | (def sb-simd-avx::two-arg-f32>= :nlt)) 116 | (macrolet ((def (name cmp) 117 | `(define-custom-vop ,name 118 | (:args (a :target tmp) (b)) 119 | (:temporary (:sc single-reg :from (:argument 0)) tmp) 120 | (:results (dst)) 121 | (:generator 122 | (unless (location= a tmp) 123 | (inst vxorpd tmp tmp tmp)) 124 | (inst vcmpsd ,cmp tmp a b) 125 | (inst vmovq dst tmp))))) 126 | (def sb-simd-avx::two-arg-f64= :eq) 127 | (def sb-simd-avx::two-arg-f64/= :neq) 128 | (def sb-simd-avx::two-arg-f64< :lt) 129 | (def sb-simd-avx::two-arg-f64<= :le) 130 | (def sb-simd-avx::two-arg-f64> :nle) 131 | (def sb-simd-avx::two-arg-f64>= :nlt)) 132 | (define-custom-vop sb-simd-avx::f32-from-s64 133 | (:args (src :to :save)) 134 | (:results (dst)) 135 | (:generator 136 | (inst vxorpd dst dst dst) 137 | (inst vcvtsi2ss dst dst src))) 138 | (define-custom-vop sb-simd-avx::f64-from-s64 139 | (:args (src :to :save)) 140 | (:results (dst)) 141 | (:generator 142 | (inst vxorpd dst dst dst) 143 | (inst vcvtsi2sd dst dst src))) 144 | (define-custom-vop sb-simd-avx::f32!-from-p128 145 | (:args (src :to :save)) 146 | (:results (dst)) 147 | (:generator 148 | (inst vxorps dst dst dst) 149 | (inst movss dst src))) 150 | (define-custom-vop sb-simd-avx::f32!-from-p256 151 | (:args (src :to :save)) 152 | (:results (dst)) 153 | (:generator 154 | (inst vxorps dst dst dst) 155 | (inst movss dst src))) 156 | (define-custom-vop sb-simd-avx::f64!-from-p128 157 | (:args (src :to :save)) 158 | (:results (dst)) 159 | (:generator 160 | (inst vxorpd dst dst dst) 161 | (inst movsd dst src))) 162 | (define-custom-vop sb-simd-avx::f64!-from-p256 163 | (:args (src :to :save)) 164 | (:results (dst)) 165 | (:generator 166 | (inst vxorpd dst dst dst) 167 | (inst movsd dst src)))) 168 | -------------------------------------------------------------------------------- /test-suite/utilities.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-test-suite) 2 | 3 | (defun shuffle (list) 4 | (let ((result (copy-seq list))) 5 | (loop for tail on result 6 | for tail-length from (length result) downto 2 7 | do (rotatef (first tail) 8 | (nth (random tail-length) tail))) 9 | result)) 10 | 11 | (defun simd-info (name) 12 | "Returns, as list: 13 | 14 | 1. The element type of the SIMD pack. 15 | 16 | 2. The number of elements of the SIMD pack. 17 | 18 | 3. The name of the function for creating the SIMD pack from individual 19 | elements. 20 | 21 | 4. The name of the function for returning the elements of the SIMD pack as 22 | multiple values." 23 | (with-accessors ((scalar-record simd-record-scalar-record) 24 | (width value-record-simd-width)) 25 | (find-value-record name) 26 | (list 27 | (value-record-name scalar-record) 28 | width 29 | (or (find-symbol (format nil "MAKE-~A" (symbol-name name)) 30 | (symbol-package name)) 31 | (error "No constructor found for ~S." name)) 32 | (or (find-symbol (format nil "~A-VALUES" (symbol-name name)) 33 | (symbol-package name)) 34 | (error "No unpacker found for ~S." name))))) 35 | 36 | (defun simd= (a b) 37 | (typecase a 38 | (sb-ext:simd-pack 39 | (when (sb-ext:simd-pack-p b) 40 | (multiple-value-bind (a0 a1) (sb-ext:%simd-pack-ub64s a) 41 | (multiple-value-bind (b0 b1) (sb-ext:%simd-pack-ub64s b) 42 | (and (= a0 b0) (= a1 b1)))))) 43 | (sb-ext:simd-pack-256 44 | (when (sb-ext:simd-pack-256-p b) 45 | (multiple-value-bind (a0 a1 a2 a3) (sb-ext:%simd-pack-256-ub64s a) 46 | (multiple-value-bind (b0 b1 b2 b3) (sb-ext:%simd-pack-256-ub64s b) 47 | (and (= a0 b0) (= a1 b1) (= a2 b2) (= a3 b3)))))) 48 | (otherwise nil))) 49 | 50 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 51 | ;;; 52 | ;;; Argument Type Specifications 53 | 54 | (defun parse-argtypes (argtypes) 55 | "Returns, as multiple values: 56 | 57 | 1. The list of mandatory argtypes. 58 | 59 | 2. The list of optional argtypes. 60 | 61 | 3. The argtype of the rest argument, or NIL if there is no &rest argtype." 62 | (labels ((fail () 63 | (error "Malformed argtypes list: ~S" argtypes)) 64 | (process-mandatory (argtypes mandatory) 65 | (if (null argtypes) 66 | (values (reverse mandatory) '() nil) 67 | (case (first argtypes) 68 | ((&optional) 69 | (process-optional (rest argtypes) mandatory '())) 70 | ((&rest) 71 | (process-rest (rest argtypes) mandatory '())) 72 | (#.(set-difference lambda-list-keywords '(&optional &rest)) 73 | (fail)) 74 | (otherwise 75 | (process-mandatory (rest argtypes) (cons (first argtypes) mandatory)))))) 76 | (process-optional (argtypes mandatory optional) 77 | (if (null argtypes) 78 | (values (reverse mandatory) (reverse optional) nil) 79 | (case (first argtypes) 80 | ((&rest) 81 | (process-rest (rest argtypes) mandatory optional)) 82 | (#.(set-difference lambda-list-keywords '(&rest)) 83 | (fail)) 84 | (otherwise 85 | (process-optional (rest argtypes) mandatory (cons (first argtypes) optional)))))) 86 | (process-rest (argtypes mandatory optional) 87 | (if (or (endp argtypes) 88 | (not (endp (rest argtypes)))) 89 | (fail) 90 | (values (reverse mandatory) (reverse optional) (first argtypes))))) 91 | (process-mandatory argtypes '()))) 92 | 93 | (defun argtypes-variants (argtypes) 94 | "Returns a list of lists of type specifiers such that each list of type 95 | specifiers satisfies the argument type specification given by ARGTYPES." 96 | (multiple-value-bind (mandatory optional rest) 97 | (parse-argtypes argtypes) 98 | (let ((result '())) 99 | (loop for n-optional to (length optional) do 100 | (loop for n-rest from 0 to (if (not rest) 0 3) do 101 | (push (append mandatory 102 | (subseq optional 0 n-optional) 103 | (make-list n-rest :initial-element rest)) 104 | result))) 105 | (reverse result)))) 106 | 107 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 108 | ;;; 109 | ;;; Generators 110 | 111 | (defun find-generator (type) 112 | (intern (format nil "RANDOM-~A" (symbol-name type)) 113 | #.*package*)) 114 | 115 | (macrolet ((define-generators () 116 | `(progn 117 | ,@(loop for (type name) 118 | in '((sb-simd:f32 random-f32) 119 | (sb-simd:f64 random-f64) 120 | (sb-simd:u8 random-u8) 121 | (sb-simd:u16 random-u16) 122 | (sb-simd:u32 random-u32) 123 | (sb-simd:u64 random-u64) 124 | (sb-simd:s8 random-s8) 125 | (sb-simd:s16 random-s16) 126 | (sb-simd:s32 random-s32) 127 | (sb-simd:s64 random-s64)) 128 | collect 129 | (let ((numbers (numbers-of-type type))) 130 | `(defun ,name () 131 | (aref ,(coerce numbers `(simple-array ,type (*))) 132 | (random ,(length numbers))))))))) 133 | (define-generators)) 134 | 135 | (defun find-valid-simd-call (scalar-function input-generators simd-width 136 | input-constructors output-constructors) 137 | (let ((inputs-list '()) 138 | (outputs-list '())) 139 | (loop repeat simd-width do 140 | (multiple-value-bind (inputs outputs) 141 | (find-valid-scalar-call scalar-function input-generators) 142 | (push inputs inputs-list) 143 | (push outputs outputs-list))) 144 | (values 145 | (apply #'mapcar #'funcall input-constructors inputs-list) 146 | (apply #'mapcar #'funcall output-constructors outputs-list)))) 147 | 148 | (defun find-valid-scalar-call (scalar-function input-generators) 149 | (let ((attempts 0)) 150 | (loop 151 | (let ((inputs (mapcar #'funcall input-generators))) 152 | (handler-case (return (values inputs (multiple-value-list (apply scalar-function inputs)))) 153 | (condition () 154 | (incf attempts) 155 | (when (> attempts 1000) 156 | (error "Failed to find a valid call to ~S." scalar-function)))))))) 157 | 158 | (defun bitwise= (a b) 159 | (etypecase a 160 | (rational 161 | (when (rationalp b) 162 | (= a b))) 163 | (single-float 164 | (when (typep b 'single-float) 165 | (= (sb-kernel:single-float-bits a) 166 | (sb-kernel:single-float-bits b)))) 167 | (double-float 168 | (when (typep b 'double-float) 169 | (= (sb-kernel:double-float-bits a) 170 | (sb-kernel:double-float-bits b)))) 171 | (complex 172 | (when (typep b 'complex) 173 | (bitwise= (realpart a) (realpart b)) 174 | (bitwise= (imagpart a) (imagpart b)))))) 175 | -------------------------------------------------------------------------------- /code/instruction-sets/sse4-1.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-sse4.1) 2 | 3 | (define-instruction-set :sse4.1 4 | (:include :ssse3) 5 | (:test (sse4.1-supported-p)) 6 | (:instructions 7 | ;; f32.4 8 | (f32.4-blend #:blendvps (f32.4) (f32.4 f32.4 u32.4) :cost 1 :encoding :sse+xmm0) 9 | (f32.4-blendc #:blendps (f32.4) (f32.4 f32.4 imm4) :cost 1 :encoding :sse) 10 | (f32.4-%round #:roundps (f32.4) (f32.4 imm3) :cost 2) 11 | #+(or) ;; TODO: Broken in SBCL 12 | (f32.4-elt #:extractps (f32) (f32.4 imm2) :cost 1) 13 | (f32.4-insert #:insertps (f32.4) (f32.4 f32.4 imm8) :cost 1 :encoding :sse) 14 | ;; f64.2 15 | (f64.2-blend #:blendvpd (f64.2) (f64.2 f64.2 u64.2) :cost 1 :encoding :sse+xmm0) 16 | (f64.2-blendc #:blendpd (f64.2) (f64.2 f64.2 imm2) :cost 1 :encoding :sse) 17 | (f64.2-%round #:roundpd (f64.2) (f64.2 imm3) :cost 2) 18 | ;; u8.16 19 | (u8.16-blend #:pblendvb (u8.16) (u8.16 u8.16 u8.16) :cost 1 :encoding :sse+xmm0) 20 | (u8.16-elt #:pextrb (u8) (u8.16 imm4) :cost 1) 21 | (u8.16-insert #:pinsrb (u8.16) (u8.16 u8 imm4) :cost 1 :encoding :sse) 22 | ;; u16.8 23 | (u16.8-blend #:pblendvb (u16.8) (u16.8 u16.8 u16.8) :cost 1 :encoding :sse+xmm0) 24 | (two-arg-u16.8-max #:pmaxuw (u16.8) (u16.8 u16.8) :cost 2 :encoding :sse :associative t) 25 | (two-arg-u16.8-min #:pminuw (u16.8) (u16.8 u16.8) :cost 2 :encoding :sse :associative t) 26 | (u16.8-minpos #:pminuw (u16.8) (u16.8) :cost 5) 27 | ;; u32.4 28 | (u32.4-blend #:pblendvb (u32.4) (u32.4 u32.4 u32.4) :cost 1 :encoding :sse+xmm0) 29 | (two-arg-u32.4-max #:pmaxud (u32.4) (u32.4 u32.4) :cost 2 :encoding :sse :associative t) 30 | (two-arg-u32.4-min #:pminud (u32.4) (u32.4 u32.4) :cost 2 :encoding :sse :associative t) 31 | (u32.4-elt #:pextrd (u32) (u32.4 imm2) :cost 1) 32 | (u32.4-insert #:pinsrd (u32.4) (u32.4 u32 imm2) :cost 1 :encoding :sse) 33 | ;; u64.2 34 | (u64.2-blend #:pblendvb (u64.2) (u64.2 u64.2 u64.2) :cost 1 :encoding :sse+xmm0) 35 | (two-arg-u64.2= #:pcmpeqq (u64.2) (u64.2 u64.2) :cost 1 :encoding :sse :associative t) 36 | (two-arg-u64.2/= nil (u64.2) (u64.2 u64.2) :cost 2 :encoding :fake-vop :associative t) 37 | (u64.2-elt #:pextrq (u64) (u64.2 imm1) :cost 1) 38 | #+(or) ;; TODO: PINSRQ is currently missing in SBCL. 39 | (u64.2-insert #:pinsrq (u64.2) (u64.2 u64 imm1) :cost 1 :encoding :sse) 40 | ;; s8.16 41 | (s8.16-blend #:pblendvb (s8.16) (s8.16 s8.16 u8.16) :cost 1 :encoding :sse+xmm0) 42 | (two-arg-s8.16-max #:pmaxsb (s8.16) (s8.16 s8.16) :cost 2 :encoding :sse :associative t) 43 | (two-arg-s8.16-min #:pminsb (s8.16) (s8.16 s8.16) :cost 2 :encoding :sse :associative t) 44 | (s8.16-elt #:pextrb (s8) (s8.16 imm4) :cost 1) 45 | (s8.16-insert #:pinsrb (s8.16) (s8.16 s8 imm4) :cost 1 :encoding :sse) 46 | ;; s16.8 47 | (s16.8-blend #:pblendvb (s16.8) (s16.8 s16.8 u16.8) :cost 1 :encoding :sse+xmm0) 48 | (s16.8-from-u8.16 #:pmovsxbw (s16.8) (u8.16) :cost 5) 49 | (s16.8-from-s8.16 #:pmovsxbw (s16.8) (s8.16) :cost 5) 50 | (s16.8-pack #:packusdw (s16.8) (s32.4 s32.4) :cost 1 :encoding :sse) 51 | ;; s32.4 52 | (s32.4-blend #:pblendvb (s32.4) (s32.4 s32.4 u32.4) :cost 1 :encoding :sse+xmm0) 53 | (two-arg-s32.4-max #:pmaxsd (s32.4) (s32.4 s32.4) :cost 2 :encoding :sse :associative t) 54 | (two-arg-s32.4-min #:pminsd (s32.4) (s32.4 s32.4) :cost 2 :encoding :sse :associative t) 55 | (s32.4-from-u8.16 #:pmovsxbd (s32.4) (u8.16) :cost 5) 56 | (s32.4-from-s8.16 #:pmovsxbd (s32.4) (s8.16) :cost 5) 57 | (s32.4-from-u16.8 #:pmovsxwd (s32.4) (u16.8) :cost 5) 58 | (s32.4-from-s16.8 #:pmovsxwd (s32.4) (s16.8) :cost 5) 59 | (two-arg-s32.4-mullo #:pmulld (s32.4) (s32.4 s32.4) :cost 1 :encoding :sse :associative t) 60 | (s32.4-elt #:pextrd (s32) (s32.4 imm2) :cost 1) 61 | (s32.4-insert #:pinsrd (s32.4) (s32.4 s32 imm2) :cost 1 :encoding :sse) 62 | ;; s64.2 63 | (s64.2-blend #:pblendvb (s64.2) (s64.2 s64.2 u64.2) :cost 1 :encoding :sse+xmm0) 64 | (s64.2-from-u8.16 #:pmovsxbq (s64.2) (u8.16) :cost 5) 65 | (s64.2-from-s8.16 #:pmovsxbq (s64.2) (s8.16) :cost 5) 66 | (s64.2-from-u16.8 #:pmovsxwq (s64.2) (u16.8) :cost 5) 67 | (s64.2-from-s16.8 #:pmovsxwq (s64.2) (s16.8) :cost 5) 68 | (s64.2-from-u32.4 #:pmovsxdq (s64.2) (u32.4) :cost 5) 69 | (s64.2-from-s32.4 #:pmovsxdq (s64.2) (s32.4) :cost 5) 70 | (two-arg-s64.2-mul #:pmuldq (s64.2) (s64.2 s64.2) :cost 2 :encoding :sse :associative t) 71 | (two-arg-s64.2= #:pcmpeqq (u64.2) (s64.2 s64.2) :cost 1 :encoding :sse :associative t) 72 | (two-arg-s64.2/= nil (u64.2) (s64.2 s64.2) :cost 2 :encoding :fake-vop :associative t) 73 | (s64.2-elt #:pextrq (s64) (s64.2 imm1) :cost 1) 74 | #+(or) ;; TODO: PINSRQ is currently missing in SBCL. 75 | (s64.2-insert #:pinsrq (s64.2) (s64.2 s64 imm1) :cost 1 :encoding :sse)) 76 | (:loads 77 | (f32.4-ntload #:movntdqa f32.4 f32vec f32-array f32.4-non-temporal-aref f32.4-non-temporal-row-major-aref) 78 | (f64.2-ntload #:movntdqa f64.2 f64vec f64-array f64.2-non-temporal-aref f64.2-non-temporal-row-major-aref) 79 | (u8.16-ntload #:movntdqa u8.16 u8vec u8-array u8.16-non-temporal-aref u8.16-non-temporal-row-major-aref) 80 | (u16.8-ntload #:movntdqa u16.8 u16vec u16-array u16.8-non-temporal-aref u16.8-non-temporal-row-major-aref) 81 | (u32.4-ntload #:movntdqa u32.4 u32vec u32-array u32.4-non-temporal-aref u32.4-non-temporal-row-major-aref) 82 | (u64.2-ntload #:movntdqa u64.2 u64vec u64-array u64.2-non-temporal-aref u64.2-non-temporal-row-major-aref) 83 | (s8.16-ntload #:movntdqa s8.16 s8vec s8-array s8.16-non-temporal-aref s8.16-non-temporal-row-major-aref) 84 | (s16.8-ntload #:movntdqa s16.8 s16vec s16-array s16.8-non-temporal-aref s16.8-non-temporal-row-major-aref) 85 | (s32.4-ntload #:movntdqa s32.4 s32vec s32-array s32.4-non-temporal-aref s32.4-non-temporal-row-major-aref) 86 | (s64.2-ntload #:movntdqa s64.2 s64vec s64-array s64.2-non-temporal-aref s64.2-non-temporal-row-major-aref)) 87 | (:stores 88 | (f32.4-ntstore #:movntps f32.4 f32vec f32-array f32.4-non-temporal-aref f32.4-non-temporal-row-major-aref) 89 | (f64.2-ntstore #:movntpd f64.2 f64vec f64-array f64.2-non-temporal-aref f64.2-non-temporal-row-major-aref) 90 | (u8.16-ntstore #:movntdq u8.16 u8vec u8-array u8.16-non-temporal-aref u8.16-non-temporal-row-major-aref) 91 | (u16.8-ntstore #:movntdq u16.8 u16vec u16-array u16.8-non-temporal-aref u16.8-non-temporal-row-major-aref) 92 | (u32.4-ntstore #:movntdq u32.4 u32vec u32-array u32.4-non-temporal-aref u32.4-non-temporal-row-major-aref) 93 | (u64.2-ntstore #:movntdq u64.2 u64vec u64-array u64.2-non-temporal-aref u64.2-non-temporal-row-major-aref) 94 | (s8.16-ntstore #:movntdq s8.16 s8vec s8-array s8.16-non-temporal-aref s8.16-non-temporal-row-major-aref) 95 | (s16.8-ntstore #:movntdq s16.8 s16vec s16-array s16.8-non-temporal-aref s16.8-non-temporal-row-major-aref) 96 | (s32.4-ntstore #:movntdq s32.4 s32vec s32-array s32.4-non-temporal-aref s32.4-non-temporal-row-major-aref) 97 | (s64.2-ntstore #:movntdq s64.2 s64vec s64-array s64.2-non-temporal-aref s64.2-non-temporal-row-major-aref)) 98 | (:associatives 99 | (u16.8-max two-arg-u16.8-max nil) 100 | (u16.8-min two-arg-u16.8-min nil) 101 | (u32.4-max two-arg-u32.4-max nil) 102 | (u32.4-min two-arg-u32.4-min nil) 103 | (s8.16-max two-arg-s8.16-max nil) 104 | (s8.16-min two-arg-s8.16-min nil) 105 | (s32.4-max two-arg-s32.4-max nil) 106 | (s32.4-min two-arg-s32.4-min nil) 107 | (s32.4-mullo two-arg-s32.4-mullo 1) 108 | (s64.2-mul two-arg-s64.2-mul 1)) 109 | (:comparisons 110 | (u64.2= two-arg-u64.2= u64.2-and +u64-true+) 111 | (s64.2= two-arg-s64.2= u64.2-and +u64-true+)) 112 | (:ifs 113 | (f32.4-if f32.4-blend) 114 | (f64.2-if f64.2-blend) 115 | (u8.16-if u8.16-blend) 116 | (u16.8-if u16.8-blend) 117 | (u32.4-if u32.4-blend) 118 | (u64.2-if u64.2-blend) 119 | (s8.16-if s8.16-blend) 120 | (s16.8-if s16.8-blend) 121 | (s32.4-if s32.4-blend) 122 | (s64.2-if s64.2-blend)) 123 | (:unequals 124 | (u64.2/= two-arg-u64.2/= u64.2-and +u64-true+) 125 | (s64.2/= two-arg-s64.2/= u64.2-and +u64-true+))) 126 | -------------------------------------------------------------------------------- /code/define-arefs.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd-internals) 2 | 3 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4 | ;;; 5 | ;;; Auxiliary Functions and Macros 6 | 7 | (declaim (notinline wrong-number-of-subscripts)) 8 | (defun wrong-number-of-subscripts (array number-of-subscripts) 9 | (error "Wrong number of subcripts, ~S, for an array of rank ~S." 10 | number-of-subscripts 11 | (array-rank array))) 12 | 13 | (declaim (notinline invalid-subscript)) 14 | (defun invalid-subscript (subscript array axis limit) 15 | (declare (ignore array)) 16 | (error "Invalid array subscript ~S for axis ~S, ~ 17 | should be a non-negative integer below ~S." 18 | subscript axis limit)) 19 | 20 | ;; This function doesn't have to be particularly fast, because all index 21 | ;; computations where the number of subscripts is known at compile time are 22 | ;; expanded by compiler macros and WITH-ROW-MAJOR-SIMD-INDEX. The only way 23 | ;; to reach this function is when an array indexing function is supplied as 24 | ;; the first argument to APPLY or FUNCALL. 25 | (defun array-row-major-simd-index (array simd-width &rest subscripts) 26 | (let ((rank (array-rank array)) 27 | (length (length subscripts))) 28 | (unless (= rank length) 29 | (wrong-number-of-subscripts array length)) 30 | (let ((stride 1) 31 | (index 0)) 32 | (declare (index stride index)) 33 | (loop for axis from (1- rank) downto 0 34 | for subscript = (nth axis subscripts) 35 | for dimension = (array-dimension array axis) 36 | for width = simd-width then 1 do 37 | (unless (<= -1 subscript (- dimension width)) 38 | (invalid-subscript subscript array axis (1+ (- dimension width)))) 39 | (incf index (* stride subscript)) 40 | (setf stride (* stride dimension))) 41 | index))) 42 | 43 | (defmacro with-row-major-simd-index 44 | ((index array simd-width &rest indices) &body body &environment env) 45 | (check-type index symbol) 46 | (check-type array symbol) 47 | (check-type simd-width (integer 1)) 48 | (dolist (index indices) 49 | (check-type index symbol)) 50 | (let* ((length (length indices)) 51 | (rank-binding `(,(gensym "RANK") (array-rank ,array))) 52 | (rank (first rank-binding)) 53 | (dimension-bindings 54 | (loop for axis below length 55 | collect `(,(gensym "DIMENSION") (array-dimension ,array ,axis)))) 56 | (dimensions (mapcar #'first dimension-bindings)) 57 | (stride-bindings 58 | (loop for axis from (- length 2) downto 0 59 | for old-stride = nil then new-stride 60 | for new-stride = (gensym "STRIDE") 61 | for stride-binding = `(,new-stride ,(nth axis dimensions)) 62 | then `(,new-stride (index* ,(nth axis dimensions) ,old-stride)) 63 | collect stride-binding)) 64 | (strides (reverse (mapcar #'first stride-bindings))) 65 | (index-form 66 | `(index+ 67 | ,@(loop for stride in strides 68 | for index in indices 69 | collect `(index* ,stride ,index)) 70 | ,(first (last indices))))) 71 | `(let (,rank-binding) 72 | (unless (= ,rank ,length) 73 | (wrong-number-of-subscripts ,array ,length)) 74 | (let (,@dimension-bindings) 75 | (declare (ignorable ,@dimensions)) 76 | ,@(when (sb-c:policy env (plusp sb-c::insert-array-bounds-checks)) 77 | (loop for axis from 0 78 | for dimension in dimensions 79 | for index in indices 80 | for limit = (if (= axis (1- length)) `(- ,dimension ,(1- simd-width)) dimension) 81 | collect `(unless (< -1 ,index ,limit) 82 | (invalid-subscript ,index ,array ,axis ,limit)))) 83 | (let* (,@stride-bindings) 84 | (let ((,index ,index-form)) 85 | ,@body)))))) 86 | 87 | (defun setf-row-major-aref (v a i) 88 | (setf (row-major-aref a i) v)) 89 | 90 | (define-compiler-macro setf-row-major-aref (v a i) 91 | (let* ((v-binding `(,(gensym "VALUE") ,v)) 92 | (v (first v-binding))) 93 | `(let (,v-binding) 94 | (setf (row-major-aref ,a ,i) ,v)))) 95 | 96 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 97 | ;;; 98 | ;;; Array Load and Store Instructions 99 | 100 | (macrolet 101 | ((define-aref (load-record-name) 102 | (with-accessors ((load load-record-name) 103 | (aref load-record-aref) 104 | (row-major-aref load-record-row-major-aref) 105 | (value-record load-record-value-record) 106 | (vector-record load-record-vector-record)) 107 | (find-function-record load-record-name) 108 | (let ((simd-width (value-record-simd-width value-record)) 109 | (element-type 110 | (second 111 | (value-record-type vector-record)))) 112 | `(progn 113 | (define-inline ,row-major-aref (array index) 114 | (declare (type (array ,element-type) array) 115 | (index index)) 116 | (,load array index)) 117 | (defun ,aref (array &rest indices) 118 | (declare (type (array ,element-type) array)) 119 | (,load 120 | array 121 | (apply #'array-row-major-simd-index array ,simd-width indices))) 122 | (define-compiler-macro ,aref (array &rest indices) 123 | (let* ((index (gensym "INDEX")) 124 | (array-binding `(,(gensym "ARRAY") ,array)) 125 | (index-bindings 126 | (loop for index-form in indices 127 | collect `(,(gensym "INDEX") ,index-form))) 128 | (array (first array-binding)) 129 | (indices (mapcar #'first index-bindings))) 130 | `(let (,array-binding ,@index-bindings) 131 | (declare (type (array ,',element-type) ,array)) 132 | (with-row-major-simd-index (,index ,array ,',simd-width ,@indices) 133 | (,',load ,array ,index))))))))) 134 | (define-setf-aref (store-record-name) 135 | (with-accessors ((store store-record-name) 136 | (aref store-record-aref) 137 | (row-major-aref store-record-row-major-aref) 138 | (value-record store-record-value-record) 139 | (vector-record store-record-vector-record)) 140 | (find-function-record store-record-name) 141 | (let ((value-type (value-record-name value-record)) 142 | (simd-width (value-record-simd-width value-record)) 143 | (element-type 144 | (second 145 | (value-record-type vector-record)))) 146 | `(progn 147 | (define-inline (setf ,row-major-aref) (value array index) 148 | (declare (type (array ,element-type) array) 149 | (index index)) 150 | (,store (,value-type value) array index)) 151 | (defun (setf ,aref) (value array &rest indices) 152 | (declare (type (array ,element-type) array)) 153 | (,store 154 | (,value-type value) 155 | array 156 | (apply #'array-row-major-simd-index array ,simd-width indices))) 157 | (define-compiler-macro (setf ,aref) (value array &rest indices) 158 | (let* ((value-binding `(,(gensym "VALUE") ,value)) 159 | (array-binding `(,(gensym "ARRAY") ,array)) 160 | (index-bindings 161 | (loop for index-form in indices 162 | collect `(,(gensym "INDEX") ,index-form))) 163 | (indices (mapcar #'first index-bindings)) 164 | (value (first value-binding)) 165 | (array (first array-binding)) 166 | (index (gensym "INDEX"))) 167 | `(let (,value-binding ,array-binding ,@index-bindings) 168 | (declare (type (array ,',element-type) ,array)) 169 | (with-row-major-simd-index (,index ,array ,',simd-width ,@indices) 170 | (,',store (,',value-type ,value) ,array ,index))))))))) 171 | (define-arefs () 172 | `(progn 173 | ,@(loop for load-record in (filter-function-records #'load-record-p) 174 | for name = (load-record-name load-record) 175 | collect `(define-aref ,name)) 176 | ,@(loop for store-record in (filter-function-records #'store-record-p) 177 | for name = (store-record-name store-record) 178 | collect `(define-setf-aref ,name))))) 179 | (define-arefs)) 180 | -------------------------------------------------------------------------------- /code/define-instruction-vops.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-vm) 2 | 3 | (macrolet 4 | ((define-instruction-vop (instruction-record-name) 5 | (with-accessors ((name sb-simd-internals:instruction-record-name) 6 | (vop sb-simd-internals:instruction-record-vop) 7 | (mnemonic sb-simd-internals:instruction-record-mnemonic) 8 | (argument-records sb-simd-internals:instruction-record-argument-records) 9 | (result-records sb-simd-internals:instruction-record-result-records) 10 | (cost sb-simd-internals:instruction-record-cost) 11 | (pure sb-simd-internals:instruction-record-pure) 12 | (always-translatable sb-simd-internals:instruction-record-always-translatable) 13 | (associative sb-simd-internals:instruction-record-associative) 14 | (prefix sb-simd-internals:instruction-record-prefix) 15 | (suffix sb-simd-internals:instruction-record-suffix) 16 | (encoding sb-simd-internals:instruction-record-encoding)) 17 | (sb-simd-internals:find-function-record instruction-record-name) 18 | (let* ((asyms (sb-simd-internals:prefixed-symbols "A" (length argument-records))) 19 | (rsyms (sb-simd-internals:prefixed-symbols "R" (length result-records))) 20 | (defknown 21 | `(defknown ,vop 22 | (,@(mapcar #'sb-simd-internals:value-record-name argument-records)) 23 | (values ,@(mapcar #'sb-simd-internals:value-record-name result-records) &optional) 24 | (,@(when (and always-translatable (not (eq encoding :fake-vop))) 25 | '(always-translatable)) 26 | ,@(when pure '(foldable flushable movable))) 27 | :overwrite-fndb-silently t)) 28 | (arg-types 29 | (mapcar #'sb-simd-internals:value-record-primitive-type argument-records)) 30 | (result-types 31 | (mapcar #'sb-simd-internals:value-record-primitive-type result-records)) 32 | (args 33 | (loop for asym in asyms 34 | for argument-record in argument-records 35 | when (symbolp (sb-simd-internals:value-record-primitive-type argument-record)) 36 | collect `(,asym :scs ,(sb-simd-internals:value-record-scs argument-record)))) 37 | (info 38 | (loop for asym in asyms 39 | for argument-record in argument-records 40 | unless (symbolp (sb-simd-internals:value-record-primitive-type argument-record)) 41 | collect asym)) 42 | (results 43 | (loop for rsym in rsyms 44 | for result-record in result-records 45 | collect `(,rsym :scs ,(sb-simd-internals:value-record-scs result-record))))) 46 | (ecase encoding 47 | ((:fake-vop :custom) 48 | `(progn ,defknown)) 49 | (:standard 50 | (assert mnemonic) 51 | `(progn 52 | ,defknown 53 | (define-vop (,vop) 54 | (:translate ,vop) 55 | (:policy :fast-safe) 56 | (:args ,@args) 57 | (:info ,@info) 58 | (:results ,@results) 59 | (:arg-types ,@arg-types) 60 | (:result-types ,@result-types) 61 | (:generator 62 | ,cost 63 | (inst ,mnemonic ,@prefix ,@rsyms ,@asyms ,@suffix))))) 64 | (:move 65 | (assert mnemonic) 66 | (let ((src (first asyms)) 67 | (dst (first rsyms))) 68 | `(progn 69 | ,defknown 70 | (define-vop (,vop) 71 | (:translate ,vop) 72 | (:policy :fast-safe) 73 | (:args (,@(first args) :target ,dst) ,@(rest args)) 74 | (:info ,@info) 75 | (:results ,@results) 76 | (:arg-types ,@arg-types) 77 | (:result-types ,@result-types) 78 | (:generator 79 | ,cost 80 | (unless (location= ,dst ,src) 81 | (inst ,mnemonic ,@prefix ,@rsyms ,@asyms ,@suffix))))))) 82 | (:sse 83 | (assert mnemonic) 84 | (let ((x (first asyms)) 85 | (y (second asyms)) 86 | (rest (rest (rest asyms))) 87 | (r (first rsyms))) 88 | `(progn 89 | ,defknown 90 | (define-vop (,vop) 91 | (:translate ,vop) 92 | (:policy :fast-safe) 93 | (:args (,@(first args) :target ,r) ,@(rest args)) 94 | (:temporary (:sc ,(first (sb-simd-internals:value-record-scs (first argument-records)))) tmp) 95 | (:info ,@info) 96 | (:results ,@results) 97 | (:arg-types ,@arg-types) 98 | (:result-types ,@result-types) 99 | (:generator 100 | ,cost 101 | (cond ((location= ,x ,r) 102 | (inst ,mnemonic ,@prefix ,r ,y ,@rest ,@suffix)) 103 | ((or (not (tn-p ,y)) 104 | (not (location= ,y ,r))) 105 | (move ,r ,x) 106 | (inst ,mnemonic ,@prefix ,r ,y ,@rest ,@suffix)) 107 | (t 108 | (move tmp ,x) 109 | (inst ,mnemonic ,@prefix tmp ,y ,@rest ,@suffix) 110 | (move ,r tmp)))))))) 111 | (:sse+xmm0 112 | (assert mnemonic) 113 | (let ((x (first asyms)) 114 | (y (second asyms)) 115 | (z (third asyms)) 116 | (r (first rsyms))) 117 | `(progn 118 | ,defknown 119 | (define-vop (,vop) 120 | (:translate ,vop) 121 | (:policy :fast-safe) 122 | (:args (,@(first args) :target ,r) ,(second args) (,@(third args) :target xmm0)) 123 | (:temporary (:sc ,(first (sb-simd-internals:value-record-scs (first argument-records)))) tmp) 124 | (:temporary (:sc ,(first (sb-simd-internals:value-record-scs (second argument-records))) 125 | :from (:argument 0) :to :result :offset 0) xmm0) 126 | (:info ,@info) 127 | (:results ,@results) 128 | (:arg-types ,@arg-types) 129 | (:result-types ,@result-types) 130 | (:generator 131 | ,cost 132 | (move xmm0 ,z) 133 | (cond ((location= ,x ,r) 134 | (inst ,mnemonic ,@prefix ,r ,y xmm0 ,@suffix)) 135 | ((or (not (tn-p ,y)) 136 | (not (location= ,y ,r))) 137 | (move ,r ,x) 138 | (inst ,mnemonic ,@prefix ,r ,y xmm0 ,@suffix)) 139 | (t 140 | (move tmp ,x) 141 | (inst ,mnemonic ,@prefix tmp ,y xmm0 ,@suffix) 142 | (move ,r tmp)))))))) 143 | (:fma 144 | (assert mnemonic) 145 | (let ((x (first asyms)) 146 | (y (second asyms)) 147 | (z (third asyms)) 148 | (rest (rest (rest (rest asyms)))) 149 | (r (first rsyms))) 150 | `(progn 151 | ,defknown 152 | (define-vop (,vop) 153 | (:translate ,vop) 154 | (:policy :fast-safe) 155 | (:args (,@(first args) :target ,r) ,@(rest args)) 156 | (:temporary (:sc ,(first (sb-simd-internals:value-record-scs (first argument-records)))) tmp) 157 | (:info ,@info) 158 | (:results ,@results) 159 | (:arg-types ,@arg-types) 160 | (:result-types ,@result-types) 161 | (:generator 162 | ,cost 163 | (cond ((location= ,x ,r) 164 | (inst ,mnemonic ,@prefix ,r ,y ,z ,@rest ,@suffix)) 165 | ((and (or (not (tn-p ,y)) 166 | (not (location= ,y ,r))) 167 | (or (not (tn-p ,z)) 168 | (not (location= ,z ,r)))) 169 | (move ,r ,x) 170 | (inst ,mnemonic ,@prefix ,r ,y ,z ,@rest ,@suffix)) 171 | (t 172 | (move tmp ,x) 173 | (inst ,mnemonic ,@prefix tmp ,y ,z ,@rest ,@suffix) 174 | (move ,r tmp)))))))))))) 175 | (define-instruction-vops () 176 | `(progn 177 | ,@(loop for instruction-record 178 | in (sb-simd-internals:filter-available-function-records 179 | #'sb-simd-internals:instruction-record-p) 180 | collect `(define-instruction-vop ,(sb-simd-internals:instruction-record-name instruction-record)))))) 181 | (define-instruction-vops)) 182 | -------------------------------------------------------------------------------- /sb-simd.texinfo: -------------------------------------------------------------------------------- 1 | @node SIMD Programming 2 | @chapter SIMD Programming 3 | 4 | The @code{sb-simd} module provides a convenient interface for SIMD 5 | programming in SBCL. It provides one package per SIMD instruction set, 6 | plus functions and macros for querying whether an instruction set is 7 | available and what functions and data types it exports. 8 | 9 | @subsection Data Types 10 | 11 | The central data type in @code{sb-simd} is the SIMD pack. A SIMD pack 12 | is very similar to a specialized vector, except that its length must be 13 | a particular power of two that depends on its element type and the 14 | underlying hardware. The set of element types that are supported for 15 | SIMD packs is similar to that of SBCL's specialized array element types, 16 | except that there is currently no support for SIMD packs of complex 17 | numbers or characters. 18 | 19 | The supported scalar types are @code{f32}, @code{f64}, @code{sN}, and 20 | @code{uN}, where @code{N} is either 8, 16, 32, or 64. These scalar 21 | types are abbreviations for the Common Lisp types @code{single-float}, 22 | @code{double-float}, @code{signed-byte}, and @code{unsigned-byte}, 23 | respectively. For each scalar data type @code{X}, there exists one or 24 | more SIMD data type @code{X.Y} with @code{Y} elements. For example, in 25 | AVX there are two supported SIMD data types with element type 26 | @code{f64}, namely @code{f64.2} (128 bit) and @code{f64.4} (256 bit). 27 | 28 | SIMD packs are regular Common Lisp objects that have a type, a class, 29 | and can be passed as function arguments. The price for this is that 30 | SIMD packs have both a boxed and an unboxed representation. The unboxed 31 | representation of a SIMD pack has zero overhead and fits into a CPU 32 | register, but can only be used within a function and when the compiler 33 | can statically determine the SIMD pack's type. Otherwise, the SIMD pack 34 | is boxed, i.e., spilled to the heap together with its type information. 35 | In practice, boxing of SIMD packs can usually be avoided via inlining, 36 | or by loading and storing them to specialized arrays instead of passing 37 | them around as function arguments. 38 | 39 | @subsection Casts 40 | 41 | For each scalar data type @code{X}, there is a function named @code{X} 42 | that is equivalent to @code{(lambda (v) (coerce v 'X))}. For each SIMD 43 | data type @code{X.Y}, there is a function named @code{X.Y} that ensures 44 | that its argument is of type @code{X.Y}, or, if the argument is a number, 45 | calls the cast function of @code{X} and broadcasts the result. 46 | 47 | All functions provided by @code{sb-simd} (apart from the casts 48 | themselves) implicitly cast each argument to its expected type. So to 49 | add the number five to each single float in a SIMD pack @code{x} of type 50 | @code{f32.8}, it is sufficient to write @code{(f32.8+ x 5)}. We don't 51 | mention this implicit conversion explicitly in the following sections, 52 | so if any function description states that an argument must be of type 53 | @code{X.Y}, the argument can actually be of any type that is a suitable 54 | argument of the cast function named @code{X.Y}. 55 | 56 | @subsection Constructors 57 | 58 | For each SIMD data type @code{X.Y}, there is a constructor named 59 | @code{make-X.Y} that takes @code{Y} arguments of type @code{X} and 60 | returns a SIMD pack whose elements are the supplied values. 61 | 62 | @subsection Unpackers 63 | 64 | For each SIMD data type @code{X.Y}, there is a function named 65 | @code{X.Y-values} that returns, as @code{Y} multiple values, the 66 | elements of the supplied SIMD pack of type @code{X.Y}. 67 | 68 | @subsection Reinterpret Casts 69 | 70 | For each SIMD data type @code{X.Y}, there is a function named 71 | @code{X.Y!} that takes any SIMD pack or scalar datum and interprets its 72 | bits as a SIMD pack of type @code{X.Y}. If the supplied datum has more 73 | bits than the resulting value, the excess bits are discarded. If the 74 | supplied datum has less bits than the resulting value, the missing bits are 75 | assumed to be zero. 76 | 77 | @subsection Associatives 78 | 79 | For each associative binary function, e.g., @code{two-arg-X.Y-OP}, there 80 | is a function @code{X.Y-OP} that takes any number of arguments and 81 | combines them with this binary function in a tree-like fashion. If the 82 | binary function has an identity element, it is possible to call the 83 | function with zero arguments, in which case the identity element is 84 | returned. If there is no identity element, the function must receive at 85 | least one argument. 86 | 87 | Examples of associative functions are @code{f32.8+}, for summing any 88 | number of 256 bit packs of single floats, and @code{u8.32-max}, for 89 | computing the element-wise maximum of one or more 256 bit packs of 8 bit 90 | integers. 91 | 92 | @subsection Reducers 93 | 94 | For binary functions @code{two-arg-X.Y-OP} that are not associative, but 95 | that have a neutral element, we provide functions @code{X.Y-OP} that 96 | take any positive number of arguments and return the reduction of all 97 | arguments with the binary function. In the special case of a single 98 | supplied argument, the binary function is invoked on the neutral element 99 | and that argument. Reducers have been introduced to generate Lisp-style 100 | subtraction and division functions. 101 | 102 | Examples of reducers are @code{f32.8/}, for successively dividing a pack 103 | of 32 bit single floats by all further supplied packs of 32 bit single 104 | floats, or @code{u32.8-} for subtracting any number of supplied packs of 105 | 32 bit unsigned integers from the first supplied one, except in the case 106 | of a single argument, where @code{u32.8-} simply negates all values in 107 | the pack. 108 | 109 | @subsection Comparisons 110 | 111 | For each SIMD data type @code{X.Y}, there exist conversion functions 112 | @code{X.Y<}, @code{X.Y<=}, @code{X.Y>}, @code{X.Y>=}, and 113 | @code{X.Y=} that check whether the supplied arguments are strictly 114 | monotonically increasing, monotonically increasing, strictly monotonically 115 | decreasing, monotonically decreasing, equal, or nowhere equal, 116 | respectively. In contrast to the Common Lisp functions @code{<}, 117 | @code{<=}, @code{>}, @code{>=}, @code{=}, and @code{/=} the SIMD 118 | comparison functions don't return a generalized boolean, but a SIMD pack of 119 | unsigned integers with @code{Y} elements. The bits of each unsigned 120 | integer are either all one, if the values of the arguments at that position 121 | satisfy the test, or all zero, if they don't. We call a SIMD packs of such 122 | unsigned integers a mask. 123 | 124 | @subsection Conditionals 125 | 126 | The SIMD paradigm is inherently incompatible with fine-grained control 127 | flow. A piece of code containing an @code{if} special form cannot be 128 | vectorized in a straightforward way, because doing so would require as 129 | many instruction pointers and processor states as there are values in 130 | the desired SIMD data type. Instead, most SIMD instruction sets provide 131 | an operator for selecting values from one of two supplied SIMD packs 132 | based on a mask. The mask is a SIMD pack with as many elements as the 133 | other two arguments, but whose elements are unsigned integers whose bits 134 | must be either all zeros or all ones. This selection mechanism can be 135 | used to emulate the effect of an @code{if} special form, at the price 136 | that both operands have to be computed each time. 137 | 138 | In @code{sb-simd}, all conditional operations and comparisons emit 139 | suitable mask fields, and there is a @code{X.Y-if} function for each 140 | SIMD data type with element type @code{X} and number of elements 141 | @code{Y} whose first arguments must be a suitable mask, whose second and 142 | third argument must be objects that can be converted to the SIMD data 143 | type @code{X.Y}, and that returns a value of type @code{X.Y} where each 144 | element is from the second operand if the corresponding mask bits are 145 | set, and from the third operand if the corresponding mask bits are not 146 | set. 147 | 148 | @subsection Loads and Stores 149 | 150 | In practice, a SIMD pack @code{X.Y} is usually not constructed by 151 | calling its constructor, but by loading @code{Y} consecutive elements 152 | from a specialized array with element type @code{X}. The functions for 153 | doing so are called @code{X.Y-aref} and @code{X.Y-row-major-aref}, and 154 | have similar semantics as Common Lisp's @code{aref} and 155 | @code{row-major-aref}. In addition to that, some instruction sets 156 | provide the functions @code{X.Y-non-temporal-aref} and 157 | @code{X.Y-non-temporal-row-major-aref}, for accessing a memory location 158 | without loading the referenced values into the CPU's cache. 159 | 160 | For each function @code{X.Y-foo} for loading SIMD packs from an array, 161 | there also exists a corresponding function @code{(setf X.Y-foo)} for 162 | storing a SIMD pack in the specified memory location. An exception to 163 | this rule is that some instruction sets (e.g., SSE) only provide 164 | functions for non-temporal stores, but not for the corresponding 165 | non-temporal loads. 166 | 167 | One difficulty when treating the data of a Common Lisp array as a SIMD 168 | pack is that some hardware instructions require a particular alignment 169 | of the address being referenced. Luckily, most architectures provide 170 | instructions for unaligned loads and stores that are, at least on modern 171 | CPUs, not slower than their aligned equivalents. So by default we 172 | translate all array references as unaligned loads and stores. An 173 | exception are the instructions for non-temporal loads and stores, that 174 | always require a certain alignment. We do not handle this case 175 | specially, so without special handling by the user, non-temporal loads 176 | and stores will only work on certain array indices that depend on the 177 | actual placement of that array in memory. We'd be grateful if someone 178 | could point us to a mechanism for constraining the alignment of Common 179 | Lisp arrays in memory. 180 | 181 | @subsection Specialized Scalar Operations 182 | 183 | Finally, for each SIMD function @code{X.Y-OP} that applies a certain 184 | operation @code{OP} element-wise to the @code{Y} elements of type 185 | @code{X}, there exists also a functions @code{X-OP} for applying that 186 | operation only to a single element. For example, the SIMD function 187 | @code{f64.4+} has a corresponding function @code{f64+} that differs from 188 | @code{cl:+} in that it only accepts arguments of type double float, and 189 | that it adds its supplied arguments in a fixed order that is the same as 190 | the one used by @code{f64.4}. 191 | 192 | There are good reasons for exporting scalar functions from a SIMD 193 | library, too. The most obvious one is that they obey the same naming 194 | convention and hence make it easier to locate the correct functions. 195 | Another benefit is that the semantics of each scalar operation is 196 | precisely the same as that of the corresponding SIMD function, so they 197 | can be used to write reference implementations for testing. A final 198 | reason is that these scalar functions can be used to simplify the life 199 | of tools for automatic vectorization. 200 | 201 | @subsection Instruction Set Dispatch 202 | 203 | One challenge that is unique to image-based programming systems such as 204 | Lisp is that a program can run on one machine, be dumped as an image, 205 | and then resumed on another machine. While nobody expects this feature 206 | to work across machines with different architectures, it is quite likely 207 | that the machine where the image is dumped and the one where execution 208 | is resumed provide different instruction set extensions. 209 | 210 | As a practical example, consider a game developer that develops software 211 | on an x86-64 machine with all SIMD extensions up to AVX2, but then dumps 212 | it as an image and ships it to a customer whose machine only supports 213 | SIMD extensions up to SSE2. Ideally, the image should contain multiple 214 | optimized versions of all crucial functions, and dynamically select the 215 | most appropriate version based on the instruction set extensions that 216 | are actually available. 217 | 218 | This kind of run time instruction set dispatch is explicitly supported 219 | by means of the @code{instruction-set-case} macro. The code resulting 220 | from an invocation of this macro compiles to an efficient jump table 221 | whose index is recomputed on each startup of the Lisp image. 222 | -------------------------------------------------------------------------------- /examples/benchmarks.lisp: -------------------------------------------------------------------------------- 1 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2 | ;; Benchmarking code 3 | ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4 | (asdf:load-system :sb-simd) 5 | (ql:quickload :alexandria :silent t) 6 | (use-package :sb-simd-avx) 7 | 8 | (defmacro time-total (n &body body) 9 | "N-average the execution time of BODY in seconds" 10 | (declare (optimize (speed 0))) 11 | (alexandria:with-gensyms (start end) 12 | `(let (,start ,end) 13 | (sb-ext:gc :full t) 14 | (setf ,start (get-internal-real-time)) 15 | (loop for i below ,n 16 | do ,@body) 17 | (setf ,end (get-internal-real-time)) 18 | (coerce (/ (- ,end ,start) internal-time-units-per-second) 19 | 'float)))) 20 | 21 | (defun simd-sum (array &aux (n (array-total-size array))) 22 | (declare (type f64vec array) 23 | (optimize speed (safety 0))) 24 | (do ((index 0 (the (integer 0 #.(- array-total-size-limit 16)) (+ index 16))) 25 | (acc1 (f64.4 0) (f64.4+ acc1 (f64.4-aref array (+ index 0)))) 26 | (acc2 (f64.4 0) (f64.4+ acc2 (f64.4-aref array (+ index 4)))) 27 | (acc3 (f64.4 0) (f64.4+ acc3 (f64.4-aref array (+ index 8)))) 28 | (acc4 (f64.4 0) (f64.4+ acc4 (f64.4-aref array (+ index 12))))) 29 | ((> index (- n 16)) 30 | (do ((result (multiple-value-call #'+ (f64.4-values (f64.4+ acc1 acc2 acc3 acc4))) 31 | (+ result (row-major-aref array index))) 32 | (index index (1+ index))) 33 | ((>= index n) result))))) 34 | 35 | (defun simd-vdot (array1 array2 &aux (n (min (array-total-size array1) (array-total-size array2)))) 36 | (declare (type f64vec array1 array2) 37 | (optimize speed (safety 0))) 38 | (do ((index 0 (the (integer 0 #.(- array-total-size-limit 16)) (+ index 16))) 39 | (acc1 (f64.4 0) (f64.4-incf acc1 (f64.4* (f64.4-aref array1 (+ index 0)) 40 | (f64.4-aref array2 (+ index 0))))) 41 | (acc2 (f64.4 0) (f64.4-incf acc2 (f64.4* (f64.4-aref array1 (+ index 4)) 42 | (f64.4-aref array2 (+ index 4))))) 43 | (acc3 (f64.4 0) (f64.4-incf acc3 (f64.4* (f64.4-aref array1 (+ index 8)) 44 | (f64.4-aref array2 (+ index 8))))) 45 | (acc4 (f64.4 0) (f64.4-incf acc4 (f64.4* (f64.4-aref array1 (+ index 12)) 46 | (f64.4-aref array2 (+ index 12)))))) 47 | ((> index (- n 16)) 48 | (do ((result (multiple-value-call #'+ (f64.4-values (f64.4+ acc1 acc2 acc3 acc4))) 49 | (+ result (* (row-major-aref array1 index) 50 | (row-major-aref array2 index)))) 51 | (index index (1+ index))) 52 | ((>= index n) result))))) 53 | 54 | ;; For some reasons it floating-point-overflows 55 | (defun fma-vdot (array1 array2 &aux (n (min (array-total-size array1) (array-total-size array2)))) 56 | (declare (type f64vec array1 array2) 57 | (optimize speed (safety 0))) 58 | (do ((index 0 (the (integer 0 #.(- array-total-size-limit 16)) (+ index 16))) 59 | (acc1 (f64.4 0) (setf acc1 (sb-simd-avx2:f64.4-fmadd acc1 (f64.4-aref array1 (+ index 0)) 60 | (f64.4-aref array2 (+ index 0))))) 61 | (acc2 (f64.4 0) (setf acc2 (sb-simd-avx2:f64.4-fmadd acc2 (f64.4-aref array1 (+ index 4)) 62 | (f64.4-aref array2 (+ index 4))))) 63 | (acc3 (f64.4 0) (setf acc3 (sb-simd-avx2:f64.4-fmadd acc3 (f64.4-aref array1 (+ index 8)) 64 | (f64.4-aref array2 (+ index 8))))) 65 | (acc4 (f64.4 0) (setf acc4 (sb-simd-avx2:f64.4-fmadd acc4 (f64.4-aref array1 (+ index 12)) 66 | (f64.4-aref array2 (+ index 12)))))) 67 | ((> index (- n 16)) 68 | (do ((result (f64.4-hsum (f64.4+ acc1 acc2 acc3 acc4)) 69 | (+ result (* (row-major-aref array1 index) 70 | (row-major-aref array2 index)))) 71 | (index index (1+ index))) 72 | ((>= index n) result))))) 73 | 74 | (declaim (inline f64.4-fma-vdot)) 75 | (defun f64.4-fma-vdot (array1 array2 &aux (n (min (array-total-size array1) 76 | (array-total-size array2)))) 77 | (declare (optimize (speed 3) (safety 0) (debug 0)) 78 | (type f64vec array1 array2)) 79 | (let ((n0 (- n (mod n 16)))) 80 | (+ (loop with acc1 of-type f64.4 = (f64.4 0) 81 | with acc2 of-type f64.4 = (f64.4 0) 82 | with acc3 of-type f64.4 = (f64.4 0) 83 | with acc4 of-type f64.4 = (f64.4 0) 84 | for i of-type fixnum below n0 by 16 85 | do (let ((array1-i0 (f64.4-aref array1 (+ i 0))) 86 | (array1-i4 (f64.4-aref array1 (+ i 4))) 87 | (array1-i8 (f64.4-aref array1 (+ i 8))) 88 | (array1-i12 (f64.4-aref array1 (+ i 12)))) 89 | (setf acc1 (sb-simd-avx2:f64.4-fmadd acc1 array1-i0 90 | (f64.4-aref array2 (+ i 0))) 91 | acc2 (sb-simd-avx2:f64.4-fmadd acc2 array1-i4 92 | (f64.4-aref array2 (+ i 4))) 93 | acc3 (sb-simd-avx2:f64.4-fmadd acc3 array1-i8 94 | (f64.4-aref array2 (+ i 8))) 95 | acc4 (sb-simd-avx2:f64.4-fmadd acc4 array1-i12 96 | (f64.4-aref array2 (+ i 12))))) 97 | finally (return (f64.4-hsum (f64.4+ acc1 acc2 acc3 acc4)))) 98 | (loop for i of-type fixnum from n0 below n 99 | summing (* (aref array1 i) (aref array2 i)) 100 | into sum of-type double-float 101 | finally (return sum))))) 102 | 103 | (defun benchmark-f64.4-vdot-double (&rest v-lengths) 104 | (declare (optimize (speed 3) (safety 0) (debug 0)) 105 | (notinline sb-simd-avx2:f64.4-vdot)) 106 | (loop for len in v-lengths 107 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 108 | collect (let ((u (make-array len :element-type 'double-float 109 | :initial-contents 110 | (mapcar (lambda (i) (+ i 0.1d0)) 111 | (alexandria:iota len)))) 112 | (v (make-array len :element-type 'double-float 113 | :initial-contents 114 | (mapcar (lambda (i) (+ i 0.2d0)) 115 | (alexandria:iota len))))) 116 | (declare (type f64vec u v)) 117 | (time-total 1e6 (sb-simd-avx2::f64.4-vdot u v))))) 118 | 119 | (defun benchmark-f64.4-vdot2-double (&rest v-lengths) 120 | (declare (optimize (speed 3) (safety 0) (debug 0)) 121 | (notinline sb-simd-avx2:f64.4-vdot2)) 122 | (loop for len in v-lengths 123 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 124 | collect (let ((u (make-array len :element-type 'double-float 125 | :initial-contents 126 | (mapcar (lambda (i) (+ i 0.1d0)) 127 | (alexandria:iota len)))) 128 | (v (make-array len :element-type 'double-float 129 | :initial-contents 130 | (mapcar (lambda (i) (+ i 0.2d0)) 131 | (alexandria:iota len))))) 132 | (declare (type f64vec u v)) 133 | (time-total 1e6 (sb-simd-avx2::f64.4-vdot2 u v))))) 134 | 135 | (defun benchmark-f64.4-fma-vdot-double (&rest v-lengths) 136 | (declare (optimize (speed 3) (safety 0) (debug 0)) 137 | (notinline f64.4-fma-vdot)) 138 | (loop for len in v-lengths 139 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 140 | collect (let ((u (make-array len :element-type 'double-float 141 | :initial-contents 142 | (mapcar (lambda (i) (+ i 0.1d0)) 143 | (alexandria:iota len)))) 144 | (v (make-array len :element-type 'double-float 145 | :initial-contents 146 | (mapcar (lambda (i) (+ i 0.2d0)) 147 | (alexandria:iota len))))) 148 | (declare (type f64vec u v)) 149 | (time-total 1e6 (f64.4-fma-vdot u v))))) 150 | 151 | (defun benchmark-simd-vdot-double (&rest v-lengths) 152 | (declare (optimize (speed 3) (safety 0) (debug 0)) 153 | (notinline f64.4-vdot)) 154 | (loop for len in v-lengths 155 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 156 | collect (let ((u (make-array len :element-type 'double-float 157 | :initial-contents 158 | (mapcar (lambda (i) (+ i 0.1d0)) 159 | (alexandria:iota len)))) 160 | (v (make-array len :element-type 'double-float 161 | :initial-contents 162 | (mapcar (lambda (i) (+ i 0.2d0)) 163 | (alexandria:iota len))))) 164 | (declare (type f64vec u v)) 165 | (time-total 1e6 (simd-vdot u v))))) 166 | 167 | (defun benchmark-fma-vdot-double (&rest v-lengths) 168 | (declare (optimize (speed 3) (safety 0) (debug 0)) 169 | (notinline fma-vdot)) 170 | (loop for len in v-lengths 171 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 172 | collect (let ((u (make-array len :element-type 'double-float 173 | :initial-contents 174 | (mapcar (lambda (i) (+ i 0.1d0)) 175 | (alexandria:iota len)))) 176 | (v (make-array len :element-type 'double-float 177 | :initial-contents 178 | (mapcar (lambda (i) (+ i 0.2d0)) 179 | (alexandria:iota len))))) 180 | (declare (type f64vec u v)) 181 | (time-total 1e6 (fma-vdot u v))))) 182 | 183 | (defun benchmark-avx-single (&rest v-lengths) 184 | (declare (optimize (speed 3) (safety 0) (debug 0)) 185 | (notinline f32.8-vdot)) 186 | (loop for len in v-lengths 187 | do (format t "Doing dot product of two ~A long single float vectors 1e6 times~%" len) 188 | collect (let ((u (make-array len :element-type 'single-float 189 | :initial-contents 190 | (mapcar (lambda (i) (+ i 0.1f0)) 191 | (alexandria:iota len)))) 192 | (v (make-array len :element-type 'single-float 193 | :initial-contents 194 | (mapcar (lambda (i) (+ i 0.2f0)) 195 | (alexandria:iota len))))) 196 | (declare (type f32vec u v)) 197 | (time-total 1e6 (f32.8-vdot u v))))) 198 | 199 | (defun benchmark-sse-double (&rest v-lengths) 200 | (declare (optimize (speed 3) (safety 0) (debug 0)) 201 | (notinline f64.2-vdot)) 202 | (loop for len in v-lengths 203 | do (format t "Doing dot product of two ~A long double float vectors 1e6 times~%" len) 204 | collect (let ((u (make-array len :element-type 'double-float 205 | :initial-contents 206 | (mapcar (lambda (i) (+ i 0.1d0)) 207 | (alexandria:iota len)))) 208 | (v (make-array len :element-type 'double-float 209 | :initial-contents 210 | (mapcar (lambda (i) (+ i 0.2d0)) 211 | (alexandria:iota len))))) 212 | (declare (type f32vec u v)) 213 | (time-total 1e6 (f64.2-vdot u v))))) 214 | 215 | (defun benchmark-sse-single (&rest v-lengths) 216 | (declare (optimize (speed 3) (safety 0) (debug 0)) 217 | (notinline f32.4-vdot)) 218 | (loop for len in v-lengths 219 | do (format t "Doing dot product of two ~A long single float vectors 1e6 times~%" len) 220 | collect (let ((u (make-array len :element-type 'single-float 221 | :initial-contents 222 | (mapcar (lambda (i) (+ i 0.1f0)) 223 | (alexandria:iota len)))) 224 | (v (make-array len :element-type 'single-float 225 | :initial-contents 226 | (mapcar (lambda (i) (+ i 0.2f0)) 227 | (alexandria:iota len))))) 228 | (declare (type f32vec u v)) 229 | (time-total 1e6 (f32.4-vdot u v))))) 230 | 231 | (defun benchmark-vsum-double (&rest v-lengths) 232 | (declare (optimize (speed 3) (safety 0) (debug 0)) 233 | (notinline f64.4-vsum)) 234 | (loop for len in v-lengths 235 | do (format t "Doing sum of a ~A long double float vector 1e6 times~%" len) 236 | collect (let ((u (make-array len :element-type 'double-float 237 | :initial-contents 238 | (mapcar (lambda (i) (+ i 0.1d0)) 239 | (alexandria:iota len))))) 240 | (declare (type f64vec u)) 241 | (time-total 1e6 (f64.4-vsum u))))) 242 | 243 | (defun benchmark-simd-sum-double (&rest v-lengths) 244 | (declare (optimize (speed 3) (safety 0) (debug 0)) 245 | (notinline simd-sum)) 246 | (loop for len in v-lengths 247 | do (format t "Doing sum of a ~A long double float vector 1e6 times~%" len) 248 | collect (let ((u (make-array len :element-type 'double-float 249 | :initial-contents 250 | (mapcar (lambda (i) (+ i 0.1d0)) 251 | (alexandria:iota len))))) 252 | (declare (type f64vec u)) 253 | (time-total 1e6 (simd-sum u))))) 254 | 255 | (defun benchmark-vsum-single (&rest v-lengths) 256 | (declare (optimize (speed 3) (safety 0) (debug 0)) 257 | (notinline f32.8-vsum)) 258 | (loop for len in v-lengths 259 | do (format t "Doing sum of a ~A long single float vector 1e6 x~%" len) 260 | collect (let ((u (make-array len :element-type 'single-float 261 | :initial-contents 262 | (mapcar (lambda (i) (+ i 0.1f0)) 263 | (alexandria:iota len))))) 264 | (declare (type f32vec u)) 265 | (time-total 1e6 (f32.8-vsum u))))) 266 | #+(or) 267 | (benchmark-f64.4-vdot-double 10 100 200 400 800 1200 2400 4800 9600) 268 | #+(or) 269 | (benchmark-f64.4-vdot2-double 10 100 200 400 800 1200 2400 4800 9600) 270 | #+(or) 271 | (benchmark-avx-single 10 100 200 400 800 1200 2400 4800 9600) 272 | #+(or) 273 | (benchmark-sse-double 10 100 200 400 800 1200 2400 4800 9600) 274 | #+(or) 275 | (benchmark-sse-single 10 100 200 400 800 1200 2400 4800 9600) 276 | #+(or) 277 | (benchmark-vsum-double 10 100 200 400 800 1200 2400 4800 9600) 278 | #+(or) 279 | (benchmark-vsum-single 10 100 200 400 800 1200 2400 4800 9600) 280 | #+(or) 281 | (apply 'benchmark-avx-double (loop for i from 1 to 10 collect (* 1000 i))) 282 | -------------------------------------------------------------------------------- /code/instruction-sets/sb-simd.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:sb-simd) 2 | 3 | (define-instruction-set :sb-simd 4 | (:scalars 5 | (any 64 t #:t) 6 | ;; Numbers 7 | (index 64 sb-simd-internals::index #:signed-num (#:signed-reg)) 8 | (u1 1 (unsigned-byte 1) #:unsigned-num (#:unsigned-reg)) 9 | (u2 2 (unsigned-byte 2) #:unsigned-num (#:unsigned-reg)) 10 | (u4 4 (unsigned-byte 4) #:unsigned-num (#:unsigned-reg)) 11 | (u8 8 (unsigned-byte 8) #:unsigned-num (#:unsigned-reg)) 12 | (u16 16 (unsigned-byte 16) #:unsigned-num (#:unsigned-reg)) 13 | (u32 32 (unsigned-byte 32) #:unsigned-num (#:unsigned-reg)) 14 | (u64 64 (unsigned-byte 64) #:unsigned-num (#:unsigned-reg)) 15 | (s8 8 (signed-byte 8) #:signed-num (#:signed-reg)) 16 | (s16 16 (signed-byte 16) #:signed-num (#:signed-reg)) 17 | (s32 32 (signed-byte 32) #:signed-num (#:signed-reg)) 18 | (s64 64 (signed-byte 64) #:signed-num (#:signed-reg)) 19 | (f32 32 single-float #:single-float (#:single-reg)) 20 | (f64 64 double-float #:double-float (#:double-reg)) 21 | ;; Vectors 22 | (charvec 64 (simple-array character (*)) #:simple-character-string) 23 | ( u8vec 64 (simple-array (unsigned-byte 8) (*)) #:simple-array-unsigned-byte-8) 24 | (u16vec 64 (simple-array (unsigned-byte 16) (*)) #:simple-array-unsigned-byte-16) 25 | (u32vec 64 (simple-array (unsigned-byte 32) (*)) #:simple-array-unsigned-byte-32) 26 | (u64vec 64 (simple-array (unsigned-byte 64) (*)) #:simple-array-unsigned-byte-64) 27 | ( s8vec 64 (simple-array (signed-byte 8) (*)) #:simple-array-signed-byte-8) 28 | (s16vec 64 (simple-array (signed-byte 16) (*)) #:simple-array-signed-byte-16) 29 | (s32vec 64 (simple-array (signed-byte 32) (*)) #:simple-array-signed-byte-32) 30 | (s64vec 64 (simple-array (signed-byte 64) (*)) #:simple-array-signed-byte-64) 31 | (f32vec 64 (simple-array single-float (*)) #:simple-array-single-float) 32 | (f64vec 64 (simple-array double-float (*)) #:simple-array-double-float) 33 | ;; Arrays 34 | (char-array 64 (array character)) 35 | ( u8-array 64 (array (unsigned-byte 8))) 36 | (u16-array 64 (array (unsigned-byte 16))) 37 | (u32-array 64 (array (unsigned-byte 32))) 38 | (u64-array 64 (array (unsigned-byte 64))) 39 | ( s8-array 64 (array (signed-byte 8))) 40 | (s16-array 64 (array (signed-byte 16))) 41 | (s32-array 64 (array (signed-byte 32))) 42 | (s64-array 64 (array (signed-byte 64))) 43 | (f32-array 64 (array single-float)) 44 | (f64-array 64 (array double-float))) 45 | (:instructions 46 | ;; ub64 packers and unpackers 47 | (u64-from-f32 nil (u64) (f32 f32) :encoding :fake-vop) 48 | (u64-from-f64 nil (u64) (f64) :encoding :fake-vop) 49 | (u64-from-u8s nil (u64) (u8 u8 u8 u8 u8 u8 u8 u8) :encoding :fake-vop) 50 | (u64-from-u16s nil (u64) (u16 u16 u16 u16) :encoding :fake-vop) 51 | (u64-from-u32s nil (u64) (u32 u32) :encoding :fake-vop) 52 | (u64-from-s8s nil (u64) (s8 s8 s8 s8 s8 s8 s8 s8) :encoding :fake-vop) 53 | (u64-from-s16s nil (u64) (s16 s16 s16 s16) :encoding :fake-vop) 54 | (u64-from-s32s nil (u64) (s32 s32) :encoding :fake-vop) 55 | (u64-from-s64 nil (u64) (s64) :encoding :fake-vop) 56 | ( u8s-from-u64 nil (u8 u8 u8 u8 u8 u8 u8 u8) (u64) :encoding :fake-vop) 57 | (u16s-from-u64 nil (u16 u16 u16 u16) (u64) :encoding :fake-vop) 58 | (u32s-from-u64 nil (u32 u32) (u64) :encoding :fake-vop) 59 | ( s8s-from-u64 nil (s8 s8 s8 s8 s8 s8 s8 s8) (u64) :encoding :fake-vop) 60 | (s16s-from-u64 nil (s16 s16 s16 s16) (u64) :encoding :fake-vop) 61 | (s32s-from-u64 nil (s32 s32) (u64) :encoding :fake-vop) 62 | ( s64-from-u64 nil (s64) (u64) :encoding :fake-vop) 63 | ;; f32 64 | (f32-if nil (f32) (u32 f32 f32) :encoding :fake-vop) 65 | (two-arg-f32-and nil (f32) (f32 f32) :encoding :fake-vop :associative t) 66 | (two-arg-f32-or nil (f32) (f32 f32) :encoding :fake-vop :associative t) 67 | (two-arg-f32-xor nil (f32) (f32 f32) :encoding :fake-vop :associative t) 68 | (two-arg-f32-max nil (f32) (f32 f32) :encoding :fake-vop :associative t) 69 | (two-arg-f32-min nil (f32) (f32 f32) :encoding :fake-vop :associative t) 70 | (two-arg-f32+ nil (f32) (f32 f32) :encoding :fake-vop :associative t) 71 | (two-arg-f32- nil (f32) (f32 f32) :encoding :fake-vop) 72 | (two-arg-f32* nil (f32) (f32 f32) :encoding :fake-vop :associative t) 73 | (two-arg-f32/ nil (f32) (f32 f32) :encoding :fake-vop) 74 | (two-arg-f32= nil (u32) (f32 f32) :encoding :fake-vop :associative t) 75 | (two-arg-f32/= nil (u32) (f32 f32) :encoding :fake-vop :associative t) 76 | (two-arg-f32< nil (u32) (f32 f32) :encoding :fake-vop) 77 | (two-arg-f32<= nil (u32) (f32 f32) :encoding :fake-vop) 78 | (two-arg-f32> nil (u32) (f32 f32) :encoding :fake-vop) 79 | (two-arg-f32>= nil (u32) (f32 f32) :encoding :fake-vop) 80 | (f32-andc1 nil (f32) (f32 f32) :encoding :fake-vop) 81 | (f32-not nil (f32) (f32) :encoding :fake-vop) 82 | (f32-reciprocal nil (f32) (f32) :encoding :fake-vop) 83 | (f32-rsqrt nil (f32) (f32) :encoding :fake-vop) 84 | (f32-sqrt nil (f32) (f32) :encoding :fake-vop) 85 | ;; f64 86 | (f64-if nil (f64) (u64 f64 f64) :encoding :fake-vop) 87 | (two-arg-f64-and nil (f64) (f64 f64) :encoding :fake-vop :associative t) 88 | (two-arg-f64-or nil (f64) (f64 f64) :encoding :fake-vop :associative t) 89 | (two-arg-f64-xor nil (f64) (f64 f64) :encoding :fake-vop :associative t) 90 | (two-arg-f64-max nil (f64) (f64 f64) :encoding :fake-vop :associative t) 91 | (two-arg-f64-min nil (f64) (f64 f64) :encoding :fake-vop :associative t) 92 | (two-arg-f64+ nil (f64) (f64 f64) :encoding :fake-vop :associative t) 93 | (two-arg-f64- nil (f64) (f64 f64) :encoding :fake-vop) 94 | (two-arg-f64* nil (f64) (f64 f64) :encoding :fake-vop :associative t) 95 | (two-arg-f64/ nil (f64) (f64 f64) :encoding :fake-vop) 96 | (two-arg-f64= nil (u64) (f64 f64) :encoding :fake-vop :associative t) 97 | (two-arg-f64/= nil (u64) (f64 f64) :encoding :fake-vop :associative t) 98 | (two-arg-f64< nil (u64) (f64 f64) :encoding :fake-vop) 99 | (two-arg-f64<= nil (u64) (f64 f64) :encoding :fake-vop) 100 | (two-arg-f64> nil (u64) (f64 f64) :encoding :fake-vop) 101 | (two-arg-f64>= nil (u64) (f64 f64) :encoding :fake-vop) 102 | (f64-andc1 nil (f64) (f64 f64) :encoding :fake-vop) 103 | (f64-not nil (f64) (f64) :encoding :fake-vop) 104 | (f64-reciprocal nil (f64) (f64) :encoding :fake-vop) 105 | (f64-rsqrt nil (f64) (f64) :encoding :fake-vop) 106 | (f64-sqrt nil (f64) (f64) :encoding :fake-vop) 107 | ;; u8 108 | (u8-if nil (u8) (u8 u8 u8) :encoding :fake-vop) 109 | (two-arg-u8-and nil (u8) (u8 u8) :encoding :fake-vop :associative t) 110 | (two-arg-u8-or nil (u8) (u8 u8) :encoding :fake-vop :associative t) 111 | (two-arg-u8-xor nil (u8) (u8 u8) :encoding :fake-vop :associative t) 112 | (two-arg-u8-max nil (u8) (u8 u8) :encoding :fake-vop :associative t) 113 | (two-arg-u8-min nil (u8) (u8 u8) :encoding :fake-vop :associative t) 114 | (two-arg-u8+ nil (u8) (u8 u8) :encoding :fake-vop :associative t) 115 | (two-arg-u8- nil (u8) (u8 u8) :encoding :fake-vop) 116 | (two-arg-u8= nil (u8) (u8 u8) :encoding :fake-vop :associative t) 117 | (two-arg-u8/= nil (u8) (u8 u8) :encoding :fake-vop :associative t) 118 | (two-arg-u8< nil (u8) (u8 u8) :encoding :fake-vop) 119 | (two-arg-u8<= nil (u8) (u8 u8) :encoding :fake-vop) 120 | (two-arg-u8> nil (u8) (u8 u8) :encoding :fake-vop) 121 | (two-arg-u8>= nil (u8) (u8 u8) :encoding :fake-vop) 122 | (u8-andc1 nil (u8) (u8 u8) :encoding :fake-vop) 123 | (u8-not nil (u8) (u8) :encoding :fake-vop) 124 | ;; u16 125 | (u16-if nil (u16) (u16 u16 u16) :encoding :fake-vop) 126 | (two-arg-u16-and nil (u16) (u16 u16) :encoding :fake-vop :associative t) 127 | (two-arg-u16-or nil (u16) (u16 u16) :encoding :fake-vop :associative t) 128 | (two-arg-u16-xor nil (u16) (u16 u16) :encoding :fake-vop :associative t) 129 | (two-arg-u16-max nil (u16) (u16 u16) :encoding :fake-vop :associative t) 130 | (two-arg-u16-min nil (u16) (u16 u16) :encoding :fake-vop :associative t) 131 | (two-arg-u16+ nil (u16) (u16 u16) :encoding :fake-vop :associative t) 132 | (two-arg-u16- nil (u16) (u16 u16) :encoding :fake-vop) 133 | (two-arg-u16= nil (u16) (u16 u16) :encoding :fake-vop :associative t) 134 | (two-arg-u16/= nil (u16) (u16 u16) :encoding :fake-vop :associative t) 135 | (two-arg-u16< nil (u16) (u16 u16) :encoding :fake-vop) 136 | (two-arg-u16<= nil (u16) (u16 u16) :encoding :fake-vop) 137 | (two-arg-u16> nil (u16) (u16 u16) :encoding :fake-vop) 138 | (two-arg-u16>= nil (u16) (u16 u16) :encoding :fake-vop) 139 | (u16-andc1 nil (u16) (u16 u16) :encoding :fake-vop) 140 | (u16-not nil (u16) (u16) :encoding :fake-vop) 141 | ;; u32 142 | (u32-if nil (u32) (u32 u32 u32) :encoding :fake-vop) 143 | (two-arg-u32-and nil (u32) (u32 u32) :encoding :fake-vop :associative t) 144 | (two-arg-u32-or nil (u32) (u32 u32) :encoding :fake-vop :associative t) 145 | (two-arg-u32-xor nil (u32) (u32 u32) :encoding :fake-vop :associative t) 146 | (two-arg-u32-max nil (u32) (u32 u32) :encoding :fake-vop :associative t) 147 | (two-arg-u32-min nil (u32) (u32 u32) :encoding :fake-vop :associative t) 148 | (two-arg-u32+ nil (u32) (u32 u32) :encoding :fake-vop :associative t) 149 | (two-arg-u32- nil (u32) (u32 u32) :encoding :fake-vop) 150 | (two-arg-u32= nil (u32) (u32 u32) :encoding :fake-vop :associative t) 151 | (two-arg-u32/= nil (u32) (u32 u32) :encoding :fake-vop :associative t) 152 | (two-arg-u32< nil (u32) (u32 u32) :encoding :fake-vop) 153 | (two-arg-u32<= nil (u32) (u32 u32) :encoding :fake-vop) 154 | (two-arg-u32> nil (u32) (u32 u32) :encoding :fake-vop) 155 | (two-arg-u32>= nil (u32) (u32 u32) :encoding :fake-vop) 156 | (u32-andc1 nil (u32) (u32 u32) :encoding :fake-vop) 157 | (u32-not nil (u32) (u32) :encoding :fake-vop) 158 | ;; u64 159 | (u64-if nil (u64) (u64 u64 u64) :encoding :fake-vop) 160 | (two-arg-u64-and nil (u64) (u64 u64) :encoding :fake-vop :associative t) 161 | (two-arg-u64-or nil (u64) (u64 u64) :encoding :fake-vop :associative t) 162 | (two-arg-u64-xor nil (u64) (u64 u64) :encoding :fake-vop :associative t) 163 | (two-arg-u64-max nil (u64) (u64 u64) :encoding :fake-vop :associative t) 164 | (two-arg-u64-min nil (u64) (u64 u64) :encoding :fake-vop :associative t) 165 | (two-arg-u64+ nil (u64) (u64 u64) :encoding :fake-vop :associative t) 166 | (two-arg-u64- nil (u64) (u64 u64) :encoding :fake-vop) 167 | (two-arg-u64= nil (u64) (u64 u64) :encoding :fake-vop :associative t) 168 | (two-arg-u64/= nil (u64) (u64 u64) :encoding :fake-vop :associative t) 169 | (two-arg-u64< nil (u64) (u64 u64) :encoding :fake-vop) 170 | (two-arg-u64<= nil (u64) (u64 u64) :encoding :fake-vop) 171 | (two-arg-u64> nil (u64) (u64 u64) :encoding :fake-vop) 172 | (two-arg-u64>= nil (u64) (u64 u64) :encoding :fake-vop) 173 | (u64-andc1 nil (u64) (u64 u64) :encoding :fake-vop) 174 | (u64-not nil (u64) (u64) :encoding :fake-vop) 175 | ;; s8 176 | (s8-if nil (s8) (u8 s8 s8) :encoding :fake-vop) 177 | (two-arg-s8-and nil (s8) (s8 s8) :encoding :fake-vop :associative t) 178 | (two-arg-s8-or nil (s8) (s8 s8) :encoding :fake-vop :associative t) 179 | (two-arg-s8-xor nil (s8) (s8 s8) :encoding :fake-vop :associative t) 180 | (two-arg-s8-max nil (s8) (s8 s8) :encoding :fake-vop :associative t) 181 | (two-arg-s8-min nil (s8) (s8 s8) :encoding :fake-vop :associative t) 182 | (two-arg-s8+ nil (s8) (s8 s8) :encoding :fake-vop :associative t) 183 | (two-arg-s8- nil (s8) (s8 s8) :encoding :fake-vop) 184 | (two-arg-s8= nil (u8) (s8 s8) :encoding :fake-vop :associative t) 185 | (two-arg-s8/= nil (u8) (s8 s8) :encoding :fake-vop :associative t) 186 | (two-arg-s8< nil (u8) (s8 s8) :encoding :fake-vop) 187 | (two-arg-s8<= nil (u8) (s8 s8) :encoding :fake-vop) 188 | (two-arg-s8> nil (u8) (s8 s8) :encoding :fake-vop) 189 | (two-arg-s8>= nil (u8) (s8 s8) :encoding :fake-vop) 190 | (s8-andc1 nil (s8) (s8 s8) :encoding :fake-vop) 191 | (s8-not nil (s8) (s8) :encoding :fake-vop) 192 | ;; s16 193 | (s16-if nil (s16) (u16 s16 s16) :encoding :fake-vop) 194 | (two-arg-s16-and nil (s16) (s16 s16) :encoding :fake-vop :associative t) 195 | (two-arg-s16-or nil (s16) (s16 s16) :encoding :fake-vop :associative t) 196 | (two-arg-s16-xor nil (s16) (s16 s16) :encoding :fake-vop :associative t) 197 | (two-arg-s16-max nil (s16) (s16 s16) :encoding :fake-vop :associative t) 198 | (two-arg-s16-min nil (s16) (s16 s16) :encoding :fake-vop :associative t) 199 | (two-arg-s16+ nil (s16) (s16 s16) :encoding :fake-vop :associative t) 200 | (two-arg-s16- nil (s16) (s16 s16) :encoding :fake-vop) 201 | (two-arg-s16= nil (u16) (s16 s16) :encoding :fake-vop :associative t) 202 | (two-arg-s16/= nil (u16) (s16 s16) :encoding :fake-vop :associative t) 203 | (two-arg-s16< nil (u16) (s16 s16) :encoding :fake-vop) 204 | (two-arg-s16<= nil (u16) (s16 s16) :encoding :fake-vop) 205 | (two-arg-s16> nil (u16) (s16 s16) :encoding :fake-vop) 206 | (two-arg-s16>= nil (u16) (s16 s16) :encoding :fake-vop) 207 | (s16-andc1 nil (s16) (s16 s16) :encoding :fake-vop) 208 | (s16-not nil (s16) (s16) :encoding :fake-vop) 209 | ;; s32 210 | (s32-if nil (s32) (u32 s32 s32) :encoding :fake-vop) 211 | (two-arg-s32-and nil (s32) (s32 s32) :encoding :fake-vop :associative t) 212 | (two-arg-s32-or nil (s32) (s32 s32) :encoding :fake-vop :associative t) 213 | (two-arg-s32-xor nil (s32) (s32 s32) :encoding :fake-vop :associative t) 214 | (two-arg-s32-max nil (s32) (s32 s32) :encoding :fake-vop :associative t) 215 | (two-arg-s32-min nil (s32) (s32 s32) :encoding :fake-vop :associative t) 216 | (two-arg-s32+ nil (s32) (s32 s32) :encoding :fake-vop :associative t) 217 | (two-arg-s32- nil (s32) (s32 s32) :encoding :fake-vop) 218 | (two-arg-s32= nil (u32) (s32 s32) :encoding :fake-vop :associative t) 219 | (two-arg-s32/= nil (u32) (s32 s32) :encoding :fake-vop :associative t) 220 | (two-arg-s32< nil (u32) (s32 s32) :encoding :fake-vop) 221 | (two-arg-s32<= nil (u32) (s32 s32) :encoding :fake-vop) 222 | (two-arg-s32> nil (u32) (s32 s32) :encoding :fake-vop) 223 | (two-arg-s32>= nil (u32) (s32 s32) :encoding :fake-vop) 224 | (s32-andc1 nil (s32) (s32 s32) :encoding :fake-vop) 225 | (s32-not nil (s32) (s32) :encoding :fake-vop) 226 | ;; s64 227 | (s64-if nil (s64) (u64 s64 s64) :encoding :fake-vop) 228 | (two-arg-s64-and nil (s64) (s64 s64) :encoding :fake-vop :associative t) 229 | (two-arg-s64-or nil (s64) (s64 s64) :encoding :fake-vop :associative t) 230 | (two-arg-s64-xor nil (s64) (s64 s64) :encoding :fake-vop :associative t) 231 | (two-arg-s64-max nil (s64) (s64 s64) :encoding :fake-vop :associative t) 232 | (two-arg-s64-min nil (s64) (s64 s64) :encoding :fake-vop :associative t) 233 | (two-arg-s64+ nil (s64) (s64 s64) :encoding :fake-vop :associative t) 234 | (two-arg-s64- nil (s64) (s64 s64) :encoding :fake-vop) 235 | (two-arg-s64= nil (u64) (s64 s64) :encoding :fake-vop :associative t) 236 | (two-arg-s64/= nil (u64) (s64 s64) :encoding :fake-vop :associative t) 237 | (two-arg-s64< nil (u64) (s64 s64) :encoding :fake-vop) 238 | (two-arg-s64<= nil (u64) (s64 s64) :encoding :fake-vop) 239 | (two-arg-s64> nil (u64) (s64 s64) :encoding :fake-vop) 240 | (two-arg-s64>= nil (u64) (s64 s64) :encoding :fake-vop) 241 | (s64-andc1 nil (s64) (s64 s64) :encoding :fake-vop) 242 | (s64-not nil (s64) (s64) :encoding :fake-vop)) 243 | (:reffers 244 | (f32 f32-array f32-aref f32-row-major-aref) 245 | (f64 f64-array f64-aref f64-row-major-aref) 246 | ( u8 u8-array u8-aref u8-row-major-aref) 247 | (u16 u16-array u16-aref u16-row-major-aref) 248 | (u32 u32-array u32-aref u32-row-major-aref) 249 | (u64 u64-array u64-aref u64-row-major-aref) 250 | ( s8 s8-array s8-aref s8-row-major-aref) 251 | (s16 s16-array s16-aref s16-row-major-aref) 252 | (s32 s32-array s32-aref s32-row-major-aref) 253 | (s64 s64-array s64-aref s64-row-major-aref)) 254 | (:associatives 255 | (f32-and two-arg-f32-and +f32-true+) 256 | (f32-or two-arg-f32-or +f32-false+) 257 | (f32-xor two-arg-f32-xor +f32-false+) 258 | (f32-max two-arg-f32-max nil) 259 | (f32-min two-arg-f32-min nil) 260 | (f32+ two-arg-f32+ 0f0) 261 | (f32* two-arg-f32* 1f0) 262 | (f64-and two-arg-f64-and +f64-true+) 263 | (f64-or two-arg-f64-or +f64-false+) 264 | (f64-xor two-arg-f64-xor +f64-false+) 265 | (f64-max two-arg-f64-max nil) 266 | (f64-min two-arg-f64-min nil) 267 | (f64+ two-arg-f64+ 0d0) 268 | (f64* two-arg-f64* 1d0) 269 | ( u8-and two-arg-u8-and +u8-true+) 270 | ( u8-or two-arg-u8-or +u8-false+) 271 | ( u8-xor two-arg-u8-xor +u8-false+) 272 | ( u8-max two-arg-u8-max nil) 273 | ( u8-min two-arg-u8-min nil) 274 | ( u8+ two-arg-u8+ 0) 275 | (u16-and two-arg-u16-and +u16-true+) 276 | (u16-or two-arg-u16-or +u16-false+) 277 | (u16-xor two-arg-u16-xor +u16-false+) 278 | (u16-max two-arg-u16-max nil) 279 | (u16-min two-arg-u16-min nil) 280 | (u16+ two-arg-u16+ 0) 281 | (u32-and two-arg-u32-and +u32-true+) 282 | (u32-or two-arg-u32-or +u32-false+) 283 | (u32-xor two-arg-u32-xor +u32-false+) 284 | (u32-max two-arg-u32-max nil) 285 | (u32-min two-arg-u32-min nil) 286 | (u32+ two-arg-u32+ 0) 287 | (u64-and two-arg-u64-and +u64-true+) 288 | (u64-or two-arg-u64-or +u64-false+) 289 | (u64-xor two-arg-u64-xor +u64-false+) 290 | (u64-max two-arg-u64-max nil) 291 | (u64-min two-arg-u64-min nil) 292 | (u64+ two-arg-u64+ 0) 293 | ( s8-and two-arg-s8-and +s8-true+) 294 | ( s8-or two-arg-s8-or +s8-false+) 295 | ( s8-xor two-arg-s8-xor +s8-false+) 296 | ( s8-max two-arg-s8-max nil) 297 | ( s8-min two-arg-s8-min nil) 298 | ( s8+ two-arg-s8+ 0) 299 | (s16-and two-arg-s16-and +s16-true+) 300 | (s16-or two-arg-s16-or +s16-false+) 301 | (s16-xor two-arg-s16-xor +s16-false+) 302 | (s16-max two-arg-s16-max nil) 303 | (s16-min two-arg-s16-min nil) 304 | (s16+ two-arg-s16+ 0) 305 | (s32-and two-arg-s32-and +s32-true+) 306 | (s32-or two-arg-s32-or +s32-false+) 307 | (s32-xor two-arg-s32-xor +s32-false+) 308 | (s32-max two-arg-s32-max nil) 309 | (s32-min two-arg-s32-min nil) 310 | (s32+ two-arg-s32+ 0) 311 | (s64-and two-arg-s64-and +s64-true+) 312 | (s64-or two-arg-s64-or +s64-false+) 313 | (s64-xor two-arg-s64-xor +s64-false+) 314 | (s64-max two-arg-s64-max nil) 315 | (s64-min two-arg-s64-min nil) 316 | (s64+ two-arg-s64+ 0)) 317 | (:comparisons 318 | (f32= two-arg-f32= u32-and +u32-true+) 319 | (f32< two-arg-f32< u32-and +u32-true+) 320 | (f32<= two-arg-f32<= u32-and +u32-true+) 321 | (f32> two-arg-f32> u32-and +u32-true+) 322 | (f32>= two-arg-f32>= u32-and +u32-true+) 323 | (f64= two-arg-f64= u64-and +u64-true+) 324 | (f64< two-arg-f64< u64-and +u64-true+) 325 | (f64<= two-arg-f64<= u64-and +u64-true+) 326 | (f64> two-arg-f64> u64-and +u64-true+) 327 | (f64>= two-arg-f64>= u64-and +u64-true+) 328 | (u8= two-arg-u8= u8-and +u8-true+) 329 | (u8< two-arg-u8< u8-and +u8-true+) 330 | (u8<= two-arg-u8<= u8-and +u8-true+) 331 | (u8> two-arg-u8> u8-and +u8-true+) 332 | (u8>= two-arg-u8>= u8-and +u8-true+) 333 | (u16= two-arg-u16= u16-and +u16-true+) 334 | (u16< two-arg-u16< u16-and +u16-true+) 335 | (u16<= two-arg-u16<= u16-and +u16-true+) 336 | (u16> two-arg-u16> u16-and +u16-true+) 337 | (u16>= two-arg-u16>= u16-and +u16-true+) 338 | (u32= two-arg-u32= u32-and +u32-true+) 339 | (u32< two-arg-u32< u32-and +u32-true+) 340 | (u32<= two-arg-u32<= u32-and +u32-true+) 341 | (u32> two-arg-u32> u32-and +u32-true+) 342 | (u32>= two-arg-u32>= u32-and +u32-true+) 343 | (u64= two-arg-u64= u64-and +u64-true+) 344 | (u64< two-arg-u64< u64-and +u64-true+) 345 | (u64<= two-arg-u64<= u64-and +u64-true+) 346 | (u64> two-arg-u64> u64-and +u64-true+) 347 | (u64>= two-arg-u64>= u64-and +u64-true+) 348 | (s8= two-arg-s8= u8-and +u8-true+) 349 | (s8< two-arg-s8< u8-and +u8-true+) 350 | (s8<= two-arg-s8<= u8-and +u8-true+) 351 | (s8> two-arg-s8> u8-and +u8-true+) 352 | (s8>= two-arg-s8>= u8-and +u8-true+) 353 | (s16= two-arg-s16= u16-and +u16-true+) 354 | (s16< two-arg-s16< u16-and +u16-true+) 355 | (s16<= two-arg-s16<= u16-and +u16-true+) 356 | (s16> two-arg-s16> u16-and +u16-true+) 357 | (s16>= two-arg-s16>= u16-and +u16-true+) 358 | (s32= two-arg-s32= u32-and +u32-true+) 359 | (s32< two-arg-s32< u32-and +u32-true+) 360 | (s32<= two-arg-s32<= u32-and +u32-true+) 361 | (s32> two-arg-s32> u32-and +u32-true+) 362 | (s32>= two-arg-s32>= u32-and +u32-true+) 363 | (s64= two-arg-s64= u64-and +u64-true+) 364 | (s64< two-arg-s64< u64-and +u64-true+) 365 | (s64<= two-arg-s64<= u64-and +u64-true+) 366 | (s64> two-arg-s64> u64-and +u64-true+) 367 | (s64>= two-arg-s64>= u64-and +u64-true+)) 368 | (:reducers 369 | (f32- two-arg-f32- 0f0) 370 | (f32/ two-arg-f32/ 1f0) 371 | (f64- two-arg-f64- 0d0) 372 | (f64/ two-arg-f64/ 1d0) 373 | (u8- two-arg-u8- 0) 374 | (u16- two-arg-u16- 0) 375 | (u32- two-arg-u32- 0) 376 | (u64- two-arg-u64- 0) 377 | (s8- two-arg-s8- 0) 378 | (s16- two-arg-s16- 0) 379 | (s32- two-arg-s32- 0) 380 | (s64- two-arg-s64- 0)) 381 | (:unequals 382 | (f32/= two-arg-f32/= u32-and +u32-true+) 383 | (f64/= two-arg-f64/= u64-and +u64-true+) 384 | (u8/= two-arg-u8/= u8-and +u8-true+) 385 | (u16/= two-arg-u16/= u16-and +u16-true+) 386 | (u32/= two-arg-u32/= u32-and +u32-true+) 387 | (u64/= two-arg-u64/= u64-and +u64-true+) 388 | (s8/= two-arg-s8/= u8-and +u8-true+) 389 | (s16/= two-arg-s16/= u16-and +u16-true+) 390 | (s32/= two-arg-s32/= u32-and +u32-true+) 391 | (s64/= two-arg-s64/= u64-and +u64-true+))) 392 | --------------------------------------------------------------------------------