├── code ├── packages.lisp ├── numpy-file-format.asd ├── tests.lisp ├── python-parser.lisp ├── dtypes.lisp ├── store-array.lisp └── load-array.lisp ├── README.org └── LICENSE /code/packages.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:common-lisp-user) 2 | 3 | (defpackage #:numpy-file-format 4 | (:use #:common-lisp) 5 | (:export 6 | #:load-array 7 | #:store-array)) 8 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | #+TITLE: Numpy File Format 2 | 3 | This library allows Lisp programs to read and write arrays in the Numpy 4 | file format. 5 | 6 | The API is deliberately simple and consists of two functions - =load-array= 7 | and =store-array=. 8 | -------------------------------------------------------------------------------- /code/numpy-file-format.asd: -------------------------------------------------------------------------------- 1 | (defsystem :numpy-file-format 2 | :description "Read and write Numpy .npy and .npz files." 
3 | :author "Marco Heisig " 4 | :license "MIT" 5 | 6 | :depends-on 7 | ("ieee-floats" 8 | "trivial-features") 9 | 10 | :components 11 | ((:file "packages") 12 | (:file "dtypes") 13 | (:file "python-parser") 14 | (:file "load-array") 15 | (:file "store-array")) 16 | 17 | :in-order-to ((test-op (test-op :numpy-file-format/tests)))) 18 | 19 | (defsystem :numpy-file-format/tests 20 | :depends-on 21 | ("numpy-file-format" 22 | "uiop") 23 | 24 | :components 25 | ((:file "tests")) 26 | :perform (test-op (o c) (uiop:symbol-call :numpy-file-format/tests '#:run))) 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Marco Heisig 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
;;; --------------------------------------------------------------------------------
;;; /code/tests.lisp:
;;; --------------------------------------------------------------------------------
(defpackage :numpy-file-format/tests
  (:use :cl :numpy-file-format)
  (:export :run))

(in-package :numpy-file-format/tests)

;;; Array element types exercised by RUN: both float widths, both complex
;;; float widths, unsigned/signed integers of 8, 16, 32 and 64 bits, and BIT.
(defparameter *array-element-types*
  (append
   '(single-float
     double-float
     (complex single-float)
     (complex double-float))
   (mapcan (lambda (bits)
             (list `(unsigned-byte ,bits)
                   `(signed-byte ,bits)))
           (list 8 16 32 64))
   '(bit)))

;;; Dimension specifications exercised by RUN.  The last entry is a bare
;;; integer, which MAKE-ARRAY accepts as a one-dimensional size.
(defparameter *array-dimensions*
  (list '(2 3 4 5)
        '(3 3 3)
        '(3 3)
        '(3)
        10))

(defun type= (type1 type2)
  "Return true if TYPE1 and TYPE2 are EQUAL type specifiers, or if each is
recognizably a subtype of the other."
  (cond ((equal type1 type2) t)
        ((subtypep type1 type2) (values (subtypep type2 type1)))
        (t nil)))

(defun array= (a b)
  "Return true if the arrays A and B have equivalent element types, EQUAL
dimensions, and elements that are pairwise = in row-major order."
  (when (and (type= (array-element-type a)
                    (array-element-type b))
             (equal (array-dimensions a)
                    (array-dimensions b)))
    (dotimes (index (array-total-size a) t)
      (unless (= (row-major-aref a index)
                 (row-major-aref b index))
        (return nil)))))

(defun make-random-array (dimensions element-type)
  "Create an array with the given DIMENSIONS and ELEMENT-TYPE in which every
element is a random 0 or 1, coerced to ELEMENT-TYPE."
  (let ((array (make-array dimensions :element-type element-type)))
    (dotimes (index (array-total-size array) array)
      (setf (row-major-aref array index)
            (coerce (random 2) element-type)))))

(defun run ()
  "For every combination of *ARRAY-ELEMENT-TYPES* and *ARRAY-DIMENSIONS*,
store a random array in a temporary file, load it again, and assert that the
loaded array is ARRAY= to the stored one."
  (dolist (element-type *array-element-types*)
    (dolist (array-dimensions *array-dimensions*)
      (let ((array (make-random-array array-dimensions element-type)))
        (uiop:with-temporary-file (:pathname file)
          (store-array array file)
          (assert (array= array (load-array file))))))))

;;; --------------------------------------------------------------------------------
;;; /code/python-parser.lisp:
;;; --------------------------------------------------------------------------------
(in-package #:numpy-file-format)

;;; This parser is not very
;;; sophisticated, but it gets the job done.

(defun read-python-object (stream &optional (skip #\,) (stop nil))
  "Read one Python literal from the character stream STREAM and return it.

Supported literals are strings (returned as Lisp strings), tuples (lists),
lists (vectors), dicts (EQUAL hash tables), True/False (T/NIL) and decimal
integers, optionally preceded by a minus sign.  Whitespace is ignored.

SKIP is a character that may be skipped once before the object (by default
a comma).  STOP, if non-NIL, is a closing character; when it is encountered
instead of an object, STOP itself is returned.  Any other character signals
an error."
  (loop for c = (read-char stream) do
    (case c
      ((#\space #\tab #\newline #\return) (values))
      ((#\' #\") (return (read-python-string c stream)))
      (#\( (return (read-python-tuple stream)))
      (#\[ (return (read-python-list stream)))
      (#\{ (return (read-python-dict stream)))
      ((#\T #\F)
       (unread-char c stream)
       (return (read-python-boolean stream)))
      (otherwise
       (cond ((eql c skip)
              ;; The separator may be skipped only once.
              (return (read-python-object stream nil stop)))
             ((eql c stop)
              (return stop))
             ((digit-char-p c)
              (unread-char c stream)
              (return (read-python-integer stream)))
             ;; A minus sign is only valid directly in front of a digit.
             ((and (char= c #\-)
                   (let ((next (peek-char nil stream nil nil)))
                     (and next (digit-char-p next))))
              (return (- (read-python-integer stream))))
             (t
              (error "Invalid character: ~S" c)))))))

(defun read-python-string (delimiter stream)
  "Read characters from STREAM up to (and consuming) the next DELIMITER and
return them as a string.  Escape sequences are not interpreted."
  (coerce
   (loop for c = (read-char stream)
         while (char/= c delimiter)
         collect c)
   'string))

(defun read-python-integer (stream)
  "Read a run of decimal digits from STREAM and return its integer value.
The first non-digit character is left in the stream.  Reaching the end of
the stream simply terminates the integer instead of signaling an error."
  (loop with result = 0
        for c = (read-char stream nil nil)
        for weight = (and c (digit-char-p c))
        while weight
        do (setf result (+ (* result 10) weight))
        finally (when c (unread-char c stream))
                (return result)))

(defun read-python-boolean (stream)
  "Read the literal True or False from STREAM and return T or NIL."
  (flet ((skip (string)
           (loop for c across string do
             (assert (char= (read-char stream) c)))))
    (ecase (read-char stream)
      (#\T (skip "rue") t)
      (#\F (skip "alse") nil))))

(defun read-python-tuple (stream)
  "Read the elements of a Python tuple whose opening parenthesis has already
been consumed, and return them as a list."
  (loop for object = (read-python-object stream nil #\))
          then (read-python-object stream #\, #\))
        until (eql object #\))
        collect object))

(defun read-python-list (stream)
  "Read the elements of a Python list whose opening bracket has already been
consumed, and return them as a vector."
  (coerce
   (loop for object = (read-python-object stream nil #\])
           then (read-python-object stream #\, #\])
         until (eql object #\])
         collect object)
   'vector))

(defun read-python-dict (stream)
  "Read the entries of a Python dict whose opening brace has already been
consumed, and return them as a hash table with test EQUAL."
  (let ((dict (make-hash-table :test #'equal)))
    (loop
      (let ((key (read-python-object stream #\, #\})))
        (when (eql key #\})
          (return dict))
        (setf (gethash key dict)
              (read-python-object stream #\:))))))

(defun read-python-object-from-string (string)
  "Parse the first Python literal contained in STRING."
  (with-input-from-string (stream string)
    (read-python-object stream)))

;;; --------------------------------------------------------------------------------
;;; /code/dtypes.lisp:
;;; --------------------------------------------------------------------------------
(in-package #:numpy-file-format)

;;; The byte order of the host, derived from the :LITTLE-ENDIAN and
;;; :BIG-ENDIAN entries of *FEATURES*.
(defconstant +endianness+
  #+little-endian :little-endian
  #+big-endian :big-endian)

(defgeneric dtype-name (dtype))

(defgeneric dtype-endianness (dtype))

(defgeneric dtype-type (dtype))

(defgeneric dtype-code (dtype))

(defgeneric dtype-size (dtype))

;;; The list of all dtypes registered via DEFINE-DTYPE.
(defparameter *dtypes* '())

(defclass dtype ()
  ((%type :initarg :type :reader dtype-type)         ; Lisp element type.
   (%code :initarg :code :reader dtype-code)         ; Numpy type string.
   (%size :initarg :size :reader dtype-size)         ; Element size in bits.
   (%endianness :initarg :endianness :reader dtype-endianness)))

(defmethod print-object ((dtype dtype) stream)
  (print-unreadable-object (dtype stream :type t)
    (prin1 (dtype-code dtype) stream)))

(defun dtype-from-code (code)
  "Return the registered dtype whose code is STRING= to CODE, or signal an
error if there is none."
  (or (find code *dtypes* :key #'dtype-code :test #'string=)
      (error "Cannot find dtype for the code ~S." code)))

(defun dtype-from-type (type)
  "Return the first dtype in *DTYPES* that has the host's endianness and
whose Lisp type is a supertype of TYPE, or signal an error if there is none."
  (or (find-if
       (lambda (dtype)
         (and (eq (dtype-endianness dtype) +endianness+)
              (subtypep type (dtype-type dtype))))
       *dtypes*)
      (error "Cannot find dtype for type ~S." type)))

(defun define-dtype (code type size &optional (endianness +endianness+))
  "Register a dtype with the Numpy type string CODE, the Lisp element type
TYPE, the element SIZE in bits and the given ENDIANNESS.  An already
registered dtype with the same code is kept.  Returns the new dtype object."
  (let ((dtype (make-instance 'dtype
                 :code code
                 :type type
                 :size size
                 :endianness endianness)))
    (pushnew dtype *dtypes* :key #'dtype-code :test #'string=)
    dtype))

(defun define-multibyte-dtype (code type size)
  "Register CODE in all of its byte-order spellings: with the prefix < (little
endian), > (big endian), | and = (both registered with the host's
endianness), and without any prefix (host's endianness)."
  (define-dtype (concatenate 'string "<" code) type size :little-endian)
  (define-dtype (concatenate 'string ">" code) type size :big-endian)
  (define-dtype code type size +endianness+)
  (define-dtype (concatenate 'string "|" code) type size)
  (define-dtype (concatenate 'string "=" code) type size +endianness+))

(define-dtype "O" 't 64)
;; Booleans.  Numpy itself writes the descr '|b1' for boolean arrays, so the
;; b1 spellings are registered in addition to the character code '?'.
(define-dtype "?" 'bit 8)
(define-multibyte-dtype "b1" 'bit 8)
;; In Numpy, the character code 'b' denotes a signed byte and 'B' an
;; unsigned byte.
(define-dtype "b" '(signed-byte 8) 8)
(define-dtype "B" '(unsigned-byte 8) 8)
(define-multibyte-dtype "i1" '(signed-byte 8) 8)
(define-multibyte-dtype "i2" '(signed-byte 16) 16)
(define-multibyte-dtype "i4" '(signed-byte 32) 32)
(define-multibyte-dtype "i8" '(signed-byte 64) 64)
(define-multibyte-dtype "u1" '(unsigned-byte 8) 8)
(define-multibyte-dtype "u2" '(unsigned-byte 16) 16)
(define-multibyte-dtype "u4" '(unsigned-byte 32) 32)
(define-multibyte-dtype "u8" '(unsigned-byte 64) 64)
(define-multibyte-dtype "f4" 'single-float 32)
(define-multibyte-dtype "f8" 'double-float 64)
(define-multibyte-dtype "c8" '(complex single-float) 64)
(define-multibyte-dtype "c16" '(complex double-float) 128)

;; Finally, let's sort *dtypes* such that type queries always find the most
;; specific entry first.
;; SUBTYPEP is true for equal types and is only a partial order, so it is
;; not a valid predicate for STABLE-SORT.  Instead, rank each dtype by the
;; number of registered dtypes whose type is a supertype of its own type.  A
;; strict subtype always has a strictly higher rank than its supertypes, so
;; sorting by descending rank places more specific entries first, and the
;; stable sort keeps the relative order of entries with equal rank.
(setf *dtypes*
      (let ((all *dtypes*))
        (flet ((specificity (dtype)
                 (count-if (lambda (other)
                             (subtypep (dtype-type dtype) (dtype-type other)))
                           all)))
          (mapcar #'cdr
                  (stable-sort
                   (mapcar (lambda (dtype) (cons (specificity dtype) dtype)) all)
                   #'> :key #'car)))))

;;; --------------------------------------------------------------------------------
;;; /code/store-array.lisp:
;;; --------------------------------------------------------------------------------
(in-package #:numpy-file-format)

(defun array-metadata-string (array)
  "Return the Python dict literal describing ARRAY: its dtype code ('descr'),
'fortran_order' (always False) and its dimensions ('shape')."
  (with-output-to-string (stream nil :element-type 'base-char)
    (format stream "{'descr': '~A', ~
                    'fortran_order': ~:[False~;True~], ~
                    'shape': (~{~D,~^ ~}), }"
            (dtype-code (dtype-from-type (array-element-type array)))
            nil
            (array-dimensions array))))

(defun store-array (array filename)
  "Write ARRAY to the file FILENAME in the Numpy file format (version 1.0),
superseding an existing file.  Elements are written in row-major order."
  ;; We open the file twice - once with a stream element type of
  ;; (unsigned-byte 8) to write the header, and once with a stream element
  ;; type suitable for writing the array content.
  (let* ((dtype (dtype-from-type (array-element-type array)))
         (metadata (array-metadata-string array))
         ;; The header consists of the metadata string, padding, and a
         ;; terminating newline.  Its length is chosen such that the 10
         ;; bytes of preamble plus the header are a multiple of 64.  The +1
         ;; reserves room for the newline - without it, a metadata string
         ;; that exactly fills the last 64 byte block would push the newline
         ;; into the array data.
         (metadata-length (- (* 64 (ceiling (+ 10 (length metadata) 1) 64)) 10)))
    (with-open-file (stream filename :direction :output
                                     :element-type '(unsigned-byte 8)
                                     :if-exists :supersede)
      (write-sequence #(#x93 78 85 77 80 89) stream) ; The magic string.
      (write-byte 1 stream) ; Major version.
      (write-byte 0 stream) ; Minor version.
      ;; Write the length of the metadata string (2 bytes, little endian).
      (write-byte (ldb (byte 8 0) metadata-length) stream)
      (write-byte (ldb (byte 8 8) metadata-length) stream)
      ;; Write the metadata string.
      (loop for char across metadata do
        (write-byte (char-code char) stream))
      ;; Pad the header with spaces for 64 byte alignment.
      (loop repeat (- metadata-length (length metadata) 1) do
        (write-byte (char-code #\space) stream))
      (write-byte (char-code #\newline) stream)) ; Finish with a newline.
    ;; Now, open the file a second time to write the array contents.
    (let* ((element-type (array-element-type array))
           ;; Complex numbers are written as two chunks of half the size.
           (chunk-size (if (subtypep element-type 'complex)
                           (/ (dtype-size dtype) 2)
                           (dtype-size dtype)))
           ;; Floating-point numbers (and the parts of complex numbers) are
           ;; written as the non-negative integers produced by ieee-floats,
           ;; so they need an unsigned stream, too.
           (stream-element-type
             (if (or (eq element-type 'double-float)
                     (eq element-type 'single-float)
                     (subtypep element-type 'complex)
                     (subtypep element-type '(unsigned-byte *)))
                 `(unsigned-byte ,chunk-size)
                 `(signed-byte ,chunk-size)))
           (total-size (array-total-size array)))
      (with-open-file (stream filename :direction :output
                                       :element-type stream-element-type
                                       :if-exists :append)
        (etypecase array
          ((simple-array single-float)
           (loop for index below total-size do
             (write-byte (ieee-floats:encode-float32 (row-major-aref array index)) stream)))
          ((simple-array double-float)
           (loop for index below total-size do
             (write-byte (ieee-floats:encode-float64 (row-major-aref array index)) stream)))
          ((simple-array (complex single-float))
           (loop for index below total-size do
             (let ((c (row-major-aref array index)))
               (write-byte (ieee-floats:encode-float32 (realpart c)) stream)
               (write-byte (ieee-floats:encode-float32 (imagpart c)) stream))))
          ((simple-array (complex double-float))
           (loop for index below total-size do
             (let ((c (row-major-aref array index)))
               (write-byte (ieee-floats:encode-float64 (realpart c)) stream)
               (write-byte (ieee-floats:encode-float64 (imagpart c)) stream))))
          ((simple-array *)
           (loop for index below total-size do
             (write-byte (row-major-aref array index) stream))))))))

;;; --------------------------------------------------------------------------------
;;; /code/load-array.lisp:
;;; --------------------------------------------------------------------------------
(in-package #:numpy-file-format)

(defun load-array-metadata (filename)
  "Read the preamble and header of the Numpy file FILENAME.

Returns four values: the value of the header's 'shape' entry, the dtype
object for its 'descr' entry, the value of its 'fortran_order' entry, and
the combined size of preamble and header in bits.  Signals an error if the
file does not start with the Numpy magic string."
  (with-open-file (stream filename :direction :input :element-type '(unsigned-byte 8))
    ;; The first 6 bytes are a magic string: exactly \x93NUMPY.
    (unless (and (eql (read-byte stream) #x93)
                 (eql (read-byte stream) 78)  ; N
                 (eql (read-byte stream) 85)  ; U
                 (eql (read-byte stream) 77)  ; M
                 (eql (read-byte stream) 80)  ; P
                 (eql (read-byte stream) 89)) ; Y
      (error "Not a Numpy file."))
    (let* (;; The next 1 byte is an unsigned byte: the major version number
           ;; of the file format, e.g. \x01.
           (major-version (read-byte stream))
           ;; The next 1 byte is an unsigned byte: the minor version number
           ;; of the file format, e.g. \x00.
           (minor-version (read-byte stream))
           (header-len
             (if (= major-version 1)
                 ;; Version 1.0: The next 2 bytes form a little-endian
                 ;; unsigned int: the length of the header data HEADER_LEN.
                 (logior (ash (read-byte stream) 0)
                         (ash (read-byte stream) 8))
                 ;; Version 2.0: The next 4 bytes form a little-endian
                 ;; unsigned int: the length of the header data HEADER_LEN.
                 (logior (ash (read-byte stream) 0)
                         (ash (read-byte stream) 8)
                         (ash (read-byte stream) 16)
                         (ash (read-byte stream) 24)))))
      (declare (ignore minor-version))
      ;; The next HEADER_LEN bytes form the header data describing the
      ;; array's format. It is an ASCII string which contains a Python
      ;; literal expression of a dictionary. It is terminated by a newline
      ;; (\n) and padded with spaces (\x20) to make the total of len(magic
      ;; string) + 2 + len(length) + HEADER_LEN be evenly divisible by 64
      ;; for alignment purposes.
      (let ((dict (read-python-object-from-string
                   (let ((buffer (make-string header-len :element-type 'base-char)))
                     (loop for index from 0 below header-len do
                       (setf (schar buffer index) (code-char (read-byte stream))))
                     buffer))))
        (values
         (gethash "shape" dict)
         (dtype-from-code (gethash "descr" dict))
         (gethash "fortran_order" dict)
         ;; Magic string (6), version (2), and 2 or 4 bytes of HEADER_LEN.
         (* 8 (+ header-len (if (= 1 major-version) 10 12))))))))

(defun load-array (filename)
  "Read the Numpy file FILENAME and return its contents as a Lisp array.
Signals an error for files in Fortran order and for files whose byte order
differs from that of the host."
  ;; We actually open the file twice, once to read the metadata - one byte
  ;; at a time, and once to read the array contents with a suitable element
  ;; type (e.g. (unsigned-byte 32) for single precision floating-point
  ;; numbers).
  (multiple-value-bind (dimensions dtype fortran-order header-bits)
      (load-array-metadata filename)
    ;; Reject unsupported files before allocating the result array.
    ;; TODO Respect fortran-order and endianness.
    (when fortran-order
      (error "Reading arrays in Fortran order is not yet supported."))
    (unless (eq (dtype-endianness dtype) +endianness+)
      (error "Endianness conversion is not yet supported."))
    (let* ((element-type (dtype-type dtype))
           (array (make-array dimensions :element-type element-type))
           (total-size (array-total-size array))
           ;; Complex numbers are read as two chunks of half the size.
           (chunk-size (if (subtypep element-type 'complex)
                           (/ (dtype-size dtype) 2)
                           (dtype-size dtype)))
           ;; Floating-point bit patterns are read as unsigned integers,
           ;; matching what STORE-ARRAY writes.
           (stream-element-type
             (if (or (eq element-type 'double-float)
                     (eq element-type 'single-float)
                     (subtypep element-type 'complex)
                     (subtypep element-type '(unsigned-byte *)))
                 `(unsigned-byte ,chunk-size)
                 `(signed-byte ,chunk-size))))
      (with-open-file (stream filename :element-type stream-element-type)
        ;; Skip the header.
        (loop repeat (/ header-bits chunk-size) do (read-byte stream))
        (etypecase array
          ((simple-array single-float)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (ieee-floats:decode-float32 (read-byte stream)))))
          ((simple-array double-float)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (ieee-floats:decode-float64 (read-byte stream)))))
          ((simple-array (complex single-float))
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (complex
                    (ieee-floats:decode-float32 (read-byte stream))
                    (ieee-floats:decode-float32 (read-byte stream))))))
          ((simple-array (complex double-float))
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (complex
                    (ieee-floats:decode-float64 (read-byte stream))
                    (ieee-floats:decode-float64 (read-byte stream))))))
          ((simple-array *)
           (loop for index below total-size do
             (setf (row-major-aref array index)
                   (read-byte stream))))))
      array)))
;;; --------------------------------------------------------------------------------