├── .clj-kondo └── config.edn ├── .github └── workflows │ └── leintest.yml ├── .gitignore ├── .travis.yml ├── README.md ├── java-src ├── impl │ ├── BigEndianDataInputStream.java │ ├── BigEndianDataOutputStream.java │ ├── CountingInputStream.java │ ├── LittleEndianDataInputStream.java │ ├── LittleEndianDataOutputStream.java │ ├── NullOutputStream.java │ └── RandomAccessInputStream.java └── interfaces │ ├── UnsignedDataInput.java │ └── UnsignedDataOutput.java ├── project.clj ├── src └── org │ └── clojars │ └── smee │ └── binary │ ├── core.clj │ └── demo │ ├── bitcoin.clj │ ├── elf.clj │ ├── matlab5.clj │ ├── mp3.clj │ └── protobuf.clj └── test └── org └── clojars └── smee └── binary └── codectests.clj /.clj-kondo/config.edn: -------------------------------------------------------------------------------- 1 | {:linters {:refer-all {:level :off}}} -------------------------------------------------------------------------------- /.github/workflows/leintest.yml: -------------------------------------------------------------------------------- 1 | name: Clojure CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v2 12 | - uses: DeLaGuardo/clojure-lint-action@v1 13 | with: 14 | clj-kondo-args: --lint src 15 | # secrets.GITHUB_TOKEN is needed here 16 | # to publish annotations back to github 17 | # this action is not storing or sending it anywhere 18 | github_token: ${{ secrets.GITHUB_TOKEN }} 19 | - uses: actions/cache@v1 20 | with: 21 | path: ~/.m2/repository 22 | key: ${{ runner.os }}-maven-${{ hashFiles('**/project.clj') }} 23 | restore-keys: | 24 | ${{ runner.os }}-maven- 25 | 26 | - name: Run tests 27 | run: lein test 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /lib 3 | /classes 4 | /checkouts 5 | pom.xml 6 | *.jar 7 | *.class 8 | .lein-deps-sum 9 | 
.lein-failures 10 | .lein-plugins 11 | .classpath 12 | .settings/ 13 | bin/ 14 | .project 15 | misc 16 | .idea 17 | .nrepl-port 18 | .clj-kondo/.cache/ -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: clojure 2 | lein: lein 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # binary-dsl 2 | 3 | This library is a high-performance binary parser combinator. It enables reading and writing arbitrary binary data from Java's io streams. The focus is on enabling parsing of externally defined binary structures. If you have a format specification for any binary structure, this library is for you! 4 | 5 | It is inspired by [Gloss](https://github.com/ztellman/gloss) but focuses on Java's stream classes. The individual codecs do not require explicit knowledge about the length of data that needs to be read. 6 | 7 | [![Build Status](https://secure.travis-ci.org/smee/binary.png)](http://travis-ci.org/smee/binary) 8 | 9 | ## Artifacts 10 | 11 | Binary artifacts are [released to Clojars](https://clojars.org/smee/binary). If you are using Maven, add the following repository 12 | definition to your `pom.xml`: 13 | 14 | ``` xml 15 | <repository> 16 | <id>clojars.org</id> 17 | <url>http://clojars.org/repo</url> 18 | </repository> 19 | ``` 20 | 21 | ### The Most Recent Release 22 | 23 | With [Leiningen](http://leiningen.org): 24 | 25 | ``` clojure 26 | [smee/binary "0.5.5"] 27 | ``` 28 | 29 | With Maven: 30 | 31 | ``` xml 32 | <dependency> 33 | <groupId>smee</groupId> 34 | <artifactId>binary</artifactId> 35 | <version>0.5.5</version> 36 | </dependency> 37 | ``` 38 | 39 | ### Note 40 | All functions given in this document refer to the namespace `org.clojars.smee.binary.core` (needs to be `require`d or `use`d in your namespace declaration). 
41 | 42 | ## Examples / Demo 43 | 44 | Please refer to the [tests](https://github.com/smee/binary/blob/master/test/org/clojars/smee/binary/codectests.clj) for now. There are several demos: 45 | 46 | - the start of an [MP3 ID3v2 parser](https://github.com/smee/binary/blob/master/src/org/clojars/smee/binary/demo/mp3.clj). 47 | - Another demonstration is the [bitcoin block chain parser](https://github.com/smee/binary/blob/master/src/org/clojars/smee/binary/demo/bitcoin.clj#L168) 48 | - [PNG file format](https://gist.github.com/stathissideris/8801295) 49 | - [MATLAB 5](https://github.com/smee/binary/blob/master/src/org/clojars/smee/binary/demo/matlab5.clj), currently read-only 50 | - [ELF 32/64](https://github.com/smee/binary/blob/master/src/org/clojars/smee/binary/demo/elf.clj) 51 | 52 | ## Codec 53 | To read binary data we need two things: A `codec` that knows how to read and write its binary representation and convert it to a clojure data structure and an instance of `java.io.InputStream`. 54 | The codec needs to satisfy the protocol `BinaryIO` (see [here](https://github.com/smee/binary/blob/master/src/org/clojars/smee/binary/core.clj#L10 "here")). 55 | 56 | Codecs are composable, you may combine them as you like. 57 | 58 | Each codec can have two attached functions: 59 | 60 | - `pre-encode` - to convert clojure data to something that can be written to binary 61 | - `post-decode` - to convert something read by a codec to a clojure/java data structure 62 | 63 | Example: Let's represent an instance of java.util.Date as a unix epoch and write it as a little-endian long: 64 | 65 | ``` clojure 66 | (compile-codec :long-le (fn [^java.util.Date date] (.getTime date)) (fn [^long number] (java.util.Date. number))) 67 | ``` 68 | 69 | The compiler hints are not necessary. They are just a clarification in this example. 70 | ### API 71 | - `encode` takes an instance of `codec`, a java.io.OutputStream and a value and writes the binary representation of this value into the stream. 
72 | - `decode` takes a `codec` and a java.io.InputStream and returns a clojure/java value that was read from the stream. Individual reads via `decode` are eager! 73 | 74 | ### Features/Available codecs 75 | #### Primitives 76 | Encodes primitive data types, either **b**ig-**e**ndian or **l**ittle-**e**ndian: 77 | 78 | ``` clojure 79 | ; signed 80 | :byte 81 | :short-le 82 | :short-be 83 | :int-le 84 | :int-be 85 | :uint-le 86 | :uint-be 87 | :long-le 88 | :long-be 89 | :float-le 90 | :float-be 91 | :double-le 92 | :double-be 93 | ; unsigned 94 | :ubyte 95 | :ushort-le 96 | :ushort-be 97 | :uint-le 98 | :uint-be 99 | :ulong-le 100 | :ulong-be 101 | ``` 102 | Please be aware that since Java doesn't support unsigned data types the codecs will consume/produce a bigger data type than for the signed case: Unsigned bytes are shorts, unsigned shorts are integers, unsigned integers are longs, unsigned longs are Bigints! 103 | 104 | ### Sequences 105 | If you want several codecs in a specific order, use a vector: 106 | 107 | ``` clojure 108 | [:int-le :float-le :float-le] 109 | ``` 110 | 111 | ### Maps 112 | To name elements in a binary data source maps are ideal. Unfortunately the order of the keys is unspecified. We need to use a map constructor that respects the order of the keys: 113 | 114 | ``` clojure 115 | (require '[org.clojars.smee.binary.core :as b]) 116 | (b/ordered-map :foo :int-le :bar [:float-le :double-le]) 117 | ``` 118 | 119 | As you can see arbitrary nesting of codecs is possible. You can define maps of maps of ... 120 | If you use clojure's map literals, the order of the binary values is unspecified (it is determined by the sequence of keys and values within the map's implementation). 121 | 122 | ### Repeated 123 | `repeated` uses another `codec` repeatedly until the stream is exhausted. To restrict how often the `codec` should be used, you can explicitly give one of three parameters: 124 | 125 | - `:length` gives a fixed length. E.g. 
`(repeated :int-le :length 5)` will try to read/write exactly five little-endian 32bit integers from/to a stream 126 | - `:prefix` takes another codec that will get read/written first. This `codec` contains the length for the successive read/write of the repeated values. Example: `(repeated :int-le :prefix :short-le)` will first read a short and then try to read as many integers as specified in this short value. 127 | - `:separator` will read values using the codec until the value read is the same as the given separator value. An example would be `(repeated :byte :separator (byte 0))` for null-terminated C strings. If the separator would be the last element in the stream, it is optional (think of comma-separated values where the last column may not have a trailing comma). 128 | 129 | **Caution**: When writing the data there **WILL** be a final separator. This means the written data may have more bytes than initially read! 130 | 131 | - No parameter means: read until exhaustion of the stream (EOF). 132 | 133 | ### Blob 134 | `blob` is essentially an optimized version of `(repeated :byte ...)` that produces and consumes Java byte arrays. It takes the same options as `repeated`, except for `:separator`. 135 | 136 | ### String 137 | 138 | Reads and writes bytes and converts them from/to strings with a specific string encoding. This codec uses `repeated`, that means it takes either `:length` or `:prefix` as parameter to determine the length of the string. 139 | 140 | ``` clojure 141 | (string "ISO-8859-1" :length 3) ; read three bytes, interpret them as a string with encoding "ISO-8859-1" 142 | ``` 143 | ### C strings 144 | 145 | Similar to `string`, but reads bytes until it finds a null byte: 146 | 147 | ``` clojure 148 | (c-string "UTF8") ; 149 | ``` 150 | 151 | 152 | ### Bits 153 | If you have a byte where each bit has a specific meaning you can use a sequence of keywords as an input. 
154 | For example, the following definition says that the lowest bit in a byte gets the value `:a`, the next one `:b`, then `:c`. The bits 4-7 are ignored, the highest bit has the value `:last`: 155 | 156 | ```clojure 157 | (decode (bits [:a :b :c nil nil nil nil :last]) instream); let's assume the next byte in instream is 2r11011010 158 | => #{:b :last} 159 | ``` 160 | If you now read a byte with the value 2r11011001 using this codec you will get the clojure set `#{:a :last}` as a value. 161 | 162 | ### Header 163 | Decodes a header using `header-codec`. Passes this data structure to `header->body` which returns the codec to use to parse the body. For writing this codec calls `body->header` with the data as parameter and expects a value to use for writing the header information. 164 | 165 | ### Padding 166 | Make sure there is always a minimum byte `length` when reading/writing values. 167 | Works by reading `length` bytes into a byte array, then reading from that array using `inner-codec`. 168 | Currently there are three options: 169 | 170 | - `:length` is the number of bytes that should be present after writing 171 | - `:padding-byte` is the numeric value of the byte used for padding (default is 0) 172 | - `:truncate?` is a boolean flag that determines the behaviour if `inner-codec` writes more bytes than `padding` can handle: false is the default, meaning throw an exception. True will lead to truncating the output of `inner-codec`. 173 | 174 | Example: 175 | 176 | ``` clojure 177 | (padding (repeated :int-le :length 100) :length 1024 :padding-byte (byte \x)) 178 | => [...] ; sequence of 100 integers, the stream will have 1024 bytes read, though 179 | 180 | (encode (padding (repeated (string "UTF8" :separator 0)) :length 11 :truncate? true) outstream ["abc" "def" "ghi"]) 181 | => ; writes bytes [97 98 99 0 100 101 102 0 103 104 105] 182 | ; observe: the last separator byte was truncated! 
183 | ``` 184 | 185 | ### Align 186 | This codec is related to `padding` in that it makes sure that the number of bytes 187 | written/read to/from a stream always is aligned to a specified byte boundary. 188 | For example, if a format requires aligning all data to 8 byte boundaries this codec 189 | will pad the written data with `padding-byte` to make sure that the count of bytes written 190 | is divisible by 8. 191 | 192 | Parameters: 193 | 194 | - `:modulo`: byte boundary modulo, should be positive 195 | - `:padding-byte` is the numeric value of the byte used for padding (default is 0) 196 | 197 | Example: 198 | 199 | ``` clojure 200 | (encode (align (repeated :short-be :length 3) :modulo 9 :padding-byte 55) output-stream [1 2 3]) 201 | ;==> writes these bytes: [0 1 0 2 0 3 55 55 55] 202 | ``` 203 | 204 | ### Constant 205 | If a binary format uses fixed elements (like the three bytes 'ID3' in mp3), you can use this codec. It needs a codec and a fixed value. If the value read using this codec does not match the given fixed value, an exception will be thrown. 206 | 207 | ``` clojure 208 | (constant (string "ISO-8859-1" :length 3) "ID3") 209 | ``` 210 | Alternatively, you may treat strings and byte arrays as ```constant``` encoders. 211 | 212 | 213 | ### Union 214 | Union is a C-style union. A fixed number of bytes may represent different values depending on the interpretation of the bytes. The value returned by `read-data` is a map of all valid interpretations according to the specified unioned codecs. 215 | The parameters are the number of bytes needed for the longest codec in this union and a map of value names to codecs. 216 | This codec will read the specified number of bytes from the input stream and then successively try to read from this byte array using each individual codec. 217 | 218 | Example: Four bytes may represent an integer, two shorts, four bytes, a list of bytes with prefix or a string. 
219 | 220 | ``` clojure 221 | (union 4 {:integer :int-be 222 | :shorts (repeated :short-be :length 2) 223 | :bytes (repeated :byte :length 4) 224 | :prefixed (repeated :byte :prefix :byte) 225 | :str (string \"UTF8\" :prefix :byte)}) 226 | ``` 227 | ## License 228 | 229 | Copyright © 2014 Steffen Dienst 230 | 231 | Distributed under the Eclipse Public License, the same as Clojure. 232 | -------------------------------------------------------------------------------- /java-src/impl/BigEndianDataInputStream.java: -------------------------------------------------------------------------------- 1 | package impl; 2 | import interfaces.UnsignedDataInput; 3 | 4 | import java.io.DataInputStream; 5 | import java.io.EOFException; 6 | import java.io.IOException; 7 | import java.math.BigInteger; 8 | 9 | import clojure.lang.BigInt; 10 | 11 | 12 | public class BigEndianDataInputStream extends DataInputStream implements UnsignedDataInput { 13 | 14 | private CountingInputStream d; 15 | public BigEndianDataInputStream(CountingInputStream in) { 16 | super(in); 17 | this.d = in; 18 | } 19 | 20 | /* (non-Javadoc) 21 | * @see UnsignedDataInput#readUnsignedInt() 22 | */ 23 | @Override 24 | public final long readUnsignedInt() throws IOException 25 | { 26 | long ch1 = this.read(); 27 | long ch2 = this.read(); 28 | long ch3 = this.read(); 29 | long ch4 = this.read(); 30 | if ((ch1 | ch2 | ch3 | ch4) < 0) 31 | throw new EOFException(); 32 | return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0)); 33 | } 34 | final byte[] w = new byte[9]; 35 | @Override 36 | public BigInt readUnsignedLong() throws IOException { 37 | this.read(w,1,8); 38 | boolean isMax = false; 39 | for (int i = 1; i < w.length; i++) { 40 | isMax |= (w[i]==255); 41 | } 42 | w[0]=(byte) (isMax?1:0); 43 | return BigInt.fromBigInteger(new BigInteger(w)); 44 | } 45 | @Override 46 | public long size() { 47 | return this.d.size(); 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /java-src/impl/BigEndianDataOutputStream.java: -------------------------------------------------------------------------------- 1 | package impl; 2 | import interfaces.UnsignedDataOutput; 3 | 4 | import java.io.DataOutputStream; 5 | import java.io.IOException; 6 | import java.io.OutputStream; 7 | 8 | import clojure.lang.BigInt; 9 | 10 | 11 | public class BigEndianDataOutputStream extends DataOutputStream implements UnsignedDataOutput { 12 | 13 | public BigEndianDataOutputStream(OutputStream out) { 14 | super(out); 15 | } 16 | 17 | @Override 18 | public void writeUnsignedInt(long i) throws IOException { 19 | write((int) ((i >>> 24) & 0xFF)); 20 | write((int) ((i >>> 16) & 0xFF)); 21 | write((int) ((i >>> 8) & 0xFF)); 22 | write((int) ((i >>> 0) & 0xFF)); 23 | } 24 | 25 | @Override 26 | public void writeUnsignedShort(int v) throws IOException { 27 | write((v >>> 8) & 0xFF); 28 | write((v >>> 0) & 0xFF); 29 | } 30 | 31 | @Override 32 | public void writeUnsignedLong(BigInt bi) throws IOException { 33 | byte[] arr = bi.toBigInteger().toByteArray(); 34 | int arrayLength = arr.length; 35 | boolean isLongerThanLong = arrayLength>8; 36 | if(isLongerThanLong && arr[0]>1) 37 | throw new ArithmeticException("unsigned long is too big! 
Would truncate on write!"); 38 | int offset = isLongerThanLong?1:0; 39 | int len = isLongerThanLong?8:arrayLength; 40 | byte[] toWrite = new byte[8]; 41 | System.arraycopy(arr, offset, toWrite, 8-len, len); 42 | write(toWrite,0,8); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /java-src/impl/CountingInputStream.java: -------------------------------------------------------------------------------- 1 | package impl; 2 | import java.io.IOException; 3 | import java.io.InputStream; 4 | 5 | /** 6 | * 7 | * @author sdienst 8 | * 9 | */ 10 | public class CountingInputStream extends InputStream { 11 | private long count; 12 | private final InputStream delegate; 13 | 14 | public CountingInputStream(InputStream in) { 15 | this.delegate = in; 16 | } 17 | /** 18 | * Drop a number of bytes without returning anything. 19 | * @param length 20 | * @return 21 | * @throws IOException 22 | */ 23 | public final long skip(long length) throws IOException { 24 | return this.count += delegate.skip(length); 25 | } 26 | /** 27 | * @return number of bytes already read from the delegated inputstream 28 | */ 29 | public final long size() { 30 | return this.count; 31 | } 32 | 33 | @Override 34 | public final String toString() { 35 | return delegate.toString()+", "+count+" bytes read"; 36 | } 37 | @Override 38 | public int read() throws IOException { 39 | int v = delegate.read(); 40 | if(v>-1) count++; 41 | return v; 42 | } 43 | // public long getOffset() throws IOException { 44 | // return delegate.getOffset(); 45 | // } 46 | // public void seek(long loc) throws IOException { 47 | // delegate.seek(loc); 48 | // } 49 | // public void mark(int readlimit) { 50 | // delegate.mark(readlimit); 51 | // } 52 | // public void reset() throws IOException { 53 | // delegate.reset(); 54 | // } 55 | } 56 | -------------------------------------------------------------------------------- /java-src/impl/LittleEndianDataInputStream.java: 
-------------------------------------------------------------------------------- 1 | package impl; 2 | import interfaces.UnsignedDataInput; 3 | 4 | import java.io.EOFException; 5 | import java.io.FilterInputStream; 6 | import java.io.IOException; 7 | import java.math.BigInteger; 8 | 9 | import clojure.lang.BigInt; 10 | 11 | /** 12 | * Not threadsafe! 13 | * @author sdienst 14 | * 15 | */ 16 | public class LittleEndianDataInputStream extends FilterInputStream implements UnsignedDataInput { 17 | 18 | private final CountingInputStream d; 19 | private final byte w[]; // work array for buffering input 20 | 21 | public LittleEndianDataInputStream(CountingInputStream in) { 22 | super(in); 23 | this.d = in;//new DataInputStream(in); 24 | w = new byte[9]; 25 | } 26 | @Override 27 | public final short readShort() throws IOException 28 | { 29 | this.readFully(w, 0, 2); 30 | return (short)( 31 | (w[1]&0xff) << 8 | 32 | (w[0]&0xff)); 33 | } 34 | 35 | @Override 36 | public final int readUnsignedShort() throws IOException 37 | { 38 | this.readFully(w, 0, 2); 39 | return ( 40 | (w[1]&0xff) << 8 | 41 | (w[0]&0xff)); 42 | } 43 | 44 | @Override 45 | public final char readChar() throws IOException 46 | { 47 | this.readFully(w, 0, 2); 48 | return (char) ( 49 | (w[1]&0xff) << 8 | 50 | (w[0]&0xff)); 51 | } 52 | 53 | @Override 54 | public final int readInt() throws IOException 55 | { 56 | this.readFully(w, 0, 4); 57 | return 58 | (w[3]) << 24 | 59 | (w[2]&0xff) << 16 | 60 | (w[1]&0xff) << 8 | 61 | (w[0]&0xff); 62 | } 63 | @Override 64 | public final long readUnsignedInt() throws IOException 65 | { 66 | this.readFully(w, 0, 4); 67 | return 68 | (long)(w[3]&0xff) << 24 | 69 | (long)(w[2]&0xff) << 16 | 70 | (long)(w[1]&0xff) << 8 | 71 | (long)(w[0]&0xff); 72 | } 73 | @Override 74 | public final long readLong() throws IOException 75 | { 76 | this.readFully(w, 0, 8); 77 | return 78 | (long)(w[7]) << 56 | 79 | (long)(w[6]&0xff) << 48 | 80 | (long)(w[5]&0xff) << 40 | 81 | (long)(w[4]&0xff) << 32 
| 82 | (long)(w[3]&0xff) << 24 | 83 | (long)(w[2]&0xff) << 16 | 84 | (long)(w[1]&0xff) << 8 | 85 | (long)(w[0]&0xff); 86 | } 87 | @Override 88 | public BigInt readUnsignedLong() throws IOException { 89 | this.readFully(w,1,8); 90 | boolean isMax=false; 91 | // reverse byte array 92 | for (int i = 1; i < 5; i++) { 93 | byte b = w[i]; 94 | w[i]=w[9-i]; 95 | w[9-i] = b; 96 | isMax |= (b==255); 97 | } 98 | w[0]=(byte) (isMax?1:0); 99 | return BigInt.fromBigInteger(new BigInteger(w)); 100 | } 101 | @Override 102 | public final float readFloat() throws IOException { 103 | return Float.intBitsToFloat(readInt()); 104 | } 105 | @Override 106 | public final double readDouble() throws IOException { 107 | return Double.longBitsToDouble(readLong()); 108 | } 109 | @Override 110 | public final int read(byte b[], int off, int len) throws IOException { 111 | return d.read(b, off, len); 112 | } 113 | @Override 114 | public final void readFully(byte b[]) throws IOException { 115 | this.readFully(b, 0, b.length); 116 | } 117 | @Override 118 | public final void readFully(byte b[], int off, int len) throws IOException { 119 | int l = d.read(b, off, len); 120 | if(l>> 8) & 0xFF); 38 | count+=2; 39 | } 40 | @Override 41 | public final void writeUnsignedShort(int s) throws IOException { 42 | out.write((s >>> 0) & 0xFF); 43 | out.write((s >>> 8) & 0xFF); 44 | count+=2; 45 | } 46 | @Override 47 | public final void writeChar(int c) throws IOException { 48 | out.write(c & 0xFF); 49 | out.write((c >>> 8) & 0xFF); 50 | count+=2; 51 | } 52 | 53 | @Override 54 | public final void writeInt(int i) throws IOException { 55 | out.write(i & 0xFF); 56 | out.write((i >>> 8) & 0xFF); 57 | out.write((i >>> 16) & 0xFF); 58 | out.write((i >>> 24) & 0xFF); 59 | count+=4; 60 | } 61 | @Override 62 | public final void writeUnsignedInt(long i) throws IOException { 63 | out.write((int) (i & 0xFF)); 64 | out.write((int) ((i >>> 8) & 0xFF)); 65 | out.write((int) ((i >>> 16) & 0xFF)); 66 | out.write((int) ((i >>> 24) 
& 0xFF)); 67 | count+=4; 68 | } 69 | 70 | @Override 71 | public final void writeLong(long l) throws IOException { 72 | out.write((int) l & 0xFF); 73 | out.write((int) (l >>> 8) & 0xFF); 74 | out.write((int) (l >>> 16) & 0xFF); 75 | out.write((int) (l >>> 24) & 0xFF); 76 | out.write((int) (l >>> 32) & 0xFF); 77 | out.write((int) (l >>> 40) & 0xFF); 78 | out.write((int) (l >>> 48) & 0xFF); 79 | out.write((int) (l >>> 56) & 0xFF); 80 | count+=8; 81 | } 82 | @Override 83 | public void writeUnsignedLong(BigInt bi) throws IOException { 84 | byte[] toWrite = new byte[8]; 85 | byte[] w = bi.toBigInteger().toByteArray(); 86 | 87 | int arrayLength = w.length; 88 | boolean isLongerThanLong = arrayLength>8; 89 | if(isLongerThanLong && w[0]>1) 90 | throw new ArithmeticException("unsigned long is too big! Would truncate on write!"); 91 | 92 | int offset = isLongerThanLong?1:0; 93 | int len = isLongerThanLong?8:arrayLength; 94 | System.arraycopy(w, offset, toWrite, 8-len, len); 95 | 96 | // reverse bytes 97 | for (int i = 0; i < 4; i++) { 98 | byte b = toWrite[i]; 99 | toWrite[i]=toWrite[7-i]; 100 | toWrite[7-i] = b; 101 | } 102 | out.write(toWrite,0,8); 103 | count+=8; 104 | } 105 | 106 | @Override 107 | public final void writeFloat(float f) throws IOException { 108 | this.writeInt(Float.floatToIntBits(f)); 109 | count+=4; 110 | } 111 | 112 | public final void writeDouble(double d) throws IOException { 113 | this.writeLong(Double.doubleToLongBits(d)); 114 | count+=8; 115 | } 116 | 117 | public void writeBytes(String s) throws IOException { 118 | throw new RuntimeException("unimplemented"); 119 | } 120 | 121 | public final void writeChars(String s) throws IOException { 122 | throw new RuntimeException("unimplemented"); 123 | } 124 | @Override 125 | public final void writeUTF(String s) throws IOException { 126 | throw new RuntimeException("unimplemented"); 127 | } 128 | 129 | @Override 130 | public int size() { 131 | return count; 132 | } 133 | 134 | @Override 135 | public void 
write(int b) throws IOException { 136 | super.write(b); 137 | count++; 138 | } 139 | 140 | } -------------------------------------------------------------------------------- /java-src/impl/NullOutputStream.java: -------------------------------------------------------------------------------- 1 | package impl; 2 | import java.io.IOException; 3 | import java.io.OutputStream; 4 | 5 | 6 | public class NullOutputStream extends OutputStream { 7 | 8 | public NullOutputStream() { 9 | } 10 | 11 | @Override 12 | public void write(int b) throws IOException { 13 | } 14 | 15 | } 16 | -------------------------------------------------------------------------------- /java-src/impl/RandomAccessInputStream.java: -------------------------------------------------------------------------------- 1 | package impl; 2 | import java.io.IOException; 3 | import java.io.InputStream; 4 | import java.io.RandomAccessFile; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | 9 | /** This class uses a memory cache to allow seeking within 10 | an InputStream. Based on the JAI MemoryCacheSeekableStream class. 11 | Can also be constructed from a RandomAccessFile, which uses less 12 | memory since the memory cache is not required. 13 | 14 | Copied and adapted from http://imagej.nih.gov/ij/developer/source/ij/io/RandomAccessStream.java.html (public domain) 15 | */ 16 | public final class RandomAccessInputStream extends InputStream { 17 | 18 | private static final int BLOCK_SIZE = 1024; 19 | private static final int BLOCK_MASK = 1023; 20 | private static final int BLOCK_SHIFT = 10; 21 | 22 | private InputStream src; 23 | private RandomAccessFile ras; 24 | private long pointer; 25 | private List data; 26 | private long length; 27 | private boolean foundEOS; 28 | 29 | /** Constructs a RandomAccessStream from an InputStream. Seeking 30 | backwards is supported using a memory cache. 
*/ 31 | public RandomAccessInputStream(InputStream inputstream) { 32 | pointer = 0L; 33 | data = new ArrayList(); 34 | length = 0L; 35 | foundEOS = false; 36 | src = inputstream; 37 | } 38 | 39 | /** Constructs a RandomAccessStream from an RandomAccessFile. */ 40 | public RandomAccessInputStream(RandomAccessFile ras) { 41 | this.ras = ras; 42 | } 43 | 44 | 45 | public long getOffset() throws IOException { 46 | if (ras!=null) 47 | return ras.getFilePointer(); 48 | else 49 | return pointer; 50 | } 51 | 52 | public int read() throws IOException { 53 | if (ras!=null) 54 | return ras.read(); 55 | long l = pointer + 1L; 56 | long l1 = readUntil(l); 57 | if (l1>=l) { 58 | byte abyte0[] = data.get((int)(pointer>>BLOCK_SHIFT)); 59 | return abyte0[(int)(pointer++ & BLOCK_MASK)] & 0xff; 60 | } else 61 | return -1; 62 | } 63 | 64 | public int read(byte[] bytes, int off, int len) throws IOException { 65 | if(bytes == null) 66 | throw new NullPointerException(); 67 | if (ras!=null) 68 | return ras.read(bytes, off, len); 69 | if (off<0 || len<0 || off+len>bytes.length) 70 | throw new IndexOutOfBoundsException(); 71 | if (len == 0) 72 | return 0; 73 | long l = readUntil(pointer+len); 74 | if (l<=pointer) 75 | return -1; 76 | else { 77 | byte abyte1[] = data.get((int)(pointer >> BLOCK_SHIFT)); 78 | int k = Math.min(len, BLOCK_SIZE - (int)(pointer & BLOCK_MASK)); 79 | System.arraycopy(abyte1, (int)(pointer & BLOCK_MASK), bytes, off, k); 80 | pointer += k; 81 | return k; 82 | } 83 | } 84 | 85 | private long readUntil(long l) throws IOException { 86 | if (l>BLOCK_SHIFT); 91 | int j = (int)(length>>BLOCK_SHIFT); 92 | for (int k=j; k<=i; k++) { 93 | byte abyte0[] = new byte[BLOCK_SIZE]; 94 | data.add(abyte0); 95 | int i1 = BLOCK_SIZE; 96 | int j1 = 0; 97 | while (i1>0) { 98 | int k1 = src.read(abyte0, j1, i1); 99 | if (k1==-1) { 100 | foundEOS = true; 101 | return length; 102 | } 103 | j1 += k1; 104 | i1 -= k1; 105 | length += k1; 106 | } 107 | } 108 | return length; 109 | } 110 | 111 | 
public void seek(long loc) throws IOException { 112 | if (ras!=null) 113 | {ras.seek(loc); return;} 114 | if (loc<0L) 115 | pointer = 0L; 116 | else 117 | pointer = loc; 118 | } 119 | 120 | public void close() throws IOException { 121 | if (ras!=null) 122 | ras.close(); 123 | else { 124 | data.clear(); 125 | src.close(); 126 | } 127 | } 128 | 129 | 130 | } -------------------------------------------------------------------------------- /java-src/interfaces/UnsignedDataInput.java: -------------------------------------------------------------------------------- 1 | package interfaces; 2 | import java.io.DataInput; 3 | import java.io.IOException; 4 | 5 | import clojure.lang.BigInt; 6 | 7 | public interface UnsignedDataInput extends DataInput { 8 | 9 | long readUnsignedInt() throws IOException; 10 | int readUnsignedShort() throws IOException; 11 | BigInt readUnsignedLong() throws IOException; 12 | long size(); 13 | } -------------------------------------------------------------------------------- /java-src/interfaces/UnsignedDataOutput.java: -------------------------------------------------------------------------------- 1 | package interfaces; 2 | import java.io.DataOutput; 3 | import java.io.IOException; 4 | 5 | import clojure.lang.BigInt; 6 | 7 | 8 | public interface UnsignedDataOutput extends DataOutput{ 9 | void writeUnsignedShort(int i) throws IOException; 10 | void writeUnsignedInt(long i) throws IOException; 11 | void writeUnsignedLong(BigInt i) throws IOException; 12 | int size(); 13 | } 14 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject smee/binary "0.5.5" 2 | :description "DSL for binary I/O using java's stream apis." 
:url "http://github.com/smee/binary"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.6.0"]]
  :java-source-paths ["java-src"])
--------------------------------------------------------------------------------
/src/org/clojars/smee/binary/core.clj:
--------------------------------------------------------------------------------
(ns org.clojars.smee.binary.core
  (:require [clojure.java.io :refer [copy]])
  (:import [java.io DataInput DataOutput InputStream DataOutputStream ByteArrayInputStream ByteArrayOutputStream OutputStream]
           [impl BigEndianDataInputStream BigEndianDataOutputStream
            CountingInputStream LittleEndianDataInputStream LittleEndianDataOutputStream]
           [interfaces UnsignedDataInput UnsignedDataOutput]))

;; Every stream handed to the codecs is wrapped in a CountingInputStream first.
(defn- wrap-input-stream ^InputStream [in]
  (CountingInputStream. in))

;; A codec reads from / writes to a pair of streams: one big-endian and one
;; little-endian view. Each codec picks the view it needs.
(defprotocol ^:private BinaryIO
  (read-data  [codec big-in little-in])
  (write-data [codec big-out little-out value]))

(defn codec?
  "True if `codec` satisfies the `BinaryIO` protocol."
  [codec]
  (satisfies? BinaryIO codec))

(defmacro ^:private primitive-codec
  "Create a reification of `BinaryIO` that can read/write a primitive data type.
   `get-fn` and `write-fn` are the interop method symbols to call on the input
   and output stream, `cast-fn` is applied to the value that was read.
   With `endianess` = `:le` the little-endian streams are used, otherwise the
   big-endian ones."
  [get-fn write-fn cast-fn & [endianess]]
  (let [big-in     (gensym "big-in")
        little-in  (gensym "little-in")
        big-out    (gensym "big-out")
        little-out (gensym "little-out")
        le?        (= endianess :le)
        in         (if le? little-in big-in)
        out        (if le? little-out big-out)]
    ;; the :tag metadata type-hints the interop calls in the expansion
    `(reify BinaryIO
       (read-data [codec# ~big-in ~little-in]
         (~cast-fn (~get-fn ~(with-meta in {:tag "interfaces.UnsignedDataInput"}))))
       (write-data [codec# ~big-out ~little-out value#]
         (~write-fn ~(with-meta out {:tag "interfaces.UnsignedDataOutput"}) value#))
       Object (toString [_#] (str "")))))

(defn byte->ubyte
  "Interpret the lowest 8 bits of `b` as an unsigned value, returned as an int in 0..255."
  [b]
  (int (bit-and b 0xff)))

(defn ubyte->byte
  "Map an unsigned byte value (0..255) onto the signed byte with the same bit pattern."
  [b]
  (byte (if (>= b 128)
          (- b 256)
          b)))

;; Keyword -> primitive codec. Keywords without a suffix are big-endian.
(def primitive-codecs
  {:byte      (primitive-codec .readByte .writeByte byte)
   :ubyte     (primitive-codec .readUnsignedByte .writeByte byte->ubyte)

   ;:char (primitive-codec .readChar .writeChar char :be)
   ;:char-le (primitive-codec .readChar .writeChar char :le)
   ;:char-be (primitive-codec .readChar .writeChar char :be)

   :short     (primitive-codec .readShort .writeShort short :be)
   :short-le  (primitive-codec .readShort .writeShort short :le)
   :short-be  (primitive-codec .readShort .writeShort short :be)

   :ushort    (primitive-codec .readUnsignedShort .writeUnsignedShort int :be)
   :ushort-le (primitive-codec .readUnsignedShort .writeUnsignedShort int :le)
   :ushort-be (primitive-codec .readUnsignedShort .writeUnsignedShort int :be)

   :int       (primitive-codec .readInt .writeInt int :be)
   :int-le    (primitive-codec .readInt .writeInt int :le)
   :int-be    (primitive-codec .readInt .writeInt int :be)
   :uint      (primitive-codec .readUnsignedInt .writeUnsignedInt long :be)
   :uint-le   (primitive-codec .readUnsignedInt .writeUnsignedInt long :le)
   :uint-be   (primitive-codec .readUnsignedInt .writeUnsignedInt long :be)

   :long      (primitive-codec .readLong
.writeLong long :be)
   :long-le   (primitive-codec .readLong .writeLong long :le)
   :long-be   (primitive-codec .readLong .writeLong long :be)
   :ulong     (primitive-codec .readUnsignedLong .writeUnsignedLong identity :be)
   :ulong-le  (primitive-codec .readUnsignedLong .writeUnsignedLong identity :le)
   :ulong-be  (primitive-codec .readUnsignedLong .writeUnsignedLong identity :be)

   :float     (primitive-codec .readFloat .writeFloat float :be)
   :float-le  (primitive-codec .readFloat .writeFloat float :le)
   :float-be  (primitive-codec .readFloat .writeFloat float :be)

   :double    (primitive-codec .readDouble .writeDouble double :be)
   :double-le (primitive-codec .readDouble .writeDouble double :le)
   :double-be (primitive-codec .readDouble .writeDouble double :be)
   })

(declare compile-codec)


(defn ordered-map
  "Parse a binary stream into a map.
   `kvs` are alternating field names and codecs. Fields are read and written in
   exactly the given order; reading returns a map of field name to decoded value.
   The returned codec can itself be used like a map of field name to codec
   (lookup, count, seq, assoc, dissoc), iterated in field order."
  [& kvs]
  {:pre [(even? (count kvs))]}
  (let [ks (take-nth 2 kvs)
        vs (take-nth 2 (rest kvs))
        ;; field name -> position; keeps `internal-map` sorted in declaration order
        key-order (into {} (map-indexed #(vector %2 %) ks))
        internal-map (apply sorted-map-by
                            (comparator #(< (key-order % java.lang.Long/MAX_VALUE)
                                            (key-order %2 java.lang.Long/MAX_VALUE)))
                            kvs)]
    (reify
      BinaryIO
      (read-data [_ big-in little-in]
        (zipmap ks (map (fn ordered-map-values [codec] (read-data codec big-in little-in)) vs)))
      (write-data [_ big-out little-out value]
        ;; NOTE(review): inside `reify` this map is an ordinary expression, not a
        ;; precondition, so the check is evaluated but never enforced. Left as is
        ;; because enforcing it would reject values that are accepted today.
        {:pre [(every? (set ks) (keys value))]}
        (dorun (map #(write-data % big-out little-out %2)
                    vs
                    (map #(get value %) ks))))

      java.lang.Object
      (toString [this]
        (str internal-map))

      clojure.lang.ILookup
      (valAt [_ key]
        (get internal-map key))
      (valAt [_ key not-found]
        (get internal-map key not-found))

      clojure.lang.Counted
      (count [_]
        (count internal-map))

      ; clojure.lang.Associative
      ; (containsKey [_ k]
      ;   (contains? internal-map k))
      ; (entryAt [_ k]
      ;   (get internal-map k))
      ; (assoc [this k v]
      ;   (apply ordered-map (apply concat (seq (assoc internal-map k v)))))

      clojure.lang.IPersistentMap
      (assoc [this k v]
        (apply ordered-map (apply concat (seq (assoc internal-map k v)))))
      (assocEx [this k v]
        ;; test for the key itself, not for the truthiness of its value
        (if (contains? internal-map k)
          (throw (ex-info "Key already present" {:key k}))
          (apply ordered-map (apply concat (seq (assoc internal-map k v))))))
      (without [this k]
        (apply ordered-map (apply concat (seq (dissoc internal-map k)))))

      clojure.lang.IPersistentCollection
      (cons [this [k v]]
        (assoc this k v))
      (empty [_]
        (ordered-map))
      (equiv [_ other]
        false)

      clojure.lang.Seqable
      (seq [_]
        (seq internal-map))

      ;; Java interfaces
      java.lang.Iterable
      (iterator [this]
        (.iterator ^Iterable (seq this))))))

(defn- read-times
  "Performance optimization for `(repeatedly n #(read-data codec big-in little-in))`.
   Returns a vector of `n` decoded values."
  [n codec big-in little-in]
  (loop [n (int n), res (transient [])]
    (if (zero? n)
      (persistent! res)
      (recur (dec n) (conj! res (read-data codec big-in little-in))))))

(defn- read-exhausting
  "Performance optimization for
   `(take-while (complement nil?) (repeatedly #(read-data codec big-in little-in)))`.
   Reads values until the stream signals EOF (or the codec yields nil) and returns
   them as a vector."
  [codec big-in little-in]
  (loop [res (transient [])]
    ;; `if-some` rather than `if-let`: a decoded `false` is a legitimate value and
    ;; must not end the sequence, only nil / EOF does.
    (if-some [value (try (read-data codec big-in little-in) (catch java.io.EOFException _e nil))]
      (recur (conj! res value))
      (persistent! res))))

(defn- read-until-separator
  "Read until the read value equals `separator`. EOF after at least one value
   counts as a separator; EOF before any value was read is rethrown."
  [codec big-in little-in separator]
  (loop [res (transient []), empty? true]
    (let [value (try
                  (read-data codec big-in little-in)
                  (catch java.io.EOFException e
                    (if empty? ;there is no value read yet, but the stream is empty
                      (throw e)
                      ;else: there seems to be no more bytes, so just return what we have
                      separator)))]
      (if
        (= value separator)
        (persistent! res)
        (recur (conj! res value) false)))))

(defn repeated
  "Read a sequence of values. Options are pairs of keys and values with possible keys:
   - `:length` fixed length of the sequence
   - `:prefix` codec for the length of the sequence to read prior to the sequence itself.
   - `:separator` reads until the read value equals the given separator value. EOF of a stream is regarded a separator too.
   That means if the last token is the last element in a stream, the final separator may be missing. Caution: When
   writing the data there WILL be a final separator. This means, the written data may have more bytes than initially read!

   If there are no options, the decoder tries to read continuously until the stream is exhausted.
   Example: To read a sequence of integers with a byte prefix for the length use `(repeated :int :prefix :byte)`"
  [codec & {:keys [length prefix separator]}]
  (let [codec (compile-codec codec)]
    (cond length (reify BinaryIO
                   (read-data [_ big-in little-in]
                     (read-times length codec big-in little-in))
                   (write-data [_ big-out little-out values]
                     (if (not= length (count values))
                       (throw (java.lang.IllegalArgumentException. (str "This sequence should have length " length " but has really length " (count values))))
                       (doseq [value values]
                         (write-data codec big-out little-out value))))
                   Object (toString [_] (str "")))
          ; use prefix-codec?
          prefix (let [prefix-codec (compile-codec prefix)]
                   (reify BinaryIO
                     (read-data [_ big-in little-in]
                       (let [length (read-data prefix-codec big-in little-in)]
                         (read-times length codec big-in little-in)))
                     (write-data [_ big-out little-out values]
                       (let [length (count values)]
                         (write-data prefix-codec big-out little-out length)
                         (dorun (map #(write-data codec big-out little-out %) values))))
                     Object (toString [_] (str ""))))
          separator (reify BinaryIO
                      (read-data [_ big-in little-in]
                        (read-until-separator codec big-in little-in separator))
                      (write-data [_ big-out little-out values]
                        (doseq [value values]
                          (write-data codec big-out little-out value))
                        (write-data codec big-out little-out separator))
                      Object (toString [_] (str "")))
          :else (reify BinaryIO
                  (read-data [_ big-in little-in]
                    (read-exhausting codec big-in little-in))
                  (write-data [_ big-out little-out values]
                    (doseq [value values]
                      (write-data codec big-out little-out value)))
                  Object (toString [_] (str ""))))))

(defn- read-bytes
  "Read exactly `len` bytes from `in` into a fresh byte array."
  [^DataInput in len]
  (let [bytes (byte-array len)]
    (.readFully in bytes 0 len)
    bytes))

(defn blob
  "Reads a chunk of binary data as a Java byte array.
   Options as in `repeated`, except :separator is not supported.
   Without options everything left in the stream is read."
  [& {:keys [length prefix]}]
  (cond length (reify BinaryIO
                 (read-data [_ big-in little-in]
                   (read-bytes big-in length))
                 (write-data [_ big-out little-out bytes]
                   (if (not= length (alength ^"[B" bytes))
                     (throw (java.lang.IllegalArgumentException. (str "This sequence should have length " length " but has really length " (alength ^"[B" bytes))))
                     (.write ^DataOutput big-out ^"[B" bytes)))
                 Object (toString [_] (str "")))
        prefix (let [prefix-codec (compile-codec prefix)]
                 (reify BinaryIO
                   (read-data [_ big-in little-in]
                     (let [length (read-data prefix-codec big-in little-in)]
                       (read-bytes big-in length)))
                   (write-data [_ big-out little-out bytes]
                     (let [length (alength ^"[B" bytes)]
                       (write-data prefix-codec big-out little-out length)
                       (.write ^DataOutput big-out ^"[B" bytes)))
                   Object (toString [_] (str ""))))
        :else (reify BinaryIO
                (read-data [_ big-in little-in]
                  (let [byte-stream (ByteArrayOutputStream.)]
                    (copy big-in byte-stream)
                    (.toByteArray byte-stream)))
                (write-data [_ big-out little-out bytes]
                  (.write ^DataOutput big-out ^"[B" bytes))
                Object (toString [_] (str "")))))

(defn constant
  "Reads a constant value, ignores given value on write. Can be used as a version tag for a composite codec.
   Reading throws an `ex-info` if the decoded value differs from `constant-value`.
   Example:
       (encode (constant :int-le 7) out 1234)
       => ;will instead write bytes [7 0 0 0]"
  [codec constant-value]
  (compile-codec codec
                 (constantly constant-value)
                 #(if (= % constant-value) constant-value
                    (throw (ex-info (format "value '%s' should have had the constant value '%s'" (str %) (str constant-value)) {:constant-value constant-value :value %})))))

(defn string
  "String in the charset named `encoding`, stored as a sequence of bytes.
   `options` are passed on to `repeated` (`:length`, `:prefix` or `:separator`)."
  [^String encoding & options]
  (compile-codec
    (apply repeated :byte options)
    (fn string2bytes [^String s] (.getBytes s encoding))
    #(String. (byte-array %) encoding)))



(defn c-string
  "Zero-terminated string (like in C). String is a sequence of bytes, terminated by a 0 byte."
[^String encoding]
  (compile-codec
    (repeated :byte :separator (byte 0))
    (fn string2bytes [^String s] (.getBytes s encoding))
    #(String. (byte-array %) encoding)))


;; Bit `idx` lives in the byte counted from the END of `bytes`, i.e. the last
;; byte holds bits 0-7.
(defn- bit-set? [bytes idx]
  (let [byte-pos (- (count bytes) 1 (quot idx 8))
        mask     (bit-shift-left 1 (mod idx 8))]
    (not (zero? (bit-and (bytes byte-pos) mask)))))

(defn- set-bit [bytes idx]
  (let [byte-pos (- (count bytes) 1 (quot idx 8))
        mask     (bit-shift-left 1 (mod idx 8))]
    (update-in bytes [byte-pos] #(bit-or % mask))))

(defn bits
  "`flags` is a sequence of flag names. Each flag's index corresponds to the bit with that index.
   Flag names that are nil are ignored. Bit count will be padded up to the next multiple of 8.
   Decoding returns the set of flag names whose bit is set; encoding takes such a set."
  [flags]
  (let [byte-count  (int (Math/ceil (/ (count flags) 8)))
        indexed     (keep-indexed #(when %2 [%1 %2]) flags)
        idx->flags  (into {} indexed)
        flags->idx  (into {} (map (fn [[idx flag]] [flag idx]) indexed))
        bit-indices (sort (keys idx->flags))]
    (compile-codec (repeated :byte :length byte-count)
                   (fn [flags]
                     (reduce set-bit
                             (into [] (byte-array byte-count))
                             (vals (select-keys flags->idx flags))))
                   (fn [bytes]
                     (->> bit-indices
                          (filter #(bit-set? bytes %))
                          (map idx->flags)
                          set)))))

(defn header
  "Decodes a header using `header-codec`. Passes this datastructure to `header->body-codec` which returns the codec to
   use to parse the body. For writing this codec calls `body->header` with the data as parameter and
   expects a value to use for writing the header information.
   If the optional flag `:keep-header?` is set, read will return a map with the keys `:header` and `:body`,
   else only the body will be returned. With `:keep-header?` the value to write must be such a map as well;
   `body->header` is then called with two arguments, the given header and the body, or may be nil
   in which case the given header is written unchanged."
  [header-codec header->body-codec body->header & {:keys [keep-header?] :or {keep-header? false}}]
  (let [header-codec (compile-codec header-codec)]
    (reify BinaryIO
      (read-data [_ big-in little-in]
        (let [hdr        (read-data header-codec big-in little-in)
              body-codec (header->body-codec hdr)
              body       (read-data body-codec big-in little-in)]
          (if-not keep-header?
            body
            {:header hdr
             :body body})))
      (write-data [_ big-out little-out value]
        (let [body       (if keep-header? (:body value) value)
              hdr        (cond
                           (and keep-header? body->header) (body->header (:header value) (:body value))
                           keep-header?                    (:header value)
                           :else                           (body->header body))
              body-codec (header->body-codec hdr)]
          (write-data header-codec big-out little-out hdr)
          (write-data body-codec big-out little-out body)))
      Object (toString [_] (str "")))))


(defn padding
  "Make sure there is always a minimum byte `length` when reading/writing values.
   Works by reading `length` bytes into a byte array, then reading from that array using `inner-codec`.
   Currently there are three options:
   - `:length` is the number of bytes that should be present after writing
   - `:padding-byte` is the numeric value of the byte used for padding (default is 0)
   - `:truncate?` is a boolean flag that determines the behaviour if `inner-codec` writes more bytes than
   `padding` can handle: false is the default, meaning throw an exception. True will lead to truncating the
   output of `inner-codec`.

   Example:
       (encode (padding (repeated (string \"UTF8\" :separator 0)) :length 11 :truncate? true) outstream [\"abc\" \"def\" \"ghi\"])
       => ; writes bytes [97 98 99 0 100 101 102 0 103 104 105]
          ; observe: the last separator byte was truncated!"
  [inner-codec & {:keys [length
                         padding-byte
                         truncate?]
                  :or {padding-byte 0
                       truncate? false}
                  :as opts}]
  {:pre [(every? number? [padding-byte length])
         (codec? inner-codec)]}
  (reify BinaryIO
    (read-data [_ big-in _]
      (let [buffer (byte-array length)]
        (.readFully ^DataInput big-in buffer)
        ;; decode from the isolated buffer so the inner codec cannot read past `length`
        (let [in (wrap-input-stream (java.io.ByteArrayInputStream. buffer))]
          (read-data inner-codec
                     (BigEndianDataInputStream. in)
                     (LittleEndianDataInputStream. in)))))
    (write-data [_ big-out _ value]
      (let [baos               (ByteArrayOutputStream. length)
            _                  (write-data inner-codec
                                           (BigEndianDataOutputStream. baos)
                                           (LittleEndianDataOutputStream. baos)
                                           value)
            arr                (.toByteArray baos)
            written            (.size baos)
            len                (if truncate? (min length written) written)
            padding-bytes-left (max 0 (- length len))
            too-big?           (> len length)]
        (when (and (not truncate?) too-big?)
          (throw (ex-info (str "Data should be max. " length " bytes, but attempting to write " (Math/abs ^long (- len length)) " bytes more!") {:overflow-bytes (Math/abs ^long (- len length))})))
        (.write ^DataOutputStream big-out arr 0 len)
        (dotimes [_ padding-bytes-left]
          (.writeByte ^DataOutputStream big-out padding-byte))))
    Object (toString [_] (str ""))))

(defn align
  "This codec is related to `padding` in that it makes sure that the number of bytes
   written/read to/from a stream always is aligned to a specified byte boundary.
   For example, if a format requires aligning all data to 8 byte boundaries this codec
   will pad the written data with `padding-byte` to make sure that the count of bytes written
   is divisible by 8.

   Parameters:
   - `:modulo`: byte boundary modulo, should be positive (default is 1)
   - `:padding-byte` is the numeric value of the byte used for padding (default is 0)

   Example:
       (encode (align (repeated :short-be :length 3) :modulo 9 :padding-byte 55) output-stream [1 2 3])
       ;==> writes these bytes: [0 1 0 2 0 3 55 55 55]"
  [inner-codec & {:keys [modulo
                         padding-byte]
                  :or {padding-byte 0
                       modulo 1}
                  :as opts}]
  {:pre [(number? modulo)
         (number? padding-byte)
         (pos? modulo)
         (codec? inner-codec)]}
  (reify BinaryIO
    (read-data [_ b l]
      (let [^UnsignedDataInput b b
            data (read-data inner-codec b l)
            size (.size b)
            ;; number of filler bytes up to the next multiple of `modulo`
            padding-bytes-left (mod (- modulo (mod size modulo)) modulo)]
        (dotimes [_ padding-bytes-left]
          (.readByte b))
        data))
    (write-data [_ big-out little-out value]
      (let [^UnsignedDataOutput b big-out
            ^UnsignedDataOutput l little-out]
        (write-data inner-codec b little-out value)
        (let [size (+ (.size b) (.size l))
              padding-bytes-left (mod (- modulo (mod size modulo)) modulo)]
          (dotimes [_ padding-bytes-left]
            (.writeByte b padding-byte)))))
    Object (toString [_] (str ""))))


(defn union
  "Union is a C-style union. A fixed number of bytes may represent different values depending on the
   interpretation of the bytes. The value returned by `read-data` is a map of all valid interpretations according to
   the specified unioned codecs.
   Parameter is the number of bytes needed for the longest codec in this union and a map of value names to codecs.
   This codec will read the specified number of bytes from the input streams and then successively try to read
   from this byte array using each individual codec.
   When writing, the first entry of the value map with a truthy value selects the codec to use.

   Example: Four bytes may represent an integer, two shorts, four bytes, a list of bytes with prefix or a string.

       (union 4 {:integer :int-be
                 :shorts (repeated :short-be :length 2)
                 :bytes (repeated :byte :length 4)
                 :prefixed (repeated :byte :prefix :byte)
                 :str (string \"UTF8\" :prefix :byte)})"
  [bytes-length codecs-map]
  (padding
    (reify BinaryIO
      (read-data [_ big-in _]
        (let [arr    (byte-array bytes-length)
              _      (.readFully ^UnsignedDataInput big-in arr)
              bais   (ByteArrayInputStream. arr)
              is     (wrap-input-stream bais)
              big    (BigEndianDataInputStream. is)
              little (LittleEndianDataInputStream. is)]
          (into {}
                (doall (for [[n codec] codecs-map]
                         (do
                           ;; every interpretation starts again at the first byte
                           (.reset bais)
                           [n (read-data codec big little)]))))))
      (write-data [_ big-out little-out value]
        (let [k     (some (fn [[k v]] (when v k)) value)
              codec (codecs-map k)]
          (when-not codec
            (throw (ex-info (str "No known codec for value with key " k) {:value value :unknown-key k :codecs codecs-map})))
          (write-data codec big-out little-out (get value k))))
      Object (toString [_] (str "")))
    :length bytes-length))

;; Swap keys and values; the postcondition rejects maps that are not 1-to-1.
(defn- map-invert [m]
  {:post [(= (count (keys %)) (count (keys m)))]}
  (into {} (map (fn [[k v]] [v k]) m)))

;; Lookup function over `m`: unknown keys throw unless `lenient?`, in which case
;; the key itself is passed through.
(defn- strict-map [m lenient?]
  (fn enum-lookup [k]
    (let [value (m k)]
      (cond
        (some? value) value
        lenient?      k
        :else         (throw (ex-info (str "Unknown enum key: " k) {:enum m :key k}))))))

(defn enum
  "An enumerated value. `m` must be a 1-to-1 mapping of names (e.g. keywords) to their decoded values.
   Only names and values in `m` will be accepted when encoding or decoding, unless `:lenient?` is true:
   then unknown names/values are passed through unchanged."
  [codec m & {:keys [lenient?] :or {lenient? false}}]
  (let [pre-encode  (strict-map m lenient?)
        post-decode (strict-map (map-invert m) lenient?)]
    (compile-codec codec pre-encode post-decode)))

#_(defn at-offsets
    "Read from a stream at specific offsets.
Problems are we are skipping data inbetween and we miss data earlier in the stream."
    [offset-name-codecs]
    {:pre [(every? #(= 3 (count %)) offset-name-codecs)]}
    (let [m (reduce (fn [m [offset name codec]] (assoc m offset [name codec])) (sorted-map) offset-name-codecs)]
      (reify BinaryIO
        (read-data [this big-in little-in]
          (loop [pos (.size big-in), pairs (seq m), res {}]
            (if (nil? pairs)
              res
              (let [[seek-pos [name codec]] (first pairs)
                    _ (.skipBytes big-in (- seek-pos pos))
                    obj (read-data codec big-in little-in)]
                (recur (.size big-in) (next pairs) (assoc res name obj))))))
        (write-data [this big-out little-out values]
          (throw :not-implemented)))))

;;;;;;; internal compilation of the DSL into instances of `BinaryIO`
;;
;; let sequences, vectors, maps and primitive's keywords implement BinaryIO
;; that means, compile-codec is optional!
;; also, strings and byte arrays are treated like `constant`
(extend-protocol BinaryIO
  (java.lang.Class/forName "[B")
  (read-data [this big-in _]
    (let [^bytes bytes (read-bytes big-in (count this))]
      (assert (java.util.Arrays/equals ^bytes bytes ^bytes this) (format "Expected to read array '%s', found '%s' instead." (str (seq this)) (str (seq bytes))))
      bytes))
  (write-data [this out _ _]
    ;; `this` already is the byte array to emit; it must not be treated as a String
    (.write ^OutputStream out ^bytes this))

  java.lang.String
  (read-data [this big-in _]
    ;; read as many bytes as the encoded string occupies, which differs from the
    ;; character count for non-ASCII text
    (let [^bytes expected (.getBytes ^String this)
          ^bytes bytes (read-bytes big-in (alength expected))
          res (String. bytes)]
      (assert (java.util.Arrays/equals bytes expected) (format "Expected to read string '%s', found '%s' instead." this res))
      res))
  (write-data [this out _ _]
    (.write ^OutputStream out (.getBytes ^String this)))

  clojure.lang.ISeq
  (read-data [this big-in little-in]
    ;; realize eagerly: a lazy seq would defer the stream reads until some later
    ;; point, after other codecs have already consumed the bytes
    (doall (map #(read-data % big-in little-in) this)))
  (write-data [this big-out little-out values]
    (dorun (map #(write-data % big-out little-out %2) this values)))

  clojure.lang.IPersistentVector
  (read-data [this big-in little-in]
    (mapv #(read-data % big-in little-in) this))
  (write-data [this big-out little-out values]
    (dorun (map #(write-data % big-out little-out %2) this values)))

  clojure.lang.Keyword
  (read-data [kw big-in little-in]
    (read-data (primitive-codecs kw) big-in little-in))
  (write-data [kw big-out little-out value]
    (write-data (primitive-codecs kw) big-out little-out value))

  clojure.lang.IPersistentMap
  (read-data [m big-in little-in]
    (zipmap (keys m) (map #(read-data % big-in little-in) (vals m))))
  (write-data [m big-out little-out value]
    ;; the codec map defines the field order (same order `read-data` uses), not
    ;; the iteration order of the value being written
    (doseq [[k codec] m]
      (write-data codec big-out little-out (get value k)))))

(defn compile-codec
  "Wrap a `codec` into pre- and post-processing functions to be applied to the value
   before writing/after reading. Use these to transform values according to domain specific rules.
   The one-argument form only checks that `codec` satisfies `BinaryIO` and returns it,
   throwing an `ex-info` otherwise."
  ([codec] (if (codec? codec)
             codec
             (throw (ex-info (str codec " does not satisfy the protocol BinaryIO!" ) {:codec codec}))))
  ([codec pre-encode post-decode]
   (let [codec (compile-codec codec)]
     (reify BinaryIO
       (read-data [_ big-in little-in]
         (post-decode (read-data codec big-in little-in)))
       (write-data [_ big-out little-out value]
         (write-data codec big-out little-out (pre-encode value)))
       Object (toString [_] (str ""))))))

;;;;;;;;;;;;;; API for en-/decoding


(defn encode
  "Serialize a value to the OutputStream `out` according to the codec."
  [codec out value]
  (let [big-out (BigEndianDataOutputStream. out)
        little-out (LittleEndianDataOutputStream. out)]
    (write-data codec big-out little-out value)))

(defn decode
  "Deserialize a value from the InputStream `in` according to the codec."
  [codec in]
  (let [wrapped (wrap-input-stream in)
        big-in (BigEndianDataInputStream. wrapped)
        little-in (LittleEndianDataInputStream. wrapped)]
    (read-data codec big-in little-in)))
--------------------------------------------------------------------------------
/src/org/clojars/smee/binary/demo/bitcoin.clj:
--------------------------------------------------------------------------------
(ns org.clojars.smee.binary.demo.bitcoin
  "Implementation of the raw binary format of the bitcoin block chain.
Specification from https://en.bitcoin.it/wiki/Protocol_specification and
   http://james.lab6.com/2012/01/12/bitcoin-285-bytes-that-changed-the-world"
  (:refer-clojure :exclude [hash])
  (:require [org.clojars.smee.binary.core :refer :all])
  (:import org.clojars.smee.binary.core.BinaryIO
           java.io.DataOutput
           java.io.DataInput))

;;;;;;;;;; common ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Variable length integer: values below 0xfd take a single byte, larger ones a
;; marker byte (0xfd / 0xfe / 0xff) followed by a little-endian 16 / 32 / 64 bit
;; number.
(def var-int-le
  (let [;; the 16 and 32 bit forms are unsigned; decoding them with the signed
        ;; codecs would turn e.g. 0x8000 into a negative length
        s-le (compile-codec :ushort-le)
        i-le (compile-codec :uint-le)
        ;; NOTE(review): the 64 bit form is still decoded as a signed long
        l-le (compile-codec :long-le)]
    (reify BinaryIO
      (read-data [_ big-in little-in]
        (let [b (.readByte ^DataInput little-in)]
          (condp = b
            -3 #_0xfd (read-data s-le big-in little-in)
            -2 #_0xfe (read-data i-le big-in little-in)
            -1 #_0xff (read-data l-le big-in little-in)
            (byte->ubyte b))))
      (write-data [_ big-out little-out value]
        (cond
          (< value 0xfd)        (.writeByte ^DataOutput little-out value)
          ;; inclusive bounds: 0xffff still fits 16 bits, 0xffffffff still fits 32
          (<= value 0xffff)     (do (.writeByte ^DataOutput little-out 0xfd) (write-data s-le big-out little-out value))
          (<= value 0xffffffff) (do (.writeByte ^DataOutput little-out 0xfe) (write-data i-le big-out little-out value))
          :else                 (do (.writeByte ^DataOutput little-out 0xff) (write-data l-le big-out little-out value)))))))

;;;;;;;;;;; messages ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(defn- sha256
  "SHA-256 digest of the byte array `bs`."
  [^bytes bs]
  (let [hash (java.security.MessageDigest/getInstance "SHA-256")]
    (.digest hash bs)))

(defn- message-checksum
  "First four bytes of the double SHA-256 of `bs`, as a vector of unsigned byte values."
  [bs]
  (let [hash (-> bs byte-array sha256 sha256)
        res (byte-array 4)]
    (System/arraycopy hash 0 res 0 4)
    (mapv byte->ubyte res)))

;; see https://en.bitcoin.it/wiki/Protocol_documentation#Message_structure
(def message (ordered-map :magic (enum :uint-le {:main 0xD9B4BEF9
                                                 :testnet 0xDAB5BFFA
                                                 :testnet3 0x0709110B
                                                 :namecoin 0xFEB4BEF9})
                          :command (padding (c-string "US-ASCII")
                                            :length 12 :padding-byte 0 :truncate? true)
                          :payload (header (ordered-map :length :uint-le
                                                        :checksum (repeated :ubyte :length 4))
                                           ;; how should we parse the body for this header?
                                           (fn [{:keys [length checksum]}]
                                             (compile-codec (repeated :ubyte :length length)
                                                            identity
                                                            (fn [payload]
                                                              (assert (= checksum (message-checksum payload)))
                                                              payload)))
                                           ;; create a new header for this body
                                           (fn [payload]
                                             {:length (count payload)
                                              :checksum (message-checksum payload)}))))

;;;;;;;;;;; transaction scripts ;;;;;;;;;;;;;;;;;;;;;;;;

(def ^:private opcodes
  {:OP_0 0
   :OP_PUSHDATA1 76
   :OP_PUSHDATA2 77
   :OP_PUSHDATA4 78
   :OP_1NEGATE 79
   :OP_1 81
   :OP_2 82
   :OP_3 83
   :OP_4 84
   :OP_5 85
   :OP_6 86
   :OP_7 87
   :OP_8 88
   :OP_9 89
   :OP_10 90
   :OP_11 91
   :OP_12 92
   :OP_13 93
   :OP_14 94
   :OP_15 95
   :OP_16 96
   :OP_NOP 97
   :OP_IF 99
   :OP_NOTIF 100
   :OP_ELSE 103
   :OP_ENDIF 104
   :OP_VERIFY 105
   :OP_RETURN 106
   :OP_TOALTSTACK 107
   :OP_FROMALTSTACK 108
   :OP_IFDUP 115
   :OP_DEPTH 116
   :OP_DROP 117
   :OP_DUP 118
   :OP_NIP 119
   :OP_OVER 120
   :OP_PICK 121
   :OP_ROLL 122
   :OP_ROT 123
   :OP_SWAP 124
   :OP_TUCK 125
   :OP_2DROP 109
   :OP_2DUP 110
   :OP_3DUP 111
   :OP_2OVER 112
   :OP_2ROT 113
   :OP_2SWAP 114
   :OP_CAT 126
   :OP_SUBSTR 127
   :OP_LEFT 128
   :OP_RIGHT 129
   :OP_SIZE 130
   :OP_INVERT 131
   :OP_AND 132
   :OP_OR 133
   :OP_XOR 134
   :OP_EQUAL 135
   :OP_EQUALVERIFY 136
   :OP_1ADD 139
   :OP_1SUB 140
   :OP_2MUL 141
   :OP_2DIV 142
   :OP_NEGATE 143
   :OP_ABS 144
   :OP_NOT 145
   :OP_0NOTEQUAL 146
   :OP_ADD 147
   :OP_SUB 148
   :OP_MUL 149
   :OP_DIV 150
   :OP_MOD 151
   :OP_LSHIFT 152
   :OP_RSHIFT 153
   :OP_BOOLAND 154
   :OP_BOOLOR 155
   :OP_NUMEQUAL 156
(def script-codec
  ;; A script is a var-int byte count followed by that many bytes of tokens.
  ;; A token is either a single opcode byte (decoded to its keyword via
  ;; `opcodes-rev`) or a push instruction: one byte n in 1..75 followed by
  ;; n bytes of data (decoded to a vector of unsigned bytes).
  (reify BinaryIO
    (read-data [_ big-in little-in]
      (let [total (read-data var-int-le big-in little-in)]
        (loop [consumed 0
               tokens   []]
          (if-not (= consumed total)
            (let [op (byte->ubyte (.readByte ^DataInput big-in))]
              (if (contains? push-codec-opcodes op)
                ;; push instruction: opcode byte plus `op` bytes of payload
                (let [data (read-data (push-codecs op) big-in little-in)]
                  (recur (+ consumed op 1) (conj tokens data)))
                ;; plain opcode: exactly one byte
                (recur (inc consumed) (conj tokens (opcodes-rev op)))))
            tokens))))
    (write-data [_ big-out little-out script]
      (let [token-size (fn [token]
                         (if (keyword? token)
                           1
                           (inc (count token))))
            total      (reduce + 0 (map token-size script))]
        (write-data var-int-le big-out little-out total)
        (doseq [token script]
          (if-not (keyword? token)
            (let [n (count token)]
              (.writeByte ^DataOutput big-out n)
              (write-data (push-codecs n) big-out little-out token))
            (.writeByte ^DataOutput big-out (opcodes token))))))))
(long (* % 1000)))) 234 | :target :int-le 235 | :nonce :int-le) 236 | :transactions (var-len transaction) 237 | )) 238 | -------------------------------------------------------------------------------- /src/org/clojars/smee/binary/demo/elf.clj: -------------------------------------------------------------------------------- 1 | (ns org.clojars.smee.binary.demo.elf 2 | (:require [org.clojars.smee.binary.core :refer :all] 3 | [clojure.java.io :only [input-stream]])) 4 | 5 | (def primitives 6 | {:elf64 {:addr (align :ulong-le :modulo 8) 7 | :off (align :ulong-le :modulo 8) 8 | :half (align :ushort-le :modulo 2) 9 | :word (align :uint-le :modulo 4) 10 | :xword (align :ulong-le :modulo 8) 11 | :sword (align :long-le :modulo 8)} 12 | :elf32 {:addr (align :uint-le :modulo 4) 13 | :off (align :uint-le :modulo 4) 14 | :half (align :ushort-le :modulo 2) 15 | :word (align :uint-le :modulo 4) 16 | :sword (align :int-le :modulo 4)}}) 17 | 18 | (def e-ident 19 | (padding 20 | (ordered-map 21 | :ei-magic "ELF" 22 | :ei-class (enum :byte {:elf32 1 :elf64 2}) 23 | :ei-data (enum :byte {:le 1 :be 2}) 24 | :ei-version :byte 25 | :ei-osabi :byte) 26 | :length 16)) 27 | 28 | (defn phdr [endianess] 29 | (let [{:keys [word addr off xword]} (primitives endianess)] 30 | (if (= endianess :elf64) 31 | (ordered-map 32 | :p-type (enum word {:null 0 33 | :load 1 34 | :dynamic 2 35 | :interp 3 36 | :note 4 37 | :shlib 5 38 | :phdr 6 39 | :loos 0x60000000 40 | :gnu-eh-frame 0x6474e550 41 | :gnu-stack 0x6474e551 42 | :gnu-relro 0x6474e552 43 | :hios 0x6fffffff 44 | :loproc 0x70000000 45 | :hiproc 0x7fffffff 46 | } :lenient? 
true) ; type of segment, ignores vendor specifics 47 | :p-flags (padding (bits [:x :w :r]) :length 2) ; segment attributes 48 | :p-offset off ; offset in file 49 | :p-vaddr addr ; virtual address in memory 50 | :p-paddr addr ; reserved 51 | :p-filesz xword ; size of segment in file 52 | :p-memsz xword ; size of segment in memory 53 | :p-align xword ; alignment of segment 54 | ) 55 | (ordered-map 56 | :p-type (enum word {:null 0 57 | :load 1 58 | :dynamic 2 59 | :interp 3 60 | :note 4 61 | :shlib 5 62 | :phdr 6 63 | :loos 0x60000000 64 | :gnu-eh-frame 0x6474e550 65 | :gnu-stack 0x6474e551 66 | :gnu-relro 0x6474e552 67 | :hios 0x6fffffff 68 | :loproc 0x70000000 69 | :hiproc 0x7fffffff 70 | } :lenient? true) ; type of segment, ignores vendor specifics 71 | :p-offset off ; offset in file 72 | :p-vaddr addr ; virtual address in memory 73 | :p-paddr addr ; reserved 74 | :p-filesz word ; size of segment in file 75 | :p-memsz word ; size of segment in memory 76 | :p-flags (padding (bits [:x :w :r]) :length 2) ; segment attributes 77 | :p-align word ; alignment of segment 78 | )))) 79 | 80 | (def ehdr 81 | (-> e-ident 82 | (header 83 | (fn [ident] 84 | (let [{:keys [word half addr off]} (-> ident :ei-class primitives)] 85 | (ordered-map 86 | :e-type (enum half {:none 0 87 | :rel 1 88 | :exec 2 89 | :dyn 3 90 | :core 4 91 | :loos 0xfe00 92 | :hios 0xfeff 93 | :loproc 0xff00 94 | :hiproc 0xffff}) ; object file type 95 | :e-machine (enum half {:none 0 96 | :at&t-we-32100 1 97 | :sparc 2 98 | :intel-80386 3 99 | :motorola-68000 4 100 | :motorola-88000 5 101 | :intel-80860 7 102 | :mips-rs3000 8 103 | :amd64 62} :lenient? 
true) ; machine type 104 | :e-version word ; object file version 105 | :e-entry addr ; entry point address 106 | :e-phoff off ; program header offset 107 | :e-shoff off ; section header offset 108 | :e-flags word ; processor-specific flags 109 | :e-ehsize half ; elf header size 110 | :e-phentsize half ; size of program header in entry 111 | :e-phnum half ; number of program header entries 112 | :e-shentsize half ; size of section header entry 113 | :e-shnum half ; number of section header entry 114 | :e-shstmdx half ; section name string table index 115 | ))) 116 | nil 117 | :keep-header? true) 118 | (compile-codec 119 | #(hash-map :header (:e-ident %) :body (dissoc % :e-ident)) 120 | #(assoc (:body %) :e-ident (:header %))))) 121 | 122 | (def elf-codec 123 | (-> ehdr 124 | (header 125 | #(repeated (phdr (-> % :e-ident :ei-class)) :length (:e-phnum %)) 126 | nil 127 | :keep-header? true) 128 | (compile-codec 129 | #(hash-map :header (:e-header %) :body (:p-headers %)) 130 | #(hash-map :e-header (:header %) :p-headers (:body %))))) 131 | 132 | 133 | 134 | 135 | (comment 136 | (require 'clojure.pprint) 137 | (set! *print-length* nil) 138 | (with-open [is (CountingInputStream. (input-stream "echo32")) 139 | os (clojure.java.io/output-stream "echo32.out")] 140 | (let [elf (decode elf-codec is)] 141 | (clojure.pprint/pprint elf) 142 | (encode elf-codec os elf)) 143 | (println (format "bytes read: %x" (.size is)))) 144 | ) 145 | -------------------------------------------------------------------------------- /src/org/clojars/smee/binary/demo/matlab5.clj: -------------------------------------------------------------------------------- 1 | (ns org.clojars.smee.binary.demo.matlab5 2 | "Implementation for MATLAB 5 binary files. Currently read only, because creating headers requires 3 | knowing the number of bytes written. Since we do not know these size beforehand, we currently can't write 4 | a consistent header. 
(defn- map-invert
  "Returns a map in which every value of `m` maps back to its key.
  The postcondition fails with an AssertionError if the result has a
  different number of entries than `m`, i.e. if values in `m` are not unique."
  [m]
  {:post [(= (count (keys %)) (count (keys m)))]}
  (reduce (fn [inverted [k v]]
            (assoc inverted v k))
          {}
          m))
(read-data data-element-header-part1 b l)] 71 | (if (= :long-format (:type hdr)) 72 | {:type (element-types-rev t) 73 | :length (read-data :int-le b l)} 74 | ; should be hdr, but in my tests the type always says 'ubyte' although the value is an integer 75 | {:length 4 76 | :type :uint-le}))) 77 | (write-data [this b l value] 78 | (throw (ex-info "not implemented" {:codec this :value value}))))) 79 | 80 | (def array-type (enum :byte {:cell 1 81 | :structure 2 82 | :object 3 83 | :chars 4 84 | :sparse 5 85 | :doubles 6 86 | :floats 7 87 | :bytes 8 88 | :ubytes 9 89 | :shorts 10 90 | :ushorts 11 91 | :ints 12 92 | :uints 13 93 | :longs 14 94 | :ulongs 15})) 95 | 96 | (declare subelement) 97 | ;;;;;;;;;;;;;; matrices ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 98 | 99 | (def flag-codec (header data-element-header 100 | #(align (ordered-map 101 | :array-type array-type 102 | :flags (bits [nil nil nil nil nil :complex :global :logical])) 103 | :modulo (:length %)) 104 | #(hash-map :type :uint-le :length 8))) 105 | 106 | (def dimensions-codec (header data-element-header 107 | (fn [{t :type l :length}] 108 | (aligned (repeated t :length (/ l (get element-type-sizes t))))) 109 | break)) 110 | 111 | 112 | (defn data-matrix [length] 113 | (padding (ordered-map 114 | :flags flag-codec 115 | :dimensions dimensions-codec 116 | :name (header data-element-header 117 | #(aligned (string "UTF8" :length (:length %))) 118 | #(hash-map :type 1 :length (count %))) 119 | :real subelement) 120 | :length length)) 121 | 122 | ;;;;;;;;;;;; codecs for different data types / elements ;;;;;;;;;;;;;;;;;;; 123 | 124 | (defmulti data-element :type) 125 | (defmethod data-element :miMATRIX [{l :length}] 126 | (data-matrix l)) 127 | 128 | (defmethod data-element :miCOMPRESSED [{l :length}] 129 | (blob :length l)) 130 | 131 | (defmethod data-element :miUTF8 [{l :length}] 132 | (string "UTF8" :length l)) 133 | 134 | (defmethod data-element :miUTF16 [{l :length}] 135 | (string "UTF16" :length l)) 136 | 137 | 
(defmethod data-element :miUTF32 [{l :length}] 138 | (string "UTF32" :length l)) 139 | 140 | (defmethod data-element :default [{t :type l :length}] 141 | ; (println "default handler, unknown subelement with header=" hdr) 142 | (aligned (repeated t :length (/ l (element-type-sizes t))))) 143 | 144 | ;;;;;;;;;;; overall structure ;;;;;;;;;;;;;;;;;;;;;;;; 145 | 146 | (def matlab-header (ordered-map :text (compile-codec (string "UTF8" :length 124) 147 | #(apply str % (repeat (max 0 (- 124 (count %))) \space)) 148 | identity) 149 | :version :short-le 150 | ;todo use endianess to switch codecs for data later on 151 | :endianess (string "UTF8" :length 2))) 152 | 153 | (def subelement "Each individual entry of a MATLAB file has this structure" 154 | (header data-element-header ;#(blob :length (:length %)) 155 | data-element 156 | break)) 157 | 158 | (def matlab-5-codec (ordered-map 159 | :header matlab-header 160 | :elements (repeated subelement))) 161 | 162 | (comment 163 | (require 'clojure.pprint) 164 | (set! 
(defn int->synchsafe
  "Converts the non-negative integer `x` into its ID3v2 synchsafe
  representation: the value is split into four groups of 7 bits, and each
  group is stored in the lower 7 bits of one byte of the result (the highest
  bit of every byte stays 0).

  Four groups of 7 bits can hold 28 bits, so `x` must satisfy 0 <= x < 2^28;
  otherwise the precondition fails with an AssertionError instead of silently
  dropping the upper bits."
  [x]
  {:pre [(<= 0 x)
         (< x (bit-shift-left 1 28))]}
  (let [m0 (bit-and x 127)
        m1 (bit-and (bit-shift-right x 7) 127)
        m2 (bit-and (bit-shift-right x 14) 127)
        m3 (bit-and (bit-shift-right x 21) 127)]
    (reduce bit-or (int 0) [m0
                            (bit-shift-left m1 8)
                            (bit-shift-left m2 16)
                            (bit-shift-left m3 24)])))

(defn synchsafe->int
  "Inverse of `int->synchsafe`: takes the four bytes of the synchsafe value `x`
  and packs them into one integer, 7 bits per byte.
  Each byte is masked with 255, so a set high bit in the input is not rejected;
  it overlaps with the lowest bit of the next group."
  [x]
  (let [m0 (bit-and x 255)
        m1 (bit-and (bit-shift-right x 8) 255)
        m2 (bit-and (bit-shift-right x 16) 255)
        m3 (bit-and (bit-shift-right x 24) 255)]
    (reduce bit-or (int 0) [m0
                            (bit-shift-left m1 7)
                            (bit-shift-left m2 14)
                            (bit-shift-left m3 21)])))
(defn- reconstruct-varint
  "Combines the bytes of a protobuf varint (least significant group first)
  into one long. Only the lower 7 bits of every byte are used, so the
  continuation bit and the sign of signed bytes are ignored."
  [bytes]
  (reduce bit-or
          (long 0)
          (map-indexed #(bit-shift-left (bit-and 2r1111111 %2) (* 7 %)) bytes)))

(defn- construct-varint
  "Splits `number` into the bytes of a protobuf varint: groups of 7 bits,
  least significant group first, with bit 7 set on every byte except the last.

  `number` is treated as a 64 bit two's complement value, so negative numbers
  are encoded as ten bytes (as protobuf does for int64) instead of being
  emitted as a single invalid byte."
  [number]
  (loop [left  (long number)
         bytes (transient [])]
    ;; done as soon as no bit above the lowest 7 is set
    (if (zero? (bit-and left -128))
      (persistent! (conj! bytes left))
      ;; logical (zero filling) shift right by 7: the arithmetic shift copies
      ;; the sign bit into the top 7 bits, the mask clears them again
      (recur (bit-and (bit-shift-right left 7) 0x01FFFFFFFFFFFFFF)
             (conj! bytes (bit-or 128 (bit-and 127 left)))))))
(defn s2b
  "Returns the UTF-8 encoding of the string `s` as a vector of (signed) bytes."
  [^String s]
  (into [] (.getBytes s "UTF-8")))
16 | _ (encode codec baos value) 17 | arr (.toByteArray baos) 18 | encoded-bytes (mapv byte->ubyte (seq arr)) 19 | decoded (decode codec (java.io.ByteArrayInputStream. arr))] 20 | #_(do 21 | (println codec value expected-bytes decoded 22 | (java.lang.Long/toBinaryString decoded)) 23 | (doseq [b encoded-bytes] 24 | (print (java.lang.Integer/toHexString b) " ")) 25 | (println)) 26 | {:encoded encoded-bytes 27 | :value value 28 | :decoded decoded})) 29 | 30 | (defn- replace-arrays [v] 31 | (if (array-classes (class v)) 32 | (vec v) 33 | v)) 34 | 35 | (defn- test-roundtrip [codec value expected-bytes] 36 | (let [{:keys [decoded value encoded]} (do-roundtrip codec value) 37 | value (walk/postwalk replace-arrays value) 38 | decoded (walk/postwalk replace-arrays decoded)] 39 | (is (= decoded value)) 40 | (when expected-bytes 41 | (is (= encoded (mapv byte->ubyte expected-bytes)))))) 42 | 43 | (defn- test-all-roundtrips [test-cases] 44 | (doseq [[codec value bytes] test-cases] 45 | (is (codec? codec)) 46 | (test-roundtrip codec value bytes))) 47 | 48 | (deftest signed-primitive-encodings 49 | (test-all-roundtrips 50 | [[:byte (byte 55) [55]] 51 | [:byte (byte -56) [-56]] 52 | [:byte (byte -1) [255]] 53 | [:short-be (short 5) [0 5]] 54 | [:short-le (short 5) [5 0]] 55 | [:int-be (int 127) [0 0 0 127]] 56 | [:int-le (int 127) [127 0 0 0]] 57 | [:long-be (long 31) [0 0 0 0 0 0 0 31]] 58 | [:long-le (long 31) [31 0 0 0 0 0 0 0]] 59 | [:float-le (float 123.45) [0x66 0xe6 0xf6 0x42]] 60 | [:float-be (float 123.45) [0x42 0xf6 0xe6 0x66]] 61 | [:double-be (double 123.45) [64 94 220 204 204 204 204 205]] 62 | [:double-le (double 123.45) [205 204 204 204 204 220 94 64]] 63 | ])) 64 | 65 | (deftest unsigned-primitive-encodings 66 | (test-all-roundtrips 67 | [[:ubyte 200 [200]] 68 | [:ubyte 255 [255]] 69 | [:ushort-be (int 50000) [195 80]] 70 | [:ushort-be (int 65535) [0xff 0xff]] 71 | [:ushort-le (int 50000) [80 195]] 72 | [:ushort-le (int 65535) [0xff 0xff]] 73 | [:uint-le (long 
255) [255 0 0 0]] 74 | [:uint-le (long 4294967295) [0xff 0xff 0xff 0xff]] 75 | [:uint-be (long 255) [0 0 0 255]] 76 | [:uint-be (long 4294967295) [0xff 0xff 0xff 0xff]] 77 | [:ulong-le 1N [1 0 0 0 0 0 0 0]] 78 | [:ulong-le 1024N [0 4 0 0 0 0 0 0]] 79 | [:ulong-le 18446744073709551614N [0xfe 0xff 0xff 0xff 0xff 0xff 0xff 0xff]] 80 | [:ulong-le 18446744073709551615N [0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff]] 81 | [:ulong-be 1N [0 0 0 0 0 0 0 1]] 82 | [:ulong-be 1024N [0 0 0 0 0 0 4 0]] 83 | [:ulong-be 18446744073709551614N [0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xfe]] 84 | [:ulong-be 18446744073709551615N [0xff 0xff 0xff 0xff 0xff 0xff 0xff 0xff]] 85 | ])) 86 | 87 | (deftest pre-post-processing 88 | (test-all-roundtrips 89 | [[(compile-codec :int-be dec inc) 1 [0 0 0 0]] ; test pre-encode and post-decode 90 | [(compile-codec :long-be #(.getTime %) #(java.util.Date. %)) #inst "1999-12-31T23:59:59" [0 0 0 220 106 207 168 24]]])) 91 | 92 | (deftest string-encodings 93 | (test-all-roundtrips 94 | [[(string "UTF8" :prefix :byte) "ABC" [3 65 66 67]] 95 | [(string "UTF8" :prefix :int-be) "ABC" [0 0 0 3 65 66 67]] 96 | [(string "UTF8" :prefix :short-le) "ABC" [3 0 65 66 67]] 97 | [(string "UTF8" :length 2) "AA" [65 65]] 98 | ;; unbounded length 99 | [(string "US-ASCII") "ABC" [65 66 67]]])) 100 | 101 | (deftest c-string-encodings 102 | (test-all-roundtrips 103 | [[(c-string "UTF8") "ABC" [65 66 67 0]] 104 | [(repeated (c-string "UTF8") :length 2) ["AAA" "BBB"] [65 65 65 0 66 66 66 0]]])) 105 | 106 | (deftest map-encodings 107 | (test-all-roundtrips 108 | [[(ordered-map 109 | :foo :int-be 110 | :bar :short-le 111 | :baz :ubyte) {:foo 1 :bar 0, :baz 255} [0 0 0 1 0 0 255]]])) 112 | 113 | (deftest map-manipulations 114 | (is (= 0 (count (ordered-map)))) 115 | (is (= [:foo :bar] (keys (ordered-map :foo :byte :bar :int)))) 116 | (test-all-roundtrips 117 | [[(assoc (ordered-map :foo :int-be :bar :short-le) 118 | :baz :ubyte) 119 | {:foo 1 :bar 0, :baz 255} [0 0 0 1 0 0 255]] 120 | 
[(dissoc (ordered-map :foo :int-be :bar :short-le :baz :ubyte) :bar) 121 | {:foo 1, :baz 255} [0 0 0 1 255]] 122 | [(into (ordered-map) [[:foo :int-be] [:bar :short-le] [:baz :ubyte]]) 123 | {:foo 1 :bar 0, :baz 255} [0 0 0 1 0 0 255]]])) 124 | 125 | (deftest repeated-encodings 126 | (test-all-roundtrips 127 | [[(repeated :byte :prefix :byte) (vec (range 5)) [5 0 1 2 3 4]] 128 | [(repeated :byte :length 5) (vec (range 5)) [0 1 2 3 4]] 129 | [(repeated (string "UTF8" :prefix :byte) :prefix :int-be) ["AAA" "BB" "C"] [0 0 0 3 3 65 65 65 2 66 66 1 67]] 130 | [(repeated :byte) (vec (range 5)) [0 1 2 3 4]] 131 | [(repeated :short-le) (vec (range 5)) [0 0 1 0 2 0 3 0 4 0]] 132 | [(repeated :short-le :separator 123) (vec (range 5)) [0 0 1 0 2 0 3 0 4 0 123 0]]])) 133 | 134 | (deftest blob-encodings 135 | (test-all-roundtrips 136 | [[(blob) (byte-array 1025 (byte 42)) (repeat 1025 42)] 137 | [(blob :length 7) (byte-array 7) (repeat 7 0)] 138 | [(blob :prefix :byte) (byte-array 7) (cons 7 (repeat 7 0))]])) 139 | 140 | (deftest sequence-encodings 141 | (test-all-roundtrips 142 | [[[:byte :byte] [1 2] [1 2]] 143 | [[:short-be :int-be :short-le] [1 2 99] [0 1 0 0 0 2 99 0]]])) 144 | 145 | (defn- binary [s] 146 | (Long/parseLong s 2)) 147 | 148 | (deftest bitmasks 149 | (test-all-roundtrips 150 | [[(bits [:a :b :c nil nil nil nil :last]) #{:c :last} [(binary "10000100")]] 151 | [(bits [:0 :1 nil nil nil nil :6 :7 :8 nil :10]) #{:1 :7 :10} [2r00000100 2r10000010]] 152 | [(bits [:flag1 :flag2]) #{:flag2} [(binary "00000010")]] 153 | [(bits [:flag1 :flag2]) #{} [(binary "00000000")]]])) 154 | 155 | (deftest mixed-encodings 156 | (test-all-roundtrips 157 | [[(ordered-map :foo [:byte :byte] 158 | :baz (blob :length 4) 159 | :bar (string "UTF8" :prefix :int-be)) 160 | {:foo [1 2], :bar "test", :baz (byte-array 4 (byte 55))} 161 | [1 2 55 55 55 55 0 0 0 4 116 101 115 116]]])) 162 | 163 | (deftest wrong-length 164 | (are [codec values] (is (thrown? 
java.lang.RuntimeException (encode codec (NullOutputStream.) values))) 165 | (string "UTF-8" :length 5) "X" 166 | (repeated :int :length 3) [1 2] 167 | (blob :length 3) (byte-array 2) 168 | (padding :int-le :length 1) (int 1234) 169 | (padding :int-le :length 3) (int 1234) 170 | (padding (repeated (string "UTF-8" :separator 0)) :length 1) ["abc" "def" "ghi"])) 171 | 172 | (deftest paddings 173 | (test-all-roundtrips 174 | [[(padding :int-be :length 6 :padding-byte (int \x)) (int 55) [0 0 0 55 120 120]] 175 | [(padding (string "UTF8" :length 6) :length 6) "abcdef" [97 98 99 100 101 102]] 176 | [(padding (repeated :int-le) :length 10 :padding-byte 0x99) [1 2] [1 0 0 0 2 0 0 0 0x99 0x99]] 177 | [(padding (c-string "US-ASCII") :length 12 :padding-byte 0 :truncate? true) "version" [0x76 0x65 0x72 0x73 0x69 0x6F 0x6E 00 00 00 00 00]] 178 | [(padding (c-string "US-ASCII") :length 3 :padding-byte 0 :truncate? true) "ABC" [65 66 67]] 179 | ])) 180 | 181 | (deftest padding-truncate 182 | (let [codec (padding (repeated (string "UTF8" :separator 0)) :length 11 :truncate? true) 183 | value ["abc" "def" "ghi"]] 184 | (test-roundtrip codec value (s2b "abc\u0000def\u0000ghi")) 185 | (is (= (:decoded (do-roundtrip codec (concat value ["will be cut"]))) value)))) 186 | 187 | (deftest test-alignment 188 | (test-all-roundtrips 189 | [[(align :int-be :modulo 8 :padding-byte 1) 5 [0 0 0 5 1 1 1 1]] 190 | [(align (repeated :short-be :length 3) :modulo 9 :padding-byte 55) [1 2 3] [0 1 0 2 0 3 55 55 55]] 191 | [(align [:short-le :short-be] :modulo 6) [1 5] [1 0 0 5 0 0]]])) 192 | 193 | (deftest constants 194 | (test-all-roundtrips 195 | [[(constant :int-le 7) 7 [7 0 0 0]] 196 | [(constant (string "UTF8" :length 2) "AB") "AB" [65 66]]])) 197 | 198 | (deftest constants-exception-on-wrong-value 199 | (let [codec (constant (string "UTF8" :length 2) "AB")] 200 | (test-roundtrip codec "AB" [65 66]) 201 | (is (thrown? java.lang.RuntimeException 202 | (decode codec (java.io.ByteArrayInputStream. 
(byte-array [(byte 0) (byte 0)]))))))) 203 | 204 | (deftest headers 205 | (test-all-roundtrips 206 | [[(header :byte #(string "utf8" :length %) #(.length %)) "ABC" [3 65 66 67]] 207 | [(header :byte #(padding (repeated :int-le) :length % :padding-byte 0x99) (constantly 11)) [5 9] [11 5 0 0 0 9 0 0 0 0x99 0x99 0x99]] 208 | [(header :byte #(repeated :short-be :length %) nil :keep-header? true) {:header 2 :body [1 5]} [2 0 1 0 5]]])) 209 | 210 | (deftest enums 211 | (test-all-roundtrips 212 | [[(enum :byte {:apple 1 :banana 2 :durian 3}) :durian [3]] 213 | [(enum (string "UTF8" :length 2) {:alabama "AL" :alaska "AK" :arizona "AZ"}) :alaska [65 75]] 214 | [(enum (ordered-map :red :ubyte :green :ubyte :blue :ubyte) {:yellow {:red 255 :green 255 :blue 0}}) :yellow [255 255 0]] 215 | [(enum :ubyte {true 1 false 0}) false [0]] 216 | [(enum :ubyte {true 1 false 0}) true [1]]]) 217 | (is (thrown? java.lang.RuntimeException 218 | (decode (enum :byte {:val 1}) 219 | (java.io.ByteArrayInputStream. (byte-array [(byte 2)])))))) 220 | 221 | (deftest bitcoin 222 | (let [payload [0x62 0xEA 00 00 01 00 00 00 00 00 00 00 0x11 0xB2 0xD0 0x50 00 00 00 00 223 | 0x01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0xFF 0xFF 00 00 00 00 00 00 224 | 0x01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0xFF 0xFF 00 00 00 00 00 00 225 | 0x3B 0x2E 0xB3 0x5D 0x8C 0xE6 0x17 0x65 226 | 0x0F 0x2F 0x53 0x61 0x74 0x6F 0x73 0x68 0x69 0x3A 0x30 0x2E 0x37 0x2E 0x32 0x2F 227 | 0xC0 0x3E 0x03 00]] 228 | (test-all-roundtrips 229 | [[btc/message {:magic :main 230 | :command "version" 231 | :payload payload} 232 | (concat [0xF9 0xBE 0xB4 0xD9 ;; Main network magic bytes 233 | 0x76 0x65 0x72 0x73 0x69 0x6F 0x6E 00 00 00 00 00 ;; "version" command with padding 234 | 0x64 00 00 00 ;; 100 bytes payload 235 | 0x3B 0x64 0x8D 0x5A ;; payload checksum 236 | ] payload)] 237 | 238 | [btc/block-codec 239 | {:transactions 240 | [{:lock-time 0, 241 | :outputs [{:script [[4 103 138 253 176 254 85 72 39 25 103 241 
166 113 48 183 16 92 214 168 40 224 57 9 166 121 98 224 234 31 97 222 182 73 246 188 63 76 239 56 196 243 85 4 229 30 193 18 222 92 56 77 247 186 11 141 87 138 76 112 43 107 241 29 95] :OP_CHECKSIG], 242 | :amount 5000000000}], 243 | :inputs [{:sequence-number -1, 244 | :script [[255 255 0 29] [4] [84 104 101 32 84 105 109 101 115 32 48 51 47 74 97 110 47 50 48 48 57 32 67 104 97 110 99 101 108 108 111 114 32 111 110 32 98 114 105 110 107 32 111 102 32 115 101 99 111 110 100 32 98 97 105 108 111 117 116 32 102 111 114 32 98 97 110 107 115]], 245 | :index -1, 246 | :hash [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]}], 247 | :transaction-version 1}], 248 | :header {:nonce 2083236893, 249 | :target 486604799, 250 | :timestamp #inst "2009-01-03T18:15:05.000-00:00", 251 | :merkle-root [59 163 237 253 122 123 18 178 122 199 44 62 103 118 143 97 127 200 27 195 136 138 81 50 58 159 184 170 75 30 94 74], 252 | :previous-hash [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], 253 | :block-version 1}, 254 | :length 285, 255 | :separator [249 190 180 217]} 256 | [249 190 180 217 29 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 59 163 237 253 122 123 18 178 122 199 44 62 103 118 143 97 127 200 27 195 136 138 81 50 58 159 184 170 75 30 94 74 41 171 95 73 255 255 0 29 29 172 43 124 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 255 255 255 255 77 4 255 255 0 29 1 4 69 84 104 101 32 84 105 109 101 115 32 48 51 47 74 97 110 47 50 48 48 57 32 67 104 97 110 99 101 108 108 111 114 32 111 110 32 98 114 105 110 107 32 111 102 32 115 101 99 111 110 100 32 98 97 105 108 111 117 116 32 102 111 114 32 98 97 110 107 115 255 255 255 255 1 0 242 5 42 1 0 0 0 67 65 4 103 138 253 176 254 85 72 39 25 103 241 166 113 48 183 16 92 214 168 40 224 57 9 166 121 98 224 234 31 97 222 182 73 246 188 63 76 239 56 196 243 85 4 229 30 193 18 222 92 56 77 247 186 11 141 87 138 76 112 43 107 241 29 95 172 0 0 0 
0]]]))) 257 | 258 | (deftest separator-based-repeated-does-not-touch-rest-of-bytes 259 | (let [bis (->> [1 0 2 3 0 4 0] 260 | (map byte) 261 | byte-array 262 | (java.io.ByteArrayInputStream.)) 263 | codec (repeated :byte :separator (byte 0))] 264 | (is (= [1] (decode codec bis))) 265 | (is (= 5 (.available bis))) 266 | (is (= [2 3] (decode codec bis))) 267 | (is (= 2 (.available bis))))) 268 | 269 | #_(deftest protobuf 270 | (test-all-roundtrips 271 | [[pb/proto-key [150 0] [8]] 272 | [pb/proto-delimited "testing" [0x12 0x07 0x74 0x65 0x73 0x74 0x69 0x6e 0x67]]])) 273 | 274 | (deftest test-unions 275 | (let [codec (union 4 {:integer :int-be 276 | :shorts [:short-be :short-be] 277 | :bytes [:byte :byte :byte :byte] 278 | :prefixed (repeated :byte :prefix :byte) 279 | :str (string "UTF8" :prefix :byte)}) 280 | result {:integer 0x03345678 281 | :shorts [0x0334 0x5678] 282 | :bytes [0x03 0x34 0x56 0x78] 283 | :prefixed [0x34 0x56 0x78] 284 | :str "4Vx"}] 285 | (are [value] (= result (:decoded (do-roundtrip codec value))) 286 | {:integer 0x03345678} 287 | {:shorts [0x0334 0x5678]} 288 | {:bytes [0x03 0x34 0x56 0x78]} 289 | {:prefixed [0x34 0x56 0x78]} 290 | {:str "4Vx"}))) 291 | --------------------------------------------------------------------------------