├── .gitignore
├── elm.json
├── LICENSE
├── README.md
└── src
    ├── Bytes.elm
    ├── Elm
    │   └── Kernel
    │       └── Bytes.js
    └── Bytes
        ├── Encode.elm
        └── Decode.elm


/.gitignore:
--------------------------------------------------------------------------------
1 | elm-stuff
2 | 


--------------------------------------------------------------------------------
/elm.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "type": "package",
 3 |     "name": "elm/bytes",
 4 |     "summary": "Work with sequences of bytes (a.k.a. ArrayBuffer, typed arrays, DataView)",
 5 |     "license": "BSD-3-Clause",
 6 |     "version": "1.0.8",
 7 |     "exposed-modules": [
 8 |         "Bytes",
 9 |         "Bytes.Encode",
10 |         "Bytes.Decode"
11 |     ],
12 |     "elm-version": "0.19.0 <= v < 0.20.0",
13 |     "dependencies": {
14 |         "elm/core": "1.0.1 <= v < 2.0.0"
15 |     },
16 |     "test-dependencies": {}
17 | }


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2018-present, Evan Czaplicki
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Evan Czaplicki nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Bytes
 2 | 
 3 | Work with densely packed sequences of bytes.
 4 | 
 5 | The goal of this package is to support **network protocols** such as ProtoBuf. Or to put it another way, the goal is to have packages like `elm/http` send fewer bytes over the wire.
 6 | 
 7 | 
 8 | ## Motivation = [A vision for data interchange in Elm](https://gist.github.com/evancz/1c5f2cf34939336ecb79b97bb89d9da6)
 9 | 
10 | Please read it!
11 | 
12 | 
13 | ## Example
14 | 
15 | This package lets you create encoders and decoders for working with sequences of bytes. Here is an example for converting between `Point` and `Bytes` values:
16 | 
17 | ```elm
18 | import Bytes exposing (Endianness(..))
19 | import Bytes.Encode as Encode exposing (Encoder)
20 | import Bytes.Decode as Decode exposing (Decoder)
21 | 
22 | 
23 | -- POINT
24 | 
25 | type alias Point =
26 |   { x : Float
27 |   , y : Float
28 |   , z : Float
29 |   }
30 | 
31 | toPointEncoder : Point -> Encoder
32 | toPointEncoder point =
33 |   Encode.sequence
34 |     [ Encode.float32 BE point.x
35 |     , Encode.float32 BE point.y
36 |     , Encode.float32 BE point.z
37 |     ]
38 | 
39 | pointDecoder : Decoder Point
40 | pointDecoder =
41 |   Decode.map3 Point
42 |     (Decode.float32 BE)
43 |     (Decode.float32 BE)
44 |     (Decode.float32 BE)
45 | ```
46 | 
47 | Rather than writing this by hand in client or server code, the hope is that folks implement things like ProtoBuf compilers for Elm.
48 | 
49 | Again, the overall plan is described in [**A vision for data interchange in Elm**](https://gist.github.com/evancz/1c5f2cf34939336ecb79b97bb89d9da6)!
50 | 
51 | 
52 | ## Scope
53 | 
54 | **This API is not intended to work like `Int8Array` or `Uint16Array` in JavaScript.** If you have a concrete scenario in which you want to interpret bytes as densely packed arrays of integers or floats, please describe it on [https://discourse.elm-lang.org/](https://discourse.elm-lang.org/) in a friendly and high-level way. What is the project about? What do densely packed arrays do for that project? Is it about perf? What kind of algorithms are you using? Etc.
55 | 
56 | If some scenarios require the mutation of entries in place, special care will be required in designing a nice API. All values in Elm are immutable, so the particular API that works well for us will probably depend a lot on the particulars of what folks are trying to do.
57 | 


--------------------------------------------------------------------------------
/src/Bytes.elm:
--------------------------------------------------------------------------------
  1 | module Bytes exposing
  2 |   ( Bytes
  3 |   , width
  4 |   , Endianness(..)
  5 |   , getHostEndianness
  6 |   )
  7 | 
  8 | 
  9 | {-|
 10 | 
 11 | # Bytes
 12 | @docs Bytes, width
 13 | 
 14 | # Endianness
 15 | @docs Endianness, getHostEndianness
 16 | 
 17 | -}
 18 | 
 19 | 
 20 | import Elm.Kernel.Bytes
 21 | import Task exposing (Task)
 22 | 
 23 | 
 24 | -- BYTES
 25 | 
 26 | 
 27 | {-| A sequence of bytes.
 28 | 
 29 | A byte is a chunk of eight bits. For example, the letter `j` is usually
 30 | represented as the byte `01101010`, and the letter `k` is `01101011`.
 31 | 
 32 | Seeing each byte as a stream of zeros and ones can be quite confusing though,
 33 | so it is common to use hexadecimal numbers instead:
 34 | 
 35 | ```
 36 | | Binary | Hex |
 37 | +--------+-----+
 38 | |  0000  |  0  |
 39 | |  0001  |  1  |
 40 | |  0010  |  2  |
 41 | |  0011  |  3  |     j = 01101010
 42 | |  0100  |  4  |         \__/\__/
 43 | |  0101  |  5  |           |   |
 44 | |  0110  |  6  |           6   A
 45 | |  0111  |  7  |
 46 | |  1000  |  8  |     k = 01101011
 47 | |  1001  |  9  |         \__/\__/
 48 | |  1010  |  A  |           |   |
 49 | |  1011  |  B  |           6   B
 50 | |  1100  |  C  |
 51 | |  1101  |  D  |
 52 | |  1110  |  E  |
 53 | |  1111  |  F  |
 54 | ```
 55 | 
 56 | So `j` is `6A` and `k` is `6B` in hexadecimal. This more compact representation
 57 | is great when you have a sequence of bytes. You can see this even in a short
 58 | string like `"jazz"`:
 59 | 
 60 | ```
 61 | binary                                 hexadecimal
 62 | 01101010 01100001 01111010 01111010 => 6A 61 7A 7A
 63 | ```
 64 | 
 65 | Anyway, the point is that `Bytes` is a sequence of bytes!
 66 | -}
 67 | type Bytes = Bytes
 68 | 
 69 | 
 70 | {-| Get the width of a sequence of bytes.
 71 | 
 72 | So if a sequence has four-hundred bytes, then `width bytes` would give back
 73 | `400`. That may be 400 unsigned 8-bit integers, 100 signed 32-bit integers, or
 74 | even a UTF-8 string. The content does not matter. This is just figuring out
 75 | how many bytes there are!
 76 | -}
 77 | width : Bytes -> Int
 78 | width =
 79 |   Elm.Kernel.Bytes.width
 80 | 
 81 | 
 82 | 
 83 | -- ENDIANNESS
 84 | 
 85 | 
 86 | {-| Different computers store integers and floats slightly differently in
 87 | memory. Say we have the integer `0x1A2B3C4D` in our program. It needs four
 88 | bytes (32 bits) in memory. It may seem reasonable to lay them out in order:
 89 | 
 90 | ```
 91 |    Big-Endian (BE)      (Obvious Order)
 92 | +----+----+----+----+
 93 | | 1A | 2B | 3C | 4D |
 94 | +----+----+----+----+
 95 | ```
 96 | 
 97 | But some people thought it would be better to store the bytes in the opposite
 98 | order:
 99 | 
100 | ```
101 |   Little-Endian (LE)    (Shuffled Order)
102 | +----+----+----+----+
103 | | 4D | 3C | 2B | 1A |
104 | +----+----+----+----+
105 | ```
106 | 
107 | Notice that **the _bytes_ are shuffled, not the bits.** It is like if you cut a
108 | photo into four strips and shuffled the strips. It is not a mirror image.
109 | The theory seems to be that an 8-bit `0x1A` and a 32-bit `0x0000001A` both have
110 | `1A` as the first byte in this scheme. Maybe this was helpful when processors
111 | handled one byte at a time.
112 | 
113 | **Most processors use little-endian (LE) layout.** This seems to be because
114 | Intel did it this way, and other chip manufacturers followed their convention.
115 | **Most network protocols use big-endian (BE) layout.** I suspect this is
116 | because if you are trying to debug a network protocol, it is nice if your
117 | integers are not all shuffled.
118 | 
119 | **Note:** Endianness is relevant for integers and floats, but not strings.
120 | UTF-8 specifies the order of bytes explicitly.
121 | 
122 | **Note:** The terms little-endian and big-endian are a reference to an egg joke
123 | in Gulliver's Travels. They first appeared in 1980 in [this essay][essay], and
124 | you can decide for yourself if they stood the test of time. I personally find
125 | these terms quite unhelpful, so I say “Obvious Order” and “Shuffled Order” in
126 | my head. I remember which is more common by asking myself, “if things were
127 | obvious, would I have to ask this question?”
128 | 
129 | [essay]: http://www.ietf.org/rfc/ien/ien137.txt
130 | -}
131 | type Endianness = LE | BE
132 | 
133 | 
134 | {-| Is this program running on a big-endian or little-endian machine?
135 | -}
136 | getHostEndianness : Task x Endianness
137 | getHostEndianness =
138 |   Elm.Kernel.Bytes.getHostEndianness LE BE
139 | 


--------------------------------------------------------------------------------
/src/Elm/Kernel/Bytes.js:
--------------------------------------------------------------------------------
  1 | /*
  2 | 
  3 | import Bytes.Encode as Encode exposing (getWidth, write)
  4 | import Elm.Kernel.Scheduler exposing (binding, succeed)
  5 | import Elm.Kernel.Utils exposing (Tuple2, chr)
  6 | import Maybe exposing (Just, Nothing)
  7 | 
  8 | */
  9 | 
 10 | // BYTES
 11 | 
 12 | function _Bytes_width(bytes)
 13 | { // Reports how many bytes the given view spans, read from its byteLength property.
 14 | 	return bytes.byteLength;
 15 | }
 16 | 
 17 | var _Bytes_getHostEndianness = F2(function(le, be)
 18 | {
 19 | 	return __Scheduler_binding(function(callback)
 20 | 	{
 21 | 		callback(__Scheduler_succeed(new Uint8Array(new Uint32Array([1]))[0] === 1 ? le : be));
 22 | 	});
 23 | });
 24 | 
 25 | 
 26 | // ENCODERS
 27 | 
 28 | function _Bytes_encode(encoder)
 29 | { // Allocates a buffer of exactly the encoder's width, fills it starting at offset 0, and returns the DataView.
 30 | 	var mutableBytes = new DataView(new ArrayBuffer(__Encode_getWidth(encoder)));
 31 | 	__Encode_write(encoder)(mutableBytes)(0); // curried call into the Elm-side writer; the offset it returns is ignored
 32 | 	return mutableBytes;
 33 | }
 34 | 
 35 | 
 36 | // SIGNED INTEGERS
 37 | 
 38 | var _Bytes_write_i8  = F3(function(mb, i, n) { mb.setInt8(i, n); return i + 1; }); // stores n in 1 byte at offset i of mb; returns the offset just past it
 39 | var _Bytes_write_i16 = F4(function(mb, i, n, isLE) { mb.setInt16(i, n, isLE); return i + 2; }); // 2 bytes; isLE is handed to setInt16 as its little-endian flag
 40 | var _Bytes_write_i32 = F4(function(mb, i, n, isLE) { mb.setInt32(i, n, isLE); return i + 4; }); // 4 bytes; same flag handling
 41 | 
 42 | 
 43 | // UNSIGNED INTEGERS
 44 | 
 45 | var _Bytes_write_u8  = F3(function(mb, i, n) { mb.setUint8(i, n); return i + 1 ;}); // stores n in 1 byte at offset i of mb; returns the offset just past it
 46 | var _Bytes_write_u16 = F4(function(mb, i, n, isLE) { mb.setUint16(i, n, isLE); return i + 2; }); // 2 bytes; isLE is handed to setUint16 as its little-endian flag
 47 | var _Bytes_write_u32 = F4(function(mb, i, n, isLE) { mb.setUint32(i, n, isLE); return i + 4; }); // 4 bytes; same flag handling
 48 | 
 49 | 
 50 | // FLOATS
 51 | 
 52 | var _Bytes_write_f32 = F4(function(mb, i, n, isLE) { mb.setFloat32(i, n, isLE); return i + 4; }); // stores n as a 4-byte float at offset i; returns the offset just past it
 53 | var _Bytes_write_f64 = F4(function(mb, i, n, isLE) { mb.setFloat64(i, n, isLE); return i + 8; }); // stores n as an 8-byte float at offset i; returns the offset just past it
 54 | 
 55 | 
 56 | // BYTES
 57 | 
 58 | var _Bytes_write_bytes = F3(function(mb, offset, bytes)
 59 | { // Copies every byte of `bytes` into `mb` starting at `offset`; returns the offset just past the copy.
 60 | 	for (var i = 0, len = bytes.byteLength, limit = len - 4; i <= limit; i += 4)
 61 | 	{ // bulk pass: four bytes per iteration; the read and the write use the same default byte order, so byte order is preserved
 62 | 		mb.setUint32(offset + i, bytes.getUint32(i));
 63 | 	}
 64 | 	for (; i < len; i++)
 65 | 	{ // tail pass: whatever is left over (fewer than four bytes), one byte at a time
 66 | 		mb.setUint8(offset + i, bytes.getUint8(i));
 67 | 	}
 68 | 	return offset + len;
 69 | });
 70 | 
 71 | 
 72 | // STRINGS
 73 | 
 74 | function _Bytes_getStringWidth(string)
 75 | { // Counts the UTF-8 bytes needed for `string` by walking its UTF-16 code units.
 76 | 	for (var width = 0, i = 0; i < string.length; i++)
 77 | 	{
 78 | 		var code = string.charCodeAt(i);
 79 | 		width +=
 80 | 			(code < 0x80) ? 1 : // one byte
 81 | 			(code < 0x800) ? 2 : // two bytes
 82 | 			(code < 0xD800 || 0xDBFF < code) ? 3 : (i++, 4); // three bytes, or four for a unit in 0xD800-0xDBFF (high surrogate), in which case the following unit is skipped
 83 | 	}
 84 | 	return width;
 85 | }
 86 | 
 87 | var _Bytes_write_string = F3(function(mb, offset, string)
 88 | { // Writes `string` as UTF-8 into `mb` starting at `offset`; returns the offset after the last byte written.
 89 | 	for (var i = 0; i < string.length; i++)
 90 | 	{
 91 | 		var code = string.charCodeAt(i);
 92 | 		offset += // each branch performs its writes, then yields the number of bytes it wrote
 93 | 			(code < 0x80) // one byte, stored as-is
 94 | 				? (mb.setUint8(offset, code)
 95 | 				, 1
 96 | 				)
 97 | 				:
 98 | 			(code < 0x800) // two bytes, emitted as one 16-bit write
 99 | 				? (mb.setUint16(offset, 0xC080 /* 0b1100000010000000 */
100 | 					| (code >>> 6 & 0x1F /* 0b00011111 */) << 8
101 | 					| code & 0x3F /* 0b00111111 */)
102 | 				, 2
103 | 				)
104 | 				:
105 | 			(code < 0xD800 || 0xDBFF < code) // three bytes: a 16-bit write followed by an 8-bit write
106 | 				? (mb.setUint16(offset, 0xE080 /* 0b1110000010000000 */
107 | 					| (code >>> 12 & 0xF /* 0b00001111 */) << 8
108 | 					| code >>> 6 & 0x3F /* 0b00111111 */)
109 | 				, mb.setUint8(offset + 2, 0x80 /* 0b10000000 */
110 | 					| code & 0x3F /* 0b00111111 */)
111 | 				, 3
112 | 				)
113 | 				:
114 | 			(code = (code - 0xD800) * 0x400 + string.charCodeAt(++i) - 0xDC00 + 0x10000 // unit in 0xD800-0xDBFF: combine with the next unit into one code point, consuming that unit
115 | 			, mb.setUint32(offset, 0xF0808080 /* 0b11110000100000001000000010000000 */
116 | 				| (code >>> 18 & 0x7 /* 0b00000111 */) << 24
117 | 				| (code >>> 12 & 0x3F /* 0b00111111 */) << 16
118 | 				| (code >>> 6 & 0x3F /* 0b00111111 */) << 8
119 | 				| code & 0x3F /* 0b00111111 */)
120 | 			, 4 // four bytes, emitted as one 32-bit write
121 | 			);
122 | 	}
123 | 	return offset;
124 | });
125 | 
126 | 
127 | // DECODER
128 | 
129 | var _Bytes_decode = F2(function(decoder, bytes)
130 | { // Runs `decoder` on `bytes` from offset 0 and wraps the `.b` field of its result in Just (the readers in this file put the decoded value second in the tuple).
131 | 	try {
132 | 		return __Maybe_Just(A2(decoder, bytes, 0).b);
133 | 	} catch(e) { // anything thrown while decoding is reported as Nothing
134 | 		return __Maybe_Nothing;
135 | 	}
136 | });
137 | 
138 | var _Bytes_read_i8  = F2(function(      bytes, offset) { return __Utils_Tuple2(offset + 1, bytes.getInt8(offset)); }); // every reader returns a tuple of (offset just past the value, decoded value)
139 | var _Bytes_read_i16 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 2, bytes.getInt16(offset, isLE)); }); // isLE is handed to the getter as its little-endian flag
140 | var _Bytes_read_i32 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 4, bytes.getInt32(offset, isLE)); });
141 | var _Bytes_read_u8  = F2(function(      bytes, offset) { return __Utils_Tuple2(offset + 1, bytes.getUint8(offset)); });
142 | var _Bytes_read_u16 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 2, bytes.getUint16(offset, isLE)); });
143 | var _Bytes_read_u32 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 4, bytes.getUint32(offset, isLE)); });
144 | var _Bytes_read_f32 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 4, bytes.getFloat32(offset, isLE)); });
145 | var _Bytes_read_f64 = F3(function(isLE, bytes, offset) { return __Utils_Tuple2(offset + 8, bytes.getFloat64(offset, isLE)); });
146 | 
147 | var _Bytes_read_bytes = F3(function(len, bytes, offset)
148 | { // Returns (offset + len, a new DataView of `len` bytes over the same underlying buffer, positioned at this view's byteOffset plus `offset`).
149 | 	return __Utils_Tuple2(offset + len, new DataView(bytes.buffer, bytes.byteOffset + offset, len));
150 | });
151 | 
152 | var _Bytes_read_string = F3(function(len, bytes, offset)
153 | { // Decodes UTF-8 starting at `offset` until the offset reaches offset + len; returns (offset after reading, decoded string).
154 | 	var string = '';
155 | 	var end = offset + len;
156 | 	for (; offset < end;) // the offset advances inside the body, once per byte consumed
157 | 	{ // NOTE(review): continuation bytes are read without comparing against `end`, so a sequence that starts before `end` can read past it — confirm this is intended
158 | 		var byte = bytes.getUint8(offset++);
159 | 		string +=
160 | 			(byte < 128) // single-byte character
161 | 				? String.fromCharCode(byte)
162 | 				:
163 | 			((byte & 0xE0 /* 0b11100000 */) === 0xC0 /* 0b11000000 */) // lead byte of a two-byte sequence
164 | 				? String.fromCharCode((byte & 0x1F /* 0b00011111 */) << 6 | bytes.getUint8(offset++) & 0x3F /* 0b00111111 */)
165 | 				:
166 | 			((byte & 0xF0 /* 0b11110000 */) === 0xE0 /* 0b11100000 */) // lead byte of a three-byte sequence
167 | 				? String.fromCharCode(
168 | 					(byte & 0xF /* 0b00001111 */) << 12
169 | 					| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
170 | 					| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
171 | 				)
172 | 				: // every remaining byte value is handled as the start of a four-byte sequence
173 | 				(byte =
174 | 					((byte & 0x7 /* 0b00000111 */) << 18
175 | 						| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 12
176 | 						| (bytes.getUint8(offset++) & 0x3F /* 0b00111111 */) << 6
177 | 						| bytes.getUint8(offset++) & 0x3F /* 0b00111111 */
178 | 					) - 0x10000 // `byte` is reused to hold the assembled code point minus 0x10000
179 | 				, String.fromCharCode(Math.floor(byte / 0x400) + 0xD800, byte % 0x400 + 0xDC00) // emitted as two UTF-16 units (surrogate pair)
180 | 				);
181 | 	}
182 | 	return __Utils_Tuple2(offset, string);
183 | });
184 | 
185 | var _Bytes_decodeFailure = F2(function() { throw 0; }); // ignores its arguments and always throws; the try/catch in the decode entry point in this file turns a throw into Nothing
186 | 


--------------------------------------------------------------------------------
/src/Bytes/Encode.elm:
--------------------------------------------------------------------------------
  1 | module Bytes.Encode exposing
  2 |   ( encode
  3 |   , Encoder
  4 |   , signedInt8, signedInt16, signedInt32
  5 |   , unsignedInt8, unsignedInt16, unsignedInt32
  6 |   , float32, float64
  7 |   , bytes
  8 |   , string, getStringWidth
  9 |   , sequence
 10 |   )
 11 | 
 12 | 
 13 | {-|
 14 | 
 15 | # Encoders
 16 | @docs encode, Encoder, sequence
 17 | 
 18 | # Integers
 19 | @docs signedInt8, signedInt16, signedInt32,
 20 |   unsignedInt8, unsignedInt16, unsignedInt32
 21 | 
 22 | # Floats
 23 | @docs float32, float64
 24 | 
 25 | # Bytes
 26 | @docs bytes
 27 | 
 28 | # Strings
 29 | @docs string, getStringWidth
 30 | 
 31 | -}
 32 | 
 33 | 
 34 | import Bytes exposing (Bytes, Endianness(..))
 35 | 
 36 | 
 37 | 
 38 | -- ENCODER
 39 | 
 40 | 
 41 | {-| Describes how to generate a sequence of bytes.
 42 | 
 43 | These encoders snap together with [`sequence`](#sequence) so you can start with
 44 | small building blocks and put them together into a more complex encoding.
 45 | -}
 46 | type Encoder
 47 |   = I8 Int
 48 |   | I16 Endianness Int
 49 |   | I32 Endianness Int
 50 |   | U8 Int
 51 |   | U16 Endianness Int
 52 |   | U32 Endianness Int
 53 |   | F32 Endianness Float
 54 |   | F64 Endianness Float
 55 |   | Seq Int (List Encoder)
 56 |   | Utf8 Int String
 57 |   | Bytes Bytes
 58 | 
 59 | 
 60 | 
 61 | -- ENCODE
 62 | 
 63 | 
 64 | {-| Turn an `Encoder` into `Bytes`.
 65 | 
 66 |     encode (unsignedInt8     7) -- <07>
 67 |     encode (unsignedInt16 BE 7) -- <0007>
 68 |     encode (unsignedInt16 LE 7) -- <0700>
 69 | 
 70 | The `encode` function is designed to minimize allocation. It figures out the
 71 | exact width necessary to fit everything in `Bytes` and then generate that
 72 | value directly. This is valuable when you are encoding more elaborate data:
 73 | 
 74 |     import Bytes exposing (Endianness(..))
 75 |     import Bytes.Encode as Encode
 76 | 
 77 |     type alias Person =
 78 |       { age : Int
 79 |       , name : String
 80 |       }
 81 | 
 82 |     toEncoder : Person -> Encode.Encoder
 83 |     toEncoder person =
 84 |       Encode.sequence
 85 |         [ Encode.unsignedInt16 BE person.age
 86 |         , Encode.unsignedInt16 BE (Encode.getStringWidth person.name)
 87 |         , Encode.string person.name
 88 |         ]
 89 | 
 90 |     -- encode (toEncoder (Person 33 "Tom")) == <00210003546F6D>
 91 | 
 92 | Did you know it was going to be seven bytes? How about when you have a hundred
 93 | people to serialize? And when some have Japanese and Norwegian names? Having
 94 | this intermediate `Encoder` can help reduce allocation quite a lot!
 95 | -}
 96 | encode : Encoder -> Bytes
 97 | encode =
 98 |   Elm.Kernel.Bytes.encode
 99 | 
100 | 
101 | 
102 | -- INTEGERS
103 | 
104 | 
105 | {-| Encode integers from `-128` to `127` in one byte.
106 | -}
107 | signedInt8 : Int -> Encoder
108 | signedInt8 =
109 |   I8
110 | 
111 | 
112 | {-| Encode integers from `-32768` to `32767` in two bytes.
113 | -}
114 | signedInt16 : Endianness -> Int -> Encoder
115 | signedInt16 =
116 |   I16
117 | 
118 | 
119 | {-| Encode integers from `-2147483648` to `2147483647` in four bytes.
120 | -}
121 | signedInt32 : Endianness -> Int -> Encoder
122 | signedInt32 =
123 |   I32
124 | 
125 | 
126 | {-| Encode integers from `0` to `255` in one byte.
127 | -}
128 | unsignedInt8 : Int -> Encoder
129 | unsignedInt8 =
130 |   U8
131 | 
132 | 
133 | {-| Encode integers from `0` to `65535` in two bytes.
134 | -}
135 | unsignedInt16 : Endianness -> Int -> Encoder
136 | unsignedInt16 =
137 |   U16
138 | 
139 | 
140 | {-| Encode integers from `0` to `4294967295` in four bytes.
141 | -}
142 | unsignedInt32 : Endianness -> Int -> Encoder
143 | unsignedInt32 =
144 |   U32
145 | 
146 | 
147 | 
148 | -- FLOATS
149 | 
150 | 
151 | {-| Encode 32-bit floating point numbers in four bytes.
152 | -}
153 | float32 : Endianness -> Float -> Encoder
154 | float32 =
155 |   F32
156 | 
157 | 
158 | {-| Encode 64-bit floating point numbers in eight bytes.
159 | -}
160 | float64 : Endianness -> Float -> Encoder
161 | float64 =
162 |   F64
163 | 
164 | 
165 | 
166 | -- BYTES
167 | 
168 | 
169 | {-| Copy bytes directly into the new `Bytes` sequence. This does not record the
170 | width though! You usually want to say something like this:
171 | 
172 |     import Bytes exposing (Bytes, Endianness(..))
173 |     import Bytes.Encode as Encode
174 | 
175 |     png : Bytes -> Encode.Encoder
176 |     png imageData =
177 |       Encode.sequence
178 |         [ Encode.unsignedInt32 BE (Bytes.width imageData)
179 |         , Encode.bytes imageData
180 |         ]
181 | 
182 | This allows you to represent the width however is necessary for your protocol.
183 | For example, you can use [Base 128 Varints][pb] for ProtoBuf,
184 | [Variable-Length Integers][sql] for SQLite, or whatever else they dream up.
185 | 
186 | [pb]: https://developers.google.com/protocol-buffers/docs/encoding#varints
187 | [sql]: https://www.sqlite.org/src4/doc/trunk/www/varint.wiki
188 | -}
189 | bytes : Bytes -> Encoder
190 | bytes =
191 |   Bytes
192 | 
193 | 
194 | 
195 | -- STRINGS
196 | 
197 | 
198 | {-| Encode a `String` as a bunch of UTF-8 bytes.
199 | 
200 |     encode (string "$20")   -- <24 32 30>
201 |     encode (string "£20")   -- <C2A3 32 30>
202 |     encode (string "€20")   -- <E282AC 32 30>
203 |     encode (string "bread") -- <62 72 65 61 64>
204 |     encode (string "brød")  -- <62 72 C3B8 64>
205 | 
206 | Some characters take one byte, while others can take up to four. Read more
207 | about [UTF-8](https://en.wikipedia.org/wiki/UTF-8) to learn the details!
208 | 
209 | But if you just encode UTF-8 directly, how can you know when you get to the end
210 | of the string when you are decoding? So most protocols have an integer saying
211 | how many bytes follow, like this:
212 | 
213 |     sizedString : String -> Encoder
214 |     sizedString str =
215 |       sequence
216 |         [ unsignedInt32 BE (getStringWidth str)
217 |         , string str
218 |         ]
219 | 
220 | You can choose whatever representation you want for the width, which is helpful
221 | because many protocols use different integer representations to save space. For
222 | example:
223 | 
224 | - ProtoBuf uses [Base 128 Varints](https://developers.google.com/protocol-buffers/docs/encoding#varints)
225 | - SQLite uses [Variable-Length Integers](https://www.sqlite.org/src4/doc/trunk/www/varint.wiki)
226 | 
227 | In both cases, small numbers can fit just one byte, saving some space. (The
228 | SQLite encoding has the benefit that the first byte tells you how long the
229 | number is, making it faster to decode.) In both cases, it is sort of tricky
230 | to make negative numbers small.
231 | -}
232 | string : String -> Encoder
233 | string str =
234 |   Utf8 (Elm.Kernel.Bytes.getStringWidth str) str
235 | 
236 | 
237 | {-| Get the width of a `String` in UTF-8 bytes.
238 | 
239 |     getStringWidth "$20"   == 3
240 |     getStringWidth "£20"   == 4
241 |     getStringWidth "€20"   == 5
242 |     getStringWidth "bread" == 5
243 |     getStringWidth "brød"  == 5
244 | 
245 | Most protocols need this number to come directly before a chunk of UTF-8 bytes
246 | as a way to know where the string ends!
247 | 
248 | Read more about how UTF-8 works [here](https://en.wikipedia.org/wiki/UTF-8).
249 | -}
250 | getStringWidth : String -> Int
251 | getStringWidth =
252 |   Elm.Kernel.Bytes.getStringWidth
253 | 
254 | 
255 | 
256 | -- SEQUENCE
257 | 
258 | 
259 | {-| Put together a bunch of builders. So if you wanted to encode three `Float`
260 | values for the position of a ball in 3D space, you could say:
261 | 
262 |     import Bytes exposing (Endianness(..))
263 |     import Bytes.Encode as Encode
264 | 
265 |     type alias Ball = { x : Float, y : Float, z : Float }
266 | 
267 |     ball : Ball -> Encode.Encoder
268 |     ball {x,y,z} =
269 |       Encode.sequence
270 |         [ Encode.float32 BE x
271 |         , Encode.float32 BE y
272 |         , Encode.float32 BE z
273 |         ]
274 | 
275 | -}
276 | sequence : List Encoder -> Encoder
277 | sequence builders =
278 |   Seq (getWidths 0 builders) builders
279 | 
280 | 
281 | 
282 | -- WRITE
283 | 
284 | 
285 | write : Encoder -> Bytes -> Int -> Int
286 | write builder mb offset = -- writes `builder` into `mb` starting at `offset`; the result is the offset just past what was written
287 |   case builder of
288 |     I8    n -> Elm.Kernel.Bytes.write_i8  mb offset n
289 |     I16 e n -> Elm.Kernel.Bytes.write_i16 mb offset n (e == LE) -- kernel writers take endianness as an "is little-endian" Bool
290 |     I32 e n -> Elm.Kernel.Bytes.write_i32 mb offset n (e == LE)
291 |     U8    n -> Elm.Kernel.Bytes.write_u8  mb offset n
292 |     U16 e n -> Elm.Kernel.Bytes.write_u16 mb offset n (e == LE)
293 |     U32 e n -> Elm.Kernel.Bytes.write_u32 mb offset n (e == LE)
294 |     F32 e n -> Elm.Kernel.Bytes.write_f32 mb offset n (e == LE)
295 |     F64 e n -> Elm.Kernel.Bytes.write_f64 mb offset n (e == LE)
296 |     Seq _ bs -> writeSequence bs mb offset -- the cached width is not needed while writing
297 |     Utf8 _ s -> Elm.Kernel.Bytes.write_string mb offset s
298 |     Bytes bs -> Elm.Kernel.Bytes.write_bytes mb offset bs
299 | 
300 | 
301 | writeSequence : List Encoder -> Bytes -> Int -> Int
302 | writeSequence builders mb offset =
303 |   -- Write the encoders left to right, feeding the offset returned by
304 |   -- each write into the next; an empty list leaves the offset unchanged.
305 |   List.foldl
306 |     (\builder nextOffset -> write builder mb nextOffset)
307 |     offset
308 |     builders
309 | 
310 | 
311 | 
312 | -- WIDTHS
313 | 
314 | 
315 | getWidth : Encoder -> Int
316 | getWidth builder = -- number of bytes `builder` occupies once written
317 |   case builder of
318 |     I8    _ -> 1
319 |     I16 _ _ -> 2
320 |     I32 _ _ -> 4
321 |     U8    _ -> 1
322 |     U16 _ _ -> 2
323 |     U32 _ _ -> 4
324 |     F32 _ _ -> 4
325 |     F64 _ _ -> 8
326 |     Seq w _ -> w -- width stored by `sequence` when the encoder was built
327 |     Utf8 w _ -> w -- width stored by `string` when the encoder was built
328 |     Bytes bs -> Elm.Kernel.Bytes.width bs
329 | 
330 | 
331 | getWidths : Int -> List Encoder -> Int
332 | getWidths width builders =
333 |   -- Add the width of every encoder in the list onto the running
334 |   -- total passed in as `width`; an empty list returns it unchanged.
335 |   List.foldl
336 |     (\builder total -> total + getWidth builder)
337 |     width
338 |     builders
339 | 


--------------------------------------------------------------------------------
/src/Bytes/Decode.elm:
--------------------------------------------------------------------------------
  1 | module Bytes.Decode exposing
  2 |   ( Decoder, decode
  3 |   , signedInt8, signedInt16, signedInt32
  4 |   , unsignedInt8, unsignedInt16, unsignedInt32
  5 |   , float32, float64
  6 |   , string
  7 |   , bytes
  8 |   , map, map2, map3, map4, map5
  9 |   , andThen, succeed, fail
 10 |   , Step(..), loop
 11 |   )
 12 | 
 13 | 
 14 | {-|
 15 | 
 16 | # Decoders
 17 | @docs Decoder, decode
 18 | 
 19 | # Integers
 20 | @docs signedInt8, signedInt16, signedInt32,
 21 |   unsignedInt8, unsignedInt16, unsignedInt32
 22 | 
 23 | # Floats
 24 | @docs float32, float64
 25 | 
 26 | # Bytes
 27 | @docs bytes
 28 | 
 29 | # Strings
 30 | @docs string
 31 | 
 32 | # Map
 33 | @docs map, map2, map3, map4, map5
 34 | 
 35 | # And Then
 36 | @docs andThen, succeed, fail
 37 | 
 38 | # Loop
 39 | @docs Step, loop
 40 | -}
 41 | 
 42 | 
 43 | import Bytes exposing (Bytes, Endianness(..))
 44 | 
 45 | 
 46 | 
 47 | -- PARSER
 48 | 
 49 | 
 50 | {-| Describes how to turn a sequence of bytes into a nice Elm value.
 51 | -}
 52 | type Decoder a =
 53 |   Decoder (Bytes -> Int -> (Int, a))
 54 | 
 55 | 
 56 | {-| Turn a sequence of bytes into a nice Elm value.
 57 | 
 58 |     -- decode (unsignedInt16 BE) <0007> == Just 7
 59 |     -- decode (unsignedInt16 LE) <0700> == Just 7
 60 |     -- decode (unsignedInt16 BE) <0700> == Just 1792
 61 |     -- decode (unsignedInt32 BE) <0700> == Nothing
 62 | 
 63 | The `Decoder` specifies exactly how this should happen. This process may fail
 64 | if the sequence of bytes is corrupted or unexpected somehow. The examples above
 65 | show a case where there are not enough bytes.
 66 | -}
 67 | decode : Decoder a -> Bytes -> Maybe a
 68 | decode (Decoder decoder) bs =
 69 |   Elm.Kernel.Bytes.decode decoder bs
 70 | 
 71 | 
 72 | 
 73 | -- SIGNED INTEGERS
 74 | 
 75 | 
 76 | {-| Decode one byte into an integer from `-128` to `127`.
 77 | -}
 78 | signedInt8 : Decoder Int
 79 | signedInt8 =
 80 |   Decoder Elm.Kernel.Bytes.read_i8
 81 | 
 82 | 
 83 | {-| Decode two bytes into an integer from `-32768` to `32767`.
 84 | -}
 85 | signedInt16 : Endianness -> Decoder Int
 86 | signedInt16 endianness =
 87 |   Decoder (Elm.Kernel.Bytes.read_i16 (endianness == LE))
 88 | 
 89 | 
 90 | {-| Decode four bytes into an integer from `-2147483648` to `2147483647`.
 91 | -}
 92 | signedInt32 : Endianness -> Decoder Int
 93 | signedInt32 endianness =
 94 |   Decoder (Elm.Kernel.Bytes.read_i32 (endianness == LE))
 95 | 
 96 | 
 97 | 
 98 | -- UNSIGNED INTEGERS
 99 | 
100 | 
101 | {-| Decode one byte into an integer from `0` to `255`.
102 | -}
103 | unsignedInt8 : Decoder Int
104 | unsignedInt8 =
105 |   Decoder Elm.Kernel.Bytes.read_u8
106 | 
107 | 
108 | {-| Decode two bytes into an integer from `0` to `65535`.
109 | -}
110 | unsignedInt16 : Endianness -> Decoder Int
111 | unsignedInt16 endianness =
112 |   Decoder (Elm.Kernel.Bytes.read_u16 (endianness == LE))
113 | 
114 | 
115 | {-| Decode four bytes into an integer from `0` to `4294967295`.
116 | -}
117 | unsignedInt32 : Endianness -> Decoder Int
118 | unsignedInt32 endianness =
119 |   Decoder (Elm.Kernel.Bytes.read_u32 (endianness == LE))
120 | 
121 | 
122 | 
123 | -- FLOATS
124 | 
125 | 
126 | {-| Decode four bytes into a floating point number.
127 | -}
128 | float32 : Endianness -> Decoder Float
129 | float32 endianness =
130 |   Decoder (Elm.Kernel.Bytes.read_f32 (endianness == LE))
131 | 
132 | 
133 | {-| Decode eight bytes into a floating point number.
134 | -}
135 | float64 : Endianness -> Decoder Float
136 | float64 endianness =
137 |   Decoder (Elm.Kernel.Bytes.read_f64 (endianness == LE))
138 | 
139 | 
140 | 
141 | -- BYTES
142 | 
143 | 
144 | {-| Copy a given number of bytes into a new `Bytes` sequence.
145 | -}
146 | bytes : Int -> Decoder Bytes
147 | bytes n =
148 |   Decoder (Elm.Kernel.Bytes.read_bytes n)
149 | 
150 | 
151 | 
152 | -- STRINGS
153 | 
154 | 
{-| Decode a given number of UTF-8 bytes into a `String`.

The argument is the width of the string in bytes. Most protocols store that
width right before the content, so you will probably write things like this:

    import Bytes exposing (Endianness(..))
    import Bytes.Decode as Decode

    sizedString : Decode.Decoder String
    sizedString =
      Decode.unsignedInt32 BE
        |> Decode.andThen Decode.string

Here the width is read as a 32-bit unsigned integer, but you are free to read
it some other way: as a [Base 128 Varint][pb] for ProtoBuf, as a
[Variable-Length Integer][sql] for SQLite, or whatever else they dream up.

[pb]: https://developers.google.com/protocol-buffers/docs/encoding#varints
[sql]: https://www.sqlite.org/src4/doc/trunk/www/varint.wiki
-}
string : Int -> Decoder String
string n =
  Elm.Kernel.Bytes.read_string n
    |> Decoder
178 | 
179 | 
180 | 
181 | -- MAP
182 | 
183 | 
{-| Transform the value produced by a decoder, leaving the number of bytes it
consumes unchanged. If you encode negative numbers in a special way, you can
say something like this:

    negativeInt8 : Decoder Int
    negativeInt8 =
      map negate unsignedInt8

In practice you may see something like ProtoBuf’s [ZigZag encoding][zz] which
decreases the size of small negative numbers.

[zz]: https://developers.google.com/protocol-buffers/docs/encoding#types
-}
map : (a -> b) -> Decoder a -> Decoder b
map func (Decoder decodeA) =
  Decoder
    (\source start ->
      -- The decoder yields `(newOffset, value)`; only the value is changed.
      Tuple.mapSecond func (decodeA source start)
    )
204 | 
205 | 
{-| Combine two decoders. The first decoder runs at the current position, the
second one picks up where the first stopped, and the two results are handed
to the given function.

    import Bytes exposing (Endianness(..))
    import Bytes.Decode as Decode

    type alias Point = { x : Float, y : Float }

    decoder : Decode.Decoder Point
    decoder =
      Decode.map2 Point
        (Decode.float32 BE)
        (Decode.float32 BE)
-}
map2 : (a -> b -> result) -> Decoder a -> Decoder b -> Decoder result
map2 func (Decoder decodeA) (Decoder decodeB) =
  Decoder <|
    \bites offset ->
      let
        -- Each step returns the offset where the next step must start.
        (aOffset, a) = decodeA bites offset
        (bOffset, b) = decodeB bites aOffset
      in
      (bOffset, func a b)
228 | 
229 | 
{-| Combine three decoders. They run one after another, each starting at the
offset where the previous one finished.
-}
map3 : (a -> b -> c -> result) -> Decoder a -> Decoder b -> Decoder c -> Decoder result
map3 func (Decoder decodeA) (Decoder decodeB) (Decoder decodeC) =
  Decoder
    (\source start ->
      let
        ( afterA, valueA ) =
          decodeA source start

        ( afterB, valueB ) =
          decodeB source afterA

        ( afterC, valueC ) =
          decodeC source afterB
      in
      ( afterC, func valueA valueB valueC )
    )
242 | 
243 | 
{-| Combine four decoders. They run one after another, each starting at the
offset where the previous one finished.
-}
map4 : (a -> b -> c -> d -> result) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder result
map4 func (Decoder decodeA) (Decoder decodeB) (Decoder decodeC) (Decoder decodeD) =
  Decoder
    (\source start ->
      let
        ( afterA, valueA ) =
          decodeA source start

        ( afterB, valueB ) =
          decodeB source afterA

        ( afterC, valueC ) =
          decodeC source afterB

        ( afterD, valueD ) =
          decodeD source afterC
      in
      ( afterD, func valueA valueB valueC valueD )
    )
257 | 
258 | 
{-| Combine five decoders. They run one after another, each starting at the
offset where the previous one finished.

If you need to combine more things, it is possible to define more of these
with `map2` or `andThen`.
-}
map5 : (a -> b -> c -> d -> e -> result) -> Decoder a -> Decoder b -> Decoder c -> Decoder d -> Decoder e -> Decoder result
map5 func (Decoder decodeA) (Decoder decodeB) (Decoder decodeC) (Decoder decodeD) (Decoder decodeE) =
  Decoder
    (\source start ->
      let
        ( afterA, valueA ) =
          decodeA source start

        ( afterB, valueB ) =
          decodeB source afterA

        ( afterC, valueC ) =
          decodeC source afterB

        ( afterD, valueD ) =
          decodeD source afterC

        ( afterE, valueE ) =
          decodeE source afterD
      in
      ( afterE, func valueA valueB valueC valueD valueE )
    )
274 | 
275 | 
276 | 
277 | -- AND THEN
278 | 
279 | 
{-| Decode something **and then** use that information to decode something
else. This is most common with strings or sequences where you need to read
how long the value is going to be:

    import Bytes exposing (Endianness(..))
    import Bytes.Decode as Decode

    sizedString : Decode.Decoder String
    sizedString =
      Decode.unsignedInt32 BE
        |> Decode.andThen Decode.string

The callback receives the value produced by the first decoder and returns the
decoder to run next. That second decoder starts at the offset where the first
one finished.

Check out the docs for [`succeed`](#succeed), [`fail`](#fail), and
[`loop`](#loop) to see `andThen` used in more ways!
-}
andThen : (a -> Decoder b) -> Decoder a -> Decoder b
andThen callback (Decoder decodeA) =
  Decoder <|
    \bites offset ->
      let
        (newOffset, a) = decodeA bites offset
        -- Pick the follow-up decoder based on the value just decoded.
        (Decoder decodeB) = callback a
      in
      decodeB bites newOffset
304 | 
305 | 
{-| A decoder that always succeeds with a certain value. It hands back the
offset it was given unchanged. Maybe we are making a `Maybe` decoder:

    import Bytes.Decode as Decode exposing (Decoder)

    maybe : Decoder a -> Decoder (Maybe a)
    maybe decoder =
      let
        helper n =
          if n == 0 then
            Decode.succeed Nothing
          else
            Decode.map Just decoder
      in
      Decode.unsignedInt8
        |> Decode.andThen helper

If the first byte is `00000000` then it is `Nothing`, otherwise we start
decoding the value and put it in a `Just`.
-}
succeed : a -> Decoder a
succeed a =
  Decoder <|
    \_ position ->
      ( position, a )
329 | 
330 | 
{-| A decoder that always fails. This can be useful when using `andThen` to
decode custom types:

    import Bytes exposing (Endianness(..))
    import Bytes.Encode as Encode
    import Bytes.Decode as Decode

    type Distance = Yards Float | Meters Float

    toEncoder : Distance -> Encode.Encoder
    toEncoder distance =
      case distance of
        Yards n -> Encode.sequence [ Encode.unsignedInt8 0, Encode.float32 BE n ]
        Meters n -> Encode.sequence [ Encode.unsignedInt8 1, Encode.float32 BE n ]

    decoder : Decode.Decoder Distance
    decoder =
      Decode.unsignedInt8
        |> Decode.andThen pickDecoder

    pickDecoder : Int -> Decode.Decoder Distance
    pickDecoder tag =
      case tag of
        0 -> Decode.map Yards (Decode.float32 BE)
        1 -> Decode.map Meters (Decode.float32 BE)
        _ -> Decode.fail

The encoding chosen here spends an 8-bit unsigned integer on a tag that says
which variant follows. A tag of `0` means yards, a tag of `1` means meters,
and any other tag means something went wrong!
-}
fail : Decoder a
fail =
  Decoder <| Elm.Kernel.Bytes.decodeFailure
365 | 
366 | 
367 | 
368 | -- LOOP
369 | 
370 | 
{-| Decide what steps to take next in your [`loop`](#loop).

If you are `Done`, you give the result of the whole `loop`. If you decide to
`Loop` around again, you give a new state to work from. Maybe you need to add
an item to a list? Or maybe you need to track some information about what you
just saw?

**Note:** It may be helpful to learn about [finite-state machines][fsm] to get
a broader intuition about using `state`. For example, you may want to create a
`type` that describes four possible states, and then use `Loop` to transition
between them as you consume bytes.

[fsm]: https://en.wikipedia.org/wiki/Finite-state_machine
-}
type Step state a
  = Loop state
  | Done a
388 | 
389 | 
{-| A decoder that can loop indefinitely. This can be helpful when parsing
repeated structures, like a list:

    import Bytes exposing (Endianness(..))
    import Bytes.Decode as Decode exposing (..)

    list : Decoder a -> Decoder (List a)
    list decoder =
      unsignedInt32 BE
        |> andThen (\len -> loop (len, []) (listStep decoder))

    listStep : Decoder a -> (Int, List a) -> Decoder (Step (Int, List a) (List a))
    listStep decoder (n, xs) =
      if n <= 0 then
        succeed (Done (List.reverse xs))
      else
        map (\x -> Loop (n - 1, x :: xs)) decoder

The `list` decoder first reads a 32-bit unsigned integer. That determines how
many items will be decoded. From there we use [`loop`](#loop) to track all the
items we have parsed so far and figure out when to stop. Each item is added to
the front of the accumulated list, so the list is reversed once at the end to
return the items in the order they were decoded.

The first argument is the initial state. The callback is given the current
state and returns the decoder for the next step: `Loop` to go around again
with a new state, or `Done` to finish with the final result.
-}
loop : state -> (state -> Decoder (Step state a)) -> Decoder a
loop state callback =
  Decoder (loopHelp state callback)
415 | 
416 | 
-- Worker behind `loop`: ask the callback for the decoder matching the current
-- state, run it at the current offset, then either go around again from the
-- new offset (`Loop`) or return the final offset and result (`Done`). The
-- recursive call is the last thing the `Loop` branch does.
loopHelp : state -> (state -> Decoder (Step state a)) -> Bytes -> Int -> (Int, a)
loopHelp state callback bites offset =
  let
    (Decoder runStep) =
      callback state
  in
  case runStep bites offset of
    ( nextOffset, Loop nextState ) ->
      loopHelp nextState callback bites nextOffset

    ( finalOffset, Done result ) ->
      ( finalOffset, result )
429 | 


--------------------------------------------------------------------------------