├── .eqc_ci
├── .github
    └── FUNDING.yml
├── .gitignore
├── EQC_CI_LICENCE.txt
├── LICENSE
├── NOTICE
├── README.md
├── doc
    ├── README.md
    ├── edoc-info
    ├── erlang.png
    ├── overview.edoc
    ├── sext.md
    └── stylesheet.css
├── examples
    └── tt_proto.erl
├── rebar.config
├── src
    ├── sext.app.src
    └── sext.erl
└── test
    └── sext_eqc.erl


/.eqc_ci:
--------------------------------------------------------------------------------
1 | {build,"mkdir -p ebin; erlc -o ebin -DEQC +\\{parse_transform,eqc_cover\\} src/*.erl test/*.erl"}.
2 | {test_path, "ebin"}.
3 | 


--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | 
3 | github: [uwiger]
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .rebar3
 2 | _*
 3 | .eunit
 4 | *.o
 5 | *.beam
 6 | *.plt
 7 | *.swp
 8 | *.swo
 9 | .erlang.cookie
10 | log
11 | erl_crash.dump
12 | .rebar
13 | logs
14 | _build
15 | Barrel.nonode@nohost
16 | *.iml
17 | .idea
18 | deps
19 | .eunit/
20 | ebin
21 | *~
22 | */*~
23 | erl_crash.dump
24 | current_counterexample.eqc
25 | 


--------------------------------------------------------------------------------
/EQC_CI_LICENCE.txt:
--------------------------------------------------------------------------------
 1 | This file is an agreement between Quviq AB ("Quviq"), Sven Hultins
 2 | Gata 9, Gothenburg, Sweden, and the committers to the github
 3 | repository in which the file appears ("the owner"). By placing this
 4 | file in a github repository, the owner agrees to the terms below.
 5 | 
 6 | The purpose of the agreement is to enable Quviq AB to provide a
 7 | continuous integration service to the owner, whereby the code in the
 8 | repository ("the source code") is tested using Quviq's test tools, and
 9 | the test results are made available on the web. The test results
10 | include test output, generated test cases, and a copy of the source
11 | code in the repository annotated with coverage information ("the test
12 | results").
13 | 
14 | The owner agrees that Quviq may run the tests in the source code and
15 | display the test results on the web, without obligation.
16 | 
17 | The owner warrants that running the tests in the source code and
18 | displaying the test results on the web violates no laws, licences or other
19 | agreements. In the event of such a violation, the owner accepts full
20 | responsibility.
21 | 
22 | The owner warrants that the source code is not malicious, and will not
23 | mount an attack on either Quviq's server or any other server--for
24 | example by taking part in a denial of service attack, or by attempting
25 | to send unsolicited emails.
26 | 
27 | The owner warrants that the source code does not attempt to reverse
28 | engineer Quviq's code.
29 | 
30 | Quviq reserves the right to exclude repositories that break this
31 | agreement from its continuous integration service.
32 | 
33 | Any dispute arising from the use of Quviq's service will be resolved
34 | under Swedish law.
35 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Original author: Ulf Wiger, Erlang Solutions, 2009
2 | 
3 | Copyright transferred to Ulf Wiger 2014
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # The sext application #
  2 | 
  3 | [<img src="http://quickcheck-ci.com/p/uwiger/sext.png" alt="Build Status" width="160px">](http://quickcheck-ci.com/p/uwiger/sext)
  4 | 
  5 | __Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
  6 | 
  7 | A sortable serialization library
  8 | This library offers a serialization format (a la term_to_binary()) that
  9 | preserves the Erlang term order.
 10 | 
 11 | ```
 12 | 
 13 | Copyright 2014-2020 Ulf Wiger
 14 | 
 15 | Licensed under the Apache License, Version 2.0 (the "License");
 16 | you may not use this file except in compliance with the License.
 17 | You may obtain a copy of the License at
 18 | 
 19 | http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing, software
 22 | distributed under the License is distributed on an "AS IS" BASIS,
 23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 24 | See the License for the specific language governing permissions and
 25 | limitations under the License.
 26 | 
 27 | ```
 28 | 
 29 | 
 30 | # 1. Introduction #
 31 | 
 32 | The idea for this library came out of the need for disk-based storage
 33 | with ordered_set semantics in Erlang. One previous solution used Tokyo Cabinet,
 34 | in which a C routine is used to hook into the sorting logic of TC.
 35 | 
 36 | I thought a more generic solution would be to be able to have a version
 37 | of term_to_binary() that respected the ordering semantics of Erlang terms.
 38 | 
 39 | A new addition is support for 'sb32' encoding. This is my own version of
 40 | Base32 encoding, with a slightly different alphabet, in order to preserve
 41 | sorting properties while generating octet strings that are perfectly safe
 42 | to use in file names.
 43 | 
 44 | Another feature is "prefix encoding", which encodes a term and truncates
 45 | the result if it encounters a "wildcard" (e.g. `'$1'`
 46 | or `'_'`). This is to enable a convenient and efficient mapping
 47 | of Erlang match specifications to e.g. prefix matching on the external storage
 48 | and subsequent match_spec matching on the found erlang terms.
 49 | 
 50 | The serialization format supports all Erlang types, and preserves the
 51 | internal Erlang term order, with a few exceptions:
 52 | 
 53 | * Floats are represented based on the IEEE 754 Binary 64 standard
 54 | representation. This is the representation used by Erlang, specifically
 55 | the representation used when encoding floats in binaries. To be exact,
 56 | `sext` first normalizes the float by encoding it as an Erlang binary, then
 57 | serializes it.
 58 | 
 59 | * In Erlang, integers are cast to floats before comparing them to a float.
 60 | This means e.g. that the relative sort order of `1` and `1.0` is undefined.
 61 | It is not possible for `sext` to preserve this ambiguity after serialization,
 62 | since it could only be done by producing identical encodings for the two
 63 | terms, thereby sacrificing the property that encoding a value and then
 64 | decoding it again, should produce the initial value.
 65 | 
 66 | 
 67 | # 2. Specification #
 68 | 
 69 | 
 70 | ## 2.1 Type tags ##
 71 | 
 72 | Each data type is encoded using a type tag (1 byte) that represents its order
 73 | in the global Erlang term ordering. The number type is divided into several
 74 | subtypes, to facilitate a reasonably efficient representation:
 75 | 
 76 | 
 77 | <table border="1"><tr align="left"><th>Type</th><th>Description</th><th>Tag</th></tr><tr><td>negbig</td><td>Negative bignum</td><td>8</td></tr><tr><td>neg4</td><td>Negative 31-bit integer</td><td>9</td></tr><tr><td>pos4</td><td>Positive 31-bit integer</td><td>10</td></tr><tr><td>posbig</td><td>Positive bignum</td><td>11</td></tr><tr><td>atom</td><td>Obj of type atom()</td><td>12</td></tr><tr><td>reference</td><td>Obj of type reference()</td><td>13</td></tr><tr><td>port</td><td>Obj of type port()</td><td>14</td></tr><tr><td>pid</td><td>Obj of type pid()</td><td>15</td></tr><tr><td>tuple</td><td>Obj of type tuple()</td><td>16</td></tr><tr><td>map</td><td>Obj of type map()</td><td>17, 1</td></tr><tr><td>list</td><td>Obj of type list()</td><td>17</td></tr><tr><td>binary</td><td>Obj of type binary()</td><td>18</td></tr><tr><td>bin_tail</td><td>Improper-tail marker followed by binary or bitstring</td><td>19</td></tr>
 78 | </table>
 79 | 
 80 | 
 81 | 
 82 | ## 2.2 Tuples ##
 83 | 
 84 | Tuples are encoded as the tuple tag, followed by a 32-bit size element,
 85 | denoting the number of elements in the tuple, followed by each element
 86 | in the tuple individually encoded.
 87 | 
 88 | 
 89 | ## 2.3 Lists ##
 90 | 
 91 | Lists are encoded as the list tag, followed by each element in the list
 92 | individually encoded, followed by the number 2 (1 byte).
 93 | 
 94 | Improper lists, e.g. `[1,2|3]`, have the number 1 inserted before the improper
 95 | tail. Since this also indicates the last element in the list, no end byte
 96 | is needed. This ensures that it sorts *before* any corresponding proper list,
 97 | as long as the improper tail is not a binary (binaries are greater than the
 98 | missing 'cons', or list, cell).
 99 | 
100 | Improper lists that have a binary or bitstring as 'tail', e.g. `[1,2|<<1>>]`,
101 | have a ?bin_tail (code 19) inserted before the tail. This ensures that it
102 | sorts after a corresponding proper list.
103 | 
104 | 
105 | ## 2.4 Binaries and bitstrings ##
106 | 
107 | A binary is basically a bitstring whose size is a multiple of 8. From a sorting
108 | perspective, binaries and bitstrings are both sorted as left-aligned bit
109 | arrays.
110 | 
111 | ```erlang
112 | 1> bitstring_to_list(<<11111111111:11>>).
113 | [56,<<7:3>>]
114 | ```
115 | 
116 | Binaries and bitstrings are encoded as the binary tag, followed by each whole
117 | byte, each padded with a leading 1 (one bit), followed by a number of 0-bits
118 | to again make the size a multiple of 8 bits, followed by a byte whose
119 | value is Bits, where Bits is the number of "remainder bits"; 8 if the original
120 | binary is 8-bit aligned.
121 | 
122 | Example:
123 | 
124 | ```erlang
125 | 2> sext:encode(<<1,2,3>>).
126 | <<18,128,192,160,96,8>>
127 | 3> <<18, 1:1,1, 1:1,2, 1:1,3, 0:5, 8>>.
128 | <<18,128,192,160,96,8>>
129 | ```
130 | 
131 | In the example above, we inserted 3 1-bits, and therefore had to insert 5 more
132 | pad bits (zeroes) at the end. The last byte is 8, signifying that the original
133 | binary was 8-bit aligned.
134 | 
135 | If the remainder is not an even 8 bits, the remainder bits are padded with
136 | a 1-bit, just like the others, then left-aligned and padded up to a whole
137 | byte (excluding the 1-bit added in front).
138 | The value of the last byte is the bit size of the remainder.
139 | 
140 | Example:
141 | 
142 | ```erlang
143 | 4> sext:encode(<<1,2,3,4:3>>).
144 | <<18,128,192,160,120,0,3>>
145 | 5> <<18, 1:1,1, 1:1,2, 1:1,3, 1:1,4:3,0:5, 0:4, 3>>.
146 | <<18,128,192,160,120,0,3>>
147 | ```
148 | 
149 | The first part of the bitstring is encoded exactly like above. The number 4:3
150 | is first padded with 1 then padded at the end to become a whole byte. Then
151 | an additional pad, 0:4, is inserted to compensate for the fact that we have
152 | inserted 4 1-bits. Finally, the last byte is 3, to signify the size of the
153 | remainder.
154 | 
155 | 
156 | ## 2.5 Positive Numbers ##
157 | 
158 | Numbers are encoded as the corresponding type tag, followed by the integer
159 | part, a marker indicating the presence of a fraction part, and the fraction
160 | part, if any. The integer part is encoded differently depending on the size
161 | of the value. The fraction part is encoded as a binary (without the 'binary'
162 | type tag).
163 | 
164 | 
165 | ### 2.5.1 Positive small integers, pos4 ###
166 | 
167 | Integers up to 31 bits are encoded as << ?pos4, I:31, F:1 >>
168 | where I is the integer value, and F is 1 if a fraction part follows;
169 | 0 otherwise.
170 | 
171 | 
172 | ### 2.5.2 Positive large integers ###
173 | 
174 | Larger integers are converted to a byte string and then encoded like
175 | binaries (without the 'binary' type tag), followed by a byte signifying
176 | whether a fraction part follows (1 if yes; 0 otherwise).
177 | 
178 | ```erlang
179 | Bytes = encode_big(I),
180 | << ?pos_big, Bytes/binary, F:8 >>
181 | ```
182 | 
183 | 
184 | ### 2.5.3 Fraction part of positive numbers ###
185 | 
186 | The representation of floating point numbers is based on the [IEEE 754 Binary 64 standard representation](http://en.wikipedia.org/wiki/Double_precision_floating-point_format). This is also the representation used by Erlang:
187 | 
188 | ```erlang
189 | <<Sign:1, Exp:11, Frac:52>> = <<F/float>>
190 | ```
191 | 
192 | The encoding extracts the integer part and encodes it as a positive integer
193 | (either pos4 or pos_big), flags the presence of a fraction part, and encodes
194 | the fraction part as a binary (without the binary tag).
195 | 
196 | 
197 | ## 2.6 Negative Numbers ##
198 | 
199 | 
200 | ### 2.6.1 Small negative numbers ###
201 | 
202 | ```erlang
203 | << ?neg4:8, IRep:31, F:1 >>
204 | ```
205 | 
206 | A negative number I is encoded as IRep = Max + I, where Max is the largest
207 | possible number that can be represented with the number of bits present for
208 | the given subtype. For example, Max for neg4 is 0x7FFF FFFF (31 bits).
209 | Keep in mind that I < 0.
210 | 
211 | The fraction flag is inverted, compared to the pos4 representation, so it will
212 | be 1 if there is no fraction part; 0 otherwise.
213 | 
214 | 
215 | ### 2.6.2 Large negative numbers ###
216 | 
217 | Larger negative numbers are encoded as:
218 | 
219 | ```erlang
220 | encode_negbig(I) ->
221 |     {Words, Max} = get_max(-I),
222 |     Bin = encode_bin_elems(list_to_binary(encode_big(Max + I))),
223 |     WordsRep = 16#FFFFffff - Words,
224 |     << ?neg_big:8, WordsRep:32, Bin/binary, F:8 >>.
225 | ```
226 | 
227 | That is, get_max() figures out how many 64-bit words are needed to represent
228 | -I (the positive number), and also gives the maximum value that can be
229 | represented in so many words. WordsRep in essence becomes a sub-subtag of
230 | the negative bignum.
231 | 
232 | 
233 | ### 2.6.3 Fraction of negative numbers ###
234 | 
235 | The fraction is encoded almost like the inverse of the positive fraction
236 | (as a "negative binary", if such a thing existed). Each byte is padded with
237 | a 0-bit rather than a 1-bit, and the byte itself is replaced by 16#ff - Byte.
238 | The sequence is then padded with 1s to become a multiple of 8 bits.
239 | 
240 | The last byte, denoting the number of significant bits in the last byte,
241 | is similarly inverted.
242 | 
243 | 
244 | ## 2.7 Atoms ##
245 | 
246 | Atoms are encoded as the atom tag, followed by the string representation of
247 | the atom using the binary encoding described above (but without the binary
248 | tag).
249 | 
250 | 
251 | ## 2.8 References ##
252 | 
253 | The encoding of references is perhaps best described by the code:
254 | 
255 | ```erlang
256 | encode_ref(R) ->
257 |     RBin = term_to_binary(R),
258 |     <<131,114,_Len:16,100,NLen:16,Name:NLen/binary,Rest/binary>> = RBin,
259 |     NameEnc = encode_bin_elems(Name),
260 |     RestEnc = encode_bin_elems(Rest),
261 |     <<?reference, NameEnc/binary, RestEnc/binary>>.
262 | ```
263 | 
264 | where encode_bin_elems(B) encodes the argument B the same way as a binary
265 | (excluding the 'binary' type tag).
266 | 
267 | 
268 | ## 2.9 Ports ##
269 | 
270 | The encoding of ports is perhaps best described by the code:
271 | 
272 | ```erlang
273 | encode_port(P) ->
274 |     PBin = term_to_binary(P),
275 |     <<131,102,100,ALen:16,Name:ALen/binary,Rest:5/binary>> = PBin,
276 |     NameEnc = encode_bin_elems(Name),
277 |     <<?port, NameEnc/binary, Rest/binary>>.
278 | ```
279 | 
280 | 
281 | ## 2.10 Pids ##
282 | 
283 | The encoding of pids is perhaps best described by the code:
284 | 
285 | ```erlang
286 | encode_pid(P) ->
287 |     PBin = term_to_binary(P),
288 |     <<131,103,100,ALen:16,Name:ALen/binary,Rest:9/binary>> = PBin,
289 |     NameEnc = encode_bin_elems(Name),
290 |     <<?pid, NameEnc/binary, Rest/binary>>.
291 | ```
292 | 
293 | 
294 | ## 2.11 Maps ##
295 | 
296 | The encoding of maps is currently experimental.
297 | Maps sort between tuples and lists. Since the smallest list is represented
298 | by `<<17, 2>>`, maps encoding starts with `<<17, 1>>` (introducing a new tag
299 | would break backwards compatibility), followed by the size of the map (4 bytes),
300 | and each Key-Value pair in the map.
301 | 
302 | 
303 | ## Modules ##
304 | 
305 | 
306 | <table width="100%" border="0" summary="list of modules">
307 | <tr><td><a href="http://github.com/uwiger/sext/blob/uw-maps/doc/sext.md" class="module">sext</a></td></tr></table>
308 | 
309 | 


--------------------------------------------------------------------------------
/doc/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # The sext application #
  4 | 
  5 | __Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
  6 | 
  7 | A sortable serialization library
  8 | This library offers a serialization format (a la term_to_binary()) that
  9 | preserves the Erlang term order.
 10 | 
 11 | ```
 12 | 
 13 | Copyright 2010 Erlang Solutions Ltd.
 14 | 
 15 | Licensed under the Apache License, Version 2.0 (the "License");
 16 | you may not use this file except in compliance with the License.
 17 | You may obtain a copy of the License at
 18 | 
 19 | http://www.apache.org/licenses/LICENSE-2.0
 20 | 
 21 | Unless required by applicable law or agreed to in writing, software
 22 | distributed under the License is distributed on an "AS IS" BASIS,
 23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 24 | See the License for the specific language governing permissions and
 25 | limitations under the License.
 26 | 
 27 | ```
 28 | 
 29 | 
 30 | # 1. Introduction #
 31 | 
 32 | The idea for this library came out of the need for disk-based storage
 33 | with ordered_set semantics in Erlang. One previous solution used Tokyo Cabinet,
 34 | in which a C routine is used to hook into the sorting logic of TC.
 35 | 
 36 | I thought a more generic solution would be to be able to have a version
 37 | of term_to_binary() that respected the ordering semantics of Erlang terms.
 38 | 
 39 | A new addition is support for 'sb32' encoding. This is my own version of
 40 | Base32 encoding, with a slightly different alphabet, in order to preserve
 41 | sorting properties while generating octet strings that are perfectly safe
 42 | to use in file names.
 43 | 
 44 | Another feature is "prefix encoding", which encodes a term and truncates
 45 | the result if it encounters a "wildcard" (e.g. `'$1'`
 46 | or `'_'`). This is to enable a convenient and efficient mapping
 47 | of Erlang match specifications to e.g. prefix matching on the external storage
 48 | and subsequent match_spec matching on the found erlang terms.
 49 | 
 50 | The serialization format supports all Erlang types, and preserves the
 51 | internal Erlang term order, with a few exceptions:
 52 | 
 53 | * Floats are represented based on the IEEE 754 Binary 64 standard
 54 | representation. This is the representation used by Erlang, specifically
 55 | the representation used when encoding floats in binaries. To be exact,
 56 | `sext` first normalizes the float by encoding it as an Erlang binary, then
 57 | serializes it.
 58 | 
 59 | * In Erlang, integers are cast to floats before comparing them to a float.
 60 | This means e.g. that the relative sort order of `1` and `1.0` is undefined.
 61 | It is not possible for `sext` to preserve this ambiguity after serialization,
 62 | since it could only be done by producing identical encodings for the two
 63 | terms, thereby sacrificing the property that encoding a value and then
 64 | decoding it again, should produce the initial value.
 65 | 
 66 | 
 67 | # 2. Specification #
 68 | 
 69 | 
 70 | ## 2.1 Type tags ##
 71 | 
 72 | Each data type is encoded using a type tag (1 byte) that represents its order
 73 | in the global Erlang term ordering. The number type is divided into several
 74 | subtypes, to facilitate a reasonably efficient representation:
 75 | 
 76 | 
 77 | <table border="1"><tr align="left"><th>Type</th><th>Description</th><th>Tag</th></tr><tr><td>negbig</td><td>Negative bignum</td><td>8</td></tr><tr><td>neg4</td><td>Negative 31-bit integer</td><td>9</td></tr><tr><td>pos4</td><td>Positive 31-bit integer</td><td>10</td></tr><tr><td>posbig</td><td>Positive bignum</td><td>11</td></tr><tr><td>atom</td><td>Obj of type atom()</td><td>12</td></tr><tr><td>reference</td><td>Obj of type reference()</td><td>13</td></tr><tr><td>port</td><td>Obj of type port()</td><td>14</td></tr><tr><td>pid</td><td>Obj of type pid()</td><td>15</td></tr><tr><td>tuple</td><td>Obj of type tuple()</td><td>16</td></tr><tr><td>map</td><td>Obj of type map()</td><td>17, 1</td></tr><tr><td>list</td><td>Obj of type list()</td><td>17</td></tr><tr><td>binary</td><td>Obj of type binary()</td><td>18</td></tr><tr><td>bin_tail</td><td>Improper-tail marker followed by binary or bitstring</td><td>19</td></tr>
 78 | </table>
 79 | 
 80 | 
 81 | 
 82 | ## 2.2 Tuples ##
 83 | 
 84 | Tuples are encoded as the tuple tag, followed by a 32-bit size element,
 85 | denoting the number of elements in the tuple, followed by each element
 86 | in the tuple individually encoded.
 87 | 
 88 | 
 89 | ## 2.3 Lists ##
 90 | 
 91 | Lists are encoded as the list tag, followed by each element in the list
 92 | individually encoded, followed by the number 2 (1 byte).
 93 | 
 94 | Improper lists, e.g. `[1,2|3]`, have the number 1 inserted before the improper
 95 | tail. Since this also indicates the last element in the list, no end byte
 96 | is needed. This ensures that it sorts *before* any corresponding proper list,
 97 | as long as the improper tail is not a binary (binaries are greater than the
 98 | missing 'cons', or list, cell).
 99 | 
100 | Improper lists that have a binary or bitstring as 'tail', e.g. `[1,2|<<1>>]`,
101 | have a ?bin_tail (code 19) inserted before the tail. This ensures that it
102 | sorts after a corresponding proper list.
103 | 
104 | 
105 | ## 2.4 Binaries and bitstrings ##
106 | 
107 | A binary is basically a bitstring whose size is a multiple of 8. From a sorting
108 | perspective, binaries and bitstrings are both sorted as left-aligned bit
109 | arrays.
110 | 
111 | ```erlang
112 | 1> bitstring_to_list(<<11111111111:11>>).
113 | [56,<<7:3>>]
114 | ```
115 | 
116 | Binaries and bitstrings are encoded as the binary tag, followed by each whole
117 | byte, each padded with a leading 1 (one bit), followed by a number of 0-bits
118 | to again make the size a multiple of 8 bits, followed by a byte whose
119 | value is Bits, where Bits is the number of "remainder bits"; 8 if the original
120 | binary is 8-bit aligned.
121 | 
122 | Example:
123 | 
124 | ```erlang
125 | 2> sext:encode(<<1,2,3>>).
126 | <<18,128,192,160,96,8>>
127 | 3> <<18, 1:1,1, 1:1,2, 1:1,3, 0:5, 8>>.
128 | <<18,128,192,160,96,8>>
129 | ```
130 | 
131 | In the example above, we inserted 3 1-bits, and therefore had to insert 5 more
132 | pad bits (zeroes) at the end. The last byte is 8, signifying that the original
133 | binary was 8-bit aligned.
134 | 
135 | If the remainder is not an even 8 bits, the remainder bits are padded with
136 | a 1-bit, just like the others, then left-aligned and padded up to a whole
137 | byte (excluding the 1-bit added in front).
138 | The value of the last byte is the bit size of the remainder.
139 | 
140 | Example:
141 | 
142 | ```erlang
143 | 4> sext:encode(<<1,2,3,4:3>>).
144 | <<18,128,192,160,120,0,3>>
145 | 5> <<18, 1:1,1, 1:1,2, 1:1,3, 1:1,4:3,0:5, 0:4, 3>>.
146 | <<18,128,192,160,120,0,3>>
147 | ```
148 | 
149 | The first part of the bitstring is encoded exactly like above. The number 4:3
150 | is first padded with 1 then padded at the end to become a whole byte. Then
151 | an additional pad, 0:4, is inserted to compensate for the fact that we have
152 | inserted 4 1-bits. Finally, the last byte is 3, to signify the size of the
153 | remainder.
154 | 
155 | 
156 | ## 2.5 Positive Numbers ##
157 | 
158 | Numbers are encoded as the corresponding type tag, followed by the integer
159 | part, a marker indicating the presence of a fraction part, and the fraction
160 | part, if any. The integer part is encoded differently depending on the size
161 | of the value. The fraction part is encoded as a binary (without the 'binary'
162 | type tag).
163 | 
164 | 
165 | ### 2.5.1 Positive small integers, pos4 ###
166 | 
167 | Integers up to 31 bits are encoded as << ?pos4, I:31, F:1 >>
168 | where I is the integer value, and F is 1 if a fraction part follows;
169 | 0 otherwise.
170 | 
171 | 
172 | ### 2.5.2 Positive large integers ###
173 | 
174 | Larger integers are converted to a byte string and then encoded like
175 | binaries (without the 'binary' type tag), followed by a byte signifying
176 | whether a fraction part follows (1 if yes; 0 otherwise).
177 | 
178 | ```erlang
179 | Bytes = encode_big(I),
180 | << ?pos_big, Bytes/binary, F:8 >>
181 | ```
182 | 
183 | 
184 | ### 2.5.3 Fraction part of positive numbers ###
185 | 
186 | The representation of floating point numbers is based on the [IEEE 754 Binary 64 standard representation](http://en.wikipedia.org/wiki/Double_precision_floating-point_format). This is also the representation used by Erlang:
187 | 
188 | ```erlang
189 | <<Sign:1, Exp:11, Frac:52>> = <<F/float>>
190 | ```
191 | 
192 | The encoding extracts the integer part and encodes it as a positive integer
193 | (either pos4 or pos_big), flags the presence of a fraction part, and encodes
194 | the fraction part as a binary (without the binary tag).
195 | 
196 | 
197 | ## 2.6 Negative Numbers ##
198 | 
199 | 
200 | ### 2.6.1 Small negative numbers ###
201 | 
202 | ```erlang
203 | << ?neg4:8, IRep:31, F:1 >>
204 | ```
205 | 
206 | A negative number I is encoded as IRep = Max + I, where Max is the largest
207 | possible number that can be represented with the number of bits present for
208 | the given subtype. For example, Max for neg4 is 0x7FFF FFFF (31 bits).
209 | Keep in mind that I < 0.
210 | 
211 | The fraction flag is inverted, compared to the pos4 representation, so it will
212 | be 1 if there is no fraction part; 0 otherwise.
213 | 
214 | 
215 | ### 2.6.2 Large negative numbers ###
216 | 
217 | Larger negative numbers are encoded as:
218 | 
219 | ```erlang
220 | encode_negbig(I) ->
221 |     {Words, Max} = get_max(-I),
222 |     Bin = encode_bin_elems(list_to_binary(encode_big(Max + I))),
223 |     WordsRep = 16#FFFFffff - Words,
224 |     << ?neg_big:8, WordsRep:32, Bin/binary, F:8 >>.
225 | ```
226 | 
227 | That is, get_max() figures out how many 64-bit words are needed to represent
228 | -I (the positive number), and also gives the maximum value that can be
229 | represented in so many words. WordsRep in essence becomes a sub-subtag of
230 | the negative bignum.
231 | 
232 | 
233 | ### 2.6.3 Fraction of negative numbers ###
234 | 
235 | The fraction is encoded almost like the inverse of the positive fraction
236 | (as a "negative binary", if such a thing existed). Each byte is padded with
237 | a 0-bit rather than a 1-bit, and the byte itself is replaced by 16#ff - Byte.
238 | The sequence is then padded with 1s to become a multiple of 8 bits.
239 | 
240 | The last byte, denoting the number of significant bits in the last byte,
241 | is similarly inverted.
242 | 
243 | 
244 | ## 2.7 Atoms ##
245 | 
246 | Atoms are encoded as the atom tag, followed by the string representation of
247 | the atom using the binary encoding described above (but without the binary
248 | tag).
249 | 
250 | 
251 | ## 2.8 References ##
252 | 
253 | The encoding of references is perhaps best described by the code:
254 | 
255 | ```erlang
256 | encode_ref(R) ->
257 |     RBin = term_to_binary(R),
258 |     <<131,114,_Len:16,100,NLen:16,Name:NLen/binary,Rest/binary>> = RBin,
259 |     NameEnc = encode_bin_elems(Name),
260 |     RestEnc = encode_bin_elems(Rest),
261 |     <<?reference, NameEnc/binary, RestEnc/binary>>.
262 | ```
263 | 
264 | where encode_bin_elems(B) encodes the argument B the same way as a binary
265 | (excluding the 'binary' type tag).
266 | 
267 | 
268 | ## 2.9 Ports ##
269 | 
270 | The encoding of ports is perhaps best described by the code:
271 | 
272 | ```erlang
273 | encode_port(P) ->
274 |     PBin = term_to_binary(P),
275 |     <<131,102,100,ALen:16,Name:ALen/binary,Rest:5/binary>> = PBin,
276 |     NameEnc = encode_bin_elems(Name),
277 |     <<?port, NameEnc/binary, Rest/binary>>.
278 | ```
279 | 
280 | 
281 | ## 2.10 Pids ##
282 | 
283 | The encoding of pids is perhaps best described by the code:
284 | 
285 | ```erlang
286 | encode_pid(P) ->
287 |     PBin = term_to_binary(P),
288 |     <<131,103,100,ALen:16,Name:ALen/binary,Rest:9/binary>> = PBin,
289 |     NameEnc = encode_bin_elems(Name),
290 |     <<?pid, NameEnc/binary, Rest/binary>>.
291 | ```
292 | 
293 | 
294 | ## 2.11 Maps ##
295 | 
296 | The encoding of maps is currently experimental.
297 | Maps sort between tuples and lists. Since the smallest list is represented
298 | by `<<17, 2>>`, maps encoding starts with `<<17, 1>>` (introducing a new tag
299 | would break backwards compatibility), followed by the size of the map (4 bytes),
300 | and each Key-Value pair in the map.
301 | 
302 | 
303 | ## Modules ##
304 | 
305 | 
306 | <table width="100%" border="0" summary="list of modules">
307 | <tr><td><a href="sext.md" class="module">sext</a></td></tr></table>
308 | 
309 | 


--------------------------------------------------------------------------------
/doc/edoc-info:
--------------------------------------------------------------------------------
1 | %% encoding: UTF-8
2 | {application,sext}.
3 | {packages,[]}.
4 | {modules,[sext]}.
5 | 


--------------------------------------------------------------------------------
/doc/erlang.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/uwiger/sext/c22486add9cc374dc8138b1f547c0999a1922a65/doc/erlang.png


--------------------------------------------------------------------------------
/doc/overview.edoc:
--------------------------------------------------------------------------------
  1 | @author Ulf Wiger <ulf@wiger.net>
  2 | @doc A sortable serialization library
  3 | This library offers a serialization format (a la term_to_binary()) that
  4 | preserves the Erlang term order.
  5 | 
  6 | <pre>
  7 | Copyright 2010 Erlang Solutions Ltd.
  8 | 
  9 | Licensed under the Apache License, Version 2.0 (the "License");
 10 | you may not use this file except in compliance with the License.
 11 | You may obtain a copy of the License at
 12 | 
 13 | http://www.apache.org/licenses/LICENSE-2.0
 14 | 
 15 | Unless required by applicable law or agreed to in writing, software
 16 | distributed under the License is distributed on an "AS IS" BASIS,
 17 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 18 | See the License for the specific language governing permissions and
 19 | limitations under the License.
 20 | </pre>
 21 | 
 22 | <h1>1. Introduction</h1>
 23 | 
 24 | The idea for this library came out of the need for disk-based storage
 25 | with ordered_set semantics in Erlang. One previous solution used Tokyo Cabinet,
 26 | in which a C routine is used to hook into the sorting logic of TC.
 27 | 
 28 | I thought a more generic solution would be to be able to have a version
 29 | of term_to_binary() that respected the ordering semantics of Erlang terms.
 30 | 
 31 | A new addition is support for 'sb32' encoding. This is my own version of
 32 | Base32 encoding, with a slightly different alphabet, in order to preserve
 33 | sorting properties while generating octet strings that are perfectly safe
 34 | to use in file names.
 35 | 
 36 | Another feature is "prefix encoding", which encodes a term and truncates
 37 | the result if it encounters a "wildcard" (e.g. <code>'$1'</code>
 38 | or <code>'_'</code>). This is to enable a convenient and efficient mapping
 39 | of Erlang match specifications to e.g. prefix matching on the external storage
 40 | and subsequent match_spec matching on the found erlang terms.
 41 | 
 42 | The serialization format supports all Erlang types, and preserves the
 43 | internal Erlang term order, with a few exceptions:
 44 | 
 45 | * Floats are represented based on the IEEE 754 Binary 64 standard
 46 | representation. This is the representation used by Erlang, specifically
 47 | the representation used when encoding floats in binaries. To be exact,
 48 | `sext' first normalizes the float by encoding it as an Erlang binary, then
 49 | serializes it.
 50 | 
 51 | * In Erlang, integers are cast to floats before comparing them to a float.
 52 | This means e.g. that the relative sort order of `1' and `1.0' is undefined.
 53 | It is not possible for `sext' to preserve this ambiguity after serialization,
 54 | since it could only be done by producing identical encodings for the two
 55 | terms, thereby sacrificing the property that encoding a value and then
 56 | decoding it again, should produce the initial value.
 57 | 
 58 | <h1>2. Specification</h1>
 59 | 
 60 | <h2>2.1 Type tags</h2>
 61 | 
 62 | Each data type is encoded using a type tag (1 byte) that represents its order
 63 | in the global Erlang term ordering. The number type is divided into several
 64 | subtypes, to facilitate a reasonably efficient representation:
 65 | 
 66 | <table border="1">
 67 |  <tr align="left">
 68 |   <th>Type</th>
 69 |   <th>Description</th>
 70 |   <th>Tag</th>
 71 |  </tr>
 72 |  <tr>
 73 |   <td>negbig</td>
 74 |   <td>Negative bignum</td>
 75 |   <td>8</td>
 76 |  </tr>
 77 |  <tr>
 78 |   <td>neg4</td>
 79 |   <td>Negative 31-bit integer</td>
 80 |   <td>9</td>
 81 |  </tr>
 82 |  <tr>
 83 |   <td>pos4</td>
 84 |   <td>Positive 31-bit integer</td>
 85 |   <td>10</td>
 86 |  </tr>
 87 |  <tr>
 88 |   <td>posbig</td>
 89 |   <td>Positive bignum</td>
 90 |   <td>11</td>
 91 |  </tr>
 92 |  <tr>
 93 |   <td>atom</td>
 94 |   <td>Obj of type atom()</td>
 95 |   <td>12</td>
 96 |  </tr>
 97 |  <tr>
 98 |   <td>reference</td>
 99 |   <td>Obj of type reference()</td>
100 |   <td>13</td>
101 |  </tr>
102 |  <tr>
103 |   <td>port</td>
104 |   <td>Obj of type port()</td>
105 |   <td>14</td>
106 |  </tr>
107 |  <tr>
108 |   <td>pid</td>
109 |   <td>Obj of type pid()</td>
110 |   <td>15</td>
111 |  </tr>
112 |  <tr>
113 |   <td>tuple</td>
114 |   <td>Obj of type tuple()</td>
115 |   <td>16</td>
116 |  </tr>
117 |  <tr>
118 |   <td>map</td>
119 |   <td>Obj of type map()</td>
120 |   <td>17, 1</td>
121 |  </tr>
122 |  <tr>
123 |   <td>list</td>
124 |   <td>Obj of type list()</td>
125 |   <td>17</td>
126 |  </tr>
127 |  <tr>
128 |   <td>binary</td>
129 |   <td>Obj of type binary()</td>
130 |   <td>18</td>
131 |  </tr>
132 |  <tr>
133 |   <td>bin_tail</td>
134 |   <td>Improper-tail marker followed by binary or bitstring</td>
135 |   <td>19</td>
136 |  </tr>
137 | </table>
138 | 
139 | <h2>2.2 Tuples</h2>
140 | 
141 | Tuples are encoded as the tuple tag, followed by a 32-bit size element,
142 | denoting the number of elements in the tuple, followed by each element
143 | in the tuple individually encoded.
144 | 
145 | <h2>2.3 Lists</h2>
146 | 
147 | Lists are encoded as the list tag, followed by each element in the list
148 | individually encoded, followed by the number 2 (1 byte).
149 | 
150 | Improper lists, e.g. `[1,2|3]', have the number 1 inserted before the improper
151 | tail. Since this also indicates the last element in the list, no end byte
152 | is needed. This ensures that it sorts *before* any corresponding proper list,
153 | as long as the improper tail is not a binary (binaries are greater than the
154 | missing 'cons', or list, cell).
155 | 
156 | Improper lists that have a binary or bitstring as 'tail', e.g. `[1,2|<<1>>]',
157 | have a ?bin_tail (code 19) inserted before the tail. This ensures that it
158 | sorts after a corresponding proper list.
159 | 
160 | <h2>2.4 Binaries and bitstrings</h2>
161 | 
162 | A binary is basically a bitstring whose size is a multiple of 8. From a sorting
163 | perspective, binaries and bitstrings are both sorted as left-aligned bit
164 | arrays.
165 | 
166 | <pre lang="erlang"><![CDATA[1> bitstring_to_list(<<11111111111:11>>).
167 | [56,<<7:3>>]]]></pre>
168 | 
169 | Binaries and bitstrings are encoded as the binary tag, followed by each whole
170 | byte, each padded with a leading 1 (one bit), followed by a number of 0-bits
171 | to again make the size a multiple of 8 bits, followed by a byte whose
172 | value is Bits, where Bits is the number of "remainder bits"; 8 if the original
173 | binary is 8-bit aligned.
174 | 
175 | Example:
176 | 
177 | <pre lang="erlang"><![CDATA[2> sext:encode(<<1,2,3>>).
178 | <<18,128,192,160,96,8>>
179 | 3> <<18, 1:1,1, 1:1,2, 1:1,3, 0:5, 8>>.
180 | <<18,128,192,160,96,8>>]]></pre>
181 | 
182 | In the example above, we inserted 3 1-bits, and therefore had to insert 5 more
183 | pad bits (zeroes) at the end. The last byte is 8, signifying that the original
184 | binary was 8-bit aligned.
185 | 
186 | If the remainder is not an even 8 bits, the remainder bits are padded with
187 | a 1-bit, just like the others, then left-aligned and padded up to a whole
188 | byte (excluding the 1-bit added in front).
189 | The value of the last byte is the bit size of the remainder.
190 | 
191 | Example:
192 | 
193 | <pre lang="erlang"><![CDATA[4> sext:encode(<<1,2,3,4:3>>).
194 | <<18,128,192,160,120,0,3>>
195 | 5> <<18, 1:1,1, 1:1,2, 1:1,3, 1:1,4:3,0:5, 0:4, 3>>.
196 | <<18,128,192,160,120,0,3>>]]></pre>
197 | 
198 | The first part of the bitstring is encoded exactly like above. The number 4:3
199 | is first padded with 1 then padded at the end to become a whole byte. Then
200 | an additional pad, 0:4, is inserted to compensate for the fact that we have
201 | inserted 4 1-bits. Finally, the last byte is 3, to signify the size of the
202 | remainder.
203 | 
204 | <h2>2.5 Positive Numbers</h2>
205 | 
206 | Numbers are encoded as the corresponding type tag, followed by the integer
207 | part, a marker indicating the presence of a fraction part, and the fraction
208 | part, if any. The integer part is encoded differently depending on the size
209 | of the value. The fraction part is encoded as a binary (without the 'binary'
210 | type tag).
211 | 
212 | <h3>2.5.1 Positive small integers, pos4</h3>
213 | 
214 | Integers up to 31 bits are encoded as &lt;&lt; ?pos4, I:31, F:1 &gt;&gt;
215 | where I is the integer value, and F is 1 if a fraction part follows;
216 | 0 otherwise.
217 | 
218 | <h3>2.5.2 Positive large integers</h3>
219 | 
220 | Larger integers are converted to a byte string and then encoded like
221 | binaries (without the 'binary' type tag), followed by a byte signifying
222 | whether a fraction part follows (1 if yes; 0 otherwise).
223 | 
224 | <pre lang="erlang"><![CDATA[Bytes = encode_big(I),
225 | << ?pos_big, Bytes/binary, F:8 >>]]></pre>
226 | 
227 | <h3>2.5.3 Fraction part of positive numbers</h3>
228 | 
229 | The representation of floating point numbers is based on the <a href="http://en.wikipedia.org/wiki/Double_precision_floating-point_format">IEEE 754 Binary 64 standard representation</a>. This is also the representation used by Erlang:
230 | 
231 | <pre lang="erlang"><![CDATA[<<Sign:1, Exp:11, Frac:52>> = <<F/float>>]]></pre>
232 | 
233 | The encoding extracts the integer part and encodes it as a positive integer
234 | (either pos4 or pos_big), flags the presence of a fraction part, and encodes
235 | the fraction part as a binary (without the binary tag).
236 | 
237 | <h2>2.6 Negative Numbers</h2>
238 | 
239 | <h3>2.6.1 Small negative numbers</h3>
240 | 
241 | <pre lang="erlang"><![CDATA[<< ?neg4:8, IRep:31, F:1 >>]]></pre>
242 | 
243 | A negative number I is encoded as IRep = Max + I, where Max is the largest
244 | possible number that can be represented with the number of bits present for
245 | the given subtype. For example, Max for neg4 is 0x7FFF FFFF (31 bits).
246 | Keep in mind that I &lt; 0.
247 | 
248 | The fraction flag is inverted, compared to the pos4 representation, so it will
249 | be 1 if there is no fraction part; 0 otherwise.
250 | 
251 | <h3>2.6.2 Large negative numbers</h3>
252 | 
253 | Larger negative numbers are encoded as:
254 | 
255 | <pre lang="erlang"><![CDATA[encode_negbig(I) ->
256 |     {Words, Max} = get_max(-I),
257 |     Bin = encode_bin_elems(list_to_binary(encode_big(Max + I))),
258 |     WordsRep = 16#FFFFffff - Words,
259 |     << ?neg_big:8, WordsRep:32, Bin/binary, F:8 >>.]]></pre>
260 | 
261 | That is, get_max() figures out how many 64-bit words are needed to represent
262 | -I (the positive number), and also gives the maximum value that can be
263 | represented in so many words. WordsRep in essence becomes a sub-subtag of
264 | the negative bignum.
265 | 
266 | <h3>2.6.3 Fraction of negative numbers</h3>
267 | 
268 | The fraction is encoded almost like the inverse of the positive fraction
269 | (as a "negative binary", if such a thing existed). Each byte is padded with
270 | a 0-bit rather than a 1-bit, and the byte itself is replaced by 16#ff - Byte.
271 | The sequence is then padded with 1s to become a multiple of 8 bits.
272 | 
273 | The last byte, denoting the number of significant bits in the last byte,
274 | is similarly inverted.
275 | 
276 | <h2>2.7 Atoms</h2>
277 | 
278 | Atoms are encoded as the atom tag, followed by the string representation of
279 | the atom using the binary encoding described above (but without the binary
280 | tag).
281 | 
282 | <h2>2.8 References</h2>
283 | 
284 | The encoding of references is perhaps best described by the code:
285 | 
286 | <pre lang="erlang"><![CDATA[encode_ref(R) ->
287 |     RBin = term_to_binary(R),
288 |     <<131,114,_Len:16,100,NLen:16,Name:NLen/binary,Rest/binary>> = RBin,
289 |     NameEnc = encode_bin_elems(Name),
290 |     RestEnc = encode_bin_elems(Rest),
291 |     <<?reference, NameEnc/binary, RestEnc/binary>>.]]></pre>
292 | 
293 | where encode_bin_elems(B) encodes the argument B the same way as a binary
294 | (excluding the 'binary' type tag).
295 | 
296 | <h2>2.9 Ports</h2>
297 | 
298 | The encoding of ports is perhaps best described by the code:
299 | 
300 | <pre lang="erlang"><![CDATA[encode_port(P) ->
301 |     PBin = term_to_binary(P),
302 |     <<131,102,100,ALen:16,Name:ALen/binary,Rest:5/binary>> = PBin,
303 |     NameEnc = encode_bin_elems(Name),
304 |     <<?port, NameEnc/binary, Rest/binary>>.]]></pre>
305 | 
306 | <h2>2.10 Pids</h2>
307 | 
308 | The encoding of pids is perhaps best described by the code:
309 | 
310 | <pre lang="erlang"><![CDATA[encode_pid(P) ->
311 |     PBin = term_to_binary(P),
312 |     <<131,103,100,ALen:16,Name:ALen/binary,Rest:9/binary>> = PBin,
313 |     NameEnc = encode_bin_elems(Name),
314 |     <<?pid, NameEnc/binary, Rest/binary>>.]]></pre>
315 | 
316 | <h2>2.11 Maps</h2>
317 | 
318 | The encoding of maps is currently experimental.
319 | 
320 | Maps sort between tuples and lists. Since the smallest list is represented
321 | by `<<17, 2>>', maps encoding starts with `<<17, 1>>' (introducing a new tag
322 | would break backwards compatibility), followed by the size of the map (4 bytes),
323 | and each Key-Value pair in the map.
324 | 
325 | @end
326 | 


--------------------------------------------------------------------------------
/doc/sext.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # Module sext #
  4 | * [Description](#description)
  5 | * [Function Index](#index)
  6 | * [Function Details](#functions)
  7 | 
  8 | 
  9 | Sortable serialization library.
 10 | __Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
 11 | <a name="index"></a>
 12 | 
 13 | ## Function Index ##
 14 | 
 15 | 
 16 | <table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#decode-1">decode/1</a></td><td>Decodes a binary generated using the function <a href="sext.md#encode-1"><code>sext:encode/1</code></a>.</td></tr><tr><td valign="top"><a href="#decode_hex-1">decode_hex/1</a></td><td></td></tr><tr><td valign="top"><a href="#decode_next-1">decode_next/1</a></td><td>Decode a binary stream, returning the next decoded term and the
 17 | stream remainder.</td></tr><tr><td valign="top"><a href="#decode_sb32-1">decode_sb32/1</a></td><td>Decodes a binary generated using the function <a href="#encode_sb32-1"><code>encode_sb32/1</code></a>.</td></tr><tr><td valign="top"><a href="#encode-1">encode/1</a></td><td>Encodes any Erlang term into a binary.</td></tr><tr><td valign="top"><a href="#encode-2">encode/2</a></td><td>Encodes an Erlang term using legacy bignum encoding.</td></tr><tr><td valign="top"><a href="#encode_hex-1">encode_hex/1</a></td><td>Encodes any Erlang term into a hex-encoded binary.</td></tr><tr><td valign="top"><a href="#encode_sb32-1">encode_sb32/1</a></td><td>Encodes any Erlang term into an sb32-encoded binary.</td></tr><tr><td valign="top"><a href="#from_hex-1">from_hex/1</a></td><td>Converts from a hex-encoded binary into a 'normal' binary.</td></tr><tr><td valign="top"><a href="#from_sb32-1">from_sb32/1</a></td><td>Converts from an sb32-encoded bitstring into a 'normal' bitstring.</td></tr><tr><td valign="top"><a href="#partial_decode-1">partial_decode/1</a></td><td>Decode a sext-encoded term or prefix embedded in a byte stream.</td></tr><tr><td valign="top"><a href="#prefix-1">prefix/1</a></td><td>Encodes a binary for prefix matching of similar encoded terms.</td></tr><tr><td valign="top"><a href="#prefix_hex-1">prefix_hex/1</a></td><td>Generates a hex-encoded binary for prefix matching.</td></tr><tr><td valign="top"><a href="#prefix_sb32-1">prefix_sb32/1</a></td><td>Generates an sb32-encoded binary for prefix matching.</td></tr><tr><td valign="top"><a href="#to_hex-1">to_hex/1</a></td><td>Converts a binary into a hex-encoded binary
 18 | This is conventional hex encoding, with the proviso that
 19 | only capital letters are used, e.g.</td></tr><tr><td valign="top"><a href="#to_sb32-1">to_sb32/1</a></td><td>Converts a bitstring into an sb32-encoded bitstring.</td></tr></table>
 20 | 
 21 | 
 22 | <a name="functions"></a>
 23 | 
 24 | ## Function Details ##
 25 | 
 26 | <a name="decode-1"></a>
 27 | 
 28 | ### decode/1 ###
 29 | 
 30 | 
 31 | <pre><code>
 32 | decode(B::binary()) -&gt; term()
 33 | </code></pre>
 34 | <br />
 35 | 
 36 | Decodes a binary generated using the function [`sext:encode/1`](sext.md#encode-1).
 37 | <a name="decode_hex-1"></a>
 38 | 
 39 | ### decode_hex/1 ###
 40 | 
 41 | `decode_hex(Data) -> any()`
 42 | 
 43 | 
 44 | <a name="decode_next-1"></a>
 45 | 
 46 | ### decode_next/1 ###
 47 | 
 48 | 
 49 | <pre><code>
 50 | decode_next(X1::Bin) -&gt; {N, Rest}
 51 | </code></pre>
 52 | <br />
 53 | 
 54 | 
 55 | Decode a binary stream, returning the next decoded term and the
 56 | stream remainder
 57 | 
 58 | 
 59 | This function will raise an exception if the beginning of `Bin` is not
 60 | a valid sext-encoded term.
 61 | <a name="decode_sb32-1"></a>
 62 | 
 63 | ### decode_sb32/1 ###
 64 | 
 65 | `decode_sb32(Data) -> any()`
 66 | 
 67 | Decodes a binary generated using the function [`encode_sb32/1`](#encode_sb32-1).
 68 | <a name="encode-1"></a>
 69 | 
 70 | ### encode/1 ###
 71 | 
 72 | 
 73 | <pre><code>
 74 | encode(T::term()) -&gt; binary()
 75 | </code></pre>
 76 | <br />
 77 | 
 78 | Encodes any Erlang term into a binary.
 79 | The lexical sorting properties of the encoded binary match those of the
 80 | original Erlang term. That is, encoded terms sort the same way as the
 81 | original terms would.
 82 | <a name="encode-2"></a>
 83 | 
 84 | ### encode/2 ###
 85 | 
 86 | 
 87 | <pre><code>
 88 | encode(T::term(), Legacy::boolean()) -&gt; binary()
 89 | </code></pre>
 90 | <br />
 91 | 
 92 | 
 93 | Encodes an Erlang term using legacy bignum encoding.
 94 | On March 4 2013, Basho noticed that encoded bignums didn't always sort
 95 | properly. This bug has been fixed, but the encoding of bignums necessarily
 96 | changed in an incompatible way.
 97 | 
 98 | 
 99 | 
100 | The new decode/1 version can read the old bignum format, but the old
101 | version obviously cannot read the new. Using `encode(Term, true)`, the term
102 | will be encoded using the old format.
103 | 
104 | 
105 | Use only as transition support. This function will be deprecated in time.
106 | <a name="encode_hex-1"></a>
107 | 
108 | ### encode_hex/1 ###
109 | 
110 | 
111 | <pre><code>
112 | encode_hex(Term::any()) -&gt; binary()
113 | </code></pre>
114 | <br />
115 | 
116 | 
117 | Encodes any Erlang term into a hex-encoded binary.
118 | This is similar to [`encode/1`](#encode-1), but produces an octet string that
119 | can be used without escaping in file names (containing only the characters
120 | 0..9 and A..F). The sorting properties are preserved.
121 | 
122 | 
123 | Note: The encoding used is regular hex-encoding, with the proviso that only
124 | capital letters are used (mixing upper- and lowercase characters would break
125 | the sorting property).
126 | <a name="encode_sb32-1"></a>
127 | 
128 | ### encode_sb32/1 ###
129 | 
130 | 
131 | <pre><code>
132 | encode_sb32(Term::any()) -&gt; binary()
133 | </code></pre>
134 | <br />
135 | 
136 | 
137 | Encodes any Erlang term into an sb32-encoded binary.
138 | This is similar to [`encode/1`](#encode-1), but produces an octet string that
139 | can be used without escaping in file names (containing only the characters
140 | 0..9, A..V and '-'). The sorting properties are preserved.
141 | 
142 | 
143 | Note: The encoding used is inspired by the base32 encoding described in
144 | RFC3548, but uses a different alphabet in order to preserve the sort order.
145 | <a name="from_hex-1"></a>
146 | 
147 | ### from_hex/1 ###
148 | 
149 | 
150 | <pre><code>
151 | from_hex(Bin::binary()) -&gt; binary()
152 | </code></pre>
153 | <br />
154 | 
155 | 
156 | Converts from a hex-encoded binary into a 'normal' binary
157 | 
158 | 
159 | This function is the reverse of [`to_hex/1`](#to_hex-1).
160 | 
161 | <a name="from_sb32-1"></a>
162 | 
163 | ### from_sb32/1 ###
164 | 
165 | 
166 | <pre><code>
167 | from_sb32(Bits::bitstring()) -&gt; bitstring()
168 | </code></pre>
169 | <br />
170 | 
171 | 
172 | Converts from an sb32-encoded bitstring into a 'normal' bitstring
173 | 
174 | 
175 | This function is the reverse of [`to_sb32/1`](#to_sb32-1).
176 | <a name="partial_decode-1"></a>
177 | 
178 | ### partial_decode/1 ###
179 | 
180 | 
181 | <pre><code>
182 | partial_decode(Other::Bytes) -&gt; {full | partial, DecodedTerm, Rest}
183 | </code></pre>
184 | <br />
185 | 
186 | 
187 | Decode a sext-encoded term or prefix embedded in a byte stream.
188 | 
189 | 
190 | Example:
191 | 
192 | ```
193 |   1&gt; T = sext:encode({a,b,c}).
194 |   &lt;&lt;16,0,0,0,3,12,176,128,8,12,177,0,8,12,177,128,8&gt;&gt;
195 |   2&gt; sext:partial_decode(&lt;&lt;T/binary, "tail"&gt;&gt;).
196 |   {full,{a,b,c},&lt;&lt;"tail"&gt;&gt;}
197 |   3&gt; P = sext:prefix({a,b,'_'}).
198 |   &lt;&lt;16,0,0,0,3,12,176,128,8,12,177,0,8&gt;&gt;
199 |   4&gt; sext:partial_decode(&lt;&lt;P/binary, "tail"&gt;&gt;).
200 |   {partial,{a,b,'_'},&lt;&lt;"tail"&gt;&gt;}
201 | ```
202 | 
203 | 
204 | 
205 | Note that a decoded prefix may not be exactly like the encoded prefix.
206 | For example, `['_']` will be encoded as
207 | `<<17>>`, i.e. only the 'list' opcode. The
208 | decoded prefix will be `'_'`, since the encoded prefix would
209 | also match the empty list. The decoded prefix will always be a prefix to
210 | anything to which the original prefix is a prefix.
211 | 
212 | 
213 | For tuples, `{1,'_',3}` encoded and decoded, will result in
214 | `{1,'_','_'}`, i.e. the tuple size is kept, but the elements
215 | after the first wildcard are replaced with wildcards.
216 | <a name="prefix-1"></a>
217 | 
218 | ### prefix/1 ###
219 | 
220 | 
221 | <pre><code>
222 | prefix(X::term()) -&gt; binary()
223 | </code></pre>
224 | <br />
225 | 
226 | Encodes a binary for prefix matching of similar encoded terms.
227 | Lists and tuples can be prefixed by using the `'_'` marker,
228 | similarly to Erlang match specifications. For example:
229 | 
230 | * `prefix({1,2,'_','_'})` will result in a binary that is
231 | the same as the first part of any encoded 4-tuple with the first two
232 | elements being 1 and 2. The prefix algorithm will search for the
233 | first `'_'`, and treat all following elements as if they
234 | were `'_'`.
235 | 
236 | * `prefix([1,2|'_'])` will result in a binary that is the
237 | same as the first part of any encoded list where the first two elements
238 | are 1 and 2. `prefix([1,2,'_'])` will give the same result,
239 | as the prefix pattern is the same for all lists starting with
240 | `[1,2|...]`.
241 | 
242 | * `prefix(Binary)` will result in a binary that is the same as the
243 | encoded version of Binary, except that, instead of padding and
244 | terminating, the encoded binary is truncated to the longest byte-aligned
245 | binary. The same is done for bitstrings.
246 | 
247 | * `prefix({1,[1,2|'_'],'_'})` will prefix-encode the second
248 | element, and let it end the resulting binary. This prefix will match
249 | any 3-tuple where the first element is 1 and the second element is a
250 | list where the first two elements are 1 and 2.
251 | 
252 | * `prefix([1,[1|'_']|'_'])` will result in a prefix that
253 | matches all lists where the first element is 1 and the second element is
254 | a list where the first element is 1.
255 | 
256 | * For all other data types, the prefix is the same as the encoded term.
257 | 
258 | 
259 | <a name="prefix_hex-1"></a>
260 | 
261 | ### prefix_hex/1 ###
262 | 
263 | 
264 | <pre><code>
265 | prefix_hex(X::term()) -&gt; binary()
266 | </code></pre>
267 | <br />
268 | 
269 | Generates a hex-encoded binary for prefix matching.
270 | This is similar to [`prefix/1`](#prefix-1), but generates a prefix for binaries
271 | encoded with [`encode_hex/1`](#encode_hex-1), rather than [`encode/1`](#encode-1).
272 | <a name="prefix_sb32-1"></a>
273 | 
274 | ### prefix_sb32/1 ###
275 | 
276 | 
277 | <pre><code>
278 | prefix_sb32(X::term()) -&gt; binary()
279 | </code></pre>
280 | <br />
281 | 
282 | Generates an sb32-encoded binary for prefix matching.
283 | This is similar to [`prefix/1`](#prefix-1), but generates a prefix for binaries
284 | encoded with [`encode_sb32/1`](#encode_sb32-1), rather than [`encode/1`](#encode-1).
285 | <a name="to_hex-1"></a>
286 | 
287 | ### to_hex/1 ###
288 | 
289 | 
290 | <pre><code>
291 | to_hex(Bin::binary()) -&gt; binary()
292 | </code></pre>
293 | <br />
294 | 
295 | Converts a binary into a hex-encoded binary.
296 | This is conventional hex encoding, with the proviso that
297 | only capital letters are used, e.g. `0..9A..F`.
298 | <a name="to_sb32-1"></a>
299 | 
300 | ### to_sb32/1 ###
301 | 
302 | 
303 | <pre><code>
304 | to_sb32(Bits::bitstring()) -&gt; binary()
305 | </code></pre>
306 | <br />
307 | 
308 | 
309 | Converts a bitstring into an sb32-encoded binary.
310 | 
311 | 
312 | 
313 | sb32 (Sortable base32) is a variant of RFC3548, slightly rearranged to
314 | preserve the lexical sorting properties. Base32 was chosen to avoid
315 | filename-unfriendly characters. Also important is that the padding
316 | character be less than any character in the alphabet.
317 | 
318 | 
319 | sb32 alphabet:
320 | 
321 | ```
322 | 
323 |   0 0     6 6     12 C     18 I     24 O     30 U
324 |   1 1     7 7     13 D     19 J     25 P     31 V
325 |   2 2     8 8     14 E     20 K     26 Q  (pad) -
326 |   3 3     9 9     15 F     21 L     27 R
327 |   4 4    10 A     16 G     22 M     28 S
328 |   5 5    11 B     17 H     23 N     29 T
329 | ```
330 | 
331 | 


--------------------------------------------------------------------------------
/doc/stylesheet.css:
--------------------------------------------------------------------------------
 1 | /* standard EDoc style sheet */
 2 | body {
 3 | 	font-family: Verdana, Arial, Helvetica, sans-serif;
 4 |       	margin-left: .25in;
 5 |        	margin-right: .2in;
 6 |        	margin-top: 0.2in;
 7 |        	margin-bottom: 0.2in;
 8 |        	color: #000000;
 9 |        	background-color: #ffffff;
10 | }
11 | h1,h2 {
12 |  	margin-left: -0.2in;
13 | }
14 | div.navbar {
15 | 	background-color: #add8e6;
16 | 	padding: 0.2em;
17 | }
18 | h2.indextitle {
19 | 	padding: 0.4em;
20 | 	background-color: #add8e6;
21 | }
22 | h3.function,h3.typedecl {
23 | 	background-color: #add8e6;
24 |  	padding-left: 1em;
25 | }
26 | div.spec {
27 |  	margin-left: 2em;
28 | 	background-color: #eeeeee;
29 | }
30 | a.module,a.package {
31 | 	text-decoration:none
32 | }
33 | a.module:hover,a.package:hover {
34 | 	background-color: #eeeeee;
35 | }
36 | ul.definitions {
37 | 	list-style-type: none;
38 | }
39 | ul.index {
40 | 	list-style-type: none;
41 | 	background-color: #eeeeee;
42 | }
43 | 
44 | /*
45 |  * Minor style tweaks
46 |  */
47 | ul {
48 | 	list-style-type: square;
49 | }
50 | table {
51 | 	border-collapse: collapse;
52 | }
53 | td {
54 | 	padding: 3
55 | }
56 | 


--------------------------------------------------------------------------------
/examples/tt_proto.erl:
--------------------------------------------------------------------------------
  1 | %%==============================================================================
  2 | %% Copyright 2010 Erlang Solutions Ltd.
  3 | %%
  4 | %% Licensed under the Apache License, Version 2.0 (the "License");
  5 | %% you may not use this file except in compliance with the License.
  6 | %% You may obtain a copy of the License at
  7 | %%
  8 | %% http://www.apache.org/licenses/LICENSE-2.0
  9 | %%
 10 | %% Unless required by applicable law or agreed to in writing, software
 11 | %% distributed under the License is distributed on an "AS IS" BASIS,
 12 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | %% See the License for the specific language governing permissions and
 14 | %% limitations under the License.
 15 | %%==============================================================================
 16 | %%
 17 | %% @author Ulf Wiger <ulf.wiger@erlang-solutions.com>
 18 | %% @doc Bare-bones Tokyo Tyrant interface library.
 19 | %% This is an example to illustrate the use of Sortable EXternal Term (sext)
 20 | %% encoding.
 21 | %%
 22 | %% <a href="http://1978th.net/tokyotyrant/">Tokyo Tyrant</a> (TT) is an add-on 
 23 | %% to <a href="http://1978th.net/tokyocabinet/">Tokyo Cabinet</a>, adding
 24 | %% support for concurrent and remote access to Tokyo Cabinet (TC) through a 
 25 | %% TCP socket interface. TC supports storage of variable-length byte strings
 26 | %% as key-value pairs. The storage type can either be RAM-only or disk, and
 27 | %% either hash table or B-tree.
 28 | %%
 29 | %% Using sext-encoded terms in combination with TT's B-tree storage, it is
 30 | %% possible to store very large amounts of data on disk while honoring the 
 31 | %% Erlang Term ordering semantics. Using the `sext:prefix/1' function, it is
 32 | %% also possible to perform efficient range queries.
 33 | %%
 34 | %% Tokyo Tyrant is easy to install and get running. This module does not show
 35 | %% how that is done, nor does it automate the task of starting a TT server.
 36 | %% 
 37 | %% @end
 38 | -module(tt_proto).
 39 | 
 40 | -behaviour(gen_server).
 41 | 
 42 | -export([open/2,
 43 | 	 put/3,
 44 | 	 get/2,
 45 | 	 mget/2,
 46 | 	 keys/2]).
 47 | 
 48 | %% internal exports
 49 | -export([init/1,
 50 | 	 handle_call/3,
 51 | 	 handle_cast/2,
 52 | 	 handle_info/2,
 53 | 	 terminate/2,
 54 | 	 code_change/3]).
 55 | 
 56 | -compile(export_all).
 57 | 
 58 | -define(DEFAULT_PORT, 1978).
 59 | 
 60 | -record(st, {socket}).
 61 | 
 62 | %% @spec open(Name, Opts) -> {ok, pid()}
 63 | %% Opts = [Opt]
 64 | %% Opt  = {regname,atom()} | {port, integer()}
 65 | %%
 66 | %% @doc Connects to a running Tokyo Tyrant database server.
 67 | %% The default port, 1978, will be used unless another port is specified.
 68 | %% If the `regname' option is present, the Tokyo Tyrant proxy process will
 69 | %% register itself under that name, and the registered name can be used as 
 70 | %% an alias when accessing the database.
 71 | %% @end
 72 | %%
 73 | open(Name, Opts) ->
 74 |     case lists:keyfind(regname, 1, Opts) of
 75 | 	false ->
 76 | 	    gen_server:start_link(?MODULE, {Name, Opts}, []);
 77 | 	{_,RegName} ->
 78 | 	    gen_server:start_link({local,RegName}, ?MODULE,
 79 | 				  {Name, Opts}, [])
 80 |     end.
 81 | 
 82 | %% @spec put(TT, Key::term(), Value::term()) -> ok | {error, Reason}
 83 | %% @doc Inserts a `{Key,Value}' tuple in the database TT.
 84 | %% @end
 85 | %%
 86 | put(TT, Key, Value) ->
 87 |     cmd(TT, {put, encode(Key), encode(Value)}).
 88 | 
 89 | %% @spec get(TT, Key::term()) -> {ok, Value} | {error, Reason}
 90 | %% @doc Looks up Key in the database TT.
 91 | %% Returns `{ok,Value}' if found, otherwise `{error,Reason}'.
 92 | %% @end
 93 | %%
 94 | get(TT, Key) ->
 95 |     case ask(TT, {get, encode(Key)}) of
 96 | 	{ok, Vb} ->
 97 | 	    {ok, decode(Vb)};
 98 | 	Err ->
 99 | 	    Err
100 |     end.
101 | 
%% @spec mget(TT, Keys::[term()]) -> {ok, [{K,V}]} | {error,Reason}
%% @doc Looks up several keys in the database TT in one request.
%% Every object matching one of the keys is returned as a decoded
%% `{Key,Value}' pair. When nothing matches, the result is `{ok, []}'.
%% @end
%%
mget(TT, Keys) when is_list(Keys) ->
    EncodedKeys = [encode(Key) || Key <- Keys],
    Request = {mget, EncodedKeys},
    case ask(TT, Request) of
        {ok, Pairs} ->
            Decoded = [{decode(KBin), decode(VBin)} || {KBin, VBin} <- Pairs],
            {ok, Decoded};
        Failure ->
            Failure
    end.
116 | 
%% @spec keys(TT, Prefix) -> {ok, Keys} | {error, Reason}
%% @doc Runs a prefix search for keys in database TT.
%% Prefix is encoded with `sext:prefix/1'; see that function for the
%% pattern syntax. The request asks the server for at most 100 keys, and
%% the keys that come back are decoded into terms.
%% @end
%%
keys(TT, Prefix) ->
    Request = {keys, encode_prefix(Prefix), 100},
    case ask(TT, Request) of
        {ok, EncodedKeys} ->
            {ok, [decode(KeyBin) || KeyBin <- EncodedKeys]};
        Failure ->
            Failure
    end.
129 | 
130 | 
%% Ask the proxy process TT to perform an operation on TokyoTyrant for
%% which the only answer is a status: 0 (success), or non-zero (failure).
%%
cmd(TT, Req) ->
    Call = {cmd, Req},
    gen_server:call(TT, Call).
136 | 
%% Send a request to the proxy process TT for which a data-carrying
%% reply is expected.
ask(TT, Req) ->
    Call = {ask, Req},
    gen_server:call(TT, Call).
139 | 
%% Serialize Term with the sext encoding.
encode(Term) ->
    Encoded = sext:encode(Term),
    Encoded.
142 | 
143 | 
%% Turn a sext-encoded binary back into a term.
decode(Bin) ->
    Term = sext:decode(Bin),
    Term.
146 | 
%% Build a sext prefix binary from a prefix pattern.
encode_prefix(Term) ->
    Prefix = sext:prefix(Term),
    Prefix.
149 | 
150 | 
%% gen_server callback. Opens a passive, binary-mode TCP connection to
%% 127.0.0.1 on the port given by the `port' option (default ?DEFAULT_PORT)
%% and keeps the socket in the server state.

%% @hidden
init({_Name, Opts}) ->
    Port = proplists:get_value(port, Opts, ?DEFAULT_PORT),
    SockOpts = [binary, {active, false}, {nodelay, true}],
    case gen_tcp:connect({127,0,0,1}, Port, SockOpts) of
        {ok, Socket} ->
            {ok, #st{socket = Socket}};
        {error, Reason} ->
            %% `{error, Reason}' is not a valid init/1 return before OTP 26
            %% and is reported as a bad_return_value. `{stop, Reason}' makes
            %% start_link/3,4 return `{error, Reason}' on every release.
            {stop, Reason}
    end.
162 | 
%% gen_server callback. Serializes the request, writes it to the socket
%% and then reads the reply: a bare status for `cmd' requests, a decoded
%% payload for `ask' requests.

%% @hidden
handle_call({Kind, Req}, _From, #st{socket = Sock} = S)
  when Kind =:= cmd; Kind =:= ask ->
    gen_tcp:send(Sock, mk_req(Req)),
    Reply =
        case Kind of
            cmd -> cmd_reply(Sock);
            ask -> ask_reply(Req, Sock)
        end,
    {reply, Reply, S}.
174 | 
175 | 
%% gen_server callback. Prints any out-of-band message together with the
%% current state, and carries on.

%% @hidden
handle_info(Unexpected, State) ->
    Args = [Unexpected, State],
    io:fwrite("handle_info(~p, ~p)~n", Args),
    {noreply, State}.
180 | 
%% gen_server callback. No casts are part of the protocol, so any cast
%% stops the server with reason `unknown_cast'.

%% @hidden
handle_cast(_Msg, State) ->
    {stop, unknown_cast, State}.
184 | 
%% gen_server callback. Prints the termination reason and the final state.

%% @hidden
terminate(Reason, State) ->
    Args = [Reason, State],
    io:fwrite("terminate(~p, ~p)~n", Args).
188 | 
%% gen_server callback. The state needs no conversion between versions.

%% @hidden
code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
192 | 
193 | 
%% Builds the wire request for one operation. Every request starts with
%% the byte 16#c8 followed by a command byte (16#10 put, 16#30 get,
%% 16#31 mget, 16#58 keys), then the 32-bit sizes or counts, then the
%% payload itself.
mk_req({put, K, V}) ->
    <<16#c8, 16#10, (byte_size(K)):32, (byte_size(V)):32, K/binary, V/binary>>;
mk_req({get, K}) ->
    <<16#c8, 16#30, (byte_size(K)):32, K/binary>>;
mk_req({mget, Ks}) ->
    Count = length(Ks),
    Body = pack_values(Ks),
    <<16#c8, 16#31, Count:32, Body/binary>>;
mk_req({keys, Prefix, Limit}) ->
    <<16#c8, 16#58, (byte_size(Prefix)):32, Limit:32, Prefix/binary>>.
209 | 
%% Concatenates a list of binaries, each one preceded by its size as a
%% 32-bit integer.
pack_values(Values) ->
    pack_values(Values, <<>>).

%% Same as pack_values/1, but appends to an already packed binary.
pack_values([], Packed) ->
    Packed;
pack_values([Value | Rest], Packed) ->
    Len = byte_size(Value),
    pack_values(Rest, <<Packed/binary, Len:32, Value/binary>>).
219 | 
220 | 
%% Reads the one-byte status that answers a `cmd' request. Status 0 maps
%% to `ok', any other status byte to `{error, Status}'. A socket error is
%% returned as it is.
cmd_reply(Sock) ->
    case gen_tcp:recv(Sock, 1) of
        {ok, <<Status>>} when Status =:= 0 ->
            ok;
        {ok, <<Status>>} ->
            {error, Status};
        {error, _Reason} = Failure ->
            Failure
    end.
230 | 
%% Reads the answer to an `ask' request. A leading zero byte means
%% success, and the remaining data is parsed according to the request
%% method (the first element of Req). A `{error, Reason}' thrown while
%% parsing is turned into a return value. A lone non-zero byte is
%% returned as `{error, Byte}', and a socket error is returned as it is.
ask_reply(Req, Sock) ->
    Method = element(1, Req),
    Received = gen_tcp:recv(Sock, 0),
    case Received of
        {error, _} = Failure ->
            Failure;
        {ok, <<0, Payload/binary>>} ->
            try
                get_reply(Method, Payload, Sock)
            catch
                throw:{error, Reason} -> {error, Reason}
            end;
        {ok, <<Code>>} ->
            {error, Code}
    end.
245 | 
%% Parses the payload of a successful reply. `get' carries one
%% length-prefixed value; `mget' and `keys' carry a 32-bit item count
%% followed by that many key/value pairs or values, respectively.
get_reply(get, Data, Sock) ->
    {Value, _Unused} = get_value(Data, Sock),
    {ok, Value};
get_reply(mget, Data, Sock) ->
    {Count, Items} = get_word(Data, Sock),
    {ok, get_N(Count, Items, fun get_k_v/2, Sock)};
get_reply(keys, Data, Sock) ->
    {Count, Items} = get_word(Data, Sock),
    {ok, get_N(Count, Items, fun get_value/2, Sock)}.
257 | 
%% Takes a 32-bit integer off the front of the buffer, reading more data
%% from Sock for as long as the buffer is too short.
get_word(Buffer, Sock) ->
    case Buffer of
        <<Word:32, Tail/binary>> ->
            {Word, Tail};
        _ ->
            More = get_data(Sock),
            get_word(<<Buffer/binary, More/binary>>, Sock)
    end.
263 | 
%% Takes one value (32-bit size followed by that many bytes) off the front
%% of the buffer, reading more data from Sock until a whole value is there.
get_value(Buffer, Sock) ->
    case Buffer of
        <<Len:32, Value:Len/binary, Tail/binary>> ->
            {Value, Tail};
        _ ->
            More = get_data(Sock),
            get_value(<<Buffer/binary, More/binary>>, Sock)
    end.
269 | 
%% Takes one key/value pair off the front of the buffer. The layout is
%% key size and value size (32 bits each) followed by the key bytes and
%% the value bytes. More data is read from Sock until a whole pair is there.
get_k_v(Buffer, Sock) ->
    case Buffer of
        <<KLen:32, VLen:32, Key:KLen/binary, Val:VLen/binary, Tail/binary>> ->
            {{Key, Val}, Tail};
        _ ->
            More = get_data(Sock),
            get_k_v(<<Buffer/binary, More/binary>>, Sock)
    end.
275 | 
%% Applies the reader fun F to the buffer N times in sequence, threading
%% the unread remainder from one call to the next, and returns the items
%% in the order they were read.
get_N(0, _Data, _F, _Sock) ->
    [];
get_N(Remaining, Data, F, Sock) when Remaining > 0 ->
    {Item, Unread} = F(Data, Sock),
    Others = get_N(Remaining - 1, Unread, F, Sock),
    [Item | Others].
281 | 
%% Reads whatever is available from the socket. A socket error is thrown
%% as `{error, Reason}' so that it unwinds the reply parser.
get_data(Sock) ->
    case gen_tcp:recv(Sock, 0) of
        {error, _} = Failure -> throw(Failure);
        {ok, Chunk}          -> Chunk
    end.
289 |     
290 | 


--------------------------------------------------------------------------------
/rebar.config:
--------------------------------------------------------------------------------
 1 | %% -*- erlang -*-
 2 | {erl_opts, [debug_info]}.
 3 | 
 4 | {profiles, [{docs, [{deps,
 5 |                      [
 6 |                       {edown,
 7 |                        {git,
 8 |                         "https://github.com/uwiger/edown.git",
 9 |                         {tag,
10 |                          "0.8"}}}
11 |                      ]},
12 | 
13 |                     {edoc_opts, [{doclet, edown_doclet},
14 |                                  {packages,
15 |                                   false},
16 |                                  {subpackages,
17 |                                   true},
18 |                                  {top_level_readme,
19 |                                   {"./README.md",
20 |                                    "http://github.com/uwiger/sext"}}]}]}
21 |            ]}.
22 | 


--------------------------------------------------------------------------------
/src/sext.app.src:
--------------------------------------------------------------------------------
 1 | %% -*- erlang-indent-level: 4; indent-tabs-mode: nil -*-
 2 | %%==============================================================================
 3 | %% Copyright 2014-16 Ulf Wiger
 4 | %%
 5 | %% Licensed under the Apache License, Version 2.0 (the "License");
 6 | %% you may not use this file except in compliance with the License.
 7 | %% You may obtain a copy of the License at
 8 | %%
 9 | %% http://www.apache.org/licenses/LICENSE-2.0
10 | %%
11 | %% Unless required by applicable law or agreed to in writing, software
12 | %% distributed under the License is distributed on an "AS IS" BASIS,
13 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | %% See the License for the specific language governing permissions and
15 | %% limitations under the License.
16 | %%==============================================================================
17 | 
18 | %% @author Ulf Wiger <ulf@wiger.net>
19 | %% @doc Sortable serialization of Erlang terms.
20 | %% @end
21 | {application, sext,
22 |     [{description, "Sortable serialization library"},
23 |         {vsn, git},
24 |         {modules, []},
25 |         {registered, []},
26 |         {applications, [kernel, stdlib]},
27 |   {env, []},
28 | 
29 |   {maintainers, ["Ulf Wiger"]},
30 |   {licenses, ["Apache 2.0"]},
31 |   {links, [{"Github", "https://github.com/uwiger/sext"}]}
32 |  ]}.
33 | 


--------------------------------------------------------------------------------
/src/sext.erl:
--------------------------------------------------------------------------------
   1 | %% -*- erlang-indent-level: 4; indent-tabs-mode: nil -*-
   2 | %%==============================================================================
   3 | %% Copyright 2014-16 Ulf Wiger
   4 | %%
   5 | %% Licensed under the Apache License, Version 2.0 (the "License");
   6 | %% you may not use this file except in compliance with the License.
   7 | %% You may obtain a copy of the License at
   8 | %%
   9 | %% http://www.apache.org/licenses/LICENSE-2.0
  10 | %%
  11 | %% Unless required by applicable law or agreed to in writing, software
  12 | %% distributed under the License is distributed on an "AS IS" BASIS,
  13 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 | %% See the License for the specific language governing permissions and
  15 | %% limitations under the License.
  16 | %%==============================================================================
  17 | %%
  18 | %% @author Ulf Wiger <ulf@wiger.net>
  19 | %% @doc Sortable serialization library
  20 | %% @end
  21 | -module(sext).
  22 | 
  23 | -export([encode/1, encode/2, decode/1, decode_next/1]).
  24 | -export([encode_hex/1, decode_hex/1]).
  25 | -export([encode_sb32/1, decode_sb32/1]).
  26 | -export([prefix/1,
  27 |          partial_decode/1]).
  28 | -export([prefix_hex/1]).
  29 | -export([prefix_sb32/1]).
  30 | -export([to_sb32/1, from_sb32/1]).
  31 | -export([to_hex/1, from_hex/1]).
  32 | 
  33 | -export([reverse_sext/1]).
  34 | 
  35 | -export([pp/1]).  % for debugging only
  36 | 
  37 | -define(rev_sext , 4).
  38 | %%
  39 | -define(negbig   , 8).
  40 | -define(neg4     , 9).
  41 | -define(pos4     , 10).
  42 | -define(posbig   , 11).
  43 | -define(atom     , 12).
  44 | -define(reference, 13).
  45 | -define(port     , 14).
  46 | -define(pid      , 15).
  47 | -define(tuple    , 16).
  48 | -define(list     , 17).
  49 | -define(binary   , 18).
  50 | -define(bin_tail , 19).
  51 | 
  52 | -define(is_sext(X),
  53 |         X==?negbig;
  54 |             X==?neg4;
  55 |             X==?pos4;
  56 |             X==?posbig;
  57 |             X==?atom;
  58 |             X==?reference;
  59 |             X==?port;
  60 |             X==?pid;
  61 |             X==?tuple;
  62 |             X==?list;
  63 |             X==?binary;
  64 |             X==?bin_tail).
  65 | 
  66 | -define(IMAX1, 16#ffffFFFFffffFFFF).
  67 | 
  68 | %% -define(dbg(Fmt,Args),
  69 | %%         case get(dbg) of
  70 | %%             true -> io:fwrite("~p: " ++ Fmt, [?LINE|Args]);
  71 | %%             _ -> no_dbg
  72 | %%         end).
  73 | -define(dbg(F,A),no_debug).
  74 | 
  75 | %% @spec encode(T::term()) -> binary()
  76 | %% @doc Encodes any Erlang term into a binary.
  77 | %% The lexical sorting properties of the encoded binary match those of the
  78 | %% original Erlang term. That is, encoded terms sort the same way as the
  79 | %% original terms would.
  80 | %% @end
  81 | %%
  82 | encode(X) -> encode(X, false).
  83 | 
  84 | %% @spec encode(T::term(), Legacy::boolean()) -> binary()
  85 | %% @doc Encodes an Erlang term using legacy bignum encoding.
  86 | %% On March 4 2013, Basho noticed that encoded bignums didn't always sort
  87 | %% properly. This bug has been fixed, but the encoding of bignums necessarily
  88 | %% changed in an incompatible way.
  89 | %%
  90 | %% The new decode/1 version can read the old bignum format, but the old
  91 | %% version obviously cannot read the new. Using `encode(Term, true)', the term
  92 | %% will be encoded using the old format.
  93 | %%
  94 | %% Use only as transition support. This function will be deprecated in time.
  95 | %% @end
  96 | encode(X, Legacy) when is_tuple(X)  -> encode_tuple(X, Legacy);
  97 | encode(X, Legacy) when is_map(X)    -> encode_map(X, Legacy);
  98 | encode(X, Legacy) when is_list(X)   -> encode_list(X, Legacy);
  99 | encode(X, _) when is_pid(X)         -> encode_pid(X);
 100 | encode(X, _) when is_port(X)        -> encode_port(X);
 101 | encode(X, _) when is_reference(X)   -> encode_ref(X);
 102 | encode(X, Legacy) when is_number(X) -> encode_number(X, Legacy);
 103 | encode(X, _) when is_binary(X)      -> encode_binary(X);
 104 | encode(X, _) when is_bitstring(X)   -> encode_bitstring(X);
 105 | encode(X, _) when is_atom(X)        -> encode_atom(X).
 106 | 
 107 | %% @spec reverse_sext(binary()) -> binary()
 108 | %% @doc Reverses the sorting properties of a sext-encoded term. Reverted
 109 | %% objects compare as smaller than all sext-encoded objects.
 110 | %%
 111 | %% No hex- or sb32-encoded variants are provided. Use the `to_hex/1' or
 112 | %% `to_sb32/1' functions instead.
 113 | %% @end
 114 | reverse_sext(<<X:8, _/binary>> = B) when ?is_sext(X) ->
 115 |     NegB = encode_neg_bits(B),
 116 |     <<?rev_sext, NegB/binary>>.
 117 | 
 118 | %% @spec encode_sb32(Term::any()) -> binary()
 119 | %% @doc Encodes any Erlang term into an sb32-encoded binary.
 120 | %% This is similar to {@link encode/1}, but produces an octet string that
 121 | %% can be used without escaping in file names (containing only the characters
 122 | %% 0..9, A..V and '-'). The sorting properties are preserved.
 123 | %%
 124 | %% Note: The encoding used is inspired by the base32 encoding described in
 125 | %% RFC3548, but uses a different alphabet in order to preserve the sort order.
 126 | %% @end
 127 | %%
 128 | encode_sb32(Term) ->
 129 |     to_sb32(encode(Term)).
 130 | 
 131 | %% @spec encode_hex(Term::any()) -> binary()
 132 | %% @doc Encodes any Erlang term into a hex-encoded binary.
 133 | %% This is similar to {@link encode/1}, but produces an octet string that
 134 | %% can be used without escaping in file names (containing only the characters
 135 | %% 0..9 and A..F). The sorting properties are preserved.
 136 | %%
 137 | %% Note: The encoding used is regular hex-encoding, with the proviso that only
 138 | %% capital letters are used (mixing upper- and lowercase characters would break
 139 | %% the sorting property).
 140 | %% @end
 141 | %%
 142 | encode_hex(Term) ->
 143 |     to_hex(encode(Term)).
 144 | 
 145 | %% @spec prefix(X::term()) -> binary()
 146 | %% @doc Encodes a binary for prefix matching of similar encoded terms.
 147 | %% Lists and tuples can be prefixed by using the <code>'_'</code> marker,
 148 | %% similarly to Erlang match specifications. For example:
 149 | %% <ul>
 150 | %%  <li><code>prefix({1,2,'_','_'})</code> will result in a binary that is
 151 | %%    the same as the first part of any encoded 4-tuple with the first two
 152 | %%    elements being 1 and 2. The prefix algorithm will search for the
 153 | %%    first <code>'_'</code>, and treat all following elements as if they
 154 | %%    were <code>'_'</code>.</li>
 155 | %%  <li><code>prefix([1,2|'_'])</code> will result in a binary that is the
 156 | %%    same as the first part of any encoded list where the first two elements
 157 | %%    are 1 and 2. <code>prefix([1,2,'_'])</code> will give the same result,
 158 | %%    as the prefix pattern is the same for all lists starting with
 159 | %%    `[1,2|...]'.</li>
 160 | %%  <li>`prefix(Binary)' will result in a binary that is the same as the
 161 | %%    encoded version of Binary, except that, instead of padding and
 162 | %%    terminating, the encoded binary is truncated to the longest byte-aligned
 163 | %%    binary. The same is done for bitstrings.</li>
 164 | %%  <li><code>prefix({1,[1,2|'_'],'_'})</code> will prefix-encode the second
 165 | %%    element, and let it end the resulting binary. This prefix will match
 166 | %%    any 3-tuple where the first element is 1 and the second element is a
 167 | %%    list where the first two elements are 1 and 2.</li>
 168 | %%  <li><code>prefix([1,[1|'_']|'_'])</code> will result in a prefix that
 169 | %%    matches all lists where the first element is 1 and the second element is
 170 | %%    a list where the first element is 1.</li>
 171 | %%  <li>For all other data types, the prefix is the same as the encoded term.
 172 | %%    </li>
 173 | %% </ul>
 174 | %% @end
 175 | %%
 176 | prefix(X) ->
 177 |     {_, P} = enc_prefix(X),
 178 |     P.
 179 | 
 180 | enc_prefix(X) when is_tuple(X)     -> prefix_tuple(X);
 181 | enc_prefix(X) when is_list(X)      -> prefix_list(X);
 182 | enc_prefix(X) when is_pid(X)       -> {false, encode_pid(X)};
 183 | enc_prefix(X) when is_port(X)      -> {false, encode_port(X)};
 184 | enc_prefix(X) when is_reference(X) -> {false, encode_ref(X)};
 185 | enc_prefix(X) when is_number(X)    -> {false, encode_number(X)};
 186 | enc_prefix(X) when is_binary(X)    -> prefix_binary(X);
 187 | enc_prefix(X) when is_bitstring(X) -> prefix_bitstring(X);
 188 | enc_prefix(X) when is_atom(X) ->
 189 |     case is_wild(X) of
 190 |         true ->
 191 |             {true, <<>>};
 192 |         false ->
 193 |             {false, encode_atom(X)}
 194 |     end.
 195 | 
 196 | %% @spec prefix_sb32(X::term()) -> binary()
 197 | %% @doc Generates an sb32-encoded binary for prefix matching.
 198 | %% This is similar to {@link prefix/1}, but generates a prefix for binaries
 199 | %% encoded with {@link encode_sb32/1}, rather than {@link encode/1}.
 200 | %% @end
 201 | %%
 202 | prefix_sb32(X) ->
 203 |     chop_prefix_tail(to_sb32(prefix(X))).
 204 | 
 205 | %% @spec prefix_hex(X::term()) -> binary()
 206 | %% @doc Generates a hex-encoded binary for prefix matching.
 207 | %% This is similar to {@link prefix/1}, but generates a prefix for binaries
 208 | %% encoded with {@link encode_hex/1}, rather than {@link encode/1}.
 209 | %% @end
 210 | %%
 211 | prefix_hex(X) ->
 212 |     to_hex(prefix(X)).
 213 | 
 214 | %% Must chop of the pad character and the last encoded unit (which, if pad
 215 | %% characters are present, is not a whole byte)
 216 | %%
 217 | chop_prefix_tail(Bin) ->
 218 |     Sz = byte_size(Bin),
 219 |     Sz6 = Sz-7, Sz4 = Sz - 5, Sz3 = Sz - 4, Sz1 = Sz - 2,
 220 |     case Bin of
 221 |         << P:Sz6/binary, _, "------" >> -> P;
 222 |         << P:Sz4/binary, _, "----"   >> -> P;
 223 |         << P:Sz3/binary, _, "---"    >> -> P;
 224 |         << P:Sz1/binary, _, "-"      >> -> P;
 225 |         _ -> Bin
 226 |     end.
 227 | 
 228 | %% @spec decode(B::binary()) -> term()
 229 | %% @doc Decodes a binary generated using the function {@link sext:encode/1}.
 230 | %%
 231 | %% Note that a reverse-encoded binary (using {@link sext:reverse_sext/1})
 232 | %% decodes into the original sext-encoded binary, not into the term itself.
 233 | %% In other words, if `R = reverse_sext(encode(T))',
 234 | %% then `T = decode(decode(R))'.
 235 | %% @end
 236 | %%
 237 | decode(Elems) ->
 238 |     case decode_next(Elems) of
 239 |         {Term, <<>>} -> Term;
 240 |         Other -> erlang:error(badarg, Other)
 241 |     end.
 242 | 
 243 | %% spec decode_sb32(B::binary()) -> term()
 244 | %% @doc Decodes a binary generated using the function {@link encode_sb32/1}.
 245 | %% @end
 246 | %%
 247 | decode_sb32(Data) ->
 248 |     decode(from_sb32(Data)).
 249 | 
 250 | decode_hex(Data) ->
 251 |     decode(from_hex(Data)).
 252 | 
 253 | pp(none) -> "<none>";
 254 | pp(B) when is_bitstring(B) ->
 255 |     [ $0 + I || <<I:1>> <= B ].
 256 | 
 257 | encode_tuple(T, Legacy) ->
 258 |     Sz = size(T),
 259 |     encode_tuple_elems(1, Sz, T, <<?tuple, Sz:32>>, Legacy).
 260 | 
 261 | prefix_tuple(T) ->
 262 |     Sz = size(T),
 263 |     Elems = tuple_to_list(T),
 264 |     prefix_tuple_elems(Elems, <<?tuple, Sz:32>>).
 265 | 
 266 | %% It's easier to iterate over a tuple by converting it to a list, but
 267 | %% since the tuple /can/ be huge, let's do it this way.
 268 | encode_tuple_elems(P, Sz, T, Acc, Legacy) when P =< Sz ->
 269 |     E = encode(element(P,T), Legacy),
 270 |     encode_tuple_elems(P+1, Sz, T, <<Acc/binary, E/binary>>, Legacy);
 271 | encode_tuple_elems(_, _, _, Acc, _) ->
 272 |     Acc.
 273 | 
 274 | prefix_tuple_elems([A|T], Acc) when is_atom(A) ->
 275 |     case is_wild(A) of
 276 |         true ->
 277 |             {true, Acc};
 278 |         false ->
 279 |             E = encode(A),
 280 |             prefix_tuple_elems(T, <<Acc/binary, E/binary>>)
 281 |     end;
 282 | prefix_tuple_elems([H|T], Acc) ->
 283 |     case enc_prefix(H) of
 284 |         {true, P} ->
 285 |             {true, <<Acc/binary, P/binary>>};
 286 |         {false, E} ->
 287 |             prefix_tuple_elems(T, <<Acc/binary, E/binary>>)
 288 |     end;
 289 | prefix_tuple_elems([], Acc) ->
 290 |     {false, Acc}.
 291 | 
 292 | encode_list(L, Legacy) ->
 293 |     encode_list_elems(L, <<?list>>, Legacy).
 294 | 
 295 | prefix_list(L) ->
 296 |     prefix_list_elems(L, <<?list>>).
 297 | 
 298 | encode_map(M, Legacy) ->
 299 |     Sz = map_size(M),
 300 |     maps:fold(
 301 |       fun(K,V,Acc) ->
 302 |               <<Acc/binary, (encode(K, Legacy))/binary,
 303 |                 (encode(V, Legacy))/binary>>
 304 |       end, <<?list, 1:8, Sz:32>>, M).
 305 | 
 306 | 
 307 | encode_binary(B)    ->
 308 |     Enc = encode_bin_elems(B),
 309 |     <<?binary:8, Enc/binary>>.
 310 | 
 311 | prefix_binary(B) ->
 312 |     Enc = encode_bin_elems(B),
 313 |     {false, <<?binary:8, Enc/binary>>}.
 314 | 
 315 | encode_bitstring(B) ->
 316 |     Enc = encode_bits_elems(B),
 317 |     <<?binary:8, Enc/binary>>.
 318 | 
 319 | prefix_bitstring(B) ->
 320 |     Enc = encode_bits_elems(B),
 321 |     {false, <<?binary:8, Enc/binary>>}.
 322 | 
 323 | encode_pid(P) ->
 324 |     case term_to_binary(P) of
 325 |       <<131,88,119,ALen:8,Name:ALen/binary,NS:8/binary,C:32>> ->
 326 |         encode_pid_new(Name, NS, C);
 327 |       <<131,88,100,ALen:16,Name:ALen/binary,NS:8/binary,C:32>> ->
 328 |         encode_pid_new(Name, NS, C);
 329 |       <<131,103,100,ALen:16,Name:ALen/binary,NS:8/binary,C:8>> ->
 330 |         true = C =< 3,
 331 |         encode_pid(Name, NS, <<C>>)
 332 |     end.
 333 | 
 334 | encode_pid_new(Name, NS, C) ->
 335 |     CBin =
 336 |       case C > 3 of
 337 |         true -> <<255, C:32>>;
 338 |         false -> <<C>>
 339 |       end,
 340 |     encode_pid(Name, NS, CBin).
 341 | 
 342 | encode_pid(Name, NS, C) ->
 343 |     NameEnc = encode_bin_elems(Name),
 344 |     <<?pid, NameEnc/binary, NS/binary, C/binary>>.
 345 | 
 346 | encode_port(P) ->
 347 |     case term_to_binary(P) of
 348 |       <<131,120,119,ALen:8,Name:ALen/binary,N:64,C:32>> ->
 349 |         case N bsr 28 of
 350 |           0 -> encode_port_new(Name, <<N:32>>, C);
 351 |           _ ->
 352 |             %% N was limited to 28 bits previously, meaning the initial byte
 353 |             %% in its binary was =< 15. We therefore prefix the 8-byte N with
 354 |             %% a byte with value 16 to signal the V4 format, and to ensure V4
 355 |             %% formats sort consistently with the previous format. In this
 356 |             %% case we don't need to try shortening the C(reation) field.
 357 |             encode_port(Name, <<16,N:64>>, <<C:32>>)
 358 |         end;
 359 |       <<131,89,100,ALen:16,Name:ALen/binary,N:32,C:32>> ->
 360 |         0 = N bsr 28, % assert
 361 |         encode_port_new(Name, <<N:32>>, C);
 362 |       <<131,102,100,ALen:16,Name:ALen/binary,N:32,C:8>> ->
 363 |         0 = N bsr 28, % assert
 364 |         true = C =< 3,
 365 |         encode_port(Name, <<N:32>>, <<C>>)
 366 |     end.
 367 | 
 368 | encode_port_new(Name, N, C) ->
 369 |     CBin =
 370 |       case C > 3 of
 371 |         true -> <<255, C:32>>;
 372 |         false -> <<C>>
 373 |       end,
 374 |     encode_port(Name, N, CBin).
 375 | 
 376 | encode_port(Name, N, C) ->
 377 |     NameEnc = encode_bin_elems(Name),
 378 |     <<?port, NameEnc/binary, N/binary, C/binary>>.
 379 | 
 380 | encode_ref(R) ->
 381 |     case term_to_binary(R) of
 382 |       <<131,90,_Len:16,119,NLen:8,Name:NLen/binary,C:32,Rest/binary>> ->
 383 |         encode_ref_newer(Name, C, Rest);
 384 |       <<131,90,_Len:16,100,NLen:16,Name:NLen/binary,C:32,Rest/binary>> ->
 385 |         encode_ref_newer(Name, C, Rest);
 386 |       <<131,114,_Len:16,100,NLen:16,Name:NLen/binary,C:8,Rest/binary>> ->
 387 |         true = C =< 3,
 388 |         encode_ref(Name, <<C, Rest/binary>>)
 389 |     end.
 390 | 
 391 | encode_ref_newer(Name, C, Rest) ->
 392 |     NewRest =
 393 |       case C > 3 of
 394 |         true -> <<255, C:32, Rest/binary>>;
 395 |         false -> <<C, Rest/binary>>
 396 |       end,
 397 |     encode_ref(Name, NewRest).
 398 | 
 399 | encode_ref(Name, Rest) ->
 400 |     NameEnc = encode_bin_elems(Name),
 401 |     RestEnc = encode_bin_elems(Rest),
 402 |     <<?reference, NameEnc/binary, RestEnc/binary>>.
 403 | 
 404 | encode_atom(A) ->
 405 |     Bin = list_to_binary(atom_to_list(A)),
 406 |     Enc = encode_bin_elems(Bin),
 407 |     <<?atom, Enc/binary>>.
 408 | 
 409 | encode_number(N) ->
 410 |     encode_number(N, false).
 411 | 
 412 | encode_number(N, Legacy) when is_integer(N) ->
 413 |     encode_int(N, none, Legacy);
 414 | encode_number(F, _Legacy) when is_float(F) ->
 415 |     encode_float(F).
 416 | 
 417 | %%
 418 | %% IEEE 764 Binary 64 standard representation
 419 | %% http://en.wikipedia.org/wiki/Double_precision_floating-point_format
 420 | %%
 421 | %% |12345678 12345678 12345678 12345678 12345678 12345678 12345678 12345678
 422 | %% |iEEEEEEE EEEEffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff|
 423 | %%
 424 | %% i: sign bit
 425 | %% E: Exponent, 11 bits
 426 | %% f: fraction, 52 bits
 427 | %%
 428 | %% We perform the following operations:
 429 | %% - if E < 1023 (see Exponent bias), the integer part is 0
 430 | %%
 431 | encode_float(F) ->
 432 |     <<Sign:1, Exp0:11, Frac:52>> = <<F/float>>,
 433 |     ?dbg("F = ~p | Exp0 = ~p | Frac = ~p~n", [cF, Exp0, Frac]),
 434 |     {Int0, Fraction} =
 435 |         case Exp0 - 1023 of
 436 |             NegExp when NegExp < 0 ->
 437 |                 Offs = -NegExp,
 438 |                 ?dbg("NegExp = ~p, Offs = ~p~n"
 439 |                      "Frac = ~p~n", [NegExp, Offs, Frac]),
 440 |                 {0, << 0:Offs, 1:1,Frac:52 >>};
 441 |             Exp1 ->
 442 |                 ?dbg("Exp1 = ~p~n", [Exp1]),
 443 |                 if Exp1 >= 52 ->
 444 |                         %% Decimal part will be zero
 445 |                         {trunc(F), <<0:52>>};
 446 |                    true ->
 447 |                         R = 52-Exp1,
 448 |                         ?dbg("R = ~p~n", [R]),
 449 |                         Exp2 = Exp1 + 1,        % add the leading 1-bit
 450 |                         ?dbg("Exp2 = ~p~n", [Exp2]),
 451 |                         <<I:Exp2, Frac1:R>> = <<1:1, Frac:52>>,
 452 |                         ?dbg("I = ~p, Frac1 = ~p~n", [I,Frac1]),
 453 |                         {I, <<Frac1:R>>}
 454 |                 end
 455 |         end,
 456 |     if Sign == 1 ->
 457 |             %% explicitly encode a negative int, since Int0 can be zero.
 458 |             Int = if Int0 >= 0 -> -Int0;
 459 |                      true -> Int0
 460 |                   end,
 461 |             encode_neg_int(Int, Fraction);
 462 |        Sign == 0 ->
 463 |             encode_int(Int0, Fraction)
 464 |     end.
 465 | 
 466 | encode_neg_int(Int, Fraction)->
 467 |     encode_neg_int(Int, Fraction,false).
 468 | encode_int(I, R) ->
 469 |     encode_int(I, R, false).
 470 | 
 471 | encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff ->
 472 |     ?dbg("encode_int(~p, ~p)~n", [I,R]),
 473 |     if R == none ->
 474 |             << ?pos4, I:31, 0:1 >>;
 475 |        true ->
 476 |             RSz = bit_size(R),
 477 |             <<Fraction:RSz>> = R,
 478 |             ?dbg("Fraction = ~p~n", [Fraction]),
 479 |             if Fraction == 0 ->
 480 |                     << ?pos4, I:31, 1:1, 8:8 >>;
 481 |                true ->
 482 |                     Rbits = encode_bits_elems(R),
 483 |                     << ?pos4, I:31, 1:1, Rbits/binary >>
 484 |                end
 485 |     end;
 486 | encode_int(I,R, Legacy) when I > 16#7fffffff ->
 487 |     ?dbg("encode_int(~p, ~p)~n", [I,R]),
 488 |     Bytes = encode_big(I, Legacy),
 489 |     if R == none ->
 490 |             <<?posbig, Bytes/binary, 0:8>>;
 491 |        true ->
 492 |             RSz = bit_size(R),
 493 |             <<Fraction:RSz>> = R,
 494 |             ?dbg("Fraction = ~p~n", [Fraction]),
 495 |             if Fraction == 0 ->
 496 |                     << ?posbig, Bytes/binary, 1:8, 8:8 >>;
 497 |                true ->
 498 |                     Rbits = encode_bits_elems(R),
 499 |                     <<?posbig, Bytes/binary, 1:8, Rbits/binary>>
 500 |             end
 501 |     end;
 502 | encode_int(I, R,  Legacy) when I < 0 ->
 503 |     encode_neg_int(I, R,Legacy).
 504 | 
 505 | encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff ->
 506 |     ?dbg("encode_neg_int(~p, ~p [sz: ~p])~n", [I,pp(R), try bit_size(R) catch error:_ -> "***" end]),
 507 |     Adj = max_value(31) + I,    % keep in mind that I < 0
 508 |     ?dbg("Adj = ~p~n", [erlang:integer_to_list(Adj,2)]),
 509 |     if R == none ->
 510 |             << ?neg4, Adj:31, 1:1 >>;
 511 |        true ->
 512 |             Rbits = encode_neg_bits(R),
 513 |             ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]),
 514 |             << ?neg4, Adj:31, 0:1, Rbits/binary >>
 515 |     end;
 516 | encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF ->
 517 |     ?dbg("encode_neg_int(BIG ~p)~n", [I]),
 518 |     Bytes = encode_big_neg(I,Legacy),
 519 |     ?dbg("Bytes = ~p~n", [Bytes]),
 520 |     if R == none ->
 521 |             <<?negbig, Bytes/binary, 16#ff:8>>;
 522 |        true ->
 523 |             Rbits = encode_neg_bits(R),
 524 |             ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]),
 525 |             <<?negbig, Bytes/binary, 0, Rbits/binary>>
 526 |     end.
 527 | 
 528 | encode_big(I, Legacy) ->
 529 |     Bl = encode_big1(I),
 530 |     ?dbg("Bl = ~p~n", [Bl]),
 531 |     Bb = case Legacy of
 532 |              false ->
 533 |                  prepend_size(list_to_binary(Bl));
 534 |              true ->
 535 |                  list_to_binary(Bl)
 536 |          end,
 537 |     ?dbg("Bb = ~p~n", [Bb]),
 538 |     encode_bin_elems(Bb).
 539 | 
 540 | prepend_size(B) ->
 541 |     Sz = byte_size(B),
 542 |     <<255, (encode_size(Sz))/binary, B/binary>>.
 543 | 
 544 | remove_size_bits(<<255, T/binary>>) ->
 545 |     {_, Rest} = untag_7bits(T, <<>>),
 546 |     Rest;
 547 | remove_size_bits(B) ->
 548 |     %% legacy bignum
 549 |     B.
 550 | 
 551 | encode_size(I) when I > 127 ->
 552 |     B = int_to_binary(I),
 553 |     tag_7bits(B);
 554 | encode_size(I) ->
 555 |     <<I>>.
 556 | 
 557 | tag_7bits(B) when bit_size(B) > 7 ->
 558 |     <<H:7, T/bitstring>> = B,
 559 |     <<1:1, H:7, (tag_7bits(T))/binary>>;
 560 | tag_7bits(B) ->
 561 |     Sz = bit_size(B),
 562 |     <<I:Sz>> = B,
 563 |     <<0:1, I:7>>.
 564 | 
 565 | untag_7bits(<<1:1, H:7, T/binary>>, Acc) ->
 566 |     untag_7bits(T, <<Acc/bitstring, H:7>>);
 567 | untag_7bits(<<0:1, H:7, T/binary>>, Acc) ->
 568 |     AccBits = bit_size(Acc),
 569 |     HBits = 8 - (AccBits rem 8),
 570 |     {<<Acc/bitstring, H:HBits>>, T}.
 571 | 
 572 | int_to_binary(I) when I =< 16#ff -> <<I:8>>;
 573 | int_to_binary(I) when I =< 16#ffff -> <<I:16>>;
 574 | int_to_binary(I) when I =< 16#ffffff -> <<I:24>>;
 575 | int_to_binary(I) when I =< 16#ffffffff -> <<I:32>>;
 576 | int_to_binary(I) when I =< 16#ffffffffff -> <<I:40>>;
 577 | int_to_binary(I) when I =< 16#ffffffffffff -> <<I:48>>;
 578 | int_to_binary(I) when I =< 16#ffffffffffffff -> <<I:56>>;
 579 | int_to_binary(I) when I =< 16#ffffffffffffffff -> <<I:64>>;
 580 | int_to_binary(I) ->
 581 |     %% Realm of the ridiculous
 582 |     list_to_binary(
 583 |       lists:dropwhile(fun(X) -> X==0 end, binary_to_list(<<I:256>>))).
 584 | 
%% This function exists for documentation only; it is not used right now.
%% It is the reverse of encode_size/1, which is used for encoding bignums.
 587 | %%
 588 | %% decode_size(<<1:1, _/bitstring>> = T) ->
 589 | %%     {SzBin, Rest} = untag_7bits(T, <<>>),
 590 | %%     Bits = bit_size(SzBin),
 591 | %%     <<Sz:Bits>> = SzBin,
 592 | %%     {Sz, Rest};
 593 | %% decode_size(<<0:1, H:7, T/binary>>) ->
 594 | %%     {H, T}.
 595 | 
 596 | encode_big_neg(I,Legacy) ->
 597 |     {Words, Max} = get_max(-I),
 598 |     ?dbg("Words = ~p | Max = ~p~n", [Words,Max]),
 599 |     Iadj = Max + I,             % keep in mind that I < 0
 600 |     ?dbg("IAdj = ~p~n", [Iadj]),
 601 |     Bin = encode_big(Iadj,Legacy),
 602 |     ?dbg("Bin = ~p~n", [Bin]),
 603 |     WordsAdj = 16#ffffFFFF - Words,
 604 |     ?dbg("WordsAdj = ~p~n", [WordsAdj]),
 605 |     <<WordsAdj:32, Bin/binary>>.
 606 | 
 607 | encode_big1(I) ->
 608 |     encode_big1(I, []).
 609 | 
 610 | encode_big1(I, Acc) when I < 16#ff ->
 611 |     [I|Acc];
 612 | encode_big1(I, Acc) ->
 613 |     encode_big1(I bsr 8, [I band 16#ff | Acc]).
 614 | 
 615 | encode_list_elems([], Acc, _) ->
 616 |     <<Acc/binary, 2>>;
 617 | encode_list_elems(B, Acc, Legacy) when is_bitstring(B) ->
 618 |     %% improper list
 619 |     <<Acc/binary, ?bin_tail, (encode(B, Legacy))/binary>>;
 620 | encode_list_elems(E, Acc, Legacy) when not(is_list(E)) ->
 621 |     %% improper list
 622 |     <<Acc/binary, 1, (encode(E, Legacy))/binary>>;
 623 | encode_list_elems([H|T], Acc, Legacy) ->
 624 |     Enc = encode(H,Legacy),
 625 |     encode_list_elems(T, <<Acc/binary, Enc/binary>>, Legacy).
 626 | 
 627 | prefix_list_elems([], Acc) ->
 628 |     {false, <<Acc/binary, 2>>};
 629 | prefix_list_elems(E, Acc) when not(is_list(E)) ->
 630 |     case is_wild(E) of
 631 |         true ->
 632 |             {true, Acc};
 633 |         false ->
 634 |             Marker = if is_bitstring(E) -> ?bin_tail;
 635 |                         true -> 1
 636 |                      end,
 637 |             {Bool, P} = enc_prefix(E),
 638 |             {Bool, <<Acc/binary, Marker, P/binary>>}
 639 |     end;
 640 | prefix_list_elems([H|T], Acc) ->
 641 |     case enc_prefix(H) of
 642 |         {true, P} ->
 643 |             {true, <<Acc/binary, P/binary>>};
 644 |         {false, E} ->
 645 |             prefix_list_elems(T, <<Acc/binary, E/binary>>)
 646 |     end.
 647 | 
 648 | is_wild('_') ->
 649 |     true;
 650 | is_wild(A) when is_atom(A) ->
 651 |     case atom_to_list(A) of
 652 |         "\$" ++ S ->
 653 |             try begin
 654 |                     _ = list_to_integer(S),
 655 |                     true
 656 |                 end
 657 |             catch
 658 |                 error:_ ->
 659 |                     false
 660 |             end;
 661 |         _ ->
 662 |             false
 663 |     end;
 664 | is_wild(_) ->
 665 |     false.
 666 | 
 667 | encode_bin_elems(<<>>) ->
 668 |     <<8>>;
 669 | encode_bin_elems(B) ->
 670 |     Pad = 8 - (size(B) rem 8),
 671 |     << (<< <<1:1, B1:8>> || <<B1>> <= B >>)/bitstring, 0:Pad, 8 >>.
 672 | 
 673 | encode_neg_bits(<<>>) ->
 674 |     <<247>>;
 675 | encode_neg_bits(B) ->
 676 |     {Padded, TailBits} = pad_neg_bytes(B),
 677 |     ?dbg("TailBits = ~p~n", [TailBits]),
 678 |     TailSz0 = bit_size(TailBits),
 679 |     TailSz = 16#ff - TailSz0,
 680 |     if TailSz0 == 0 ->
 681 |             Pad = 8 - (bit_size(Padded) rem 8),
 682 |             Ip = max_value(Pad), % e.g. max_value(3) -> 2#111
 683 |             <<Padded/bitstring, Ip:Pad, TailSz:8>>;
 684 |        true ->
 685 |             ?dbg("TailSz0 = ~p~n", [TailSz0]),
 686 |             TailPad = 8 - TailSz0,
 687 |             ?dbg("TailPad = ~p~n", [TailPad]),
 688 |             Itp = (1 bsl TailPad)-1,
 689 |             ?dbg("Itp = ~p~n", [Itp]),
 690 |             Pad = 8 - ((bit_size(Padded) + 1) rem 8),
 691 |             ?dbg("Pad = ~p~n", [Pad]),
 692 |             Ip = max_value(Pad),
 693 |             ?dbg("Ip = ~p~n", [Ip]),
 694 |             ?dbg("Pad = ~p~n", [Pad]),
 695 |             ?dbg("TailSz = ~p~n", [TailSz]),
 696 |             <<Padded/bitstring, 0:1, TailBits/bitstring,
 697 |              Itp:TailPad, Ip:Pad, TailSz:8>>
 698 |     end.
 699 | 
 700 | pad_neg_bytes(Bin) ->
 701 |     pad_neg_bytes(Bin, <<>>).
 702 | 
 703 | pad_neg_bytes(<<H:8, T/bitstring>>, Acc) ->
 704 |     H1 = 16#ff - H,
 705 |     pad_neg_bytes(T, <<Acc/bitstring, 0:1, H1>>);
 706 | pad_neg_bytes(Bits, Acc) when is_bitstring(Bits) ->
 707 |     Sz = bit_size(Bits),
 708 |     Max = (1 bsl Sz) - 1,
 709 |     <<I0:Sz>> = Bits,
 710 |     I1 = Max - I0,
 711 |     {Acc, <<I1:Sz>>}.
 712 | 
 713 | encode_bits_elems(B) ->
 714 |     {Padded, TailBits} = pad_bytes(B),
 715 |     TailSz = bit_size(TailBits),
 716 |     TailPad = 8-TailSz,
 717 |     Pad = 8 - ((TailSz + TailPad + bit_size(Padded) + 1) rem 8),
 718 |     <<Padded/bitstring, 1:1, TailBits/bitstring, 0:TailPad, 0:Pad, TailSz:8>>.
 719 | 
 720 | pad_bytes(Bin) ->
 721 |     pad_bytes(Bin, <<>>).
 722 | 
 723 | pad_bytes(<<H:8, T/bitstring>>, Acc) ->
 724 |     pad_bytes(T, <<Acc/bitstring, 1:1, H>>);
 725 | pad_bytes(Bits, Acc) when is_bitstring(Bits) ->
 726 |     {Acc, Bits}.
 727 | 
 728 | 
 729 | %% ------------------------------------------------------
 730 | %% Decoding routines
 731 | 
-spec decode_next(binary()) -> {any(), binary()}.
%% @spec decode_next(Bin) -> {N, Rest}
%% @doc Decode a binary stream, returning the next decoded term and the
%% stream remainder
%%
%% This function will raise an exception if the beginning of `Bin' is not
%% a valid sext-encoded term.
%% @end
%%
%% The clauses dispatch on the leading type tag. Input with no matching
%% clause (unknown tag, empty binary) raises `function_clause', which
%% partial_decode/1 catches to report a partial term - so these must stay
%% function clauses rather than a `case'.
%% The map clause (`?list' followed by the byte 1) has to come before the
%% generic `?list' clause, which would otherwise match the same input.
decode_next(<<?rev_sext,Rest/binary>>) -> decode_rev_sext(Rest);
decode_next(<<?atom,Rest/binary>>) -> decode_atom(Rest);
decode_next(<<?pid, Rest/binary>>) -> decode_pid(Rest);
decode_next(<<?port, Rest/binary>>) -> decode_port(Rest);
decode_next(<<?reference,Rest/binary>>) -> decode_ref(Rest);
decode_next(<<?tuple,Sz:32, Rest/binary>>) -> decode_tuple(Sz,Rest);
%% decode_next(<<?nil, Rest/binary>>) -> {[], Rest};
%% decode_next(<<?old_list, Rest/binary>>) -> decode_list(Rest);
decode_next(<<?list, 1, Rest/binary>>) -> decode_map(Rest);
decode_next(<<?list, Rest/binary>>) -> decode_list(Rest);
decode_next(<<?negbig, Rest/binary>>) -> decode_neg_big(Rest);
decode_next(<<?posbig, Rest/binary>>) -> decode_pos_big(Rest);
%% The fixed-width integer forms carry 31 value bits and a 1-bit flag F.
decode_next(<<?neg4, I:31, F:1, Rest/binary>>) -> decode_neg(I,F,Rest);
decode_next(<<?pos4, I:31, F:1, Rest/binary>>) -> decode_pos(I,F,Rest);
decode_next(<<?binary, Rest/binary>>) -> decode_binary(Rest).
 755 | 
-spec partial_decode(binary()) -> {full | partial, any(), binary()}.
%% @spec partial_decode(Bytes) -> {full | partial, DecodedTerm, Rest}
%% @doc Decode a sext-encoded term or prefix embedded in a byte stream.
%%
%% Example:
%% ```
%% 1&gt; T = sext:encode({a,b,c}).
%% &lt;&lt;16,0,0,0,3,12,176,128,8,12,177,0,8,12,177,128,8&gt;&gt;
%% 2&gt; sext:partial_decode(&lt;&lt;T/binary, "tail"&gt;&gt;).
%% {full,{a,b,c},&lt;&lt;"tail"&gt;&gt;}
%% 3&gt; P = sext:prefix({a,b,'_'}).
%% &lt;&lt;16,0,0,0,3,12,176,128,8,12,177,0,8&gt;&gt;
%% 4&gt; sext:partial_decode(&lt;&lt;P/binary, "tail"&gt;&gt;).
%% {partial,{a,b,'_'},&lt;&lt;"tail"&gt;&gt;}
%% '''
%%
%% Note that a decoded prefix may not be exactly like the encoded prefix.
%% For example, <code>['_']</code> will be encoded as
%% <code>&lt;&lt;17&gt;&gt;</code>, i.e. only the 'list' opcode. The
%% decoded prefix will be <code>'_'</code>, since the encoded prefix would
%% also match the empty list. The decoded prefix will always be a prefix to
%% anything to which the original prefix is a prefix.
%%
%% For tuples, <code>{1,'_',3}</code> encoded and decoded, will result in
%% <code>{1,'_','_'}</code>, i.e. the tuple size is kept, but the elements
%% after the first wildcard are replaced with wildcards.
%% @end
%%
%% Tuples and lists are walked element by element so that a truncated
%% encoding can be reported as `partial'. Every other term goes through
%% decode_next/1; a `function_clause' error from it is taken to mean that
%% the input stops being valid sext here, and is reported as a wildcard with
%% the input returned untouched. Other error classes are not caught.
%% NOTE(review): unlike decode_next/1 there is no clause sending
%% `?list' followed by the byte 1 to decode_map/1, so an encoded map is
%% handled by partial_decode_list/1 - confirm this is intended.
partial_decode(<<?tuple, Sz:32, Rest/binary>>) ->
    partial_decode_tuple(Sz, Rest);
partial_decode(<<?list, Rest/binary>>) ->
    partial_decode_list(Rest);
partial_decode(Other) ->
    try decode_next(Other) of
        {Dec, Rest} ->
            {full, Dec, Rest}
    catch
        error:function_clause ->
            {partial, '_', Other}
    end.
 795 | 
 796 | decode_rev_sext(B) ->
 797 |     decode_neg_binary(B).
 798 | 
 799 | decode_atom(B) ->
 800 |     {Bin, Rest} = decode_binary(B),
 801 |     {list_to_atom(binary_to_list(Bin)), Rest}.
 802 | 
 803 | decode_tuple(Sz, Elems) ->
 804 |     decode_tuple(Sz,Elems,[]).
 805 | 
 806 | decode_tuple(0, Rest, Acc) ->
 807 |     {list_to_tuple(lists:reverse(Acc)), Rest};
 808 | decode_tuple(N, Elems, Acc) ->
 809 |     {Term, Rest} = decode_next(Elems),
 810 |     decode_tuple(N-1, Rest, [Term|Acc]).
 811 | 
 812 | partial_decode_tuple(Sz, Elems) ->
 813 |     partial_decode_tuple(Sz, Elems, []).
 814 | 
 815 | partial_decode_tuple(0, Rest, Acc) ->
 816 |     {full, list_to_tuple(lists:reverse(Acc)), Rest};
 817 | partial_decode_tuple(N, Elems, Acc) ->
 818 |     case partial_decode(Elems) of
 819 |         {partial, Term, Rest} ->
 820 |             {partial, list_to_tuple(
 821 |                         lists:reverse([Term|Acc]) ++ pad_(N-1)), Rest};
 822 |         {full, Dec, Rest} ->
 823 |             partial_decode_tuple(N-1, Rest, [Dec|Acc])
 824 |     end.
 825 | 
 826 | pad_(0) ->
 827 |     [];
 828 | pad_(N) when N > 0 ->
 829 |     ['_'|pad_(N-1)].
 830 | 
 831 | partial_decode_list(Elems) ->
 832 |     partial_decode_list(Elems, []).
 833 | 
 834 | partial_decode_list(<<>>, Acc) ->
 835 |     {partial, lists:reverse(Acc) ++ '_', <<>>};
 836 | partial_decode_list(<<2, Rest/binary>>, Acc) ->
 837 |     {full, lists:reverse(Acc), Rest};
 838 | partial_decode_list(<<?bin_tail, Next/binary>>, Acc) ->
 839 |     %% improper list, binary tail
 840 |     {Term, Rest} = decode_next(Next),
 841 |     {full, lists:reverse(Acc) ++ Term, Rest};
 842 | partial_decode_list(<<1, Next/binary>>, Acc) ->
 843 |     {Result, Term, Rest} = partial_decode(Next),
 844 |     {Result, lists:reverse(Acc) ++ Term, Rest};
 845 | partial_decode_list(<<X,_/binary>> = Next, Acc) when ?is_sext(X) ->
 846 |     case partial_decode(Next) of
 847 |         {full, Term, Rest} ->
 848 |             partial_decode_list(Rest, [Term|Acc]);
 849 |         {partial, Term, Rest} ->
 850 |             {partial, lists:reverse([Term|Acc]) ++ '_', Rest}
 851 |     end;
 852 | partial_decode_list(Rest, Acc) ->
 853 |     {partial, lists:reverse(Acc) ++ '_', Rest}.
 854 | 
 855 | decode_map(<<Sz:32, Rest/binary>>) ->
 856 |     decode_map(Sz, Rest, #{}).
 857 | 
 858 | decode_map(0, Rest, M) ->
 859 |     {M, Rest};
 860 | decode_map(N, Bin, M) ->
 861 |     {K, Bin1} = decode_next(Bin),
 862 |     {V, Bin2} = decode_next(Bin1),
 863 |     decode_map(N-1, Bin2, maps:put(K, V, M)).
 864 | 
 865 | 
 866 | decode_list(Elems) ->
 867 |     decode_list(Elems, []).
 868 | 
 869 | decode_list(<<2, Rest/binary>>, Acc) ->
 870 |     {lists:reverse(Acc), Rest};
 871 | decode_list(<<?bin_tail, Next/binary>>, Acc) ->
 872 |     %% improper list, binary tail
 873 |     {Term, Rest} = decode_next(Next),
 874 |     {lists:reverse(Acc) ++ Term, Rest};
 875 | decode_list(<<1, Next/binary>>, Acc) ->
 876 |     %% improper list, non-binary tail
 877 |     {Term, Rest} = decode_next(Next),
 878 |     {lists:reverse(Acc) ++ Term, Rest};
 879 | decode_list(Elems, Acc) ->
 880 |     {Term, Rest} = decode_next(Elems),
 881 |     decode_list(Rest, [Term|Acc]).
 882 | 
 883 | decode_pid(Bin) ->
 884 |     {Name, Rest} = decode_binary(Bin),
 885 |     NameSz = size(Name),
 886 |     case Rest of
 887 |       <<NS:8/binary, 255, C:4/binary, Rest1/binary>> ->
 888 |         {binary_to_term(<<131,88,100,NameSz:16,Name/binary,NS/binary,C/binary>>), Rest1};
 889 |       <<NS:8/binary, C:8, Rest1/binary>> ->
 890 |         true = C =< 3,
 891 |         {binary_to_term(<<131,103,100,NameSz:16,Name/binary,NS/binary,C>>), Rest1}
 892 |     end.
 893 | 
 894 | decode_port(Bin) ->
 895 |     {Name, Rest} = decode_binary(Bin),
 896 |     NameSz = size(Name),
 897 |     case Rest of
 898 |       <<16, N:8/binary, 255, C:4/binary, Rest1/binary>> ->
 899 |         {binary_to_term(<<131,120,100,NameSz:16,Name/binary,N/binary,C/binary>>), Rest1};
 900 |       <<N:4/binary, 255, C:4/binary, Rest1/binary>> ->
 901 |         {binary_to_term(<<131,89,100,NameSz:16,Name/binary,N/binary,C/binary>>), Rest1};
 902 |       <<N:4/binary, C:8, Rest1/binary>> ->
 903 |         true = C =< 3,
 904 |         {binary_to_term(<<131,102,100,NameSz:16,Name/binary,N/binary,C>>), Rest1}
 905 |     end.
 906 | 
 907 | decode_ref(Bin) ->
 908 |     {Name, Rest} = decode_binary(Bin),
 909 |     {Tail, Rest1} = decode_binary(Rest),
 910 |     NLen = size(Name),
 911 |     case Tail of
 912 |       <<255, C:4/binary, Tail1/binary>> ->
 913 |         Len = size(Tail1) div 4,
 914 |         RefBin = <<131,90,Len:16,100,NLen:16,Name/binary,C/binary,Tail1/binary>>,
 915 |         {binary_to_term(RefBin), Rest1};
 916 |       <<C:8, Tail1/binary>> ->
 917 |         true = C =< 3,
 918 |         Len = size(Tail1) div 4,
 919 |         RefBin = <<131,114,Len:16,100,NLen:16,Name/binary,C,Tail1/binary>>,
 920 |         {binary_to_term(RefBin), Rest1}
 921 |     end.
 922 | 
 923 | decode_neg(I, 1, Rest) ->
 924 |     {(I - 16#7fffFFFF), Rest};
 925 | decode_neg(I0, 0, Bin) ->  % for negative numbers, 0 means that it's a float
 926 |     I = 16#7fffFFFF - I0,
 927 |     ?dbg("decode_neg()... I = ~p | Bin = ~p~n", [I, Bin]),
 928 |     decode_neg_float(I, Bin).
 929 | 
 930 | decode_neg_float(0, Bin) ->
 931 |     {R, Rest} = decode_neg_binary(Bin),
 932 |     ?dbg("Bin = ~p~n", [pp(Bin)]),
 933 |     ?dbg("R = ~p | Rest = ~p~n", [pp(R), Rest]),
 934 |     Sz = bit_size(R),
 935 |     Offs = Sz - 53,
 936 |     ?dbg("Offs = ~p | Sz - ~p~n", [Offs, Sz]),
 937 |     <<_:Offs, 1:1, I:52>> = R,
 938 |     Exp = 1023 - Offs,
 939 |     <<F/float>> = <<1:1, Exp:11, I:52>>,
 940 |     {F, Rest};
 941 | decode_neg_float(I, Bin) ->
 942 |     {R, Rest} = decode_neg_binary(Bin),
 943 |     ?dbg("decode_neg_float: I = ~p | R = ~p~n", [I, R]),
 944 |     Sz = bit_size(R),
 945 |     ?dbg("Sz = ~p~n", [Sz]),
 946 |     <<Ri:Sz>> = R,
 947 |     ?dbg("Ri = ~p~n", [Ri]),
 948 |     if Ri == 0 ->
 949 |             %% special case
 950 |             {0.0-I, Rest};
 951 |        true ->
 952 |             IBits = strip_first_one(I),
 953 |             ?dbg("IBits = ~p~n", [pp(IBits)]),
 954 |             Bits = <<IBits/bitstring, Ri:Sz>>,
 955 |             ?dbg("Bits = ~p (Sz: ~p)~n", [pp(Bits), bit_size(Bits)]),
 956 |             Exp = bit_size(IBits) + 1023,
 957 |             ?dbg("Exp = ~p~n", [Exp]),
 958 |             <<Frac:52, _/bitstring>> = <<Bits/bitstring, 0:52>>,
 959 |             ?dbg("Frac = ~p~n", [Frac]),
 960 |             <<F/float>> = <<1:1, Exp:11, Frac:52>>,
 961 |             {F, Rest}
 962 |     end.
 963 | 
 964 | decode_pos(I, 0, Rest) ->
 965 |     {I, Rest};
 966 | decode_pos(0, 1, Bin) ->
 967 |     {Real, Rest} = decode_binary(Bin),
 968 |     Offs = bit_size(Real) - 53,
 969 |     <<0:Offs, 1:1, Frac:52>> = Real,
 970 |     Exp = 1023 - Offs,
 971 |     <<F/float>> = <<0:1, Exp:11, Frac:52>>,
 972 |     {F, Rest};
 973 | decode_pos(I, 1, Bin) ->        % float > 1
 974 |     ?dbg("decode_pos(~p, 1, ~p)~n", [I, Bin]),
 975 |     {Real, Rest} = decode_binary(Bin),
 976 |     case decode_binary(Bin) of
 977 |         {<<>>, Rest} ->
 978 |             <<F/float>> = <<I/float>>,
 979 |             {F, Rest};
 980 |         {Real, Rest} ->
 981 |             ?dbg("Real = ~p~n", [Real]),
 982 |             Exp = 52 - bit_size(Real) + 1023,
 983 |             ?dbg("Exp = ~p~n", [Exp]),
 984 |             Bits0 = <<I:31, Real/bitstring>>,
 985 |             ?dbg("Bits0 = ~p~n", [Bits0]),
 986 |             Bits = strip_one(Bits0),
 987 |             <<Frac:52>> = Bits,
 988 |             <<F/float>> = <<0:1, Exp:11, Frac:52>>,
 989 |             {F, Rest}
 990 |     end.
 991 | 
 992 | decode_pos_big(Bin) ->
 993 |     ?dbg("decode_pos_big(~p)~n", [Bin]),
 994 |     {Ib0, Rest} = decode_binary(Bin),
 995 |     Ib = remove_size_bits(Ib0),
 996 |     ?dbg("Ib = ~p~n", [Ib]),
 997 |     ISz = size(Ib) * 8,
 998 |     ?dbg("ISz = ~p~n", [ISz]),
 999 |     <<I:ISz>> = Ib,
1000 |     ?dbg("I = ~p~n", [I]),
1001 |     <<F:8, Rest1/binary>> = Rest,
1002 |     ?dbg("Rest1 = ~p~n", [Rest1]),
1003 |     decode_pos(I, F, Rest1).
1004 | 
%% Decode a big negative number: the reverse of encode_big_neg/2. The word
%% count is stored subtracted from 16#ffffFFFF and the magnitude is stored
%% as an offset from imax(Words). The flag byte that follows is 16#ff for a
%% plain integer and 0 when the fraction bits of a float follow; any other
%% value raises `if_clause'.
decode_neg_big(Bin) ->
    ?dbg("decode_neg_big(~p)~n", [Bin]),
    <<InvWords:32, AfterWords/binary>> = Bin,
    Words = 16#ffffFFFF - InvWords,
    ?dbg("Words = ~p~n", [Words]),
    {Sized, AfterBytes} = decode_binary(AfterWords),
    Bytes = remove_size_bits(Sized),
    ?dbg("Ib = ~p | Rest1 = ~p~n", [Bytes, AfterBytes]),
    Bits = byte_size(Bytes) * 8,
    <<Offset:Bits>> = Bytes,
    ?dbg("I0 = ~p~n", [Offset]),
    Max = imax(Words),
    ?dbg("Max = ~p~n", [Max]),
    Magnitude = Max - Offset,
    ?dbg("I = ~p~n", [Magnitude]),
    <<F:8, Rest/binary>> = AfterBytes,
    ?dbg("F = ~p | Rest2 = ~p~n", [F, Rest]),
    if F == 0 ->
            decode_neg_float(Magnitude, Rest);
       F == 16#ff ->
            {-Magnitude, Rest}
    end.
1027 | 
%% Return the bits of `I' that follow its most significant 1 bit.
%% Optimization: pick a field width close to the size of `I' so that
%% strip_one/1 does not have to skip a long run of leading zeros.
strip_first_one(I) when I < 16#ff       -> strip_one(<<I:8>>);
strip_first_one(I) when I < 16#ffff     -> strip_one(<<I:16>>);
strip_first_one(I) when I < 16#ffffff   -> strip_one(<<I:24>>);
strip_first_one(I) when I < 16#ffffffff -> strip_one(<<I:32>>);
strip_first_one(I)                      -> strip_one(<<I:52>>).

%% Drop leading 0 bits and the first 1 bit; return what follows.
%% Raises `function_clause' if the input contains no 1 bit.
strip_one(<<1:1, Rest/bitstring>>) -> Rest;
strip_one(<<0:1, Rest/bitstring>>) -> strip_one(Rest).
1040 | 
1041 | 
%% Decode a binary or bitstring written by encode_bin_elems/1 or
%% encode_bits_elems/1. Returns {Bits, Rest}. A lone byte 8 is the empty
%% binary. Input that does not start with a 1 bit raises `function_clause',
%% which partial_decode/1 depends on.
decode_binary(<<8, Rest/binary>>) -> {<<>>, Rest};
decode_binary(B)                  -> decode_binary(B, 0, <<>>).

%% `Consumed' counts the bits read so far (9 per element); it is needed to
%% find the byte boundary once the last element has been read.
decode_binary(<<1:1, Byte:8, More/bitstring>>, Consumed, Acc) ->
    case More of
        <<1:1, _/bitstring>> ->
            %% another tagged element follows, so this one is a full byte
            decode_binary(More, Consumed + 9, <<Acc/binary, Byte>>);
        _ ->
            %% Last element: skip the zero padding, then read the byte that
            %% tells how many of the 8 bits just read are real data.
            Pad = 8 - ((Consumed + 9) rem 8),
            <<0:Pad, EndBits, Rest/binary>> = More,
            Unused = 8 - EndBits,
            <<Tail:EndBits, 0:Unused>> = <<Byte>>,
            {<<Acc/binary, Tail:EndBits>>, Rest}
    end.
1056 | 
%% Decode an inverted binary or bitstring written by encode_neg_bits/1.
%% Returns {Bits, Rest}. A lone byte 247 (16#ff - 8) is the empty binary.
%% Input that does not start with a 0 bit raises `function_clause', which
%% partial_decode/1 depends on.
decode_neg_binary(<<247, Rest/binary>>) -> {<<>>, Rest};
decode_neg_binary(B)                    -> decode_neg_binary(B, 0, <<>>).

%% `Consumed' counts the bits read so far (9 per element); it is needed to
%% find the byte boundary once the last element has been read.
decode_neg_binary(<<0:1, H:8, More/bitstring>>, Consumed, Acc) ->
    Byte = 16#ff - H,
    case More of
        <<0:1, _/bitstring>> ->
            %% another tagged element follows, so this one is a full byte
            decode_neg_binary(More, Consumed + 9, <<Acc/binary, Byte>>);
        _ ->
            %% Last element: the padding is all ones here.
            Pad = 8 - ((Consumed + 9) rem 8),
            ?dbg("Pad = ~p~n", [Pad]),
            IPad = (1 bsl Pad) - 1,
            <<IPad:Pad, EndBits0, Rest/binary>> = More,
            ?dbg("EndBits0 = ~p~n", [EndBits0]),
            EndBits = 16#ff - EndBits0,
            ?dbg("EndBits = ~p~n", [EndBits]),
            case EndBits of
                0 ->
                    %% no trailing bits: the last element is a full byte
                    {<<Acc/binary, Byte>>, Rest};
                _ ->
                    <<Tail:EndBits, _/bitstring>> = <<Byte>>,
                    ?dbg("Tail = ~p~n", [Tail]),
                    {<<Acc/binary, Tail:EndBits>>, Rest}
            end
    end.
1080 | 
%% The largest unsigned value that fits in Sz bits, i.e. Sz one-bits.
max_value(Sz) ->
    (1 bsl Sz) - 1.

%% The largest unsigned value that fits in Words 64-bit words.
imax(Words) ->
    max_value(64 * Words).
1089 | 
%% Find the smallest word count W whose imax/1 value is not below I.
%% Returns {W, Max}. The bound grows by one 64-bit word per step.
get_max(I) ->
    get_max(I, 1, imax(1)).

get_max(I, Words, Bound) when I > Bound ->
    Wider = (Bound bsl 64) bor ?IMAX1,
    get_max(I, Words + 1, Wider);
get_max(_, Words, Bound) ->
    {Words, Bound}.
1096 | 
%% @spec to_sb32(Bits::bitstring()) -> binary()
%% @doc Converts a bitstring into an sb-encoded bitstring
%%
%% sb32 (Sortable base32) is a variant of RFC3548, slightly rearranged to
%% preserve the lexical sorting properties. Base32 was chosen to avoid
%% filename-unfriendly characters. Also important is that the padding
%% character be less than any character in the alphabet
%%
%% sb32 alphabet:
%% <pre>
%% 0 0     6 6     12 C     18 I     24 O     30 U
%% 1 1     7 7     13 D     19 J     25 P     31 V
%% 2 2     8 8     14 E     20 K     26 Q  (pad) -
%% 3 3     9 9     15 F     21 L     27 R
%% 4 4    10 A     16 G     22 M     28 S
%% 5 5    11 B     17 H     23 N     29 T
%% </pre>
%% @end
%%
%% Input whose size is a multiple of 5 bits maps directly to one character
%% per 5-bit group. Otherwise the 1..4 leftover bits become one more
%% character, followed by the padding chosen by encode_pad/1.
to_sb32(Bits) when is_bitstring(Bits) ->
    Sz = bit_size(Bits),
    case Sz rem 5 of
        0 ->
            << <<(c2sb32(Group))>> || <<Group:5>> <= Bits >>;
        Rem ->
            {Whole, Leftover, Pad} = sb32_encode_chunks(Sz, Rem, Bits),
            Enc = << <<(c2sb32(Group))>> || <<Group:5>> <= Whole >>,
            <<Enc/bitstring, (c2sb32(Leftover)):8, Pad/binary>>
    end.
1130 | 
%% Split `Bits' (Sz bits long) into the part made of whole 5-bit groups and
%% the last `Rem' bits, read as an integer. Returns {Whole, Leftover, Pad}.
sb32_encode_chunks(Sz, Rem, Bits) ->
    WholeSz = Sz - Rem,
    <<Whole:WholeSz/bitstring, Leftover:Rem>> = Bits,
    {Whole, Leftover, encode_pad(Rem)}.

%% Padding that follows the final character, by number of leftover bits.
encode_pad(1) -> <<"----">>;
encode_pad(2) -> <<"-">>;
encode_pad(3) -> <<"------">>;
encode_pad(4) -> <<"---">>.
1141 | 
%% @spec from_sb32(Bits::bitstring()) -> bitstring()
%% @doc Converts from an sb32-encoded bitstring into a 'normal' bitstring
%%
%% This function is the reverse of {@link to_sb32/1}.
%% @end
%%
%% The padding clauses must come before the general one: the length of the
%% padding tells how many bits the final character stands for.
from_sb32(<<>>) ->
    <<>>;
from_sb32(<<Last:8, "------">>) -> <<(sb322c(Last)):3>>;
from_sb32(<<Last:8, "----">>)   -> <<(sb322c(Last)):1>>;
from_sb32(<<Last:8, "---">>)    -> <<(sb322c(Last)):4>>;
from_sb32(<<Last:8, "-">>)      -> <<(sb322c(Last)):2>>;
from_sb32(<<Char:8, More/bitstring>>) ->
    Decoded = from_sb32(More),
    <<(sb322c(Char)):5, Decoded/bitstring>>.
1156 | 
%% Maps a 5-bit value (0..31) to its sb32 character: 0..9 -> $0..$9,
%% 10..31 -> $A..$V.
c2sb32(I) when I >= 0, I =< 9 ->
    I + $0;
c2sb32(I) when I >= 10, I =< 31 ->
    I - 10 + $A.

%% Maps an sb32 character back to its 5-bit value.
sb322c(I) when I >= $0, I =< $9 ->
    I - $0;
sb322c(I) when I >= $A, I =< $V ->
    I - $A + 10.
1162 | 
%% @spec to_hex(Bin::binary()) -> binary()
%% @doc Hex-encodes a binary, one character per 4-bit nibble
%% The encoding is conventional hex, except that only capital letters
%% are produced, e.g. `0..9A..F'.
%% @end
to_hex(Bin) ->
    list_to_binary([nib2hex(Nib) || <<Nib:4>> <= Bin]).
1170 | 
%% @spec from_hex(Bin::binary()) -> binary()
%% @doc Decodes a hex-encoded binary, 4 bits per input character
%%
%% This function is the reverse of {@link to_hex/1}.
%%
from_hex(Bin) ->
    << <<(hex2nib(Char)):4>> || <<Char>> <= Bin >>.
1178 | 
%% Nibble (0..15) to upper-case hex character.
nib2hex(N) when N >= 0, N =< 9 ->
    N + $0;
nib2hex(N) when N >= 10, N =< 15 ->
    N - 10 + $A.

%% Upper-case hex character to nibble; lower-case letters are not accepted.
hex2nib(C) when C >= $0, C =< $9 ->
    C - $0;
hex2nib(C) when C >= $A, C =< $F ->
    C - $A + 10.
1184 | 
1185 | -ifdef(TEST).
1186 | -include_lib("eunit/include/eunit.hrl").
1187 | 
%% Round-trips every term of test_list/0 through encode/decode.
%% ?assertEqual compares exactly, and a crash inside encode or decode
%% propagates with its stacktrace instead of being folded into a badmatch
%% on an 'EXIT' tuple. The list of {I,I} pairs is still returned.
encode_test() ->
    [begin
         ?assertEqual(I, decode(encode(I))),
         {I, I}
     end || I <- test_list()].
1191 | 
%% Sample terms for encode_test/0: numbers, atoms, tuples, binaries and
%% bitstrings, lists, plus a few runtime-created terms (pids, references
%% and up to two ports).
test_list() ->
    Numbers = [-456453453477456464.45456,
               -5.23423564,
               -1.234234,
               -1.23423,
               -0.345,
               -0.34567,
               -0.0034567,
               0,
               0.00012345,
               0.12345,
               1.2345,
               123.45,
               456453453477456464.45456],
    Atoms = [a, aaa],
    Tuples = [{}, {1}, {1,2}, {"","123"}, {"1","234"}],
    Bins = [<<>>, <<1>>, <<1,5:3>>, <<1,5:4>>],
    Lists = [[1,2,3], []],
    Runtime = [self(), spawn(fun() -> ok end), make_ref(), make_ref()],
    Ports = lists:sublist(erlang:ports(), 1, 2),
    lists:append([Numbers, Atoms, Tuples, Bins, Lists, Runtime, Ports]).
1224 | 
1225 | -endif.
1226 | 


--------------------------------------------------------------------------------
/test/sext_eqc.erl:
--------------------------------------------------------------------------------
  1 | %% -*- erlang-indent-level: 4; indent-tabs-mode: nil -*-
  2 | %%==============================================================================
  3 | %% Copyright 2014-16 Ulf Wiger
  4 | %%
  5 | %% Licensed under the Apache License, Version 2.0 (the "License");
  6 | %% you may not use this file except in compliance with the License.
  7 | %% You may obtain a copy of the License at
  8 | %%
  9 | %% http://www.apache.org/licenses/LICENSE-2.0
 10 | %%
 11 | %% Unless required by applicable law or agreed to in writing, software
 12 | %% distributed under the License is distributed on an "AS IS" BASIS,
 13 | %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14 | %% See the License for the specific language governing permissions and
 15 | %% limitations under the License.
 16 | %%==============================================================================
 17 | 
 18 | -module(sext_eqc).
 19 | 
 20 | %% Prefer QuickCheck, but otherwise try with Proper (some properties will
 21 | %% have trouble under Proper - feel free to investigate).
 22 | -ifdef(EQC).
 23 | -undef(QC).
 24 | -define(QC,eqc).
 25 | -include_lib("eqc/include/eqc.hrl").
 26 | -else.
 27 | -ifdef(PROPER).
 28 | -undef(QC).
 29 | -define(QC,proper).
 30 | -include_lib("proper/include/proper.hrl").
 31 | -endif.
 32 | -endif.
 33 | 
 34 | -ifdef(QC).
 35 | -compile(export_all).
 36 | -include_lib("eunit/include/eunit.hrl").
 37 | 
 38 | get_n(Default) ->
 39 |     case os:getenv("SEXT_TESTS") of
 40 |         false -> Default;
 41 |         Res ->
 42 |             list_to_integer(Res)
 43 |     end.
 44 | 
 45 | sext_test_() ->
 46 |     N = get_n(500),
 47 |     {timeout, 60,
 48 |      [
 49 |       fun() -> t(run(N, prop_encode, fun prop_encode/0)) end
 50 |       , fun() -> t(run(N, prop_encode_rev, fun prop_encode_rev/0)) end
 51 |       , fun() -> t(run(N, prop_decode_legacy_big, fun prop_decode_legacy_big/0)) end
 52 |       , fun() -> t(run(N, prop_decode_legacy_neg_big, fun prop_decode_legacy_neg_big/0)) end
 53 |       , fun() -> t(run(N, prop_prefix_equiv,fun prop_prefix_equiv/0))end
 54 |       , fun() -> t(run(N, prop_sort, fun prop_sort/0)) end
 55 |       , fun() -> t(run(N, prop_revsort, fun prop_revsort/0)) end
 56 |       , fun() -> t(run(N, prop_sort_big, fun prop_sort_big/0)) end
 57 |       , fun() -> t(run(N, prop_sort_neg_big, fun prop_sort_neg_big/0)) end
 58 |       , fun() -> t(run(N, prop_revsort_neg_big, fun prop_revsort_neg_big/0)) end
 59 |       , fun() -> t(run(N, prop_encode_sb32, fun prop_encode_sb32/0)) end
 60 |       , fun() -> t(run(N, prop_sort_sb32, fun prop_sort_sb32/0)) end
 61 |       , fun() -> t(run(N, prop_partial_decode1, fun prop_partial_decode1/0)) end
 62 |       , fun() -> t(run(N, prop_partial_decode2, fun prop_partial_decode2/0)) end
 63 |       , fun() -> t(run(N, prop_partial_decode_plus1,
 64 |                        fun prop_partial_decode_plus1/0)) end
 65 |       , fun() -> t(run(N, prop_partial_decode_plus2,
 66 |                        fun prop_partial_decode_plus2/0)) end
 67 |       , fun() -> t(run(N, prop_is_prefix1, fun prop_is_prefix1/0)) end
 68 |       , fun() -> t(run(N, prop_is_prefix2, fun prop_is_prefix2/0)) end
 69 |       , fun() -> t(run(N, prop_encode_hex, fun prop_encode_hex/0)) end
 70 |       , fun() -> t(run(N, prop_sort_hex, fun prop_sort_hex/0)) end
 71 |       , fun() -> t(run(N, prop_is_prefix_hex1, fun prop_is_prefix_hex1/0)) end
 72 |       , fun() -> t(run(N, prop_is_prefix_hex2, fun prop_is_prefix_hex2/0)) end
 73 |       , fun() -> t(run(N,prop_non_proper_sorts,fun prop_non_proper_sorts/0)) end
 74 |      ]}.
 75 | 
 76 | t({_Lbl, Res}) ->
 77 |     ?assert(Res == true);
 78 | t(Res) ->
 79 |     ?assert(Res == true).
 80 | 
 81 | run() ->
 82 |     run(good_number_of_tests()).
 83 | 
 84 | good_number_of_tests() ->
 85 |     get_n(2000).
 86 | 
 87 | run(Num) ->
 88 |     [
 89 |      run  (Num, prop_encode , fun prop_encode/0)
 90 |      , run(Num, prop_decode_legacy_big, fun prop_decode_legacy_big/0)
 91 |      , run(Num, prop_decode_legacy_neg_big, fun prop_decode_legacy_neg_big/0)
 92 |      , run(Num, prop_prefix_equiv,fun prop_prefix_equiv/0)
 93 |      %% , run(Num, prop_prefix_equiv,fun prop_prefix_equiv/0)
 94 |      , run(Num, prop_sort , fun prop_sort/0)
 95 |      , run(Num, prop_sort_big, fun prop_sort_big/0)
 96 |      , run(Num, prop_sort_neg_big, fun prop_sort_neg_big/0)
 97 |      , run(Num, prop_encode_sb32, fun prop_encode_sb32/0)
 98 |      , run(Num, prop_sort_sb32 , fun prop_sort_sb32/0)
 99 |      , run(Num, prop_partial_decode1, fun prop_partial_decode1/0)
100 |      , run(Num, prop_partial_decode2, fun prop_partial_decode2/0)
101 |      , run(Num, prop_partial_decode_plus1, fun prop_partial_decode_plus1/0)
102 |      , run(Num, prop_partial_decode_plus2, fun prop_partial_decode_plus2/0)
103 |      , run(Num, prop_is_prefix1, fun prop_is_prefix1/0)
104 |      , run(Num, prop_is_prefix2, fun prop_is_prefix2/0)
105 |      , run(Num, prop_non_proper_sorts, fun prop_non_proper_sorts/0)
106 |     ].
107 | 
%% Runs the property built by F() Num times with the configured
%% QuickCheck flavour (?QC), logging the label before and the outcome
%% after. Returns {Lbl, Outcome}.
run(Num, Lbl, F) ->
    io:fwrite(user, "EQC test: ~p (~p)... ", [Lbl, Num]),
    Prop = ?QC:numtests(Num, F()),
    Outcome = ?QC:quickcheck(Prop),
    io:fwrite(user, "-> ~p~n", [Outcome]),
    {Lbl, Outcome}.
113 | 
114 | 
%% Encoded terms must compare the same way as the terms they encode.
%%
%% Erlang compares an integer and a float of equal value as equal
%% (1 == 1.0), whereas their encodings have a strict ordering (needed to
%% satisfy the encode property). comp/2 resolves that tie explicitly, so
%% no generated pair is discarded here.
prop_sort() ->
    ?FORALL({Ta, Tb}, {term_(), term_()},
            begin
                Ea = sext:encode(Ta),
                Eb = sext:encode(Tb),
                collect(byte_size(term_to_binary({Ta, Tb})),
                        comp(Ea, Eb) == comp_i(Ta, Tb))
            end).
128 | 
%% Reverse-encoded terms must sort in the opposite order of the terms.
prop_revsort() ->
    ?FORALL({Ta, Tb}, {term_(), term_()},
            begin
                Ra = sext:reverse_sext(sext:encode(Ta)),
                Rb = sext:reverse_sext(sext:encode(Tb)),
                collect(byte_size(term_to_binary({Ta, Tb})),
                        comp(Ra, Rb) == comp_i(Tb, Ta))
            end).
137 | 
%% Sort order is preserved for the large positive integers of big/0.
prop_sort_big() ->
    ?FORALL({Ba, Bb}, {big(), big()},
            begin
                Ea = sext:encode(Ba),
                Eb = sext:encode(Bb),
                collect(byte_size(term_to_binary({Ba, Bb})),
                        comp(Ea, Eb) == comp_i(Ba, Bb))
            end).
145 | 
%% Sort order is preserved for the large negative integers of neg_big/0.
prop_sort_neg_big() ->
    ?FORALL({Ba, Bb}, {neg_big(), neg_big()},
            begin
                Ea = sext:encode(Ba),
                Eb = sext:encode(Bb),
                collect(byte_size(term_to_binary({Ba, Bb})),
                        comp(Ea, Eb) == comp_i(Ba, Bb))
            end).
153 | 
%% Reverse-encoded large negative integers sort in the opposite order.
prop_revsort_neg_big() ->
    ?FORALL({Ba, Bb}, {neg_big(), neg_big()},
            begin
                Ra = sext:reverse_sext(sext:encode(Ba)),
                Rb = sext:reverse_sext(sext:encode(Bb)),
                collect(byte_size(term_to_binary({Ba, Bb})),
                        comp(Ra, Rb) == comp_i(Bb, Ba))
            end).
162 | 
%% The sb32 text encoding must preserve the sort order of the terms.
prop_sort_sb32() ->
    ?FORALL({Ta, Tb}, {term_(), term_()},
            begin
                Ea = sext:encode_sb32(Ta),
                Eb = sext:encode_sb32(Tb),
                collect(byte_size(term_to_binary({Ta, Tb})),
                        comp(Ea, Eb) == comp_i(Ta, Tb))
            end).
170 | 
%% The hex text encoding must preserve the sort order of the terms.
prop_sort_hex() ->
    ?FORALL({Ta, Tb}, {term_(), term_()},
            begin
                Ea = sext:encode_hex(Ta),
                Eb = sext:encode_hex(Tb),
                collect(byte_size(term_to_binary({Ta, Tb})),
                        comp(Ea, Eb) == comp_i(Ta, Tb))
            end).
178 | 
179 | 
%% Positive floats: encodings compare like the floats themselves.
prop_sort_fs() ->
    ?FORALL({Fa, Fb}, {pos_float(), pos_float()},
            begin
                Ea = sext:encode(Fa),
                Eb = sext:encode(Fb),
                comp(Fa, Fb) == comp(Ea, Eb)
            end).

%% Negative floats: encodings compare like the floats themselves.
prop_sort_neg_fs() ->
    ?FORALL({Fa, Fb}, {neg_float(), neg_float()},
            begin
                Ea = sext:encode(Fa),
                Eb = sext:encode(Fb),
                comp(Fa, Fb) == comp(Ea, Eb)
            end).
193 | 
%% Round-trip properties.
%%
%% Each one compares with =:= rather than ==, so that a decoder returning a
%% numerically equal value of the wrong type (e.g. 1.0 for 1) is reported
%% as a failure instead of passing unnoticed.

%% decode/1 inverts encode/1.
prop_encode() ->
    ?FORALL(T, term_(),
            sext:decode(sext:encode(T)) =:= T).

%% Decoding a reverse-encoded term twice yields the original term.
prop_encode_rev() ->
    ?FORALL(T, term_(),
            sext:decode(sext:decode(
                          sext:reverse_sext(sext:encode(T)))) =:= T).

%% decode/1 handles large positive integers encoded via encode(T, true).
prop_decode_legacy_big() ->
    ?FORALL(T, big(),
            sext:decode(sext:encode(T, true)) =:= T).

%% decode/1 handles large negative integers encoded via encode(T, true).
prop_decode_legacy_neg_big() ->
    ?FORALL(T, neg_big(),
            sext:decode(sext:encode(T, true)) =:= T).

%% decode_sb32/1 inverts encode_sb32/1.
prop_encode_sb32() ->
    ?FORALL(T, term_(),
            sext:decode_sb32(sext:encode_sb32(T)) =:= T).

%% decode_hex/1 inverts encode_hex/1.
prop_encode_hex() ->
    ?FORALL(T, term_(),
            sext:decode_hex(sext:encode_hex(T)) =:= T).
218 | 
%% For the terms generated by term_/0, prefix/1 and encode/1 must agree.
prop_prefix_equiv() ->
    ?FORALL(Term, term_(),
            begin
                Encoded = sext:encode(Term),
                Encoded == sext:prefix(Term)
            end).
222 | 
%% Partially decoding a complete encoding must report `full', return the
%% term itself and leave nothing over.
prop_partial_decode1() ->
    ?FORALL(T, term_(),
            begin
                {full, Dec, Rest} = sext:partial_decode(sext:encode(T)),
                Rest == <<>> andalso Dec == T
            end).
231 | 
%% Partially decoding a prefix must give back a pattern that is
%% _comparable_ to the original one (see comp_pat/2), with no rest.
prop_partial_decode2() ->
    ?FORALL(Pat, wild_pat(),
            case sext:partial_decode(sext:prefix(Pat)) of
                {partial, Dec, Rest} ->
                    comp_pat(Dec, Pat) andalso Rest == <<>>;
                {full, _, _} ->
                    true
            end).
243 | 
%% An encoded term followed by bytes that are not sext-encoded: the term is
%% decoded in full and the trailing bytes come back untouched.
prop_partial_decode_plus1() ->
    ?FORALL(T, term_(),
            begin
                Input = <<(sext:encode(T))/binary, "foo">>,
                {full, Dec, <<"foo">>} = sext:partial_decode(Input),
                Dec == T
            end).
253 | 
%% An encoded prefix followed by bytes that are not sext-encoded: the
%% decoded pattern must be comparable to the original and the trailing
%% bytes must come back untouched.
prop_partial_decode_plus2() ->
    ?FORALL(Pat, wild_pat(),
            begin
                Input = <<(sext:prefix(Pat))/binary, "foo">>,
                case sext:partial_decode(Input) of
                    {partial, Dec, <<"foo">>} ->
                        comp_pat(Dec, Pat);
                    {full, Dec, <<"foo">>} ->
                        Dec == Pat
                end
            end).
266 | 
%% Generator: a prefixable term with at least one position, in which one
%% randomly chosen position has been replaced by a wildcard.
wild_pat() ->
    TermGen = ?SUCHTHAT(Tp, prefixable_term(), positions(Tp) > 0),
    ?LET({T, W}, {TermGen, wild()},
         ?LET(P, choose(1, positions(T)),
              make_wild(T, P, W))).
272 | 
%% comp_pat(Decoded, Pattern) -> boolean()
%% True when a partially decoded pattern is compatible with the pattern it
%% was produced from.
comp_pat(Same, Same) ->
    true;
comp_pat(A, B) when is_tuple(A), is_tuple(B),
                    tuple_size(A) =:= tuple_size(B) ->
    comp_pat_l(tuple_to_list(A), tuple_to_list(B));
comp_pat(Dec, Pat) when is_list(Dec), is_list(Pat) ->
    comp_pat_l(Dec, Pat);
comp_pat(Dec, Pat) ->
    %% Only a wildcard on the decoded side can match from here on. It
    %% matches a wildcard, or a list whose head is a wildcard: the decoded
    %% prefix of [] and ['_'|'_'] are both '_'.
    is_wild(Dec) andalso
        (is_wild(Pat) orelse
         case Pat of
             [H|_] -> is_wild(H);
             _ -> false
         end).
292 | 
%% Element-wise comp_pat/2 over two lists. A wildcard element on the
%% decoded side ends the comparison successfully; a leftover decoded tail
%% is accepted only if it is itself a wildcard.
comp_pat_l([], []) ->
    true;
comp_pat_l([H1|T1], [H2|T2]) ->
    is_wild(H1) orelse
        (comp_pat(H1, H2) andalso comp_pat_l(T1, T2));
comp_pat_l(Tail, _) ->
    is_wild(Tail).
305 | 
306 | 
%% The prefix of a term with one position made wild must be a binary
%% prefix of the full encoding of that term.
%%
%% The position is quantified with a nested ?FORALL (not ?LET, which builds
%% a generator rather than a property), and the body returns a boolean.
prop_is_prefix1() ->
    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
                              positions(Tp) > 0), wild()},
            ?FORALL(P, choose(1, positions(T)),
                    begin
                        Pfx = sext:prefix(make_wild(T,P,W)),
                        is_prefix(Pfx, sext:encode(T))
                    end)).
315 | 
%% Making position P-1 wild must give a binary prefix of the result of
%% making position P wild.
%%
%% The position is quantified with a nested ?FORALL (not ?LET, which builds
%% a generator rather than a property), and the body returns a boolean.
prop_is_prefix2() ->
    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
                              positions(Tp) > 2), wild()},
            ?FORALL(P, choose(2, positions(T)),
                    begin
                        Pfx1 = sext:prefix(make_wild(T,P,W)),
                        Pfx2 = sext:prefix(make_wild(T,P-1,W)),
                        is_prefix(Pfx2, Pfx1)
                    end)).
325 | 
%% Hex variant: the hex prefix of a term with one position made wild must
%% be a binary prefix of the full hex encoding of that term.
%%
%% The position is quantified with a nested ?FORALL (not ?LET, which builds
%% a generator rather than a property), and the body returns a boolean.
prop_is_prefix_hex1() ->
    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
                              positions(Tp) > 0), wild()},
            ?FORALL(P, choose(1, positions(T)),
                    begin
                        Pfx = sext:prefix_hex(make_wild(T,P,W)),
                        is_prefix(Pfx, sext:encode_hex(T))
                    end)).
334 | 
%% Hex variant: making position P-1 wild must give a binary prefix of the
%% result of making position P wild.
%%
%% The position is quantified with a nested ?FORALL (not ?LET, which builds
%% a generator rather than a property), and the body returns a boolean.
prop_is_prefix_hex2() ->
    ?FORALL({T,W}, {?SUCHTHAT(Tp, prefixable_term(),
                              positions(Tp) > 2), wild()},
            ?FORALL(P, choose(2, positions(T)),
                    begin
                        Pfx1 = sext:prefix_hex(make_wild(T,P,W)),
                        Pfx2 = sext:prefix_hex(make_wild(T,P-1,W)),
                        is_prefix(Pfx2, Pfx1)
                    end)).
344 | 
%% A list L, L with T as its tail (L ++ T), and L with T appended as an
%% element must sort in the same relative order before and after encoding.
prop_non_proper_sorts() ->
    ?FORALL({L,T}, {non_empty_list(), simple_term()},
            begin
                Tagged = [{L, 1},
                          {L ++ T, 2},
                          {L ++ [T], 3}],
                Encoded = [{sext:encode(Term), Tag} || {Term, Tag} <- Tagged],
                TermOrder = [Tag || {_, Tag} <- lists:keysort(1, Tagged)],
                EncOrder = [Tag || {_, Tag} <- lists:keysort(1, Encoded)],
                TermOrder == EncOrder
            end).
357 | 
%% Round-trip properties for specific numeric generators. They compare
%% with =:= so that a decoded value of the wrong numeric type (float for
%% integer or vice versa) is reported as a failure.

%% Negative floats survive encode/decode unchanged.
prop_encode_neg_fs() ->
    ?FORALL(T, neg_float(),
            sext:decode(sext:encode(T)) =:= T).

%% Large positive integers survive encode/decode unchanged.
prop_encode_big() ->
    ?FORALL(T, big(),
            sext:decode(sext:encode(T)) =:= T).

%% Large negative integers survive encode/decode unchanged.
prop_encode_neg_big() ->
    ?FORALL(T, neg_big(),
            sext:decode(sext:encode(T)) =:= T).
369 | 
370 | 
%% comp(A, B) -> less | equal | more
%% Compares two terms the way their encodings are expected to compare.
%% It follows Erlang term order, with one tie-break: an integer and a
%% float that compare equal (A == B but A =/= B) are given a strict order.
comp(A,B) when A == B, A =/= B ->
    %% can only happen when either is a float and the other an int
    IsMore = if A < 0 ->
                     is_float(B);
                true ->
                     is_float(A)
             end,
    case IsMore of
        true -> more;
        false -> less
    end;
comp(A,B) when A < B -> less;
comp(A,A) -> equal;
comp(_,_) -> more.

%% Structural variant of comp/2: same-size tuples and pairs of lists are
%% compared element by element, so the int/float tie-break is applied to
%% their elements; everything else goes straight to comp/2.
comp_i(Ta, Tb) when is_tuple(Ta), is_tuple(Tb),
                    tuple_size(Ta) == tuple_size(Tb) ->
    comp_l(tuple_to_list(Ta), tuple_to_list(Tb));
comp_i(La, Lb) when is_list(La), is_list(Lb) ->
    comp_l(La, Lb);
comp_i(A, B) ->
    comp(A, B).

%% Element-wise comparison of two lists, which may be improper.
comp_l([]    , []     ) -> equal;
comp_l([]    , [_|_]  ) -> less;
comp_l([_|_] , []     ) -> more;
comp_l([Ha|Ta],[Hb|Tb]) ->
    %% Elements go through comp_i/2, not comp/2, so that containers nested
    %% inside containers get the same tie-break as top-level ones.
    case comp_i(Ha, Hb) of
        equal ->
            comp_l(Ta, Tb);
        Other ->
            Other
    end;
comp_l(A, B) -> % A or B was an improper list
    comp_i(A, B).
406 | 
%% True when binary A is a prefix of binary B, i.e. their longest common
%% prefix is as long as A.
is_prefix(A, B) ->
    Common = binary:longest_common_prefix([A, B]),
    Common == byte_size(A).
410 | 
%% Always-true property that records the external-format size of the
%% generated terms under the `term_size' measurement.
prop_measure_term() ->
    ?FORALL(Term, term_(),
            measure(term_size, byte_size(term_to_binary(Term)), true)).
414 | 
%% Generator: any one of the non-recursive types of simple_types/0.
simple_term() ->
    oneof(simple_types()).

%% Generator: an arbitrary term whose nesting is driven by the current
%% generation size.
term_() ->
    ?SIZED(Sz, term(Sz)).
420 | 
%% Generator for a term at the given size: simple terms only at size 0,
%% otherwise a simple term, a list, an improper list, a tuple or a string.
term(0) ->
    simple_term();
term(Size) ->
    %% You need ?LAZY for recursive generators!
    ?LAZY(begin
              %% Don't make lists and tuples EXACTLY Size long
              Compound = [alist(Size),
                          non_proper_list(Size),
                          atuple(Size),
                          astring(Size)],
              oneof(simple_types() ++ Compound)
          end).
433 | 
%% The generators for non-recursive terms: integers (small and large),
%% positive and negative floats, atoms, binaries and bitstrings.
simple_types() ->
    Numbers = [int(), big(), pos_float(), neg_float()],
    Others = [anatom(), abin(), abitstr()],
    Numbers ++ Others.
442 | 
%% Generator: a large positive integer.
big() ->
    ?LET({X, M}, {nat(), pos()},
         begin
             %% Multiply by the cube of `M'
             %% to get the generator big enough.
             %% Verified w/ `eqc_gen:sample/1'
             Cube = M * M * M,
             (16#ffffFFFF + X) * Cube
         end).

%% Generator: the negation of a big/0 value.
neg_big() ->
    ?LET(Big, big(), -Big).

%% Generator: a strictly positive natural number.
pos() ->
    ?SUCHTHAT(Nat, nat(), Nat > 0).
455 | 
%% List generators. The explicit Size is applied to list generation only;
%% see list/2.

%% Generator: a list of terms, sized by the current generation size.
alist() ->
    ?SIZED(Sz, alist(Sz)).

%% Generator: a list generated at Size, with elements from term(Size div 3).
alist(Size) ->
    ElemGen = term(Size div 3),
    list(Size, ElemGen).

%% Generator: an alist/1 value passed through make_non_proper/1.
non_proper_list(Size) ->
    ?LET(Proper, alist(Size), make_non_proper(Proper)).

%% Generator: a list of G generated at Size, while each element is
%% generated at the size that was current outside.
list(Size, G) ->
    ?SIZED(Outer, resize(Size, list(resize(Outer, G)))).
469 | 
%% Generator: a tuple built from an alist/1 value.
atuple(Size) ->
    ?LET(Elems, alist(Size), list_to_tuple(Elems)).

%% Generator: one of a small fixed set of atoms.
anatom() ->
    oneof([a, b, c, aa, bb, cc]).

%% Generator: a string of characters in the range $A..$z; the empty
%% string at size 0.
astring(0) ->
    "";
astring(Size) ->
    list(Size, choose($A, $z)).
479 | 
%% Generator: a binary made of arbitrary bytes.
abin() ->
    ?LET(Bytes, list(choose(0, 255)), list_to_binary(Bytes)).

%% Generator: a binary followed by 0..7 extra bits.
abitstr() ->
    ?LET({Bin, Sz}, {abin(), choose(0, 7)},
         begin
             %% Largest value that fits in Sz bits.
             MaxTail = 16#ff bsr (8 - Sz),
             ?LET(N, choose(0, MaxTail),
                  <<Bin/binary, N:Sz>>)
         end).
487 | 
%% Generator: a strictly positive float, passed through norm/1.
pos_float() ->
    Positive = ?SUCHTHAT(R, real(), R > 0 andalso is_float(R)),
    ?LET(F, Positive, norm(F)).

%% Generator: a strictly negative float, passed through norm/1.
neg_float() ->
    Negative = ?SUCHTHAT(R, real(), R < 0 andalso is_float(R)),
    ?LET(F, Negative, norm(F)).
495 | 
%% Passes a float through its binary float representation and back.
norm(F) when is_float(F) ->
    Bits = <<F/float>>,
    <<Normalised/float>> = Bits,
    Normalised.
499 | 
%% Turns a proper list of two or more elements into an improper one by
%% making the last element the tail. Shorter lists are returned unchanged.
make_non_proper([]) ->
    [];
make_non_proper([Only]) ->
    [Only];
make_non_proper([Head, Last]) ->
    [Head|Last];
make_non_proper([Head|Rest]) ->
    [Head|make_non_proper(Rest)].
504 | 
505 | 
%% Generator: a non-empty tuple or a non-empty list.
prefixable_term() ->
    oneof([non_empty_tuple(), non_empty_list()]).

%% Generator: a tuple with at least one element.
non_empty_tuple() ->
    ?LET(Elems, non_empty_list(), list_to_tuple(Elems)).

%% Generator: an alist/0 value with at least one element.
non_empty_list() ->
    non_empty(alist()).
516 | 
%% Counts the leaf positions of a term: tuples and lists are descended
%% into, every other term counts as one position, and so does the
%% non-list tail of an improper list.
positions(T) ->
    positions(T, 0).

positions([], Count) ->
    Count;
positions([Head|Tail], Count) ->
    positions(Tail, Count + positions(Head, 0));
positions(Tuple, Count) when is_tuple(Tuple) ->
    positions(tuple_to_list(Tuple), Count);
positions(_Leaf, Count) ->
    Count + 1.
528 | 
%% True for '_' and for atoms made of `$' followed by an integer, such as
%% '$1'; false for everything else.
is_wild('_') ->
    true;
is_wild(A) when is_atom(A) ->
    case atom_to_list(A) of
        [$$ | Digits] ->
            try list_to_integer(Digits) of
                _ -> true
            catch
                error:_ -> false
            end;
        _ ->
            false
    end;
is_wild(_) ->
    false.
544 | 
%% make_wild(T, P, W): replaces the P-th leaf position of the tuple or
%% list T (counting from 1, descending into nested tuples and lists) with
%% the wildcard W. Everything after that position in the same container
%% is dropped, because W takes the place of the remainder.
make_wild(T, P, W) when P > 0 ->
    if is_list(T) ->
            {Wild, _} = make_wild1(T, P, W, []),
            Wild;
       is_tuple(T) ->
            {Elems, _} = make_wild1(tuple_to_list(T), P, W, []),
            list_to_tuple(Elems)
    end.

%% make_wild1(Remaining, PositionsLeft, W, DoneReversed)
%%   -> {Result, PositionsStillLeft}

%% Position already consumed: keep the remainder as it is.
make_wild1(L, 0, _W, Done) ->
    {lists:reverse(Done) ++ L, 0};
%% Non-list tail of an improper list: it is one position of its own.
make_wild1(Tail, P, W, Done) when not is_list(Tail), P == 1 ->
    {lists:reverse(Done) ++ W, 0};
make_wild1(Tail, P, _W, Done) when not is_list(Tail) ->
    {lists:reverse(Done) ++ Tail, P - 1};
%% Target position reached: W replaces the element.
make_wild1([_ | Rest], 1, W, Done) ->
    {lists:reverse(Done) ++ [W | Rest], 0};
%% Nested containers consume positions of their own.
make_wild1([H | Rest], P, W, Done) when is_tuple(H) ->
    {Elems, Left} = make_wild1(tuple_to_list(H), P, W, []),
    make_wild1(Rest, Left, W, [list_to_tuple(Elems) | Done]);
make_wild1([H | Rest], P, W, Done) when is_list(H) ->
    {Sub, Left} = make_wild1(H, P, W, []),
    make_wild1(Rest, Left, W, [Sub | Done]);
%% Plain element: one position.
make_wild1([H | Rest], P, W, Done) ->
    make_wild1(Rest, P - 1, W, [H | Done]);
make_wild1([], P, _W, Done) ->
    {lists:reverse(Done), P}.
576 | 
%% Generator: one of the wildcard atoms used in patterns.
wild() ->
    Wildcards = ['_', '$1', '$9999'],
    oneof(Wildcards).
579 | 
%% Returns L with its P-th element (1-based) replaced by V.
lists_replace(L, P, V) when P > 0, P =< length(L) ->
    {Before, [_Old | After]} = lists:split(P - 1, L),
    Before ++ [V | After].
583 | 
584 | -endif.
585 | 
586 | 


--------------------------------------------------------------------------------