├── .circleci └── config.yml ├── .clj-kondo └── config.edn ├── .cljstyle ├── .gitignore ├── .lein-yagni ├── CHANGELOG.md ├── README.md ├── UNLICENSE ├── bench └── clj_cbor │ └── bench.clj ├── dev └── user.clj ├── project.clj ├── src └── clj_cbor │ ├── codec.clj │ ├── core.clj │ ├── data │ ├── core.clj │ ├── float16.clj │ ├── simple.clj │ └── tagged.clj │ ├── error.clj │ ├── header.clj │ └── tags │ ├── clojure.clj │ ├── content.clj │ ├── numbers.clj │ ├── text.clj │ └── time.clj └── test └── clj_cbor ├── codec_test.clj ├── core_test.clj ├── data ├── core_test.clj ├── float16_test.clj ├── simple_test.clj └── tagged_test.clj ├── generative_test.clj ├── header_test.clj ├── tags ├── clojure_test.clj ├── content_test.clj ├── numbers_test.clj ├── text_test.clj └── time_test.clj └── test_utils.clj /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | # Common executor configuration 4 | executors: 5 | clojure: 6 | docker: 7 | - image: circleci/clojure:openjdk-11-lein-2.9.1 8 | working_directory: ~/repo 9 | 10 | 11 | # Job definitions 12 | jobs: 13 | style: 14 | executor: clojure 15 | steps: 16 | - checkout 17 | - run: 18 | name: Install cljstyle 19 | environment: 20 | CLJSTYLE_VERSION: 0.15.0 21 | command: | 22 | wget https://github.com/greglook/cljstyle/releases/download/${CLJSTYLE_VERSION}/cljstyle_${CLJSTYLE_VERSION}_linux.zip 23 | unzip cljstyle_${CLJSTYLE_VERSION}_linux.zip 24 | - run: 25 | name: Check source formatting 26 | command: "./cljstyle check --stats style-stats.tsv" 27 | - store_artifacts: 28 | path: style-stats.tsv 29 | destination: style 30 | 31 | lint: 32 | executor: clojure 33 | steps: 34 | - checkout 35 | - run: 36 | name: Install clj-kondo 37 | environment: 38 | CLJ_KONDO_VERSION: 2022.05.31 39 | command: | 40 | wget https://github.com/borkdude/clj-kondo/releases/download/v${CLJ_KONDO_VERSION}/clj-kondo-${CLJ_KONDO_VERSION}-linux-amd64.zip 41 | unzip clj-kondo-${CLJ_KONDO_VERSION}-linux-amd64.zip 42 | - run: 43 | name: Lint source code 44 | command: "./clj-kondo --lint src test bench" 45 | 46 | test: 47 | executor: clojure 48 | steps: 49 | - checkout 50 | - restore_cache: 51 | keys: 52 | - v1-test-{{ checksum "project.clj" }} 53 | - v1-test- 54 | - run: lein with-profile +test deps 55 | - save_cache: 56 | key: v1-test-{{ checksum "project.clj" }} 57 | paths: 58 | - ~/.m2 59 | - run: lein check 60 | - run: lein test 61 | - run: lein test :generative 62 | 63 | coverage: 64 | executor: clojure 65 | steps: 66 | - checkout 67 | - restore_cache: 68 | keys: 69 | - v1-coverage-{{ checksum "project.clj" }} 70 | - v1-coverage- 71 | - v1-test- 72 | - run: 73 | name: Generate test coverage 74 | command: lein cloverage --codecov 75 | - save_cache: 76 | paths: 77 | - ~/.m2 78 | key: v1-coverage-{{ checksum "project.clj" }} 79 | - store_artifacts: 80 | path: target/coverage 81 | destination: coverage 82 | - run: 83 | name: Install codecov 84 | command: | 85 | sudo apt-get update && sudo apt-get install gpg 86 | curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import 87 | curl -Os https://uploader.codecov.io/latest/linux/codecov 88 | curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM 89 | curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig 90 | gpgv codecov.SHA256SUM.sig codecov.SHA256SUM 91 | shasum -a 256 -c codecov.SHA256SUM 92 | chmod +x codecov 93 | - run: 94 | name: Publish coverage report 95 | command: './codecov -f target/coverage/codecov.json' 96 | 97 | 98 | # Workflow definitions 99 | workflows: 100 | version: 2 101 | test: 102 | jobs: 103 | - style 104 | - lint 105 | - test 106 | - coverage: 107 | requires: 108 | - test 109 | -------------------------------------------------------------------------------- /.clj-kondo/config.edn: -------------------------------------------------------------------------------- 1 | {:linters 2 | {:consistent-alias 3 | {:level :warning 4 | :aliases {clojure.java.io io 5 | clojure.set set 6 | clojure.string str}} 7 | 8 | :unresolved-symbol 9 | {:exclude [(clojure.test/is [cbor-error?]) 10 | (clojure.test.check.clojure-test/defspec)]}}} 11 | -------------------------------------------------------------------------------- /.cljstyle: -------------------------------------------------------------------------------- 1 | ;; Clojure formatting rules 2 | ;; vim: filetype=clojure 3 | {:files 4 | {:ignore #{".git" "target"}} 5 | 6 | :rules 7 | {:indentation 8 | {:indents 9 | {cbor-error? [[:block 1]] 10 | prop/for-all [[:block 1]]}}}} 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | /bench/data 5 | /bench/samples 6 | /.clj-kondo/.cache 7 | /.lein-* 8 | /.nrepl-port 9 | pom.xml 10 | pom.xml.asc 11 | *.jar 12 | *.class 13 | -------------------------------------------------------------------------------- /.lein-yagni: -------------------------------------------------------------------------------- 1 | clj-cbor.core/cbor-codec 2 | clj-cbor.core/encode 3 | clj-cbor.core/decode 4 | 5 | clj-cbor.core/spit 6 | clj-cbor.core/slurp 7 | clj-cbor.core/slurp-all 8 | clj-cbor.core/self-describe 9 | 10 | clj-cbor.data.float16/encode 11 | clj-cbor.tags.time/date-read-handlers 12 | clj-cbor.tags.time/string-time-write-handlers 13 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Change Log 2 | ========== 3 | 4 | All notable changes to this project will be documented in this file, which 5 | follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 6 | This project adheres to [Semantic Versioning](http://semver.org/). 7 | 8 | 9 | ## [Unreleased] 10 | 11 | ... 12 | 13 | 14 | ## [1.1.1] - 2022-06-20 15 | 16 | ### Changed 17 | - Update Clojure to 1.11.1 18 | 19 | ### Fixed 20 | - Exclude the new `parse-uuid` function in Clojure 1.11 to avoid a reference 21 | collision warning. 22 | 23 | 24 | ## [1.1.0] - 2020-06-14 25 | 26 | ### Added 27 | - The base logic in the codec will write any collection supporting the standard 28 | Java `List`, `Map`, and `Set` interfaces as the corresponding CBOR types. 29 | Previously, this only worked directly on Clojure's collection types. 30 | - Local calendar dates are supported using the proposed tags 100 (epoch) and 31 | 1004 (string), mapped to the `java.time.LocalDate` type. 32 | 33 | 34 | ## [1.0.0] - 2020-04-20 35 | 36 | ### Added 37 | - Errors about unhandled types now contain more type information about the 38 | value that could not be encoded. 39 | [#15](https://github.com/greglook/clj-cbor/pull/15) 40 | - The `clj-cbor.core/dispatch-superclasses` function can be used to support 41 | inheritance-based write handler selection. 42 | [#12](https://github.com/greglook/clj-cbor/issues/12) 43 | 44 | 45 | ## [0.7.2] - 2019-06-28 46 | 47 | ### Added 48 | - Add [clj-async-profiler](https://github.com/clojure-goes-fast/clj-async-profiler) 49 | support in the benchmarking code to produce flame graphs for finding hotspots. 50 | 51 | ### Removed 52 | - Codecs no longer support a configurable set tag; it is now fixed at 258 53 | matching the IANA registry. 54 | 55 | ### Fixed 56 | - `decode` will not try to coerce inputs which are already input streams 57 | anymore. This fixes incremental/lazy decoding behavior. 58 | [#13](https://github.com/greglook/clj-cbor/issues/13) 59 | - `encode` will not try to coerce outputs to an output stream anymore. Now an 60 | exception is thrown when a bad argument is given. This prevents lost writes 61 | when the final buffer is not flushed correctly. 62 | [#14](https://github.com/greglook/clj-cbor/pull/14) 63 | 64 | 65 | ## [0.7.1] - 2019-01-10 66 | 67 | This is a performance-focused release that introduced a number of changes to 68 | speed up decoding performance. The main change is the switch to a _jump table_ 69 | for the initial header byte. 70 | [#9](https://github.com/greglook/clj-cbor/issues/9) 71 | [#11](https://github.com/greglook/clj-cbor/pull/11) 72 | 73 | For a representative data sample of about ~97 KB, this brings the benchmarked 74 | decoding time from 8.454 ms to 4.089 ms, or about twice as fast! 75 | 76 | ### Changed 77 | - Upgrade to Clojure 1.10.0. 78 | - Many operations in the codec are type-hinted to use primitive operations where 79 | possible. 80 | - CBOR arrays and maps are built up using transients for performance. 81 | - Decoding logic now uses a jump table. 82 | 83 | ### Fixed 84 | - A tagged-value major type with a streaming info code now results in a 85 | `::codec/illegal-stream` error. 86 | 87 | 88 | ## [0.6.0] - 2017-12-23 89 | 90 | ### Changed 91 | - Upgrade to Clojure 1.9.0. 92 | 93 | 94 | ## [0.5.0] - 2017-11-08 95 | 96 | This release fixes two of the longer-standing quirks with the library, which 97 | were unfortunately breaking changes. The fixes should be straightforward: 98 | 99 | - Replace any `(cbor/decode ...)` with `(cbor/decode-seq ...)`. 100 | - Replace any `(first (cbor/decode-seq ...))` with `(cbor/decode ...)`. 101 | 102 | If you have existing encoded data containing sets, you can use the following 103 | function to rewrite it: 104 | 105 | ```clojure 106 | (defn rewrite-cbor-sets 107 | [codec source dest] 108 | (with-open [input (io/input-stream source) 109 | output (io/output-stream dest)] 110 | (->> 111 | input 112 | (cbor/decode-seq (assoc codec :set-tag 13)) 113 | (cbor/encode-seq codec output)))) 114 | ``` 115 | 116 | If rewriting isn't an option, you can support reading sets via tag 13 by 117 | using a custom read handler: 118 | 119 | ```clojure 120 | (def compatible-codec 121 | (assoc-in cbor/default-codec [:read-handlers 13] set)) 122 | ``` 123 | 124 | ### Changed 125 | - **Breaking:** the default set tag is now 258, matching the IANA registry. 126 | [#6](//github.com/greglook/clj-cbor/issues/6) 127 | - **Breaking:** `clj-cbor.core/decode` now only decodes a single value; previous 128 | behavior moved to `decode-seq`. 129 | [#7](//github.com/greglook/clj-cbor/issues/7) 130 | 131 | ### Added 132 | - `clj-cbor.core/encode-seq` writes a sequence of values to a byte array or 133 | output stream. 134 | - `clj-cbor.core/spit-all` similarly writes a sequence of values to an output 135 | file like repeated calls to `spit` with `:append true`. 136 | 137 | 138 | ## [0.4.1] - 2017-05-17 139 | 140 | ### Fixed 141 | - Resolved an overflow issue when encoding `Long/MIN_VALUE`. 142 | - Integer values are always decoded as `Long` values, no matter their encoded 143 | size. Previously, numbers between `Short/MAX_VALUE` and `Integer/MAX_VALUE` 144 | would return `Integer` values. 145 | [#4](https://github.com/greglook/clj-cbor/issues/4) 146 | [#5](https://github.com/greglook/clj-cbor/pull/5) 147 | 148 | 149 | ## [0.4.0] - 2017-03-14 150 | 151 | ### Added 152 | - Implemented canonical mode sorting of map keys and set entries. 153 | 154 | 155 | ## [0.3.0] - 2016-01-05 156 | 157 | ### Added 158 | - Support self-describe CBOR tag 55799. This provides a 'magic' three-byte 159 | sequence to simplify detection of the CBOR format. The 160 | `clj-cbor.core/self-describe` function will wrap the given value with this 161 | tag. 162 | - Tag codes have been factored out into integer constants where appropriate to 163 | improve consistency. 164 | - Add `spit`, `slurp`, and `slurp-all` utility functions to the core namespace. 165 | 166 | ### Changed 167 | - Read handler functions are no longer called with the tag. This greatly 168 | simplifies their implementation and allows for reuse of existing 169 | transformation functions as-is. 170 | - Error type `:clj-cbor.codec/illegal-chunk` renamed to `illegal-chunk-type`. 171 | 172 | ### Fixed 173 | - The `decode` function now properly raises an error when the input ends 174 | mid-value rather than at a top-level value boundary. 175 | 176 | 177 | ## [0.2.0] - 2016-12-28 178 | 179 | This release includes 100% test coverage! 180 | 181 | ### Added 182 | - UUIDs are supported in binary form using tag 37. 183 | - CBOR error keywords are organized into a hierarchy underneath 184 | `:clj-cbor.error/encoding-error` and `:clj-cbor.error/decoding-error`. 185 | 186 | ### Changed 187 | - `clj-cbor.data.model` renamed to `clj-cbor.data.core`. 188 | - The `clj-cbor.float16` functions `from-bits` and `to-bits` renamed to 189 | `decode` and `encode`, respectively. 190 | - `Undefined` and `SimpleValue` records moved to new `clj-cbor.data.simple` 191 | namespace. 192 | - `TaggedValue` record moved to new `clj-cbor.data.tagged` namespace. 193 | - `clj-cbor.header/write-major-int` renamed to `write`. 194 | - `clj-cbor.header/read-size` renamed to `read-code`. 195 | 196 | ### Fixed 197 | - Generally tighted up namespaces and reduced linkage where possible. 198 | 199 | 200 | ## [0.1.0] - 2016-12-23 201 | 202 | Initial project release. 203 | 204 | 205 | [Unreleased]: https://github.com/greglook/clj-cbor/compare/1.1.1...HEAD 206 | [1.1.1]: https://github.com/greglook/clj-cbor/compare/1.1.0...1.1.1 207 | [1.1.0]: https://github.com/greglook/clj-cbor/compare/1.0.0...1.1.0 208 | [1.0.0]: https://github.com/greglook/clj-cbor/compare/0.7.2...1.0.0 209 | [0.7.2]: https://github.com/greglook/clj-cbor/compare/0.7.1...0.7.2 210 | [0.7.1]: https://github.com/greglook/clj-cbor/compare/0.6.0...0.7.1 211 | [0.6.0]: https://github.com/greglook/clj-cbor/compare/0.5.0...0.6.0 212 | [0.5.0]: https://github.com/greglook/clj-cbor/compare/0.4.1...0.5.0 213 | [0.4.1]: https://github.com/greglook/clj-cbor/compare/0.4.0...0.4.1 214 | [0.4.0]: https://github.com/greglook/clj-cbor/compare/0.3.0...0.4.0 215 | [0.3.0]: https://github.com/greglook/clj-cbor/compare/0.2.0...0.3.0 216 | [0.2.0]: https://github.com/greglook/clj-cbor/compare/0.1.0...0.2.0 217 | [0.1.0]: https://github.com/greglook/clj-cbor/tag/0.1.0 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | clj-cbor 2 | ======== 3 | 4 | [![CircleCI](https://circleci.com/gh/greglook/clj-cbor.svg?style=shield&circle-token=21efcbc50fe431aa2efc22413ba1f4407fec6283)](https://circleci.com/gh/greglook/clj-cbor) 5 | [![codecov](https://codecov.io/gh/greglook/clj-cbor/branch/main/graph/badge.svg)](https://codecov.io/gh/greglook/clj-cbor) 6 | [![cljdoc](https://cljdoc.org/badge/mvxcvi/clj-cbor)](https://cljdoc.org/d/mvxcvi/clj-cbor/CURRENT) 7 | 8 | This library is a native Clojure implementation of the [Concise Binary Object Representation](http://cbor.io/) 9 | format specified in [RFC 7049](https://tools.ietf.org/html/rfc7049). 10 | 11 | CBOR is a binary encoding with the goal of small code size, compact messages, 12 | and extensibility without the need for version negotiation. This makes it a good 13 | alternative to [EDN](https://github.com/edn-format/edn) for storing and 14 | transmitting Clojure data in a more compact form. 15 | 16 | 17 | ## Installation 18 | 19 | Library releases are published on Clojars. To use the latest version with 20 | Leiningen, add the following dependency to your project definition: 21 | 22 | [![Clojars Project](http://clojars.org/mvxcvi/clj-cbor/latest-version.svg)](http://clojars.org/mvxcvi/clj-cbor) 23 | 24 | 25 | ## Usage 26 | 27 | The `clj-cbor.core` namespace contains the high-level encoding and decoding 28 | functions. The simplest way to use this library is to require it and call them 29 | directly with data: 30 | 31 | ```clojure 32 | => (require '[clj-cbor.core :as cbor]) 33 | 34 | => (cbor/encode [0 :foo/bar true {:x 'y} #{1/3} #"foo"]) 35 | ; 0x8600D827683A666F6F2F626172F5A1D827623A78D8276179CD81D81E820103D82363666F6F 36 | 37 | => (cbor/decode *1) 38 | [0 :foo/bar true {:x y} #{1/3} #"foo"] 39 | ``` 40 | 41 | With no extra arguments, `encode` and `decode` will make use of the 42 | `default-codec`, which comes loaded with read and write handler support for many 43 | Java and Clojure types (see the [type extensions](#type-extensions) section 44 | below). Both functions accept an additional argument to specify the codec, 45 | should different behavior be desired. 46 | 47 | ```clojure 48 | => (def codec (cbor/cbor-codec :canonical true)) 49 | 50 | => (cbor/encode codec {:foo "bar", :baz 123}) 51 | ; 0xA2D827643A666F6F63626172D827643A62617A187B 52 | 53 | => (cbor/decode codec *1) 54 | {:foo "bar", :baz 123} 55 | ``` 56 | 57 | So far we haven't specified any outputs when encoding, so we've gotten a byte 58 | array back. The full form of `encode` takes three arguments: the codec, the 59 | output stream, and the value to encode. 60 | 61 | ```clojure 62 | => (def out (java.io.ByteArrayOutputStream.)) 63 | 64 | => (cbor/encode codec out :a) 65 | 5 66 | 67 | => (cbor/encode codec out 123) 68 | 2 69 | 70 | => (cbor/encode codec out true) 71 | 1 72 | 73 | => (cbor/encode codec out "foo") 74 | 4 75 | 76 | => (.toByteArray out)) 77 | ; 0xD827623A61187BF563666F6F 78 | 79 | => (with-open [input (java.io.ByteArrayInputStream. *1)] 80 | (doall (cbor/decode-seq codec input))) 81 | (:a 123 true "foo") 82 | ``` 83 | 84 | In this mode, `encode` returns the number of bytes written instead of a byte 85 | array. We can read multiple items from an input stream using `decode-seq`, which 86 | returns a lazy sequence. If the input is a file you must realize the values 87 | before closing the input. Similarly, `encode-seq` will write a sequence of 88 | multiple values to an output stream. 89 | 90 | As a convenience, the library also provides the `spit`, `spit-all`, `slurp`, and 91 | `slurp-all` functions, which operate on files: 92 | 93 | ```clojure 94 | => (cbor/spit "data.cbor" {:abc 123, :foo "qux", :bar true}) 95 | 29 96 | 97 | => (cbor/spit-all "data.cbor" [[0.0 'x] #{-42}] :append true) 98 | 12 99 | 100 | => (.length (io/file "data.cbor")) 101 | 41 102 | 103 | => (cbor/slurp "data.cbor") 104 | {:abc 123, :bar true, :foo "qux"} 105 | 106 | => (cbor/slurp-all "data.cbor") 107 | ({:abc 123, :bar true, :foo "qux"} [0.0 x] #{-42}) 108 | ``` 109 | 110 | 111 | ## Type Extensions 112 | 113 | In order to support types of values outside the ones which are a native to CBOR, 114 | the format uses _tagged values_, similar to EDN. In CBOR, the tags are integer 115 | numbers instead of symbols, but the purpose is the same: the tags convey _new 116 | semantics_ about the following value. 117 | 118 | The most common example of a need for this kind of type extension is 119 | representing an instant in time. In EDN, this is represented by the `#inst` tag 120 | on an ISO-8601 timestamp string. CBOR offers two tags to represent instants - 121 | tag 0 codes a timestamp string, while tag 1 codes a number in epoch seconds. The 122 | former is more human-friendly, but the latter is more efficient. 123 | 124 | New types are implemented by using read and write handlers - functions which map 125 | from typed value to representation and back. Currently, the library comes with 126 | support for the following types: 127 | 128 | | Tag | Representation | Type | Semantics | 129 | |-------|----------------|------|-----------| 130 | | 0 | Text string | `Date`/`Instant` | Standard date/time string | 131 | | 1 | Number | `Date`/`Instant` | Epoch-based date/time | 132 | | 2 | Byte string | `BigInt` | Positive bignum | 133 | | 3 | Byte string | `BigInt` | Negative bignum | 134 | | 4 | Array(2) | `BigDecimal` | Decimal fraction | 135 | | 27 | Array(2) | `TaggedLiteral` | Constructor support for Clojure tagged literal values | 136 | | 30 | Array(2) | `Ratio` | Rational fractions, represented as numerator and denominator numbers | 137 | | 32 | Text string | `URI` | Uniform Resource Identifier strings | 138 | | 35 | Text string | `Pattern` | Regular expression strings | 139 | | 37 | Byte string | `UUID` | Binary-encoded UUID values | 140 | | 39 | Text string | `Symbol`/`Keyword` | Identifiers | 141 | | 100 | Integer | `LocalDate` | Epoch-based local calendar date | 142 | | 258 | Array | `Set` | Sets of unique entries | 143 | | 1004 | Text string | `LocalDate` | String-based local calendar date | 144 | | 55799 | Varies | N/A | Self-describe CBOR | 145 | 146 | For further information about registered tag semantics, consult the 147 | [IANA Registry](https://www.iana.org/assignments/cbor-tags/cbor-tags.xhtml). 148 | 149 | ### Write Handlers 150 | 151 | A write handler is a function which takes a typed value and returns an 152 | encodable representation. In most cases, the representation is a CBOR tagged 153 | value. The tag conveys the type semantic and generally the expected form that 154 | the representation takes. Write handlers are selected by a dispatch function, 155 | which defaults to `class`. The `clj-cbor.core/dispatch-superclasses` function 156 | can be used to construct an inheritance-based dispatcher. 157 | 158 | In some cases, multiple types will map to the same tag. For example, by default 159 | this library maps both `java.util.Date` and the newer `java.time.Instant` types 160 | to the same representation. 161 | 162 | ### Read Handlers 163 | 164 | A read handler is a function which takes the representation from a tagged value 165 | and returns an appropriately typed value. The choice of function to parse the 166 | values thus determines the 'preferred type' to represent values of that kind. 167 | 168 | Continuing the example, the library comes with read handlers for both `Date` and 169 | `Instant` types, allowing the user to choose their preferred time type. 170 | 171 | 172 | ## Performance 173 | 174 | As of `0.7.1`, this library is competitive with many other comparable 175 | serialization formats. Some benchmarking results can be found in 176 | [this spreadsheet](https://docs.google.com/spreadsheets/d/142LhWX5aCnOoF6v0T46RASULQDuG7JIckKiCohDPgq8/edit?usp=sharing). 177 | 178 | For small and medium data sizes CBOR is more compact than most formats, while at 179 | larger sizes (above 512 bytes) all formats are fairly close in size (within 10%, 180 | generally). Other than Nippy, which was by far the fastest codec, clj-cbor 181 | was one of the fastest encoders and is in the middle of the pack in decoding 182 | times. 183 | 184 | To give some concrete performance numbers, here are a few samples from the 185 | dataset: 186 | 187 | | Size | Encode | Decode | 188 | |------|-----------|-----------| 189 | | 4 | 6.09 µs | 2.31 µs | 190 | | 55 | 20.87 µs | 7.58 µs | 191 | | 173 | 12.64 µs | 5.74 µs | 192 | | 388 | 15.38 µs | 11.60 µs | 193 | | 882 | 31.55 µs | 14.24 µs | 194 | | 1632 | 54.82 µs | 33.52 µs | 195 | | 3127 | 92.14 µs | 64.66 µs | 196 | | 4918 | 104.92 µs | 59.67 µs | 197 | | 7328 | 108.37 µs | 82.16 µs | 198 | 199 | 200 | ## Notes 201 | 202 | A few things to keep in mind while using the library: 203 | 204 | - Streaming CBOR data can be parsed from input, but there is currently no way to 205 | generate streaming output data. 206 | - Decoding half-precision (16-bit) floating-point numbers is supported, but the 207 | values are promoted to single-precision (32-bit) as the JVM does not have 208 | native support for them. There is currently no support for writing 209 | half-precision floats except for the special values `0.0`, `+Inf`, `-Inf`, and 210 | `NaN`, which are always written as two bytes for efficiency. 211 | - CBOR does not have a type for bare characters, so they will be converted to 212 | single-character strings when written. 213 | - Regular expressions are supported using tag 35, but beware that Java 214 | `Pattern` objects do not compare equal or have the same hash code for 215 | otherwise identical regexes. Using them in sets or as map keys is discouraged. 216 | 217 | 218 | ## License 219 | 220 | This is free and unencumbered software released into the public domain. 221 | See the UNLICENSE file for more information. 222 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /bench/clj_cbor/bench.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.bench 2 | "Benchmarking utilities for format comparisons." 3 | (:require 4 | [blocks.core :as block] 5 | [blocks.store.file :refer [file-block-store]] 6 | [clj-async-profiler.core :as prof] 7 | [clj-cbor.core :as cbor] 8 | [clojure.data.fressian :as fressian] 9 | [clojure.edn :as edn] 10 | [clojure.java.io :as io] 11 | [clojure.set :as set] 12 | [clojure.string :as str] 13 | [clojure.test.check.generators :as gen] 14 | [cognitect.transit :as transit] 15 | [criterium.core :as crit] 16 | [multihash.core :as multihash] 17 | [taoensso.nippy :as nippy]) 18 | (:import 19 | (java.io 20 | ByteArrayInputStream 21 | ByteArrayOutputStream) 22 | java.nio.ByteBuffer)) 23 | 24 | 25 | ;; TODO: upgrade to blocks2 or rewrite this to not use blocks. 26 | 27 | 28 | (def stress-data 29 | "Stress-testing data, mostly borrowed from `ptaoussanis/nippy`." 30 | {;; :bytes (byte-array [(byte 1) (byte 2) (byte 3)]) 31 | :nil nil 32 | :true true 33 | :false false 34 | :char \ಬ 35 | :str-short "foo" 36 | :str-long (apply str (range 1000)) 37 | :kw :keyword 38 | :kw-ns ::keyword 39 | :sym 'foo 40 | :sym-ns 'foo/bar 41 | ;; :regex #"^(https?:)?//(www\?|\?)?" 42 | 43 | :lotsa-small-numbers (vec (range 200)) 44 | :lotsa-small-keywords (->> (java.util.Locale/getISOLanguages) 45 | (mapv keyword)) 46 | :lotsa-small-strings (->> (java.util.Locale/getISOCountries) 47 | (mapv #(.getDisplayCountry (java.util.Locale. "en" %)))) 48 | 49 | :sorted-set (sorted-set 1 2 3 4 5) 50 | :sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3) 51 | 52 | :list (list 1 2 3 4 5 (list 6 7 8 (list 9 10))) 53 | :list-quoted '(1 2 3 4 5 (6 7 8 (9 10))) 54 | :list-empty (list) 55 | :vector [1 2 3 4 5 [6 7 8 [9 10]]] 56 | :vector-empty [] 57 | :map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7}}} 58 | :map-empty {} 59 | :set #{1 2 3 4 5 #{6 7 8 #{9 10}}} 60 | :set-empty #{} 61 | :nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [] #{:a :b}} 62 | #{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [] #{:a :b}} 63 | [1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]] 64 | 65 | :lazy-seq (repeatedly 1000 rand) 66 | :lazy-seq-empty (map identity '()) 67 | 68 | :short (short 42) 69 | :integer (int 3) 70 | :long (long 3) 71 | :bigint (bigint 31415926535897932384626433832795) 72 | 73 | :float (float 3.14) 74 | :double (double 3.14) 75 | :bigdec (bigdec 3.1415926535897932384626433832795) 76 | 77 | :ratio 22/7 78 | :uuid (java.util.UUID/randomUUID) 79 | :date (java.util.Date.)}) 80 | 81 | 82 | ;; ## Flame Graphs 83 | 84 | (defn massage-stack 85 | "Collapse a stack frame in a profiling run." 86 | [stack] 87 | (-> stack 88 | (str/replace #"^.+user\$eval\d+\$fn__\d+\.invoke;" "eval;") 89 | (str/replace #"clj_cbor\.codec\.CBORCodec\.write_value;(((clj\-cbor\.codec|clojure\.core|clojure\.core\.protocols)/[^;]+;)+clj_cbor\.codec\.CBORCodec\.write_value;)+" 90 | "clj_cbor.codec.CBORCodec.write_value ...;"))) 91 | 92 | 93 | (comment 94 | (def reddit-data 95 | (clojure.edn/read-string (slurp "bench/reddit.edn"))) 96 | 97 | ; For example: 98 | (prof/profile 99 | {:event :cpu 100 | :transform massage-stack} 101 | (dotimes [_ 5000] 102 | (cbor/encode reddit-data))) 103 | 104 | ; - Should also support `:alloc` profiling 105 | ; - Collapse stack frames, in particular recursive encode/decode 106 | ,,,) 107 | 108 | 109 | ;; ## Codec Definitions 110 | 111 | (defn fressian-encode 112 | [data] 113 | (let [^ByteBuffer buffer (fressian/write data) 114 | size (.remaining buffer) 115 | bytes (byte-array size)] 116 | (.get buffer bytes 0 size) 117 | bytes)) 118 | 119 | 120 | (defn fressian-decode 121 | [content] 122 | (fressian/read (ByteBuffer/wrap content))) 123 | 124 | 125 | (defn transit-encode 126 | [type data] 127 | (let [out (ByteArrayOutputStream.) 128 | writer (transit/writer out type)] 129 | (transit/write writer data) 130 | (.toByteArray out))) 131 | 132 | 133 | (defn transit-decode 134 | [type content] 135 | (let [in (ByteArrayInputStream. ^bytes content) 136 | reader (transit/reader in type)] 137 | (transit/read reader))) 138 | 139 | 140 | ;; TODO: pull versions directly from project.clj to keep them up to date 141 | (def codecs 142 | "Map of codec definitions for the benchmarking harness." 143 | {:reader 144 | {:dependency 'org.clojure/clojure 145 | :version "1.10.0" 146 | :encoder #(.getBytes (pr-str %) "UTF-8") 147 | :decoder #(read-string (String. ^bytes % "UTF-8"))} 148 | 149 | :cbor 150 | {:dependency 'mvxcvi/clj-cbor 151 | :version "0.7.0" 152 | :encoder cbor/encode 153 | :decoder cbor/decode} 154 | 155 | :nippy 156 | {:dependency 'com.taoensso/nippy 157 | :version "2.14.0" 158 | :encoder nippy/freeze 159 | :decoder nippy/thaw} 160 | 161 | :fressian 162 | {:dependency 'org.clojure/data.fressian 163 | :version "0.2.1" 164 | :encoder fressian-encode 165 | :decoder fressian-decode} 166 | 167 | :transit+json 168 | {:dependency 'com.cognitect/transit-clj 169 | :version "0.8.313" 170 | :encoder (partial transit-encode :json) 171 | :decoder (partial transit-decode :json)} 172 | 173 | :transit+msgpack 174 | {:dependency 'com.cognitect/transit-clj 175 | :version "0.8.313" 176 | :encoder (partial transit-encode :msgpack) 177 | :decoder (partial transit-decode :msgpack)}}) 178 | 179 | 180 | ;; ## Benchmarking Functions 181 | 182 | (defn bench-codec 183 | "Benchmark a codec defined in `codecs` against the given `data` value." 184 | [codec-type data] 185 | (let [{:keys [version encoder decoder]} (get codecs codec-type)] 186 | (try 187 | (let [encoded (encoder data) 188 | _decoded (decoder encoded) 189 | encode-stats (crit/quick-benchmark (encoder data) {}) 190 | encode-mean (-> encode-stats :mean first (* 1000)) 191 | decode-stats (crit/quick-benchmark (decoder encoded) {}) 192 | decode-mean (-> decode-stats :mean first (* 1000))] 193 | (printf " + %-15s %7.3f µs %7.3f µs %6s bytes\n" 194 | (name codec-type) 195 | (* 1000 encode-mean) 196 | (* 1000 decode-mean) 197 | (count encoded)) 198 | (flush) 199 | {:codec codec-type 200 | :version version 201 | :size (count encoded) 202 | :encode encode-mean 203 | :decode decode-mean}) 204 | (catch Exception ex 205 | (printf "Benchmark data doesn't round-trip: %s\n" 206 | (ex-message ex)) 207 | (flush) 208 | {:error (ex-message ex) 209 | :codec codec-type 210 | :version version})))) 211 | 212 | 213 | (defn bench-adhoc 214 | "Benchmark a set of codecs against some ad-hoc data structure." 215 | ([data] 216 | (bench-adhoc (keys codecs) data)) 217 | ([targets data] 218 | (printf " %-17s %10s %10s %12s\n" "Codec" "Encode" "Decode" "Size") 219 | (flush) 220 | (doseq [codec-type targets] 221 | (bench-codec codec-type data)))) 222 | 223 | 224 | ;; ## Size Histograms 225 | 226 | (def ^:private size-thresholds 227 | (vec (take 20 (iterate #(* 2 %) 64)))) 228 | 229 | 230 | (defn- update-size-histogram 231 | [histogram size] 232 | (let [index (or (->> size-thresholds 233 | (map vector (range)) 234 | (drop-while #(> size (second %))) 235 | (ffirst)) 236 | (count size-thresholds))] 237 | (-> histogram 238 | (update-in [:buckets index] (fnil inc 0)) 239 | (update :count inc) 240 | (update :size + size)))) 241 | 242 | 243 | (defn- into-size-histogram 244 | [sizes] 245 | (reduce update-size-histogram 246 | {:count 0 247 | :size 0} 248 | sizes)) 249 | 250 | 251 | (defn- print-size-histogram 252 | "Print out a human-consumable version of the histogram. Returns the histogram 253 | value." 254 | [histogram] 255 | (printf "%d objects in %d bytes\n" (:count histogram) (:size histogram)) 256 | (doseq [[index bucket-count] (->> histogram :buckets (sort-by key))] 257 | (printf "%s bytes: %4d\n" 258 | (if-let [threshold (get size-thresholds index)] 259 | (format "< %4d" threshold) 260 | "> ...") 261 | bucket-count)) 262 | (flush) 263 | histogram) 264 | 265 | 266 | (defn- report-sample-store 267 | [store] 268 | (println "Scanning stored sample data...") 269 | (flush) 270 | (->> (block/list store) 271 | (map :size) 272 | (into-size-histogram) 273 | (print-size-histogram))) 274 | 275 | 276 | ;; ## Data Generation 277 | 278 | (defn generate-sample 279 | [size] 280 | ;; TODO: review supported vs generated types 281 | ;; byte-arrays 282 | ;; dates (read as dates) 283 | ;; bignums 284 | ;; bigdecs 285 | ;; sets (probably already generated) 286 | ;; ratio 287 | ;; URI 288 | ;; regex (not as keys in maps or sets) 289 | (let [any-data (gen/recursive-gen gen/container-type 290 | gen/simple-type-printable)] 291 | (gen/generate any-data size))) 292 | 293 | 294 | (defn- generate-sample-data 295 | [store n min-size max-size] 296 | (printf "Generating %d data samples...\n" n) 297 | (flush) 298 | (->> (repeatedly #(generate-sample (+ min-size (rand-int (- max-size min-size))))) 299 | (take n) 300 | (map #(:size (block/store! store (cbor/encode %)))) 301 | (into-size-histogram) 302 | (print-size-histogram))) 303 | 304 | 305 | ;; ## TSV Output 306 | 307 | (def ^:private tsv-columns 308 | [:block-id :block-size :codec :version :size :encode :decode]) 309 | 310 | 311 | (defn- tsv-header 312 | [] 313 | (str/join \tab (map name tsv-columns))) 314 | 315 | 316 | (defn- tsv-report-line 317 | [block-id block-size result] 318 | (->> 319 | (if (contains? result :error) 320 | ["!" "!" "!"] 321 | [(:size result) 322 | (format "%.3f" (* 1000 (:encode result))) 323 | (format "%.3f" (* 1000 (:decode result)))]) 324 | (list* (multihash/hex block-id) 325 | block-size 326 | (subs (str (:codec result)) 1) 327 | (:version result)) 328 | (str/join \tab))) 329 | 330 | 331 | (defn- benched-blocks 332 | "Reads in the data TSV file and constructs a set of the ids of all the blocks 333 | which have been benchmarked already. Returns a map from codec/version pairs 334 | to sets of benchmarked block ids." 335 | [data-file] 336 | (->> 337 | (slurp data-file) 338 | (str/split-lines) 339 | (drop 1) 340 | (map #(str/split % #"\t")) 341 | (reduce 342 | (fn [benched [block-id _ codec version]] 343 | (update benched 344 | [(keyword codec) version] 345 | (fnil conj #{}) 346 | (multihash/decode block-id))) 347 | {}))) 348 | 349 | 350 | (defn- parse-data-file 351 | "Reads the written TSV and returns a sequence of maps for individual 352 | codec/block tests." 353 | [file] 354 | (let [raw-text (slurp file) 355 | lines (map #(str/split % #"\t") (str/split raw-text #"\n")) 356 | header (mapv keyword (first lines)) 357 | rows (next lines)] 358 | (map #(zipmap header %) rows))) 359 | 360 | 361 | (defn- print-spreadsheet-rows 362 | "Print information suitable for uploading to the Google benchmark 363 | spreadsheet. The `info` should be a map of block ids to collections of result 364 | maps as output by `parse-data-file`." 365 | [info targets] 366 | (->> 367 | targets 368 | (mapcat (comp #(vector (str % "/size") (str % "/encode") (str % "/decode")) name)) 369 | (list* "block/id" "block/size") 370 | (str/join "\t") 371 | (println)) 372 | (doseq [[block-id results] info] 373 | (let [codec-results (into {} (map (juxt (comp keyword :codec) identity)) results)] 374 | (when (every? codec-results targets) 375 | (->> 376 | targets 377 | (mapcat (comp (juxt :size :encode :decode) codec-results)) 378 | (list* block-id (:block-size (first results))) 379 | (str/join "\t") 380 | (println)))))) 381 | 382 | 383 | ;; ## Entry Point 384 | 385 | (defn -main 386 | [& args] 387 | (let [store (file-block-store "bench/samples") 388 | data-file (io/file "bench/data.tsv")] 389 | (when-not (.exists data-file) 390 | (io/make-parents data-file) 391 | (spit data-file (str (tsv-header) "\n"))) 392 | (case (first args) 393 | "stats" 394 | (report-sample-store store) 395 | 396 | "gen" ; n min-size max-size 397 | (let [n (Integer/parseInt (nth args 1 "100")) 398 | min-size (Integer/parseInt (nth args 2 "0")) 399 | max-size (Integer/parseInt (nth args 3 "200"))] 400 | (generate-sample-data store n min-size max-size)) 401 | 402 | "run" ; codec... 403 | (let [targets (if-let [names (next args)] 404 | (map keyword names) 405 | (keys codecs)) 406 | benched (->> targets 407 | (map #(vector % (get-in codecs [% :version]))) 408 | (map (juxt first (benched-blocks data-file))) 409 | (into {})) 410 | already-benched? (or (apply set/intersection (vals benched)) #{}) 411 | blocks (->> (block/list store) 412 | (remove (comp already-benched? :id)) 413 | (shuffle) 414 | (vec))] 415 | (printf "Benchmarking codecs %s against %d blocks\n" 416 | (str/join ", " targets) (count blocks)) 417 | (doseq [codec-type targets] 418 | (printf "Warming up %s...\n" (name codec-type)) 419 | (flush) 420 | (let [codec (get codecs codec-type) 421 | encoder (:encoder codec) 422 | decoder (:decoder codec) 423 | encoded (encoder stress-data)] 424 | (crit/warmup-for-jit 1000000000 (fn [] (encoder stress-data))) 425 | (crit/warmup-for-jit 1000000000 (fn [] (decoder encoded))))) 426 | (doseq [[i block] (map-indexed vector blocks)] 427 | (printf "\nTesting block %s (%d/%d %.1f%%)\n" 428 | (multihash/base58 (:id block)) 429 | (inc i) (count blocks) (* 100.0 (/ i (count blocks)))) 430 | (flush) 431 | (let [test-data (with-open [input (block/open (block/get store (:id block)))] 432 | (cbor/decode input))] 433 | (printf " Loaded %d bytes of data\n" (:size block)) 434 | (printf " %-17s %10s %10s %12s\n" 435 | "Codec" "Encode" "Decode" "Size") 436 | (flush) 437 | (doseq [codec-type targets] 438 | (if (contains? (get benched codec-type) (:id block)) 439 | (printf " - %-15s\n" (name codec-type)) 440 | (let [result (bench-codec codec-type test-data) 441 | out-line (tsv-report-line (:id block) (:size block) result)] 442 | (spit data-file (str out-line "\n") :append true))))))) 443 | 444 | "sheet" ; codec... 445 | (let [targets (if-let [names (next args)] 446 | (map keyword names) 447 | (keys codecs)) 448 | results (group-by :block-id (parse-data-file data-file))] 449 | (print-spreadsheet-rows results targets)) 450 | 451 | ;; No args 452 | nil 453 | (binding [*out* *err*] 454 | (println "Usage: lein bench stats") 455 | (println " lein bench gen [n] [min-size] [max-size]") 456 | (println " lein bench run [codec ...]") 457 | (println " lein bench sheet [codec ...]") 458 | (System/exit 1)) 459 | 460 | ;; Unknown command. 461 | (binding [*out* *err*] 462 | (println "Unknown command:" (first args)) 463 | (System/exit 1))))) 464 | -------------------------------------------------------------------------------- /dev/user.clj: -------------------------------------------------------------------------------- 1 | (ns user 2 | "Custom repl customization for local development." 3 | (:require 4 | [clj-cbor.core :as cbor] 5 | [clj-cbor.data.core :as data] 6 | [clj-cbor.test-utils :refer :all] 7 | [clojure.java.io :as io] 8 | [clojure.repl :refer :all] 9 | [clojure.stacktrace :refer [print-cause-trace]] 10 | [clojure.string :as str] 11 | [clojure.tools.namespace.repl :refer [refresh]])) 12 | 13 | 14 | (defn round-trip-equivalent? 15 | "True if the given value round-trips through CBOR to an 'equivalent' value. 16 | Returns the decoded value on success, or throws an exception on failure." 17 | ([value] 18 | (round-trip-equivalent? cbor/default-codec value)) 19 | ([codec value] 20 | (let [encoded (cbor/encode codec value) 21 | decoded (cbor/decode encoded)] 22 | (if (equivalent? value decoded) 23 | decoded 24 | (throw (ex-info "Data did not round-trip to an equivalent value" 25 | {:value value 26 | :encoded (bin->hex encoded) 27 | :decoded decoded})))))) 28 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject mvxcvi/clj-cbor "1.1.1" 2 | :description "Concise Binary Object Representation (RFC 7049)" 3 | :url "https://github.com/greglook/clj-cbor" 4 | :license {:name "Public Domain" 5 | :url "http://unlicense.org/"} 6 | 7 | :deploy-branches ["main"] 8 | :pedantic? :abort 9 | 10 | :aliases 11 | {"coverage" ["with-profile" "+coverage" "cloverage"] 12 | "bench" ["with-profile" "+bench" "run" "-m" "clj-cbor.bench"] 13 | "bench-repl" ["with-profile" "+bench" "repl"]} 14 | 15 | :plugins 16 | [[lein-cloverage "1.2.2"]] 17 | 18 | :dependencies 19 | [[org.clojure/clojure "1.11.1"]] 20 | 21 | :test-selectors 22 | {:default (complement :generative) 23 | :generative :generative} 24 | 25 | :hiera 26 | {:cluster-depth 2 27 | :show-external true 28 | :ignore-ns #{clojure user}} 29 | 30 | :profiles 31 | {:dev 32 | {:dependencies 33 | [[org.clojure/test.check "1.1.1"] 34 | [org.clojure/tools.reader "1.3.6"] 35 | [org.clojure/tools.namespace "1.3.0"]]} 36 | 37 | :repl 38 | {:source-paths ["dev"]} 39 | 40 | :bench 41 | {:source-paths ["bench"] 42 | :dependencies 43 | [[com.clojure-goes-fast/clj-async-profiler "0.5.1"] 44 | [com.cognitect/transit-clj "1.0.329"] 45 | [com.taoensso/nippy "3.1.1"] 46 | [criterium "0.4.6"] 47 | [mvxcvi/blocks "1.1.0"] 48 | [org.clojure/data.fressian "1.0.0"]]}}) 49 | -------------------------------------------------------------------------------- /src/clj_cbor/codec.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.codec 2 | "Main CBOR codec implementation." 3 | (:require 4 | [clj-cbor.data.core :as data] 5 | [clj-cbor.data.float16 :as float16] 6 | [clj-cbor.error :as error] 7 | [clj-cbor.header :as header]) 8 | (:import 9 | clj_cbor.data.simple.SimpleValue 10 | clj_cbor.data.tagged.TaggedValue 11 | (java.io 12 | ByteArrayOutputStream 13 | DataInputStream 14 | DataOutputStream))) 15 | 16 | 17 | ;; ## Codec Protocols 18 | 19 | (defprotocol Encoder 20 | "An _encoder_ is a process that generates the representation format of a CBOR 21 | data item from application information." 22 | 23 | (write-value 24 | [encoder out x] 25 | "Writes the given value `x` to the `DataOutputStream` `out`.")) 26 | 27 | 28 | (defprotocol Decoder 29 | "A _decoder_ is a process that reads a CBOR data item and makes it available 30 | to an application. 31 | 32 | Formally speaking, a decoder contains a parser to break up the input using 33 | the syntax rules of CBOR, as well as a semantic processor to prepare the data 34 | in a form suitable to the application." 35 | 36 | (read-value* 37 | [decoder input header] 38 | "Reads a single value from the `DataInputStream`, given the just-read 39 | initial byte.")) 40 | 41 | 42 | (defn read-value 43 | "Reads a single value from the `DataInputStream`." 44 | [decoder ^DataInputStream input] 45 | (read-value* decoder input (.readUnsignedByte input))) 46 | 47 | 48 | ;; ## Byte Utilities 49 | 50 | (defn- read-bytes 51 | "Read `length` bytes from the input stream. Returns a byte array." 52 | ^bytes 53 | [^DataInputStream input length] 54 | (let [buffer (byte-array length)] 55 | (.readFully input buffer) 56 | buffer)) 57 | 58 | 59 | (defn- write-bytes 60 | "Writes the given value `x` to a byte array." 61 | [encoder x] 62 | (let [out (ByteArrayOutputStream.)] 63 | (with-open [data (DataOutputStream. out)] 64 | (write-value encoder data x)) 65 | (.toByteArray out))) 66 | 67 | 68 | (defn- compare-bytes 69 | "Returns a negative number, zero, or a positive number when `x` is 'less 70 | than', 'equal to', or 'greater than' `y`. 71 | 72 | Sorting is performed on the bytes of the representation of the key data 73 | items without paying attention to the 3/5 bit splitting for major types. 74 | The sorting rules are: 75 | 76 | - If two keys have different lengths, the shorter one sorts earlier; 77 | - If two keys have the same length, the one with the lower value in 78 | (byte-wise) lexical order sorts earlier." 79 | [^bytes x ^bytes y] 80 | (let [xlen (alength x) 81 | ylen (alength y) 82 | get-byte (fn get-byte 83 | [^bytes bs i] 84 | (let [b (aget bs i)] 85 | (if (neg? b) 86 | (+ b 256) 87 | b)))] 88 | (if (= xlen ylen) 89 | ;; Same length - compare content. 90 | (loop [i 0] 91 | (if (< i xlen) 92 | (let [xi (get-byte x i) 93 | yi (get-byte y i)] 94 | (if (= xi yi) 95 | (recur (inc i)) 96 | (compare xi yi))) 97 | 0)) 98 | ;; Compare lengths. 99 | (compare xlen ylen)))) 100 | 101 | 102 | ;; ## Reader Functions 103 | 104 | ;; These functions provide some data-reading capabilities which later 105 | ;; major-type readers are built on. In particular, these help deal with the 106 | ;; four data types which can be _streamed_ with indefinite lengths. 107 | 108 | (defn- read-chunks 109 | "Reads chunks from the input in a streaming fashion, combining them with the 110 | given reducing function. All chunks must have the given major type and 111 | definite length." 112 | [decoder ^DataInputStream input stream-type reducer] 113 | (loop [state (reducer)] 114 | (let [header (.readUnsignedByte input)] 115 | (if (== header 0xFF) 116 | ;; Break code, finish up result. 117 | (reducer state) 118 | ;; Read next value. 119 | (let [[chunk-type info] (header/decode header)] 120 | (cond 121 | ;; Illegal element type. 122 | (not= stream-type chunk-type) 123 | (error/*handler* 124 | ::illegal-chunk-type 125 | (str stream-type " stream may not contain chunks of type " 126 | chunk-type) 127 | {:stream-type stream-type 128 | :chunk-type chunk-type}) 129 | 130 | ;; Illegal indefinite-length chunk. 131 | (= info 31) 132 | (error/*handler* 133 | ::illegal-stream 134 | (str stream-type " stream chunks must have a definite length") 135 | {:stream-type stream-type}) 136 | 137 | ;; Reduce state with next value. 138 | :else 139 | (recur (reducer state (read-value* decoder input header))))))))) 140 | 141 | 142 | (defn- read-value-stream 143 | "Reads values from the input in a streaming fashion, combining them with the 144 | given reducing function." 145 | [decoder ^DataInputStream input reducer] 146 | (loop [state (reducer)] 147 | (let [header (.readUnsignedByte input)] 148 | (if (== header 0xFF) 149 | ;; Break code, finish up result. 150 | (reducer state) 151 | ;; Read next value. 152 | (recur (reducer state (read-value* decoder input header))))))) 153 | 154 | 155 | ;; ## Major Types 156 | 157 | ;; The header byte of each CBOR encoded data value uses the high-order three 158 | ;; bits to encode the _major type_ of the value. The remaining five bits 159 | ;; contain an additional information code, which often gives the size of the 160 | ;; resulting value. 161 | 162 | 163 | ;; ### Integers 164 | 165 | ;; Integers are represented by major types 0 and 1. Positive integers use type 166 | ;; 0, and the 5-bit additional information is either the integer itself (for 167 | ;; additional information values 0 through 23) or the length of additional 168 | ;; data. 169 | ;; 170 | ;; The encoding for negative integers follows the rules for unsigned integers, 171 | ;; except that the type is 1 and the value is negative one minus the encoded 172 | ;; unsigned integer. 173 | ;; 174 | ;; Additional information 24 means the value is represented in an additional 175 | ;; `uint8`, 25 means a `uint16`, 26 means a `uint32`, and 27 means a `uint64`. 176 | 177 | (def ^:private min-integer 178 | "The minimum integer value representable as a native type." 179 | (-> BigInteger/ONE 180 | (.shiftLeft 64) 181 | (.negate))) 182 | 183 | 184 | (def ^:private max-integer 185 | "The maximum integer value representable as a native type." 186 | (-> BigInteger/ONE 187 | (.shiftLeft 64) 188 | (.subtract BigInteger/ONE))) 189 | 190 | 191 | (defn- representable-integer? 192 | "True if the value is small enough to represent using the normal integer 193 | major-type. 194 | 195 | This is made slightly trickier at the high end of the representable range by 196 | the JVM's lack of unsigned types, so some values that are represented in CBOR 197 | as 8-byte integers must be represented by `BigInt` in memory." 198 | [value] 199 | (and (integer? value) (<= min-integer value max-integer))) 200 | 201 | 202 | (defn- write-integer 203 | "Writes an integer value." 204 | [^DataOutputStream out n] 205 | (if (neg? n) 206 | (header/write out :negative-integer (-' -1 n)) 207 | (header/write out :unsigned-integer n))) 208 | 209 | 210 | ;; ### Byte Strings 211 | 212 | ;; Byte strings are represented by major type 2. The string's length in bytes 213 | ;; is represented following the rules for positive integers (major type 0). 214 | ;; 215 | ;; If the additional info indicates an indefinite length, the header must be 216 | ;; followed by a sequence of definite-length byte strings, terminated with a 217 | ;; break stop code. The chunks will be concatenated together into the final 218 | ;; byte string. 219 | 220 | (defn- write-byte-string 221 | "Writes an array of bytes to the output string as a CBOR byte string." 222 | [^DataOutputStream out bs] 223 | (let [hlen (header/write out :byte-string (count bs))] 224 | (.write out ^bytes bs) 225 | (+ hlen (count bs)))) 226 | 227 | 228 | (defn- concat-bytes 229 | "Reducing function which builds a contiguous byte-array from a sequence of 230 | byte-array chunks." 231 | ([] 232 | (ByteArrayOutputStream.)) 233 | ([buffer] 234 | (.toByteArray ^ByteArrayOutputStream buffer)) 235 | ([buffer v] 236 | (.write ^ByteArrayOutputStream buffer ^bytes v) 237 | buffer)) 238 | 239 | 240 | ;; ### Text Strings 241 | 242 | ;; Major type 3 encodes a text string, specifically a string of Unicode 243 | ;; characters that is encoded as UTF-8 [RFC3629]. 244 | ;; 245 | ;; The format of this type is identical to that of byte strings (major type 2), 246 | ;; that is, as with major type 2, the length gives the number of bytes. This 247 | ;; type is provided for systems that need to interpret or display 248 | ;; human-readable text, and allows the differentiation between unstructured 249 | ;; bytes and text that has a specified repertoire and encoding. 250 | ;; 251 | ;; If the additional info indicates an indefinite length, the header must be 252 | ;; followed by a sequence of definite-length text strings, terminated with a 253 | ;; break stop code. The chunks will be concatenated together into the final 254 | ;; text string. 255 | 256 | (defn- write-text-string 257 | "Write a string of characters to the output as a CBOR text string." 258 | [^DataOutputStream out ts] 259 | (let [text (.getBytes ^String ts "UTF-8") 260 | hlen (header/write out :text-string (count text))] 261 | (.write out text) 262 | (+ hlen (count text)))) 263 | 264 | 265 | (defn- read-text 266 | "Reads a fixed-length text string from the input." 267 | [^DataInputStream input n] 268 | (String. (read-bytes input n) "UTF-8")) 269 | 270 | 271 | (defn- concat-text 272 | "Reducing function which builds a contiguous string from a sequence of string 273 | chunks." 274 | ([] 275 | (StringBuilder.)) 276 | ([buffer] 277 | (str buffer)) 278 | ([buffer v] 279 | (.append ^StringBuilder buffer ^String v) 280 | buffer)) 281 | 282 | 283 | ;; ### Data Arrays 284 | 285 | ;; Arrays of data items are encoded using major type 4. Arrays are used to 286 | ;; represent both lists and vectors in Clojure. Items in an array do not need 287 | ;; to all be of the same type. 288 | ;; 289 | ;; The array's length follows the rules for byte strings (major type 2), except 290 | ;; that the length denotes the number of data items, not the length in bytes 291 | ;; that the array takes up. 292 | ;; 293 | ;; If the additional info indicates an indefinite length, the header must be 294 | ;; followed by a sequence of element data values, terminated with a break stop 295 | ;; code. 296 | 297 | (defn- write-array 298 | "Writes an array of data items to the output. The array will be encoded with 299 | a definite length, so `xs` will be fully realized." 300 | [encoder ^DataOutputStream out xs] 301 | (let [hlen (header/write out :data-array (count xs))] 302 | (reduce 303 | (fn write-element 304 | [len x] 305 | (+ len (write-value encoder out x))) 306 | hlen xs))) 307 | 308 | 309 | (defn- build-array 310 | "Reducing function which builds a vector to represent a data array." 311 | ([] []) 312 | ([xs] xs) 313 | ([xs v] (conj xs v))) 314 | 315 | 316 | (defn- read-array 317 | "Read a fixed length array from the input as a vector of elements." 318 | [decoder input ^long n] 319 | {:pre [(pos? n)]} 320 | (let [objs (object-array n)] 321 | (loop [idx 0] 322 | (if (< idx n) 323 | (do (aset objs idx (read-value decoder input)) 324 | (recur (unchecked-inc idx))) 325 | (vec objs))))) 326 | 327 | 328 | ;; ### Data Maps 329 | 330 | ;; Maps of key-value entries are encoded using major type 5. A map is comprised 331 | ;; of pairs of data items, each pair consisting of a key that is immediately 332 | ;; followed by a value. 333 | ;; 334 | ;; The map's length follows the rules for byte strings (major type 2), except 335 | ;; that the length denotes the number of pairs, not the length in bytes that 336 | ;; the map takes up. 337 | ;; 338 | ;; If the additional info indicates an indefinite length, the header must be 339 | ;; followed by a sequence of data value pairs, terminated with a break stop 340 | ;; code. An odd number of values before the break means the map is not 341 | ;; well-formed. 342 | ;; 343 | ;; A map that has duplicate keys may be well-formed, but it is not valid, and 344 | ;; thus it causes indeterminate decoding. 345 | 346 | (defn- write-map-seq 347 | "Writes a sequence of key/value pairs to the output in the order given. The 348 | map will be encoded with a definite length, so `xm` will be fully realized." 349 | [encoder ^DataOutputStream out xm] 350 | (let [hlen (header/write out :data-map (count xm))] 351 | (reduce 352 | (fn encode-entry 353 | [^long sum [k v]] 354 | (let [^long klen (write-value encoder out k) 355 | ^long vlen (write-value encoder out v)] 356 | (+ sum klen vlen))) 357 | hlen 358 | xm))) 359 | 360 | 361 | (defn- write-map-canonical 362 | "Writes a sequence of key/value pairs to the output in canonical order. This 363 | requires serializing the keys in order to compare bytes." 364 | [encoder ^DataOutputStream out xm] 365 | (let [hlen (header/write out :data-map (count xm))] 366 | (->> 367 | xm 368 | (map (fn encode-key 369 | [[k v]] 370 | [(write-bytes encoder k) v])) 371 | (sort-by first compare-bytes) 372 | (reduce 373 | (fn encode-entry 374 | [^long sum [^bytes k v]] 375 | (.write out k) 376 | (let [klen (alength k) 377 | ^long vlen (write-value encoder out v)] 378 | (+ sum klen vlen))) 379 | hlen)))) 380 | 381 | 382 | (defn- write-map 383 | "Writes a map of key/value pairs to the output. The map will be encoded with 384 | a definite length, so `xm` will be fully realized." 385 | [encoder ^DataOutputStream out xm] 386 | (if (:canonical encoder) 387 | (write-map-canonical encoder out xm) 388 | (write-map-seq encoder out xm))) 389 | 390 | 391 | (defn- build-map 392 | "Reducing function which builds a map from a sequence of alternating key and 393 | value elements." 394 | ([] 395 | [{}]) 396 | ([[m k :as state]] 397 | (if (= 1 (count state)) 398 | m 399 | (error/*handler* 400 | ::missing-map-value 401 | (str "Encoded map did not contain a value for key: " 402 | (pr-str k)) 403 | {:map m, :key k}))) 404 | ([[m k :as state] e] 405 | (if (= 1 (count state)) 406 | (if (contains? m e) 407 | ;; Duplicate key error. 408 | (error/*handler* 409 | ::duplicate-map-key 410 | (str "Encoded map contains duplicate key: " 411 | (pr-str e)) 412 | {:map m, :key e}) 413 | ;; Save key and wait for value. 414 | [m e]) 415 | ;; Add completed entry to map. 416 | [(assoc m k e)]))) 417 | 418 | 419 | (defn- read-map 420 | "Read a fixed length map from the input as a sequence of entries." 421 | [decoder input ^long n] 422 | {:pre [(pos? n)]} 423 | (let [m (java.util.HashMap.)] 424 | (loop [idx 0] 425 | (if (< idx n) 426 | (let [k (read-value decoder input)] 427 | (if (.containsKey m k) 428 | (error/*handler* 429 | ::duplicate-map-key 430 | (str "Encoded map contains duplicate key: " (pr-str k)) 431 | {:map (into {} m) 432 | :key k}) 433 | (do (.put m k (read-value decoder input)) 434 | (recur (unchecked-inc idx))))) 435 | (into {} m))))) 436 | 437 | 438 | ;; ### Sets 439 | 440 | ;; Sets are represented as arrays of elements tagged with code 258. 441 | ;; 442 | ;; This support is implemented here rather than as a normal read/write handler 443 | ;; pair for two reasons. First, unlike the normal write-handlers which operate 444 | ;; on _concrete types_, there are many types which represent the 'set' semantic 445 | ;; in Clojure, and we don't want to maintain a brittle list of such types. That 446 | ;; approach would also prevent easy extension to new set types outside the core 447 | ;; libray. Instead, we use the `set?` predicate to trigger this handler. 448 | ;; 449 | ;; Second, when the codec is in canonical mode, we want to sort the entries in 450 | ;; the set before writing them out. A write handler wouldn't have a way to know 451 | ;; whether the codec had this behavior enabled, requiring coordination between 452 | ;; the codec setting and the selection of a canonical writer vs a regular one. 453 | 454 | (defn- write-set-seq 455 | "Writes a sequence of set entries to the output in the order given. The set 456 | will be encoded with a definite length, so `xm` will be fully realized." 457 | [encoder ^DataOutputStream out tag xs] 458 | (->> 459 | (vec xs) 460 | (data/tagged-value tag) 461 | (write-value encoder out))) 462 | 463 | 464 | (defn- write-set-canonical 465 | "Writes a set of entries to the output in canonical order. This requires 466 | serializing the entries in order to compare bytes." 467 | [encoder ^DataOutputStream out tag xs] 468 | (let [tag-hlen (header/write out :tagged-value tag) 469 | array-hlen (header/write out :data-array (count xs))] 470 | (->> 471 | xs 472 | (map (partial write-bytes encoder)) 473 | (sort compare-bytes) 474 | (reduce 475 | (fn encode-entry 476 | [^long sum ^bytes v] 477 | (.write out v) 478 | (+ sum (alength v))) 479 | (+ tag-hlen array-hlen))))) 480 | 481 | 482 | (defn- write-set 483 | "Writes a set of values to the output as a tagged array." 484 | [encoder ^DataOutputStream out tag xs] 485 | (if (:canonical encoder) 486 | (write-set-canonical encoder out tag xs) 487 | (write-set-seq encoder out tag xs))) 488 | 489 | 490 | (defn- read-set 491 | "Parse a set from the value contained in the tagged representation." 492 | [decoder value] 493 | (if (sequential? value) 494 | (let [result (set value)] 495 | (if (and (:strict decoder) (< (count result) (count value))) 496 | (error/*handler* 497 | ::duplicate-set-entry 498 | "Encoded set contains duplicate entries" 499 | {:value value}) 500 | result)) 501 | (error/*handler* 502 | ::tag-handling-error 503 | (str "Sets must be tagged arrays, got: " (class value)) 504 | {:value value}))) 505 | 506 | 507 | ;; ### Tagged Values 508 | 509 | ;; Major type 6 is used for optional semantic tagging of other CBOR values. 510 | 511 | (defn- write-tagged 512 | "Writes out a tagged value." 513 | ([encoder ^DataOutputStream out ^TaggedValue tv] 514 | (write-tagged encoder out (.tag tv) (.value tv))) 515 | ([encoder ^DataOutputStream out tag value] 516 | (let [hlen (header/write out :tagged-value tag) 517 | vlen (write-value encoder out value)] 518 | (+ hlen ^long vlen)))) 519 | 520 | 521 | (defn- read-tagged 522 | "Read a tagged value from the input stream." 523 | [decoder ^DataInputStream input info] 524 | (let [tag (header/read-code input info) 525 | value (read-value decoder input) 526 | read-handlers (:read-handlers decoder)] 527 | (if (= tag data/set-tag) 528 | (read-set decoder value) 529 | (try 530 | (if-let [handler (read-handlers tag)] 531 | ;; TODO: better error reporting 532 | (handler value) 533 | (if (:strict decoder) 534 | (error/*handler* 535 | ::unknown-tag 536 | (str "Unknown tag code " tag) 537 | {:tag tag, :value value}) 538 | (data/tagged-value tag value))) 539 | (catch Exception ex 540 | (error/*handler* 541 | ::tag-handling-error 542 | (.getMessage ex) 543 | (assoc (ex-data ex) ::error ex))))))) 544 | 545 | 546 | ;; ### Simple Values 547 | 548 | ;; Major type 7 is for two types of data: floating-point numbers and "simple 549 | ;; values" that do not need any content, as well as the "break" stop code. Each 550 | ;; value of the 5-bit additional information in the initial byte has its own 551 | ;; separate meaning. 552 | ;; 553 | ;; Like the major types for integers, items of this major type do not carry 554 | ;; content data; all the information is in the initial bytes. 555 | 556 | (defn- write-boolean 557 | "Writes a boolean simple value to the output." 558 | [^DataOutputStream out x] 559 | (.writeByte out (if x 0xF5 0xF4)) 560 | 1) 561 | 562 | 563 | (defn- write-null 564 | "Writes a 'null' simple value to the output." 565 | [^DataOutputStream out] 566 | (.writeByte out 0xF6) 567 | 1) 568 | 569 | 570 | (defn- write-undefined 571 | "Writes an 'undefined' simple value to the output." 572 | [^DataOutputStream out] 573 | (.writeByte out 0xF7) 574 | 1) 575 | 576 | 577 | (defn- write-float 578 | "Writes a floating-point value to the output. Special values zero, NaN, and 579 | +/- Infinity are represented as 16-bit numbers, otherwise the encoding is 580 | determined by class." 581 | [^DataOutputStream out n] 582 | (cond 583 | (zero? (double n)) 584 | (do (header/write-leader out :simple-value 25) 585 | (.writeShort out float16/zero) 586 | 3) 587 | 588 | (Double/isNaN n) 589 | (do (header/write-leader out :simple-value 25) 590 | (.writeShort out float16/not-a-number) 591 | 3) 592 | 593 | (Double/isInfinite n) 594 | (do (header/write-leader out :simple-value 25) 595 | (.writeShort out (if (pos? (double n)) 596 | float16/positive-infinity 597 | float16/negative-infinity)) 598 | 3) 599 | 600 | (instance? Float n) 601 | (do (header/write-leader out :simple-value 26) 602 | (.writeFloat out (float n)) 603 | 5) 604 | 605 | :else 606 | (do (header/write-leader out :simple-value 27) 607 | (.writeDouble out (double n)) 608 | 9))) 609 | 610 | 611 | (defn- write-simple 612 | "Writes a generic simple value for the given code and returns the number of 613 | bytes written. Does not handle floating-point or reserved values." 614 | [^DataOutputStream out ^SimpleValue x] 615 | (let [n (.n x)] 616 | (cond 617 | (<= 0 n 23) 618 | (do (header/write-leader out :simple-value n) 619 | 1) 620 | 621 | (<= 32 n 255) 622 | (do (header/write-leader out :simple-value 24) 623 | (.writeByte out n) 624 | 2) 625 | 626 | :else 627 | (error/*handler* 628 | ::illegal-simple-type 629 | (str "Illegal or reserved simple value: " n) 630 | {:code n})))) 631 | 632 | 633 | (defn- unknown-simple 634 | "Helper function to construct an unknown simple value from the given code." 635 | [decoder value] 636 | (if (:strict decoder) 637 | (error/*handler* 638 | ::unknown-simple-value 639 | (str "Unknown simple value " value) 640 | {:code value}) 641 | (data/simple-value value))) 642 | 643 | 644 | ;; ## Codec Implementation 645 | 646 | ;; ### Encoding Functions 647 | 648 | (defn- write-native 649 | "Writes the value `x` as one of the native CBOR values and return the number 650 | of bytes written. Returns nil if `x` is not a native type." 651 | [codec out x] 652 | (cond 653 | ;; Special and simple values 654 | (nil? x) (write-null out) 655 | (boolean? x) (write-boolean out x) 656 | (= data/undefined x) (write-undefined out) 657 | (data/simple-value? x) (write-simple out x) 658 | 659 | ;; Numbers 660 | (representable-integer? x) (write-integer out x) 661 | (float? x) (write-float out x) 662 | 663 | ;; Byte and text strings 664 | (char? x) (write-text-string out (str x)) 665 | (string? x) (write-text-string out x) 666 | (bytes? x) (write-byte-string out x) 667 | 668 | ;; Tag extensions 669 | (data/tagged-value? x) (write-tagged codec out x) 670 | 671 | :else nil)) 672 | 673 | 674 | (defn- handler-dispatch 675 | "Determine the 'dispatch value' used to select the write handler for the 676 | given value." 677 | [codec x] 678 | (when-let [dispatch (:dispatch codec)] 679 | (dispatch x))) 680 | 681 | 682 | (defn- write-handled 683 | "Writes the value `x` using a write-handler, if one is returned by the 684 | `write-handlers` lookup function. Returns the number of bytes written, or nil 685 | if no handler was found." 686 | [codec out x] 687 | (when-let [write-handlers (:write-handlers codec)] 688 | (when-let [formatter (write-handlers (handler-dispatch codec x))] 689 | (write-value codec out (formatter x))))) 690 | 691 | 692 | (defn- write-collection 693 | "Writes the value `x` as a collection type. Returns the number of bytes 694 | written, or nil if `x` is not a collection." 695 | [codec out x] 696 | (cond 697 | (vector? x) 698 | (write-array codec out x) 699 | 700 | (instance? java.util.Map x) 701 | (write-map codec out x) 702 | 703 | (instance? java.util.Set x) 704 | (write-set codec out data/set-tag x) 705 | 706 | ;; TODO: differentiate for streaming support? 707 | ;; (seq? x) or (instance? Iterable x) 708 | ;; (write-array-stream codec out x) 709 | 710 | (instance? java.util.List x) 711 | (write-array codec out x) 712 | 713 | :else nil)) 714 | 715 | 716 | ;; ### Decoding Functions 717 | 718 | (defn- jump-decode 719 | "Use a jump-table to decode the next value from the input. 720 | 721 | For decoding efficiency, we can directly represent decoding operations based 722 | on the first full byte of an encoded value. This can short circuit 723 | conditional logic in many cases. 724 | 725 | See https://tools.ietf.org/html/rfc7049#appendix-B for details." 726 | [decoder ^DataInputStream input ^long header] 727 | (let [info (bit-and 0x1F header)] 728 | (case (int header) 729 | ;; Positive Integers 730 | 0x00 0 731 | 0x01 1 732 | 0x02 2 733 | 0x03 3 734 | 0x04 4 735 | 0x05 5 736 | 0x06 6 737 | 0x07 7 738 | 0x08 8 739 | 0x09 9 740 | 0x0A 10 741 | 0x0B 11 742 | 0x0C 12 743 | 0x0D 13 744 | 0x0E 14 745 | 0x0F 15 746 | 0x10 16 747 | 0x11 17 748 | 0x12 18 749 | 0x13 19 750 | 0x14 20 751 | 0x15 21 752 | 0x16 22 753 | 0x17 23 754 | 0x18 (header/read-byte input) 755 | 0x19 (header/read-short input) 756 | 0x1A (header/read-int input) 757 | 0x1B (header/read-long input) 758 | 0x1F (error/*handler* 759 | ::illegal-stream 760 | "Encoded integers cannot have indefinite length." 761 | {:code info}) 762 | 763 | ;; Negative Integers 764 | 0x20 -1 765 | 0x21 -2 766 | 0x22 -3 767 | 0x23 -4 768 | 0x24 -5 769 | 0x25 -6 770 | 0x26 -7 771 | 0x27 -8 772 | 0x28 -9 773 | 0x29 -10 774 | 0x2A -11 775 | 0x2B -12 776 | 0x2C -13 777 | 0x2D -14 778 | 0x2E -15 779 | 0x2F -16 780 | 0x30 -17 781 | 0x31 -18 782 | 0x32 -19 783 | 0x33 -20 784 | 0x34 -21 785 | 0x35 -22 786 | 0x36 -23 787 | 0x37 -24 788 | 0x38 (unchecked-dec (unchecked-negate (long (header/read-byte input)))) 789 | 0x39 (unchecked-dec (unchecked-negate (long (header/read-short input)))) 790 | 0x3A (unchecked-dec (unchecked-negate (long (header/read-int input)))) 791 | 0x3B (dec (- (header/read-long input))) 792 | 0x3F (error/*handler* 793 | ::illegal-stream 794 | "Encoded integers cannot have indefinite length." 795 | {:code info}) 796 | 797 | ;; Byte Strings 798 | 0x40 (byte-array 0) 799 | 800 | (0x41 0x42 0x43 0x44 0x45 0x46 0x47 801 | 0x48 0x49 0x4A 0x4B 0x4C 0x4D 0x4E 0x4F 802 | 0x50 0x51 0x52 0x53 0x54 0x55 0x56 0x57) 803 | (read-bytes input info) 804 | 805 | 0x58 (read-bytes input (header/read-byte input)) 806 | 0x59 (read-bytes input (header/read-short input)) 807 | 0x5A (read-bytes input (header/read-int input)) 808 | 0x5B (read-bytes input (header/read-long input)) 809 | 0x5F (read-chunks decoder input :byte-string concat-bytes) 810 | 811 | ;; Text Strings 812 | 0x60 "" 813 | 814 | (0x61 0x62 0x63 0x64 0x65 0x66 0x67 815 | 0x68 0x69 0x6A 0x6B 0x6C 0x6D 0x6E 0x6F 816 | 0x70 0x71 0x72 0x73 0x74 0x75 0x76 0x77) 817 | (read-text input info) 818 | 819 | 0x78 (read-text input (header/read-byte input)) 820 | 0x79 (read-text input (header/read-short input)) 821 | 0x7A (read-text input (header/read-int input)) 822 | 0x7B (read-text input (header/read-long input)) 823 | 0x7F (read-chunks decoder input :text-string concat-text) 824 | 825 | ;; Arrays 826 | 0x80 [] 827 | 0x81 [(read-value decoder input)] 828 | 0x82 [(read-value decoder input) 829 | (read-value decoder input)] 830 | 0x83 [(read-value decoder input) 831 | (read-value decoder input) 832 | (read-value decoder input)] 833 | 0x84 [(read-value decoder input) 834 | (read-value decoder input) 835 | (read-value decoder input) 836 | (read-value decoder input)] 837 | 838 | (0x85 0x86 0x87 839 | 0x88 0x89 0x8A 0x8B 0x8C 0x8D 0x8E 0x8F 840 | 0x90 0x91 0x92 0x93 0x94 0x95 0x96 0x97) 841 | (read-array decoder input info) 842 | 843 | 0x98 (read-array decoder input (header/read-byte input)) 844 | 0x99 (read-array decoder input (header/read-short input)) 845 | 0x9A (read-array decoder input (header/read-int input)) 846 | 0x9B (read-array decoder input (header/read-long input)) 847 | 0x9F (-> (read-value-stream decoder input build-array) 848 | (vary-meta assoc :cbor/streaming true)) 849 | 850 | ;; Maps 851 | 0xA0 {} 852 | 0xA1 {(read-value decoder input) 853 | (read-value decoder input)} 854 | 855 | (0xA2 0xA3 0xA4 0xA5 0xA6 0xA7 856 | 0xA8 0xA9 0xAA 0xAB 0xAC 0xAD 0xAE 0xAF 857 | 0xB0 0xB1 0xB2 0xB3 0xB4 0xB5 0xB6 0xB7) 858 | (read-map decoder input info) 859 | 860 | 0xB8 (read-map decoder input (header/read-byte input)) 861 | 0xB9 (read-map decoder input (header/read-short input)) 862 | 0xBA (read-map decoder input (header/read-int input)) 863 | 0xBB (read-map decoder input (header/read-long input)) 864 | 0xBF (-> (read-value-stream decoder input build-map) 865 | (vary-meta assoc :cbor/streaming true)) 866 | 867 | ;; Tagged Values 868 | (0xC0 0xC1 0xC2 0xC3 0xC4 0xC5 0xC6 0xC7 869 | 0xC8 0xC9 0xCA 0xCB 0xCC 0xCD 0xCE 0xCF 870 | 0xD0 0xD1 0xD2 0xD3 0xD4 0xD5 0xD6 0xD7 871 | 0xD8 0xD9 0xDA 0xDB) 872 | (read-tagged decoder input info) 873 | 874 | 0xDF 875 | (error/*handler* 876 | ::illegal-stream 877 | "Encoded tags cannot have indefinite length." 878 | {:code info}) 879 | 880 | ;; Simple Values 881 | (0xE0 0xE1 0xE2 0xE3 0xE4 0xE5 0xE6 0xE7 882 | 0xE8 0xE9 0xEA 0xEB 0xEC 0xED 0xEE 0xEF 883 | 0xF0 0xF1 0xF2 0xF3) 884 | (unknown-simple decoder info) 885 | 886 | 0xF4 false 887 | 0xF5 true 888 | 0xF6 nil 889 | 0xF7 data/undefined 890 | 0xF8 (unknown-simple decoder (.readUnsignedByte input)) 891 | 0xF9 (float16/decode (.readUnsignedShort input)) 892 | 0xFA (.readFloat input) 893 | 0xFB (.readDouble input) 894 | 895 | (0xFC 0xFD 0xFE) 896 | (error/*handler* 897 | ::illegal-simple-type 898 | (format "Additional information simple-value code %d is reserved." 899 | info) 900 | {:code info}) 901 | 902 | 0xFF 903 | (error/*handler* 904 | ::unexpected-break 905 | "Break encountered outside streaming context." 906 | {}) 907 | 908 | ;; Otherwise, must be some reserved info code. 909 | (error/*handler* 910 | ::header/reserved-info-code 911 | (format "Additional information int code %d is reserved." 912 | info) 913 | {:header header 914 | :info info})))) 915 | 916 | 917 | ;; ## Codec Record 918 | 919 | (defrecord CBORCodec 920 | [dispatch write-handlers read-handlers] 921 | 922 | Encoder 923 | 924 | (write-value 925 | [this out x] 926 | (or (write-native this out x) 927 | (write-handled this out x) 928 | (write-collection this out x) 929 | (error/*handler* 930 | ::unsupported-type 931 | (str "No known encoding for object: " (pr-str x)) 932 | {:value x 933 | :class (class x) 934 | :dispatch (handler-dispatch this x)}))) 935 | 936 | 937 | Decoder 938 | 939 | (read-value* 940 | [this input header] 941 | (jump-decode this input header))) 942 | 943 | 944 | (defn blank-codec 945 | "Constructs a new `CBORCodec` record with default empty field values." 946 | [] 947 | (map->CBORCodec 948 | {:dispatch class 949 | :write-handlers {} 950 | :read-handlers {} 951 | :canonical false 952 | :strict false})) 953 | -------------------------------------------------------------------------------- /src/clj_cbor/core.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.core 2 | "Core CBOR library API." 3 | (:refer-clojure :exclude [spit slurp]) 4 | (:require 5 | [clj-cbor.codec :as codec] 6 | [clj-cbor.error :as error] 7 | [clj-cbor.tags.clojure :as tags.clj] 8 | [clj-cbor.tags.content :as tags.content] 9 | [clj-cbor.tags.numbers :as tags.num] 10 | [clj-cbor.tags.text :as tags.text] 11 | [clj-cbor.tags.time :as tags.time] 12 | [clojure.java.io :as io]) 13 | (:import 14 | (java.io 15 | ByteArrayOutputStream 16 | DataInputStream 17 | DataOutputStream 18 | EOFException 19 | InputStream 20 | OutputStream))) 21 | 22 | 23 | ;; ## Codec Construction 24 | 25 | (defn cbor-codec 26 | "Construct a new CBOR codec with no configuration. Note that this does not 27 | include **any** read and write handlers. See the `default-codec` and the 28 | `default-read-handlers` and `default-write-handlers` vars. 29 | 30 | Arguments may be a map or a sequence of key/value pairs. Valid options are: 31 | 32 | - `:dispatch` function which is called to provide a dispatch value based on 33 | the data to be encoded. (default: `class`) 34 | - `:write-handlers` lookup function from dispatch values to handlers which 35 | take some data to be encoded and return a transformed version of it 36 | (typically a tagged value). 37 | - `:read-handlers` lookup function from integer tags to handlers which take 38 | the embedded item and return the parsed data value." 39 | [& opts] 40 | (merge 41 | (codec/blank-codec) 42 | (if (and (= 1 (count opts)) (map? (first opts))) 43 | (first opts) 44 | (apply hash-map opts)))) 45 | 46 | 47 | (def default-write-handlers 48 | "Map of default write handlers to use, keyed by class. 49 | 50 | The default choice of encoding for instants in time is the numeric epoch 51 | representation (tag 1)." 52 | (merge tags.clj/clojure-write-handlers 53 | tags.content/content-write-handlers 54 | tags.num/number-write-handlers 55 | tags.time/epoch-time-write-handlers 56 | tags.time/epoch-date-write-handlers 57 | tags.text/text-write-handlers)) 58 | 59 | 60 | (def default-read-handlers 61 | "Map of default tag handlers to use, keyed by tag. 62 | 63 | The default choice of representation for instants in time is 64 | `java.time.Instant`." 65 | (merge tags.clj/clojure-read-handlers 66 | tags.content/content-read-handlers 67 | tags.num/number-read-handlers 68 | tags.time/instant-read-handlers 69 | tags.time/local-date-read-handlers 70 | tags.text/text-read-handlers)) 71 | 72 | 73 | (def default-codec 74 | "Default CBOR codec to use when none is specified." 75 | (cbor-codec 76 | :write-handlers default-write-handlers 77 | :read-handlers default-read-handlers)) 78 | 79 | 80 | (defn dispatch-superclasses 81 | "Construct a codec dispatch function which will return the named classes 82 | whenever one of their instances is encountered. 83 | 84 | This lets you use a single superclass to match all of its subclasses. The 85 | classes are tested in the order given; if none match, this returns the 86 | value's own class." 87 | [& classes] 88 | (let [cache (atom {})] 89 | (fn dispatch 90 | [x] 91 | (or (get @cache (class x)) 92 | (let [result (loop [classes classes] 93 | (if (seq classes) 94 | (let [cls (first classes)] 95 | (if (instance? cls x) 96 | cls 97 | (recur (rest classes)))) 98 | (class x)))] 99 | (swap! cache assoc (class x) result) 100 | result))))) 101 | 102 | 103 | ;; ## Encoding Functions 104 | 105 | (defn- data-output-stream 106 | "Coerce the argument to a `DataOutputStream`." 107 | ^DataOutputStream 108 | [output] 109 | (condp instance? output 110 | DataOutputStream 111 | output 112 | 113 | OutputStream 114 | (DataOutputStream. output) 115 | 116 | (throw (IllegalArgumentException. 117 | (str "Cannot coerce argument to an OutputStream: " 118 | (pr-str output)))))) 119 | 120 | 121 | (defn encode 122 | "Encode a single value as CBOR data. 123 | 124 | Writes the value bytes to the provided output stream, or returns the value 125 | as a byte array if no output is given. The `default-codec` is used to encode 126 | the value if none is provided." 127 | ([value] 128 | (encode default-codec value)) 129 | ([encoder value] 130 | (let [buffer (ByteArrayOutputStream.)] 131 | (with-open [output (data-output-stream buffer)] 132 | (encode encoder output value)) 133 | (.toByteArray buffer))) 134 | ([encoder output value] 135 | (let [data-output (data-output-stream output)] 136 | (codec/write-value encoder data-output value)))) 137 | 138 | 139 | (defn encode-seq 140 | "Encode a sequence of values as CBOR data. This eagerly consumes the 141 | input sequence. 142 | 143 | Writes the value bytes to the provided output stream, or returns the value 144 | as a byte array if no output is given. The `default-codec` is used to encode 145 | the value if none is provided." 146 | ([values] 147 | (encode-seq default-codec values)) 148 | ([encoder values] 149 | (let [buffer (ByteArrayOutputStream.)] 150 | (with-open [output (data-output-stream buffer)] 151 | (encode-seq encoder output values)) 152 | (.toByteArray buffer))) 153 | ([encoder output values] 154 | (let [data-output (data-output-stream output)] 155 | (transduce (map (partial encode encoder data-output)) + 0 values)))) 156 | 157 | 158 | ;; ## Decoding Functions 159 | 160 | (defn- data-input-stream 161 | "Coerce the argument to a `DataInputStream`." 162 | [input] 163 | (condp instance? input 164 | DataInputStream 165 | input 166 | 167 | InputStream 168 | (DataInputStream. input) 169 | 170 | (DataInputStream. (io/input-stream input)))) 171 | 172 | 173 | (defn- maybe-read-header 174 | "Attempts to read a header byte from the input stream. If there is no more 175 | input, the `guard` value is returned." 176 | [^DataInputStream input guard] 177 | (try 178 | (.readUnsignedByte input) 179 | (catch EOFException _ 180 | guard))) 181 | 182 | 183 | (defn- try-read-value 184 | "Attemtps to read the rest of a CBOR value from the input stream. If the 185 | input ends during the read, the error handler is called with an 186 | `end-of-input` error." 187 | [decoder input header] 188 | (try 189 | (codec/read-value* decoder input header) 190 | (catch EOFException _ 191 | (error/*handler* 192 | :clj-cbor.codec/end-of-input 193 | "Input data ended while parsing a CBOR value." 194 | {:header header})))) 195 | 196 | 197 | (defn decode 198 | "Decode a single CBOR value from the input. 199 | 200 | This uses the given codec or the `default-codec` if none is provided. If at 201 | the end of the input, this returns `eof-guard` or nil. 202 | 203 | The input must be an input stream or something coercible to one like a file 204 | or byte array. Note that coercion will produce a `BufferedInputStream` if the 205 | argument is not already a stream, so repeated reads will probably not behave 206 | as expected! If you need incremental parsing, make sure you pass in something 207 | that is already an `InputStream`." 208 | ([input] 209 | (decode default-codec input)) 210 | ([decoder input] 211 | (decode decoder input nil)) 212 | ([decoder input eof-guard] 213 | (let [input (data-input-stream input) 214 | header (maybe-read-header input eof-guard)] 215 | (if (identical? header eof-guard) 216 | eof-guard 217 | (try-read-value decoder input header))))) 218 | 219 | 220 | (defn decode-seq 221 | "Decode a sequence of CBOR values from the input. 222 | 223 | This uses the given codec or the `default-codec` if none is provided. The 224 | returned sequence is lazy, so take care that the input stream is not closed 225 | before the entries are realized. 226 | 227 | The input must be an input stream or something coercible to one - see 228 | `decode` for usage notes." 229 | ([input] 230 | (decode-seq default-codec input)) 231 | ([decoder input] 232 | (let [eof-guard (Object.) 233 | data-input (data-input-stream input) 234 | read-data! #(decode decoder data-input eof-guard)] 235 | (take-while 236 | #(not (identical? eof-guard %)) 237 | (repeatedly read-data!))))) 238 | 239 | 240 | ;; ## Utility Functions 241 | 242 | (defn spit 243 | "Opens an output stream to `f`, writes `value` to it, then closes the stream. 244 | 245 | Options may include `:append` to write to the end of the file instead of 246 | truncating." 247 | [f value & opts] 248 | (with-open [out ^OutputStream (apply io/output-stream f opts)] 249 | (encode default-codec out value))) 250 | 251 | 252 | (defn spit-all 253 | "Opens an output stream to `f`, writes each element in `values` to it, then 254 | closes the stream. 255 | 256 | Options may include `:append` to write to the end of the file instead of 257 | truncating." 258 | [f values & opts] 259 | (with-open [out ^OutputStream (apply io/output-stream f opts)] 260 | (encode-seq default-codec out values))) 261 | 262 | 263 | (defn slurp 264 | "Opens an input stream from `f`, reads the first value from it, then closes 265 | the stream." 266 | [f & opts] 267 | (with-open [in ^InputStream (apply io/input-stream f opts)] 268 | (decode default-codec in))) 269 | 270 | 271 | (defn slurp-all 272 | "Opens an input stream from `f`, reads all values from it, then closes the 273 | stream." 274 | [f & opts] 275 | (with-open [in ^InputStream (apply io/input-stream f opts)] 276 | (doall (decode-seq default-codec in)))) 277 | 278 | 279 | (defn self-describe 280 | "Wraps a value with a self-describing CBOR tag. This will cause the first few 281 | bytes of the data to be `D9D9F7`, which serves as a distinguishing header for 282 | format detection." 283 | [value] 284 | (tags.content/format-self-described value)) 285 | -------------------------------------------------------------------------------- /src/clj_cbor/data/core.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.core 2 | "Type definitions and keyword identifiers for CBOR data types." 3 | (:require 4 | [clj-cbor.data.simple :as simple] 5 | [clj-cbor.data.tagged :as tagged]) 6 | (:import 7 | (clj_cbor.data.simple 8 | SimpleValue) 9 | (clj_cbor.data.tagged 10 | TaggedValue))) 11 | 12 | 13 | ;; ## Simple Values 14 | 15 | (def undefined 16 | "Base singleton undefined value." 17 | (simple/->Undefined nil)) 18 | 19 | 20 | (defn simple-value 21 | "Constructs a simple type for the given number." 22 | [n] 23 | (when (or (neg? n) (< 255 n)) 24 | (throw (IllegalArgumentException. 25 | "Simple value codes must be between 0 and 255"))) 26 | (simple/->SimpleValue n nil)) 27 | 28 | 29 | (defn simple-value? 30 | "Predicate which tests whether `x` is a simple CBOR value." 31 | [x] 32 | (instance? SimpleValue x)) 33 | 34 | 35 | ;; ## Tagged Values 36 | 37 | (defn tagged-value 38 | "Constructs a tagged value." 39 | [tag value] 40 | (tagged/->TaggedValue tag value nil)) 41 | 42 | 43 | (defn tagged-value? 44 | "Predicate which tests whether `x` is a CBOR tagged value." 45 | [x] 46 | (instance? TaggedValue x)) 47 | 48 | 49 | (def set-tag 50 | "Tag code used to identify sets of unique values. Hard-coded here to support 51 | canonical encoding." 52 | 258) 53 | -------------------------------------------------------------------------------- /src/clj_cbor/data/float16.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.float16 2 | "Implementation of IEEE 754 half-precision floating point.") 3 | 4 | 5 | (def zero 2r0000000000000000) 6 | (def positive-infinity 2r0111110000000000) 7 | (def negative-infinity 2r1111110000000000) 8 | (def not-a-number 2r0111111000000000) 9 | 10 | 11 | (defn- combine-bits 12 | "Combine values for different fields in the float into a composite binary 13 | value." 14 | [sign exp mant] 15 | (Float/intBitsToFloat 16 | (bit-or (if (zero? sign) 0 Integer/MIN_VALUE) 17 | (bit-shift-left (bit-or exp mant) 13)))) 18 | 19 | 20 | (defn decode 21 | "Returns a `float` value read as a half-precision IEEE floating-point number 22 | from the lower two bytes of x." 23 | [x] 24 | (let [sign (bit-and x 0x8000) 25 | exp (bit-and x 0x7c00) 26 | mant (bit-and x 0x03ff)] 27 | (cond 28 | ;; NaN and Infinite values. 29 | (= exp 0x7c00) 30 | (combine-bits sign 0x3fc00 mant) 31 | 32 | ;; Normalized value. 33 | (not (zero? exp)) 34 | (combine-bits sign (+ exp 0x1c000) mant) 35 | 36 | ;; Subnormal value. 37 | (not (zero? mant)) 38 | (loop [exp 0x1c400 39 | mant mant] 40 | (if (zero? (bit-and mant 0x400)) 41 | (recur (- exp 0x400) (bit-shift-left mant 1)) 42 | (combine-bits sign exp (bit-and mant 0x3ff)))) 43 | 44 | ;; +/- 0 45 | :else 46 | (combine-bits sign exp mant)))) 47 | 48 | 49 | (defn encode 50 | "Returns an integer whose lower two bytes encode the given number in the 51 | half-precision IEEE floating point format." 52 | [x] 53 | (let [fbits (Float/floatToIntBits (float x)) 54 | sign (bit-and (unsigned-bit-shift-right fbits 16) 55 | 0x8000) 56 | ;; rounded value 57 | value (+ (bit-and fbits 0x7fffffff) 0x1000)] 58 | (cond 59 | ;; Value might be or become NaN/Inf. 60 | (>= value 0x47800000) 61 | (if (< value 0x7f800000) 62 | ;; Value was too large, promote to infinity. 63 | (bit-or sign 0x7c00) 64 | ;; Value remains NaN or +/-Inf. 65 | (bit-or sign 0x7c00 (unsigned-bit-shift-right 66 | (bit-and fbits 0x007fffff) 67 | 13))) 68 | 69 | ;; Retain normalized value. 70 | (>= value 0x38800000) 71 | (bit-or sign (unsigned-bit-shift-right (- value 0x38000000) 13)) 72 | 73 | ;; Value is too small, becomes +/-0 74 | (< value 0x33000000) 75 | sign 76 | 77 | ;; Encode subnormal value. 78 | :else 79 | (let [exp (unsigned-bit-shift-right (bit-and fbits 0x7fffffff) 23)] 80 | (bit-or sign 81 | (unsigned-bit-shift-right 82 | (+ (bit-or (bit-and fbits 0x7fffff) 83 | 0x800000) 84 | (unsigned-bit-shift-right 0x800000 (- exp 102))) 85 | (- 126 exp))))))) 86 | -------------------------------------------------------------------------------- /src/clj_cbor/data/simple.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.simple 2 | "Type definition for CBOR simple values.") 3 | 4 | 5 | ;; ## Undefined Value 6 | 7 | (deftype Undefined 8 | [_meta] 9 | 10 | Object 11 | 12 | (toString 13 | [_] 14 | "undefined") 15 | 16 | 17 | (equals 18 | [this that] 19 | (or (identical? this that) 20 | (instance? Undefined that))) 21 | 22 | 23 | (hashCode 24 | [this] 25 | (hash (class this))) 26 | 27 | 28 | clojure.lang.IObj 29 | 30 | (meta 31 | [_] 32 | _meta) 33 | 34 | 35 | (withMeta 36 | [_ meta-map] 37 | (Undefined. meta-map))) 38 | 39 | 40 | ;; ## Generic Simple Value 41 | 42 | (deftype SimpleValue 43 | [^long n _meta] 44 | 45 | Object 46 | 47 | (toString 48 | [_] 49 | (str "simple(" n ")")) 50 | 51 | 52 | (equals 53 | [this that] 54 | (or (identical? this that) 55 | (and (instance? SimpleValue that) 56 | (= n (.-n ^SimpleValue that))))) 57 | 58 | 59 | (hashCode 60 | [this] 61 | (hash-combine (hash (class this)) n)) 62 | 63 | 64 | Comparable 65 | 66 | (compareTo 67 | [_ that] 68 | (compare n (.-n ^SimpleValue that))) 69 | 70 | 71 | clojure.lang.IObj 72 | 73 | (meta 74 | [_] 75 | _meta) 76 | 77 | 78 | (withMeta 79 | [_ meta-map] 80 | (SimpleValue. n meta-map))) 81 | -------------------------------------------------------------------------------- /src/clj_cbor/data/tagged.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.tagged 2 | "Type definition for CBOR tagged values.") 3 | 4 | 5 | (deftype TaggedValue 6 | [tag value _meta] 7 | 8 | Object 9 | 10 | (toString 11 | [_] 12 | (str tag "(" value ")")) 13 | 14 | 15 | (equals 16 | [this that] 17 | (or (identical? this that) 18 | (and (instance? TaggedValue that) 19 | (= tag (.tag ^TaggedValue that)) 20 | (= value (.value ^TaggedValue that))))) 21 | 22 | 23 | (hashCode 24 | [this] 25 | (hash [(class this) tag value])) 26 | 27 | 28 | clojure.lang.IObj 29 | 30 | (meta 31 | [_] 32 | _meta) 33 | 34 | 35 | (withMeta 36 | [_ meta-map] 37 | (TaggedValue. tag value meta-map))) 38 | -------------------------------------------------------------------------------- /src/clj_cbor/error.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.error 2 | "Dynamic error handling support.") 3 | 4 | 5 | (defn codec-exception! 6 | "Default behavior for codec errors." 7 | [error-type message data] 8 | (throw (ex-info message (assoc data :cbor/error error-type)))) 9 | 10 | 11 | (def ^:dynamic *handler* 12 | "Dynamic error handler which can be bound to a function which will be called 13 | with a type keyword, a message, and a map of extra data." 14 | codec-exception!) 15 | 16 | 17 | ;; ## Error Hierarchy 18 | 19 | ;; Encoding errors. 20 | (derive :clj-cbor.header/negative-info-code ::encoding-error) 21 | (derive :clj-cbor.header/overflow-info-code ::encoding-error) 22 | (derive :clj-cbor.codec/illegal-simple-type ::encoding-error) 23 | (derive :clj-cbor.codec/unsupported-type ::encoding-error) 24 | 25 | 26 | ;; Decoding errors. 27 | (derive :clj-cbor.header/reserved-info-code ::decoding-error) 28 | (derive :clj-cbor.codec/illegal-chunk-type ::decoding-error) 29 | (derive :clj-cbor.codec/illegal-stream ::decoding-error) 30 | (derive :clj-cbor.codec/missing-map-value ::decoding-error) 31 | (derive :clj-cbor.codec/duplicate-map-key ::decoding-error) 32 | (derive :clj-cbor.codec/tag-handling-error ::decoding-error) 33 | (derive :clj-cbor.codec/unexpected-break ::decoding-error) 34 | (derive :clj-cbor.codec/end-of-input ::decoding-error) 35 | -------------------------------------------------------------------------------- /src/clj_cbor/header.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.header 2 | "Functions for reading and writing CBOR headers." 3 | (:require 4 | [clj-cbor.error :as error]) 5 | (:import 6 | clojure.lang.BigInt 7 | (java.io 8 | DataInputStream 9 | DataOutputStream))) 10 | 11 | 12 | (def major-types 13 | "Vector of major type keywords, indexed by the three-bit values 0-7. (§2.1)" 14 | [:unsigned-integer 15 | :negative-integer 16 | :byte-string 17 | :text-string 18 | :data-array 19 | :data-map 20 | :tagged-value 21 | :simple-value]) 22 | 23 | 24 | (def ^:private major-type-codes 25 | "Map of major type keywords to code values." 26 | (zipmap major-types (range))) 27 | 28 | 29 | ;; ## Encoding Functions 30 | 31 | (defn write-leader 32 | "Writes a header byte for the given major-type and additional info numbers." 33 | [^DataOutputStream out mtype info] 34 | (let [header (-> (bit-and (major-type-codes mtype) 0x07) 35 | (bit-shift-left 5) 36 | (bit-or (bit-and (long info) 0x1F)))] 37 | (.writeByte out header))) 38 | 39 | 40 | (defn write-byte 41 | "Write an unsigned byte (8-bit) value to the data output stream." 42 | [^DataOutputStream out i] 43 | (.writeByte out i)) 44 | 45 | 46 | (defn write-short 47 | "Write an unsigned short (16-bit) value to the data output stream." 48 | [^DataOutputStream out i] 49 | (.writeShort out i)) 50 | 51 | 52 | (defn write-int 53 | "Write an unsigned int (32-bit) value to the data output stream. Coerces the 54 | value into a signed representation before writing if necessary." 55 | [^DataOutputStream out i] 56 | (.writeInt 57 | out 58 | (if (<= i Integer/MAX_VALUE) 59 | i 60 | (+ Integer/MIN_VALUE (- (dec i) Integer/MAX_VALUE))))) 61 | 62 | 63 | (defn write-long 64 | "Write a long (32-bit) value to the data output stream. Coerces the value 65 | into a signed representation before writing if necessary." 66 | [^DataOutputStream out i] 67 | (.writeLong 68 | out 69 | (if (<= i Long/MAX_VALUE) 70 | i 71 | (+ Long/MIN_VALUE (- (dec i) Long/MAX_VALUE))))) 72 | 73 | 74 | (defn write 75 | "Writes a header byte for the given major-type, plus extra bytes to encode 76 | the given integer code. Always writes the smallest possible representation. 77 | Returns the number of bytes written." 78 | ^long 79 | [^DataOutputStream out mtype i] 80 | (cond 81 | (neg? i) 82 | (error/*handler* 83 | ::negative-info-code 84 | (str "Cannot write negative integer code: " i) 85 | {:code i}) 86 | 87 | (<= i 23) 88 | (do (write-leader out mtype i) 89 | 1) 90 | 91 | (<= i 0xFF) 92 | (do (write-leader out mtype 24) 93 | (write-byte out i) 94 | 2) 95 | 96 | (<= i 0xFFFF) 97 | (do (write-leader out mtype 25) 98 | (write-short out i) 99 | 3) 100 | 101 | (<= i 0xFFFFFFFF) 102 | (do (write-leader out mtype 26) 103 | (write-int out i) 104 | 5) 105 | 106 | (<= i (* -2N Long/MIN_VALUE)) 107 | (do (write-leader out mtype 27) 108 | (write-long out i) 109 | 9) 110 | 111 | :else 112 | (error/*handler* 113 | ::overflow-info-code 114 | (str "Cannot write integer code requiring 9 bytes of space: " i) 115 | {:code i}))) 116 | 117 | 118 | ;; ## Decoding Functions 119 | 120 | (defn decode 121 | "Determines the major type keyword and additional information encoded by the 122 | header byte. §2.1" 123 | [header] 124 | [(-> header 125 | (bit-and 0xE0) 126 | (bit-shift-right 5) 127 | (bit-and 0x07) 128 | (major-types)) 129 | (bit-and header 0x1F)]) 130 | 131 | 132 | (def ^:private two-64 133 | "Constant holding `2^64` for integer manipulation." 134 | (.shiftLeft BigInteger/ONE 64)) 135 | 136 | 137 | (defn read-byte 138 | "Read an unsigned byte (8-bit) value from the data input stream. Promotes the 139 | value to a long for consistency." 140 | [^DataInputStream in] 141 | (long (.readUnsignedByte in))) 142 | 143 | 144 | (defn read-short 145 | "Read an unsigned short (16-bit) value from the data input stream. Promotes 146 | the value to a long for consistency." 147 | [^DataInputStream in] 148 | (long (.readUnsignedShort in))) 149 | 150 | 151 | (defn read-int 152 | "Read an unsigned int (32-bit) value from the data input stream. Promotes the 153 | value to a long for consistency." 154 | [^DataInputStream in] 155 | (bit-and (long (.readInt in)) 0xFFFFFFFF)) 156 | 157 | 158 | (defn read-long 159 | "Read an unsigned long (64-bit) value from the data input stream. Handles 160 | overflowing values by promoting them to a bigint. 161 | 162 | https://tools.ietf.org/html/rfc7049#section-1.2" 163 | [^DataInputStream in] 164 | (let [i (.readLong in)] 165 | (if (neg? i) 166 | (-> (BigInteger/valueOf i) 167 | (.add two-64) 168 | (BigInt/fromBigInteger)) 169 | i))) 170 | 171 | 172 | (defn read-code 173 | "Reads a size value from the initial bytes of the input stream. Returns 174 | either a number, the keyword `:indefinite`, or calls the error handler on 175 | reserved info codes." 176 | [^DataInputStream in ^long info] 177 | (if (< info 24) 178 | ;; Info codes less than 24 directly represent the number. 179 | info 180 | ;; Otherwise, signify the number of bytes following. 181 | (case info 182 | 24 (read-byte in) 183 | 25 (read-short in) 184 | 26 (read-int in) 185 | 27 (read-long in) 186 | (28 29 30) (error/*handler* 187 | ::reserved-info-code 188 | (format "Additional information int code %d is reserved." 189 | info) 190 | {:info info}) 191 | 31 :indefinite))) 192 | -------------------------------------------------------------------------------- /src/clj_cbor/tags/clojure.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.clojure 2 | "Read and write handler support for Clojure types." 3 | (:require 4 | [clj-cbor.data.core :as data]) 5 | (:import 6 | (clojure.lang 7 | Keyword 8 | Symbol 9 | TaggedLiteral))) 10 | 11 | 12 | ;; ## Symbols & Keywords 13 | 14 | (def ^:const identifier-tag 15 | "Keywords and symbols are represented using tag 39 ('identifier') applied to 16 | the string version of the value. This adds three bytes to the size of the 17 | identifier itself for the header, tag code, and string header. Keywords are 18 | symbols whose first character is a colon (:). 19 | 20 | See: [https://github.com/lucas-clemente/cbor-specs/blob/master/id.md](https://github.com/lucas-clemente/cbor-specs/blob/master/id.md)" 21 | 39) 22 | 23 | 24 | (defn format-symbol 25 | [value] 26 | (data/tagged-value identifier-tag (str value))) 27 | 28 | 29 | (defn parse-symbol 30 | [value] 31 | (when-not (string? value) 32 | (throw (ex-info (str "Symbols must be tagged strings, got: " 33 | (class value)) 34 | {:value value}))) 35 | (if (= \: (.charAt ^String value 0)) 36 | (keyword (subs value 1)) 37 | (symbol value))) 38 | 39 | 40 | ;; ## Tagged Literals 41 | 42 | (def ^:const generic-object-tag 43 | "Tagged literals are represented using tag 27 ('generic object') applied to 44 | an array containing two elements. The first element is the string version of 45 | the EDN tag symbol and the second is the tagged literal form. 46 | 47 | See: [http://cbor.schmorp.de/generic-object](http://cbor.schmorp.de/generic-object)" 48 | 27) 49 | 50 | 51 | (defn format-tagged-literal 52 | [value] 53 | (data/tagged-value 54 | generic-object-tag 55 | [(str (:tag value)) (:form value)])) 56 | 57 | 58 | (defn parse-tagged-literal 59 | [value] 60 | (when-not (and (sequential? value) (= 2 (count value))) 61 | (throw (ex-info (str "Sets must be tagged two-element arrays, got: " 62 | (class value)) 63 | {:value value}))) 64 | (tagged-literal (symbol (first value)) (second value))) 65 | 66 | 67 | ;; ## Codec Formatter/Handler Maps 68 | 69 | (def clojure-write-handlers 70 | "Map of Clojure types to write handler functions." 71 | {Keyword format-symbol 72 | Symbol format-symbol 73 | TaggedLiteral format-tagged-literal}) 74 | 75 | 76 | (def clojure-read-handlers 77 | "Map of tag codes to read handlers to parse Clojure values." 78 | {generic-object-tag parse-tagged-literal 79 | identifier-tag parse-symbol}) 80 | -------------------------------------------------------------------------------- /src/clj_cbor/tags/content.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.content 2 | "Read and write handler support for content sharing and encoding hints." 3 | (:require 4 | [clj-cbor.data.core :as data])) 5 | 6 | 7 | ;; ## Self-Describe CBOR 8 | 9 | ;; In many applications, it will be clear from the context that CBOR is being 10 | ;; employed for encoding a data item. For instance, a specific protocol might 11 | ;; specify the use of CBOR, or a media type is indicated that specifies its 12 | ;; use. However, there may be applications where such context information is 13 | ;; not available, such as when CBOR data is stored in a file and disambiguating 14 | ;; metadata is not in use. Here, it may help to have some distinguishing 15 | ;; characteristics for the data itself. 16 | 17 | (def ^:const self-describe-cbor-tag 18 | "Tag 55799 is defined for self-described CBOR values. It does not impart any 19 | special semantics on the data item that follows; that is, the semantics of a 20 | data item tagged with tag 55799 is exactly identical to the semantics of the 21 | data item itself." 22 | 55799) 23 | 24 | 25 | (defn format-self-described 26 | [value] 27 | (data/tagged-value self-describe-cbor-tag value)) 28 | 29 | 30 | ;; ## Codec Formatter/Handler Maps 31 | 32 | (def content-write-handlers 33 | "Map of misc types to write handler functions." 34 | {}) 35 | 36 | 37 | (def content-read-handlers 38 | "Map of tag codes to read handlers to parse misc values." 39 | {self-describe-cbor-tag identity}) 40 | -------------------------------------------------------------------------------- /src/clj_cbor/tags/numbers.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.numbers 2 | "Built-in tag support for the number extensions in RFC 7049. See section 3 | 2.4.2." 4 | (:require 5 | [clj-cbor.data.core :as data]) 6 | (:import 7 | (clojure.lang 8 | BigInt 9 | Ratio) 10 | (java.math 11 | BigDecimal 12 | BigInteger))) 13 | 14 | 15 | ;; ## Bignums 16 | 17 | ;; Bignums are integers that do not fit into the basic integer representations 18 | ;; provided by major types 0 and 1. 19 | 20 | (def ^:const positive-bignum-tag 21 | "Tag 2 is for positive bignums, which are encoded as a byte string data item. 22 | This is interpreted as an unsigned integer `n` in network byte order." 23 | 2) 24 | 25 | 26 | (def ^:const negative-bignum-tag 27 | "Tag 3 is for negative bignums. These are encoded the same as for positive 28 | bignums (tag 2), but the value of the bignum is `-1 - n`." 29 | 3) 30 | 31 | 32 | (defn format-bignum 33 | [value] 34 | (let [big-integer (biginteger value)] 35 | (if-not (neg? big-integer) 36 | (data/tagged-value 37 | positive-bignum-tag 38 | (.toByteArray big-integer)) 39 | (data/tagged-value 40 | negative-bignum-tag 41 | (-> big-integer 42 | (.add BigInteger/ONE) 43 | (.negate) 44 | (.toByteArray)))))) 45 | 46 | 47 | (defn parse-positive-bignum 48 | [value] 49 | (when-not (bytes? value) 50 | (throw (ex-info (str "Bignums must be represented as a tagged byte string, got: " 51 | (class value)) 52 | {:value value}))) 53 | (bigint (BigInteger. ^bytes value))) 54 | 55 | 56 | (defn parse-negative-bignum 57 | [value] 58 | (when-not (bytes? value) 59 | (throw (ex-info (str "Bignums must be represented as a tagged byte string, got: " 60 | (class value)) 61 | {:value value}))) 62 | (-> (BigInteger. ^bytes value) 63 | (.add BigInteger/ONE) 64 | (.negate) 65 | (bigint))) 66 | 67 | 68 | ;; ## Decimal Fractions 69 | 70 | ;; Decimal fractions combine an integer mantissa with a base-10 scaling factor. 71 | ;; They are most useful if an application needs the exact representation of a 72 | ;; decimal fraction such as 1.1 because there is no exact representation for 73 | ;; many decimal fractions in binary floating point. 74 | 75 | (def ^:const big-decimal-tag 76 | "Tag 4 indicates a decimal fraction represented by a tagged array with two 77 | items, an integer exponent and an integer or bignum mantissa. The value of a 78 | decimal fraction is `m*(10**e)`." 79 | 4) 80 | 81 | 82 | (defn format-big-decimal 83 | [^BigDecimal value] 84 | (let [exponent (.scale value) 85 | mantissa (.unscaledValue value)] 86 | (data/tagged-value big-decimal-tag [(- exponent) mantissa]))) 87 | 88 | 89 | (defn parse-big-decimal 90 | [value] 91 | (when-not (and (sequential? value) (= 2 (count value))) 92 | (throw (ex-info (str "Decimal fractions must be represented with a two-element array, got: " 93 | (pr-str value)) 94 | {:value value}))) 95 | (let [[exponent mantissa] value] 96 | (BigDecimal. (biginteger mantissa) (int (- exponent))))) 97 | 98 | 99 | ;; ## Ratios 100 | 101 | (def ^:const ratio-tag 102 | "Tag 30 is used to represent a rational number composed of two integers, a 103 | numerator and a denominator. 104 | 105 | See: [http://peteroupc.github.io/CBOR/rational.html](http://peteroupc.github.io/CBOR/rational.html)" 106 | 30) 107 | 108 | 109 | (defn format-ratio 110 | [value] 111 | (data/tagged-value ratio-tag [(numerator value) (denominator value)])) 112 | 113 | 114 | (defn parse-ratio 115 | [value] 116 | (when-not (and (sequential? value) (= 2 (count value))) 117 | (throw (ex-info (str "Rational numbers must be represented with a two-element array, got: " 118 | (pr-str value)) 119 | {:value value}))) 120 | (let [[numerator denominator] value] 121 | (Ratio. (biginteger numerator) (biginteger denominator)))) 122 | 123 | 124 | ;; ## Codec Formatter/Handler Maps 125 | 126 | (def number-write-handlers 127 | "Map of number types to write handler functions." 128 | {BigInt format-bignum 129 | BigInteger format-bignum 130 | BigDecimal format-big-decimal 131 | Ratio format-ratio}) 132 | 133 | 134 | (def number-read-handlers 135 | "Map of tag codes to read handlers to parse number values." 136 | {positive-bignum-tag parse-positive-bignum 137 | negative-bignum-tag parse-negative-bignum 138 | big-decimal-tag parse-big-decimal 139 | ratio-tag parse-ratio}) 140 | -------------------------------------------------------------------------------- /src/clj_cbor/tags/text.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.text 2 | "Built-in tag support for the text extensions in RFC 7049. See section 3 | 2.4.4." 4 | (:refer-clojure :exclude [parse-uuid]) 5 | (:require 6 | [clj-cbor.data.core :as data]) 7 | (:import 8 | java.net.URI 9 | java.nio.ByteBuffer 10 | java.util.UUID 11 | java.util.regex.Pattern)) 12 | 13 | 14 | ;; ## URIs 15 | 16 | (def ^:const uri-tag 17 | "Tag 32 indicates that the tagged string represents a Uniform Resource 18 | Identifier." 19 | 32) 20 | 21 | 22 | (defn format-uri 23 | [^URI value] 24 | (data/tagged-value uri-tag (str value))) 25 | 26 | 27 | (defn parse-uri 28 | [value] 29 | (when-not (string? value) 30 | (throw (ex-info (str "URIs must be tagged strings, got: " 31 | (class value)) 32 | {:value value}))) 33 | (URI. value)) 34 | 35 | 36 | ;; ## Patterns 37 | 38 | (def ^:const pattern-tag 39 | "Tag 35 is used to represent regular expressions, expressed as a 40 | Perl-compatible pattern." 41 | 35) 42 | 43 | 44 | (defn format-pattern 45 | [^Pattern value] 46 | (data/tagged-value pattern-tag (str value))) 47 | 48 | 49 | (defn parse-pattern 50 | [value] 51 | (when-not (string? value) 52 | (throw (ex-info (str "Regular expressions must be tagged strings, got: " 53 | (class value)) 54 | {:value value}))) 55 | (Pattern/compile value)) 56 | 57 | 58 | ;; ## UUIDs 59 | 60 | (def ^:const uuid-tag 61 | "UUIDs are represented in binary form as a byte string tagged with code 37. 62 | 63 | See: [https://github.com/lucas-clemente/cbor-specs/blob/master/uuid.md](https://github.com/lucas-clemente/cbor-specs/blob/master/uuid.md)" 64 | 37) 65 | 66 | 67 | (defn format-uuid 68 | [^UUID value] 69 | (let [data (ByteBuffer/allocate 16)] 70 | (.putLong data (.getMostSignificantBits value)) 71 | (.putLong data (.getLeastSignificantBits value)) 72 | (data/tagged-value uuid-tag (.array data)))) 73 | 74 | 75 | (defn parse-uuid 76 | [value] 77 | (when-not (bytes? value) 78 | (throw (ex-info (str "UUIDs must be tagged byte strings, got: " 79 | (class value)) 80 | {:value value}))) 81 | (let [data (ByteBuffer/wrap value)] 82 | (UUID. (.getLong data) (.getLong data)))) 83 | 84 | 85 | ;; ## Codec Formatter/Handler Maps 86 | 87 | (def text-write-handlers 88 | "Map of text types to formatting functions." 89 | {URI format-uri 90 | UUID format-uuid 91 | Pattern format-pattern}) 92 | 93 | 94 | (def text-read-handlers 95 | "Map of tag handlers to parse text values." 96 | {uri-tag parse-uri 97 | pattern-tag parse-pattern 98 | uuid-tag parse-uuid}) 99 | -------------------------------------------------------------------------------- /src/clj_cbor/tags/time.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.time 2 | "Built-in tag support for the time extensions in RFC 7049. See section 3 | 2.4.1. 4 | 5 | This namespace offers interop with both the older `java.util.Date` class as 6 | well as the newer `java.time.Instant`. Support for both timestamp-based 7 | tagged values and the more efficient epoch-based values is included." 8 | (:require 9 | [clj-cbor.data.core :as data]) 10 | (:import 11 | (java.time 12 | Instant 13 | LocalDate) 14 | java.time.format.DateTimeFormatter 15 | java.util.Date)) 16 | 17 | 18 | ;; ## Instants 19 | 20 | ;; Instant values represent a specific point in time, represented in the UTC 21 | ;; timezone. 22 | ;; TODO: java.sql.Timestamp compatibility? What are the java11 module implications? 23 | 24 | (def ^:const string-time-tag 25 | "Tag value 0 is for date/time strings that follow the standard format 26 | described in RFC3339, as refined by Section 3.3 of RFC4287." 27 | 0) 28 | 29 | 30 | (def ^:const epoch-time-tag 31 | "Tag value 1 is for numerical representation of seconds relative to 32 | 1970-01-01T00:00Z in UTC time. 33 | 34 | The tagged item can be a positive or negative integer (major types 0 and 1), 35 | or a floating-point number (major type 7 with additional information 25, 26, 36 | or 27). Note that the number can be negative (time before 1970-01-01T00:00Z) 37 | and, if a floating-point number, indicate fractional seconds." 38 | 1) 39 | 40 | 41 | (defn- tagged-epoch-millis 42 | [epoch-millis] 43 | (data/tagged-value 44 | epoch-time-tag 45 | (if (zero? (mod epoch-millis 1000)) 46 | (long (/ epoch-millis 1000)) 47 | (/ epoch-millis 1000.0)))) 48 | 49 | 50 | (defn- check-epoch-form! 51 | [value] 52 | (when-not (number? value) 53 | (throw (ex-info (str "Tag 1 values must be tagged numbers, got: " 54 | (class value)) 55 | {:value value})))) 56 | 57 | 58 | (defn- check-timestamp-form! 59 | [value] 60 | (when-not (string? value) 61 | (throw (ex-info (str "Tag 0 values must be tagged strings, got: " 62 | (class value)) 63 | {:value value})))) 64 | 65 | 66 | ;; ### java.time.Instant 67 | 68 | (defn format-instant-epoch 69 | "Format a `java.time.Instant` as a tagged numeric epoch offset." 70 | [^Instant value] 71 | (tagged-epoch-millis (.toEpochMilli value))) 72 | 73 | 74 | (defn parse-epoch-instant 75 | "Parse a numeric epoch offset into a `java.time.Instant` value." 76 | [value] 77 | (check-epoch-form! value) 78 | (Instant/ofEpochMilli (long (* value 1000)))) 79 | 80 | 81 | (defn format-instant-string 82 | "Format a `java.time.Instant` as a tagged timestamp string." 83 | [^Instant value] 84 | (data/tagged-value 85 | string-time-tag 86 | (.format DateTimeFormatter/ISO_INSTANT value))) 87 | 88 | 89 | (defn parse-string-instant 90 | "Parse a timestamp string into a `java.time.Instant` value." 91 | [value] 92 | (check-timestamp-form! value) 93 | (Instant/parse value)) 94 | 95 | 96 | ;; ### java.util.Date 97 | 98 | (defn format-date-epoch 99 | "Format a `java.util.Date` as a tagged numeric epoch offset." 100 | [^Date value] 101 | (tagged-epoch-millis (.getTime value))) 102 | 103 | 104 | (defn parse-epoch-date 105 | "Parse a numeric epoch offset into a `java.util.Date` value." 106 | [value] 107 | (check-epoch-form! value) 108 | (Date. (long (* value 1000)))) 109 | 110 | 111 | (defn format-date-string 112 | "Format a `java.util.Date` as a tagged timestamp string." 113 | [^Date value] 114 | (format-instant-string (.toInstant value))) 115 | 116 | 117 | (defn parse-string-date 118 | "Parse a timestamp string into a `java.util.Date` value." 119 | [value] 120 | (check-timestamp-form! value) 121 | (Date/from (parse-string-instant value))) 122 | 123 | 124 | ;; ## Dates 125 | 126 | ;; A local date represents a specific calendar day, without regard to any 127 | ;; particular time-zone. 128 | ;; 129 | ;; See: https://datatracker.ietf.org/doc/draft-ietf-cbor-date-tag/ 130 | ;; TODO: java.sql.Date compatibility? What are the java11 module implications? 131 | 132 | (def ^:const string-date-tag 133 | "Tag value 1004 is for date strings that follow the standard format 134 | described in RFC3339 \"full-date\" production." 135 | 1004) 136 | 137 | 138 | (def ^:const epoch-date-tag 139 | "Tag value 100 (ASCII 'd') is for numerical representation of the epoch date. 140 | The tagged integer is an unsigned or negative value indicating the number of 141 | days since the epoch date 1970-01-01." 142 | 100) 143 | 144 | 145 | ;; ### java.time.LocalDate 146 | 147 | (defn format-local-date-epoch 148 | "Format a `java.time.LocalDate` as a tagged numeric epoch offset." 149 | [^LocalDate value] 150 | (data/tagged-value 151 | epoch-date-tag 152 | (.toEpochDay value))) 153 | 154 | 155 | (defn parse-epoch-local-date 156 | "Parse a numeric epoch offset into a `java.time.LocalDate` value." 157 | [value] 158 | (when-not (integer? value) 159 | (throw (ex-info (str "Tag 100 values must be integers, got: " 160 | (class value)) 161 | {:value value}))) 162 | (LocalDate/ofEpochDay (long value))) 163 | 164 | 165 | (defn format-local-date-string 166 | "Format a `java.time.LocalDate` as a tagged date string." 167 | [^LocalDate value] 168 | (data/tagged-value 169 | string-date-tag 170 | (str value))) 171 | 172 | 173 | (defn parse-string-local-date 174 | "Parse a date string into a `java.time.LocalDate` value." 175 | [value] 176 | (when-not (string? value) 177 | (throw (ex-info (str "Tag 1004 values must be strings, got: " 178 | (class value)) 179 | {:value value}))) 180 | (LocalDate/parse value)) 181 | 182 | 183 | ;; ## Codec Maps 184 | 185 | ;; ### Instants 186 | 187 | (def epoch-time-write-handlers 188 | "Map of date-time types to render as numeric epoch offsets." 189 | {Date format-date-epoch 190 | Instant format-instant-epoch}) 191 | 192 | 193 | (def string-time-write-handlers 194 | "Map of date-time types to render as time strings." 195 | {Date format-date-string 196 | Instant format-instant-string}) 197 | 198 | 199 | (def instant-read-handlers 200 | "Map of tag handlers to parse date-times as `java.time.Instant` values." 201 | {string-time-tag parse-string-instant 202 | epoch-time-tag parse-epoch-instant}) 203 | 204 | 205 | (def date-read-handlers 206 | "Map of tag handlers to parse date-times as `java.util.Date` values." 207 | {string-time-tag parse-string-date 208 | epoch-time-tag parse-epoch-date}) 209 | 210 | 211 | ;; ### Dates 212 | 213 | (def epoch-date-write-handlers 214 | "Map of local-date types to render as numeric epoch offsets." 215 | {LocalDate format-local-date-epoch}) 216 | 217 | 218 | (def string-date-write-handlers 219 | "Map of local-date types to render as date strings." 220 | {LocalDate format-local-date-string}) 221 | 222 | 223 | (def local-date-read-handlers 224 | "Map of tag handlers to parse local-dates as `java.time.LocalDate` values." 225 | {string-date-tag parse-string-local-date 226 | epoch-date-tag parse-epoch-local-date}) 227 | -------------------------------------------------------------------------------- /test/clj_cbor/codec_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.codec-test 2 | "Decoding tests. Test examples are from RFC 7049 Appendix A." 3 | (:require 4 | [clj-cbor.codec :as codec] 5 | [clj-cbor.core :as cbor] 6 | [clj-cbor.data.core :as data] 7 | [clj-cbor.test-utils 8 | :refer [bytes= check-roundtrip decode-hex encoded-hex with-codec]] 9 | [clojure.test :refer [deftest testing is are]])) 10 | 11 | 12 | (deftest byte-utils 13 | (testing "byte comparison" 14 | (is (zero? (#'codec/compare-bytes (byte-array []) (byte-array []))) 15 | "empty bytes are equal") 16 | (is (neg? (#'codec/compare-bytes (byte-array []) (byte-array [0]))) 17 | "empty bytes sort before zero byte") 18 | (is (pos? (#'codec/compare-bytes (byte-array [1]) (byte-array []))) 19 | "one byte sorts after empty bytes") 20 | (is (zero? (#'codec/compare-bytes (byte-array [0 1 2 3]) (byte-array [0 1 2 3])))) 21 | (is (neg? (#'codec/compare-bytes (byte-array [0 1 2]) (byte-array [0 1 2 3])))) 22 | (is (pos? (#'codec/compare-bytes (byte-array [0 1 3]) (byte-array [0 1 2])))) 23 | (is (neg? (#'codec/compare-bytes (byte-array [0 0 3]) (byte-array [0 1 2])))) 24 | (is (neg? (#'codec/compare-bytes (byte-array [0 0 3]) (byte-array [0 -8 2])))))) 25 | 26 | 27 | (deftest integer-typing 28 | (let [roundtrip (comp cbor/decode cbor/encode)] 29 | (testing "direct values" 30 | (are [i] (instance? Long (roundtrip i)) 31 | -24 -1 0 1 23)) 32 | (testing "int8" 33 | (are [i] (instance? Long (roundtrip i)) 34 | -256 -25 24 255)) 35 | (testing "int16" 36 | (are [i] (instance? Long (roundtrip i)) 37 | -65536 -257 256 65535)) 38 | (testing "int32" 39 | (are [i] (instance? Long (roundtrip i)) 40 | -4294967296 -65537 65536 4294967295)) 41 | (testing "int64" 42 | (are [i] (instance? Long (roundtrip i)) 43 | Long/MIN_VALUE -4294967297 4294967296 Long/MAX_VALUE)) 44 | (testing "int64+" 45 | (are [i] (instance? clojure.lang.BigInt (roundtrip i)) 46 | -18446744073709551616N 47 | (dec' Long/MIN_VALUE) 48 | (inc' Long/MAX_VALUE) 49 | 18446744073709551615N)))) 50 | 51 | 52 | (deftest unsigned-integers 53 | (testing "direct values" 54 | (check-roundtrip 0 "00") 55 | (check-roundtrip 1 "01") 56 | (check-roundtrip 10 "0A") 57 | (check-roundtrip 23 "17")) 58 | (testing "uint8" 59 | (check-roundtrip 24 "1818") 60 | (check-roundtrip 25 "1819") 61 | (check-roundtrip 100 "1864") 62 | (check-roundtrip 255 "18FF")) 63 | (testing "uint16" 64 | (check-roundtrip 256 "190100") 65 | (check-roundtrip 1000 "1903E8") 66 | (check-roundtrip 65535 "19FFFF")) 67 | (testing "uint32" 68 | (check-roundtrip 65536 "1A00010000") 69 | (check-roundtrip 1000000 "1A000F4240") 70 | (check-roundtrip 4294967295 "1AFFFFFFFF")) 71 | (testing "uint64" 72 | (check-roundtrip 4294967296 "1B0000000100000000") 73 | (check-roundtrip 1000000000000 "1B000000E8D4A51000") 74 | (check-roundtrip Long/MAX_VALUE "1B7FFFFFFFFFFFFFFF") 75 | (check-roundtrip 18446744073709551615N "1BFFFFFFFFFFFFFFFF")) 76 | (testing "errors" 77 | (is (cbor-error? ::codec/illegal-stream 78 | (decode-hex "1F00"))))) 79 | 80 | 81 | (deftest negative-integers 82 | (testing "direct values" 83 | (check-roundtrip -1 "20") 84 | (check-roundtrip -10 "29") 85 | (check-roundtrip -24 "37")) 86 | (testing "int8" 87 | (check-roundtrip -25 "3818") 88 | (check-roundtrip -100 "3863") 89 | (check-roundtrip -256 "38FF")) 90 | (testing "int16" 91 | (check-roundtrip -257 "390100") 92 | (check-roundtrip -1000 "3903E7") 93 | (check-roundtrip -65536 "39FFFF")) 94 | (testing "int32" 95 | (check-roundtrip -65537 "3A00010000") 96 | (check-roundtrip -1000000 "3A000F423F") 97 | (check-roundtrip -4294967296 "3AFFFFFFFF")) 98 | (testing "int64" 99 | (check-roundtrip -4294967297 "3B0000000100000000") 100 | (check-roundtrip Long/MIN_VALUE "3B7FFFFFFFFFFFFFFF") 101 | (check-roundtrip -18446744073709551616 "3BFFFFFFFFFFFFFFFF")) 102 | (testing "errors" 103 | (is (cbor-error? ::codec/illegal-stream 104 | (decode-hex "3F00"))))) 105 | 106 | 107 | (deftest byte-strings 108 | (testing "direct bytes" 109 | (is (= "40" (encoded-hex (byte-array 0)))) 110 | (is (bytes= [] (decode-hex "40"))) 111 | (is (= "4401020304" (encoded-hex (byte-array [1 2 3 4])))) 112 | (is (bytes= [1 2 3 4] (decode-hex "4401020304")))) 113 | (testing "streamed chunks" 114 | (is (bytes= [1 2 3 4 5] (decode-hex "5F42010243030405FF"))) 115 | (is (cbor-error? {:type ::codec/illegal-chunk-type 116 | :data {:stream-type :byte-string 117 | :chunk-type :unsigned-integer}} 118 | (decode-hex "5F42010201FF"))) 119 | (is (cbor-error? {:type ::codec/illegal-stream 120 | :data {:stream-type :byte-string}} 121 | (decode-hex "5F4201025F4100FFFF"))))) 122 | 123 | 124 | (deftest text-strings 125 | (testing "direct strings" 126 | (check-roundtrip "" "60") 127 | (check-roundtrip "a" "6161") 128 | (check-roundtrip "IETF" "6449455446") 129 | (check-roundtrip "\"\\" "62225C") 130 | (check-roundtrip "\u00fc" "62C3BC") 131 | (check-roundtrip "\u6c34" "63E6B0B4") 132 | (check-roundtrip "\ud800\udd51" "64F0908591")) 133 | (testing "streamed chunks" 134 | (is (= "streaming" (decode-hex "7F657374726561646D696E67FF"))) 135 | (is (cbor-error? {:type ::codec/illegal-chunk-type 136 | :data {:stream-type :text-string 137 | :chunk-type :negative-integer}} 138 | (decode-hex "7F6265732100FF"))) 139 | (is (cbor-error? {:type ::codec/illegal-stream 140 | :data {:stream-type :text-string}} 141 | (decode-hex "7F6265737F6161FFFF"))))) 142 | 143 | 144 | (deftest data-arrays 145 | (testing "encoded size" 146 | (check-roundtrip [] "80") 147 | (check-roundtrip [1 2 3] "83010203") 148 | (check-roundtrip [1 [2 3] [4 5]] "8301820203820405") 149 | (check-roundtrip (range 1 26) "98190102030405060708090A0B0C0D0E0F101112131415161718181819")) 150 | (testing "java compatibility" 151 | (is (= "80" (encoded-hex (java.util.ArrayList.)))) 152 | (is (= "83010203" 153 | (encoded-hex (doto (java.util.ArrayList.) 154 | (.add 1) 155 | (.add 2) 156 | (.add 3))))) 157 | (is (= "820708" 158 | (encoded-hex (doto (java.util.LinkedList.) 159 | (.add 7) 160 | (.add 8)))))) 161 | (testing "streaming" 162 | (is (true? (:cbor/streaming (meta (decode-hex "9FFF"))))) 163 | (is (= [] (decode-hex "9FFF"))) 164 | (is (= [1 [2 3] [4 5]] (decode-hex "9F018202039F0405FFFF"))) 165 | (is (= [1 [2 3] [4 5]] (decode-hex "9F01820203820405FF"))) 166 | (is (= [1 [2 3] [4 5]] (decode-hex "83018202039F0405FF"))) 167 | (is (= [1 [2 3] [4 5]] (decode-hex "83019F0203FF820405"))) 168 | (is (= (range 1 26) (decode-hex "9F0102030405060708090A0B0C0D0E0F101112131415161718181819FF"))))) 169 | 170 | 171 | (deftest data-maps 172 | (testing "encoded size" 173 | (check-roundtrip {} "A0") 174 | (check-roundtrip {1 2, 3 4} "A201020304") 175 | (check-roundtrip {"a" 1, "b" [2 3]} "A26161016162820203") 176 | (check-roundtrip ["a" {"b" "c"}] "826161A161626163") 177 | (check-roundtrip {"a" "A", "b" "B", "c" "C", "d" "D", "e" "E"} 178 | "A56161614161626142616361436164614461656145")) 179 | (testing "java compatibility" 180 | (is (= "A0" (encoded-hex (java.util.HashMap.)))) 181 | (is (= "A201020304" (encoded-hex (doto (java.util.TreeMap.) 182 | (.put 1 2) 183 | (.put 3 4)))))) 184 | (testing "streaming" 185 | (is (true? (:cbor/streaming (meta (decode-hex "BFFF"))))) 186 | (is (= {} (decode-hex "BFFF"))) 187 | (is (= {"a" 1, "b" [2 3]} (decode-hex "BF61610161629F0203FFFF"))) 188 | (is (= ["a" {"b" "c"}] (decode-hex "826161BF61626163FF"))) 189 | (is (= {"Fun" true, "Amt" -2} (decode-hex "BF6346756EF563416D7421FF")))) 190 | (testing "canonical mode" 191 | (let [codec (cbor/cbor-codec :canonical true)] 192 | (is (= "A3000861610243000102626263" 193 | (encoded-hex codec {0 8, "a" 2, (byte-array [0 1 2]) "bc"}))))) 194 | (testing "errors" 195 | (testing "duplicate key in fixed map" 196 | (is (cbor-error? {:type ::codec/duplicate-map-key 197 | :data {:map {"Fun" true}, :key "Fun"}} 198 | (decode-hex "A26346756EF56346756EF4")))) 199 | (testing "duplicate key in streaming map" 200 | (is (cbor-error? {:type ::codec/duplicate-map-key 201 | :data {:map {"Fun" true}, :key "Fun"}} 202 | (decode-hex "BF6346756EF56346756EF4FF")))) 203 | (testing "missing value in streaming map" 204 | (is (cbor-error? {:type ::codec/missing-map-value 205 | :data {:map {}, :key "Fun"}} 206 | (decode-hex "BF6346756EFF")))))) 207 | 208 | 209 | (deftest set-collections 210 | (with-codec {} 211 | (check-roundtrip #{} "D9010280") 212 | (check-roundtrip #{1 2 3} "D9010283010302")) 213 | (testing "read handler" 214 | (is (cbor-error? ::codec/tag-handling-error 215 | (decode-hex "D90102A10102")))) 216 | (testing "strict mode" 217 | (let [codec (cbor/cbor-codec :strict true)] 218 | (is (cbor-error? ::codec/duplicate-set-entry 219 | (decode-hex codec "D90102820101"))))) 220 | (testing "canonical mode" 221 | (let [codec (cbor/cbor-codec :canonical true)] 222 | (is (= "D90102840018406161820304" 223 | (encoded-hex codec #{[3 4] 0 64 "a"}))))) 224 | (testing "java compatibility" 225 | (is (= "D9010280" (encoded-hex (java.util.HashSet.)))) 226 | (is (= "D9010283010203" 227 | (encoded-hex (doto (java.util.TreeSet.) 228 | (.add 1) 229 | (.add 2) 230 | (.add 3))))))) 231 | 232 | 233 | (deftest floating-point-numbers 234 | (testing "special value encoding" 235 | (is (= "F90000" (encoded-hex 0.0))) 236 | (is (= "F90000" (encoded-hex -0.0))) 237 | (is (= "F97E00" (encoded-hex Float/NaN))) 238 | (is (= "F97C00" (encoded-hex Float/POSITIVE_INFINITY))) 239 | (is (= "F9FC00" (encoded-hex Float/NEGATIVE_INFINITY)))) 240 | (testing "half-precision" 241 | (is (instance? Float (decode-hex "F90000"))) 242 | (is (= 0.0 (decode-hex "F90000"))) 243 | (is (= -0.0 (decode-hex "F98000"))) 244 | (is (= 1.0 (decode-hex "F93C00"))) 245 | (is (= 1.5 (decode-hex "F93E00"))) 246 | (is (= 65504.0 (decode-hex "F97BFF"))) 247 | (is (= 5.960464477539063e-8 (decode-hex "F90001"))) 248 | (is (= 0.00006103515625 (decode-hex "F90400"))) 249 | (is (= -4.0 (decode-hex "F9C400"))) 250 | (is (Float/isNaN (decode-hex "F97E00"))) 251 | (is (= Float/POSITIVE_INFINITY (decode-hex "F97C00"))) 252 | (is (= Float/NEGATIVE_INFINITY (decode-hex "F9FC00")))) 253 | (testing "single-precision" 254 | (is (instance? Float (decode-hex "FA47C35000"))) 255 | (check-roundtrip (float 100000.0) "FA47C35000") 256 | (check-roundtrip (float 3.4028234663852886e+38) "FA7F7FFFFF") 257 | (is (Float/isNaN (decode-hex "FA7FC00000"))) 258 | (is (= Float/POSITIVE_INFINITY (decode-hex "FA7F800000"))) 259 | (is (= Float/NEGATIVE_INFINITY (decode-hex "FAFF800000")))) 260 | (testing "double-precision" 261 | (is (instance? Double (decode-hex "FB7FF8000000000000"))) 262 | (check-roundtrip 1.1 "FB3FF199999999999A") 263 | #_(is (= "FB7E37E43C8800759C" (encoded-hex 1.0e+300))) 264 | (is (= 1.0e+300 (decode-hex "FB7E37E43C8800759C"))) 265 | (check-roundtrip -4.1 "FBC010666666666666") 266 | (is (Double/isNaN (decode-hex "FB7FF8000000000000"))) 267 | (is (= Double/POSITIVE_INFINITY (decode-hex "FB7FF0000000000000"))) 268 | (is (= Double/NEGATIVE_INFINITY (decode-hex "FBFFF0000000000000"))))) 269 | 270 | 271 | (deftest simple-values 272 | (testing "special primitives" 273 | (check-roundtrip false "F4") 274 | (check-roundtrip true "F5") 275 | (check-roundtrip nil "F6") 276 | (check-roundtrip data/undefined "F7")) 277 | (testing "generic values" 278 | (check-roundtrip (data/simple-value 16) "F0") 279 | (check-roundtrip (data/simple-value 32) "F820") 280 | (check-roundtrip (data/simple-value 255) "F8FF")) 281 | (testing "reserved codes" 282 | (is (cbor-error? ::codec/illegal-simple-type 283 | (encoded-hex (data/simple-value 24)))) 284 | (is (cbor-error? {:type ::codec/illegal-simple-type 285 | :data {:code 28}} 286 | (decode-hex "FC"))) 287 | (is (cbor-error? {:type ::codec/illegal-simple-type 288 | :data {:code 29}} 289 | (decode-hex "FD"))) 290 | (is (cbor-error? {:type ::codec/illegal-simple-type 291 | :data {:code 30}} 292 | (decode-hex "FE"))) 293 | (is (cbor-error? ::codec/unexpected-break 294 | (decode-hex "FF")))) 295 | (testing "strict mode" 296 | (is (cbor-error? ::codec/unknown-simple-value 297 | (decode-hex (cbor/cbor-codec :strict true) "EF"))))) 298 | 299 | 300 | (deftest tagged-values 301 | (testing "non-strict mode" 302 | (is (= (data/tagged-value 11 "a") (decode-hex "CB6161")))) 303 | (testing "strict mode" 304 | (let [codec (cbor/cbor-codec :strict true)] 305 | (is (cbor-error? ::codec/unknown-tag 306 | (decode-hex codec "CC08"))))) 307 | (testing "handler error" 308 | (let [handler (fn [_ _] (throw (Exception. "BOOM"))) 309 | codec (cbor/cbor-codec :read-handlers {0 handler})] 310 | (is (cbor-error? ::codec/tag-handling-error 311 | (decode-hex codec "C00F"))))) 312 | (testing "unknown types" 313 | (is (cbor-error? ::codec/unsupported-type 314 | (encoded-hex (java.util.Currency/getInstance "USD")))))) 315 | 316 | 317 | (deftest jump-table 318 | (testing "Positive Integers" 319 | (dotimes [i 24] 320 | (is (= i (cbor/decode (byte-array [i]))))) 321 | (is (= 1 (decode-hex "1801"))) 322 | (is (= 2 (decode-hex "190002"))) 323 | (is (= 3 (decode-hex "1A00000003"))) 324 | (is (= 4 (decode-hex "1B0000000000000004"))) 325 | (is (cbor-error? ::codec/illegal-stream 326 | (decode-hex "1F")))) 327 | (testing "Negative Integers" 328 | (dotimes [i 24] 329 | (is (= (dec (- i)) (cbor/decode (byte-array [(+ 0x20 i)]))))) 330 | (is (= -1 (decode-hex "3800"))) 331 | (is (= -2 (decode-hex "390001"))) 332 | (is (= -3 (decode-hex "3A00000002"))) 333 | (is (= -4 (decode-hex "3B0000000000000003"))) 334 | (is (cbor-error? ::codec/illegal-stream 335 | (decode-hex "3F")))) 336 | (testing "Byte Strings" 337 | (is (bytes= [] (decode-hex "40"))) 338 | (dotimes [i 24] 339 | (let [bs (vec (range i)) 340 | hex (->> (cons (+ 0x40 i) bs) 341 | (map #(format "%02X" %)) 342 | (apply str))] 343 | (is (bytes= bs (decode-hex hex))))) 344 | (is (bytes= [42] (decode-hex "58012A"))) 345 | (is (bytes= [16] (decode-hex "59000110"))) 346 | (is (bytes= [96] (decode-hex "5A0000000160"))) 347 | (is (bytes= [27] (decode-hex "5B00000000000000011B")))) 348 | (testing "Text Strings" 349 | (is (= "" (decode-hex "60"))) 350 | (dotimes [i 24] 351 | (let [string (apply str (repeat i "X")) 352 | hex (apply str (format "%02X" (+ 0x60 i)) (repeat i "58"))] 353 | (is (= string (decode-hex hex))))) 354 | (is (= "X" (decode-hex "780158"))) 355 | (is (= "XX" (decode-hex "7900025858"))) 356 | (is (= "XXX" (decode-hex "7A00000003585858"))) 357 | (is (= "XXXX" (decode-hex "7B000000000000000458585858")))) 358 | (testing "Arrays" 359 | (is (= [] (decode-hex "80"))) 360 | (dotimes [i 24] 361 | (let [nums (vec (repeat i 0)) 362 | hex (apply str (format "%02X" (+ 0x80 i)) (repeat i "00"))] 363 | (is (= nums (decode-hex hex))))) 364 | (is (= [0] (decode-hex "980100"))) 365 | (is (= [0] (decode-hex "99000100"))) 366 | (is (= [0] (decode-hex "9A0000000100"))) 367 | (is (= [0] (decode-hex "9B000000000000000100")))) 368 | (testing "Maps" 369 | (is (= {} (decode-hex "A0"))) 370 | (dotimes [i 24] 371 | (let [m (into {} (map (juxt identity identity)) (range i)) 372 | hex (->> (range i) 373 | (mapcat (juxt identity identity)) 374 | (cons (+ 0xA0 i)) 375 | (map #(format "%02X" %)) 376 | (apply str))] 377 | (is (= m (decode-hex hex))))) 378 | (is (= {0 0} (decode-hex "B8010000"))) 379 | (is (= {0 0} (decode-hex "B900010000"))) 380 | (is (= {0 0} (decode-hex "BA000000010000"))) 381 | (is (= {0 0} (decode-hex "BB00000000000000010000")))) 382 | (testing "Tagged Values" 383 | ;; 0-4, 27, 30, 32, 35, 37, 39, 55799 are all handled by built-in types. 384 | ;; 5-23 385 | (doseq [i (range 5 24)] 386 | (is (= (data/tagged-value i 0) (decode-hex (str (format "%02X" (+ 0xC0 i)) "00"))))) 387 | (is (= (data/tagged-value 5 0) (decode-hex "D80500"))) 388 | (is (= (data/tagged-value 5 0) (decode-hex "D9000500"))) 389 | (is (= (data/tagged-value 5 0) (decode-hex "DA0000000500"))) 390 | (is (= (data/tagged-value 5 0) (decode-hex "DB000000000000000500"))) 391 | (is (cbor-error? ::codec/illegal-stream 392 | (decode-hex "DF00")))) 393 | (testing "Simple Values" 394 | (doseq [v (range 20)] 395 | (is (= (data/simple-value v) (cbor/decode (byte-array [(+ 0xE0 v)]))))) 396 | (is (false? (decode-hex "F4"))) 397 | (is (true? (decode-hex "F5"))) 398 | (is (nil? (decode-hex "F6"))) 399 | (is (= data/undefined (decode-hex "F7"))) 400 | (is (= (data/simple-value 117) (decode-hex "F875"))) 401 | (is (= 1.5 (decode-hex "F93E00"))) 402 | (is (= (float 100000.0) (decode-hex "FA47C35000"))) 403 | (is (= 1.1 (decode-hex "FB3FF199999999999A")))) 404 | (testing "Illegal Headers" 405 | (is (cbor-error? {:type :clj-cbor.header/reserved-info-code 406 | :data {:header 157 407 | :info 29}} 408 | (decode-hex "9D"))))) 409 | -------------------------------------------------------------------------------- /test/clj_cbor/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.core-test 2 | (:require 3 | [clj-cbor.codec :as codec] 4 | [clj-cbor.core :as cbor] 5 | [clj-cbor.test-utils :refer [decode-hex-all]] 6 | [clojure.test :refer [deftest testing is]]) 7 | (:import 8 | (java.io 9 | ByteArrayInputStream 10 | File 11 | IOException) 12 | java.time.Instant)) 13 | 14 | 15 | (deftest codec-construction 16 | (let [codec (cbor/cbor-codec)] 17 | (is (satisfies? codec/Encoder codec)) 18 | (is (satisfies? codec/Decoder codec))) 19 | (let [codec (cbor/cbor-codec 20 | {:read-handlers {0 :x} 21 | :write-handlers {Long :y}})] 22 | (is (satisfies? codec/Encoder codec)) 23 | (is (satisfies? codec/Decoder codec)) 24 | (is (= {0 :x} (:read-handlers codec))) 25 | (is (= {Long :y} (:write-handlers codec)))) 26 | (let [codec (cbor/cbor-codec 27 | :read-handlers {0 :x} 28 | :write-handlers {Long :y})] 29 | (is (satisfies? codec/Encoder codec)) 30 | (is (satisfies? codec/Decoder codec)) 31 | (is (= {0 :x} (:read-handlers codec))) 32 | (is (= {Long :y} (:write-handlers codec))))) 33 | 34 | 35 | (deftest superclass-dispatch 36 | (let [dispatch (cbor/dispatch-superclasses 37 | IOException 38 | RuntimeException 39 | Exception)] 40 | (is (= IOException (dispatch (IOException. "couldn't input/output"))) 41 | "directly matching class should return") 42 | (is (= IOException (dispatch (IOException. "couldn't input/output"))) 43 | "second call should be cached") 44 | (is (= RuntimeException (dispatch (NullPointerException.))) 45 | "subclasses should return the first superclass") 46 | (is (= RuntimeException (dispatch (NullPointerException.))) 47 | "subclasses should return the first superclass") 48 | (is (= Exception (dispatch (InterruptedException.))) 49 | "subclasses should return the first superclass") 50 | (is (= Long (dispatch 123)) 51 | "should default to the argument class"))) 52 | 53 | 54 | (deftest encode-output-guard 55 | (is (thrown-with-msg? IllegalArgumentException #"coerce argument to an OutputStream" 56 | (cbor/encode cbor/default-codec :not-an-output-stream 123)))) 57 | 58 | 59 | (deftest multi-value-coding 60 | (is (= [true 123 :abc] (cbor/decode-seq (cbor/encode-seq [true 123 :abc]))))) 61 | 62 | 63 | (deftest eof-handling 64 | (testing "sequence of values" 65 | (is (= (list :a 123 true "foo") (decode-hex-all cbor/default-codec "D827623A61187BF563666F6F")))) 66 | (testing "interrupted data" 67 | (is (cbor-error? :clj-cbor.codec/end-of-input 68 | (decode-hex-all "D827623A61187BF563666F"))))) 69 | 70 | 71 | (deftest lazy-decoding 72 | (let [data (cbor/encode-seq [:a :b :c]) 73 | input (ByteArrayInputStream. data)] 74 | (is (= :a (cbor/decode input))) 75 | (is (= :b (cbor/decode input))) 76 | (is (= [:c] (cbor/decode-seq input))))) 77 | 78 | 79 | (deftest slobber-utils 80 | (let [file (File/createTempFile "clj-cbor.core-test" ".cbor") 81 | data-a [3 4.7 true "foo" :bar 'baz/qux {(Instant/ofEpochMilli 1234567890) {:foo 123.456M}}] 82 | data-b {:foo "bar", :abc 123}] 83 | (is (= 60 (cbor/spit file data-a))) 84 | (is (= 21 (cbor/spit file data-b :append true))) 85 | (is (= 81 (.length file))) 86 | (is (= data-a (cbor/slurp file))) 87 | (is (= [data-a data-b] (cbor/slurp-all file))) 88 | (.delete file) 89 | (is (= 81 (cbor/spit-all file [data-b data-a]))) 90 | (is (= [data-b data-a] (cbor/slurp-all file))))) 91 | -------------------------------------------------------------------------------- /test/clj_cbor/data/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.core-test 2 | (:require 3 | [clj-cbor.data.core :as data] 4 | [clojure.test :refer [deftest is]])) 5 | 6 | 7 | (deftest simple-value-construction 8 | (is (data/simple-value? (data/simple-value 21))) 9 | (is (thrown? IllegalArgumentException 10 | (data/simple-value -1))) 11 | (is (thrown? IllegalArgumentException 12 | (data/simple-value 256)))) 13 | -------------------------------------------------------------------------------- /test/clj_cbor/data/float16_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.float16-test 2 | (:require 3 | [clj-cbor.data.float16 :as float16] 4 | [clojure.test :refer [deftest testing is]])) 5 | 6 | 7 | (deftest float-decoding 8 | (testing "constants" 9 | (is (= 0.0 (float16/decode float16/zero))) 10 | (is (= Float/POSITIVE_INFINITY (float16/decode float16/positive-infinity))) 11 | (is (= Float/NEGATIVE_INFINITY (float16/decode float16/negative-infinity))) 12 | (is (Float/isNaN (float16/decode float16/not-a-number)))) 13 | (testing "examples" 14 | (is (= 1.0 (float16/decode 2r0011110000000000))) 15 | (is (= -2.0 (float16/decode 2r1100000000000000))) 16 | (is (= 65504.0 (float16/decode 2r0111101111111111))))) 17 | 18 | 19 | (deftest float-encoding 20 | (testing "constants" 21 | (is (= float16/zero (float16/encode 0.0))) 22 | (is (= float16/positive-infinity (float16/encode Float/POSITIVE_INFINITY))) 23 | (is (= float16/negative-infinity (float16/encode Float/NEGATIVE_INFINITY))) 24 | (is (= float16/not-a-number (float16/encode Float/NaN)))) 25 | (testing "examples" 26 | (is (= 2r1000000000000000 (float16/encode -0.0))) 27 | (is (= 2r0011110000000000 (float16/encode 1.0))) 28 | (is (= 2r0011110000000001 (float16/encode 1.0009765625))) 29 | (is (= 2r1100000000000000 (float16/encode -2.0))) 30 | (is (= 2r0111101111111111 (float16/encode 65504.0))) 31 | (is (= 2r0011010101010101 (float16/encode 1/3)))) 32 | (testing "edge cases" 33 | (is (= float16/positive-infinity (float16/encode 65520.0)) 34 | "overflow to positive infinity") 35 | (is (= float16/negative-infinity (float16/encode -65520.0)) 36 | "underflow to negative infinity") 37 | (is (= 2r0000000000000010 (float16/encode 0.0000001)) 38 | "subnormal value") 39 | (is (= 0 (float16/encode 0.000000001)) 40 | "underflow to zero"))) 41 | -------------------------------------------------------------------------------- /test/clj_cbor/data/simple_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.simple-test 2 | (:require 3 | [clj-cbor.data.simple :refer [->Undefined ->SimpleValue]] 4 | [clojure.test :refer [deftest testing is]])) 5 | 6 | 7 | (deftest undefined-values 8 | (let [undefined (->Undefined nil)] 9 | (testing "representation" 10 | (is (= "undefined" (str undefined)))) 11 | (testing "equality" 12 | (is (= undefined undefined)) 13 | (is (= undefined (->Undefined nil)) 14 | "all undefined values should be equal") 15 | (is (not= undefined :foo))) 16 | (testing "hash code" 17 | (is (integer? (hash undefined))) 18 | (is (= (hash undefined) (hash (->Undefined nil))) 19 | "all undefined values should have the same hash")) 20 | (testing "metadata" 21 | (is (nil? (meta undefined))) 22 | (is (= undefined (vary-meta undefined assoc :x 123)) 23 | "metadata does not affect equality") 24 | (is (= {:x 123} (meta (vary-meta undefined assoc :x 123))) 25 | "metadata is preserved")))) 26 | 27 | 28 | (deftest simple-values 29 | (let [simple24 (->SimpleValue 24 nil) 30 | simple64 (->SimpleValue 64 nil)] 31 | (testing "representation" 32 | (is (= "simple(24)" (str simple24))) 33 | (is (= "simple(64)" (str simple64)))) 34 | (testing "equality" 35 | (is (= simple24 simple24) 36 | "should be reflexive") 37 | (is (= simple64 simple64) 38 | "should be reflexive") 39 | (is (= simple24 (->SimpleValue 24 nil)) 40 | "different instances of the same value should be equal") 41 | (is (not= simple24 simple64) 42 | "different simple values should not be equal") 43 | (is (not= simple64 :foo) 44 | "different types should not be equal")) 45 | (testing "hash code" 46 | (is (integer? (hash simple24))) 47 | (is (= (hash simple24) (hash simple24)) 48 | "should be stable") 49 | (is (= (hash simple24) (hash (->SimpleValue 24 nil))) 50 | "different instances of the same value should have the same hash") 51 | (is (not= (hash simple24) (hash simple64)) 52 | "different simple values should have different hashes")) 53 | (testing "comparable" 54 | (is (zero? (compare simple24 simple24)) 55 | "identical instances should compare the same") 56 | (is (zero? (compare simple24 (->SimpleValue 24 nil))) 57 | "different instances of the same value should compare the same") 58 | (is (neg? (compare simple24 simple64)) 59 | "lower numbered values should sort earlier") 60 | (is (pos? (compare simple64 simple24)) 61 | "higher numbered values should sort later")) 62 | (testing "metadata" 63 | (is (nil? (meta simple24))) 64 | (is (= simple24 (vary-meta simple24 assoc :x 123)) 65 | "should not affect equality") 66 | (is (= (hash simple24) (hash (vary-meta simple24 assoc :y true))) 67 | "should not affect hash code") 68 | (is (zero? (compare simple24 (vary-meta simple24 assoc :foo :abc))) 69 | "should not affect comparison") 70 | (is (= {:x 123} (meta (vary-meta simple24 assoc :x 123))) 71 | "metadata is preserved")))) 72 | -------------------------------------------------------------------------------- /test/clj_cbor/data/tagged_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.data.tagged-test 2 | (:require 3 | [clj-cbor.data.tagged :refer [->TaggedValue]] 4 | [clojure.test :refer [deftest testing is]])) 5 | 6 | 7 | (deftest tagged-values 8 | (let [uri-value (->TaggedValue 32 "https://mvxcvi.com/" nil) 9 | ratio-value (->TaggedValue 30 [1 3] nil)] 10 | (testing "representation" 11 | (is (= "32(https://mvxcvi.com/)" (str uri-value))) 12 | (is (= "30([1 3])" (str ratio-value)))) 13 | (testing "equality" 14 | (is (= uri-value uri-value) 15 | "should be reflexive") 16 | (is (= ratio-value (->TaggedValue 30 [1 3] nil)) 17 | "different instances of the same value should be equal") 18 | (is (not= ratio-value (->TaggedValue 30 [1 4] nil)) 19 | "different values of the same tag should not be equal") 20 | (is (not= uri-value ratio-value) 21 | "different simple values should not be equal") 22 | (is (not= uri-value :foo) 23 | "different types should not be equal")) 24 | (testing "hash code" 25 | (is (integer? (hash uri-value))) 26 | (is (= (hash uri-value) (hash uri-value)) 27 | "should be stable") 28 | (is (= (hash ratio-value) (hash (->TaggedValue 30 [1 3] nil))) 29 | "different instances of the same value should have the same hash") 30 | (is (not= (hash uri-value) (hash ratio-value)) 31 | "different simple values should have different hashes")) 32 | (testing "metadata" 33 | (is (nil? (meta uri-value))) 34 | (is (= uri-value (vary-meta uri-value assoc :x 123)) 35 | "should not affect equality") 36 | (is (= (hash ratio-value) (hash (vary-meta ratio-value assoc :y true))) 37 | "should not affect hash code") 38 | (is (= {:x 123} (meta (vary-meta uri-value assoc :x 123))) 39 | "metadata is preserved")))) 40 | -------------------------------------------------------------------------------- /test/clj_cbor/generative_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.generative-test 2 | "Generative tests for finding complex data values which may not round-trip 3 | correctly." 4 | (:require 5 | [clj-cbor.core :as cbor] 6 | [clj-cbor.test-utils :refer [equivalent?]] 7 | [clojure.test.check.clojure-test :refer [defspec]] 8 | [clojure.test.check.generators :as gen] 9 | [clojure.test.check.properties :as prop])) 10 | 11 | 12 | (defspec ^:generative round-trip-equivalence 100 13 | (prop/for-all [x (gen/scale #(max 20 %) gen/any-printable)] 14 | (equivalent? x (cbor/decode (cbor/encode x))))) 15 | -------------------------------------------------------------------------------- /test/clj_cbor/header_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.header-test 2 | (:require 3 | [clj-cbor.header :as header] 4 | [clj-cbor.test-utils :refer [bin->hex]] 5 | [clojure.test :refer [deftest testing is]])) 6 | 7 | 8 | (defn check-header-int 9 | [value hex] 10 | (let [baos (java.io.ByteArrayOutputStream.)] 11 | (with-open [data-out (java.io.DataOutputStream. baos)] 12 | (header/write data-out :unsigned-integer value)) 13 | (is (= hex (bin->hex (.toByteArray baos)))))) 14 | 15 | 16 | (deftest header-int-representation 17 | (is (thrown? clojure.lang.ExceptionInfo 18 | (check-header-int -1 "")) 19 | "error on negative values") 20 | (check-header-int 0 "00") 21 | (check-header-int 23 "17") 22 | (check-header-int 24 "1818") 23 | (check-header-int 255 "18FF") 24 | (check-header-int 256 "190100") 25 | (check-header-int 65535 "19FFFF") 26 | (check-header-int 65536 "1A00010000") 27 | (check-header-int 2147483647 "1A7FFFFFFF") 28 | (check-header-int 2147483648 "1A80000000") 29 | (check-header-int 4294967295 "1AFFFFFFFF") 30 | (check-header-int 4294967296 "1B0000000100000000") 31 | (check-header-int 9223372036854775807 "1B7FFFFFFFFFFFFFFF") 32 | (check-header-int 9223372036854775808 "1B8000000000000000") 33 | (is (thrown? clojure.lang.ExceptionInfo 34 | (check-header-int 18446744073709551617N "1BFFFFFFFFFFFFFFFF")) 35 | "error on values over 8 bytes")) 36 | 37 | 38 | (deftest header-int-reading 39 | (testing "values 0 - 23" 40 | (dotimes [i 24] 41 | (is (= i (header/read-code nil i)) 42 | "should be represented directly"))) 43 | (testing "reserved values" 44 | (is (thrown? clojure.lang.ExceptionInfo 45 | (header/read-code nil 28))) 46 | (is (thrown? clojure.lang.ExceptionInfo 47 | (header/read-code nil 29))) 48 | (is (thrown? clojure.lang.ExceptionInfo 49 | (header/read-code nil 30)))) 50 | (testing "indefinite length" 51 | (is (= :indefinite (header/read-code nil 31)))) 52 | (testing "invalid value" 53 | (is (thrown? IllegalArgumentException 54 | (header/read-code nil 32))))) 55 | -------------------------------------------------------------------------------- /test/clj_cbor/tags/clojure_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.clojure-test 2 | (:require 3 | [clj-cbor.tags.clojure :as tags.clj] 4 | [clj-cbor.test-utils :refer [check-roundtrip with-codec]] 5 | [clojure.test :refer [deftest testing is]])) 6 | 7 | 8 | (deftest keywords 9 | (testing "parsing checks" 10 | (is (thrown-with-msg? Exception #"must be tagged strings" 11 | (tags.clj/parse-symbol 123)))) 12 | (with-codec {:write-handlers tags.clj/clojure-write-handlers 13 | :read-handlers tags.clj/clojure-read-handlers} 14 | (check-roundtrip :a "D827623A61") 15 | (check-roundtrip :abc/def "D827683A6162632F646566") 16 | (check-roundtrip 'foo "D82763666F6F") 17 | (check-roundtrip 'bar/baz "D827676261722F62617A"))) 18 | 19 | 20 | (deftest tagged-literals 21 | (testing "parsing checks" 22 | (is (thrown-with-msg? Exception #"must be tagged two-element arrays" 23 | (tags.clj/parse-tagged-literal 123)))) 24 | (with-codec {:write-handlers tags.clj/clojure-write-handlers 25 | :read-handlers tags.clj/clojure-read-handlers} 26 | (check-roundtrip (juxt :tag :form) (tagged-literal 'foo 123) "D81B8263666F6F187B"))) 27 | -------------------------------------------------------------------------------- /test/clj_cbor/tags/content_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.content-test 2 | (:require 3 | [clj-cbor.core :as cbor] 4 | [clj-cbor.test-utils :refer [decode-hex encoded-hex]] 5 | [clojure.test :refer [deftest is]])) 6 | 7 | 8 | (deftest self-described 9 | (is (= "D9D9F70F" (encoded-hex (cbor/self-describe 15)))) 10 | (is (= 15 (decode-hex cbor/default-codec "D9D9F70F")))) 11 | -------------------------------------------------------------------------------- /test/clj_cbor/tags/numbers_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.numbers-test 2 | (:require 3 | [clj-cbor.tags.numbers :as tags.num] 4 | [clj-cbor.test-utils :refer [check-roundtrip with-codec]] 5 | [clojure.test :refer [deftest testing is]])) 6 | 7 | 8 | (deftest bignums 9 | (testing "parsing checks" 10 | (is (thrown-with-msg? Exception #"must be represented as a tagged byte string" 11 | (tags.num/parse-positive-bignum "not-bytes"))) 12 | (is (thrown-with-msg? Exception #"must be represented as a tagged byte string" 13 | (tags.num/parse-negative-bignum "not-bytes")))) 14 | (with-codec {:write-handlers tags.num/number-write-handlers 15 | :read-handlers tags.num/number-read-handlers} 16 | (check-roundtrip 18446744073709551616N "C249010000000000000000") 17 | (check-roundtrip -18446744073709551617N "C349010000000000000000"))) 18 | 19 | 20 | (deftest decimal-fractions 21 | (testing "parsing checks" 22 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 23 | (tags.num/parse-big-decimal "not-sequential"))) 24 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 25 | (tags.num/parse-big-decimal []))) 26 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 27 | (tags.num/parse-big-decimal [0]))) 28 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 29 | (tags.num/parse-big-decimal [0 123 456])))) 30 | (with-codec {:write-handlers tags.num/number-write-handlers 31 | :read-handlers tags.num/number-read-handlers} 32 | (check-roundtrip 273.15M "C48221196AB3") 33 | (check-roundtrip 3.14159M "C482241A0004CB2F"))) 34 | 35 | 36 | (deftest rationals 37 | (testing "parsing checks" 38 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 39 | (tags.num/parse-ratio "not-sequential"))) 40 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 41 | (tags.num/parse-ratio []))) 42 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 43 | (tags.num/parse-ratio [0]))) 44 | (is (thrown-with-msg? Exception #"must be represented with a two-element array" 45 | (tags.num/parse-ratio [0 123 456])))) 46 | (with-codec {:write-handlers tags.num/number-write-handlers 47 | :read-handlers tags.num/number-read-handlers} 48 | (check-roundtrip 1/3 "D81E820103") 49 | (check-roundtrip 11/37 "D81E820B1825"))) 50 | -------------------------------------------------------------------------------- /test/clj_cbor/tags/text_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.text-test 2 | (:require 3 | [clj-cbor.tags.text :as tags.text] 4 | [clj-cbor.test-utils :refer [check-roundtrip with-codec]] 5 | [clojure.test :refer [deftest testing is]]) 6 | (:import 7 | java.net.URI 8 | java.util.UUID)) 9 | 10 | 11 | (deftest uri-coding 12 | (testing "parsing checks" 13 | (is (thrown-with-msg? Exception #"must be tagged strings" 14 | (tags.text/parse-uri (byte-array 4))))) 15 | (with-codec {:write-handlers tags.text/text-write-handlers 16 | :read-handlers tags.text/text-read-handlers} 17 | (check-roundtrip (URI. "http://www.example.com") "D82076687474703A2F2F7777772E6578616D706C652E636F6D"))) 18 | 19 | 20 | (deftest pattern-coding 21 | (testing "parsing checks" 22 | (is (thrown-with-msg? Exception #"must be tagged strings" 23 | (tags.text/parse-pattern (byte-array 4))))) 24 | (with-codec {:write-handlers tags.text/text-write-handlers 25 | :read-handlers tags.text/text-read-handlers} 26 | (check-roundtrip str #"abc123" "D82366616263313233"))) 27 | 28 | 29 | (deftest uuid-coding 30 | (testing "parsing checks" 31 | (is (thrown-with-msg? Exception #"must be tagged byte strings" 32 | (tags.text/parse-uuid true)))) 33 | (with-codec {:write-handlers tags.text/text-write-handlers 34 | :read-handlers tags.text/text-read-handlers} 35 | (check-roundtrip (UUID/fromString "dbd559ef-333b-4f11-96b1-b0654babe844") 36 | "D82550DBD559EF333B4F1196B1B0654BABE844"))) 37 | -------------------------------------------------------------------------------- /test/clj_cbor/tags/time_test.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.tags.time-test 2 | (:require 3 | [clj-cbor.tags.time :as tags.time] 4 | [clj-cbor.test-utils :refer [check-roundtrip with-codec]] 5 | [clojure.test :refer [deftest testing is]]) 6 | (:import 7 | (java.time 8 | Instant 9 | LocalDate) 10 | java.util.Date)) 11 | 12 | 13 | (deftest epoch-datetimes 14 | (testing "parsing checks" 15 | (is (thrown-with-msg? Exception #"must be tagged numbers" 16 | (tags.time/parse-epoch-instant "not-a-number")))) 17 | (testing "java.util.Date" 18 | (with-codec {:write-handlers tags.time/epoch-time-write-handlers 19 | :read-handlers tags.time/date-read-handlers} 20 | (check-roundtrip (Date. 1363896240000) "C11A514B67B0") 21 | (check-roundtrip (Date. 1363896240500) "C1FB41D452D9EC200000"))) 22 | (testing "java.time.Instant" 23 | (with-codec {:write-handlers tags.time/epoch-time-write-handlers 24 | :read-handlers tags.time/instant-read-handlers} 25 | (check-roundtrip (Instant/ofEpochMilli 1363896240000) "C11A514B67B0") 26 | (check-roundtrip (Instant/ofEpochMilli 1363896240500) "C1FB41D452D9EC200000")))) 27 | 28 | 29 | (deftest string-datetimes 30 | (testing "parsing checks" 31 | (is (thrown-with-msg? Exception #"must be tagged strings" 32 | (tags.time/parse-string-instant 123456.789)))) 33 | (testing "java.util.Date" 34 | (with-codec {:write-handlers tags.time/string-time-write-handlers 35 | :read-handlers tags.time/date-read-handlers} 36 | (check-roundtrip (Date. 1363896240000) "C074323031332D30332D32315432303A30343A30305A"))) 37 | (testing "java.time.Instant" 38 | (with-codec {:write-handlers tags.time/string-time-write-handlers 39 | :read-handlers tags.time/instant-read-handlers} 40 | (check-roundtrip (Instant/ofEpochMilli 1363896240000) "C074323031332D30332D32315432303A30343A30305A")))) 41 | 42 | 43 | (deftest epoch-dates 44 | (testing "parsing checks" 45 | (is (thrown-with-msg? Exception #"values must be integers" 46 | (tags.time/parse-epoch-local-date "not-a-number")))) 47 | (testing "java.time.LocalDate" 48 | (with-codec {:write-handlers tags.time/epoch-date-write-handlers 49 | :read-handlers tags.time/local-date-read-handlers} 50 | (check-roundtrip (LocalDate/ofEpochDay 0) "D86400") 51 | (check-roundtrip (LocalDate/parse "2020-06-14") "D8641947FB")))) 52 | 53 | 54 | (deftest string-dates 55 | (testing "parsing checks" 56 | (is (thrown-with-msg? Exception #"values must be strings" 57 | (tags.time/parse-string-local-date 1234)))) 58 | (testing "java.time.LocalDate" 59 | (with-codec {:write-handlers tags.time/string-date-write-handlers 60 | :read-handlers tags.time/local-date-read-handlers} 61 | (check-roundtrip (LocalDate/ofEpochDay 0) "D903EC6A313937302D30312D3031") 62 | (check-roundtrip (LocalDate/parse "2020-06-14") "D903EC6A323032302D30362D3134")))) 63 | -------------------------------------------------------------------------------- /test/clj_cbor/test_utils.clj: -------------------------------------------------------------------------------- 1 | (ns clj-cbor.test-utils 2 | (:require 3 | [clj-cbor.core :as cbor] 4 | [clj-cbor.error :as error] 5 | [clojure.string :as str] 6 | [clojure.test :refer [assert-expr do-report is]]) 7 | (:import 8 | (java.util 9 | List 10 | Map 11 | Set) 12 | java.util.regex.Pattern)) 13 | 14 | 15 | ;; ## Test Assertions 16 | 17 | (defmethod assert-expr 'cbor-error? 18 | [msg [_ expected & body]] 19 | `(let [errors# (volatile! []) 20 | record-error# (fn [error-type# message# data#] 21 | (let [error# {:type error-type# 22 | :message message# 23 | :data data#}] 24 | (vswap! errors# conj error#) 25 | (throw (ex-info "Abort CBOR codec" {:type ::interrupt}))))] 26 | (binding [error/*handler* record-error#] 27 | (try 28 | ~@body 29 | (catch clojure.lang.ExceptionInfo ex# 30 | (when-not (= ::interrupt (:type (ex-data ex#))) 31 | (throw ex#)))) 32 | (if-let [~'error (first @errors#)] 33 | ~(if (keyword? expected) 34 | `(is (~'= ~expected (:type ~'error)) ~msg) 35 | `(is (~'= ~expected (select-keys ~'error ~(vec (keys expected)))) ~msg)) 36 | (do-report 37 | {:type :fail 38 | :message ~(or msg (str "Expected error " expected " not found in handler calls")) 39 | :expected '~expected 40 | :actual nil}))))) 41 | 42 | 43 | ;; ## Value Equivalence 44 | 45 | (defn bytes= 46 | "Compares the byte-array `value` to the sequence of `expected` byte values. 47 | Returns true if the array has the same length and matching byte values." 48 | [expected value] 49 | (and (bytes? value) (= (seq expected) (seq value)))) 50 | 51 | 52 | (defmulti equivalent? 53 | "True if the two values are 'equivalent' after accounting for various 54 | idiosyncracies like Character support, NaN, and Patterns." 55 | (fn dispatch 56 | [a _b] 57 | (class a))) 58 | 59 | 60 | (defmethod equivalent? :default 61 | [a b] 62 | (= a b)) 63 | 64 | 65 | (defmethod equivalent? (class (byte-array 0)) 66 | [a b] 67 | (bytes= a b)) 68 | 69 | 70 | (defmethod equivalent? Character 71 | [a b] 72 | (= (str a) (str b))) 73 | 74 | 75 | (defmethod equivalent? Pattern 76 | [a b] 77 | (= (str a) (str b))) 78 | 79 | 80 | (defmethod equivalent? Double 81 | [a b] 82 | (if (Double/isNaN a) 83 | (and (number? b) (Double/isNaN b)) 84 | (= a b))) 85 | 86 | 87 | (defmethod equivalent? List 88 | [a b] 89 | (and (instance? List b) 90 | (= (count a) (count b)) 91 | (every? true? (map equivalent? a b)))) 92 | 93 | 94 | (defmethod equivalent? Set 95 | [a b] 96 | (and (instance? Set b) 97 | (= (count a) (count b)) 98 | (every? #(seq (filter (partial equivalent? %) b)) a))) 99 | 100 | 101 | (defmethod equivalent? Map 102 | [a b] 103 | (and (instance? Map b) 104 | (= (count a) (count b)) 105 | (loop [a a 106 | b b] 107 | (if-let [[k v] (first a)] 108 | (if-let [[match-k match-v] 109 | (first (filter (comp (partial equivalent? k) key) b))] 110 | (if (equivalent? v match-v) 111 | (recur (dissoc a k) (dissoc b match-k)) 112 | false) 113 | false) 114 | (empty? b))))) 115 | 116 | 117 | ;; ## Hex Conversion 118 | 119 | (defn- byte->hex 120 | "Convert a single byte value to a two-character hex string." 121 | [b] 122 | (let [hex (Integer/toHexString 123 | (if (neg? b) 124 | (+ b 256) 125 | b))] 126 | (if (= 1 (count hex)) 127 | (str "0" hex) 128 | hex))) 129 | 130 | 131 | (defn- hex->byte 132 | "Convert a two-character hex string to a byte value." 133 | [octet] 134 | (let [b (Integer/parseInt octet 16)] 135 | (if (< 127 b) 136 | (- b 256) 137 | b))) 138 | 139 | 140 | (defn bin->hex 141 | "Convert a byte array to a hex string." 142 | ^String 143 | [^bytes value] 144 | (str/upper-case (apply str (map byte->hex value)))) 145 | 146 | 147 | (defn hex->bin 148 | "Convert a hex string to a byte array." 149 | ^bytes 150 | [^String hex] 151 | (let [length (/ (count hex) 2) 152 | data (byte-array length)] 153 | (dotimes [i length] 154 | (let [octet (subs hex (* 2 i) (* 2 (inc i)))] 155 | (aset-byte data i (hex->byte octet)))) 156 | data)) 157 | 158 | 159 | (defn decode-hex 160 | "Decode a single CBOR value from a hex string." 161 | ([string] 162 | (decode-hex (cbor/cbor-codec) string)) 163 | ([decoder string] 164 | (cbor/decode decoder (hex->bin string)))) 165 | 166 | 167 | (defn decode-hex-all 168 | "Decode all CBOR values from a hex string." 169 | ([string] 170 | (decode-hex-all (cbor/cbor-codec) string)) 171 | ([decoder string] 172 | (doall (cbor/decode-seq decoder (hex->bin string))))) 173 | 174 | 175 | (defn encoded-hex 176 | "Encode the value as CBOR and convert to a hex string." 177 | ([value] 178 | (encoded-hex (cbor/cbor-codec) value)) 179 | ([encoder value] 180 | (bin->hex (cbor/encode encoder value)))) 181 | 182 | 183 | ;; ## Dynamic Codec 184 | 185 | (def ^:dynamic *test-codec* 186 | (cbor/cbor-codec)) 187 | 188 | 189 | (defmacro with-codec 190 | [opts & body] 191 | `(binding [*test-codec* (cbor/cbor-codec ~opts)] 192 | ~@body)) 193 | 194 | 195 | (defmacro check-roundtrip 196 | ([value hex-string] 197 | `(let [value# ~value] 198 | (is (~'= ~hex-string (encoded-hex *test-codec* value#))) 199 | (is (~'= value# (decode-hex *test-codec* ~hex-string))))) 200 | ([compare-by value hex-string] 201 | `(let [value# ~value] 202 | (is (~'= ~hex-string (encoded-hex *test-codec* value#))) 203 | (is (~'= (~compare-by value#) (~compare-by (decode-hex *test-codec* ~hex-string))))))) 204 | --------------------------------------------------------------------------------