├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── Setup.hs ├── bin ├── Cauterize │ └── Options.hs └── Main.hs ├── cauterize.cabal ├── cauterize_drift_detection.md ├── examples ├── badName.scm ├── compiled_example-meta.txt ├── compiled_example-spec.txt ├── compiled_example.txt ├── dupName.scm ├── empty.scm ├── loop.scm ├── reservedName.scm └── schema.scm ├── src └── Cauterize │ ├── CommonTypes.hs │ ├── Dynamic.hs │ ├── Dynamic │ ├── Common.hs │ ├── Gen.hs │ ├── Meta.hs │ ├── Meta │ │ ├── Gen.hs │ │ ├── Pack.hs │ │ ├── Pretty.hs │ │ ├── Types.hs │ │ └── Unpack.hs │ ├── Pack.hs │ ├── Pretty.hs │ ├── Types.hs │ └── Unpack.hs │ ├── Generate.hs │ ├── Hash.hs │ ├── Schema.hs │ ├── Schema │ ├── Checker.hs │ ├── Parser.hs │ ├── Types.hs │ └── Util.hs │ ├── Specification.hs │ ├── Specification │ ├── Compile.hs │ ├── Parser.hs │ └── Types.hs │ └── Version.hs ├── stack.yaml └── tests ├── Cauterize ├── CommonTypesSpec.hs ├── Dynamic │ └── PackSpec.hs ├── HashSpec.hs ├── Schema │ └── ParserSpec.hs └── Specification │ ├── CompileSpec.hs │ └── ParserSpec.hs └── Spec.hs /.gitignore: -------------------------------------------------------------------------------- 1 | .cabal-sandbox 2 | cabal.sandbox.config 3 | dist 4 | .stack-work 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | 3 | sudo: false 4 | cache: 5 | directories: 6 | - $HOME/.stack/ 7 | 8 | before_install: 9 | - mkdir -p ~/.local/bin 10 | - export PATH=~/.local/bin:$PATH 11 | - travis_retry curl -L https://www.stackage.org/stack/linux-x86_64 | tar xz --wildcards --strip-components=1 -C ~/.local/bin '*/stack' 12 | - chmod a+x ~/.local/bin/stack 13 | 14 | install: 15 | - stack setup 16 | - stack build --only-snapshot 17 | 18 | script: 19 | - stack build --test 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, John Van Enk 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of Cauterize nor the names of its contributors may be 12 | used to endorse or promote products derived from this software without 13 | specific prior written permission. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. IN NO EVENT SHALL JOHN VAN ENK BE LIABLE FOR ANY 19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | INSTALL_DIR?=$(HOME)/.local/bin 3 | export PATH := $(INSTALL_DIR):$(PATH) 4 | 5 | ifeq ($(OS),Windows_NT) 6 | EXECUTABLE=$(shell stack path --dist-dir)\build\cauterize\cauterize.exe 7 | else 8 | EXECUTABLE=$(shell stack path --dist-dir)/build/cauterize/cauterize 9 | endif 10 | 11 | .PHONY: default clean build test 12 | 13 | default: test 14 | 15 | clean: 16 | -@rm -rf .stack-work 17 | 18 | build: 19 | @stack build 20 | 21 | test: 22 | @stack test 23 | 24 | install: 25 | stack setup 26 | stack build 27 | mkdir -p $(INSTALL_DIR) 28 | cp $(EXECUTABLE) $(INSTALL_DIR) 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ) _____ _ _ 3 | /(( / __ \ | | (_) 4 | (_))\ | / \/ __ _ _ _| |_ ___ _ __ _ _______ 5 | _)((_) | | / _` | | | | __/ _ \ '__| |_ / _ \ 6 | \ ^ / | \__/\ (_| | |_| | || __/ | | |/ / __/ 7 | \_/ \____/\__,_|\__,_|\__\___|_| |_/___\___| 8 | ``` 9 | 10 | # Cauterize 11 | 12 | Cauterize is a data-description language suitable for hard-real-time systems 13 | and systems without dynamic memory allocation. It can be used instead of other 14 | data description languages like JSON, XML, or ProtocolBuffers. 15 | 16 | ## Introduction 17 | 18 | Cauterize is _first_ intended to serve the constraints of [hard 19 | real-time](http://en.wikipedia.org/wiki/Real-time_computing) embedded systems 20 | programming, but is still suitable for a variety of other situations. All 21 | Cauterize specifications have the following properties: 22 | 23 | * Encoded messages have a maximum and minimum size known at compile time 24 | * All types have a maximum referential depth 25 | * All types are neither directly nor indirectly recursive 26 | * All types are listed in a topographically sorted order (all types that 27 | depend on other types will follow those types in the specification output) 28 | * The specification has a version-hash that is based on all possible 29 | variation in the schema. This can be used to detect incidental (as opposed 30 | to adversarial) protocol drift with a very high probability. 31 | * All types have a type-hash based on the structure of that type and all 32 | types it depends on 33 | * All length, field, and type tags have their representation optimized to use 34 | as little space as possible while still maintaining a minimum alignment of 35 | 8 bits 36 | 37 | Cauterize was first designed to work on a small embedded system that required 38 | all memory to be allocated ahead of time (no dynamic memory allocation) yet 39 | still needed to interface with a Ruby and C# environment. The prototype was a 40 | Ruby DSL that, when executed, would emit C, C#, and Ruby libraries that were 41 | all capable of reading and writing the schema described by the DSL. 42 | 43 | This project is a successor to the original Ruby DSL prototype with a goals of 44 | being safer, being more complete, and including features making it easier to 45 | add new code generators beyond the original C, C# and Ruby generators. 46 | 47 | In order to better frame the context at which Cauterize is targeted, here's a 48 | incomplete list of other tools that attempt to perform some or all of the 49 | functions Cauterize is capable of performing. If Cauterize is not right for 50 | your purposes, perhaps one of these tools is. These are listed alphabetically. 51 | 52 | * [Abstract Syntax Notation One (ASN.1)](http://en.wikipedia.org/wiki/Abstract_Syntax_Notation_One) 53 | * [Apache Avro](http://avro.apache.org/docs/current/) 54 | * [Apache Thrift](https://thrift.apache.org/) 55 | * [BERT-RPC](http://bert-rpc.org/) 56 | * [Binary JSON (BSON)](http://bsonspec.org/) 57 | * [bond](https://github.com/Microsoft/bond) 58 | * [Cap'n Proto](https://capnproto.org/) 59 | * [Concise Binary Object Representation (CBOR)](http://cbor.io/) 60 | * [Extensible Binary Meta Language (EBML)](http://ebml.sourceforge.net/) 61 | * [extprot](https://github.com/mfp/extprot) 62 | * [FlatBuffers](http://google-opensource.blogspot.com/2014/06/flatbuffers-memory-efficient.html) 63 | * [Fressian (Datomic)](https://github.com/Datomic/fressian) 64 | * [Linden Labs Structured Data](http://wiki.secondlife.com/wiki/LLSD) 65 | * [Message Pack](http://msgpack.org/) 66 | * [Piqi](http://piqi.org/) 67 | * [Protocol Buffers](https://developers.google.com/protocol-buffers/) 68 | * [Simple Binary Encoding (SBE)](http://mechanical-sympathy.blogspot.com/2014/05/simple-binary-encoding.html) 69 | * [Transit](https://github.com/cognitect/transit-format) 70 | * [XDR](http://en.wikipedia.org/wiki/External_Data_Representation) 71 | 72 | ### Schema, Specification, and Code Generation 73 | 74 | Cauterize consists of several parts: a _schema language_ for describing 75 | ordinary data, a compiler to translate a schema into an intermediate 76 | representation known as a _specification_, and code generators that translate 77 | specifications into encoders and decoders. 78 | 79 | A schema is written by humans and it describes the semantic meaning in all 80 | types. 81 | 82 | The specification is created by the cauterize compiler and it describes all 83 | inferable information from the schema in order to make the creation of _code 84 | generators_ easier. 85 | 86 | Code generators consume the specification and output a library capable of 87 | encoding and decoding data represented by the schema. 88 | 89 | 90 | ### Goals 91 | 92 | Cauterize should be suitable for hard-real-time systems without dynamic memory 93 | allocation. From this goal, we can extract the following more specific goals: 94 | 95 | * Must be achievable with static memory allocation - not all embedded systems 96 | support dynamic allocation. 97 | * Must be achievable in bounded execution time - hard-real-time systems must 98 | know how long each operation can possibly take. 99 | * Must support methods for detecting protocol drift early - embedded systems 100 | are often harder to update than desktop systems. They have longer deployment 101 | in more unusual conditions. Therefore, it is very important that the version 102 | of the messages being used by the embedded systems is detectable by its 103 | partner systems and that they be kept in sync. 104 | * Specifications must be precise in as many ways as possible - many embedded 105 | systems vary from standard desktop and server systems in unusual ways. These 106 | variations can can include things such as: the number of bits in a byte, the 107 | amount of memory available on the system, the representation of pointers, the 108 | endianness of the processor, and the format of floating point numbers. 109 | 110 | After dealing with these points that enable Cauterize to be used in constrained 111 | systems, there are several other goals that make the quality of life for users 112 | better. 113 | 114 | * Should not preclude other systems - though embedded systems are a primary 115 | target, design choices for Cauterize should not preclude the use of Cauterize 116 | on systems such as mobile, desktop, and web development. 117 | * Ease of implementation - code generators should be able to represent the 118 | specification in idioms common in the target language. In C, this is structs, 119 | enumerations, and unions. In Ruby, this would likely be classes. Furthermore, 120 | code generators should not have to generate any overly-complicated structures 121 | to conform to a specification. When a Cauterize feature is proposed, it must 122 | be implementable in simple terms in a variety of languages. 123 | * Ease of verification - code generators are hard to validate for correctness. 124 | There should be some means of checking them automatically. 125 | * Simplicity - code generators should not be expected to perform complicated 126 | operations in order to emit code. Concepts should be simple in nature and 127 | have at least one obvious method for implementation. 128 | 129 | ## Schema Language 130 | 131 | The schema language uses parentheses to enclose each of its expressions. All 132 | expressions have an assigned order for all arguments. 133 | 134 | ### Schema Name 135 | 136 | The schema name is optional. If it's omitted, it will be set to "schema". 137 | 138 | ``` 139 | (name [name]) 140 | ``` 141 | 142 | Names can consist of the following characters and must be enclosed in 143 | double quotations: 144 | 145 | `[a-z]([a-z]|[0-9]|_)*` 146 | 147 | ### Schema Version 148 | 149 | The schema version is optional. If it's omittied, it will be set to 150 | "0.0.0". The version may consist of the following characters in 151 | quotations: 152 | 153 | ``` 154 | ([a-z]|[0-9])([a-z]|[0-9]|_|.|-)* 155 | ``` 156 | 157 | The reason the name and version patterns are restrictive is to ease the 158 | burden on code generators. Some languages, such as Haskell and Ruby, have 159 | specific rules about capitalization. Without the restrictive name pattern 160 | Cauterize uses, code generators would have to do a lot more work to emit code 161 | that is readable and matches the target language's normal conventions. 162 | 163 | ### Comments 164 | 165 | Line comments are defined by using two `;` characters in a row. Here's an example. 166 | 167 | ``` 168 | ;; this is comment 169 | (name "some name") 170 | (version "0.0.1") 171 | ``` 172 | 173 | ### Primitive Types 174 | 175 | There are several types that represent the foundation of the Cauterize types. 176 | These are the fundamental types available for creating your own more complex 177 | types. It is not possible to define new built-in types in a schema. All 178 | builtins referenced by a schema will have definitions in the output 179 | specification. 180 | 181 | #### Unsigned Primitive Types 182 | 183 | Unsigned values are encoded as [little 184 | endian](http://en.wikipedia.org/wiki/Endianness). 185 | 186 | * `u8` - 8 bits wide 187 | * `u16` - 16 bits wide 188 | * `u32` - 32 bits wide 189 | * `u64` - 64 bits wide 190 | 191 | #### Signed Primitive Types 192 | 193 | Signed values are encoded as [two's 194 | complement](http://en.wikipedia.org/wiki/Two%27s_complement) [little 195 | endian](http://en.wikipedia.org/wiki/Endianness) values. 196 | 197 | * `s8` - 8 bits wide 198 | * `s16` - 16 bits wide 199 | * `s32` - 32 bits wide 200 | * `s64` - 64 bits wide 201 | 202 | #### Boolean Primitive Type 203 | 204 | Booleans are encoded with a single byte. The only valid values are 0 and 1 205 | where 0 represents `false` and 1 represents `true`. 206 | 207 | * `bool` - 8 bits wide 208 | 209 | #### Floating Point Primitive Types 210 | 211 | Floating point types are hard. Their definitions can be different (or missing 212 | entirely!) across CPU architectures. Therefore, Cauterize only defines the 213 | `f32` and `f64` types. These are the [IEEE 754 single and double 214 | precision floating point 215 | values](http://en.wikipedia.org/wiki/IEEE_floating_point). The single precision 216 | value uses 32 bits while the double-precision value uses 64 bits. Both flavors 217 | are encoded in [little endian](http://en.wikipedia.org/wiki/Endianness). 218 | 219 | Floating point types: 220 | 221 | * `f32` - 32 bits wide, IEEE754 Single Precision 222 | * `f64` - 64 bits wide, IEEE754 Double Precision 223 | 224 | ### Prototypes 225 | 226 | Cauterize provides several prototypes that act as templates out of which other 227 | types can be created. 228 | 229 | All types must list a name. That name follows the following rule: 230 | 231 | ``` 232 | [a-z]([a-z]|[0-9]|_)* 233 | ``` 234 | 235 | #### Synonyms 236 | 237 | Synonyms are used to give one of the built-in types a new name. Their encoded 238 | representation is identical to that of the built-in value they wrap, but has 239 | a type that is distinct from the wrapped value. 240 | 241 | ``` 242 | (type [type name] synonym [built-in type name]) 243 | ``` 244 | 245 | The following example defines the type `age` that has the same representation 246 | as a `u8`. 247 | 248 | ``` 249 | (type age synonym u8) 250 | ``` 251 | 252 | #### Ranges 253 | 254 | Ranges are used to encode an integer value between two other integer 255 | values. They are encoded in a word suitable for expressing all 256 | possible values in the range. That is, a range with less than 256 257 | members will be encoded in a 8 bit word, a range with less than 65535 258 | members will be encoded in a 16 bit word, and so on. 259 | 260 | ``` 261 | (type [type name] range [minimum value] [maximum value]) 262 | ``` 263 | 264 | The following is an example of a range that only encodes the values 265 | from 1000 to 1010. 266 | 267 | ``` 268 | (type some_range range 1000 1010) 269 | ``` 270 | 271 | #### Arrays 272 | 273 | Arrays are fixed-length sequences of identially typed objects. These are to be 274 | used when the sequence only makes sense with a fixed number of elements. 275 | 276 | ``` 277 | (type [type name] array [element type] [array length]) 278 | ``` 279 | 280 | Consider the following example that defines a type `mac` that encodes a Media 281 | Access Control address (which is always 6 bytes long). 282 | 283 | ``` 284 | (type mac array u8 6) 285 | ``` 286 | 287 | #### Vector 288 | 289 | Vectors are variable-lengthed sequences of identically typed objects. These are 290 | to be used when a sequence of elements has a maximum length, but may contain 291 | fewer elements. 292 | 293 | ``` 294 | (type [type name] vector [target type] [maximum array length]) 295 | ``` 296 | 297 | The following example defines a generic byte buffer with a maximum length of 298 | 4096 bytes. 299 | 300 | ``` 301 | (type byte_buffer_4k vector u8 4096) 302 | ``` 303 | 304 | #### Enumeration 305 | 306 | Enumerations are types with a fixed set of named members. Members of 307 | an enumeration are assign integer values starting from 0. These values 308 | are assigned automatically by the Cauterize compiler. Enumerations are 309 | encoded in the smallest word necessary to express every value in the 310 | enumeration. 311 | 312 | ``` 313 | (type [type name] enumeration (values [space-separated list of identifiers]) 314 | ``` 315 | 316 | The following is an enumeration encoding the days of the week: 317 | 318 | ``` 319 | (type days_of_week enumeration 320 | (values 321 | sunday 322 | monday 323 | tuesday 324 | wednesday 325 | thursday 326 | friday 327 | saturday)) 328 | ``` 329 | 330 | #### Field Lists 331 | 332 | Field lists cannot be defined on their own; they can only be used as the last 333 | parameter to a `record`, `union`, or `combination` expression (which are 334 | defined later in this document). Field lists are used to designate a set of 335 | (name/type) pairs. 336 | 337 | Unions and combinations can use the `empty` keyword instead of the 338 | `field` keyword. Empty fields do not have any associated data. 339 | 340 | Field lists are defined like this: 341 | 342 | ``` 343 | (fields 344 | (field [field name] [type]) ;; a field with some data 345 | (empty [field name]) ;; an empty field (just the tag) 346 | (field ...)) 347 | ``` 348 | 349 | A type is not required. The behavior of a type-less field is dependent on the 350 | enclosing expression. 351 | 352 | #### Records 353 | 354 | Records are a collection of named fields where each field has a distinct type. 355 | 356 | ``` 357 | (type [type name] record [field list]) 358 | ``` 359 | 360 | An empty field in a record lacks any semantic meaning. It can neither be 361 | encoded or represented by code generators. 362 | 363 | This is an example of a record describing a person's age, height, and whether or 364 | not they like cats: 365 | 366 | ``` 367 | (type person record (fields (field age u8) 368 | (field height u8) 369 | (field likes_cats bool))) 370 | ``` 371 | 372 | #### Union 373 | 374 | Unions encode a set of possible values and some associated sub-value. Like 375 | records, their schema entries specify a list of fields, but, unlike records, 376 | only one of those fields can represented in a union at any given time. 377 | 378 | Unions in Cauterize are very similar to algebraic data types found in other 379 | languages such as OCaml, Haskell, and Rust. 380 | 381 | ``` 382 | (type [type name] union [field list]) 383 | ``` 384 | 385 | An empty field in a union represents that a variant of the union is set. This 386 | has meaning even if there is no associated data. A union where all fields lack 387 | associated data behaves similarly to a C enumeration. 388 | 389 | This example shows a `request` type for some key-value storage system. The 390 | system stores `u64` values according to names that are up to 128 bytes long. 391 | 392 | ``` 393 | (type key_name vector key_name u8 128) 394 | (type key_pair record (fields 395 | (field name key_name) 396 | (field value u64))) 397 | 398 | (type request union (fields 399 | (field get_key_count) 400 | (field check_key_exists key_name) 401 | (field get_key key_name) 402 | (field erase_key key_name) 403 | (field set_key key_pair)))) 404 | ``` 405 | 406 | The `get_key_count` variant does not contain any associated data while the 407 | `get_key` variant specifies a type that encodes the name of the key to get. 408 | Note: the `response` type is not defined in this example. 409 | 410 | #### Combination 411 | 412 | Combinations, like records, are a collection of named fields where each field 413 | has a distinct type. The difference is that each field in the combination can 414 | either be present or not present in the encoded output. 415 | 416 | ``` 417 | (type [type name] combination [field list]) 418 | ``` 419 | As an example, consider the following description of a type capable of storing 420 | changes in some observed values in a sensor rig: 421 | 422 | An empty field in a Combination behaves like a boolean flag. 423 | 424 | ``` 425 | (type sensed combination (fields 426 | (field ambient_temp u16) 427 | (field ambient_light u16) 428 | (field air_pressure u16) 429 | (field position_x u32) 430 | (field position_y u32) 431 | (field position_z u32))) 432 | ``` 433 | 434 | If a sensor value hasn't changed since the last time the message was sent, the 435 | message is able to omit that reading since there isn't new information to 436 | share. 437 | 438 | ## Specification Language 439 | 440 | The specification language also uses parenthesis to enclose each of its 441 | expressions. All expressions have an assigned order for all their arguments. 442 | Each expression explains one type defined by the schema. 443 | 444 | ### Specification-specific Expressions 445 | 446 | There are several expressions that show up in specifications that do not show 447 | up in schemas. These expressions represent data inferred from the schema. 448 | 449 | #### `sha1` Expression 450 | 451 | The `sha1` expression represents a SHA1 hash. It is 40 hexadecimal characters 452 | long. 453 | 454 | ##### Example 455 | 456 | ``` 457 | (sha1 77e8f0d33bd09411bbc2f94c839e0ccc34d55603) 458 | ``` 459 | 460 | #### `depth` Expression 461 | 462 | The `depth` expression represents the maximum referential depth of a schema. 463 | This expression is used in the top level `specification` expression. 464 | 465 | ##### Example 466 | 467 | ``` 468 | (depth 6) 469 | ``` 470 | 471 | #### `type-width` Expression 472 | 473 | The `type-width` expression represents the minimum length of the prefix of each 474 | type hash needed for a unique value. It is only used in the `specification` 475 | expression. 476 | 477 | ##### Example 478 | 479 | The two hashes below have the first two bytes in common. The `type-width` would 480 | be 3 because a 3-byte prefix of each hash is unique. 481 | 482 | ``` 483 | (sha1 77e8f0d33bd09411bbc2f94c839e0ccc34d55603) 484 | (sha1 77e878098602c275eb7a3408aff17e396220324d) 485 | ``` 486 | 487 | The `type-width` expression would show up in the specification as: 488 | 489 | ``` 490 | (type-width 3) 491 | ``` 492 | 493 | #### `length-width` Expression 494 | 495 | The `length-width` expression represents the number of bytes suitable for 496 | representing the maximum encoded length of any type in the schema. 497 | 498 | This width is always one of 1, 2, 4, or 8. 499 | 500 | ##### Example 501 | 502 | For a schema with a maximum length of 68: `(length-width 1)`. 503 | 504 | For a schema with a maximum length of 257: `(length-width 2)`. 505 | 506 | For a schema with a maximum length of 70,000: `(length-width 4)`. 507 | 508 | For a schema with a maximum length of 17,000,000: `(length-width 4)`. 509 | 510 | For a schema with a maximum length of 8,600,000,000: `(length-width 8)`. 511 | 512 | #### `fixed-size` Expression 513 | 514 | The `fixed-size` expression represents a size in bytes. This expression is used 515 | in type specifications that have a fixed encoding size. 516 | 517 | ##### Example 518 | 519 | ``` 520 | (fixed-size 8) 521 | ``` 522 | 523 | #### `range-size` Expression 524 | 525 | The `range-size` expression represents a minimum and maximum size in bytes. 526 | This expression is used in type specifications that have a variable encoding 527 | size. 528 | 529 | ##### Example 530 | 531 | ``` 532 | (range-size 22 16406) 533 | ``` 534 | 535 | #### `length-repr` Expression 536 | 537 | The `length-repr` expression represents the type used to encode a vector's 538 | length. It only occurs in `vector` specifications. 539 | 540 | ##### Example 541 | 542 | ``` 543 | (length-repr u8) 544 | 545 | ``` 546 | 547 | #### `flags-repr` Expression 548 | 549 | The `flags-repr` expression represents the type used to encode a combination's 550 | flags. It only occurs in `combination` specifications. 551 | 552 | ##### Example 553 | 554 | ``` 555 | (flags-repr u16) 556 | ``` 557 | 558 | #### `tag-repr` Expression 559 | 560 | The `tag-repr` expression represents the type used to encode a union's type 561 | tag. It only occurs in `union` specifications. 562 | 563 | ### `specification` Expression 564 | 565 | All specification documents contain several top-level expressions. 566 | 567 | ``` 568 | (name [schema name]) 569 | (version [schema version]) 570 | (sha1 [a sha1 hash]) 571 | (range-size [minimum encoded size] [maximum encoded size]) 572 | (depth [maximum referential depth of the schema]) 573 | (type-width [type-tag width hint]) 574 | (length-width [length-tag width hint]) 575 | 576 | [[type expressions]] 577 | ``` 578 | 579 | ### `synonym` Specification Expression 580 | 581 | All synonym expressions have the following layout: 582 | 583 | ``` 584 | (type [type name] synonym (sha1 ...) (fixed-size ...)) 585 | ``` 586 | 587 | ### `array` Specification Expression 588 | 589 | All array expressions have the following layout: 590 | 591 | ``` 592 | (type [type name] array (sha1 ...) (range-size ...) 593 | [array length] [element type]) 594 | ``` 595 | 596 | ### `vector` Specification Expression 597 | 598 | All vectors have the following layout: 599 | 600 | ``` 601 | (type [type name] vector (sha1 ...) (range-size ...) 602 | (length-repr ...) 603 | [vector max length] [element type]) 604 | ``` 605 | 606 | ### `fields` and `field` Specification Expressions 607 | 608 | Record, union, and combination types all include a list of fields. This list of 609 | fields is enclosed in a `fields` expression. 610 | 611 | The `fields` expression has this form: 612 | 613 | ``` 614 | (fields [[field expression]]) 615 | ``` 616 | 617 | `field` expressions come in two varieties. The first variety expresses a name 618 | for the field, a type the field references, and the index of the field. 619 | 620 | ``` 621 | (field [field name] [type name] [index]) 622 | ``` 623 | 624 | The second variety expresses only a name for the field and the index of the 625 | field. These are known as "empty" fields. 626 | 627 | ``` 628 | (field [field name] [index]) 629 | ``` 630 | 631 | ### `record` Specification Expression 632 | 633 | All records have the following layout: 634 | 635 | ``` 636 | (type [type name] record (sha1 ...) (range-size ...) 637 | (fields ...)) 638 | ``` 639 | 640 | ### `union` Specification Expression 641 | 642 | All unions have the following layout: 643 | 644 | ``` 645 | (type [type name] record (sha1 ...) (range-size ...) 646 | (tag-repr [built-in type]) 647 | (fields ...)) 648 | ``` 649 | 650 | ### `combination` Specification Expression 651 | 652 | All combinations have the following layout: 653 | 654 | ``` 655 | (type [type name] combination (sha1 ...) (range-size ...) 656 | (flags-repr [built-in type]) 657 | (fields ...)) 658 | ``` 659 | 660 | # Binary Interpretation 661 | 662 | It's possible, given a binary encoding of a Cauterize type and the 663 | specification document for the type's schema, to interpret the encoded bytes 664 | into the original structure. 665 | 666 | Types have two general flavors: types with a decision to make and types with 667 | only a single interpretation path. The types that have interpretation decisions 668 | are: `vector`, `union`, and `combination`. All others (`builtin`, `synonym`, 669 | `array`, and `record`) only have a single path of interpretation. 670 | 671 | Decision types all encode their decision variable in the binary stream. For 672 | `vector` this is a length tag. For `union`, this is a type tag. For 673 | `combination`, the tag encodes a series of flags representing which fields in 674 | the combination are present. 675 | 676 | Tag information for types *always* comes before the type data itself. For 677 | example, a vector with a `u8` tag for its length will encode that `u8` as the 678 | very first byte. 679 | 680 | Furthermore, the only types that actually contain data are the builtin types. 681 | All other types are constructed from builtin types or other user-defined types. 682 | With knowledge about tag position and where data is stored, we can begin to 683 | parse encoded binary strings. 684 | 685 | ## Sample Schema 686 | 687 | We'll use the following as our schema. There's nothing particularly interesting 688 | about it except that it uses all of the available prototypes. I've called this 689 | file `binterp.caut`. 690 | 691 | ``` 692 | (schema binterp 0.0.0.0 693 | (synonym syn_u32 u32) 694 | 695 | (array arr_u32 u32 4) 696 | 697 | (vector vec_u32 u32 4) 698 | 699 | (record rec_unsigned 700 | (fields 701 | (field fu8 u8) 702 | (field fu16 u16) 703 | (field fu32 u32) 704 | (field fu64 u64))) 705 | 706 | (union union_unsigned 707 | (fields 708 | (field fu8 u8) 709 | (field fu16 u16) 710 | (field fu32 u32) 711 | (field fu64 u64))) 712 | 713 | (combination comb_unsigned 714 | (fields 715 | (field fu8 u8) 716 | (field fu16 u16) 717 | (field fu32 u32) 718 | (field fu64 u64)))) 719 | ``` 720 | 721 | The following command converts this schema into a specification: `cauterize 722 | --schema=binterp.caut --output=binterp.cautspec`. 723 | 724 | When we inspect the specification, we see the following: 725 | 726 | ``` 727 | (name "binterp") 728 | (version "0.0.0.0") 729 | (sha1 ac9fda94cadb44d18af85d498f169609fd716efb) 730 | (range-size 1 17) (depth 2) (type-width 1) (length-width 1) 731 | (type vec_u32 vector 732 | (sha1 35832f3b7bd6dbeb8d3b5c92f73b2f06759d2e7a) 733 | (range-size 1 17) 734 | (length-repr u8) 735 | 4 u32) 736 | (type union_unsigned union 737 | (sha1 e59761d5c25294927e5026c278db565f7190b693) 738 | (range-size 2 9) 739 | (tag-repr u8) 740 | (fields 741 | (field fu8 u8 0) 742 | (field fu16 u16 1) 743 | (field fu32 u32 2) 744 | (field fu64 u64 3))) 745 | (type syn_u32 synonym 746 | (sha1 f180b823f00f965e1f0f68ba5c82400f2d9dd32a) 747 | (fixed-size 4) 748 | u32) 749 | (type rec_unsigned record 750 | (sha1 b58dd55deef9faf22ac07ced17cf6f87d1c95111) 751 | (range-size 15 15) 752 | (fields 753 | (field fu8 u8 0) 754 | (field fu16 u16 1) 755 | (field fu32 u32 2) 756 | (field fu64 u64 3))) 757 | (type comb_unsigned combination 758 | (sha1 d67b5d0a49e122140f418c12ad445ed013a52fc3) 759 | (range-size 1 16) 760 | (flags-repr u8) 761 | (fields 762 | (field fu8 u8 0) 763 | (field fu16 u16 1) 764 | (field fu32 u32 2) 765 | (field fu64 u64 3))) 766 | (type arr_u32 array 767 | (sha1 965f3610970341adb1132d27a668a4c94e9e3d57) 768 | (range-size 16 16) 769 | 4 u32)) 770 | ``` 771 | 772 | Using this document and the knowledge of what type we're trying to decode, we 773 | can decode any encoded message for a particular specification. 774 | 775 | It's important to remember that an encoded type, on its own, does not contain 776 | enough information to identify it as that type. Two peers wanting to transcode 777 | the same binary stream must agree on what type is being exchanged ahead of time 778 | *or* have a method for identifying which type is being encoded on the wire. 779 | Cauterize generators should normally emit the standard _message interface_ 780 | based off the `type-width` and `length-width` parameters in the specification. 781 | 782 | In the following exercises, all encoded messages will be listed in hexadecimal. 783 | 784 | ## Decoding a Primtive 785 | 786 | The following is an encoded `u64` type: 787 | 788 | ``` 789 | 2a75030000000000 790 | ``` 791 | 792 | Decoding primitives is pretty simple. Each builtin type has a fixed 793 | size. To decode a primitive, read that many bytes from the encoded 794 | string as a little endian value of the proper type. 795 | 796 | The above example is, therefore, the following 64-bit value: `0x000000000003752A`. 797 | 798 | ## Decoding an Array 799 | 800 | Decoding arrays is more complex than decoding builtins, but not much more. 801 | Array types all have a specific length. When decoding an array, one has to look 802 | at the array's element type and decode as many of that type as the array's 803 | length expression specifies. 804 | 805 | The following is an encoded `arr_u32` type. 806 | 807 | ``` 808 | 8c0f0000a30a0000d30d00002c080000 809 | ``` 810 | 811 | Let's take a look at the `arr_u32` specification: 812 | 813 | ``` 814 | (type arr_u32 array 815 | (sha1 965f3610970341adb1132d27a668a4c94e9e3d57) 816 | (range-size 16 16) 817 | 4 u32)) 818 | ``` 819 | 820 | We know, from the specification, that an `arr_u32` type has a length of 4 and 821 | its element type is `u32`. We know, from its specification, that each `u32` is 822 | made up of 4 bytes. So, all we need to do is read 4 `u32` types from the binary 823 | string. 824 | 825 | Therefore, we know that our decoded array is the folling list of `u32` values: 826 | 827 | ``` 828 | [ 0x00000F8C, 0x00000AA3, 0x00000DD3, 0x0000082C ] 829 | ``` 830 | 831 | ## Decoding a Record 832 | 833 | Decoding a record is quite similar to decoding an array. Both types have a 834 | fixed number of types to decode. The major difference is that arrays decode a 835 | specific number of the same types while records decode a specific number of 836 | varrying types. 837 | 838 | The following is an encoded `rec_unsigned` type. 839 | 840 | ``` 841 | fb5e0f0b080000ce85000000000000 842 | ``` 843 | 844 | Let's take a look at the `rec_unsigned` specification: 845 | 846 | ``` 847 | (type rec_unsigned record 848 | (sha1 b58dd55deef9faf22ac07ced17cf6f87d1c95111) 849 | (range-size 15 15) 850 | (fields 851 | (field fu8 u8 0) 852 | (field fu16 u16 1) 853 | (field fu32 u32 2) 854 | (field fu64 u64 3))) 855 | ``` 856 | 857 | To decode a record, we only need to decode each of the record's fields in order 858 | until we've decoded all the fields. For `rec_unsigned`, this means that we 859 | start by decodin `fu8` and finish by decoding `fu64`. 860 | 861 | So, we end up with the following list of decoded values: 862 | 863 | ``` 864 | [ 0xFB, 0x0F5E, 0x0000080B, 0x00000000000085CE ] 865 | ``` 866 | 867 | ## Decoding a Vector 868 | 869 | Decoding a vector is very similar to decoding an array. The only difference is 870 | that the number of elements to decode is goverend by a length tag rather than 871 | by the type. Vectors define a maximum number of elements to decode rather than 872 | a constant number of elements to decode. 873 | 874 | The following is an example of an encoded `vec_u32`. 875 | 876 | ``` 877 | 02f8050000aa030000 878 | ``` 879 | 880 | Let's take a look at the specification for a `vec_u32` again. 881 | 882 | ``` 883 | (type vec_u32 vector 884 | (sha1 35832f3b7bd6dbeb8d3b5c92f73b2f06759d2e7a) 885 | (range-size 1 17) 886 | (length-repr u8) 887 | 4 u32) 888 | ``` 889 | 890 | The specification for a vector has a `length-repr` expression. This tells us 891 | what type is used to encode the length of this vector. In this case, a `u8` is 892 | used to encode the length. Decoding a single byte from our binary string yields 893 | a value of `0x02`. Therefore, we know that our encoded vector contains two 894 | elements. Since our element type is `u32`, we know to decode two `u32` values 895 | from the binary string. This yields a vector of length 2 with the following value: 896 | 897 | ``` 898 | [ 0x000005F8, 0x000003AA ] 899 | ``` 900 | 901 | ## Decoding a Union 902 | 903 | The following is an encoded `union_unsigned`. 904 | 905 | ``` 906 | 01 af 04 907 | ``` 908 | 909 | To interpret this, we can reference the `union_unsigned` definition in our 910 | specification. 911 | 912 | ``` 913 | (type union_unsigned union 914 | (sha1 e59761d5c25294927e5026c278db565f7190b693) 915 | (range-size 2 9) 916 | (tag-repr u8) 917 | (fields 918 | (field fu8 u8 0) 919 | (field fu16 u16 1) 920 | (field fu32 u32 2) 921 | (field fu64 u64 3))) 922 | ``` 923 | 924 | This tells us a few things: first, the encoded size will be between 2 and 9 925 | bytes long. Our message is 3 bytes long. Next up, it tells us that the union's 926 | tag will be represented as a `u8`. Finally, we see 4 fields are associated with 927 | the union. Each field has an associated index. The value of the tag must match 928 | one of these indices. The index that matches is the type that the union will 929 | contain. 930 | 931 | In our encoded message, we see that our first byte is `01`. We know that, since 932 | we're decoding a union, this tag will match one of the field indices. As it 933 | turns out, this index maps to the field `fu16`. This field is associated with 934 | the `u16` type. 935 | 936 | A `u16` is a primitive type. This means that it is a type that 937 | contains an actual value. Furthermore, we know that that this type has 938 | a fixed-size of 2. To decode a `u16` type, all we need to do is read 939 | out 2 bytes from the binary stream 940 | 941 | The next two bytes are `af 04`. We know that all Cauterize primitives are 942 | expressed in little endian, so this yields the final hex value of 0x04AF, or 943 | 1199 in decimal. 944 | 945 | At this point, there's nothing else to decode! We are out of bytes and the type 946 | `union_unsigned` needs no more bytes to be complete. 947 | 948 | Therefore, we can say that our `union_unsigned` value wraps a `u16` with the 949 | value of 1199. 950 | 951 | ### Decoding a Combination 952 | 953 | The following is an encoded `comb_unsigned`. 954 | 955 | ``` 956 | 032cd506 957 | ``` 958 | 959 | To interpret this, we can reference the `comb_unsigned` type in our 960 | specification. 961 | 962 | ``` 963 | (type comb_unsigned combination 964 | (sha1 d67b5d0a49e122140f418c12ad445ed013a52fc3) 965 | (range-size 1 16) 966 | (flags-repr u8) 967 | (fields 968 | (field fu8 u8 0) 969 | (field fu16 u16 1) 970 | (field fu32 u32 2) 971 | (field fu64 u64 3))) 972 | ``` 973 | 974 | Combinations use a set of flags to indicate which fields in the message are 975 | encoded. The flags are always at the beginning of the message. We can look at 976 | the `flags-repr` expression in the combination specification to determine how 977 | wide the word used to represent the flags is. In the case of `comb_unsigned`, 978 | the flags are represented as a `u8`. 979 | 980 | We can see, based on the `flags-repr` expression in `comb_unsigned` that, the 981 | flags in our current example are represented by the byte `0x03`--the first byte 982 | of our message. Remember, if `flags-repr` was a differrent type, we'd use more 983 | than one byte for our flags. 984 | 985 | To start decoding fields in our combination, we start with the first field, 986 | shift `1` to the left by the index of the field, and check whether or not the 987 | bit is set in our flags. If the bit is set, we can then decode that type out of 988 | the binary string. If the bit is not set, the field is skipped and we move on 989 | to the next one. 990 | 991 | We can see that our example has bits `(1 << 0)` (bit index 0) and `(1 << 1)` 992 | (bit index 1) set. This means that our first two fields, `fu8` and `fu16` are 993 | present in the binary string. The first field has type `u8` and the second 994 | field has type `u16`. The field `fu8` deocdes as the value `0x2C` and the field 995 | `fu16` decodes as the value `0x06d5` (remember, all builtin types are little 996 | endian). 997 | 998 | Our final type is, therefore, a `comb_unsigned` with the field `fu8` set to the 999 | value `0x2C` and the field `fu16` set to the value `0x06d5`. The fields `fu32` 1000 | and `fu64` are not set in this encoded instance. 1001 | 1002 | # Message Interface 1003 | 1004 | TODO: Write about me. 1005 | 1006 | # Answers to Obvious Questions 1007 | 1008 | In this section, we'll try and justify a few of the obvious questions that come 1009 | up when reading this document. Cauterize has some odd restrictions, but they 1010 | are normally conscious decisions. If you have a question that you feel is 1011 | obvious and isn't listed here, feel free to open an issue with the question. 1012 | 1013 | ## Why isn't there a string type? 1014 | 1015 | TODO: Answer this well. Strings are weird. 1016 | 1017 | If your schema needs a string type, consider defining your own like this: 1018 | 1019 | ``` 1020 | (schema string_example 1.0.0 1021 | (vector utf8str8k u8 8192)) 1022 | ``` 1023 | 1024 | This is a vector of `u8` values. Its string encoding isn't checked, but it's 1025 | likely safe to assume that it should be valid UTF8 data based on the name. 1026 | 1027 | ## Why don't unions support multiple types per alternative? 1028 | 1029 | In languages like Haskell, Rust, and OCaml, we're able to define union 1030 | types/sum types/algebraic alternative types that can contain multiple types per 1031 | constructor. 1032 | 1033 | This behavior has not yet been supported in Cauterize because it adds 1034 | complexity to the C code that would need to be generated. 1035 | 1036 | Take this hypothetical (but invalid) example: 1037 | 1038 | ``` 1039 | (name "multi_data_union_example") 1040 | (version "1.0.0") 1041 | 1042 | (type multi_type_field union 1043 | (fields 1044 | (field a u8 u16 u32))) 1045 | ``` 1046 | 1047 | In Haskell, we might be able to expand this union expression into the following type: 1048 | 1049 | ```haskell 1050 | data MultiTypeField = A U8 U16 U32 1051 | ``` 1052 | 1053 | In C, we'd need to do something like this: 1054 | 1055 | ```c 1056 | struct multi_type_field { 1057 | enum multi_type_field_tag { 1058 | multi_type_field_tag_a, 1059 | } _tag; 1060 | 1061 | union { 1062 | struct { 1063 | u8 ix0; 1064 | u16 ix1; 1065 | u32 ix2; 1066 | } a; 1067 | }; 1068 | }; 1069 | ``` 1070 | 1071 | There's no good way to express the names for different elements in the field. 1072 | We could come up with something, but it's not an obvious or clear path forward. 1073 | For this reason, we've chosen to omit multiple types per field in unions. 1074 | 1075 | # TODO List 1076 | 1077 | * Exhaustive checking for hash-collisions... just in case 1078 | * Hash algorithm selection 1079 | * Add a "ranged" type that's similar to a scalar but only accepts n..m values in a builtin 1080 | * Add ability to load schemas into other schemas 1081 | * Add small expression language to schemas for computing sizes or sharing numeric information 1082 | * Consider addition of generics to the schema 1083 | * Expand the things synonyms can refer to. 1084 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /bin/Cauterize/Options.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Options where 2 | 3 | import Cauterize.Version (versionString) 4 | import Options.Applicative 5 | 6 | data CautOpts = CautOpts 7 | { schemaFile :: FilePath 8 | , specPath :: FilePath 9 | } deriving (Show) 10 | 11 | runWithOptions :: (CautOpts -> IO ()) -> IO () 12 | runWithOptions fn = do 13 | mopts <- execParser options 14 | case mopts of 15 | Just opts -> fn opts 16 | Nothing -> putStr versionString 17 | 18 | options :: ParserInfo (Maybe CautOpts) 19 | options = info (helper <*> o) 20 | ( fullDesc 21 | `mappend` progDesc "Compile a Cauterize schema into a Cauterize specification" 22 | ) 23 | where 24 | o = flag' Nothing (long "version" `mappend` hidden) 25 | <|> (Just <$> optParser) 26 | 27 | optParser :: Parser CautOpts 28 | optParser = CautOpts 29 | <$> argument str 30 | ( metavar "SCHEMA" 31 | `mappend` help "Cauterize schema input file." 32 | ) 33 | <*> argument str 34 | ( metavar "SPEC" 35 | `mappend` help "Cauterize specification output file." 36 | ) 37 | -------------------------------------------------------------------------------- /bin/Main.hs: -------------------------------------------------------------------------------- 1 | module Main (main) where 2 | 3 | import Cauterize.Options 4 | 5 | import Cauterize.Schema as Sc 6 | import Cauterize.Schema.Checker as Sc 7 | import Cauterize.Specification as Sp 8 | 9 | import System.Exit 10 | 11 | import qualified Data.Text.IO as T 12 | 13 | main :: IO () 14 | main = runWithOptions $ \opts -> Sc.parseSchemaFromFile (schemaFile opts) >>= render (specPath opts) >>= exitWith 15 | where 16 | render _ (Left s) = print s >> return (ExitFailure (-1)) 17 | render outFile (Right s) = 18 | case Sc.checkSchema s of 19 | Right cs -> T.writeFile outFile (Sp.formatSpecification . Sp.mkSpecification $ cs) >> return ExitSuccess 20 | Left es -> print es >> return (ExitFailure 1) 21 | -------------------------------------------------------------------------------- /cauterize.cabal: -------------------------------------------------------------------------------- 1 | name: cauterize 2 | version: 1.1.0.0 3 | synopsis: Compiler for the Cauterize data description language. 4 | description: 5 | Cauterize is a data-description language suitable for hard-real-time systems 6 | and systems without dynamic memory allocation. It can be used instead of other 7 | data description languages like JSON, XML, or ProtocolBuffers. 8 | homepage: http://cauterize.info/ 9 | license: BSD3 10 | license-file: LICENSE 11 | author: John Van Enk 12 | maintainer: sw17ch@gmail.com 13 | copyright: 2015, John Van Enk 14 | category: Language 15 | build-type: Simple 16 | cabal-version: >=1.10 17 | 18 | library 19 | hs-source-dirs: src 20 | ghc-options: -Wall -Werror -O2 21 | build-depends: base >= 4.7 && < 5, 22 | mtl >=2.1, 23 | parsec, 24 | transformers, 25 | cryptohash, 26 | QuickCheck >= 2.7, 27 | containers, 28 | bytestring, 29 | wl-pprint-text, 30 | text >= 1.2, 31 | cereal >= 0.4.1.1, 32 | GraphSCC, 33 | s-cargot, 34 | gitrev >= 1.2.0 35 | default-language: Haskell2010 36 | exposed-modules: Cauterize.CommonTypes, 37 | Cauterize.Dynamic, 38 | Cauterize.Dynamic.Common, 39 | Cauterize.Dynamic.Gen, 40 | Cauterize.Dynamic.Meta, 41 | Cauterize.Dynamic.Meta.Gen, 42 | Cauterize.Dynamic.Meta.Pack, 43 | Cauterize.Dynamic.Meta.Pretty, 44 | Cauterize.Dynamic.Meta.Types, 45 | Cauterize.Dynamic.Meta.Unpack, 46 | Cauterize.Dynamic.Pack, 47 | Cauterize.Dynamic.Pretty, 48 | Cauterize.Dynamic.Types, 49 | Cauterize.Dynamic.Unpack, 50 | Cauterize.Generate, 51 | Cauterize.Hash, 52 | Cauterize.Schema, 53 | Cauterize.Schema.Checker, 54 | Cauterize.Schema.Parser, 55 | Cauterize.Schema.Types, 56 | Cauterize.Schema.Util, 57 | Cauterize.Specification, 58 | Cauterize.Specification.Parser, 59 | Cauterize.Specification.Compile, 60 | Cauterize.Specification.Types, 61 | Cauterize.Version 62 | other-modules: Paths_cauterize 63 | 64 | -- `cauterize` is the main entry point into the compiler/converter. This turns 65 | -- schemas into specifications. 66 | executable cauterize 67 | hs-source-dirs: bin 68 | ghc-options: -Wall -Werror -O2 -threaded -static 69 | main-is: Main.hs 70 | other-modules: Cauterize.Options 71 | build-depends: base, 72 | cauterize, 73 | text >= 1.2, 74 | optparse-applicative >=0.11.0.1 75 | default-language: Haskell2010 76 | other-modules: Cauterize.Options 77 | 78 | test-suite spec 79 | type: exitcode-stdio-1.0 80 | hs-source-dirs: tests 81 | ghc-options: -Wall -Werror -O2 -threaded 82 | main-is: Spec.hs 83 | build-depends: base, 84 | cauterize, 85 | hspec, 86 | text, 87 | bytestring 88 | default-language: Haskell2010 89 | other-modules: 90 | Cauterize.CommonTypesSpec, 91 | Cauterize.HashSpec, 92 | Cauterize.Schema.ParserSpec, 93 | Cauterize.Specification.ParserSpec, 94 | Cauterize.Dynamic.PackSpec 95 | -------------------------------------------------------------------------------- /cauterize_drift_detection.md: -------------------------------------------------------------------------------- 1 | # Cauterize version drift detection 2 | 3 | Cauterize creates a structural hash of all meaningful schema information. This 4 | can be used to detect, at runtime, whether two communicating partners are 5 | running the same version of the protocol or if they have drifted from 6 | eachother. This approach does not rely on a developer remembering to update a 7 | string version. 8 | 9 | Below are two schemas that have identical lengths, names, and fields. The only 10 | difference is the order in which the fields `x` and `y` occur. 11 | 12 | ``` 13 | (schema mismatch 1.0 14 | (record pair 15 | (fields 16 | (field x u32) 17 | (field y u32)))) 18 | ``` 19 | 20 | ``` 21 | (schema mismatch 1.0 22 | (record pair 23 | (fields 24 | (field y u32) 25 | (field x u32)))) 26 | ``` 27 | 28 | This sort of field mis-match is hard to track down at run time because both 29 | fields encode the same type of scalar information and have identical 30 | representations when encoded. 31 | 32 | Below, we see both of the specifications that result from running the Cauterize 33 | specification compiler on our schemas. Note that both the top level hash of 34 | the schema and the hash of the `pair` type are different in each. Everything 35 | else is the same. 36 | 37 | ``` 38 | (specification mismatch 1.0 (sha1 c2be7e466b7544161bb55c2a3ecafe0720f700fd) (range-size 4 8) (depth 2) 39 | (builtin u32 (sha1 13f56a24961b824565b27c3f7416dbd041ae6308) (fixed-size 4)) 40 | (record pair (sha1 2bfa3f587c8eb270d574c012a379fa0463f41c2e) 41 | (range-size 8 8) 42 | (fields 43 | (field x u32 0) 44 | (field y u32 1)))) 45 | ``` 46 | 47 | ``` 48 | (specification mismatch 1.0 (sha1 9ca2659b023b6260fbf1a290eef4c6ce46ae0b48) (range-size 4 8) (depth 2) 49 | (builtin u32 (sha1 13f56a24961b824565b27c3f7416dbd041ae6308) (fixed-size 4)) 50 | (record pair (sha1 d8ded099911f224a9f44917db71a6f365a3cc520) 51 | (range-size 8 8) 52 | (fields 53 | (field y u32 0) 54 | (field x u32 1)))) 55 | ``` 56 | 57 | This top-level hash can be traded at connection time to establish protocol 58 | compatability. 59 | -------------------------------------------------------------------------------- /examples/badName.scm: -------------------------------------------------------------------------------- 1 | (schema bad_name 0.0.1 2 | (union a (fields (field a u500)))) 3 | -------------------------------------------------------------------------------- /examples/compiled_example-meta.txt: -------------------------------------------------------------------------------- 1 | (meta-interface dynamic 0.0.1 (sha1 6e6856c6c5068a44c791320d64e43d29567a1cd3) 2 | (meta-variant 0) 3 | (type-length 1) 4 | (data-length 1) 5 | (types 6 | (type uthings 0f) 7 | (type u32 13) 8 | (type u8 3c) 9 | (type array_of_a_u8 46) 10 | (type u16 49) 11 | (type rthings 4b) 12 | (type array_of_array_of_a_u8 56) 13 | (type a_u8 78) 14 | (type vec_of_u32 a3) 15 | (type cthings b5) 16 | (type array_of_u16 c9) 17 | (type u64 ca))) 18 | -------------------------------------------------------------------------------- /examples/compiled_example-spec.txt: -------------------------------------------------------------------------------- 1 | (specification dynamic 0.0.1 (sha1 6e6856c6c5068a44c791320d64e43d29567a1cd3) (range-size 1 16) (depth 4) 2 | (builtin u8 (sha1 3c3c92ff20335765dbadd2930de367c0a8a9d9cb) (fixed-size 1)) 3 | (builtin u64 (sha1 ca58000caffa24364cf821488e348159a5d3ed11) (fixed-size 8)) 4 | (builtin u32 (sha1 13f56a24961b824565b27c3f7416dbd041ae6308) (fixed-size 4)) 5 | (vector vec_of_u32 (sha1 a3e0c3a8aa1c3b193c8ef1e29d4eb6973f1cc2c0) 6 | (range-size 1 13) 7 | (length-repr u8) 8 | 3 9 | u32) 10 | (builtin u16 (sha1 496042011a876c687fd713edb8388ab69e8b0bc6) (fixed-size 2)) 11 | (union uthings (sha1 0f2c65f3cc57ea5bb140f639c358cbd83dc87d18) 12 | (range-size 2 3) 13 | (tag-repr u8) 14 | (fields 15 | (field ix0 u8 0) 16 | (field ix1 u16 1) 17 | (field ix2 2))) 18 | (record rthings (sha1 4ba9457f2201d9907e26a462a6259d543cfbffa3) 19 | (range-size 15 15) 20 | (fields 21 | (field ix0 u8 0) 22 | (field ix1 u16 1) 23 | (field ix2 u32 2) 24 | (field ix3 u64 3))) 25 | (combination cthings (sha1 b5ed3e243d992ec6223f2a29d3db06851b1c1ca2) 26 | (range-size 1 16) 27 | (flags-repr u8) 28 | (fields 29 | (field ix0 u8 0) 30 | (field ix1 u16 1) 31 | (field ix2 u32 2) 32 | (field ix3 u64 3))) 33 | (array array_of_u16 (sha1 c971afb7a793e7d8b2ff00f0d1a736c3b4734488) 34 | (range-size 6 6) 35 | 3 36 | u16) 37 | (synonym a_u8 (sha1 7879b41399865154a220c516cf611103bde3cf83) 38 | (fixed-size 1) 39 | u8) 40 | (array array_of_a_u8 (sha1 463fb44e2926908a16129a5e538d343b005a6cd8) 41 | (range-size 3 3) 42 | 3 43 | a_u8) 44 | (array array_of_array_of_a_u8 (sha1 568fd0e761d9b5b89e668995b8d32718bc4d01e4) 45 | (range-size 9 9) 46 | 3 47 | array_of_a_u8)) 48 | -------------------------------------------------------------------------------- /examples/compiled_example.txt: -------------------------------------------------------------------------------- 1 | (schema dynamic 0.0.1 2 | (synonym a_u8 u8) 3 | (array array_of_a_u8 a_u8 3) 4 | (array array_of_array_of_a_u8 array_of_a_u8 3) 5 | (array array_of_u16 u16 3) 6 | (vector vec_of_u32 u32 3) 7 | (record rthings 8 | (fields 9 | (field ix0 u8) 10 | (field ix1 u16) 11 | (field ix2 u32) 12 | (field ix3 u64))) 13 | (combination cthings 14 | (fields 15 | (field ix0 u8) 16 | (field ix1 u16) 17 | (field ix2 u32) 18 | (field ix3 u64))) 19 | (union uthings 20 | (fields 21 | (field ix0 u8) 22 | (field ix1 u16) 23 | (field ix2)))) 24 | -------------------------------------------------------------------------------- /examples/dupName.scm: -------------------------------------------------------------------------------- 1 | (schema dup_name 0.0.1 2 | (record b (fields (field a u8))) 3 | (union b (fields (field a u8)))) 4 | -------------------------------------------------------------------------------- /examples/empty.scm: -------------------------------------------------------------------------------- 1 | (schema empty 0.0.1) 2 | -------------------------------------------------------------------------------- /examples/loop.scm: -------------------------------------------------------------------------------- 1 | (schema loop 0.0.1 2 | (record a 3 | (fields (field a b))) 4 | (record b 5 | (fields (field a a))) 6 | (record c 7 | (fields (field a d))) 8 | (record d 9 | (fields (field a c)))) 10 | -------------------------------------------------------------------------------- /examples/reservedName.scm: -------------------------------------------------------------------------------- 1 | (schema reserved_name 0.0.1 2 | (union u8 (fields (field a)))) 3 | 4 | -------------------------------------------------------------------------------- /examples/schema.scm: -------------------------------------------------------------------------------- 1 | (schema a_name 0.0.1 2 | (synonym number64 s64) 3 | (synonym unsigned8 u8) 4 | (array somearray number64 64) 5 | (vector somevector number64 64) 6 | (record arecord 7 | (fields 8 | (field z somevector) 9 | (field a s8) 10 | (field d brecord))) 11 | (record brecord 12 | (fields 13 | (field a s8) 14 | (field d crecord))) 15 | (record crecord 16 | (fields 17 | (field a s8) 18 | (field b s8))) 19 | (union a_union 20 | (fields 21 | (field a) 22 | (field b) 23 | (field c s8) 24 | (field d number64))) 25 | (combination a_combination 26 | (fields 27 | (field a number64) 28 | (field b s8) 29 | (field c a_union)))) 30 | -------------------------------------------------------------------------------- /src/Cauterize/CommonTypes.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, DeriveDataTypeable #-} 2 | module Cauterize.CommonTypes 3 | ( Offset 4 | , Length 5 | , Identifier 6 | , unIdentifier 7 | , unsafeMkIdentifier 8 | , mkIdentifier 9 | , Prim(..) 10 | , allPrims 11 | , allPrimNames 12 | , primToText 13 | , primToSize 14 | , primMap 15 | , primFittingAllInts 16 | , Tag(..) 17 | , tagToText 18 | , tagToSize 19 | , tagRequired 20 | , tagForBits 21 | , Size 22 | , sizeMin 23 | , sizeMax 24 | , mkSize 25 | , mkConstSize 26 | ) where 27 | 28 | import Data.String 29 | import Data.Word 30 | import Data.Int 31 | import Data.Char 32 | import Data.Text (Text, pack, empty) 33 | import Data.Maybe 34 | import Data.Data 35 | 36 | import qualified Data.Map as M 37 | 38 | type Offset = Int64 39 | 40 | type Length = Word64 41 | 42 | newtype Identifier = Identifier { unIdentifier :: Text } 43 | deriving (Eq, Ord, Data, Typeable) 44 | 45 | data Prim 46 | = PU8 47 | | PU16 48 | | PU32 49 | | PU64 50 | | PS8 51 | | PS16 52 | | PS32 53 | | PS64 54 | | PF32 55 | | PF64 56 | | PBool 57 | deriving (Show, Eq, Enum, Bounded) 58 | 59 | data Tag = T1 | T2 | T4 | T8 60 | deriving (Show, Eq) 61 | 62 | data Size = Size { sizeMin :: Integer, sizeMax :: Integer } 63 | deriving (Eq) 64 | 65 | isValidIdentifier :: String -> Bool 66 | isValidIdentifier [] = False 67 | isValidIdentifier (s:r) = first && rest 68 | where 69 | first = isAsciiLower s 70 | rest = all (\c -> isAsciiLower c || isDigit c || ('_' == c)) r 71 | 72 | unsafeMkIdentifier :: String -> Identifier 73 | unsafeMkIdentifier s = 74 | fromMaybe 75 | (error $ "unsafeMkIdentifier: invalid input string \"" ++ s ++ "\"") 76 | (mkIdentifier s) 77 | 78 | mkIdentifier :: String -> Maybe Identifier 79 | mkIdentifier [] = Just (Identifier empty) 80 | mkIdentifier i = 81 | if isValidIdentifier i 82 | then Just (Identifier $ pack i) 83 | else Nothing 84 | 85 | mkSize :: Integer -> Integer -> Size 86 | mkSize rmin rmax | rmin < 1 = error ("Min size less than 1: " ++ show rmin) 87 | | rmax < 1 = error ("Max size less than 1: " ++ show rmax) 88 | | rmin <= rmax = Size rmin rmax 89 | | otherwise = error ("Bad min and max: min " ++ show rmin ++ " >= max " ++ show rmax ++ ".") 90 | 91 | mkConstSize :: Integer -> Size 92 | mkConstSize sz = mkSize sz sz 93 | 94 | primToText :: Prim -> Identifier 95 | primToText PU8 = "u8" 96 | primToText PU16 = "u16" 97 | primToText PU32 = "u32" 98 | primToText PU64 = "u64" 99 | primToText PS8 = "s8" 100 | primToText PS16 = "s16" 101 | primToText PS32 = "s32" 102 | primToText PS64 = "s64" 103 | primToText PF32 = "f32" 104 | primToText PF64 = "f64" 105 | primToText PBool = "bool" 106 | 107 | allPrims :: [Prim] 108 | allPrims = [minBound..maxBound] 109 | 110 | allPrimNames :: [Identifier] 111 | allPrimNames = map primToText allPrims 112 | 113 | primToSize :: Prim -> Size 114 | primToSize PU8 = mkConstSize 1 115 | primToSize PU16 = mkConstSize 2 116 | primToSize PU32 = mkConstSize 4 117 | primToSize PU64 = mkConstSize 8 118 | primToSize PS8 = mkConstSize 1 119 | primToSize PS16 = mkConstSize 2 120 | primToSize PS32 = mkConstSize 4 121 | primToSize PS64 = mkConstSize 8 122 | primToSize PF32 = mkConstSize 4 123 | primToSize PF64 = mkConstSize 8 124 | primToSize PBool = mkConstSize 1 125 | 126 | primFittingAllInts :: [Integer] -> Prim 127 | primFittingAllInts vs 128 | | not signed && all (<= w8max) vs = PU8 129 | | not signed && all (<= w16max) vs = PU16 130 | | not signed && all (<= w32max) vs = PU32 131 | | not signed && all (<= w64max) vs = PU64 132 | 133 | | signed && all (\i -> i8min <= i && i <= i8max) vs = PS8 134 | | signed && all (\i -> i16min <= i && i <= i16max) vs = PS16 135 | | signed && all (\i -> i32min <= i && i <= i32max) vs = PS32 136 | | signed && all (\i -> i64min <= i && i <= i64max) vs = PS64 137 | 138 | | otherwise = error $ "Unable to express all values in a single primitive: " ++ show vs 139 | where 140 | signed = any (< 0) vs 141 | 142 | w64max = fromIntegral (maxBound :: Word64) 143 | w32max = fromIntegral (maxBound :: Word32) 144 | w16max = fromIntegral (maxBound :: Word16) 145 | w8max = fromIntegral (maxBound :: Word8) 146 | 147 | i64max = fromIntegral (maxBound :: Int64) 148 | i32max = fromIntegral (maxBound :: Int32) 149 | i16max = fromIntegral (maxBound :: Int16) 150 | i8max = fromIntegral (maxBound :: Int8) 151 | 152 | i64min = fromIntegral (minBound :: Int64) 153 | i32min = fromIntegral (minBound :: Int32) 154 | i16min = fromIntegral (minBound :: Int16) 155 | i8min = fromIntegral (minBound :: Int8) 156 | 157 | primMap :: M.Map Identifier Prim 158 | primMap = M.fromList $ zip allPrimNames allPrims 159 | 160 | tagToText :: Tag -> Identifier 161 | tagToText T1 = "t1" 162 | tagToText T2 = "t2" 163 | tagToText T4 = "t4" 164 | tagToText T8 = "t8" 165 | 166 | tagToSize :: Tag -> Size 167 | tagToSize T1 = mkConstSize 1 168 | tagToSize T2 = mkConstSize 2 169 | tagToSize T4 = mkConstSize 4 170 | tagToSize T8 = mkConstSize 8 171 | 172 | tagRequired :: Integral a => a -> Tag 173 | tagRequired i | (0 <= i') && (i' < 256) = T1 174 | | (256 <= i') && (i' < 65536) = T2 175 | | (25536 <= i') && (i' < 4294967296) = T4 176 | | (4294967296 <= i') && (i' <= 18446744073709551615) = T8 177 | | otherwise = error $ "Cannot express tag for value: " ++ show i' 178 | where 179 | i' = fromIntegral i :: Integer 180 | 181 | tagForBits :: Integral a => a -> Tag 182 | tagForBits v | 0 <= v' && v' <= 8 = T1 183 | | 0 <= v' && v' <= 16 = T2 184 | | 0 <= v' && v' <= 32 = T4 185 | | 0 <= v' && v' <= 64 = T8 186 | | otherwise = error 187 | $ "Cannot express '" ++ show v' ++ "' bits in a bitfield." 188 | where 189 | v' = fromIntegral v :: Integer 190 | 191 | instance IsString Identifier where 192 | fromString s = 193 | fromMaybe 194 | (error $ "IsString Identifier: invalid input string \"" ++ s ++ "\"") 195 | (mkIdentifier s) 196 | 197 | instance Show Identifier where 198 | show (Identifier i) = "i" ++ show i 199 | 200 | instance Show Size where 201 | show (Size smin smax) | smin == smax = "Size " ++ show smax 202 | | otherwise = "Size " ++ show smin ++ ".." ++ show smax 203 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic 2 | ( module M 3 | ) where 4 | 5 | 6 | import Cauterize.Dynamic.Pack as M 7 | import Cauterize.Dynamic.Unpack as M 8 | import Cauterize.Dynamic.Types as M 9 | import Cauterize.Dynamic.Gen as M 10 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Dynamic.Common 3 | ( isNameOf 4 | , lu 5 | , fieldsToNameMap 6 | , fieldsToIndexMap 7 | , fieldNameSet 8 | 9 | , isSynonym 10 | , isRange 11 | , isArray 12 | , isVector 13 | , isEnumeration 14 | , isRecord 15 | , isCombination 16 | , isUnion 17 | ) where 18 | 19 | import Cauterize.Dynamic.Types 20 | import Control.Exception 21 | import Data.Maybe 22 | import qualified Data.Text.Lazy as T 23 | import qualified Cauterize.Specification as S 24 | import qualified Cauterize.CommonTypes as C 25 | import qualified Data.Map as M 26 | import qualified Data.Set as Set 27 | 28 | lu :: C.Identifier -> TyMap -> S.Type 29 | lu n m = fromMaybe (throw $ InvalidType n) 30 | (n `M.lookup` m) 31 | 32 | fieldsToNameMap :: [S.Field] -> M.Map C.Identifier S.Field 33 | fieldsToNameMap fs = M.fromList $ map go fs 34 | where 35 | go f = (S.fieldName f, f) 36 | 37 | fieldsToIndexMap :: [S.Field] -> M.Map Integer S.Field 38 | fieldsToIndexMap fs = M.fromList $ map go fs 39 | where 40 | go f = (S.fieldIndex f, f) 41 | 42 | fieldNameSet :: [S.Field] -> Set.Set C.Identifier 43 | fieldNameSet fs = Set.fromList $ map S.fieldName fs 44 | 45 | isNameOf :: T.Text -> PrimDetails -> Bool 46 | isNameOf "u8" (PDu8 _) = True 47 | isNameOf "u16" (PDu16 _) = True 48 | isNameOf "u32" (PDu32 _) = True 49 | isNameOf "u64" (PDu64 _) = True 50 | isNameOf "s8" (PDs8 _) = True 51 | isNameOf "s16" (PDs16 _) = True 52 | isNameOf "s32" (PDs32 _) = True 53 | isNameOf "s64" (PDs64 _) = True 54 | isNameOf "f32" (PDf32 _) = True 55 | isNameOf "f64" (PDf64 _) = True 56 | isNameOf "bool" (PDbool _) = True 57 | isNameOf _ _ = False 58 | 59 | isSynonym :: S.Type -> Bool 60 | isSynonym (S.Type { S.typeDesc = S.Synonym {} }) = True 61 | isSynonym _ = False 62 | 63 | isRange :: S.Type -> Bool 64 | isRange (S.Type { S.typeDesc = S.Range {} }) = True 65 | isRange _ = False 66 | 67 | isArray :: S.Type -> Bool 68 | isArray (S.Type { S.typeDesc = S.Array {} }) = True 69 | isArray _ = False 70 | 71 | isVector :: S.Type -> Bool 72 | isVector (S.Type { S.typeDesc = S.Vector {} }) = True 73 | isVector _ = False 74 | 75 | isEnumeration :: S.Type -> Bool 76 | isEnumeration (S.Type { S.typeDesc = S.Enumeration {} }) = True 77 | isEnumeration _ = False 78 | 79 | isRecord :: S.Type -> Bool 80 | isRecord (S.Type { S.typeDesc = S.Record {} }) = True 81 | isRecord _ = False 82 | 83 | isCombination :: S.Type -> Bool 84 | isCombination (S.Type { S.typeDesc = S.Combination {} }) = True 85 | isCombination _ = False 86 | 87 | isUnion :: S.Type -> Bool 88 | isUnion (S.Type { S.typeDesc = S.Union {} }) = True 89 | isUnion _ = False 90 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Gen.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Gen 2 | ( dynamicGen 3 | , dynamicGenType 4 | , dynamicGenType' 5 | ) where 6 | 7 | import Cauterize.Dynamic.Common 8 | import Cauterize.Dynamic.Types 9 | import Control.Monad 10 | import Test.QuickCheck.Arbitrary 11 | import Test.QuickCheck.Gen 12 | import qualified Cauterize.CommonTypes as C 13 | import qualified Cauterize.Specification as S 14 | import qualified Data.Map as M 15 | 16 | dynamicGen :: S.Specification -> IO CautType 17 | dynamicGen s = do 18 | n <- generate $ elements $ M.keys m 19 | dynamicGenType s n 20 | where 21 | m = S.specTypeMap s 22 | 23 | dynamicGenType :: S.Specification -> C.Identifier -> IO CautType 24 | dynamicGenType s n = generate $ dynamicGenType' s n 25 | 26 | dynamicGenType' :: S.Specification -> C.Identifier -> Gen CautType 27 | dynamicGenType' s n = do 28 | d <- dynamicGenDetails m n 29 | return CautType { ctName = n, ctDetails = d } 30 | where 31 | m = S.specTypeMap s 32 | 33 | dynamicGenDetails :: TyMap -> C.Identifier -> Gen CautDetails 34 | dynamicGenDetails m n = 35 | case n `M.lookup` C.primMap of 36 | Just p -> dynamicGenPrim p 37 | Nothing -> 38 | case S.typeDesc (n `lu` m) of 39 | S.Synonym { S.synonymRef = sy } -> dynamicGenSynonym m sy 40 | S.Range { S.rangeOffset = o, S.rangeLength = l } -> dynamicGenRange o l 41 | S.Array { S.arrayRef = a, S.arrayLength = l } -> dynamicGenArray m a l 42 | S.Vector { S.vectorRef = v, S.vectorLength = l } -> dynamicGenVector m v l 43 | S.Enumeration { S.enumerationValues = vs } -> dynamicGenEnumeration vs 44 | S.Record { S.recordFields = r } -> dynamicGenRecord m r 45 | S.Combination { S.combinationFields = c } -> dynamicGenCombination m c 46 | S.Union { S.unionFields = u } -> dynamicGenUnion m u 47 | 48 | dynamicGenPrim :: C.Prim -> Gen CautDetails 49 | dynamicGenPrim p = 50 | let p' = 51 | case p of 52 | C.PU8 -> liftM PDu8 arbitrary 53 | C.PU16 -> liftM PDu16 arbitrary 54 | C.PU32 -> liftM PDu32 arbitrary 55 | C.PU64 -> liftM PDu64 arbitrary 56 | C.PS8 -> liftM PDs8 arbitrary 57 | C.PS16 -> liftM PDs16 arbitrary 58 | C.PS32 -> liftM PDs32 arbitrary 59 | C.PS64 -> liftM PDs64 arbitrary 60 | C.PF32 -> liftM PDf32 arbitrary 61 | C.PF64 -> liftM PDf64 arbitrary 62 | C.PBool -> liftM PDbool arbitrary 63 | in liftM CDPrim p' 64 | 65 | dynamicGenSynonym :: TyMap -> C.Identifier -> Gen CautDetails 66 | dynamicGenSynonym m s = liftM CDSynonym (dynamicGenDetails m s) 67 | 68 | dynamicGenRange :: C.Offset -> C.Length -> Gen CautDetails 69 | dynamicGenRange o l = do 70 | v <- choose (0, l) 71 | return $ CDRange (fromIntegral v + fromIntegral o) 72 | 73 | dynamicGenArray :: TyMap -> C.Identifier -> C.Length -> Gen CautDetails 74 | dynamicGenArray m r l = 75 | liftM CDArray $ replicateM (fromIntegral l) getter 76 | where 77 | getter = dynamicGenDetails m r 78 | 79 | dynamicGenVector :: TyMap -> C.Identifier -> C.Length -> Gen CautDetails 80 | dynamicGenVector m r maxLen = do 81 | len <- choose (0, maxLen) 82 | liftM CDVector $ replicateM (fromIntegral len) getter 83 | where 84 | getter = dynamicGenDetails m r 85 | 86 | dynamicGenEnumeration :: [S.EnumVal] -> Gen CautDetails 87 | dynamicGenEnumeration vs = liftM CDEnumeration (elements (map S.enumValName vs)) 88 | 89 | dynamicGenRecord :: TyMap -> [S.Field] -> Gen CautDetails 90 | dynamicGenRecord m fs = 91 | liftM (CDRecord . M.fromList) $ mapM (genField m) fs 92 | 93 | dynamicGenCombination :: TyMap -> [S.Field] -> Gen CautDetails 94 | dynamicGenCombination m fs = do 95 | fields <- subset fs 96 | liftM (CDCombination . M.fromList) $ mapM (genField m) fields 97 | 98 | dynamicGenUnion :: TyMap -> [S.Field] -> Gen CautDetails 99 | dynamicGenUnion m fs = do 100 | f <- elements fs 101 | (n, d) <- genField m f 102 | return CDUnion { cdUnionFieldName = n, cdUnionFieldDetails = d } 103 | 104 | genField :: TyMap -> S.Field -> Gen (C.Identifier, FieldValue) 105 | genField _ (S.EmptyField { S.fieldName = n }) = return (n, EmptyField) 106 | genField m (S.DataField { S.fieldName = n, S.fieldRef = r }) = 107 | liftM (\d -> (n, DataField d)) (dynamicGenDetails m r) 108 | 109 | subset :: [a] -> Gen [a] 110 | subset [] = return [] 111 | subset (a:as) = do 112 | roll <- arbitrary 113 | if roll 114 | then liftM (a:) (subset as) 115 | else subset as 116 | 117 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Meta 2 | ( module M 3 | ) where 4 | 5 | import Cauterize.Dynamic.Meta.Gen as M 6 | import Cauterize.Dynamic.Meta.Pack as M 7 | import Cauterize.Dynamic.Meta.Unpack as M 8 | import Cauterize.Dynamic.Meta.Types as M 9 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta/Gen.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Meta.Gen 2 | ( dynamicMetaGen 3 | , dynamicMetaGen' 4 | ) where 5 | 6 | import Cauterize.Dynamic.Meta.Types 7 | import Cauterize.Dynamic.Gen 8 | import Control.Monad 9 | import Test.QuickCheck.Gen 10 | import qualified Cauterize.Specification as Spec 11 | import qualified Data.Map as M 12 | 13 | dynamicMetaGen :: Spec.Specification -> IO MetaType 14 | dynamicMetaGen spec = generate $ dynamicMetaGen' spec 15 | 16 | dynamicMetaGen' :: Spec.Specification -> Gen MetaType 17 | dynamicMetaGen' spec = do 18 | n <- elements $ M.keys m 19 | liftM MetaType (dynamicGenType' spec n) 20 | where 21 | m = Spec.specTypeMap spec 22 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta/Pack.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Meta.Pack 2 | ( dynamicMetaPack 3 | , dynamicMetaPackHeaderAndPayload 4 | ) where 5 | 6 | import Cauterize.Dynamic.Meta.Types 7 | import Cauterize.Dynamic.Pack 8 | import Cauterize.Dynamic.Types 9 | import Control.Exception 10 | import Data.Serialize.Put 11 | import qualified Cauterize.CommonTypes as C 12 | import qualified Cauterize.Specification as Spec 13 | import qualified Data.ByteString as B 14 | import qualified Data.Map as M 15 | import qualified Cauterize.Hash as H 16 | 17 | dynamicMetaPack :: Spec.Specification -> MetaType -> B.ByteString 18 | dynamicMetaPack spec t = 19 | let (h, p) = dynamicMetaPackHeaderAndPayload spec t 20 | in h `B.append` p 21 | 22 | dynamicMetaPackHeaderAndPayload :: Spec.Specification -> MetaType -> (B.ByteString, B.ByteString) 23 | dynamicMetaPackHeaderAndPayload spec t = 24 | case tn `M.lookup` m of 25 | Nothing -> throw $ InvalidType tn 26 | Just ty -> 27 | let prefix = take (fromIntegral tw) $ H.hashToBytes $ Spec.typeFingerprint ty 28 | h = runPut $ do 29 | packLengthWithWidth (fromIntegral . B.length $ ctPacked) dl 30 | putByteString (B.pack prefix) 31 | in (h, ctPacked) 32 | where 33 | dl = (C.sizeMax . C.tagToSize . Spec.specLengthTag) spec 34 | tw = Spec.specTypeLength spec 35 | ct = unMetaType t 36 | tn = ctName ct 37 | m = Spec.specTypeMap spec 38 | ctPacked = dynamicPack spec ct 39 | 40 | packLengthWithWidth :: Integer -- length to pack 41 | -> Integer -- width to pack length into 42 | -> Put 43 | packLengthWithWidth len 1 | 0 <= len && len < 2^(8 :: Integer) = putWord8 (fromIntegral len) 44 | packLengthWithWidth len 2 | 0 <= len && len < 2^(16 :: Integer) = putWord16le (fromIntegral len) 45 | packLengthWithWidth len 4 | 0 <= len && len < 2^(32 :: Integer) = putWord32le (fromIntegral len) 46 | packLengthWithWidth len 8 | 0 <= len && len < 2^(64 :: Integer) = putWord64le (fromIntegral len) 47 | packLengthWithWidth len w = throw $ InvalidLengthForLengthWidth len w 48 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta/Pretty.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Meta.Pretty 2 | ( dynamicMetaPretty 3 | ) where 4 | 5 | import Cauterize.Specification 6 | import Cauterize.Dynamic.Pretty 7 | import Cauterize.Dynamic.Meta.Types 8 | 9 | import qualified Data.Text as T 10 | 11 | dynamicMetaPretty :: Specification -> MetaType -> T.Text 12 | dynamicMetaPretty s (MetaType t) = dynamicPretty s t 13 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta/Types.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Dynamic.Meta.Types 2 | ( MetaType(..) 3 | , MetaHeader(..) 4 | ) where 5 | 6 | import Cauterize.Dynamic.Types 7 | import Data.Word 8 | 9 | data MetaHeader = 10 | MetaHeader { metaLength :: Integer 11 | , metaTag :: [Word8] 12 | } 13 | deriving (Show, Eq, Ord) 14 | 15 | data MetaType = 16 | MetaType { unMetaType :: CautType } 17 | deriving (Show, Eq, Ord) 18 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Meta/Unpack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Dynamic.Meta.Unpack 3 | ( dynamicMetaUnpack 4 | , dynamicMetaUnpackHeader 5 | , dynamicMetaUnpackFromHeader 6 | ) where 7 | 8 | import Cauterize.Dynamic.Types 9 | import Cauterize.Dynamic.Meta.Types 10 | import Cauterize.Dynamic.Unpack 11 | import Control.Exception 12 | import Control.Monad 13 | import Data.Serialize.Get 14 | import qualified Cauterize.CommonTypes as C 15 | import qualified Cauterize.Specification as Spec 16 | import qualified Data.ByteString as B 17 | import qualified Data.Map as M 18 | import qualified Data.Text as T 19 | 20 | dynamicMetaUnpackHeader :: Spec.Specification -> B.ByteString -> Either String (MetaHeader, B.ByteString) 21 | dynamicMetaUnpackHeader spec b = runGetState p b 0 22 | where 23 | p = do 24 | let dataLen = (C.sizeMax . C.tagToSize . Spec.specLengthTag) spec 25 | let tagLen = fromIntegral . Spec.specTypeLength $ spec 26 | 27 | len <- liftM fromIntegral (unpackLengthWithWidth dataLen) 28 | tag <- liftM B.unpack $ getByteString tagLen 29 | 30 | return $ MetaHeader len tag 31 | 32 | dynamicMetaUnpackFromHeader :: Spec.Specification -> MetaHeader -> B.ByteString -> Either T.Text (MetaType, B.ByteString) 33 | dynamicMetaUnpackFromHeader spec (MetaHeader _ tag) b = case runGetState p b 0 of 34 | Right r -> Right r 35 | Left e -> Left $ T.pack e 36 | where 37 | p = case tag `M.lookup` m of 38 | Nothing -> fail $ "Invalid tag: " ++ show tag 39 | Just ty -> liftM MetaType (dynamicUnpack' spec (Spec.typeName ty)) 40 | m = Spec.specTypeTagMap spec 41 | 42 | dynamicMetaUnpack :: Spec.Specification -> B.ByteString -> Either T.Text (MetaType, B.ByteString) 43 | dynamicMetaUnpack spec b = 44 | case dynamicMetaUnpackHeader spec b of 45 | Left err -> Left $ "Failed to unpack meta header:" `T.append` T.pack err 46 | Right (mh, remainder) -> dynamicMetaUnpackFromHeader spec mh remainder 47 | 48 | unpackLengthWithWidth :: Integer -> Get Integer 49 | unpackLengthWithWidth 1 = liftM fromIntegral getWord8 50 | unpackLengthWithWidth 2 = liftM fromIntegral getWord16le 51 | unpackLengthWithWidth 4 = liftM fromIntegral getWord32le 52 | unpackLengthWithWidth 8 = liftM fromIntegral getWord64le 53 | unpackLengthWithWidth w = throw $ InvalidLengthWidth w 54 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Pack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Dynamic.Pack 3 | ( dynamicPack 4 | ) where 5 | 6 | import Cauterize.Dynamic.Common 7 | import Cauterize.Dynamic.Types 8 | import Control.Exception 9 | import Data.Maybe 10 | import Data.Serialize.IEEE754 11 | import Data.Serialize.Put 12 | import Data.Bits 13 | import qualified Cauterize.CommonTypes as C 14 | import qualified Cauterize.Specification as S 15 | import qualified Data.ByteString as B 16 | import qualified Data.Map as M 17 | import qualified Data.Set as Set 18 | import qualified Data.Text.Lazy as T 19 | 20 | dynamicPack :: S.Specification -> CautType -> B.ByteString 21 | dynamicPack s (CautType { ctName = n, ctDetails = d }) = 22 | let m = S.specTypeMap s 23 | in runPut $ dynamicPackDetails m n d 24 | 25 | dynamicPackDetails :: TyMap -> C.Identifier -> CautDetails -> Put 26 | dynamicPackDetails m n det = 27 | case det of 28 | CDSynonym bd -> dynamicPackSynonym m n bd 29 | CDRange v -> dynamicPackRange m n v 30 | CDArray es -> dynamicPackArray m n es 31 | CDVector es -> dynamicPackVector m n es 32 | CDRecord fs -> dynamicPackRecord m n fs 33 | CDEnumeration v -> dynamicPackEnumeration m n v 34 | CDCombination fs -> dynamicPackCombination m n fs 35 | CDUnion fn fd -> dynamicPackUnion m n fn fd 36 | CDPrim p -> dynamicPackPrim p 37 | 38 | -- There are 4 tag widths in Cauterize: 8, 16, 32, and 64 bits. This will pack 39 | -- an Integer as if it was one of those tag variants. If the specified Integer 40 | -- is out of range, an exception is thrown. 41 | dynamicPackTag :: C.Tag -> Integer -> Put 42 | dynamicPackTag b v = 43 | case b of 44 | C.T1 | v >= 0 && v <= t8Max -> putWord8 (fromIntegral v) 45 | C.T2 | v >= 0 && v <= t16Max -> putWord16le (fromIntegral v) 46 | C.T4 | v >= 0 && v <= t32Max -> putWord32le (fromIntegral v) 47 | C.T8 | v >= 0 && v <= t64Max -> putWord64le (fromIntegral v) 48 | _ -> throw $ InvalidTagForRepresentation v (T.pack . show $ b) 49 | where 50 | t8Max, t16Max, t32Max, t64Max :: Integer 51 | t8Max = 2^(8 :: Integer) - 1 52 | t16Max = 2^(16 :: Integer) - 1 53 | t32Max = 2^(32 :: Integer) - 1 54 | t64Max = 2^(64 :: Integer) - 1 55 | 56 | dynamicPackPrim :: PrimDetails -> Put 57 | dynamicPackPrim det = 58 | case det of 59 | PDu8 d -> putWord8 d 60 | PDu16 d -> putWord16le d 61 | PDu32 d -> putWord32le d 62 | PDu64 d -> putWord64le d 63 | PDs8 d -> putWord8 $ fromIntegral d 64 | PDs16 d -> putWord16le $ fromIntegral d 65 | PDs32 d -> putWord32le $ fromIntegral d 66 | PDs64 d -> putWord64le $ fromIntegral d 67 | PDf32 d -> putFloat32le d 68 | PDf64 d -> putFloat64le d 69 | PDbool d -> putWord8 $ if d then 1 else 0 70 | 71 | dynamicPackSynonym :: TyMap -> C.Identifier -> CautDetails -> Put 72 | dynamicPackSynonym m n det = 73 | let (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isSynonym "synonym" 74 | in dynamicPackDetails m (S.synonymRef t) det 75 | 76 | dynamicPackRange :: TyMap -> C.Identifier -> Integer -> Put 77 | dynamicPackRange m n v = 78 | let (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isRange "range" 79 | rmin = fromIntegral $ S.rangeOffset t 80 | rmax = fromIntegral (S.rangeLength t) - fromIntegral (S.rangeOffset t) 81 | in if v < rmin || v > rmax 82 | then throw $ RangeOutOfBounds rmin rmax v 83 | else dynamicPackTag (S.rangeTag t) (v - rmin) 84 | 85 | dynamicPackArray :: TyMap -> C.Identifier -> [CautDetails] -> Put 86 | dynamicPackArray m n elems = 87 | let (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isArray "array" 88 | el = fromIntegral $ length elems 89 | al = S.arrayLength t 90 | etype = S.arrayRef t 91 | in if al /= el 92 | then throw $ IncorrectArrayLength (fromIntegral al) (fromIntegral el) 93 | else mapM_ (dynamicPackDetails m etype) elems 94 | 95 | dynamicPackVector :: TyMap -> C.Identifier -> [CautDetails] -> Put 96 | dynamicPackVector m n elems = 97 | let (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isVector "vector" 98 | el = length elems 99 | vl = S.vectorLength t 100 | etype = S.vectorRef t 101 | vt = S.vectorTag t 102 | in if fromIntegral el > vl 103 | then throw $ IncorrectVectorLength (fromIntegral vl) (fromIntegral el) 104 | else do dynamicPackTag vt (fromIntegral el) 105 | mapM_ (dynamicPackDetails m etype) elems 106 | 107 | dynamicPackRecord :: TyMap -> C.Identifier -> M.Map C.Identifier FieldValue -> Put 108 | dynamicPackRecord m n fields = checkedDynamicFields fs fields go 109 | where 110 | (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isRecord "record" 111 | fs = S.recordFields t 112 | go fields' = mapM_ (dynamicPackRecordField m fields') fs 113 | 114 | dynamicPackEnumeration :: TyMap -> C.Identifier -> C.Identifier -> Put 115 | dynamicPackEnumeration m n val = dynamicPackTag tag ix 116 | where 117 | (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isEnumeration "enumeration" 118 | tag = S.enumerationTag t 119 | vals = let ev = S.enumerationValues t 120 | in zip (map S.enumValName ev) (map S.enumValIndex ev) 121 | ix = fromMaybe (throw $ InvalidEnumerable val) (val `lookup` vals) 122 | 123 | dynamicPackCombination :: TyMap -> C.Identifier -> M.Map C.Identifier FieldValue -> Put 124 | dynamicPackCombination m n fields = checkedDynamicFields fs fields go 125 | where 126 | (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isCombination "combination" 127 | fs = S.combinationFields t 128 | ct = S.combinationTag t 129 | go fields' = 130 | let fm = fieldsToNameMap fs 131 | ixs = map (fromIntegral . S.fieldIndex . fromJust . (`M.lookup` fm)) (M.keys fields') 132 | ixbits = foldl setBit (0 :: Int) ixs 133 | in do dynamicPackTag ct (fromIntegral ixbits) 134 | mapM_ (dynamicPackCombinationField m fields') fs 135 | 136 | dynamicPackUnion :: TyMap -> C.Identifier -> C.Identifier -> FieldValue -> Put 137 | dynamicPackUnion m n fn fv = do 138 | dynamicPackTag fir (S.fieldIndex field) 139 | case (field, fv) of 140 | (S.EmptyField {}, EmptyField) -> return () 141 | (S.EmptyField { S.fieldName = efn }, DataField d) -> unexpectedData efn d 142 | (S.DataField { S.fieldRef = r }, DataField fd) -> dynamicPackDetails m r fd 143 | (S.DataField { S.fieldName = dfn }, EmptyField) -> unexpectedEmpty dfn 144 | where 145 | (S.Type { S.typeDesc = t }) = checkedTypeLookup m n isUnion "union" 146 | fm = fieldsToNameMap . S.unionFields $ t 147 | fir = S.unionTag t 148 | field = fromMaybe (throw $ UnexpectedFields [fn]) 149 | (fn `M.lookup` fm) 150 | 151 | -- Retrieve a type from the map while also ensuring that its type matches some 152 | -- expected condition. If the type does not match an exception is thrown. 153 | checkedTypeLookup :: TyMap -- the map of types from the schema 154 | -> C.Identifier -- the name of the type to check 155 | -> (S.Type -> Bool) -- a checking function: isArray, isRecord, etc 156 | -> T.Text -- a name to use for the expected type (THIS IS SUPER HACKY) 157 | -> S.Type 158 | checkedTypeLookup m n checker expectedStr = 159 | let t = n `lu` m 160 | in if not (checker t) 161 | then throw $ PrototypeMisMatch n expectedStr 162 | else t 163 | 164 | -- Validates that the dynamic fields are all found in the specification fields. 165 | -- The passed function can assume that there are no extra fields. There may 166 | -- still be *missing* fields. 167 | checkedDynamicFields :: [S.Field] -- input fields to compare againts 168 | -> M.Map C.Identifier FieldValue -- the dynamic fields that need checking 169 | -> (M.Map C.Identifier FieldValue -> Put) -- a function to accept the checked dynamic fields 170 | -> Put -- the result of the passed function 171 | checkedDynamicFields fs dfs a = 172 | let fset = fieldNameSet fs 173 | dset = M.keysSet dfs 174 | diff = dset `Set.difference` fset -- the fields in dset that are not in fset 175 | in if Set.empty /= diff 176 | then throw $ UnexpectedFields (Set.toList diff) 177 | else a dfs 178 | 179 | -- Insists that the dynamic field map is complete. 180 | dynamicPackRecordField :: TyMap -> M.Map C.Identifier FieldValue -> S.Field -> Put 181 | dynamicPackRecordField _ fm (S.EmptyField { S.fieldName = n }) = dynamicPackEmptyField fm n 182 | dynamicPackRecordField tym fm (S.DataField { S.fieldName = n, S.fieldRef = r }) = 183 | let det = fromMaybe (throw $ MissingField n) 184 | (n `M.lookup` fm) 185 | in case det of 186 | DataField det' -> dynamicPackDetails tym r det' 187 | EmptyField -> unexpectedEmpty n 188 | 189 | -- Skips fields not present in the dynamic field map. 190 | dynamicPackCombinationField :: TyMap -> M.Map C.Identifier FieldValue -> S.Field -> Put 191 | dynamicPackCombinationField _ fm (S.EmptyField { S.fieldName = n }) = 192 | case n `M.lookup` fm of 193 | Just (DataField det) -> unexpectedData n det 194 | Just EmptyField -> dynamicPackEmptyField fm n 195 | Nothing -> return () 196 | dynamicPackCombinationField tym fm (S.DataField { S.fieldName = n, S.fieldRef = r }) = 197 | case n `M.lookup` fm of 198 | Just EmptyField -> unexpectedEmpty n 199 | Just (DataField det) -> dynamicPackDetails tym r det 200 | Nothing -> return () 201 | 202 | unexpectedEmpty :: C.Identifier -> c 203 | unexpectedEmpty n = throw $ UnexpectedEmptyField n 204 | 205 | unexpectedData :: C.Identifier -> CautDetails -> c 206 | unexpectedData n d = throw $ UnexpectedDataField n d 207 | 208 | dynamicPackEmptyField :: M.Map C.Identifier FieldValue -> C.Identifier -> Put 209 | dynamicPackEmptyField fm n = 210 | let det = fromMaybe (throw $ MissingField n) 211 | (n `M.lookup` fm) 212 | in case det of 213 | EmptyField -> return () 214 | DataField d -> unexpectedData n d 215 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Pretty.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, CPP #-} 2 | module Cauterize.Dynamic.Pretty 3 | ( dynamicPretty 4 | ) where 5 | 6 | #if __GLASGOW_HASKELL__ >= 710 7 | import Prelude hiding ((<$>)) 8 | #endif 9 | 10 | import Cauterize.CommonTypes 11 | import Cauterize.Dynamic.Types as DT 12 | import Cauterize.Specification 13 | import Data.Maybe 14 | import Text.PrettyPrint.Leijen.Text 15 | import qualified Data.Map as M 16 | import qualified Data.Text as T 17 | import qualified Data.Text.Lazy as TL 18 | 19 | dynamicPretty :: Specification -> CautType -> T.Text 20 | dynamicPretty s t = T.concat $ TL.toChunks (displayT $ renderPretty 0.9 80 $ dp s t) 21 | 22 | dp :: Specification -> CautType -> Doc 23 | dp s (CautType n d) = prettyDetails sm n d 24 | where 25 | sm = specTypeMap s 26 | 27 | lu :: Identifier -> M.Map Identifier a -> a 28 | lu k m = fromMaybe (error $ "key doesn't exist: " ++ T.unpack (unIdentifier k)) 29 | (k `M.lookup` m) 30 | 31 | ident :: Identifier -> Doc 32 | ident = text . TL.fromChunks . (:[]) . unIdentifier 33 | 34 | prettyDetails :: M.Map Identifier Type -> Identifier -> CautDetails -> Doc 35 | prettyDetails m n (CDSynonym s) = parens $ "synonym" <+> prettyDetails m r s 36 | where 37 | Type { typeDesc = (Synonym { synonymRef = r }) } = n `lu` m 38 | prettyDetails _ n (CDRange v) = 39 | parens ("range" <+> ident n <$> ii v) 40 | prettyDetails s n (CDArray elems) = 41 | parens ("array" <+> ident n <$> indent 2 (parens $ "elems" <$> indent 2 vs)) 42 | where 43 | vs = fillSep $ map (prettyDetails s elemsName) elems 44 | Type { typeDesc = Array { arrayRef = elemsName } } = n `lu` s 45 | prettyDetails s n (CDVector elems) = 46 | parens ("vector" <+> ident n <$> indent 2 (parens $ "elems" <$> indent 2 vs)) 47 | where 48 | vs = fillSep $ map (prettyDetails s elemsName) elems 49 | Type { typeDesc = Vector { vectorRef = elemsName } } = n `lu` s 50 | prettyDetails _ n (CDEnumeration v) = 51 | parens ("enumeration" <+> ident n <$> ident v) 52 | prettyDetails s n (CDRecord fields) = 53 | parens ("record" <+> ident n <$> indent 2 fs) 54 | where 55 | fs = sep $ map prettyRecField (M.toList fields) 56 | prettyRecField m = prettyField s n m (recordFields . typeDesc) 57 | prettyDetails s n (CDCombination fields) = 58 | parens ("combination" <+> ident n <$> indent 2 fs) 59 | where 60 | fs = sep $ map prettyCombField (M.toList fields) 61 | prettyCombField m = prettyField s n m (combinationFields . typeDesc) 62 | prettyDetails s n (CDUnion unionFieldName unionFieldValue) = 63 | parens ("union" <+> ident n <$> indent 2 (prettyUnionField unionFieldName unionFieldValue)) 64 | where 65 | prettyUnionField fn fv = prettyField s n (fn, fv) (unionFields . typeDesc) 66 | prettyDetails _ _ (CDPrim p) = prettyPrim p 67 | 68 | prettyPrim :: PrimDetails -> Doc 69 | prettyPrim d = parens (n <+> val) 70 | where 71 | (n, val) = case d of 72 | PDu8 v -> ("u8", ii v) 73 | PDu16 v -> ("u16", ii v) 74 | PDu32 v -> ("u32", ii v) 75 | PDu64 v -> ("u64", ii v) 76 | PDs8 v -> ("s8", ii v) 77 | PDs16 v -> ("s16", ii v) 78 | PDs32 v -> ("s32", ii v) 79 | PDs64 v -> ("s64", ii v) 80 | PDf32 v -> ("f32", float v) 81 | PDf64 v -> ("f64", double v) 82 | PDbool v -> ("bool", bool v) 83 | 84 | ii :: Integral a => a -> Doc 85 | ii = integer . fromIntegral 86 | 87 | prettyField :: M.Map Identifier Type -- the spec type map 88 | -> Identifier -- the field's parent type name 89 | -> (Identifier, FieldValue) -- the name/field-value pair to pretty print 90 | -> (Type -> [Field]) -- unwraps a record/combination/union into fields 91 | -> Doc 92 | prettyField _ _ (fn, DT.EmptyField) _ = parens $ "empty" <+> ident fn 93 | prettyField s n (fn, DT.DataField fd) unwrap = 94 | parens $ "field" <+> ident fn <$> indent 2 (prettyDetails s (typeName ft) fd) 95 | where 96 | fm = fieldsMap . unwrap $ n `lu` s 97 | ftn = fn `lu` fm 98 | ft = fieldRef ftn `lu` s 99 | 100 | fieldsMap :: [Field] -> M.Map Identifier Field 101 | fieldsMap fs = M.fromList $ zip (map fieldName fs) fs 102 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | module Cauterize.Dynamic.Types 3 | ( CautType(..) 4 | , CautDetails(..) 5 | , PrimDetails(..) 6 | , FieldValue(..) 7 | , TyMap 8 | , Exceptions(..) 9 | ) where 10 | 11 | import Control.Exception 12 | import Data.Data 13 | import Data.Int 14 | import Data.Word 15 | import qualified Data.Map as M 16 | import qualified Data.Text.Lazy as T 17 | import qualified Cauterize.Specification as S 18 | import qualified Cauterize.CommonTypes as C 19 | 20 | data CautType = 21 | CautType { ctName :: C.Identifier 22 | , ctDetails :: CautDetails 23 | } 24 | deriving (Show, Ord, Eq) 25 | 26 | data CautDetails 27 | = CDSynonym { cdSynonymRef :: CautDetails } 28 | | CDRange { cdRangeValue :: Integer } 29 | | CDArray { cdArrayElems :: [CautDetails] } 30 | | CDVector { cdVectorElems :: [CautDetails] } 31 | | CDEnumeration { cdEnumVal :: C.Identifier } 32 | | CDRecord { cdRecordFields :: M.Map C.Identifier FieldValue } 33 | | CDCombination { cdCombinationFields :: M.Map C.Identifier FieldValue } 34 | | CDUnion { cdUnionFieldName :: C.Identifier, cdUnionFieldDetails :: FieldValue } 35 | | CDPrim { cdPrimDetails :: PrimDetails } 36 | deriving (Show, Ord, Eq, Data, Typeable) 37 | 38 | data PrimDetails = PDu8 Word8 39 | | PDu16 Word16 40 | | PDu32 Word32 41 | | PDu64 Word64 42 | | PDs8 Int8 43 | | PDs16 Int16 44 | | PDs32 Int32 45 | | PDs64 Int64 46 | | PDf32 Float 47 | | PDf64 Double 48 | | PDbool Bool 49 | deriving (Show, Ord, Eq, Data, Typeable) 50 | 51 | data FieldValue = DataField CautDetails 52 | | EmptyField 53 | deriving (Show, Ord, Eq, Data, Typeable) 54 | 55 | type TyMap = M.Map C.Identifier S.Type 56 | 57 | data Exceptions = TypeMisMatch { tmmExpected :: C.Identifier, tmmActual :: C.Identifier } 58 | | PrototypeMisMatch { ptmmTypeName :: C.Identifier, ptmmDetailType :: T.Text } 59 | | IncorrectArrayLength { ialExpected :: Integer, ialActual :: Integer } 60 | | IncorrectVectorLength { ivlMaximum :: Integer, ivlActual :: Integer } 61 | | RangeOutOfBounds { robMin :: Integer, robMax :: Integer, robValue :: Integer } 62 | | RangeDecodeOutOfBounds { rdobOffset :: C.Offset, rdobLength :: C.Length, rdobValue :: Integer } 63 | | InvalidType { invType :: C.Identifier } 64 | | InvalidTagForRepresentation { invTag :: Integer, invRepresentation :: T.Text } 65 | | InvalidLengthForLengthWidth { ilflwLength :: Integer, ilflwWidth :: Integer } 66 | | InvalidLengthWidth { ilwWidth :: Integer } 67 | | InvalidEnumerable { ieName :: C.Identifier } 68 | | NotATagType { invTagType :: T.Text } 69 | | MissingField { mfField :: C.Identifier } 70 | | UnexpectedFields { ufFields :: [C.Identifier] } 71 | | UnexpectedDataField { udfField :: C.Identifier, udfData :: CautDetails } 72 | | UnexpectedEmptyField { udfField :: C.Identifier } 73 | deriving (Show, Eq, Data, Typeable) 74 | 75 | instance Exception Exceptions 76 | -------------------------------------------------------------------------------- /src/Cauterize/Dynamic/Unpack.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Dynamic.Unpack 3 | ( dynamicUnpack 4 | , dynamicUnpack' 5 | ) where 6 | 7 | import Cauterize.Dynamic.Types 8 | import Cauterize.Dynamic.Common 9 | import Control.Exception 10 | import Control.Monad 11 | import Data.Bits 12 | import Data.Maybe 13 | import qualified Data.Map as M 14 | import qualified Data.Text.Lazy as T 15 | import qualified Cauterize.Specification as S 16 | import qualified Cauterize.CommonTypes as C 17 | import qualified Data.ByteString as B 18 | 19 | import Data.Serialize.IEEE754 20 | import Data.Serialize.Get 21 | 22 | dynamicUnpack :: S.Specification -> C.Identifier -> B.ByteString -> Either T.Text CautType 23 | dynamicUnpack s n b = case flip runGet b $ dynamicUnpack' s n of 24 | Left e -> Left $ T.pack e 25 | Right r -> Right r 26 | 27 | dynamicUnpack' :: S.Specification -> C.Identifier -> Get CautType 28 | dynamicUnpack' s n = 29 | let m = S.specTypeMap s 30 | in do d <- dynamicUnpackDetails m n 31 | return CautType { ctName = n, ctDetails = d } 32 | 33 | dynamicUnpackDetails :: TyMap -> C.Identifier -> Get CautDetails 34 | dynamicUnpackDetails m n = 35 | case n `M.lookup` C.primMap of 36 | Just p -> dynamicUnpackPrim p 37 | Nothing -> 38 | let ty = n `lu` m 39 | in case S.typeDesc ty of 40 | S.Synonym { S.synonymRef = s } -> dynamicUnpackSynonym m s 41 | S.Range { S.rangeOffset = o, S.rangeLength = l, S.rangeTag = t } -> dynamicUnpackRange o l t 42 | S.Array { S.arrayRef = a, S.arrayLength = l } -> dynamicUnpackArray m a l 43 | S.Vector { S.vectorRef = v, S.vectorLength = l, S.vectorTag = t } -> dynamicUnpackVector m v l t 44 | S.Enumeration { S.enumerationValues = vs, S.enumerationTag = t } -> dynamicUnpackEnumeration vs t 45 | S.Record { S.recordFields = fs } -> dynamicUnpackRecord m fs 46 | S.Combination { S.combinationFields = fs, S.combinationTag = t } -> dynamicUnpackCombination m fs t 47 | S.Union { S.unionFields = fs, S.unionTag = t } -> dynamicUnpackUnion m fs t 48 | 49 | dynamicUnpackPrim :: C.Prim -> Get CautDetails 50 | dynamicUnpackPrim b = 51 | let b' = 52 | case b of 53 | C.PU8 -> liftM PDu8 getWord8 54 | C.PU16 -> liftM PDu16 getWord16le 55 | C.PU32 -> liftM PDu32 getWord32le 56 | C.PU64 -> liftM PDu64 getWord64le 57 | C.PS8 -> liftM (PDs8 . fromIntegral) getWord8 58 | C.PS16 -> liftM (PDs16 . fromIntegral) getWord16le 59 | C.PS32 -> liftM (PDs32 . fromIntegral) getWord32le 60 | C.PS64 -> liftM (PDs64 . fromIntegral) getWord64le 61 | C.PF32 -> liftM PDf32 getFloat32le 62 | C.PF64 -> liftM PDf64 getFloat64le 63 | C.PBool -> do 64 | w8 <- getWord8 65 | case w8 of 66 | 0 -> return $ PDbool False 67 | 1 -> return $ PDbool True 68 | x -> fail $ "unexpected value for boolean: " ++ show x 69 | in liftM CDPrim b' 70 | 71 | dynamicUnpackSynonym :: TyMap -> C.Identifier -> Get CautDetails 72 | dynamicUnpackSynonym m i = liftM CDSynonym (dynamicUnpackDetails m i) 73 | 74 | dynamicUnpackRange :: C.Offset -> C.Length -> C.Tag -> Get CautDetails 75 | dynamicUnpackRange o l t = do 76 | tag <- unpackTag t 77 | if fromIntegral tag > l 78 | then throw $ RangeDecodeOutOfBounds o l tag 79 | else return $ CDRange (tag + (fromIntegral o)) 80 | 81 | dynamicUnpackArray :: TyMap -> C.Identifier -> C.Length -> Get CautDetails 82 | dynamicUnpackArray m r l = 83 | liftM CDArray $ replicateM (fromIntegral l) getter 84 | where 85 | getter = dynamicUnpackDetails m r 86 | 87 | dynamicUnpackVector :: TyMap -> C.Identifier -> C.Length -> C.Tag -> Get CautDetails 88 | dynamicUnpackVector m r maxLen t = do 89 | len <- unpackTag t 90 | if len > fromIntegral maxLen 91 | then fail $ "vector length out of bounds: " ++ show len ++ " > " ++ show maxLen 92 | else liftM CDVector $ replicateM (fromIntegral len) getter 93 | where 94 | getter = dynamicUnpackDetails m r 95 | 96 | dynamicUnpackEnumeration :: [S.EnumVal] -> C.Tag -> Get CautDetails 97 | dynamicUnpackEnumeration [] _ = error "dynamicUnpackEnumeration: enumerations must have at least one value!" 98 | dynamicUnpackEnumeration vs t = do 99 | valIx <- unpackTag t 100 | if valIx > maxValIx 101 | then fail $ "enumeration tag out of bounds: " ++ show valIx ++ " > " ++ show maxValIx 102 | else return (CDEnumeration (S.enumValName (ixToVal valIx))) 103 | where 104 | maxValIx = S.enumValIndex (last vs) 105 | 106 | ixToVal :: Integer -> S.EnumVal 107 | ixToVal ix = 108 | let e = error "dynamicUnpackEnumeration: SHOULD NEVER HAPPEN. Tag not a val." 109 | in fromMaybe e (ix `M.lookup` ixMap) 110 | ixMap = M.fromList $ zip (map S.enumValIndex vs) vs 111 | 112 | dynamicUnpackRecord :: TyMap -> [S.Field] -> Get CautDetails 113 | dynamicUnpackRecord m fs = 114 | liftM (CDRecord . M.fromList) $ mapM (unpackField m) fs 115 | 116 | dynamicUnpackCombination :: TyMap -> [S.Field] -> C.Tag -> Get CautDetails 117 | dynamicUnpackCombination m fs t = do 118 | flags <- unpackTag t 119 | liftM (CDCombination . M.fromList) $ mapM (unpackField m) (setFields flags) 120 | where 121 | setFields flags = filter (\f -> flags `testBit` (fromIntegral . S.fieldIndex $ f)) fs 122 | 123 | dynamicUnpackUnion :: TyMap -> [S.Field] -> C.Tag -> Get CautDetails 124 | dynamicUnpackUnion m fs t = do 125 | tag <- liftM fromIntegral $ unpackTag t 126 | case tag `M.lookup` fm of 127 | Nothing -> fail $ "invalid union tag: " ++ show tag 128 | Just f -> do 129 | (n, d) <- unpackField m f 130 | return CDUnion { cdUnionFieldName = n, cdUnionFieldDetails = d } 131 | where 132 | fm = fieldsToIndexMap fs 133 | 134 | unpackField :: TyMap -> S.Field -> Get (C.Identifier, FieldValue) 135 | unpackField _ (S.EmptyField { S.fieldName = n }) = return (n, EmptyField) 136 | unpackField m (S.DataField { S.fieldName = n, S.fieldRef = r }) = 137 | liftM (\d -> (n, DataField d)) (dynamicUnpackDetails m r) 138 | 139 | unpackTag :: C.Tag -> Get Integer 140 | unpackTag C.T1 = liftM fromIntegral getWord8 141 | unpackTag C.T2 = liftM fromIntegral getWord16le 142 | unpackTag C.T4 = liftM fromIntegral getWord32le 143 | unpackTag C.T8 = liftM fromIntegral getWord64le 144 | -------------------------------------------------------------------------------- /src/Cauterize/Generate.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Generate 3 | ( generateSchema 4 | , generateSchemaWith 5 | 6 | , PrototypeVariant(..) 7 | 8 | , defaultMaximumTypes 9 | , defaultMaximumSize 10 | , defaultAllowedPrototypes 11 | , defaultMargin 12 | ) where 13 | 14 | import Cauterize.CommonTypes 15 | import Control.Monad 16 | import Test.QuickCheck.Gen 17 | import Test.QuickCheck.Arbitrary 18 | import Data.Word 19 | import Data.Int 20 | import qualified Cauterize.Schema as Schema 21 | import qualified Cauterize.Specification as Spec 22 | import qualified Data.Text as T 23 | 24 | data PrototypeVariant 25 | = PVSynonym 26 | | PVRange 27 | | PVArray 28 | | PVVector 29 | | PVEnumeration 30 | | PVRecord 31 | | PVCombination 32 | | PVUnion 33 | deriving (Show, Eq, Ord, Enum, Bounded) 34 | 35 | generateSchema :: IO Schema.Schema 36 | generateSchema = generateSchemaWith defaultMaximumTypes defaultMaximumSize defaultMargin defaultAllowedPrototypes 37 | 38 | generateSchemaWith :: Integer -> Integer -> Double -> [PrototypeVariant] -> IO Schema.Schema 39 | generateSchemaWith maximumTypes maximumSize margin allowedPrototypes = generate $ generateSchemaWith' maximumTypes maximumSize margin allowedPrototypes 40 | 41 | generateSchemaWith' :: Integer -> Integer -> Double -> [PrototypeVariant] -> Gen Schema.Schema 42 | generateSchemaWith' maximumTypes maximumSize margin allowedPrototypes = 43 | liftM3 Schema.Schema (elements schemaNames) 44 | (elements schemaNames) 45 | (go maximumTypes maximumSize allNames []) 46 | where 47 | -- Returns the size of the schema 48 | size :: [Schema.Type] -> Integer 49 | size ts = let s = Spec.specSize . Spec.mkSpecification $ Schema.Schema "temp_schema" "0" ts 50 | in sizeMax s 51 | 52 | go :: Integer -> Integer -> [Identifier] -> [Schema.Type] -> Gen [Schema.Type] 53 | go _ _ [] _ = error "Ran out of names!" 54 | go tCount remSz (n:names) s 55 | | tCount <= 0 = return s 56 | | remSz <= 0 = return s 57 | | otherwise = do 58 | t <- genVariant s allowedPrototypes n 59 | 60 | let ts = t:s 61 | oldSz = size s 62 | sz = size (t:s) 63 | 64 | if sz <= maximumSize 65 | then go (tCount - 1) (maximumSize - sz) names ts 66 | else if let os = fromIntegral oldSz 67 | ms = fromIntegral maximumSize 68 | in (os / ms) > margin 69 | then return s -- if we're within 10% of the size, give up and return what we have 70 | else go tCount maximumSize (n:names) s 71 | 72 | genVariant :: [Schema.Type] -> [PrototypeVariant] -> Identifier -> Gen Schema.Type 73 | genVariant _ [] _ = error "Must specify at least one prototype variant." 74 | genVariant existingTypes variants name = do 75 | v <- elements variants 76 | case v of 77 | PVSynonym -> genSynonym 78 | PVRange -> genRange 79 | PVArray -> genArray 80 | PVVector -> genVector 81 | PVEnumeration -> genEnumeration 82 | PVRecord -> genRecord 83 | PVCombination -> genCombination 84 | PVUnion -> genUnion 85 | where 86 | twrap = Schema.Type name 87 | existingNames = allPrimNames ++ map Schema.typeName existingTypes 88 | genSynonym = liftM (twrap . Schema.Synonym) (elements existingNames) 89 | genArray = liftM2 (\t l -> twrap (Schema.Array t (fromIntegral l))) (elements existingNames) arbLength 90 | genVector = liftM2 (\t l -> twrap (Schema.Vector t (fromIntegral l))) (elements existingNames) arbLength 91 | genRecord = liftM (twrap . Schema.Record) (genFieldsWithoutEmpty existingNames) 92 | genCombination = liftM (twrap . Schema.Combination) (genFields existingNames) 93 | genUnion = liftM (twrap . Schema.Union) (genFields existingNames) 94 | genEnumeration = liftM (twrap . Schema.Enumeration) genEnumValues 95 | genRange = do 96 | o <- arbitrary 97 | 98 | let op1 = fromIntegral o + 1 :: Integer 99 | let mbi = fromIntegral (maxBound :: Int64) :: Integer 100 | let mbw = fromIntegral (maxBound :: Word64) :: Integer 101 | 102 | r <- if o < 0 103 | then liftM fromIntegral (choose (op1, mbi)) 104 | else liftM fromIntegral (choose (op1, mbw)) 105 | 106 | return $ twrap (Schema.Range o r) 107 | 108 | defaultMaximumTypes :: Integer 109 | defaultMaximumTypes = 10 110 | 111 | defaultMaximumSize :: Integer 112 | defaultMaximumSize = 1000 113 | 114 | defaultAllowedPrototypes :: [PrototypeVariant] 115 | defaultAllowedPrototypes = [minBound..maxBound] 116 | 117 | -- The margin to use as "close enough". Once our schema size is this proportion 118 | -- of the maximum size, we can give up. This keeps things from running for an 119 | -- absurdly long time as it seeks for an exact fit. 120 | defaultMargin :: Double 121 | defaultMargin = 0.9 122 | 123 | -- A few functions for generating then names we'll use throughout this 124 | -- generation process. 125 | schemaNames :: [T.Text] 126 | schemaNames = map unIdentifier $ take 100 allNames 127 | 128 | allNames :: [Identifier] 129 | allNames = let syms = ["a","e","i","o","u","y"] 130 | in map (unsafeMkIdentifier . concat) $ sequences syms 131 | 132 | sequences :: [a] -> [[a]] 133 | sequences ls = ls' ++ [i ++ [a] | i <- sequences ls, a <- ls] 134 | where 135 | ls' = map (:[]) ls 136 | 137 | -- This helps pick a size for a lengthed type like an array or vector. It favors shorter sizes. 138 | arbLength :: Gen Integer 139 | arbLength = frequency [ (1000, choose (pow0, pow8 - 1)) 140 | , (100, choose (pow8, pow16 - 1)) 141 | , (10, choose (pow16, pow32 - 1)) 142 | , (1, choose (pow32, pow64 - 1)) 143 | ] 144 | where 145 | pow0 = (2 :: Integer)^(0 :: Integer) 146 | pow8 = (2 :: Integer)^(8 :: Integer) 147 | pow16 = (2 :: Integer)^(16 :: Integer) 148 | pow32 = (2 :: Integer)^(32 :: Integer) 149 | pow64 = (2 :: Integer)^(64 :: Integer) 150 | 151 | -- This is used to build a set of fields for the types that use a field set. 152 | genFields_ :: Gen (Identifier -> c) -> Gen [c] 153 | genFields_ cstorGen = do 154 | count <- fieldCount 155 | fieldCstors <- replicateM count cstorGen 156 | let withNames = zipWith ($) fieldCstors allNames 157 | return withNames 158 | where 159 | fieldCount = let ixs = [1..64] 160 | freqs = reverse ixs 161 | gens = map return ixs 162 | in frequency $ zip freqs gens 163 | 164 | genFields :: [Identifier] -> Gen [Schema.Field] 165 | genFields ts = 166 | genFields_ $ frequency [(1, liftM (flip Schema.DataField) (elements ts)) 167 | ,(1, return Schema.EmptyField) 168 | ] 169 | 170 | -- TODO: Use shuffle here when we can move to QuickCheck >=2.8 171 | genEnumValues :: Gen [Identifier] 172 | genEnumValues = sized (\s -> let s' | s <= 0 = 1 173 | | otherwise = s 174 | in return $ take s' allNames) 175 | 176 | genFieldsWithoutEmpty :: [Identifier] -> Gen [Schema.Field] 177 | genFieldsWithoutEmpty ts = genFields_ $ liftM (flip Schema.DataField) (elements ts) 178 | -------------------------------------------------------------------------------- /src/Cauterize/Hash.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Hash 3 | ( Hash(unHash) 4 | , mkHash 5 | , mkHashFromHexString 6 | , hashToHex 7 | , hashToBytes 8 | , hashNull 9 | ) where 10 | 11 | import Numeric 12 | import Data.Word (Word8) 13 | import Data.Version 14 | import qualified Crypto.Hash.SHA1 as SHA1 15 | import qualified Data.ByteString as B 16 | import qualified Data.Text as T 17 | import qualified Data.Text.Encoding as T 18 | 19 | import qualified Paths_cauterize as C (version) 20 | 21 | newtype Hash = Hash { unHash :: B.ByteString } 22 | deriving (Eq, Ord) 23 | 24 | mkHash :: T.Text -> Hash 25 | mkHash t = let (Version [vmaj, vmin, _, _] _) = C.version 26 | svmaj = T.pack . show $ vmaj 27 | svmin = T.pack . show $ vmin 28 | t' = T.concat ["{", svmaj, ".", svmin, "|", t, "}"] 29 | h = SHA1.init `SHA1.update` T.encodeUtf8 t' 30 | in Hash (SHA1.finalize h) 31 | 32 | mkHashFromHexString :: T.Text -> Hash 33 | mkHashFromHexString t | T.length t == 40 && (`elem` validChars) `T.all` t = go 34 | | otherwise = error $ "Unable to create hash. String wrong length (" 35 | ++ show (T.length t) 36 | ++ "): " 37 | ++ T.unpack t 38 | where 39 | validChars = ['0'..'9'] ++ ['a'..'f'] ++ ['A'..'F'] 40 | go = Hash . B.pack $ map (fromHex . T.unpack) (T.chunksOf 2 t) 41 | fromHex hbyte = case readHex hbyte of 42 | [(n,"")] -> n 43 | _ -> error $ "mkHashFromHexString: Unable to read hex byte '" ++ hbyte ++ "'." 44 | 45 | 46 | hashToHex :: Hash -> T.Text 47 | hashToHex (Hash bs) = T.concat $ map showByte (B.unpack bs) 48 | where 49 | showByte b = case showHex b "" of 50 | [x,y] -> T.pack [x, y] 51 | [x] -> T.pack ['0', x] 52 | _ -> error "hashToHex: This should be impossible." 53 | 54 | hashToBytes :: Hash -> [Word8] 55 | hashToBytes (Hash b) = B.unpack b 56 | 57 | hashNull :: Hash 58 | hashNull = Hash $ B.pack (replicate 20 0) 59 | 60 | instance Show Hash where 61 | show h = "SHA1:" ++ T.unpack (hashToHex h) 62 | -------------------------------------------------------------------------------- /src/Cauterize/Schema.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Schema (module X) where 2 | 3 | import Cauterize.Schema.Parser as X 4 | import Cauterize.Schema.Types as X 5 | -------------------------------------------------------------------------------- /src/Cauterize/Schema/Checker.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Schema.Checker 2 | ( CheckedSchema 3 | , checkSchema 4 | ) where 5 | 6 | import Data.Graph 7 | import Data.Int 8 | import Data.List (intersect, group, sort) 9 | import Data.Maybe 10 | import Data.Word 11 | 12 | import Cauterize.Schema.Types 13 | import Cauterize.CommonTypes 14 | import Cauterize.Schema.Util 15 | 16 | data CheckedSchema = CheckedSchema Schema 17 | deriving (Show) 18 | 19 | instance IsSchema CheckedSchema where 20 | getSchema (CheckedSchema s) = s 21 | 22 | {- 23 | - Checks to perform: 24 | - 25 | - * duplicate names 26 | - * reuse of primitive names 27 | - * cycles 28 | - * reference to non-existent types 29 | - * duplicate field names 30 | - * duplicate enumeration values 31 | - * inexpressible range type 32 | - ! ensure Record prototypes do not contain empty fields 33 | - ! ensure schemas have at least one type 34 | - ! ensure schema name conforms to C identifier rules 35 | - ! ensure Combinations do not have more than 64 fields 36 | - ! ensure that Records/Combinations/Unions/Enumerations all have at least 37 | - one field/value 38 | -} 39 | 40 | checkSchema :: Schema -> Either String CheckedSchema 41 | checkSchema s = 42 | case errs of 43 | [] -> Right (CheckedSchema s) 44 | _ -> Left (unlines errs) 45 | where 46 | errs = mapMaybe ($ s) checkers 47 | checkers = 48 | [ checkDuplicateNames 49 | , checkReusePrimitiveNames 50 | , checkCycles 51 | , checkReferenceToNonExistentType 52 | , checkForDuplicateFieldNames 53 | , checkForDuplicateEnumValues 54 | , checkForInexpressibleRange 55 | ] 56 | 57 | checkDuplicateNames :: Schema -> Maybe String 58 | checkDuplicateNames s = 59 | case duplicates (schemaTypeNames s) of 60 | [] -> Nothing 61 | ds -> Just ("Duplicate type names found: " ++ show ds) 62 | 63 | checkReusePrimitiveNames :: Schema -> Maybe String 64 | checkReusePrimitiveNames s = 65 | case schemaTypeNames s `intersect` allPrimNames of 66 | [] -> Nothing 67 | ns -> Just ("Reuse of primitive names found: " ++ show ns) 68 | 69 | checkCycles :: Schema -> Maybe String 70 | checkCycles s = 71 | case mapMaybe isScc nodesscc of 72 | [] -> Nothing 73 | cs -> Just ("Found referential cycles: " ++ show cs) 74 | where 75 | nodes = map (\t -> (typeName t, typeName t, typeReferences t)) (schemaTypes s) 76 | nodesscc = stronglyConnComp nodes 77 | 78 | isScc (CyclicSCC vs) = Just vs 79 | isScc _ = Nothing 80 | 81 | checkReferenceToNonExistentType :: Schema -> Maybe String 82 | checkReferenceToNonExistentType s = 83 | case mapMaybe checkType (schemaTypes s) of 84 | [] -> Nothing 85 | es -> Just ("Found invalid references: " ++ show es) 86 | where 87 | ts = schemaTypeNames s ++ allPrimNames 88 | checkType t = 89 | let rs = typeReferences t 90 | in case mapMaybe (\r -> if r `elem` ts then Nothing else Just r) rs of 91 | [] -> Nothing 92 | x -> Just (typeName t, x) 93 | 94 | checkForDuplicateFieldNames :: Schema -> Maybe String 95 | checkForDuplicateFieldNames s = 96 | case mapMaybe checkType (schemaTypes s) of 97 | [] -> Nothing 98 | es -> Just ("Found duplicate field names: " ++ show es) 99 | where 100 | checkFields n fs = 101 | let ns = map fieldName fs 102 | in case duplicates ns of 103 | [] -> Nothing 104 | ds -> Just (n, ds) 105 | 106 | checkType (Type n (Record fs)) = checkFields n fs 107 | checkType (Type n (Combination fs)) = checkFields n fs 108 | checkType (Type n (Union fs)) = checkFields n fs 109 | checkType _ = Nothing 110 | 111 | checkForDuplicateEnumValues :: Schema -> Maybe String 112 | checkForDuplicateEnumValues s = 113 | case mapMaybe checkType (schemaTypes s) of 114 | [] -> Nothing 115 | es -> Just ("Found duplicate enumeration values: " ++ show es) 116 | where 117 | checkType (Type n (Enumeration vs)) = 118 | case duplicates vs of 119 | [] -> Nothing 120 | ds -> Just (n, ds) 121 | checkType _ = Nothing 122 | 123 | {- 124 | - There are some ranges that cannot be represented in a single 64 bit word 125 | - that can be represented in the Cauterize schema. Make sure we catch these. 126 | - 127 | - If the range has a negative offset, then we must be sure that the maximum 128 | - value of the range fits within at least (maxBound :: Int64). 129 | -} 130 | checkForInexpressibleRange :: Schema -> Maybe String 131 | checkForInexpressibleRange s = 132 | case mapMaybe checkType (schemaTypes s) of 133 | [] -> Nothing 134 | es -> Just ("Cannot express range type: " ++ show es) 135 | where 136 | signedMax = fromIntegral (maxBound :: Int64) :: Word64 137 | checkType (Type n (Range ofst size)) | (ofst < 0) && ((fromIntegral ofst + size) > signedMax) 138 | = Just (n, (ofst,size)) 139 | checkType _ = Nothing 140 | 141 | duplicates :: Ord b => [b] -> [b] 142 | duplicates ls = map (!! 1) $ filter (\l -> 1 < length l) $ group . sort $ ls 143 | -------------------------------------------------------------------------------- /src/Cauterize/Schema/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, PatternSynonyms #-} 2 | module Cauterize.Schema.Parser 3 | ( parseSchema 4 | , parseSchemaFromFile 5 | , formatSchema 6 | , toType 7 | , pAtom 8 | , Atom(..) 9 | ) where 10 | 11 | import Control.Monad 12 | import Data.SCargot 13 | import Data.SCargot.Repr.WellFormed 14 | import Data.SCargot.Comments 15 | import Data.Text (Text, pack, unpack) 16 | import qualified Data.Text as T 17 | import qualified Data.Text.IO as T 18 | import Text.Parsec 19 | import Text.Parsec.Text 20 | 21 | import Cauterize.Schema.Types 22 | import Cauterize.CommonTypes 23 | 24 | defaultSchema :: Schema 25 | defaultSchema = Schema 26 | { schemaName = "schema" 27 | , schemaVersion = "0.0.0.0" 28 | , schemaTypes = [] 29 | } 30 | 31 | data Atom 32 | = Number Integer 33 | | Ident Text 34 | | Str Text 35 | deriving (Show) 36 | 37 | data Component 38 | = Name Text 39 | | Version Text 40 | | TypeDef Type 41 | deriving (Show) 42 | 43 | pAtom :: Parser Atom 44 | pAtom = try pString <|> pNumber <|> pIdent 45 | where 46 | pString = do 47 | _ <- char '"' 48 | s <- many $ noneOf "\"" 49 | _ <- char '"' 50 | (return . Str . pack) s 51 | pNumber = do 52 | sign <- option '+' (oneOf "-+") 53 | v <- fmap (read :: String -> Integer) (many1 digit) 54 | let v' = if sign == '-' 55 | then (-1) * v 56 | else v 57 | return (Number v') 58 | pIdent = do 59 | f <- oneOf ['a'..'z'] 60 | r <- many $ oneOf ['a'..'z'] <|> digit <|> char '_' 61 | (return . Ident . pack) (f:r) 62 | 63 | sAtom :: Atom -> Text 64 | sAtom (Ident t) = t 65 | sAtom (Number n) = pack (show n) 66 | sAtom (Str v) = T.concat ["\"", v, "\""] 67 | 68 | toComponent :: WellFormedSExpr Atom -> Either String Component 69 | toComponent = asList go 70 | where 71 | go [A (Ident "name"), A (Str name)] = Right (Name name) 72 | go [A (Ident "version"), A (Str version)] = Right (Version version) 73 | go (A (Ident "type") : rs ) = TypeDef <$> toType rs 74 | go (A (Ident x) : _ ) = Left ("Unhandled component: " ++ show x) 75 | go y = Left ("Not a component name: " ++ show y) 76 | 77 | pattern AI :: Text -> WellFormedSExpr Atom 78 | pattern AI x = A (Ident x) 79 | 80 | pattern AN :: Integer -> WellFormedSExpr Atom 81 | pattern AN x = A (Number x) 82 | 83 | toType :: [WellFormedSExpr Atom] -> Either String Type 84 | toType [] = Left "Empty type expression." 85 | toType [_] = Left "Type expression without a prototype." 86 | toType (AI name:AI tproto:tbody) = 87 | case tproto of 88 | "synonym" -> toSynonym tbody 89 | "range" -> toRange tbody 90 | "array" -> toArray tbody 91 | "vector" -> toVector tbody 92 | "enumeration" -> toEnumeration tbody 93 | "record" -> toRecord tbody 94 | "combination" -> toCombination tbody 95 | "union" -> toUnion tbody 96 | x -> Left ("Invalid prototype: " ++ show x) 97 | where 98 | umi = unsafeMkIdentifier . unpack 99 | mkTD n t = Right (Type (umi n) t) 100 | 101 | toField (L [AI "field", AI fname, AI ref]) = 102 | Right $ DataField (umi fname) (umi ref) 103 | toField (L [AI "empty", AI fname]) = 104 | Right $ EmptyField (umi fname) 105 | toField x = Left ("Unexpected field body: " ++ show x) 106 | 107 | toSynonym [AI ref] = 108 | mkTD name (Synonym (umi ref)) 109 | toSynonym x = Left ("Unexpected synonym body: " ++ show x) 110 | 111 | toRange [AN rmin, AN rmax] = 112 | mkTD name (Range (fromIntegral rmin) (fromIntegral (rmax - rmin))) 113 | toRange x = Left ("Unexpected range body: " ++ show x) 114 | 115 | toArray [AI ref, AN size] 116 | | size < 0 = Left ("Size must be positive: " ++ show size) 117 | | otherwise = mkTD name (Array (umi ref) (fromIntegral size)) 118 | toArray x = Left ("Unexpected array body: " ++ show x) 119 | 120 | toVector [AI ref, AN size] 121 | | size < 0 = Left ("Size must be positive: " ++ show size) 122 | | otherwise = mkTD name (Vector (umi ref) (fromIntegral size)) 123 | toVector x = Left ("Unexpected vector body: " ++ show x) 124 | 125 | toEnumeration [L (AI "values":vs)] = do 126 | let toValue (AI val) = Right (umi val) 127 | toValue x = Left ("Unexpected value: " ++ show x) 128 | vs' <- mapM toValue vs 129 | mkTD name (Enumeration vs') 130 | toEnumeration x = Left ("Unexpected enumeration body: " ++ show x) 131 | 132 | toRecord [L (AI "fields":fs)] = do 133 | fs' <- mapM toField fs 134 | mkTD name (Record fs') 135 | toRecord x = Left ("Unexpected record body: " ++ show x) 136 | 137 | toCombination [L (AI "fields":fs)] = do 138 | fs' <- mapM toField fs 139 | mkTD name (Combination fs') 140 | toCombination x = Left ("Unexpected combination body: " ++ show x) 141 | 142 | toUnion [L (AI "fields":fs)] = do 143 | fs' <- mapM toField fs 144 | mkTD name (Union fs') 145 | toUnion x = Left ("Unexpected union body: " ++ show x) 146 | toType _ = Left "Unexpected atom types." 147 | 148 | fromType :: Type -> WellFormedSExpr Atom 149 | fromType (Type n d) = L (A (Ident "type") : rest) 150 | where 151 | na = A (Ident (unIdentifier n)) 152 | ai = A . Ident 153 | aiu = A . Ident . unIdentifier 154 | an = A . Number 155 | 156 | fromField (DataField fn fr) = L [ai "field", aiu fn, aiu fr] 157 | fromField (EmptyField fn) = L [ai "empty", aiu fn] 158 | 159 | rest = 160 | case d of 161 | Synonym { synonymRef = r } -> 162 | [na, ai "synonym", aiu r] 163 | Range { rangeOffset = o, rangeLength = l } -> 164 | [na, ai "range", an (fromIntegral o), an (fromIntegral o + fromIntegral l)] 165 | Array { arrayRef = r, arrayLength = l } -> 166 | [na, ai "array", aiu r, an (fromIntegral l)] 167 | Vector { vectorRef = r, vectorLength = l } -> 168 | [na, ai "vector", aiu r, an (fromIntegral l)] 169 | Enumeration { enumerationValues = vs } -> 170 | [na, ai "enumeration", L (ai "values" : map aiu vs)] 171 | Record { recordFields = fs } -> 172 | [na, ai "record", L (ai "fields" : map fromField fs)] 173 | Combination { combinationFields = fs } -> 174 | [na, ai "combination", L (ai "fields" : map fromField fs)] 175 | Union { unionFields = fs } -> 176 | [na, ai "union", L (ai "fields" : map fromField fs)] 177 | 178 | fromComponent :: Component -> WellFormedSExpr Atom 179 | fromComponent c = 180 | case c of 181 | (Name n) -> L [ ident "name", A . Str $ n ] 182 | (Version v) -> L [ ident "version", A . Str $ v ] 183 | (TypeDef t) -> fromType t 184 | where 185 | ident = A . Ident . pack 186 | 187 | cauterizeParser :: SExprParser Atom Component 188 | cauterizeParser = setCarrier toComponent $ withLispComments $ asWellFormed $ mkParser pAtom 189 | 190 | componentsToSchema :: [Component] -> Schema 191 | componentsToSchema = foldl go defaultSchema 192 | where 193 | go s (Name n) = s { schemaName = n } 194 | go s (Version v) = s { schemaVersion = v } 195 | go s (TypeDef t) = let ts = schemaTypes s 196 | in s { schemaTypes = t:ts } 197 | 198 | schemaToComponents :: Schema -> [Component] 199 | schemaToComponents s = 200 | Name (schemaName s) 201 | : Version (schemaVersion s) 202 | : map TypeDef (schemaTypes s) 203 | 204 | parseSchema :: Text -> Either String Schema 205 | parseSchema t = componentsToSchema `fmap` decode cauterizeParser t 206 | 207 | formatSchema :: IsSchema a => a -> Text 208 | formatSchema s = let pp = encodeOne (basicPrint sAtom) 209 | s' = map (pp . fromWellFormed . fromComponent) (schemaToComponents (getSchema s)) 210 | in T.unlines s' 211 | 212 | parseSchemaFromFile :: FilePath -> IO (Either String Schema) 213 | parseSchemaFromFile p = liftM parseSchema (T.readFile p) 214 | -------------------------------------------------------------------------------- /src/Cauterize/Schema/Types.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Schema.Types 2 | ( Schema(..) 3 | , Type(..) 4 | , TypeDesc(..) 5 | , Field(..) 6 | , Offset 7 | , Length 8 | , IsSchema(..) 9 | ) where 10 | 11 | import Cauterize.CommonTypes 12 | import Data.Text (Text) 13 | 14 | data Schema = Schema 15 | { schemaName :: Text 16 | , schemaVersion :: Text 17 | , schemaTypes :: [Type] 18 | } deriving (Show, Eq) 19 | 20 | data Type = Type 21 | { typeName :: Identifier 22 | , typeDesc :: TypeDesc 23 | } deriving (Show, Eq) 24 | 25 | data TypeDesc 26 | = Synonym { synonymRef :: Identifier } 27 | | Range { rangeOffset :: Offset, rangeLength :: Length } 28 | | Array { arrayRef :: Identifier, arrayLength :: Length } 29 | | Vector { vectorRef :: Identifier, vectorLength :: Length } 30 | | Enumeration { enumerationValues :: [Identifier] } 31 | | Record { recordFields :: [Field] } 32 | | Combination { combinationFields :: [Field] } 33 | | Union { unionFields :: [Field] } 34 | deriving (Show, Eq) 35 | 36 | data Field 37 | = DataField { fieldName :: Identifier, fieldRef :: Identifier } 38 | | EmptyField { fieldName :: Identifier } 39 | deriving (Show, Eq) 40 | 41 | class IsSchema a where 42 | getSchema :: a -> Schema 43 | 44 | instance IsSchema Schema where 45 | getSchema = id 46 | -------------------------------------------------------------------------------- /src/Cauterize/Schema/Util.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Schema.Util 3 | ( schemaTypeNames 4 | , typeReferences 5 | , typeMap 6 | , typeHashMap 7 | , typeSizeMap 8 | , typeDepthMap 9 | , tagForType 10 | ) where 11 | 12 | import Cauterize.Schema.Types 13 | import Cauterize.Hash 14 | import Cauterize.CommonTypes 15 | import Data.Maybe 16 | import qualified Data.Map as M 17 | import qualified Data.Text as T 18 | 19 | schemaTypeNames :: IsSchema a => a -> [Identifier] 20 | schemaTypeNames s = 21 | let types = (schemaTypes . getSchema) s 22 | in map typeName types 23 | 24 | typeReferences :: Type -> [Identifier] 25 | typeReferences (Type _ d) = go d 26 | where 27 | go (Synonym r) = [r] 28 | go (Range _ _) = [] 29 | go (Array r _) = [r] 30 | go (Vector r _) = [r] 31 | go (Enumeration _) = [] 32 | go (Record fs) = concatMap fieldReference fs 33 | go (Combination fs) = concatMap fieldReference fs 34 | go (Union fs) = concatMap fieldReference fs 35 | 36 | fieldReference (DataField _ r) = [r] 37 | fieldReference (EmptyField _) = [] 38 | 39 | typeMap :: IsSchema a => a -> M.Map Identifier Type 40 | typeMap s = M.fromList pairs 41 | where 42 | ts = (schemaTypes . getSchema) s 43 | pairs = zip (map typeName ts) ts 44 | 45 | typeHashMap :: IsSchema a => a -> M.Map Identifier (T.Text,Hash) 46 | typeHashMap schema = th 47 | where 48 | s = getSchema schema 49 | tm = typeMap s 50 | th = primHashMap `M.union` fmap typeHash tm 51 | lu k = hashToHex . snd $ 52 | fromMaybe 53 | (error $ "typeHashMap: key '" ++ (T.unpack . unIdentifier) k ++ "' not in map") 54 | (k `M.lookup` th) 55 | 56 | typeHash (Type n d) = 57 | let n' = unIdentifier n 58 | fmt _ [] = error "typeHashMap: fmt must have some parts" 59 | fmt p parts = T.concat ["<",p,":",T.intercalate ":" parts,">"] 60 | 61 | fmtField (DataField fn fr) = T.concat ["[datafield:", unIdentifier fn, ":", unIdentifier fr, ":", lu fr, "]"] 62 | fmtField (EmptyField fn) = T.concat ["[emptyfield:", unIdentifier fn, "]"] 63 | 64 | hstr = 65 | case d of 66 | Synonym r -> fmt "synonym" [n',unIdentifier r,lu r] 67 | Range o l -> fmt "range" [n',showNumSigned o,showNumSigned l] 68 | Array r l -> fmt "array" [n',unIdentifier r,showNumSigned l, lu r] 69 | Vector r l -> fmt "vector" [n',unIdentifier r,showNumSigned l, lu r] 70 | Enumeration vs -> fmt "enumeration" (n':map unIdentifier vs) 71 | Record fs -> fmt "record" (n':map fmtField fs) 72 | Combination fs -> fmt "combination" (n':map fmtField fs) 73 | Union fs -> fmt "union" (n':map fmtField fs) 74 | in (hstr, mkHash hstr) 75 | 76 | typeSizeMap :: IsSchema a => a -> M.Map Identifier Size 77 | typeSizeMap schema = sm 78 | where 79 | s = getSchema schema 80 | tm = typeMap s 81 | sm = primSizeMap `M.union` fmap typeSize tm 82 | lu k = fromMaybe 83 | (error $ "typeSizeMap: key '" ++ (T.unpack . unIdentifier) k ++ "' not in map") 84 | (k `M.lookup` sm) 85 | typeSize t@(Type _ d) = 86 | case d of 87 | Synonym r -> lu r 88 | Range _ _ -> tagToSize (tagForType t) 89 | Array r l -> let sr = lu r 90 | smin = sizeMin sr 91 | smax = sizeMax sr 92 | l' = fromIntegral l 93 | in mkSize (l' * smin) (l' * smax) 94 | Vector r l -> let sr = lu r 95 | l' = fromIntegral l 96 | ts = sizeMax $ tagToSize (tagForType t) 97 | smax = sizeMax sr 98 | in mkSize ts (ts + (l' * smax)) 99 | Enumeration _ -> tagToSize $ tagForType t 100 | Record fs -> let fsizes = map fieldSize fs 101 | (mins,maxes) = unzip fsizes 102 | in mkSize (sum mins) (sum maxes) 103 | Combination fs -> let ts = sizeMax $ tagToSize $ tagForType t 104 | fsizes = map fieldSize fs 105 | (_,maxes) = unzip fsizes 106 | in mkSize ts (ts + sum maxes) 107 | Union fs -> let ts = sizeMax $ tagToSize $ tagForType t 108 | fsizes = map fieldSize fs 109 | (mins,maxes) = unzip fsizes 110 | in mkSize (ts + minimum mins) (ts + maximum maxes) 111 | 112 | -- In theory, empty fields shouldn't go in Records ever. But just in case 113 | -- we change our minds, let's handle that case any way. 114 | fieldSize (EmptyField _) = (0,0) 115 | fieldSize (DataField _ r) = let sz = lu r 116 | in (sizeMin sz, sizeMax sz) 117 | 118 | typeDepthMap :: IsSchema a => a -> M.Map Identifier Integer 119 | typeDepthMap schema = dm 120 | where 121 | s = getSchema schema 122 | tm = typeMap s 123 | dm = primDepthMap `M.union` fmap typeDepth tm 124 | lu n = fromJust $ n `M.lookup` dm 125 | 126 | typeDepth (Type _ d) = 127 | let rdepth = 128 | case d of 129 | Synonym r -> lu r 130 | Range _ _ -> 0 131 | Array r _ -> lu r 132 | Vector r _ -> lu r 133 | Enumeration _ -> 0 134 | Record fs -> maximum $ map fieldDepth fs 135 | Combination fs -> maximum $ map fieldDepth fs 136 | Union fs -> maximum $ map fieldDepth fs 137 | in 1 + rdepth 138 | 139 | fieldDepth (EmptyField _) = 0 140 | fieldDepth (DataField _ r) = lu r 141 | 142 | tagForType :: Type -> Tag 143 | tagForType (Type _ d) = 144 | case d of 145 | Synonym _ -> error "No tag for synonyms." 146 | Array _ _ -> error "No tag for array." 147 | Record _ -> error "No tag for records." 148 | Range _ l -> tagRequired l 149 | Vector _ l -> tagRequired l 150 | Combination fs -> tagForBits (length fs) 151 | 152 | -- For enumerations and unions, we add one to the length because 153 | -- we do not allow tags for these two prototypes with a value of 154 | -- 0. This is to avoid a common class of errors where a 155 | -- default-initialized struct/enum in C is 0. This helps catch 156 | -- cases where the user forgot to initialize the tag. 157 | Enumeration vs -> tagRequired (length vs + 1) 158 | Union fs -> tagRequired (length fs + 1) 159 | 160 | 161 | primHashMap :: M.Map Identifier (T.Text,Hash) 162 | primHashMap = M.fromList (zip allPrimNames vs) 163 | where 164 | ns = map unIdentifier allPrimNames 165 | hs = map mkHash ns 166 | vs = zip ns hs 167 | 168 | primSizeMap :: M.Map Identifier Size 169 | primSizeMap = M.fromList $ zip ns sz 170 | where 171 | ns = map primToText allPrims 172 | sz = map primToSize allPrims 173 | 174 | primDepthMap :: M.Map Identifier Integer 175 | primDepthMap = M.fromList $ zip ns ds 176 | where 177 | ns = map primToText allPrims 178 | ds = repeat 1 179 | 180 | showNumSigned :: (Ord a, Show a, Num a) => a -> T.Text 181 | showNumSigned v = let v' = abs v 182 | v'' = T.pack . show $ v' 183 | in if v < 0 184 | then '-' `T.cons` v'' 185 | else '+' `T.cons` v'' 186 | -------------------------------------------------------------------------------- /src/Cauterize/Specification.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.Specification ( module X ) where 2 | 3 | import Cauterize.Specification.Types as X 4 | import Cauterize.Specification.Parser as X 5 | import Cauterize.Specification.Compile as X 6 | -------------------------------------------------------------------------------- /src/Cauterize/Specification/Compile.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Specification.Compile 3 | ( mkSpecification 4 | ) where 5 | 6 | import Cauterize.CommonTypes 7 | import Cauterize.Hash 8 | import Cauterize.Specification.Types 9 | import Data.Graph 10 | import Data.Maybe 11 | import qualified Cauterize.Schema.Types as Schema 12 | import qualified Cauterize.Schema.Util as Schema 13 | import qualified Data.List as L 14 | import qualified Data.Map as M 15 | import qualified Data.Text as T 16 | 17 | mkSpecification :: Schema.IsSchema a => a -> Specification 18 | mkSpecification schema = compile s 19 | where 20 | s = Schema.getSchema schema 21 | 22 | compile :: Schema.Schema -> Specification 23 | compile s@(Schema.Schema schemaName schemaVersion schemaTypes) = Specification 24 | { specName = schemaName 25 | , specVersion = schemaVersion 26 | , specFingerprint = mkSpecHash 27 | , specSize = mkSpecSize 28 | , specDepth = mkSpecDepth 29 | , specTypeLength = mkSpecTypeTag 30 | , specLengthTag = mkSpecLengthTag 31 | , specTypes = topoSort schemaTypeMap $ map convertType schemaTypes 32 | } 33 | where 34 | schemaTypeMap = Schema.typeMap s 35 | schemaHashMap = Schema.typeHashMap s 36 | schemaSizeMap = Schema.typeSizeMap s 37 | schemaDepthMap = Schema.typeDepthMap s 38 | 39 | sortedTypeNames = L.sort $ map Schema.typeName schemaTypes 40 | sortedTypeHashes = map (\tn -> fromJust $ tn `M.lookup` schemaHashMap) sortedTypeNames 41 | 42 | -- 43 | mkSpecHash = 44 | let hstr = "<" `T.append` T.intercalate ":" (schemaName : schemaVersion : map (hashToHex . snd) sortedTypeHashes) `T.append` ">" 45 | in mkHash hstr 46 | 47 | mkSpecSize = let sizes = map snd $ M.toList schemaSizeMap 48 | mins = map sizeMin sizes 49 | maxes = map sizeMax sizes 50 | in mkSize (minimum mins) (maximum maxes) 51 | 52 | mkSpecDepth = maximum (map snd $ M.toList schemaDepthMap) 53 | 54 | mkSpecLengthTag = tagRequired (sizeMax mkSpecSize) 55 | 56 | mkSpecTypeTag = let prefixes = uniquePrefixes $ map (hashToBytes . snd . snd) (M.toList schemaHashMap) 57 | in case prefixes of 58 | Just (p:_) -> fromIntegral $ length p 59 | _ -> error "Need at least one prefix to determine a prefix length" 60 | 61 | luh k = fromMaybe 62 | (error $ "compile: key '" ++ (T.unpack . unIdentifier) k ++ "' not in hash schemaHashMap") 63 | (k `M.lookup` schemaHashMap) 64 | lus k = fromMaybe 65 | (error $ "compile: key '" ++ (T.unpack . unIdentifier) k ++ "' not in hash schemaSizeMap") 66 | (k `M.lookup` schemaSizeMap) 67 | lud k = fromMaybe 68 | (error $ "compile: key '" ++ (T.unpack . unIdentifier) k ++ "' not in hash schemaDepthMap") 69 | (k `M.lookup` schemaDepthMap) 70 | 71 | convertType t@(Schema.Type sn d) = 72 | let tt = Schema.tagForType t 73 | (_,tha) = luh sn 74 | tsz = lus sn 75 | d' = case d of 76 | Schema.Synonym r -> Synonym r 77 | Schema.Range o l -> Range o l tt (primFittingAllInts [fromIntegral o, fromIntegral l + fromIntegral o]) 78 | Schema.Array r l -> Array r l 79 | Schema.Vector r l -> Vector r l tt 80 | -- Enumeration tags start with 1 rather than 0 to 81 | -- avoid having the default initialization in C be a 82 | -- meaningful value. 83 | Schema.Enumeration vs -> Enumeration (zipWith EnumVal vs [1..]) tt 84 | Schema.Record fs -> Record (convertFields1 fs) 85 | Schema.Combination fs -> Combination (convertFields0 fs) tt 86 | Schema.Union fs -> Union (convertFields1 fs) tt 87 | in Type { typeName = sn 88 | , typeFingerprint = tha 89 | , typeSize = tsz 90 | , typeDesc = d' 91 | , typeDepth = lud sn 92 | } 93 | 94 | convertFields0 = zipWith convertField [0 ..] 95 | 96 | -- Field indicies start with 1 to avoid having the default 97 | -- initializer in C be a meaningful value. Really only useful for 98 | -- Unions but used for records as well. 99 | convertFields1 = zipWith convertField [1 ..] 100 | 101 | convertField ix (Schema.EmptyField sn) = EmptyField sn ix 102 | convertField ix (Schema.DataField sn r) = DataField sn ix r 103 | 104 | topoSort :: M.Map Identifier Schema.Type -> [Type] -> [Type] 105 | topoSort m types = flattenSCCs . stronglyConnComp $ map mkNode types 106 | where 107 | lu k = fromMaybe 108 | (error $ "topoSort: key '" ++ (T.unpack . unIdentifier) k ++ "' not in schema type map") 109 | (k `M.lookup` m) 110 | mkNode t = let n = typeName t 111 | in (t, n, Schema.typeReferences $ lu n) 112 | 113 | uniquePrefixes :: Eq a => [[a]] -> Maybe [[a]] 114 | uniquePrefixes ls = let count = length ls 115 | in case dropWhile (\l -> length l < count) $ map L.nub $ L.transpose $ map L.inits ls of 116 | [] -> Nothing 117 | l -> (Just . head) l 118 | -------------------------------------------------------------------------------- /src/Cauterize/Specification/Parser.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings, PatternSynonyms #-} 2 | module Cauterize.Specification.Parser 3 | ( parseSpecification 4 | , parseSpecificationFromFile 5 | , formatSpecification 6 | ) where 7 | 8 | import Control.Monad 9 | import Data.SCargot 10 | import Data.SCargot.Repr.WellFormed 11 | import Data.Text (Text, pack, unpack) 12 | import qualified Data.Text as T 13 | import qualified Data.Text.IO as T 14 | import qualified Data.Map as M 15 | import Text.Parsec 16 | import Text.Parsec.Text 17 | 18 | import Cauterize.Specification.Types 19 | import Cauterize.CommonTypes 20 | import qualified Cauterize.Hash as H 21 | 22 | defaultSpecification :: Specification 23 | defaultSpecification = Specification 24 | { specName = "specification" 25 | , specVersion = "0.0.0.0" 26 | , specFingerprint = H.hashNull 27 | , specSize = mkConstSize 1 28 | , specDepth = 0 29 | , specTypeLength = 0 30 | , specLengthTag = T1 31 | , specTypes = [] 32 | } 33 | 34 | data Atom 35 | = Number Integer 36 | | Ident Text 37 | | Str Text 38 | | Hash H.Hash 39 | | Tag Tag 40 | deriving (Show) 41 | 42 | data Component 43 | = Name Text 44 | | Version Text 45 | | Fingerprint H.Hash 46 | | SpecSize Size 47 | | Depth Integer 48 | | TypeLength Integer 49 | | LengthTag Tag 50 | | TypeDef Type 51 | deriving (Show) 52 | 53 | pIdentLeading :: Parser Char 54 | pIdentLeading = oneOf ['a'..'z'] 55 | 56 | pIdentTrailing :: Parser Char 57 | pIdentTrailing = oneOf ['a'..'z'] <|> digit <|> char '_' 58 | 59 | pAtom :: Parser Atom 60 | pAtom = try pString <|> try pTag <|> try pHash <|> pNumber <|> pIdent 61 | where 62 | pString = do 63 | _ <- char '"' 64 | s <- many $ noneOf "\"" 65 | _ <- char '"' 66 | (return . Str . pack) s 67 | pTag = do 68 | _ <- char 't' 69 | d <- oneOf "1248" 70 | return $ case d of 71 | '1' -> Tag T1 72 | '2' -> Tag T2 73 | '4' -> Tag T4 74 | '8' -> Tag T8 75 | _ -> error "pAtom: should never happen." 76 | pNumber = do 77 | sign <- option '+' (oneOf "-+") 78 | v <- fmap (read :: String -> Integer) (many1 digit) 79 | let v' = if sign == '-' 80 | then (-1) * v 81 | else v 82 | return (Number v') 83 | pIdent = do 84 | f <- pIdentLeading 85 | r <- many pIdentTrailing 86 | (return . Ident . pack) (f:r) 87 | pHash = let bytes = count 20 pHexByte 88 | htxt = liftM (pack . concat) bytes 89 | in liftM (Hash . H.mkHashFromHexString) htxt 90 | 91 | pHexByte = let pNibble = oneOf "abcdef0123456789" 92 | in count 2 pNibble 93 | 94 | sAtom :: Atom -> Text 95 | sAtom (Number n) = pack (show n) 96 | sAtom (Ident i) = i 97 | sAtom (Str s) = T.concat ["\"", s, "\""] 98 | sAtom (Hash h) = H.hashToHex h 99 | sAtom (Tag t) = unIdentifier $ tagToText t 100 | 101 | pattern AI :: Text -> WellFormedSExpr Atom 102 | pattern AI x = A (Ident x) 103 | pattern AN :: Integer -> WellFormedSExpr Atom 104 | pattern AN x = A (Number x) 105 | pattern AS :: Text -> WellFormedSExpr Atom 106 | pattern AS x = A (Str x) 107 | pattern AH :: H.Hash -> WellFormedSExpr Atom 108 | pattern AH x = A (Hash x) 109 | pattern AT :: Tag -> WellFormedSExpr Atom 110 | pattern AT x = A (Tag x) 111 | 112 | toComponent :: WellFormedSExpr Atom -> Either String Component 113 | toComponent = asList go 114 | where 115 | go [A (Ident "name"), AS name] = Right (Name name) 116 | go [A (Ident "version"), AS version] = Right (Version version) 117 | go [A (Ident "fingerprint"), AH h] = Right (Fingerprint h) 118 | go [A (Ident "size"), AN smin, AN smax] = Right (SpecSize $ mkSize smin smax) 119 | go [A (Ident "depth"), AN d] = Right (Depth d) 120 | go [A (Ident "typelength"), AN d] = Right (TypeLength d) 121 | go [A (Ident "lengthtag"), AT t] = Right (LengthTag t) 122 | 123 | go (A (Ident "type") : rs ) = toType rs 124 | 125 | go (A (Ident x) : _ ) = Left ("Unhandled component: " ++ show x) 126 | go y = Left ("Invalid component: " ++ show y) 127 | 128 | toType :: [WellFormedSExpr Atom] -> Either String Component 129 | toType [] = Left "Empty type expression." 130 | toType [_] = Left "Type expression without prototype." 131 | toType (AI name:AI tproto:tbody) = 132 | case tproto of 133 | "synonym" -> toSynonym tbody 134 | "range" -> toRange tbody 135 | "array" -> toArray tbody 136 | "vector" -> toVector tbody 137 | "enumeration" -> toEnumeration tbody 138 | "record" -> toRecord tbody 139 | "combination" -> toCombination tbody 140 | "union" -> toUnion tbody 141 | y -> Left ("Invalid type expression: " ++ show y) 142 | where 143 | umi = unsafeMkIdentifier . unpack 144 | 145 | mkTD :: Text 146 | -> WellFormedSExpr Atom -- Hash atom 147 | -> WellFormedSExpr Atom -- Size atom 148 | -> WellFormedSExpr Atom -- Depth atom 149 | -> TypeDesc 150 | -> Either String Component 151 | mkTD n f s d t = do 152 | f' <- toHash f 153 | s' <- toSize s 154 | d' <- toDepth d 155 | return (TypeDef $ Type (umi n) f' s' d' t) 156 | 157 | ueb p b = Left ("Unexpected " ++ p ++ " body: " ++ show b) 158 | 159 | toDepth (L [ AI "depth", AN d ]) = Right d 160 | toDepth x = ueb "depth" x 161 | 162 | toSize (L [ AI "size", AN smin, AN smax ]) = Right $ mkSize smin smax 163 | toSize x = ueb "size" x 164 | 165 | toHash (L [ AI "fingerprint", AH h]) = Right h 166 | toHash x = ueb "fingerprint" x 167 | 168 | toField (L [AI "field", AI fname, AN ix, AI ref]) = Right $ DataField (umi fname) ix (umi ref) 169 | toField (L [AI "empty", AI fname, AN ix]) = Right $ EmptyField (umi fname) ix 170 | toField x = ueb "field" x 171 | 172 | toSynonym [f, s, d, AI ref] = 173 | mkTD name f s d (Synonym $ umi ref) 174 | toSynonym x = ueb "synonym" x 175 | 176 | toRange [f, s, d, AN rmin, AN rmax, AT t, AI p] = 177 | case umi p `M.lookup` primMap of 178 | Nothing -> Left ("toRange: Not a primitive: '" ++ pstr ++ "'.") 179 | Just p' | p' `elem` badPrims -> Left ("toRange: Not a suitable primitve: '" ++ pstr ++ "'.") 180 | | otherwise -> mkTD name f s d (Range o l t p') 181 | where 182 | pstr = T.unpack p 183 | badPrims = [PBool, PF32, PF64] 184 | o = fromIntegral rmin 185 | l = fromIntegral rmax - fromIntegral rmin 186 | toRange x = ueb "range" x 187 | 188 | toArray [f, s, d, AI ref, AN l] = 189 | mkTD name f s d (Array (umi ref) (fromIntegral l)) 190 | toArray x = ueb "array" x 191 | 192 | toVector [f, s, d, AI ref, AN l, AT t] = 193 | mkTD name f s d (Vector (umi ref) (fromIntegral l) t) 194 | toVector x = ueb "vector" x 195 | 196 | toEnumeration [f, s, d, AT t, L (AI "values":vs)] = do 197 | vs' <- mapM toValue vs 198 | mkTD name f s d (Enumeration vs' t) 199 | where 200 | toValue (L [AI "value", AI n, AN ix]) = Right $ EnumVal (umi n) ix 201 | toValue x = ueb "value" x 202 | toEnumeration x = ueb "enumeration" x 203 | 204 | toRecord [f, s, d, L (AI "fields":fs)] = do 205 | fs' <- mapM toField fs 206 | mkTD name f s d (Record fs') 207 | toRecord x = ueb "record" x 208 | 209 | toCombination [f, s, d, AT t, L (AI "fields":fs)] = do 210 | fs' <- mapM toField fs 211 | mkTD name f s d (Combination fs' t) 212 | toCombination x = ueb "combination" x 213 | 214 | toUnion [f, s, d, AT t, L (AI "fields":fs)] = do 215 | fs' <- mapM toField fs 216 | mkTD name f s d (Union fs' t) 217 | toUnion x = ueb "union" x 218 | toType _ = Left "Unexpected atom types." 219 | 220 | fromComponent :: Component -> WellFormedSExpr Atom 221 | fromComponent c = 222 | case c of 223 | (Name n) -> L [ ident "name", A . Str $ n ] 224 | (Version v) -> L [ ident "version", A . Str $ v ] 225 | (Fingerprint h) -> L [ ident "fingerprint", hash h ] 226 | (SpecSize s) -> L [ ident "size", number (sizeMin s), number (sizeMax s) ] 227 | (Depth d) -> L [ ident "depth", number d ] 228 | (TypeLength l) -> L [ ident "typelength", number l ] 229 | (LengthTag t) -> L [ ident "lengthtag", tag t ] 230 | 231 | (TypeDef t) -> fromType t 232 | 233 | where 234 | ident = A . Ident 235 | hash = A . Hash 236 | number = A . Number 237 | tag = A . Tag 238 | 239 | fromType :: Type -> WellFormedSExpr Atom 240 | fromType (Type n f s depth desc) = L (A (Ident "type") : rest) 241 | where 242 | na = A (Ident (unIdentifier n)) 243 | fl = L [ ai "fingerprint", ah f ] 244 | sl = L [ ai "size", an (sizeMin s), an (sizeMax s) ] 245 | dp = L [ ai "depth", an depth ] 246 | 247 | aiu = A . Ident . unIdentifier 248 | ai = A . Ident 249 | an = A . Number 250 | ah = A . Hash 251 | at = A . Tag 252 | 253 | fromField (DataField fn ix fr) = L [ai "field", aiu fn, an ix, aiu fr] 254 | fromField (EmptyField fn ix) = L [ai "empty", aiu fn, an ix] 255 | 256 | fromEnumVal (EnumVal v i) = L [ ai "value", aiu v, an i ] 257 | 258 | rest = 259 | case desc of 260 | Synonym r -> [na, ai "synonym", fl, sl, dp, aiu r] 261 | Range o l t p -> 262 | let rmin = fromIntegral o 263 | rmax = (fromIntegral o + fromIntegral l) 264 | in [na, ai "range", fl, sl, dp, an rmin, an rmax, at t, ai (unIdentifier . primToText $ p)] 265 | Array r l -> [na, ai "array", fl, sl, dp, aiu r, an (fromIntegral l)] 266 | Vector r l t -> [na, ai "vector", fl, sl, dp, aiu r, an (fromIntegral l), at t] 267 | Enumeration vs t -> 268 | [na, ai "enumeration", fl, sl, dp, at t, L (ai "values" : map fromEnumVal vs)] 269 | Record fs -> 270 | [na, ai "record", fl, sl, dp, L (ai "fields" : map fromField fs)] 271 | Combination fs t -> 272 | [na, ai "combination", fl, sl, dp, at t, L (ai "fields" : map fromField fs)] 273 | Union fs t -> 274 | [na, ai "union", fl, sl, dp, at t, L (ai "fields" : map fromField fs)] 275 | 276 | cauterizeParser :: SExprParser Atom Component 277 | cauterizeParser = setCarrier toComponent $ asWellFormed $ mkParser pAtom 278 | 279 | componentsToSpec :: [Component] -> Specification 280 | componentsToSpec cs = spec { specTypes = reverse $ specTypes spec } 281 | where 282 | spec = foldl go defaultSpecification cs 283 | go s (Name n) = s { specName = n } 284 | go s (Version v) = s { specVersion = v } 285 | go s (Fingerprint h) = s { specFingerprint = h } 286 | go s (SpecSize sz) = s { specSize = sz } 287 | go s (Depth d) = s { specDepth = d } 288 | go s (TypeLength l) = s { specTypeLength = l } 289 | go s (LengthTag t) = s { specLengthTag = t } 290 | go s (TypeDef t) = let ts = specTypes s 291 | in s { specTypes = t:ts } 292 | 293 | specToComponents :: Specification -> [Component] 294 | specToComponents s = 295 | Name (specName s) 296 | : Version (specVersion s) 297 | : Fingerprint (specFingerprint s) 298 | : SpecSize (specSize s) 299 | : Depth (specDepth s) 300 | : TypeLength (specTypeLength s) 301 | : LengthTag (specLengthTag s) 302 | : map TypeDef (specTypes s) 303 | 304 | 305 | parseSpecification :: Text -> Either String Specification 306 | parseSpecification t = componentsToSpec `fmap` decode cauterizeParser t 307 | 308 | formatSpecification :: Specification -> Text 309 | formatSpecification s = let pp = encodeOne (basicPrint sAtom) 310 | s' = map (pp . fromWellFormed . fromComponent) (specToComponents s) 311 | in T.unlines s' 312 | 313 | parseSpecificationFromFile :: FilePath -> IO (Either String Specification) 314 | parseSpecificationFromFile p = liftM parseSpecification (T.readFile p) 315 | -------------------------------------------------------------------------------- /src/Cauterize/Specification/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Specification.Types 3 | ( Specification(..) 4 | , Type(..) 5 | , TypeDesc(..) 6 | , Field(..) 7 | , EnumVal(..) 8 | , specTypeMap 9 | , specTypeTagMap 10 | ) where 11 | 12 | import Cauterize.CommonTypes 13 | import Cauterize.Hash 14 | import Data.Word 15 | import qualified Cauterize.Schema.Types as Schema 16 | import qualified Data.Text as T 17 | import qualified Data.Map as M 18 | 19 | data Specification = Specification 20 | { specName :: T.Text 21 | , specVersion :: T.Text 22 | , specFingerprint :: Hash 23 | , specSize :: Size 24 | , specDepth :: Integer 25 | , specTypeLength :: Integer 26 | , specLengthTag :: Tag 27 | , specTypes :: [Type] 28 | } deriving (Show, Eq) 29 | 30 | data Type = Type 31 | { typeName :: Identifier 32 | , typeFingerprint :: Hash 33 | , typeSize :: Size 34 | , typeDepth :: Integer 35 | , typeDesc :: TypeDesc 36 | } deriving (Show, Eq) 37 | 38 | data TypeDesc 39 | = Synonym { synonymRef :: Identifier } 40 | | Range { rangeOffset :: Offset, rangeLength :: Length 41 | , rangeTag :: Tag, rangePrim :: Prim } 42 | | Array { arrayRef :: Identifier, arrayLength :: Length } 43 | | Vector { vectorRef :: Identifier, vectorLength :: Length 44 | , vectorTag :: Tag } 45 | | Enumeration { enumerationValues :: [EnumVal] 46 | , enumerationTag :: Tag } 47 | | Record { recordFields :: [Field] } 48 | | Combination { combinationFields :: [Field] 49 | , combinationTag :: Tag } 50 | | Union { unionFields :: [Field] 51 | , unionTag :: Tag} 52 | deriving (Show, Eq) 53 | 54 | data Field 55 | = DataField { fieldName :: Identifier, fieldIndex :: Integer, fieldRef :: Identifier } 56 | | EmptyField { fieldName :: Identifier, fieldIndex :: Integer } 57 | deriving (Show, Eq) 58 | 59 | data EnumVal = EnumVal { enumValName :: Identifier, enumValIndex :: Integer } 60 | deriving (Show, Eq) 61 | 62 | specTypeMap :: Specification -> M.Map Identifier Type 63 | specTypeMap s = M.fromList $ zip ns ts 64 | where 65 | ns = map typeName ts 66 | ts = specTypes s 67 | 68 | specTypeTagMap :: Specification -> M.Map [Word8] Type 69 | specTypeTagMap (Specification { specTypes = ts, specTypeLength = tl }) = 70 | let hs = map (take (fromIntegral tl) . hashToBytes . typeFingerprint) ts 71 | in M.fromList $ zip hs ts 72 | 73 | instance Schema.IsSchema Specification where 74 | getSchema spec = 75 | Schema.Schema 76 | { Schema.schemaName = specName spec 77 | , Schema.schemaVersion = specVersion spec 78 | , Schema.schemaTypes = map extractType (specTypes spec) 79 | } 80 | where 81 | extractType (Type { typeName = n, typeDesc = d}) = 82 | let 83 | d' = case d of 84 | Synonym r -> Schema.Synonym r 85 | Range o l _ _ -> Schema.Range o l 86 | Array r l -> Schema.Array r l 87 | Vector r l _ -> Schema.Vector r l 88 | Enumeration vs _ -> Schema.Enumeration (map enumValName vs) 89 | Record fs -> Schema.Record (map extractField fs) 90 | Combination fs _ -> Schema.Combination (map extractField fs) 91 | Union fs _ -> Schema.Union (map extractField fs) 92 | in Schema.Type n d' 93 | 94 | extractField (EmptyField n _) = Schema.EmptyField n 95 | extractField (DataField n _ r) = Schema.DataField n r 96 | -------------------------------------------------------------------------------- /src/Cauterize/Version.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | 3 | module Cauterize.Version 4 | ( cabalVersion 5 | , versionInfo 6 | , versionString 7 | , dependencyString 8 | ) where 9 | 10 | import Development.GitRev 11 | import qualified Paths_cauterize as C (version) 12 | import Data.Version (showVersion) 13 | 14 | cabalVersion :: String 15 | cabalVersion = showVersion C.version 16 | 17 | versionInfo :: [(String, String)] 18 | versionInfo = 19 | [ ("version", cabalVersion) 20 | , ("git branch", $(gitBranch)) 21 | , ("git commit", $(gitHash)) 22 | , ("git dirty", dirty) 23 | , ("git commit date", $(gitCommitDate)) 24 | ] 25 | where 26 | dirty | $(gitDirty) = "yes" 27 | | otherwise = "no" 28 | 29 | dependencyInfo :: [(String, [(String, String)])] 30 | dependencyInfo = [] 31 | 32 | versionString :: String 33 | versionString = unlines 34 | [ k ++ ": " ++ v | (k, v) <- versionInfo] 35 | 36 | dependencyString :: String 37 | dependencyString = unlines 38 | [ d ++ " " ++ k ++ ": " ++ v 39 | | (d, kvs) <- dependencyInfo 40 | , (k, v) <- kvs 41 | ] 42 | 43 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | flags: {} 2 | packages: 3 | - '.' 4 | extra-deps: 5 | - s-cargot-0.1.0.0 6 | - gitrev-1.2.0 7 | resolver: lts-7.18 8 | -------------------------------------------------------------------------------- /tests/Cauterize/CommonTypesSpec.hs: -------------------------------------------------------------------------------- 1 | module Cauterize.CommonTypesSpec 2 | ( spec 3 | ) where 4 | 5 | import Cauterize.CommonTypes 6 | import Test.Hspec 7 | import Data.Maybe 8 | 9 | spec :: Spec 10 | spec = do 11 | describe "mkIdentifier" $ do 12 | it "catches bad identifiers" $ do 13 | mkIdentifier "John" `shouldSatisfy` isNothing 14 | -------------------------------------------------------------------------------- /tests/Cauterize/Dynamic/PackSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Dynamic.PackSpec 3 | ( spec 4 | ) where 5 | 6 | import Cauterize.Dynamic.Pack as P 7 | 8 | import qualified Data.ByteString as B 9 | import Cauterize.Specification.Compile 10 | import Cauterize.Schema.Parser 11 | import Cauterize.Dynamic.Types 12 | import Test.Hspec 13 | 14 | spec :: Spec 15 | spec = do 16 | describe "dynamicPack -> dynamicPackRange" $ do 17 | it "subtracts out the offset" $ do 18 | let (Right schema) = parseSchema "(type test_range range 181538 13054296747032613673)" 19 | let sp = mkSpecification schema 20 | let ty = CautType "test_range" (CDRange 1759352636039645935) 21 | let expected = [0xcd, 0x85, 0xc7, 0xa9, 0xe6, 0x79, 0x6a, 0x18] 22 | let bs = B.unpack $ dynamicPack sp ty 23 | bs `shouldBe` expected 24 | -------------------------------------------------------------------------------- /tests/Cauterize/HashSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.HashSpec 3 | ( spec 4 | ) where 5 | 6 | import Cauterize.Hash 7 | import Test.Hspec 8 | 9 | spec :: Spec 10 | spec = do 11 | describe "hashToBytes" $ do 12 | it "creates hashes 20 bytes in length" $ do 13 | hashToBytes h0 `shouldSatisfy` (\h -> 20 == length h) 14 | hashToBytes h1 `shouldSatisfy` (\h -> 20 == length h) 15 | where 16 | h0 = mkHash "cauterize is neat" 17 | h1 = mkHash "i've spent so much time on this library..." 18 | -------------------------------------------------------------------------------- /tests/Cauterize/Schema/ParserSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Schema.ParserSpec 3 | ( spec 4 | ) where 5 | 6 | import Cauterize.CommonTypes 7 | import Cauterize.Schema.Types 8 | import Cauterize.Schema.Parser 9 | import Test.Hspec 10 | 11 | import Data.Either 12 | 13 | spec :: Spec 14 | spec = do 15 | describe "parseSchema" $ do 16 | it "parses a synonym" $ do 17 | let s = parseSchema "(type foo synonym u8)" 18 | s `shouldSatisfy` isRight 19 | s `shouldSatisfy` hasSynNamedFoo 20 | it "parses a formatted schema" $ do 21 | let r = do 22 | s <- parseSchema "(type foo synonym u8)" 23 | let f = formatSchema s 24 | s' <- parseSchema f 25 | return (s == s') 26 | r `shouldBe` (Right True) 27 | where 28 | hasSynNamedFoo 29 | (Right 30 | (Schema { 31 | schemaTypes = [Type { 32 | typeName = n, 33 | typeDesc = Synonym _ 34 | }] 35 | })) = unIdentifier n == "foo" 36 | hasSynNamedFoo _ = False 37 | -------------------------------------------------------------------------------- /tests/Cauterize/Specification/CompileSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Specification.CompileSpec 3 | ( spec 4 | ) where 5 | 6 | import Cauterize.Specification.Types 7 | import Cauterize.Schema.Parser 8 | import Cauterize.Specification.Compile 9 | import Cauterize.CommonTypes 10 | import Test.Hspec 11 | 12 | import qualified Data.Text as T 13 | import qualified Data.List as L 14 | 15 | 16 | comp :: T.Text -> Either String Specification 17 | comp p = either (\e -> Left e) 18 | (\s -> Right (mkSpecification s)) 19 | (parseSchema p) 20 | 21 | spec :: Spec 22 | spec = do 23 | describe "compile" $ do 24 | it "produces a synonym" $ do 25 | let c = comp synFoo 26 | c `shouldSatisfy` typeNameIs "foo" 27 | c `shouldSatisfy` typeIsSynonym 28 | it "produces an enumeration" $ do 29 | let c = comp enumBar 30 | c `shouldSatisfy` typeNameIs "bar" 31 | c `shouldSatisfy` typeIsEnumeration 32 | enumIndicies c `shouldBe` [1..4] 33 | it "produces a union" $ do 34 | let c = comp unionBaz 35 | c `shouldSatisfy` typeNameIs "baz" 36 | c `shouldSatisfy` typeIsUnion 37 | unionIndicies c `shouldBe` [1..2] 38 | it "produces a record" $ do 39 | let c = comp recordFizz 40 | c `shouldSatisfy` typeNameIs "fizz" 41 | c `shouldSatisfy` typeIsRecord 42 | recordIndicies c `shouldBe` [1..3] 43 | 44 | it "picks the right tag type (T1)" $ do 45 | let c = comp (enumLong 254) 46 | c `shouldSatisfy` typeNameIs "long" 47 | c `shouldSatisfy` typeIsEnumeration 48 | enumIndicies c `shouldBe` [1..254] 49 | enumTag c `shouldBe` Just T1 50 | 51 | it "picks the right tag type (T2)" $ do 52 | let c = comp (enumLong 255) 53 | c `shouldSatisfy` typeNameIs "long" 54 | c `shouldSatisfy` typeIsEnumeration 55 | enumIndicies c `shouldBe` [1..255] 56 | enumTag c `shouldBe` Just T2 57 | 58 | synFoo :: T.Text 59 | synFoo = "(type foo synonym u8)" 60 | 61 | enumBar :: T.Text 62 | enumBar = "(type bar enumeration (values a b c d))" 63 | 64 | enumLong :: Int -> T.Text 65 | enumLong 0 = error "Must not be 0." 66 | enumLong c = let prefix = "(type long enumeration (values " 67 | vals = map (\v -> "v_" ++ show v) (take c [0..] :: [Int]) 68 | valstr = concat $ L.intersperse " " vals 69 | postfix = "))" 70 | in prefix `T.append` T.pack valstr `T.append` postfix 71 | 72 | unionBaz :: T.Text 73 | unionBaz = "(type baz union (fields (field a u32) (field b u16)))" 74 | 75 | recordFizz :: T.Text 76 | recordFizz = "(type fizz record (fields (field a u32) (field b u16) (field c f32)))" 77 | 78 | 79 | typeNameIs :: Identifier -> Either a Specification -> Bool 80 | typeNameIs n (Right (Specification { specTypes = [ Type { typeName = name } ] })) = n == name 81 | typeNameIs _ _ = False 82 | 83 | typeIsSynonym :: Either a Specification -> Bool 84 | typeIsSynonym (Right 85 | (Specification { specTypes = [ Type { typeDesc = Synonym {} } ] })) = True 86 | typeIsSynonym _ = False 87 | 88 | typeIsEnumeration :: Either a Specification -> Bool 89 | typeIsEnumeration (Right 90 | (Specification { specTypes = [ Type { typeDesc = Enumeration {} } ] })) = True 91 | typeIsEnumeration _ = False 92 | 93 | typeIsUnion :: Either a Specification -> Bool 94 | typeIsUnion (Right 95 | (Specification { specTypes = [ Type { typeDesc = Union {} } ] })) = True 96 | typeIsUnion _ = False 97 | 98 | typeIsRecord :: Either a Specification -> Bool 99 | typeIsRecord (Right 100 | (Specification { specTypes = [ Type { typeDesc = Record {} } ] })) = True 101 | typeIsRecord _ = False 102 | 103 | enumIndicies :: Either a Specification -> [Integer] 104 | enumIndicies (Right (Specification { specTypes = [ Type { typeDesc = desc } ] } ) ) = 105 | case desc of 106 | Enumeration { enumerationValues = vals } -> map getIndex vals 107 | _ -> [] 108 | where 109 | getIndex (EnumVal { enumValIndex = i }) = i 110 | enumIndicies _ = [] 111 | 112 | unionIndicies :: Either a Specification -> [Integer] 113 | unionIndicies (Right (Specification { specTypes = [ Type { typeDesc = desc } ] } ) ) = 114 | case desc of 115 | Union { unionFields = fs } -> map fieldIndex fs 116 | _ -> [] 117 | unionIndicies _ = [] 118 | 119 | recordIndicies :: Either a Specification -> [Integer] 120 | recordIndicies (Right (Specification { specTypes = [ Type { typeDesc = desc } ] } ) ) = 121 | case desc of 122 | Record { recordFields = fs } -> map fieldIndex fs 123 | _ -> [] 124 | recordIndicies _ = [] 125 | 126 | 127 | enumTag :: Either a Specification -> Maybe Tag 128 | enumTag (Right (Specification { specTypes = [ Type { typeDesc = desc } ] } ) ) = 129 | case desc of 130 | Enumeration { enumerationTag = t } -> Just t 131 | _ -> Nothing 132 | enumTag _ = Nothing 133 | -------------------------------------------------------------------------------- /tests/Cauterize/Specification/ParserSpec.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | module Cauterize.Specification.ParserSpec 3 | ( spec 4 | ) where 5 | 6 | import Cauterize.Specification.Types 7 | import Cauterize.Specification.Parser 8 | import Cauterize.CommonTypes 9 | import Test.Hspec 10 | 11 | import Data.Either 12 | 13 | spec :: Spec 14 | spec = do 15 | describe "parseSpecification" $ do 16 | it "parses a specification" $ do 17 | let s = parseSpecification synFoo 18 | s `shouldSatisfy` isRight 19 | s `shouldSatisfy` hasSynNamedFoo 20 | it "parses a formatted specification" $ do 21 | let r = do 22 | s <- parseSpecification synFoo 23 | let f = formatSpecification s 24 | s' <- parseSpecification f 25 | return (s == s') 26 | r `shouldBe` (Right True) 27 | where 28 | synFoo = "(type foo synonym (fingerprint 0cb7bd78634eba6f3633dbf0a5f69537aa1916df) (size 1 1) (depth 1) u8)" 29 | hasSynNamedFoo 30 | (Right 31 | (Specification { 32 | specTypes = [Type { 33 | typeName = n, 34 | typeDesc = Synonym _, 35 | typeDepth = 1 36 | }] 37 | })) = unIdentifier n == "foo" 38 | hasSynNamedFoo _ = False 39 | -------------------------------------------------------------------------------- /tests/Spec.hs: -------------------------------------------------------------------------------- 1 | -- file test/Spec.hs 2 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 3 | --------------------------------------------------------------------------------