├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md └── src ├── cli.yaml ├── hints.rs ├── inferred_number.rs ├── inferred_schema.rs ├── lib.rs └── main.rs /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [published] 4 | 5 | jobs: 6 | publish_crate: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v2 10 | - uses: actions-rs/toolchain@v1 11 | with: 12 | toolchain: stable 13 | - run: cargo publish 14 | env: 15 | CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} 16 | release_binary: 17 | runs-on: ${{ matrix.runs_on }} 18 | strategy: 19 | matrix: 20 | target: 21 | - x86_64-unknown-linux-gnu 22 | - x86_64-unknown-linux-musl 23 | - x86_64-apple-darwin 24 | - x86_64-pc-windows-gnu 25 | include: 26 | - target: x86_64-unknown-linux-gnu 27 | runs_on: ubuntu-latest 28 | artifact: jtd-infer 29 | - target: x86_64-unknown-linux-musl 30 | runs_on: ubuntu-latest 31 | artifact: jtd-infer 32 | - target: x86_64-apple-darwin 33 | runs_on: macos-latest 34 | artifact: jtd-infer 35 | - target: x86_64-pc-windows-gnu 36 | runs_on: ubuntu-latest 37 | artifact: jtd-infer.exe 38 | steps: 39 | - uses: actions/checkout@v2 40 | - uses: actions-rs/toolchain@v1 41 | with: 42 | toolchain: stable 43 | - run: cargo install cross 44 | - run: cross build --release --target=$TARGET 45 | env: 46 | TARGET: ${{ matrix.target }} 47 | - run: zip --junk-paths $TARGET.zip target/$TARGET/release/$ARTIFACT 48 | env: 49 | TARGET: ${{ matrix.target }} 50 | ARTIFACT: ${{ matrix.artifact }} 51 | - uses: actions/upload-release-asset@v1 52 | env: 53 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | with: 55 | upload_url: ${{ github.event.release.upload_url }} 56 | asset_path: ${{ matrix.target }}.zip 57 | asset_name: ${{ matrix.target }}.zip 58 | asset_content_type: application/zip 59 | 
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: push 2 | jobs: 3 | test: 4 | runs-on: ubuntu-latest 5 | steps: 6 | - uses: actions/checkout@v2 7 | - uses: actions-rs/toolchain@v1 8 | with: 9 | toolchain: stable 10 | - run: cargo test 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "ansi_term" 5 | version = "0.11.0" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" 8 | dependencies = [ 9 | "winapi", 10 | ] 11 | 12 | [[package]] 13 | name = "anyhow" 14 | version = "1.0.38" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | checksum = "afddf7f520a80dbf76e6f50a35bca42a2331ef227a28b3b6dc5c2e2338d114b1" 17 | 18 | [[package]] 19 | name = "atty" 20 | version = "0.2.14" 21 | source = "registry+https://github.com/rust-lang/crates.io-index" 22 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 23 | dependencies = [ 24 | "hermit-abi", 25 | "libc", 26 | "winapi", 27 | ] 28 | 29 | [[package]] 30 | name = "autocfg" 31 | version = "1.0.0" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" 34 | 35 | [[package]] 36 | name = "bitflags" 37 | version = "1.2.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = 
"cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 40 | 41 | [[package]] 42 | name = "chrono" 43 | version = "0.4.11" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "80094f509cf8b5ae86a4966a39b3ff66cd7e2a3e594accec3743ff3fabeab5b2" 46 | dependencies = [ 47 | "num-integer", 48 | "num-traits", 49 | "time", 50 | ] 51 | 52 | [[package]] 53 | name = "clap" 54 | version = "2.33.3" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" 57 | dependencies = [ 58 | "ansi_term", 59 | "atty", 60 | "bitflags", 61 | "strsim", 62 | "textwrap", 63 | "unicode-width", 64 | "vec_map", 65 | "yaml-rust", 66 | ] 67 | 68 | [[package]] 69 | name = "hermit-abi" 70 | version = "0.1.10" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "725cf19794cf90aa94e65050cb4191ff5d8fa87a498383774c47b332e3af952e" 73 | dependencies = [ 74 | "libc", 75 | ] 76 | 77 | [[package]] 78 | name = "itoa" 79 | version = "0.4.5" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" 82 | 83 | [[package]] 84 | name = "jtd" 85 | version = "0.3.1" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "30d9414910f23583a4f3e8941296e561f226f2f31b65843767a79a32bf8cb671" 88 | dependencies = [ 89 | "chrono", 90 | "serde", 91 | "serde_json", 92 | "thiserror", 93 | ] 94 | 95 | [[package]] 96 | name = "jtd-infer" 97 | version = "0.2.1" 98 | dependencies = [ 99 | "anyhow", 100 | "chrono", 101 | "clap", 102 | "jtd", 103 | "serde_json", 104 | ] 105 | 106 | [[package]] 107 | name = "libc" 108 | version = "0.2.68" 109 | source = "registry+https://github.com/rust-lang/crates.io-index" 110 | checksum = "dea0c0405123bba743ee3f91f49b1c7cfb684eef0da0a50110f758ccf24cdff0" 111 | 112 | [[package]] 113 | name = "num-integer" 
114 | version = "0.1.42" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "3f6ea62e9d81a77cd3ee9a2a5b9b609447857f3d358704331e4ef39eb247fcba" 117 | dependencies = [ 118 | "autocfg", 119 | "num-traits", 120 | ] 121 | 122 | [[package]] 123 | name = "num-traits" 124 | version = "0.2.11" 125 | source = "registry+https://github.com/rust-lang/crates.io-index" 126 | checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" 127 | dependencies = [ 128 | "autocfg", 129 | ] 130 | 131 | [[package]] 132 | name = "proc-macro2" 133 | version = "1.0.24" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 136 | dependencies = [ 137 | "unicode-xid", 138 | ] 139 | 140 | [[package]] 141 | name = "quote" 142 | version = "1.0.3" 143 | source = "registry+https://github.com/rust-lang/crates.io-index" 144 | checksum = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" 145 | dependencies = [ 146 | "proc-macro2", 147 | ] 148 | 149 | [[package]] 150 | name = "redox_syscall" 151 | version = "0.1.56" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" 154 | 155 | [[package]] 156 | name = "ryu" 157 | version = "1.0.3" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "535622e6be132bccd223f4bb2b8ac8d53cda3c7a6394944d3b2b33fb974f9d76" 160 | 161 | [[package]] 162 | name = "serde" 163 | version = "1.0.106" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "36df6ac6412072f67cf767ebbde4133a5b2e88e76dc6187fa7104cd16f783399" 166 | dependencies = [ 167 | "serde_derive", 168 | ] 169 | 170 | [[package]] 171 | name = "serde_derive" 172 | version = "1.0.106" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = 
"9e549e3abf4fb8621bd1609f11dfc9f5e50320802273b12f3811a67e6716ea6c" 175 | dependencies = [ 176 | "proc-macro2", 177 | "quote", 178 | "syn", 179 | ] 180 | 181 | [[package]] 182 | name = "serde_json" 183 | version = "1.0.51" 184 | source = "registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "da07b57ee2623368351e9a0488bb0b261322a15a6e0ae53e243cbdc0f4208da9" 186 | dependencies = [ 187 | "itoa", 188 | "ryu", 189 | "serde", 190 | ] 191 | 192 | [[package]] 193 | name = "strsim" 194 | version = "0.8.0" 195 | source = "registry+https://github.com/rust-lang/crates.io-index" 196 | checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" 197 | 198 | [[package]] 199 | name = "syn" 200 | version = "1.0.60" 201 | source = "registry+https://github.com/rust-lang/crates.io-index" 202 | checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" 203 | dependencies = [ 204 | "proc-macro2", 205 | "quote", 206 | "unicode-xid", 207 | ] 208 | 209 | [[package]] 210 | name = "textwrap" 211 | version = "0.11.0" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" 214 | dependencies = [ 215 | "unicode-width", 216 | ] 217 | 218 | [[package]] 219 | name = "thiserror" 220 | version = "1.0.23" 221 | source = "registry+https://github.com/rust-lang/crates.io-index" 222 | checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" 223 | dependencies = [ 224 | "thiserror-impl", 225 | ] 226 | 227 | [[package]] 228 | name = "thiserror-impl" 229 | version = "1.0.23" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" 232 | dependencies = [ 233 | "proc-macro2", 234 | "quote", 235 | "syn", 236 | ] 237 | 238 | [[package]] 239 | name = "time" 240 | version = "0.1.42" 241 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" 243 | dependencies = [ 244 | "libc", 245 | "redox_syscall", 246 | "winapi", 247 | ] 248 | 249 | [[package]] 250 | name = "unicode-width" 251 | version = "0.1.7" 252 | source = "registry+https://github.com/rust-lang/crates.io-index" 253 | checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" 254 | 255 | [[package]] 256 | name = "unicode-xid" 257 | version = "0.2.0" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 260 | 261 | [[package]] 262 | name = "vec_map" 263 | version = "0.8.1" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" 266 | 267 | [[package]] 268 | name = "winapi" 269 | version = "0.3.8" 270 | source = "registry+https://github.com/rust-lang/crates.io-index" 271 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 272 | dependencies = [ 273 | "winapi-i686-pc-windows-gnu", 274 | "winapi-x86_64-pc-windows-gnu", 275 | ] 276 | 277 | [[package]] 278 | name = "winapi-i686-pc-windows-gnu" 279 | version = "0.4.0" 280 | source = "registry+https://github.com/rust-lang/crates.io-index" 281 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 282 | 283 | [[package]] 284 | name = "winapi-x86_64-pc-windows-gnu" 285 | version = "0.4.0" 286 | source = "registry+https://github.com/rust-lang/crates.io-index" 287 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 288 | 289 | [[package]] 290 | name = "yaml-rust" 291 | version = "0.3.5" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "e66366e18dc58b46801afbf2ca7661a9f59cc8c5962c29892b6039b4f86fa992" 294 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "jtd-infer" 3 | description = "Generate JSON Typedef schemas from example data" 4 | version = "0.2.1" 5 | license = "MIT" 6 | authors = ["Ulysse Carion "] 7 | edition = "2018" 8 | 9 | [dependencies] 10 | clap = { version = "2.33.3", features = ["yaml"] } 11 | jtd = "0.3.1" 12 | serde_json = "1" 13 | anyhow = "1.0" 14 | chrono = "0.4" 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 JSON Typedef Contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # jtd-infer: Generate JSON Typedef schemas from examples [![Crates.io](https://img.shields.io/crates/v/jtd_infer)](https://crates.io/crates/jtd_infer) [![Docs.rs](https://docs.rs/jtd-infer/badge.svg)](https://docs.rs/jtd_infer) 2 | 3 | [JSON Type Definition](https://jsontypedef.com), aka 4 | [RFC8927](https://tools.ietf.org/html/rfc8927), is an easy-to-learn, 5 | standardized way to define a schema for JSON data. You can use JSON Typedef to 6 | portably validate data across programming languages, create dummy data, generate 7 | code, and more. 8 | 9 | `jtd-infer` is a tool that generates ("infers") a JSON Typedef schema from 10 | example data. 11 | 12 | ```bash 13 | echo '{ "name": "Joe", "age": 42 }' | jtd-infer | jq 14 | ``` 15 | 16 | ```json 17 | { 18 | "properties": { 19 | "age": { 20 | "type": "uint8" 21 | }, 22 | "name": { 23 | "type": "string" 24 | } 25 | } 26 | } 27 | ``` 28 | 29 | ## Installation 30 | 31 | On macOS, you can install `jtd-infer` via Homebrew: 32 | 33 | ```bash 34 | brew install jsontypedef/jsontypedef/jtd-infer 35 | ``` 36 | 37 | For all other platforms, you can download and extract the binary yourself from 38 | [the latest release][latest]. You can also install using `cargo` by running: 39 | 40 | ```bash 41 | cargo install jtd-infer 42 | ``` 43 | 44 | ## Usage 45 | 46 | For high-level guidance on how to use `jtd-infer`, see ["Inferring a JSON 47 | Typedef Schema from Real Data"][jtd-jtd-infer] in the JSON Typedef website docs. 48 | 49 | ### Basic Usage 50 | 51 | To invoke `jtd-infer`, you can either: 52 | 53 | 1. Have it read from STDIN. This is the default behavior. 54 | 2. Have it read from a file. To do this, pass a file name as the last argument 55 | to `jtd-infer`. 56 | 57 | `jtd-infer` reads a _sequence_ of JSON messages. 
So for example, if you have a 58 | file like this in `data.json`: 59 | 60 | ```json 61 | { "name": "john doe", "age": 42 } 62 | { "name": "jane doe", "age": 45 } 63 | ``` 64 | 65 | You can give it to `jtd-infer` in two ways: 66 | 67 | ```bash 68 | # Both of these do the same thing. 69 | cat data.json | jtd-infer 70 | jtd-infer data.json 71 | ``` 72 | 73 | In both cases, you'd get this output: 74 | 75 | ```json 76 | {"properties":{"name":{"type":"string"},"age":{"type":"uint8"}}} 77 | ``` 78 | 79 | ### Changing the default number type 80 | 81 | > ⚠️ This section is often important if you are retrofitting JSON Typedef to a 82 | > JavaScript-based application. 83 | 84 | By default, JSON Typedef will infer the most specific possible type for inputs. 85 | So, for example, it will guess `uint8` if it sees a `12` in your input: 86 | 87 | ```bash 88 | echo "12" | jtd-infer 89 | ``` 90 | 91 | ```json 92 | {"type":"uint8"} 93 | ``` 94 | 95 | However, if you're giving JSON Typedef a small sample set, or if you in practice 96 | have data that is far smaller than the actual numerical datatypes your 97 | application supports, then this behavior may be undesirable. For example, it's 98 | common for JavaScript-based applications to actually support `float64` for all 99 | numeric inputs, because JavaScript numbers are IEEE double-precision floats. 100 | 101 | To tell JSON Typedef to prefer a different type than the one it would normally 102 | guess, you can use `--default-number-type` to change its behavior. For example: 103 | 104 | ```bash 105 | # JavaScript numbers are all float64s, and so it's pretty common for JavaScript 106 | # applications to not check if inputs are integers or within a particular range. 107 | # 108 | # If you don't want to make your JSON Typedef schema strict about decimal, 109 | # negative, or out of int range numbers, you can pass float64 as the default 110 | # number type. 
111 | echo "12" | jtd-infer --default-number-type=float64 112 | ``` 113 | 114 | ```json 115 | {"type":"float64"} 116 | ``` 117 | 118 | Another use-case is if you're writing an application that uses signed 32-bit 119 | ints everywhere, and your example data simply never in practice has examples of 120 | negative numbers or numbers too big for 8- or 16-bit numbers. You can achieve 121 | that by using `int32` as your default number type: 122 | 123 | ```bash 124 | echo "12" | jtd-infer --default-number-type=int32 125 | ``` 126 | 127 | ```json 128 | {"type":"int32"} 129 | ``` 130 | 131 | Note that `jtd-infer` will ignore your default if it doesn't match with the 132 | data. For example, `int32` only works with whole numbers, so if a decimal number 133 | or a number too big for 32-bit signed integers comes in, it will fall back to 134 | `float64`: 135 | 136 | ```bash 137 | # both of these output {"type":"float64"} 138 | echo "3.14" | jtd-infer --default-number-type=int32 139 | echo "9999999999" | jtd-infer --default-number-type=int32 140 | ``` 141 | 142 | ### Advanced Usage: Providing Hints 143 | 144 | By default, `jtd-infer` will never output `enum`, `values`, or `discriminator` 145 | schemas. This is by design: by always being consistent with what it outputs, 146 | `jtd-infer` is more predictable and reliable. 147 | 148 | If you want `jtd-infer` to output an `enum`, `values`, or `discriminator`, you 149 | can use the `--enum-hint`, `--values-hint`, and `--discriminator-hint` flags. 150 | You can pass each of these flags multiple times. 151 | 152 | All of the hint flags accept [JSON 153 | Pointers](https://tools.ietf.org/html/rfc6901) as values. If you're used to the 154 | JavaScript-y syntax of referring to things as `$.foo.bar`, the equivalent JSON 155 | Pointer is `/foo/bar`. `jtd-infer` treats `-` as a "wildcard". `/foo/-/bar` is 156 | equivalent to the JavaScript-y `$.foo.*.bar`. 
157 | 158 | As a corner-case, if you want to point to the *root* / top-level of your input, 159 | then use the empty string as the path. See ["Using 160 | `--values-hint`"](#using---values-hint) for an example of this. 161 | 162 | #### Using `--enum-hint` 163 | 164 | By default, strings are always inferred to be `{ "type": "string" }`: 165 | 166 | ```bash 167 | echo '["foo", "bar", "baz"]' | jtd-infer 168 | ``` 169 | 170 | ```json 171 | {"elements":{"type":"string"}} 172 | ``` 173 | 174 | But you can instead have `jtd-infer` output an enum by providing a path to the 175 | string you consider to be an enum. In this case, it's any element of the root of 176 | the array -- the JSON Pointer for that is `/-`: 177 | 178 | ```bash 179 | echo '["foo", "bar", "baz"]' | jtd-infer --enum-hint=/- 180 | ``` 181 | 182 | ```json 183 | {"elements":{"enum":["bar","baz","foo"]}} 184 | ``` 185 | 186 | #### Using `--values-hint` 187 | 188 | By default, objects are always assumed to be "structs", and `jtd-infer` will 189 | generate `properties` / `optionalProperties`. For example: 190 | 191 | ```bash 192 | echo '{"x": [1, 2, 3], "y": [4, 5, 6], "z": [7, 8, 9]}' | jtd-infer 193 | ``` 194 | 195 | ```json 196 | {"properties":{"y":{"elements":{"type":"uint8"}},"z":{"elements":{"type":"uint8"}},"x":{"elements":{"type":"uint8"}}}} 197 | ``` 198 | 199 | If your data is more like a map / dictionary, pass a `--values-hint` that points 200 | to the object that you want a `values` schema from. In this case, that's the 201 | root-level object, which in JSON Pointer is just an empty string: 202 | 203 | ```bash 204 | echo '{"x": [1, 2, 3], "y": [4, 5, 6], "z": [7, 8, 9]}' | jtd-infer --values-hint= 205 | ``` 206 | 207 | ```json 208 | {"values":{"elements":{"type":"uint8"}}} 209 | ``` 210 | 211 | #### Using `--discriminator-hint` 212 | 213 | By default, objects are always assumed to be "structs", and `jtd-infer` will 214 | generate `properties` / `optionalProperties`. 
For example: 215 | 216 | ```bash 217 | echo '[{"type": "s", "value": "foo"},{"type": "n", "value": 3.14}]' | jtd-infer 218 | ``` 219 | 220 | ```json 221 | {"elements":{"properties":{"value":{},"type":{"type":"string"}}}} 222 | ``` 223 | 224 | If your data has a special "type" property that tells you what's in the rest of 225 | the object, then use `--discriminator-hint` to point to that property. 226 | `jtd-infer` will output an appropriate `discriminator` schema instead: 227 | 228 | ```bash 229 | echo '[{"type": "s", "value": "foo"},{"type": "n", "value": 3.14}]' | jtd-infer --discriminator-hint=/-/type | jq 230 | ``` 231 | 232 | ```json 233 | { 234 | "elements": { 235 | "discriminator": "type", 236 | "mapping": { 237 | "s": { 238 | "properties": { 239 | "value": { 240 | "type": "string" 241 | } 242 | } 243 | }, 244 | "n": { 245 | "properties": { 246 | "value": { 247 | "type": "float64" 248 | } 249 | } 250 | } 251 | } 252 | } 253 | } 254 | ``` 255 | 256 | [jtd-jtd-infer]: https://jsontypedef.com/docs/tools/jtd-infer 257 | [latest]: https://github.com/jsontypedef/json-typedef-infer/releases/latest 258 | -------------------------------------------------------------------------------- /src/cli.yaml: -------------------------------------------------------------------------------- 1 | name: jtd-infer 2 | args: 3 | - input: 4 | help: Where to read examples from. To read from stdin, use "-" 5 | required: true 6 | default_value: "-" 7 | - enum-hint: 8 | help: Treat a given part of the input as an enum. 9 | long: enum-hint 10 | takes_value: true 11 | multiple: true 12 | - values-hint: 13 | help: Treat a given part of the input as a dictionary / map. 14 | long: values-hint 15 | takes_value: true 16 | multiple: true 17 | - discriminator-hint: 18 | help: Treat a given part of the input as a discriminator "tag". 19 | long: discriminator-hint 20 | takes_value: true 21 | multiple: true 22 | - default-number-type: 23 | help: The default type to infer for JSON numbers. 
24 | long: default-number-type 25 | takes_value: true 26 | default_value: uint8 27 | possible_values: 28 | - int8 29 | - uint8 30 | - int16 31 | - uint16 32 | - int32 33 | - uint32 34 | - float32 35 | - float64 36 | -------------------------------------------------------------------------------- /src/hints.rs: -------------------------------------------------------------------------------- 1 | use crate::inferred_number::NumType; 2 | 3 | /// Hints for [`Inferrer`][`crate::Inferrer`]. 4 | /// 5 | /// By default, [`Inferrer`][`crate::Inferrer`] will never produce enum, values, 6 | /// or discriminator forms. Hints tell [`Inferrer`][`crate::Inferrer`] to use 7 | /// these forms. See [`HintSet`] for details on how you can specify the "paths" 8 | /// to the pieces of the input that should use these forms. 9 | /// 10 | /// `default_num_type` tells [`Inferrer`][`crate::Inferrer`] what numeric type 11 | /// to attempt to use by default when it encounters a JSON number. This default 12 | /// will be ignored if it doesn't contain the example data. When the default is 13 | /// ignored, the inferrer will infer the narrowest numerical type possible for 14 | /// input data, preferring unsigned integers over signed integers. 
15 | /// 16 | /// To adapt the example used at [the crate-level docs][`crate`], here's how you 17 | /// could change [`Inferrer`][`crate::Inferrer`] behavior using hints: 18 | /// 19 | /// ``` 20 | /// use serde_json::json; 21 | /// use jtd_infer::{Inferrer, Hints, HintSet, NumType}; 22 | /// 23 | /// let enum_path = vec!["bar".to_string()]; 24 | /// let mut inferrer = Inferrer::new(Hints::new( 25 | /// NumType::Float32, 26 | /// HintSet::new(vec![&enum_path]), 27 | /// HintSet::new(vec![]), 28 | /// HintSet::new(vec![]), 29 | /// )); 30 | /// 31 | /// inferrer = inferrer.infer(json!({ "foo": true, "bar": "xxx" })); 32 | /// inferrer = inferrer.infer(json!({ "foo": false, "bar": null, "baz": 5 })); 33 | /// 34 | /// let inference = inferrer.into_schema(); 35 | /// 36 | /// assert_eq!( 37 | /// json!({ 38 | /// "properties": { 39 | /// "foo": { "type": "boolean" }, 40 | /// "bar": { "enum": ["xxx"], "nullable": true }, // now an enum 41 | /// }, 42 | /// "optionalProperties": { 43 | /// "baz": { "type": "float32" }, // instead of uint8 44 | /// }, 45 | /// }), 46 | /// serde_json::to_value(inference.into_serde_schema()).unwrap(), 47 | /// ) 48 | /// ``` 49 | pub struct Hints<'a> { 50 | default_num_type: NumType, 51 | enums: HintSet<'a>, 52 | values: HintSet<'a>, 53 | discriminator: HintSet<'a>, 54 | } 55 | 56 | impl<'a> Hints<'a> { 57 | /// Constructs a new set of [`Hints`]. 
58 | pub fn new( 59 | default_num_type: NumType, 60 | enums: HintSet<'a>, 61 | values: HintSet<'a>, 62 | discriminator: HintSet<'a>, 63 | ) -> Self { 64 | Hints { 65 | default_num_type, 66 | enums, 67 | values, 68 | discriminator, 69 | } 70 | } 71 | 72 | pub(crate) fn default_num_type(&self) -> &NumType { 73 | &self.default_num_type 74 | } 75 | 76 | pub(crate) fn sub_hints(&self, key: &str) -> Self { 77 | Self::new( 78 | self.default_num_type.clone(), 79 | self.enums.sub_hints(key), 80 | self.values.sub_hints(key), 81 | self.discriminator.sub_hints(key), 82 | ) 83 | } 84 | 85 | pub(crate) fn is_enum_active(&self) -> bool { 86 | self.enums.is_active() 87 | } 88 | 89 | pub(crate) fn is_values_active(&self) -> bool { 90 | self.values.is_active() 91 | } 92 | 93 | pub(crate) fn peek_active_discriminator(&self) -> Option<&str> { 94 | self.discriminator.peek_active() 95 | } 96 | } 97 | 98 | const WILDCARD: &'static str = "-"; 99 | 100 | /// A set of paths to parts of the input that are subject to a hint in 101 | /// [`Hints`]. 102 | pub struct HintSet<'a> { 103 | values: Vec<&'a [String]>, 104 | } 105 | 106 | impl<'a> HintSet<'a> { 107 | /// Constructs a new [`HintSet`]. 108 | /// 109 | /// Each element of `values` is a separate "path". Each element of a path is 110 | /// treated as a path "segment". So, for example, this: 111 | /// 112 | /// ``` 113 | /// use jtd_infer::HintSet; 114 | /// 115 | /// let path1 = vec!["foo".to_string(), "bar".to_string()]; 116 | /// let path2 = vec!["baz".to_string()]; 117 | /// HintSet::new(vec![&path1, &path2]); 118 | /// ``` 119 | /// 120 | /// Creates a set of paths pointing to `/foo/bar` and `/baz` in an input. 121 | /// 122 | /// The `-` path segment value is special, and acts as a wildcard, matching 123 | /// any property name. It also matches array elements, unlike ordinary path 124 | /// segments. 
125 | pub fn new(values: Vec<&'a [String]>) -> Self { 126 | HintSet { values } 127 | } 128 | 129 | pub(crate) fn sub_hints(&self, key: &str) -> Self { 130 | Self::new( 131 | self.values 132 | .iter() 133 | .filter(|values| { 134 | let first = values.first().map(String::as_str); 135 | first == Some(WILDCARD) || first == Some(key) 136 | }) 137 | .map(|values| &values[1..]) 138 | .collect(), 139 | ) 140 | } 141 | 142 | pub(crate) fn is_active(&self) -> bool { 143 | self.values.iter().any(|values| values.is_empty()) 144 | } 145 | 146 | pub(crate) fn peek_active(&self) -> Option<&str> { 147 | self.values 148 | .iter() 149 | .find(|values| values.len() == 1) 150 | .and_then(|values| values.first().map(String::as_str)) 151 | } 152 | } 153 | 154 | #[cfg(test)] 155 | mod tests { 156 | use super::*; 157 | 158 | #[test] 159 | fn hint_set() { 160 | let path = vec!["a".to_string(), "b".to_string(), "c".to_string()]; 161 | let hint_set = HintSet::new(vec![&path]); 162 | assert!(!hint_set.is_active()); 163 | assert_eq!(None, hint_set.peek_active()); 164 | 165 | assert!(!hint_set.sub_hints("a").is_active()); 166 | assert_eq!(None, hint_set.sub_hints("a").peek_active()); 167 | 168 | assert!(!hint_set.sub_hints("a").sub_hints("b").is_active()); 169 | assert_eq!( 170 | Some("c"), 171 | hint_set.sub_hints("a").sub_hints("b").peek_active() 172 | ); 173 | 174 | assert!(hint_set 175 | .sub_hints("a") 176 | .sub_hints("b") 177 | .sub_hints("c") 178 | .is_active()); 179 | 180 | assert_eq!( 181 | None, 182 | hint_set 183 | .sub_hints("a") 184 | .sub_hints("b") 185 | .sub_hints("c") 186 | .peek_active() 187 | ); 188 | } 189 | 190 | #[test] 191 | fn hint_set_wildcard() { 192 | let path1 = vec!["a".to_string(), "b".to_string(), "c".to_string()]; 193 | let path2 = vec!["d".to_string(), "-".to_string(), "e".to_string()]; 194 | let hint_set = HintSet::new(vec![&path1, &path2]); 195 | 196 | assert!(!hint_set 197 | .sub_hints("a") 198 | .sub_hints("x") 199 | .sub_hints("c") 200 | .is_active()); 201 
| 202 | assert!(hint_set 203 | .sub_hints("d") 204 | .sub_hints("x") 205 | .sub_hints("e") 206 | .is_active()); 207 | } 208 | } 209 |
--------------------------------------------------------------------------------
/src/inferred_number.rs:
--------------------------------------------------------------------------------
use jtd::Type;

/// Running summary of the numbers observed at one position of the input.
///
/// Tracks the smallest and largest value seen, and whether every value so far
/// had a zero fractional part.
#[derive(Debug)]
pub struct InferredNumber {
    min: f64,
    max: f64,
    int: bool,
}

impl Default for InferredNumber {
    fn default() -> Self {
        Self::new()
    }
}

impl InferredNumber {
    /// Creates a summary that has not observed any number yet.
    ///
    /// `min` and `max` start at the opposite extremes so that the empty
    /// summary is contained by every [`NumType`].
    pub fn new() -> Self {
        Self {
            min: f64::MAX,
            max: f64::MIN,
            int: true,
        }
    }

    /// Returns a new summary that additionally accounts for `n`.
    pub fn infer(&self, n: f64) -> Self {
        Self {
            min: self.min.min(n),
            max: self.max.max(n),
            int: self.int && n.fract() == 0.0,
        }
    }

    /// Picks the JSON Typedef type to emit for the observed numbers.
    ///
    /// `default` is used whenever it can hold every observed value. Otherwise
    /// the first integer type (in the order listed below) that can hold them
    /// is used, falling back to `float64`.
    pub fn into_type(&self, default: &NumType) -> Type {
        if self.contained_by(default) {
            return default.into_type();
        }

        // Narrowest first; for equal widths the unsigned type is preferred.
        const CANDIDATES: [NumType; 6] = [
            NumType::Uint8,
            NumType::Int8,
            NumType::Uint16,
            NumType::Int16,
            NumType::Uint32,
            NumType::Int32,
        ];

        CANDIDATES
            .iter()
            .find(|candidate| self.contained_by(candidate))
            .map_or(Type::Float64, NumType::into_type)
    }

    /// Whether every observed value is representable by `type_`.
    fn contained_by(&self, type_: &NumType) -> bool {
        // A fractional value rules out every integer type.
        if !self.int && !type_.is_float() {
            return false;
        }

        let (min, max) = type_.as_range();
        min <= self.min && max >= self.max
    }
}

/// A type of number to infer by default.
///
/// See [`Hints`][`crate::Hints`] for how this enum is used.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum NumType {
    /// Corresponds to [`jtd::Type::Int8`].
    Int8,

    /// Corresponds to [`jtd::Type::Uint8`].
    Uint8,

    /// Corresponds to [`jtd::Type::Int16`].
    Int16,

    /// Corresponds to [`jtd::Type::Uint16`].
    Uint16,

    /// Corresponds to [`jtd::Type::Int32`].
    Int32,

    /// Corresponds to [`jtd::Type::Uint32`].
    Uint32,

    /// Corresponds to [`jtd::Type::Float32`].
    Float32,

    /// Corresponds to [`jtd::Type::Float64`].
    Float64,
}

impl NumType {
    /// Whether this is one of the floating-point types.
    fn is_float(&self) -> bool {
        matches!(self, Self::Float32 | Self::Float64)
    }

    /// Inclusive `(min, max)` range of values this type is treated as holding.
    ///
    /// Both float types are given the full `f64` range.
    fn as_range(&self) -> (f64, f64) {
        match self {
            Self::Int8 => (f64::from(i8::MIN), f64::from(i8::MAX)),
            Self::Uint8 => (f64::from(u8::MIN), f64::from(u8::MAX)),
            Self::Int16 => (f64::from(i16::MIN), f64::from(i16::MAX)),
            Self::Uint16 => (f64::from(u16::MIN), f64::from(u16::MAX)),
            Self::Int32 => (f64::from(i32::MIN), f64::from(i32::MAX)),
            Self::Uint32 => (f64::from(u32::MIN), f64::from(u32::MAX)),
            Self::Float32 | Self::Float64 => (f64::MIN, f64::MAX),
        }
    }

    /// The [`jtd::Type`] this variant corresponds to.
    fn into_type(&self) -> Type {
        match self {
            Self::Int8 => Type::Int8,
            Self::Uint8 => Type::Uint8,
            Self::Int16 => Type::Int16,
            Self::Uint16 => Type::Uint16,
            Self::Int32 => Type::Int32,
            Self::Uint32 => Type::Uint32,
            Self::Float32 => Type::Float32,
            Self::Float64 => Type::Float64,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn inferred_number() {
        let n = InferredNumber::new();

        // At first, default always honored.
        assert_eq!(Type::Uint8, n.into_type(&NumType::Uint8));
        assert_eq!(Type::Int8, n.into_type(&NumType::Int8));
        assert_eq!(Type::Uint16, n.into_type(&NumType::Uint16));
        assert_eq!(Type::Int16, n.into_type(&NumType::Int16));
        assert_eq!(Type::Uint32, n.into_type(&NumType::Uint32));
        assert_eq!(Type::Int32, n.into_type(&NumType::Int32));
        assert_eq!(Type::Float32, n.into_type(&NumType::Float32));
        assert_eq!(Type::Float64, n.into_type(&NumType::Float64));

        // Test expanding to limits of uint8.
        let n = InferredNumber::new()
            .infer(u8::MIN as f64)
            .infer(u8::MAX as f64);

        assert_eq!(Type::Uint8, n.into_type(&NumType::Uint8));
        assert_eq!(Type::Uint8, n.into_type(&NumType::Int8));
        assert_eq!(Type::Uint16, n.into_type(&NumType::Uint16));
        assert_eq!(Type::Int16, n.into_type(&NumType::Int16));
        assert_eq!(Type::Uint32, n.into_type(&NumType::Uint32));
        assert_eq!(Type::Int32, n.into_type(&NumType::Int32));
        assert_eq!(Type::Float32, n.into_type(&NumType::Float32));
        assert_eq!(Type::Float64, n.into_type(&NumType::Float64));

        // Test expanding to limits of int8.
        let n = InferredNumber::new()
            .infer(i8::MIN as f64)
            .infer(i8::MAX as f64);

        assert_eq!(Type::Int8, n.into_type(&NumType::Uint8));
        assert_eq!(Type::Int8, n.into_type(&NumType::Int8));
        assert_eq!(Type::Int8, n.into_type(&NumType::Uint16));
        assert_eq!(Type::Int16, n.into_type(&NumType::Int16));
        assert_eq!(Type::Int8, n.into_type(&NumType::Uint32));
        assert_eq!(Type::Int32, n.into_type(&NumType::Int32));
        assert_eq!(Type::Float32, n.into_type(&NumType::Float32));
        assert_eq!(Type::Float64, n.into_type(&NumType::Float64));

        // Test including a non-integer.
        let n = InferredNumber::new().infer(0.5);
        assert_eq!(Type::Float64, n.into_type(&NumType::Uint8));
        assert_eq!(Type::Float64, n.into_type(&NumType::Int8));
        assert_eq!(Type::Float64, n.into_type(&NumType::Uint16));
        assert_eq!(Type::Float64, n.into_type(&NumType::Int16));
        assert_eq!(Type::Float64, n.into_type(&NumType::Uint32));
        assert_eq!(Type::Float64, n.into_type(&NumType::Int32));
        assert_eq!(Type::Float32, n.into_type(&NumType::Float32));
        assert_eq!(Type::Float64, n.into_type(&NumType::Float64));
    }
}

--------------------------------------------------------------------------------
/src/inferred_schema.rs:
--------------------------------------------------------------------------------
use crate::hints::Hints;
use crate::inferred_number::InferredNumber;
use chrono::DateTime;
use jtd::{Schema, Type};
use serde_json::Value;
use std::collections::{BTreeMap, BTreeSet};

/// What has been inferred so far about the values seen at one position.
#[derive(Debug)]
pub enum InferredSchema {
    /// Nothing has been observed yet.
    Unknown,
    /// The observations are incompatible with any single form.
    Any,
    Boolean,
    Number(InferredNumber),
    String,
    /// Every string observed so far parsed as an RFC 3339 timestamp.
    Timestamp,
    /// The set of distinct strings observed (only used under an enum hint).
    Enum(BTreeSet<String>),
    /// An array; the payload is the inference shared by all of its elements.
    Array(Box<InferredSchema>),
    /// An object with a fixed set of keys.
    Properties {
        /// Keys present in every object observed so far.
        required: BTreeMap<String, InferredSchema>,
        /// Keys absent from at least one observed object.
        optional: BTreeMap<String, InferredSchema>,
    },
    /// An object used as a map (only used under a values hint); the payload
    /// is the inference shared by all of its values.
    Values(Box<InferredSchema>),
    /// An object tagged by a string property (only used under a
    /// discriminator hint).
    Discriminator {
        discriminator: String,
        mapping: BTreeMap<String, InferredSchema>,
    },
    /// `null` was observed in addition to whatever the payload describes.
    Nullable(Box<InferredSchema>),
}

impl InferredSchema {
    /// Folds one more example `value` into the inference and returns the
    /// updated inference.
    ///
    /// `hints` is consulted only while the inference is still
    /// [`InferredSchema::Unknown`], to choose between competing forms.
    pub fn infer(self, value: Value, hints: &Hints) -> Self {
        match (self, value) {
            // Handle all null-related cases first. After these branches,
            // neither the current inference nor the incoming data will be
            // null.
            //
            // An inference that is already nullable is returned unchanged, so
            // a long run of nulls does not build an ever-deeper chain of
            // `Nullable` wrappers.
            (nullable @ InferredSchema::Nullable(_), Value::Null) => nullable,
            (sub_infer, Value::Null) => InferredSchema::Nullable(Box::new(sub_infer)),
            (InferredSchema::Nullable(sub_infer), value) => {
                InferredSchema::Nullable(Box::new(sub_infer.infer(value, hints)))
            }

            // Handle all cases related to when we don't have a prior on what
            // the data should be.
            //
            // These cases are where we allow hints to tell us to use a
            // particular form.
            (InferredSchema::Unknown, Value::Bool(_)) => InferredSchema::Boolean,
            (InferredSchema::Unknown, Value::Number(n)) => InferredSchema::Number(
                InferredNumber::new().infer(n.as_f64().expect("JSON number not representable as f64")),
            ),
            (InferredSchema::Unknown, Value::String(s)) => {
                if hints.is_enum_active() {
                    let mut values = BTreeSet::new();
                    values.insert(s);

                    InferredSchema::Enum(values)
                } else if DateTime::parse_from_rfc3339(&s).is_ok() {
                    InferredSchema::Timestamp
                } else {
                    InferredSchema::String
                }
            }
            (InferredSchema::Unknown, Value::Array(vals)) => {
                let mut sub_infer = InferredSchema::Unknown;
                for (i, v) in vals.into_iter().enumerate() {
                    sub_infer = sub_infer.infer(v, &hints.sub_hints(&i.to_string()));
                }

                InferredSchema::Array(Box::new(sub_infer))
            }
            (InferredSchema::Unknown, Value::Object(mut obj)) => {
                if hints.is_values_active() {
                    let mut sub_infer = InferredSchema::Unknown;
                    for (k, v) in obj {
                        sub_infer = sub_infer.infer(v, &hints.sub_hints(&k));
                    }

                    return InferredSchema::Values(Box::new(sub_infer));
                }

                if let Some(discriminator) = hints.peek_active_discriminator() {
                    // Only take the tag out of the object once we know it is
                    // a string. Otherwise the property would be removed here
                    // and then be missing from the properties form inferred
                    // below, producing a schema that rejects this input.
                    if matches!(obj.get(discriminator), Some(Value::String(_))) {
                        if let Some(Value::String(mapping_key)) = obj.remove(discriminator) {
                            let infer_rest =
                                InferredSchema::Unknown.infer(Value::Object(obj), hints);

                            let mut mapping = BTreeMap::new();
                            mapping.insert(mapping_key, infer_rest);

                            return InferredSchema::Discriminator {
                                discriminator: discriminator.to_owned(),
                                mapping,
                            };
                        }
                    }
                }

                let mut props = BTreeMap::new();
                for (k, v) in obj {
                    let sub_infer = InferredSchema::Unknown.infer(v, &hints.sub_hints(&k));
                    props.insert(k, sub_infer);
                }

                InferredSchema::Properties {
                    required: props,
                    optional: BTreeMap::new(),
                }
            }

            // Handle updating an inferred "any". Sort of a trivial case; once
            // we've inferred something can be "anything", we'll never narrow
            // it down thereafter.
            (InferredSchema::Any, _) => InferredSchema::Any,

            // Handle updating an inferred boolean primitive.
            (InferredSchema::Boolean, Value::Bool(_)) => InferredSchema::Boolean,
            (InferredSchema::Boolean, _) => InferredSchema::Any,

            // Handle updating an inferred number primitive.
            (InferredSchema::Number(inferred_number), Value::Number(n)) => InferredSchema::Number(
                inferred_number.infer(n.as_f64().expect("JSON number not representable as f64")),
            ),
            (InferredSchema::Number(_), _) => InferredSchema::Any,

            // Handle updating an inferred timestamp primitive. A string that
            // is not a timestamp widens the inference to a plain string.
            (InferredSchema::Timestamp, Value::String(s)) => {
                if DateTime::parse_from_rfc3339(&s).is_ok() {
                    InferredSchema::Timestamp
                } else {
                    InferredSchema::String
                }
            }
            (InferredSchema::Timestamp, _) => InferredSchema::Any,

            // Handle updating an inferred string primitive.
            (InferredSchema::String, Value::String(_)) => InferredSchema::String,
            (InferredSchema::String, _) => InferredSchema::Any,

            // Handle updating an inferred enum.
            (InferredSchema::Enum(mut values), Value::String(s)) => {
                values.insert(s);
                InferredSchema::Enum(values)
            }
            (InferredSchema::Enum(_), _) => InferredSchema::Any,

            // Handle updating an inferred array.
            (InferredSchema::Array(prior), Value::Array(vals)) => {
                let mut sub_infer = *prior;
                for (i, v) in vals.into_iter().enumerate() {
                    sub_infer = sub_infer.infer(v, &hints.sub_hints(&i.to_string()));
                }

                InferredSchema::Array(Box::new(sub_infer))
            }
            (InferredSchema::Array(_), _) => InferredSchema::Any,

            // Handle updating an inferred properties form.
            (
                InferredSchema::Properties {
                    mut required,
                    mut optional,
                },
                Value::Object(map),
            ) => {
                // Any previously-required key missing from this object is
                // demoted to optional.
                let missing_required_keys: Vec<String> = required
                    .keys()
                    .filter(|k| !map.contains_key(k.as_str()))
                    .cloned()
                    .collect();

                for k in missing_required_keys {
                    if let Some(sub_infer) = required.remove(&k) {
                        optional.insert(k, sub_infer);
                    }
                }

                for (k, v) in map {
                    if let Some(prior) = required.remove(&k) {
                        let sub_infer = prior.infer(v, &hints.sub_hints(&k));
                        required.insert(k, sub_infer);
                    } else {
                        // Either a known optional key, or a key never seen
                        // before; the latter cannot be required because
                        // earlier objects lacked it.
                        let prior = optional.remove(&k).unwrap_or(InferredSchema::Unknown);
                        let sub_infer = prior.infer(v, &hints.sub_hints(&k));
                        optional.insert(k, sub_infer);
                    }
                }

                InferredSchema::Properties { required, optional }
            }
            (InferredSchema::Properties { .. }, _) => InferredSchema::Any,

            // Handle updating an inferred values form. Every value is folded
            // into the prior inference, so earlier observations are kept.
            (InferredSchema::Values(prior), Value::Object(map)) => {
                let mut sub_infer = *prior;
                for (k, v) in map {
                    sub_infer = sub_infer.infer(v, &hints.sub_hints(&k));
                }

                InferredSchema::Values(Box::new(sub_infer))
            }
            (InferredSchema::Values(_), _) => InferredSchema::Any,

            // Handle updating an inferred discriminator form.
            (
                InferredSchema::Discriminator {
                    discriminator,
                    mut mapping,
                },
                Value::Object(mut obj),
            ) => match obj.remove(&discriminator) {
                Some(Value::String(mapping_key)) => {
                    let sub_infer = mapping
                        .remove(&mapping_key)
                        .unwrap_or(InferredSchema::Unknown)
                        .infer(Value::Object(obj), hints);
                    mapping.insert(mapping_key, sub_infer);

                    InferredSchema::Discriminator {
                        discriminator,
                        mapping,
                    }
                }
                // Tag missing or not a string: no longer a discriminator.
                _ => InferredSchema::Any,
            },
            (InferredSchema::Discriminator { .. }, _) => InferredSchema::Any,
        }
    }

    /// Converts the inference into a JSON Typedef [`Schema`].
    ///
    /// `hints` supplies the default number type used for inferred numbers.
    pub fn into_schema(self, hints: &Hints) -> Schema {
        match self {
            InferredSchema::Unknown | InferredSchema::Any => Schema::Empty {
                definitions: Default::default(),
                metadata: Default::default(),
            },
            InferredSchema::Boolean => primitive(Type::Boolean),
            InferredSchema::Number(inferred_number) => {
                primitive(inferred_number.into_type(hints.default_num_type()))
            }
            InferredSchema::String => primitive(Type::String),
            InferredSchema::Timestamp => primitive(Type::Timestamp),
            InferredSchema::Enum(values) => Schema::Enum {
                definitions: Default::default(),
                metadata: Default::default(),
                nullable: false,
                enum_: values,
            },
            InferredSchema::Array(sub_infer) => Schema::Elements {
                definitions: Default::default(),
                metadata: Default::default(),
                nullable: false,
                elements: Box::new(sub_infer.into_schema(hints)),
            },
            InferredSchema::Properties { required, optional } => {
                let properties_is_present = !required.is_empty();

                Schema::Properties {
                    definitions: Default::default(),
                    metadata: Default::default(),
                    nullable: false,
                    properties: required
                        .into_iter()
                        .map(|(k, v)| (k, v.into_schema(hints)))
                        .collect(),
                    optional_properties: optional
                        .into_iter()
                        .map(|(k, v)| (k, v.into_schema(hints)))
                        .collect(),
                    properties_is_present,
                    additional_properties: false,
                }
            }
            InferredSchema::Values(sub_infer) => Schema::Values {
                definitions: Default::default(),
                metadata: Default::default(),
                nullable: false,
                values: Box::new(sub_infer.into_schema(hints)),
            },
            InferredSchema::Discriminator {
                discriminator,
                mapping,
            } => Schema::Discriminator {
                definitions: Default::default(),
                metadata: Default::default(),
                nullable: false,
                discriminator,
                mapping: mapping
                    .into_iter()
                    .map(|(k, v)| (k, v.into_schema(hints)))
                    .collect(),
            },
            InferredSchema::Nullable(sub_infer) => {
                // Build the inner schema, then switch on its `nullable` flag.
                // The empty form is returned as-is.
                let mut schema = sub_infer.into_schema(hints);
                match &mut schema {
                    Schema::Ref { .. } => unreachable!("ref form inferred"),
                    Schema::Empty { .. } => {}
                    Schema::Type { nullable, .. }
                    | Schema::Enum { nullable, .. }
                    | Schema::Elements { nullable, .. }
                    | Schema::Properties { nullable, .. }
                    | Schema::Values { nullable, .. }
                    | Schema::Discriminator { nullable, .. } => *nullable = true,
                }
                schema
            }
        }
    }
}

/// Builds a non-nullable type-form schema with no definitions or metadata.
fn primitive(type_: Type) -> Schema {
    Schema::Type {
        definitions: Default::default(),
        metadata: Default::default(),
        nullable: false,
        type_,
    }
}

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//! Infers JSON Type Definition schemas from example inputs.
//!
//! JSON Type Definition, aka [RFC 8927](https://tools.ietf.org/html/rfc8927),
//! is an easy-to-learn, standardized way to define a schema for JSON data. You
//! can use JSON Typedef to portably validate data across programming languages,
//! create dummy data, generate code, and more.
//!
//! This Rust crate can generate a JSON Typedef schema from example data. If you
//! are looking to use this package as a CLI tool, see [this crate's
//! README](https://github.com/jsontypedef/json-typedef-infer). The remainder of
//! these docs are focused on this crate as a Rust library, and so focuses on
//! the Rust API for using `jtd_infer`.
//!
//! # Quick start
//!
//! Here's how you can use this crate to infer a schema:
//!
//! ```
//! use serde_json::json;
//! use jtd_infer::{Inferrer, Hints, HintSet, NumType};
//!
//! let mut inferrer = Inferrer::new(Hints::new(
//!     NumType::Uint8,
//!     HintSet::new(vec![]),
//!     HintSet::new(vec![]),
//!     HintSet::new(vec![]),
//! ));
//!
//! inferrer = inferrer.infer(json!({ "foo": true, "bar": "xxx" }));
//! inferrer = inferrer.infer(json!({ "foo": false, "bar": null, "baz": 5 }));
//!
//! let inference = inferrer.into_schema();
//!
//! assert_eq!(
//!     json!({
//!         "properties": {
//!             "foo": { "type": "boolean" },
//!             "bar": { "type": "string", "nullable": true },
//!         },
//!         "optionalProperties": {
//!             "baz": { "type": "uint8" },
//!         },
//!     }),
//!     serde_json::to_value(inference.into_serde_schema()).unwrap(),
//! )
//! ```

mod hints;
mod inferred_number;
mod inferred_schema;

pub use crate::hints::{HintSet, Hints};
pub use crate::inferred_number::NumType;
use crate::inferred_schema::InferredSchema;
use jtd::Schema;
use serde_json::Value;

/// Keeps track of a sequence of example inputs, and can be converted into an
/// inferred schema.
pub struct Inferrer<'a> {
    inference: InferredSchema,
    hints: Hints<'a>,
}

impl<'a> Inferrer<'a> {
    /// Constructs a new inferrer with a given set of hints.
    ///
    /// See the documentation for [`Hints`] for details on what effect they
    /// have on [`Inferrer::infer`].
    pub fn new(hints: Hints<'a>) -> Self {
        Self {
            inference: InferredSchema::Unknown,
            hints,
        }
    }

    /// "Updates" the inference given an example data.
    ///
    /// Note that though the previous sentence uses the word "update", in Rust
    /// ownership terms this method *moves* `self`.
    pub fn infer(self, value: Value) -> Self {
        Self {
            inference: self.inference.infer(value, &self.hints),
            hints: self.hints,
        }
    }

    /// Converts the inference to a JSON Type Definition schema.
    ///
    /// It is guaranteed that the resulting schema will accept all of the inputs
    /// previously provided via [`Inferrer::infer`].
    pub fn into_schema(self) -> Schema {
        self.inference.into_schema(&self.hints)
    }
}

--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
use anyhow::Error;
use clap::{crate_version, load_yaml, App, AppSettings};
use jtd_infer::{HintSet, Hints, Inferrer, NumType};
use serde_json::Deserializer;
use std::fs::File;
use std::io::stdin;
use std::io::BufReader;
use std::io::Read;

/// Reads a stream of JSON values from a file or stdin, infers a JSON Typedef
/// schema from them, and prints the schema as JSON on stdout.
fn main() -> Result<(), Error> {
    let cli_yaml = load_yaml!("cli.yaml");
    let matches = App::from(cli_yaml)
        .setting(AppSettings::ColoredHelp)
        .version(crate_version!())
        .get_matches();

    // NOTE(review): the unwrap presumably relies on cli.yaml giving `input` a
    // default value -- confirm against cli.yaml.
    let reader = BufReader::new(match matches.value_of("input").unwrap() {
        "-" => Box::new(stdin()) as Box<dyn Read>,
        file => Box::new(File::open(file)?) as Box<dyn Read>,
    });

    // Parses every occurrence of the named option as a JSON Pointer.
    let pointers_for = |name: &str| -> Vec<Vec<String>> {
        matches
            .values_of(name)
            .unwrap_or_default()
            .map(parse_json_pointer)
            .collect()
    };

    let enum_hints = pointers_for("enum-hint");
    let values_hints = pointers_for("values-hint");
    let discriminator_hints = pointers_for("discriminator-hint");

    // NOTE(review): the catch-all arm presumably relies on cli.yaml
    // restricting the accepted values -- confirm against cli.yaml.
    let default_num_type = match matches.value_of("default-number-type").unwrap() {
        "int8" => NumType::Int8,
        "uint8" => NumType::Uint8,
        "int16" => NumType::Int16,
        "uint16" => NumType::Uint16,
        "int32" => NumType::Int32,
        "uint32" => NumType::Uint32,
        "float32" => NumType::Float32,
        "float64" => NumType::Float64,
        _ => unreachable!(),
    };

    let hints = Hints::new(
        default_num_type,
        HintSet::new(enum_hints.iter().map(|p| &p[..]).collect()),
        HintSet::new(values_hints.iter().map(|p| &p[..]).collect()),
        HintSet::new(discriminator_hints.iter().map(|p| &p[..]).collect()),
    );

    let mut inferrer = Inferrer::new(hints);

    let stream = Deserializer::from_reader(reader);
    for value in stream.into_iter() {
        inferrer = inferrer.infer(value?);
    }

    let serde_schema: jtd::SerdeSchema = inferrer.into_schema().into_serde_schema();
    println!("{}", serde_json::to_string(&serde_schema)?);

    Ok(())
}

/// Splits a JSON Pointer (RFC 6901) into its unescaped reference tokens.
///
/// The empty pointer yields no tokens. Otherwise everything before the first
/// `/` is skipped and each remaining `/`-separated token is unescaped.
fn parse_json_pointer(s: &str) -> Vec<String> {
    if s.is_empty() {
        return vec![];
    }

    // Split first, then unescape each token: `~1` before `~0`, so that `~01`
    // decodes to `~1` rather than `/`. Unescaping before the split would turn
    // an escaped slash into a token separator.
    s.split('/')
        .skip(1)
        .map(|token| token.replace("~1", "/").replace("~0", "~"))
        .collect()
}

--------------------------------------------------------------------------------