├── .github └── workflows │ └── go.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cmd └── protoscope │ └── main.go ├── go.mod ├── go.sum ├── internal └── print │ └── print.go ├── language.txt ├── roundtrip_fuzz_test.go ├── scanner.go ├── scanner_test.go ├── testdata ├── explicit-wire-types.pb.golden ├── fuzz │ └── FuzzRoundTrip │ │ ├── 281cfe7fa226ed9ce863770b1b0e0df36b5d3a7def5b8d8f8793ae2a72e0721b │ │ ├── 47e286a415f65888c5e703a40db2915f8cc40f64e1fad8473699b77066f2c57a │ │ ├── 50640a7a67794863f262b749f288aff8610f2cac65b7cf65b8708eb0ea104519 │ │ ├── 83cdd88140a480b3df36ede77b3b6c5e417419cdf7a06816e4f2b8847b0e8e58 │ │ ├── a57870730bd74cb4dc5f42974a51f21aa30f472685651cc6b209c105e549ee02 │ │ ├── d0cf2d61f6fd4befd99d5b5e27a86b2e483c755f9f5888e0637f230d27e7fe63 │ │ ├── e9e048e13dff8ac2c7f1621cfe916924dbfd27336808d43fd4c4330ac9d15856 │ │ ├── f81d19afdbd7367e5511f623c72fb0fd04fb25450cf44393246e697f27744d89 │ │ └── fbab04f43516aa5e2b159a4081fcca5a99a50f57e96df8ce95d58ffff75b9902 ├── groups.pb ├── groups.pb.golden ├── maps-explicit-prefixes.pb.golden ├── maps.pb ├── maps.pb.golden ├── message-explicit-prefixes.pb.golden ├── message-fields.pb.golden ├── message.pb ├── message.pb.golden ├── no-groups.pb.golden ├── oneof.pb ├── oneof.pb.golden ├── packed-big.pb ├── packed-big.pb.golden ├── packed-schema.pb.golden ├── packed.pb ├── packed.pb.golden ├── proto3.pb ├── proto3.pb.golden ├── unittest.proto ├── unittest.proto.pb └── unittest.proto.pb.golden ├── writer.go └── writer_test.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Google LLC 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | name: Go 16 | 17 | on: 18 | push: 19 | branches: [main] 20 | pull_request: 21 | branches: [main] 22 | 23 | jobs: 24 | lints: 25 | runs-on: ubuntu-latest 26 | steps: 27 | - uses: actions/checkout@v3 28 | - uses: actions/setup-go@v3 29 | with: 30 | go-version: '^1.18.0' 31 | - name: go fmt 32 | run: | 33 | go fmt . ./cmd/* ./internal/* | tee -a modified 34 | if [[ $(cat modified) ]]; then exit 1; fi; 35 | 36 | tests: 37 | runs-on: ubuntu-latest 38 | steps: 39 | - uses: actions/checkout@v3 40 | - uses: actions/setup-go@v3 41 | with: 42 | go-version: '^1.18.0' 43 | 44 | - name: go build 45 | run: go build 46 | - name: 'go build ./cmd/protoscope' 47 | run: 'go build ./cmd/protoscope' 48 | - name: go test 49 | run: go test 50 | 51 | fuzz: 52 | runs-on: ubuntu-latest 53 | steps: 54 | - uses: actions/checkout@v3 55 | - uses: actions/setup-go@v3 56 | with: 57 | go-version: '^1.18.0' 58 | - name: go fuzz 59 | run: go test -fuzz FuzzRoundTrip -fuzztime 100x -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # The output of go build ./cmd/protoscope 2 | protoscope 3 | 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 
There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code Reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Protoscope 2 | 3 | *Protobuf + Rotoscope* 4 | 5 | Protoscope is a simple, human-editable language for representing and emitting 6 | the 7 | [Protobuf wire format](https://developers.google.com/protocol-buffers/docs/encoding). 8 | It is inspired by, and is significantly based on, 9 | [DER ASCII](https://github.com/google/der-ascii), a similar tool for working 10 | with DER and BER, wire formats of ASN.1. 11 | 12 | Unlike most Protobuf tools, it is normally ignorant of schemata specified in 13 | `.proto` files; it has just enough knowledge of the wire format to provide 14 | primitives for constructing messages (such as field tags, varints, and length 15 | prefixes). A disassembler is included that uses heuristics to try to convert 16 | encoded Protobuf into Protoscope, although the heuristics are necessarily 17 | imperfect. 
18 | 19 | We provide the Go package `github.com/protocolbuffers/protoscope`, as well as 20 | the `protoscope` tool, which can be installed with the Go tool via 21 | 22 | ``` 23 | go install github.com/protocolbuffers/protoscope/cmd/protoscope...@latest 24 | ``` 25 | 26 | `go install` will place the binary in the `GOBIN` directory, which is `~/go/bin` 27 | by default. See the 28 | [docs for `go install`](https://pkg.go.dev/cmd/go#hdr-Compile_and_install_packages_and_dependencies) 29 | for more details. 30 | 31 | For the language specification and basic examples, see 32 | [language.txt](/language.txt). Example disassembly can be found under 33 | [./testdata](/testdata). 34 | 35 | ## Cookbook 36 | 37 | Protoscope can be used in a number of different ways to inspect or create binary 38 | Protobuf data. This isn't the full breadth of usecases, but they are the ones 39 | Protoscope (and its ancestor, DER ASCII) were designed for. 40 | 41 | ### Exploring Binary Dumps 42 | 43 | Sometimes, while working on a library that emits wire format, it may be 44 | necessary to debug the precise output of a test failure. If your test prints out 45 | a hex string, you can use the `xxd` command to turn it into raw binary data and 46 | pipe it into `protoscope`. 47 | 48 | Consider the following example of a message with a `google.protobuf.Any` field: 49 | 50 | ```sh 51 | $ cat hexdata.txt 52 | 0a400a26747970652e676f6f676c65617069732e636f6d2f70726f746f332e546573744d65737361676512161005420e65787065637465645f76616c756500000000 53 | $ xxd -r -ps hexdata.txt | protoscope 54 | 1: { 55 | 1: {"type.googleapis.com/proto3.TestMessage"} 56 | 2: {`1005420e65787065637465645f76616c756500000000`} 57 | } 58 | $ xxd -r -ps <<< "1005420e65787065637465645f76616c756500000000" | protoscope 59 | 2: 5 60 | 8: {"expected_value"} 61 | `00000000` 62 | ``` 63 | 64 | This reveals that four zero bytes sneaked into the output! 
65 | 66 | If your test failure output is made up of C-style escapes and text, the `printf` 67 | command can be used instead of `xxd`: 68 | 69 | ```sh 70 | $ printf '\x10\x05B\x0eexpected_value\x00\x00\x00\x00' | protoscope 71 | 2: 5 72 | 8: {"expected_value"} 73 | `00000000` 74 | ``` 75 | 76 | The `protoscope` command has many flags for refining the heuristic used to 77 | decode the binary. 78 | 79 | If an encoded `FileDescriptorSet` proto is available that contains your 80 | message's type, you can use it to get schema-aware decoding: 81 | 82 | ```sh 83 | $ cat hexdata.txt 84 | 086510661867206828d20130d4013d6b000000416c000000000000004d6d000000516e000000000000005d0000de42610000000000005c40680172033131357a0331313683018801758401 85 | $ xxd -r -ps hexdata.txt | protoscope \ 86 | -descriptor-set path/to/fds.pb -message-type unittest.TestAllTypes \ 87 | -print-field-names 88 | 1: 101 # optional_int32 89 | 2: 102 # optional_int64 90 | 3: 103 # optional_uint32 91 | 4: 104 # optional_uint64 92 | 5: 105z # optional_sint32 93 | 6: 106z # optional_sint64 94 | 7: 107i32 # optional_fixed32 95 | 8: 108i64 # optional_fixed64 96 | 9: 109i32 # optional_sfixed32 97 | 10: 110i64 # optional_sfixed64 98 | 11: 111.0i32 # optional_float, 0x42de0000i32 99 | 12: 112.0 # optional_double, 0x405c000000000000i64 100 | 13: true # optional_bool 101 | 14: {"115"} # optional_string 102 | 15: {"116"} # optional_bytes 103 | 16: !{ # optionalgroup 104 | 17: 117 # a 105 | } 106 | ``` 107 | 108 | You can get an encoded `FileDescriptorSet` by invoking 109 | 110 | ```sh 111 | protoc -Ipath/to/imported/protos -o my_fds.pb my_proto.proto 112 | ``` 113 | 114 | ### Modifying Existing Files 115 | 116 | Suppose that we have a proto file `foo.bin` of unknown schema: 117 | 118 | ```sh 119 | $ protoscope foo.bin 120 | 1: 42 121 | 2: { 122 | 42: {"my awesome proto"} 123 | } 124 | ``` 125 | 126 | Modifying the embedded string with a hex editor is very painful, because it's 127 | possible that the length prefix 
needs to be updated, which can lead to the 128 | length prefix on outer messages needing to be changed as well. This is made 129 | worse by length prefixes being varints, which may grow or shrink and feed into 130 | further outer length prefix updates. 131 | 132 | But `protoscope` makes this into a simple disassemble, edit, assemble loop: 133 | 134 | ```sh 135 | $ xxd foo.bin 136 | 00000000: 082a 1213 d202 106d 7920 6177 6573 6f6d .*.....my awesom 137 | 00000010: 6520 7072 6f74 6f e proto 138 | 139 | $ protoscope foo.bin > foo.txt # Disassemble. 140 | $ cat foo.txt 141 | 1: 42 142 | 2: { 143 | 42: {"my awesome proto"} 144 | } 145 | 146 | $ vim foo.txt # Make some edits. 147 | $ cat foo.txt 148 | 1: 43 149 | 2: { 150 | 42: {"my even more awesome awesome proto"} 151 | } 152 | 153 | $ protoscope -s foo.txt > foo.bin # Reassemble. 154 | $ xxd foo.bin 155 | 00000000: 082b 1225 d202 226d 7920 6576 656e 206d .+.%.."my even m 156 | 00000010: 6f72 6520 6177 6573 6f6d 6520 6177 6573 ore awesome awes 157 | 00000020: 6f6d 6520 7072 6f74 6f ome proto 158 | ``` 159 | 160 | The `-message-type` option from above can be used when you know the schema to 161 | make it easier to find specific fields. 162 | 163 | ### Describing Invalid Binaries 164 | 165 | Because Protoscope has a very weak understanding of Protobuf, it can be used to 166 | create invalid encodings to verify that some invariant is actually checked by a 167 | production parser. 168 | 169 | For example, the following Protoscope text can be used to create a test that 170 | ensures a too-long length prefix is rejected as invalid. 171 | 172 | ``` 173 | 1: { 174 | 2:LEN 5 # Explicit length prefix. 175 | "oops" # One byte too short. 176 | } 177 | ``` 178 | 179 | This is more convenient than typing out bytes by hand, because Protoscope takes 180 | care of tedious details like length prefixes, varint encoding, float encoding, 181 | and other things not relevant to the test. 
It also permits comments, which can 182 | be used to specify why the Protoscope snippet produces a broken binary. 183 | 184 | Protoscope itself generates test data using Protoscope, which is then checked 185 | in. Other projects can either check in binary data directly, or use the build 186 | system to invoke `protoscope`, such as with a Bazel `genrule()`. 187 | 188 | ## Backwards Compatibility 189 | 190 | The Protoscope language itself may be extended over time, but the intention is 191 | for extensions to be backwards-compatible. Specifically: 192 | 193 | * The command-line interface to `protoscope` will remain compatible, though 194 | new options may be added in the future. 195 | 196 | * Previously valid Protoscope will remain valid and produce the same output. 197 | In particular, checking in test data as Protoscope text should be 198 | future-proof. 199 | 200 | * Previously invalid Protoscope may become valid in the future if the language 201 | is extended. 202 | 203 | * Disassembly is necessarily a heuristic, so its output *may* change over 204 | time, but it is guaranteed to produce Protoscope output that will reassemble 205 | to the original byte string. `protoscope | protoscope -s` is always 206 | equivalent to `cat`. 207 | 208 | ## Disclaimer 209 | 210 | This is not an official Google project. 211 | -------------------------------------------------------------------------------- /cmd/protoscope/main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package main 16 | 17 | import ( 18 | "errors" 19 | "flag" 20 | "fmt" 21 | "io" 22 | "os" 23 | "os/exec" 24 | "strings" 25 | 26 | _ "embed" 27 | 28 | descpb "google.golang.org/protobuf/types/descriptorpb" 29 | 30 | "google.golang.org/protobuf/proto" 31 | "google.golang.org/protobuf/reflect/protodesc" 32 | "google.golang.org/protobuf/reflect/protoreflect" 33 | 34 | "github.com/protocolbuffers/protoscope" 35 | ) 36 | 37 | var ( 38 | outPath = flag.String("o", "", "output file to use (defaults to stdout)") 39 | assemble = flag.Bool("s", false, "whether to treat the input as a Protoscope source file") 40 | spec = flag.Bool("spec", false, "opens the Protoscope spec in $PAGER") 41 | 42 | noQuotedStrings = flag.Bool("no-quoted-strings", false, "assume no fields in the input proto are strings") 43 | allFieldsAreMessages = flag.Bool("all-fields-are-messages", false, "try really hard to disassemble all fields as messages") 44 | explicitWireTypes = flag.Bool("explicit-wire-types", false, "include an explicit wire type for every field") 45 | noGroups = flag.Bool("no-groups", false, "do not try to disassemble groups") 46 | explicitLengthPrefixes = flag.Bool("explicit-length-prefixes", false, "emit literal length prefixes instead of braces") 47 | 48 | descriptorSet = flag.String("descriptor-set", "", "path to a file containing an encoded FileDescriptorSet, for aiding disassembly") 49 | messageType = flag.String("message-type", "", "full name of a type in the FileDescriptorSet given by -descriptor-set;\n"+ 50 | "the decoder 
will assume that the input file is an encoded binary proto\n"+ 51 | "of this type for the purposes of providing better output") 52 | printFieldNames = flag.Bool("print-field-names", false, "prints out field names, if using -message-type") 53 | printEnumNames = flag.Bool("print-enum-names", false, "prints out enum value names, if using -message-type") 54 | ) 55 | 56 | func main() { 57 | if err := Main(); err != nil { 58 | fmt.Fprintln(os.Stderr, "protoscope:", err) 59 | os.Exit(1) 60 | } 61 | } 62 | 63 | func Main() error { 64 | flag.Usage = func() { 65 | fmt.Fprintf(os.Stderr, "Usage: %s [-s] [OPTION...] [INPUT]\n", os.Args[0]) 66 | fmt.Fprintf(os.Stderr, "Assemble a Protoscope file to binary, or inspect binary data as Protoscope text.\n") 67 | fmt.Fprintf(os.Stderr, "Run with -spec to learn more about the Protoscope language.\n\n") 68 | flag.PrintDefaults() 69 | } 70 | 71 | flag.Parse() 72 | 73 | if flag.NArg() > 1 { 74 | flag.Usage() 75 | os.Exit(1) 76 | } 77 | 78 | if *spec { 79 | pager := os.Getenv("PAGER") 80 | if pager == "" { 81 | return fmt.Errorf("%s", protoscope.LanguageTxt) 82 | return nil 83 | } 84 | 85 | cmd := exec.Command(pager) 86 | cmd.Stdout = os.Stdout 87 | cmd.Stdin = strings.NewReader(protoscope.LanguageTxt) 88 | if err := cmd.Run(); err != nil { 89 | return err 90 | } 91 | return nil 92 | } 93 | 94 | var schema protoreflect.MessageDescriptor 95 | if *descriptorSet != "" || *messageType != "" { 96 | if *assemble { 97 | return errors.New("-message-type and -descriptor-set cannot be mixed with -s") 98 | } 99 | if *descriptorSet == "" { 100 | return errors.New("-message-type without -descriptor-set") 101 | } 102 | if *messageType == "" { 103 | return errors.New("-descriptor-set without -message-type") 104 | } 105 | 106 | descBytes, err := os.ReadFile(*descriptorSet) 107 | if err != nil { 108 | return err 109 | } 110 | 111 | var fds descpb.FileDescriptorSet 112 | if err := proto.Unmarshal(descBytes, &fds); err != nil { 113 | return err 114 | } 115 
| 116 | files, err := protodesc.NewFiles(&fds) 117 | if err != nil { 118 | return err 119 | } 120 | 121 | desc, err := files.FindDescriptorByName(protoreflect.FullName(*messageType)) 122 | if err != nil { 123 | return err 124 | } 125 | 126 | if msgDesc, ok := desc.(protoreflect.MessageDescriptor); ok { 127 | schema = msgDesc 128 | } else { 129 | return fmt.Errorf("not a message type: %s", *messageType) 130 | } 131 | } 132 | 133 | inPath := "" 134 | inFile := os.Stdin 135 | if flag.NArg() == 1 { 136 | inPath = flag.Arg(0) 137 | var err error 138 | inFile, err = os.Open(inPath) 139 | if err != nil { 140 | return err 141 | } 142 | defer inFile.Close() 143 | } 144 | 145 | inBytes, err := io.ReadAll(inFile) 146 | if err != nil { 147 | return err 148 | } 149 | 150 | var outBytes []byte 151 | if *assemble { 152 | scanner := protoscope.NewScanner(string(inBytes)) 153 | scanner.SetFile(inPath) 154 | 155 | outBytes, err = scanner.Exec() 156 | if err != nil { 157 | return fmt.Errorf("syntax error: %s\n", err) 158 | os.Exit(1) 159 | } 160 | } else { 161 | outBytes = []byte(protoscope.Write(inBytes, protoscope.WriterOptions{ 162 | NoQuotedStrings: *noQuotedStrings, 163 | AllFieldsAreMessages: *allFieldsAreMessages, 164 | ExplicitWireTypes: *explicitWireTypes, 165 | NoGroups: *noGroups, 166 | ExplicitLengthPrefixes: *explicitLengthPrefixes, 167 | 168 | Schema: schema, 169 | PrintFieldNames: *printFieldNames, 170 | PrintEnumNames: *printEnumNames, 171 | })) 172 | } 173 | 174 | outFile := os.Stdout 175 | if *outPath != "" { 176 | var err error 177 | outFile, err = os.Create(*outPath) 178 | if err != nil { 179 | return err 180 | } 181 | defer outFile.Close() 182 | } 183 | 184 | _, err = outFile.Write(outBytes) 185 | return err 186 | } 187 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/protocolbuffers/protoscope 2 | 3 | go 1.18 4 | 5 | 
require ( 6 | github.com/google/go-cmp v0.5.8 7 | google.golang.org/protobuf v1.28.0 8 | ) -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 2 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 3 | github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= 4 | github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 5 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 6 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 7 | google.golang.org/protobuf v1.28.0 h1:w43yiav+6bVFTBQFZX0r7ipe9JQ1QsbMgHwbBziscLw= 8 | google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= 9 | -------------------------------------------------------------------------------- /internal/print/print.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // package print contains printing helpers used by the Protoscope disassembler. 
16 | 17 | package print 18 | 19 | import ( 20 | "bytes" 21 | "fmt" 22 | "unicode/utf8" 23 | ) 24 | 25 | // Stack is a wrapper over a slice type that provides helpers for pushing and 26 | // popping elements. 27 | // 28 | // Exported because of utility in the disassembler itself. 29 | type Stack[T any] []T 30 | 31 | // Push pushes an element. 32 | func (s *Stack[T]) Push(x T) { 33 | *s = append(*s, x) 34 | } 35 | 36 | // Pop pops an element; panics if the stack is empty. 37 | func (s *Stack[T]) Pop() T { 38 | popped := (*s)[len(*s)-1] 39 | *s = (*s)[:len(*s)-1] 40 | return popped 41 | } 42 | 43 | // Pop pops the top n elements off the stack and returns a slice containing 44 | // copies. Panics if the stack is too small 45 | func (s *Stack[T]) PopN(n int) []T { 46 | popped := make([]T, n) 47 | copy(popped, (*s)[len(*s)-n:]) 48 | *s = (*s)[:len(*s)-n] 49 | return popped 50 | } 51 | 52 | // Peek returns a pointer to the top of the stack, or nil if the stack is 53 | // empty. 54 | func (s *Stack[T]) Peek() *T { 55 | if len(*s) == 0 { 56 | return nil 57 | } 58 | return &s.PeekN(1)[0] 59 | } 60 | 61 | // Peek returns the top n elements of the stack as another stack. 62 | // 63 | // Returns nil if the stack is too small. 64 | func (s *Stack[T]) PeekN(n int) Stack[T] { 65 | if len(*s) < n { 66 | return nil 67 | } 68 | return (*s)[len(*s)-n:] 69 | } 70 | 71 | // Line represents a single line in the output stream. A Printer buffers on a 72 | // line-by-line basis to be able to do indentation and brace collapse with 73 | // minimal difficulty. 74 | type Line struct { 75 | // This line's in-progress text buffer. 76 | bytes.Buffer 77 | 78 | remarks []string 79 | indent int 80 | folds int 81 | } 82 | 83 | // Printer is an intelligent indentation and codeblock aware printer. 84 | type Printer struct { 85 | // The number of spaces to use per indentation level. 86 | Indent int 87 | // The number of nested folded blocks allowed, < 0 means infinity. 
88 | MaxFolds int 89 | 90 | lines Stack[Line] 91 | blocks Stack[BlockInfo] 92 | } 93 | 94 | // Current returns the current line being processed. 95 | func (p *Printer) Current() *Line { 96 | return p.Prev(0) 97 | } 98 | 99 | // Discards the current line 100 | func (p *Printer) DiscardLine() { 101 | p.lines.Pop() 102 | } 103 | 104 | type Mark int 105 | 106 | // Makes a mark on the line buffer. 107 | func (p *Printer) Mark() Mark { 108 | return Mark(len(p.lines)) 109 | } 110 | 111 | // Discards all lines after the mark. 112 | func (p *Printer) Reset(m Mark) { 113 | p.lines = p.lines[:m] 114 | } 115 | 116 | // Prev returns the nth most recent line. 117 | // 118 | // Returns nil if there are not enough lines. 119 | func (p *Printer) Prev(n int) *Line { 120 | return &p.lines.PeekN(n + 1)[0] 121 | } 122 | 123 | // NewLine pushes a new line. 124 | func (p *Printer) NewLine() { 125 | p.lines.Push(Line{}) 126 | } 127 | 128 | // Writes to the current line's buffer with Fprint. 129 | func (p *Printer) Write(args ...any) { 130 | fmt.Fprint(p.Current(), args...) 131 | } 132 | 133 | // Writes to the current line's buffer with Fprintf. 134 | func (p *Printer) Writef(f string, args ...any) { 135 | fmt.Fprintf(p.Current(), f, args...) 136 | } 137 | 138 | // Adds a new remark made from stringifying args. 139 | func (p *Printer) Remark(args ...any) { 140 | l := p.Current() 141 | l.remarks = append(l.remarks, fmt.Sprint(args...)) 142 | } 143 | 144 | // Adds a new remark made from stringifying args. 145 | func (p *Printer) Remarkf(f string, args ...any) { 146 | l := p.Current() 147 | l.remarks = append(l.remarks, fmt.Sprintf(f, args...)) 148 | } 149 | 150 | // Finish dumps the entire contents of the Printer into a byte array. 
151 | func (p *Printer) Finish() []byte { 152 | if len(p.blocks) != 0 { 153 | panic("called Finish() without closing all blocks") 154 | } 155 | 156 | var out bytes.Buffer 157 | indent := 0 158 | commentCol := -1 159 | commentColUntil := -1 160 | for i, line := range p.lines { 161 | if len(line.remarks) != 0 && commentColUntil < i { 162 | // Comments are aligned to the same column if they are contiguous, unless 163 | // crossing an indentation boundary would cause the remark column to be 164 | // further than it would have been without crossing the boundary. 165 | // 166 | // This allows the column finding algorithm to be linear. 167 | indent2 := indent 168 | commentCol = -1 169 | for j, line := range p.lines[i:] { 170 | if len(line.remarks) == 0 { 171 | commentColUntil = j + i 172 | break 173 | } 174 | 175 | lineLen := indent2*p.Indent + utf8.RuneCount(line.Bytes()) 176 | indent2 += line.indent 177 | if lineLen > commentCol { 178 | if j > 1 && line.indent != 0 { 179 | commentColUntil = j + i 180 | break 181 | } 182 | commentCol = lineLen 183 | } 184 | } 185 | if extra := commentCol % p.Indent; extra != 0 { 186 | commentCol += p.Indent - extra 187 | } 188 | } 189 | 190 | for i := 0; i < indent*p.Indent; i++ { 191 | out.WriteString(" ") 192 | } 193 | 194 | out.Write(line.Bytes()) 195 | if len(line.remarks) > 0 { 196 | needed := commentCol - indent*p.Indent - line.Len() 197 | for i := 0; i < needed; i++ { 198 | out.WriteString(" ") 199 | } 200 | 201 | out.WriteString(" # ") 202 | for i, remark := range line.remarks { 203 | if i != 0 { 204 | out.WriteString(", ") 205 | } 206 | out.WriteString(remark) 207 | } 208 | } 209 | 210 | indent += line.indent 211 | out.WriteString("\n") 212 | } 213 | 214 | return out.Bytes() 215 | } 216 | 217 | type BlockInfo struct { 218 | // Whether this block will start and end with delimiters that do not need to 219 | // have spaces placed before/after them, allowing for output like {x} instead 220 | // of { x }. 
221 | HasDelimiters bool 222 | // The maximum height of the block that will be folded into a single line. 223 | HeightToFoldAt int 224 | // The line (zero-indexed, starting from the last line) that should be the 225 | // final indented line. If there are not enough lines, the block is not 226 | // indented at all. 227 | UnindentAt int 228 | 229 | start int 230 | } 231 | 232 | // Starts a new indentation block. 233 | func (p *Printer) StartBlock(bi BlockInfo) { 234 | if p.Current().indent != 0 { 235 | panic("called StartBlock() too many times; this is a bug") 236 | } 237 | bi.start = len(p.lines) - 1 238 | p.blocks.Push(bi) 239 | p.Current().indent++ 240 | } 241 | 242 | // Discards the current block and undoes its indentation. 243 | func (p *Printer) DropBlock() *Line { 244 | bi := p.blocks.Pop() 245 | start := &p.lines[bi.start] 246 | start.indent-- 247 | return start 248 | } 249 | 250 | // Finishes an indentation block; a call to this function must match up to 251 | // a corresponding previous StartBlock() call. Returns the starting line for the 252 | // block. 253 | // 254 | // This function will perform folding of small blocks as appropriate. 255 | func (p *Printer) EndBlock() *Line { 256 | bi := p.blocks.Pop() 257 | start := &p.lines[bi.start] 258 | height := len(p.lines) - bi.start 259 | 260 | // Does the unindentation operation. Because this may run after a successful 261 | // fold, we need to make sure that it re-computes the height. 262 | defer func() { 263 | height := len(p.lines) - bi.start 264 | if height <= bi.UnindentAt { 265 | p.lines[bi.start].indent-- 266 | } else { 267 | p.lines.PeekN(bi.UnindentAt + 1)[0].indent-- 268 | } 269 | }() 270 | 271 | // Decide whether to fold this block. 
272 | if height > bi.HeightToFoldAt || height < 2 { 273 | return start 274 | } 275 | 276 | folds := 0 277 | remarks := 0 278 | for _, line := range p.lines[bi.start:] { 279 | folds += line.folds 280 | if len(line.remarks) > 0 { 281 | remarks++ 282 | } 283 | } 284 | 285 | if folds > p.MaxFolds { 286 | return start 287 | } 288 | 289 | // Do not mix remarks from different lines. 290 | if remarks > 1 { 291 | return start 292 | } 293 | 294 | // We are ok to unindent. 295 | for i, line := range p.lines[bi.start+1:] { 296 | if (i != 0 && i != height-2) || !bi.HasDelimiters { 297 | start.WriteString(" ") 298 | } 299 | start.Write(line.Bytes()) 300 | if len(line.remarks) != 0 { 301 | // This will execute at most once per loop. 302 | start.remarks = line.remarks 303 | } 304 | } 305 | 306 | start.folds = folds 307 | p.lines = p.lines[:bi.start+1] 308 | return start 309 | } 310 | 311 | // Folds the last count lines into lines with `cols` columns each. 312 | func (p *Printer) FoldIntoColumns(cols, count int) { 313 | toFold := p.lines.PopN(count) 314 | widths := make([]int, cols) 315 | 316 | for len(toFold) > 0 { 317 | for i := range widths { 318 | widths[i] = 0 319 | } 320 | 321 | end := len(toFold) 322 | for i, line := range toFold { 323 | if len(line.remarks) != 0 { 324 | end = i 325 | break 326 | } 327 | 328 | len := utf8.RuneCount(line.Bytes()) 329 | w := &widths[i%cols] 330 | if len > *w { 331 | *w = len 332 | } 333 | } 334 | if end == 0 { 335 | end = 1 336 | } 337 | 338 | for i, line := range toFold[:end] { 339 | if i%cols == 0 { 340 | p.NewLine() 341 | } else { 342 | p.Write(" ") 343 | } 344 | 345 | needed := widths[i%cols] - utf8.RuneCount(line.Bytes()) 346 | for i := 0; i < needed; i++ { 347 | p.Write(" ") 348 | } 349 | p.Current().Write(line.Bytes()) 350 | if len(line.remarks) != 0 { 351 | // This will execute at most once per loop. 
352 | p.Current().remarks = line.remarks 353 | } 354 | } 355 | 356 | toFold = toFold[end:] 357 | } 358 | } 359 | -------------------------------------------------------------------------------- /language.txt: -------------------------------------------------------------------------------- 1 | # Protoscope Language Specification. 2 | 3 | # Protoscope is a text format for representing valid Protobuf wire-format 4 | # encodings, directly inspired by https://github.com/google/der-ascii, and 5 | # has a significant overlap in syntax with it. 6 | # 7 | # First, it is reversible, so all encoding variations must be represented in the 8 | # language directly. This includes the distinctions between different integer 9 | # encodings, packed primitive fields, and groups. 10 | # 11 | # Second, Protoscope is intended to create both valid and invalid encodings. It 12 | # has minimal knowledge of the wire format, but it is ignorant of actual 13 | # schemata specified by DescriptorProtos. Elements in the input file may be 14 | # freely replaced by raw byte strings, and there is no requirement that the 15 | # resulting output is anything resembling a valid proto. 16 | # 17 | # Protoscope is *not* a replacement for the text format; instead, it is intended 18 | # to be used for manipulating encoded protos where the precise encoding is 19 | # relevant, such as debugging a codec or creating test data. 20 | # 21 | # This specification is a valid Protoscope file. 22 | 23 | 24 | # A Protoscope file is a sequence of tokens. Most tokens resolve to a byte 25 | # string which is emitted as soon as it is processed. 26 | 27 | # Tokens are separated by whitespace, which is defined to be space (0x20), TAB 28 | # (0x09), CR (0x0d), and LF (0x0a). Apart from acting as a token separator, 29 | # whitespace is not significant. 30 | 31 | # Comments begin with # and run to the end of the line. Comments are treated as 32 | # whitespace. 33 | 34 | 35 | # Quoted strings. 
36 | 37 | "Quoted strings are delimited by double quotes. Backslash denotes escape 38 | sequences. Legal escape sequences are: \\ \" \x00 \000 \n. \x00 consumes two 39 | hex digits and emits a byte. \000 consumes one to three octal digits and emits 40 | a byte (rejecting values that do not fit in a single octet). Otherwise, any 41 | byte before the closing quote, including a newline, is emitted as-is." 42 | 43 | # Tokens in the file are emitted one after another, so the following lines 44 | # produce the same output: 45 | "hello world" 46 | "hello " "world" 47 | 48 | # The Protobuf wire format only deals in UTF-8 when it deals with text at all, 49 | # so there is no equivalent of DER-ASCII's UTF-16/32 string literals. 50 | 51 | 52 | # Hex literals. 53 | 54 | # Backticks denote hex literals. Either uppercase or lowercase is legal, but no 55 | # characters other than hexadecimal digits may appear. A hex literal emits the 56 | # decoded byte string. 57 | `00` 58 | `abcdef` 59 | `AbCdEf` 60 | 61 | 62 | # Integers. 63 | 64 | # Tokens which match /-?[0-9]+/ or /-?0x[0-9a-fA-F]+/ are integer tokens. 65 | # They encode into a Protobuf varint (base 128). 66 | 456 67 | -0xffFF 68 | 69 | # Signed integers encode as their 64-bit two's complement by default. If an 70 | # integer is suffixed with z, it uses the zigzag encoding instead. 71 | -2z 3 # Equivalent tokens. 72 | 73 | # An integer may instead be suffixed with i32 or i64, which indicates it should 74 | # be encoded as a fixed-width integer. 75 | 0i32 76 | -23i64 77 | 78 | # An integer may follow a 'long-form:N' token. This will cause the varint to 79 | # have N more bytes than it needs to successfully encode. For example, the 80 | # following are equivalent: 81 | long-form:3 3 82 | `83808000` 83 | 84 | 85 | # Booleans. 86 | 87 | # The following tokens emit `01` and `00`, respectively. 88 | true 89 | false 90 | 91 | 92 | # Floats.
93 | 94 | # Tokens that match /-?[0-9]+\.[0-9]+([eE]-?[0-9]+)?/ or 95 | # /-?0x[0-9a-fA-F]+\.[0-9a-fA-F]+([pP]-?[0-9]+)?/ are floating-point 96 | # tokens. They encode to an IEEE 754 binary64 value. 97 | 1.0 98 | 9.423e-2 99 | -0x1.ffp52 100 | 101 | # Decimal floats are only guaranteed a particular encoding when conversion from 102 | # decimal to binary is exact. Hex floats always have an exact conversion. The 103 | # i32 suffix from above may be used to specify a 32-bit float (i64 is permitted, 104 | # but redundant). 105 | 1.5i32 106 | 0xf.fi64 107 | 108 | # The strings inf32, inf64, -inf32, and -inf64 are recognized as shorthands for 109 | # 32-bit and 64-bit infinities. There is no shorthand for NaN (since there are 110 | # so many of them), and it is best spelled out as a fixed-size hex int. 111 | inf32 112 | -inf64 113 | 114 | 115 | # Tag expressions. 116 | 117 | # An integer followed by a : denotes a tag expression. This encodes the tag of 118 | # a Protobuf field. This is identical to an ordinary integer, except that a 119 | # wire type between 0 and 7 is combined with it via the expression 120 | # 121 | # tag := int << 3 | wireType 122 | # 123 | # The integer specifies the field number, while what follows after the : 124 | # specifies the wire type. In the examples below, no whitespace may be 125 | # present around the : rune. 126 | # 127 | # Field numbers may be hex, signed, and zigzag per the syntax above, but not 128 | # fixed-width. They may have a long-form prefix. 129 | 130 | 1:VARINT # A varint. 131 | 2:I64 # A fixed-width, 64-bit blob. 132 | 3:LEN # A length-prefixed blob. 133 | 4:SGROUP # A start-group marker. 134 | 5:EGROUP # An end-group marker. 135 | 6:I32 # A fixed-width, 32-bit blob. 136 | 137 | 0x10:0 # Also a varint, explicit value for the type. 138 | 8:6 # Invalid wire type (6 and 7 are unused). 139 | 140 | # This is an error: the wire type must be between 0 and 7.
141 | # 9:8 142 | 143 | # If the : is instead followed by any rune not matching /[\w-]/, the scanner 144 | # will seek forward to the next token. If it is a fixed-width integer or a 145 | # float, the wire type will be inferred to be I32 or I64 as appropriate; if it 146 | # is a {, or a 'long-form:N' followed by a {, the type is inferred as LEN; 147 | # if it is a '!', the type is inferred as SGROUP; otherwise, it defaults to 148 | # VARINT. 149 | 150 | 1: 55z 151 | 2: 1.23 152 | 3: {"text"} 153 | 6: -1i32 154 | 8: !{42} 155 | 156 | 157 | # Length prefixes. 158 | 159 | # Matching curly brace tokens denote length prefixes. They emit a varint-encoded 160 | # length prefix followed by the encoding of the brace contents. 161 | # 162 | # It may optionally be preceded by 'long-form:N', as an integer would, to 163 | # introduce redundant bytes in the encoding of the length prefix. 164 | 165 | # This is a string field. Note that 23:'s type is inferred to be LEN. 166 | 23: {"my cool string"} 167 | 168 | # This is a nested message field. 169 | 24: { 170 | 1: 5 171 | 2: {"nested string"} 172 | } 173 | 174 | # This is a packed repeated int field. 175 | 25: { 1 2 3 4 5 6 7 } 176 | 177 | # This string field's length prefix will be 3, rather than one, bytes. 178 | 23: long-form:2 {"non-minimally-prefixed"} 179 | 180 | 181 | # Groups 182 | 183 | # If matching curly braces are prefixed with a ! (no spaces before the first 184 | # {), they denote a group. Encoding a group requires a field number, so the !{} 185 | # must come immediately after a tag expression without an explicit type (which 186 | # will be inferred to be SGROUP). The closing brace will generate a 187 | # corresponding EGROUP-typed tag to match the SGROUP tag. 188 | 26: !{ 189 | 1: 55z 190 | 2: 1.4 191 | 3: {"abcd"} 192 | } 193 | 194 | # long-form:N may be the last token between a group's braces, which will be 195 | # applied to the EGROUP tag. 196 | 27: !{long-form:3} 197 | 198 | 199 | # Examples.
200 | 201 | # These primitives may be combined with raw byte strings to produce other 202 | # encodings. 203 | 204 | # This is another way to write a message, using an explicit length 205 | 2:LEN 4 206 | "abcd" 207 | 208 | # This allows us to insert the wrong length. 209 | 2:LEN 5 210 | "abcd" 211 | 212 | # The wrong wire type can be used with no consequences. 213 | 5:I64 "stuff" 214 | 215 | 216 | # Disassembler. 217 | 218 | # Although the conversion from Protoscope to a byte string is well-defined, the 219 | # inverse is not. A given byte string may have multiple disassemblies. The 220 | # disassembler heuristically attempts to give a useful conversion for its 221 | # input. 222 | # 223 | # It is a goal that any valid protobuf input will be decoded reasonably, 224 | # although this is impossible in general, because length-prefixed blobs can be 225 | # either strings or protobufs, and fixed-width ints can also be floats. We try 226 | # to strike a balance that produces mostly readable output. 227 | # 228 | # Note that the output of the disassembler is not stable, and highly heuristic. 229 | # The only guarantee is that it will reassemble to the original input byte for 230 | # byte. 231 | # 232 | # The algorithm is as follows: 233 | # 234 | # 1. Greedily parse tags out of the input. If an invalid tag is found, encode 235 | # the remaining bytes as quoted strings or hex literals. Wire types 6 and 7 236 | # are treated as "invalid". 237 | # 238 | # 2. Encode the tag as N:, unless the wire type is 4, in which case encode it as 239 | # N:EGROUP. However, see below for cases when a wire type 3 tag precedes it. 240 | # 241 | # 3. If the wire type is 0, parse a varint and encode it as if it were an int64. 242 | # There is no useful way to distinguish sint32/sint64 here. 243 | # 244 | # 4. If the wire type is 1 or 5, parse eight or four bytes and interpret that as 245 | # a float of appropriate size. 246 | # 247 | # a. 
If the float is a NaN, print the bytes as a fixed-width hex integer. 248 | # 249 | # b. If the float is infinite, print inf32/inf64 as appropriate. 250 | # 251 | # c. If the float is zero or has an exponent that is not close to the largest 252 | # or smallest possible exponents, print as a decimal float. 253 | # 254 | # i. If this would produce a non-round-trip-able value, print as a hex 255 | # float instead. 256 | # 257 | # d. Otherwise, print a fixed-width decimal integer. 258 | # 259 | # 5. If the wire type is 2: 260 | # 261 | # a. Try to parse the contents of the field as a message, and print those 262 | # fields wrapped in {} if no failures occur. (-all-fields-are-messages will 263 | # instead cause all fields that parsed successfully to be emitted followed 264 | # by hex strings with the remaining content.) 265 | # 266 | # b. Otherwise, output the contents as quoted strings or hex literals. 267 | # 268 | # 6. If the wire type is 3: 269 | # 270 | # a. Encode !{ to begin a group and save the field number on the group stack. 271 | # 272 | # b. Upon coming to a wire type 4 tag, check if it matches the top of the 273 | # group stack. (Pop the stack unconditionally.) 274 | # 275 | # c. If it does, close the group with a } and do not emit a tag expression. 276 | # Emit a long-form:N as necessary. 277 | # 278 | # d. If it doesn't, re-encode the wire type 3 tag as N:SGROUP, and encode the 279 | # wire type 4 tag as above. If a type 2 message ends before the group is 280 | # closed, or the input reaches EOF, this step also applies. 281 | # 282 | # However, Protoscope offers the option of providing a descriptor to aid 283 | # disassembly. In this case the heuristic becomes much more intelligent. 284 | # Steps 1 and 2 remain the same, but from 3 onwards: 285 | # 286 | # 3. If the wire type is 0, parse a varint and encode it as: 287 | # 288 | # a. true/false if the field is bool-typed and 0- or 1-valued. 289 | # 290 | # b. A sint64 (a 42z literal) if the field is sint32/sint64-typed.
291 | # 292 | # c. A uint64 if the field is any of the unsigned integer types. 293 | # 294 | # d. An int64 if none of the above apply. 295 | # 296 | # 4. If the wire type is 1 or 5, parse eight or four bytes and encode as: 297 | # 298 | # a. A fixed64/fixed32 (resp) if the field is any of the unsigned integer 299 | # types. 300 | # 301 | # b. A sfixed64/sfixed32 (resp) if the field is any of the other integer 302 | # types. 303 | # 304 | # c. The same way floats are encoded in the schema-less algorithm above, step 305 | # 4.a-4.c, but printing subnormals as floats, too. 306 | # 307 | # 5. If the wire type is 2: 308 | # 309 | # a. If the field type is a scalar, print as a packed field (numeric literals 310 | # inside of braces) per the steps 3 and 4 above. 311 | # 312 | # b. If the field is a message or group, use the algorithm described in the 313 | # schema-less scheme (step 5). 314 | # 315 | # c. If the field is bytes or string, print as quoted strings or hex 316 | # literals as suits the disassembler's fancy. 317 | # 318 | # 6. If the wire type is 3, proceed per the instructions for a group given for 319 | # the schema-less algorithm. -------------------------------------------------------------------------------- /roundtrip_fuzz_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License.
14 | 15 | package protoscope 16 | 17 | import ( 18 | "bytes" 19 | "testing" 20 | ) 21 | 22 | var desc = GetDesc("unittest.TestAllTypes") 23 | // FuzzRoundTrip checks that text produced by Write for arbitrary bytes is scanned back by a Scanner into exactly those bytes. 24 | func FuzzRoundTrip(f *testing.F) { 25 | f.Fuzz(func(t *testing.T, in []byte) { 26 | if len(in) == 0 { 27 | return 28 | } 29 | useSchema := in[0]&1 == 0 // The low bit of the first byte selects whether desc is used as the schema. 30 | in = in[1:] // The remaining bytes are the payload that is round-tripped. 31 | 32 | var opts WriterOptions 33 | if useSchema { 34 | opts.Schema = desc 35 | } 36 | 37 | text := Write(in, opts) 38 | out, err := NewScanner(text).Exec() 39 | 40 | if err != nil { 41 | t.Fatalf("%x: scan of %q failed: %s", in, text, err) 42 | } 43 | if !bytes.Equal(in, out) { 44 | t.Fatalf("%x: not equal after round trip through %q: %x", in, text, out) 45 | } 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /scanner.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // Package protoscope converts between Protobuf wire-format bytes and the Protoscope text format. 16 | package protoscope 17 | 18 | import ( 19 | "encoding/binary" 20 | "encoding/hex" 21 | "errors" 22 | "fmt" 23 | "math" 24 | "regexp" 25 | "strconv" 26 | "strings" 27 | 28 | _ "embed" 29 | ) 30 | 31 | // LanguageTxt is the contents of language.txt. 32 | // 33 | //go:embed language.txt 34 | var LanguageTxt string 35 | 36 | // A Position describes a location in the input stream.
37 | // 38 | // The zero-value Position represents the first byte of an anonymous input file. 39 | type Position struct { 40 | Offset int // Byte offset. 41 | Line int // Line number (zero-indexed). 42 | Column int // Column number (zero-indexed byte, not rune, count). 43 | File string // Optional file name for pretty-printing. 44 | } 45 | 46 | // String converts a Position to a string. 47 | func (p Position) String() string { 48 | file := p.File 49 | if file == "" { 50 | file = "" 51 | } 52 | return fmt.Sprintf("%s:%d:%d", file, p.Line+1, p.Column+1) 53 | } 54 | 55 | // A tokenKind is a kind of token. 56 | type tokenKind int 57 | 58 | const ( 59 | tokenBytes tokenKind = iota 60 | tokenLongForm 61 | tokenLeftCurly 62 | tokenRightCurly 63 | tokenGroupCurly 64 | tokenEOF 65 | ) 66 | 67 | // A ParseError may be produced while executing a Protoscope file, wrapping 68 | // another error along with a position. 69 | // 70 | // Errors produced by functions in this package my by type-asserted to 71 | // ParseError to try and obtain the position at which the error occurred. 72 | type ParseError struct { 73 | Pos Position 74 | Err error 75 | } 76 | 77 | // Error makes this type into an error type. 78 | func (e *ParseError) Error() string { 79 | return fmt.Sprintf("%s: %s", e.Pos, e.Err) 80 | } 81 | 82 | // Unwrap extracts the inner wrapped error. 83 | // 84 | // See errors.Unwrap(). 85 | func (e *ParseError) Unwrap() error { 86 | return e.Err 87 | } 88 | 89 | // A token is a token in a Protoscope file. 90 | type token struct { 91 | // Kind is the kind of the token. 92 | Kind tokenKind 93 | // Value, for a tokenBytes token, is the decoded value of the token in 94 | // bytes. 95 | Value []byte 96 | // WireType, for a tokenBytes token, is which wire type an InferredType 97 | // tag expression that preceded it should become. 98 | WireType int 99 | // InferredType indicates that this was a tag expression which wishes to infer 100 | // its type based on tokens that follow. 
101 | InferredType bool 102 | // Pos is the position of the first byte of the token. 103 | Pos Position 104 | // Length, for a tokenLongForm token, is the number of bytes to use to 105 | // encode the length, not including the initial one. 106 | Length int 107 | // FieldNumber, if not -1, indicates that this was a tag token. This is used 108 | // for implementing group syntax. 109 | FieldNumber int64 110 | } 111 | 112 | var ( 113 | // The relevant capture groups are: 114 | // 1: The actual value. 115 | // 2: The encoding format. 116 | // 3: The wire type, including the colon, if this is a tag. 117 | // 4: The wire type expression, which may be empty if it is inferred. 118 | regexpIntOrTag = regexp.MustCompile(`^-?([0-9]+|0x[0-9a-fA-F]+)(z|i32|i64)?(:(\w*))?$`) 119 | regexpDecFp = regexp.MustCompile(`^(-?[0-9]+\.[0-9]+(?:[eE]-?[0-9]+)?)(i32|i64)?$`) 120 | regexpHexFp = regexp.MustCompile(`^(-?0x[0-9a-fA-F]+\.[0-9a-fA-F]+(?:[pP]-?[0-9]+)?)(i32|i64)?$`) 121 | regexpLongForm = regexp.MustCompile(`^long-form:([0-9]+)$`) 122 | ) 123 | 124 | // A Scanner represents parsing state for a Protoscope file. 125 | // 126 | // A zero-value Scanner is ready to begin parsing (given that Input is set to 127 | // a valid value). However, it is recommended to use NewScanner to create a new 128 | // Scanner, since it can pre-populate fields other than Input with default 129 | // settings. 130 | type Scanner struct { 131 | // Input is the input text being processed. 132 | Input string 133 | // Position is the current position at which parsing should 134 | // resume. The Offset field is used for indexing into Input; the remaining 135 | // fields are used for error-reporting. 136 | pos Position 137 | } 138 | 139 | // NewScanner creates a new scanner for parsing the given input. 140 | func NewScanner(input string) *Scanner { 141 | return &Scanner{Input: input} 142 | } 143 | 144 | // SetFile sets the file path shown in this Scanner's error reports. 
145 | func (s *Scanner) SetFile(path string) { 146 | s.pos.File = path 147 | } 148 | 149 | // Exec consumes tokens until Input is exhausted, returning the resulting 150 | // encoded maybe-DER. 151 | func (s *Scanner) Exec() ([]byte, error) { 152 | return s.exec(nil) 153 | } 154 | 155 | // isEOF returns whether the cursor is at least n bytes ahead of the end of the 156 | // input. 157 | func (s *Scanner) isEOF(n int) bool { 158 | return s.pos.Offset+n >= len(s.Input) 159 | } 160 | 161 | // advance advances the scanner's cursor n positions. 162 | // 163 | // Unlike just s.pos.Offset += n, this will not proceed beyond the end of the 164 | // string, and will update the line and column information accordingly. 165 | func (s *Scanner) advance(n int) { 166 | for i := 0; i < n && !s.isEOF(0); i++ { 167 | if s.Input[s.pos.Offset] == '\n' { 168 | s.pos.Line++ 169 | s.pos.Column = 0 170 | } else { 171 | s.pos.Column++ 172 | } 173 | s.pos.Offset++ 174 | } 175 | } 176 | 177 | // consume advances exactly n times and returns all source bytes between the 178 | // initial cursor position and excluding the final cursor position. 179 | // 180 | // If EOF is reached before all n bytes are consumed, the function returns 181 | // false. 182 | func (s *Scanner) consume(n int) (string, bool) { 183 | start := s.pos.Offset 184 | s.advance(n) 185 | if s.pos.Offset-start != n { 186 | return "", false 187 | } 188 | 189 | return s.Input[start:s.pos.Offset], true 190 | } 191 | 192 | // consumeUntil advances the cursor until the given byte is seen, returning all 193 | // source bytes between the initial cursor position and excluding the given 194 | // byte. This function will advance past the searched-for byte. 195 | // 196 | // If EOF is reached before the byte is seen, the function returns false. 
197 | func (s *Scanner) consumeUntil(b byte) (string, bool) { 198 | if i := strings.IndexByte(s.Input[s.pos.Offset:], b); i != -1 { 199 | text, _ := s.consume(i + 1) 200 | return text[:i], true 201 | } 202 | return "", false 203 | } 204 | 205 | // parseEscapeSequence parses a Protoscope escape sequence, returning the byte 206 | // it escapes. 207 | // 208 | // Valid escapes are: 209 | // \n \" \\ \xNN \NNN 210 | // 211 | // This function assumes that the scanner's cursor is currently on a \ rune. 212 | func (s *Scanner) parseEscapeSequence() (byte, error) { 213 | s.advance(1) // Skip the \. The caller is assumed to have validated it. 214 | if s.isEOF(0) { 215 | return 0, &ParseError{s.pos, errors.New("expected escape character")} 216 | } 217 | 218 | switch c := s.Input[s.pos.Offset]; c { 219 | case 'n': 220 | s.advance(1) 221 | return '\n', nil 222 | case '"', '\\': 223 | s.advance(1) 224 | return c, nil 225 | case 'x': 226 | s.advance(1) 227 | 228 | hexes, ok := s.consume(2) 229 | if !ok { 230 | return 0, &ParseError{s.pos, errors.New("unfinished escape sequence")} 231 | } 232 | 233 | bytes, err := hex.DecodeString(hexes) 234 | if err != nil { 235 | return 0, &ParseError{s.pos, err} 236 | } 237 | 238 | var r byte 239 | for _, b := range bytes { 240 | r <<= 8 241 | r |= b 242 | } 243 | return r, nil 244 | case '0', '1', '2', '3', '4', '5', '6', '7': 245 | start := s.pos.Offset 246 | for i := 0; i < 3 && !s.isEOF(0); i++ { 247 | c := s.Input[s.pos.Offset] 248 | if c < '0' || c > '7' { 249 | break 250 | } 251 | s.advance(1) 252 | } 253 | str := s.Input[start:s.pos.Offset] 254 | r, err := strconv.ParseUint(str, 8, 8) 255 | if err != nil { 256 | return 0, &ParseError{s.pos, err} 257 | } 258 | return byte(r), nil 259 | default: 260 | return 0, &ParseError{s.pos, fmt.Errorf("unknown escape sequence \\%c", c)} 261 | } 262 | } 263 | 264 | // parseQuotedString parses a UTF-8 string until the next ". 
265 | // 266 | // This function assumes that the scanner's cursor is currently on a " rune. 267 | func (s *Scanner) parseQuotedString() (token, error) { 268 | s.advance(1) // Skip the ". The caller is assumed to have validated it. 269 | start := s.pos 270 | var bytes []byte 271 | for { 272 | if s.isEOF(0) { 273 | return token{}, &ParseError{start, errors.New("unmatched \"")} 274 | } 275 | switch c := s.Input[s.pos.Offset]; c { 276 | case '"': 277 | s.advance(1) 278 | return token{Kind: tokenBytes, Value: bytes, Pos: start}, nil 279 | case '\\': 280 | r, err := s.parseEscapeSequence() 281 | if err != nil { 282 | return token{}, err 283 | } 284 | bytes = append(bytes, r) 285 | default: 286 | s.advance(1) 287 | bytes = append(bytes, c) 288 | } 289 | } 290 | } 291 | 292 | // next lexes the next token. 293 | func (s *Scanner) next(lengthModifier **token) (token, error) { 294 | again: 295 | if s.isEOF(0) { 296 | return token{Kind: tokenEOF, Pos: s.pos}, nil 297 | } 298 | 299 | switch s.Input[s.pos.Offset] { 300 | case ' ', '\t', '\n', '\r': 301 | // Skip whitespace. 302 | s.advance(1) 303 | goto again 304 | case '#': 305 | // Skip to the end of the comment. 
306 | s.advance(1) 307 | for !s.isEOF(0) { 308 | wasNewline := s.Input[s.pos.Offset] == '\n' 309 | s.advance(1) 310 | if wasNewline { 311 | break 312 | } 313 | } 314 | goto again 315 | case '!': 316 | s.advance(1) 317 | if s.Input[s.pos.Offset] != '{' { 318 | return token{}, &ParseError{s.pos, errors.New("expected { after !")} 319 | } 320 | s.advance(1) 321 | return token{Kind: tokenGroupCurly, Pos: s.pos}, nil 322 | case '{': 323 | s.advance(1) 324 | return token{Kind: tokenLeftCurly, Pos: s.pos}, nil 325 | case '}': 326 | s.advance(1) 327 | return token{Kind: tokenRightCurly, Pos: s.pos}, nil 328 | case '"': 329 | return s.parseQuotedString() 330 | case '`': 331 | s.advance(1) 332 | hexStr, ok := s.consumeUntil('`') 333 | if !ok { 334 | return token{}, &ParseError{s.pos, errors.New("unmatched `")} 335 | } 336 | bytes, err := hex.DecodeString(hexStr) 337 | if err != nil { 338 | return token{}, &ParseError{s.pos, err} 339 | } 340 | return token{Kind: tokenBytes, Value: bytes, Pos: s.pos}, nil 341 | } 342 | 343 | // Normal token. Consume up to the next whitespace character, symbol, or 344 | // EOF. 345 | start := s.pos 346 | s.advance(1) 347 | loop: 348 | for !s.isEOF(0) { 349 | switch s.Input[s.pos.Offset] { 350 | case ' ', '\t', '\n', '\r', '{', '}', '[', ']', '`', '"', '#', '!': 351 | break loop 352 | default: 353 | s.advance(1) 354 | } 355 | } 356 | 357 | symbol := s.Input[start.Offset:s.pos.Offset] 358 | 359 | if match := regexpIntOrTag.FindStringSubmatch(symbol); match != nil { 360 | // Go can detect the base if we set base=0, but it treats a leading 0 as 361 | // octal. 362 | base := 10 363 | isHex := strings.HasPrefix(match[0], "0x") || strings.HasPrefix(match[0], "-0x") 364 | if isHex { 365 | base = 16 366 | } 367 | 368 | // Use ParseUint so that we get the biggest unsigned ints possible. 
369 | uvalue, err := strconv.ParseUint(strings.TrimPrefix(match[1], "0x"), base, 64) 370 | if err != nil { 371 | return token{}, &ParseError{start, err} 372 | } 373 | value := int64(uvalue) 374 | 375 | // The special value -9223372036854775808 is interesting, because it has the 376 | // same bit representation as 9223372036854775808, since it is MinInt64. 377 | if strings.HasPrefix(match[0], "-") && value != math.MinInt64 { 378 | if value < 0 { 379 | return token{}, &ParseError{start, fmt.Errorf("negation overflows: '%s'", match[0])} 380 | } 381 | value = -value 382 | } 383 | 384 | var fieldNumber int64 = -1 385 | inferredType := false 386 | if match[3] != "" { 387 | if match[2] == "i32" || match[2] == "i64" { 388 | return token{}, &ParseError{start, errors.New("cannot use fixed-width encoding on tag expressions")} 389 | } 390 | 391 | var wireType int64 392 | switch match[4] { 393 | case "": 394 | inferredType = true 395 | case "VARINT": 396 | wireType = 0 397 | case "I64": 398 | wireType = 1 399 | case "LEN": 400 | wireType = 2 401 | case "SGROUP": 402 | wireType = 3 403 | case "EGROUP": 404 | wireType = 4 405 | case "I32": 406 | wireType = 5 407 | default: 408 | var err error 409 | if strings.HasPrefix(match[4], "0x") { 410 | wireType, err = strconv.ParseInt(match[4], 16, 64) 411 | } else { 412 | wireType, err = strconv.ParseInt(match[4], 10, 64) 413 | } 414 | if err != nil { 415 | return token{}, &ParseError{start, err} 416 | } 417 | } 418 | 419 | if wireType > 7 { 420 | return token{}, &ParseError{start, errors.New("a tag's wire type must be between 0 and 7")} 421 | } 422 | 423 | if value>>61 != 0 && value>>61 != -1 { 424 | return token{}, &ParseError{start, errors.New("field number too large for three extra bits for the wire type.")} 425 | } 426 | fieldNumber = value 427 | 428 | value <<= 3 429 | value |= wireType 430 | } 431 | 432 | var enc []byte 433 | var wireType int 434 | switch match[2] { 435 | case "z": 436 | value = (value << 1) ^ (value >> 63) 437 | 
fallthrough 438 | case "": 439 | var len int 440 | if *lengthModifier != nil { 441 | len = (*lengthModifier).Length 442 | *lengthModifier = nil 443 | } 444 | enc = encodeVarint(nil, uint64(value), len) 445 | case "i32": 446 | wireType = 5 447 | if value > math.MaxUint32 || value < math.MinInt32 { 448 | return token{}, &ParseError{start, fmt.Errorf("'%s' does not fit in 32 bits", symbol)} 449 | } 450 | if value > math.MinInt32 { 451 | value -= math.MaxUint32 + 1 452 | } 453 | enc = make([]byte, 4) 454 | binary.LittleEndian.PutUint32(enc, uint32(value)) 455 | case "i64": 456 | wireType = 1 457 | enc = make([]byte, 8) 458 | binary.LittleEndian.PutUint64(enc, uint64(value)) 459 | default: 460 | panic("unreachable") 461 | } 462 | 463 | return token{ 464 | Kind: tokenBytes, 465 | InferredType: inferredType, 466 | WireType: wireType, 467 | Value: enc, 468 | Pos: s.pos, 469 | FieldNumber: fieldNumber, 470 | }, nil 471 | } 472 | 473 | match := regexpDecFp.FindStringSubmatch(symbol) 474 | if match == nil { 475 | match = regexpHexFp.FindStringSubmatch(symbol) 476 | } 477 | 478 | if match != nil { 479 | // This works fine regardless of base; ParseFloat will detect the base from 480 | // the 0x prefix. Go expects an exponent on a hex float, so we need to 481 | // modify match[1] appropriately. 
482 | fp := match[1] 483 | if strings.Contains(fp, "0x") && !strings.ContainsAny(fp, "Pp") { 484 | fp += "p0" 485 | } 486 | 487 | var enc []byte 488 | var wireType int 489 | switch match[2] { 490 | case "i32": 491 | wireType = 5 492 | value, err := strconv.ParseFloat(fp, 32) 493 | if err != nil { 494 | return token{}, &ParseError{start, err} 495 | } 496 | if math.IsInf(value, 0) || math.IsNaN(value) || math.Abs(value) > math.MaxFloat32 { 497 | return token{}, &ParseError{start, fmt.Errorf("'%s' does not fit in a IEEE 754 binary32", match[0])} 498 | } 499 | enc = make([]byte, 4) 500 | binary.LittleEndian.PutUint32(enc, math.Float32bits(float32(value))) 501 | case "", "i64": 502 | wireType = 1 503 | value, err := strconv.ParseFloat(fp, 64) 504 | if err != nil { 505 | return token{}, &ParseError{start, err} 506 | } 507 | if math.IsInf(value, 0) || math.IsNaN(value) { 508 | return token{}, &ParseError{start, fmt.Errorf("'%s' does not fit in a IEEE 754 binary64", match[0])} 509 | } 510 | enc = make([]byte, 8) 511 | binary.LittleEndian.PutUint64(enc, math.Float64bits(value)) 512 | default: 513 | panic("unreachable") 514 | } 515 | 516 | return token{ 517 | Kind: tokenBytes, 518 | WireType: wireType, 519 | Value: enc, 520 | Pos: s.pos, 521 | FieldNumber: -1, 522 | }, nil 523 | } 524 | 525 | if match := regexpLongForm.FindStringSubmatch(symbol); match != nil { 526 | l, err := strconv.ParseInt(match[1], 10, 32) 527 | if err != nil { 528 | return token{}, &ParseError{start, err} 529 | } 530 | return token{Kind: tokenLongForm, Length: int(l), Pos: s.pos}, nil 531 | } 532 | 533 | switch symbol { 534 | case "true": 535 | return token{Kind: tokenBytes, Value: []byte{1}, Pos: s.pos, FieldNumber: -1}, nil 536 | case "false": 537 | return token{Kind: tokenBytes, Value: []byte{0}, Pos: s.pos, FieldNumber: -1}, nil 538 | case "inf32": 539 | return token{Kind: tokenBytes, WireType: 5, Value: []byte{0x00, 0x00, 0x80, 0x7f}, Pos: s.pos, FieldNumber: -1}, nil 540 | case "-inf32": 541 | 
return token{Kind: tokenBytes, WireType: 5, Value: []byte{0x00, 0x00, 0x80, 0xff}, Pos: s.pos, FieldNumber: -1}, nil 542 | case "inf64": 543 | return token{Kind: tokenBytes, WireType: 1, Value: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f}, Pos: s.pos, FieldNumber: -1}, nil 544 | case "-inf64": 545 | return token{Kind: tokenBytes, WireType: 1, Value: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0xff}, Pos: s.pos, FieldNumber: -1}, nil 546 | } 547 | 548 | return token{}, fmt.Errorf("unrecognized symbol %q", symbol) 549 | } 550 | 551 | // exec is the main parser loop. 552 | // 553 | // The leftCurly argument, it not nil, represents the { that began the 554 | // length-prefixed block we're currently executing. Because we need to encode 555 | // the full extent of the contents of a {} before emitting the length prefix, 556 | // this function calls itself with a non-nil leftCurly to encode it. 557 | func (s *Scanner) exec(leftCurly *token) ([]byte, error) { 558 | var out []byte 559 | var lengthModifier *token 560 | var groupStack []int64 561 | inferredTypeIndex := -1 562 | lastToken := token{FieldNumber: -1} 563 | for { 564 | token, err := s.next(&lengthModifier) 565 | if err != nil { 566 | return nil, err 567 | } 568 | if lengthModifier != nil && token.Kind != tokenLeftCurly && !(token.Kind == tokenRightCurly && len(groupStack) != 0) { 569 | return nil, &ParseError{lengthModifier.Pos, errors.New("length modifier was not followed by '{', '}', or varint")} 570 | } 571 | prevToken := lastToken 572 | lastToken = token 573 | 574 | switch token.Kind { 575 | case tokenBytes: 576 | if inferredTypeIndex != -1 { 577 | out[inferredTypeIndex] |= byte(token.WireType) 578 | inferredTypeIndex = -1 579 | } 580 | 581 | if token.InferredType { 582 | inferredTypeIndex = len(out) 583 | } 584 | out = append(out, token.Value...) 
585 | case tokenLongForm: 586 | lengthModifier = &token 587 | case tokenLeftCurly: 588 | if inferredTypeIndex != -1 { 589 | out[inferredTypeIndex] |= 2 590 | inferredTypeIndex = -1 591 | } 592 | 593 | child, err := s.exec(&token) 594 | if err != nil { 595 | return nil, err 596 | } 597 | var lengthOverride int 598 | if lengthModifier != nil { 599 | lengthOverride = lengthModifier.Length 600 | } 601 | out = encodeVarint(out, uint64(len(child)), lengthOverride) 602 | out = append(out, child...) 603 | lengthModifier = nil 604 | case tokenGroupCurly: 605 | if prevToken.FieldNumber == -1 || inferredTypeIndex == -1 { 606 | return nil, &ParseError{token.Pos, errors.New("group !{} must immediately follow untyped field number")} 607 | } 608 | 609 | out[inferredTypeIndex] |= byte(3) 610 | inferredTypeIndex = -1 611 | groupStack = append(groupStack, prevToken.FieldNumber) 612 | case tokenRightCurly: 613 | if inferredTypeIndex != -1 { 614 | inferredTypeIndex = -1 615 | } 616 | 617 | if len(groupStack) != 0 { 618 | innerGroup := groupStack[len(groupStack)-1] 619 | groupStack = groupStack[:len(groupStack)-1] 620 | 621 | var lengthOverride int 622 | if lengthModifier != nil { 623 | lengthOverride = lengthModifier.Length 624 | } 625 | out = encodeVarint(out, uint64(innerGroup<<3|4), lengthOverride) 626 | lengthModifier = nil 627 | } else if leftCurly != nil { 628 | return out, nil 629 | } else { 630 | return nil, &ParseError{token.Pos, errors.New("unmatched '}'")} 631 | } 632 | case tokenEOF: 633 | if inferredTypeIndex != -1 { 634 | inferredTypeIndex = -1 635 | } 636 | 637 | if leftCurly == nil && len(groupStack) == 0 { 638 | return out, nil 639 | } 640 | return nil, &ParseError{prevToken.Pos, errors.New("unmatched '{'")} 641 | default: 642 | panic(token) 643 | } 644 | } 645 | } 646 | 647 | // encodeVarint encodes a varint to dest. 648 | // 649 | // Unlike binary.PutUvarint, this function allows encoding non-minimal varints. 
func encodeVarint(dest []byte, value uint64, longForm int) []byte {
	// Minimal encoding first: seven payload bits per byte, lowest bits first,
	// with the high bit set on every byte except the last.
	for ; value > 0x7f; value >>= 7 {
		dest = append(dest, 0x80|byte(value&0x7f))
	}
	dest = append(dest, byte(value))

	if longForm <= 0 {
		return dest
	}

	// Lengthen the encoding by longForm bytes: turn the former final byte into
	// a continuation byte, pad with empty continuation bytes, and finish with
	// a zero byte so the decoded value is unchanged.
	dest[len(dest)-1] |= 0x80
	for pad := longForm - 1; pad > 0; pad-- {
		dest = append(dest, 0x80)
	}
	return append(dest, 0x00)
}

// --------------------------------------------------------------------------------
// /scanner_test.go:
// --------------------------------------------------------------------------------

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14 | 15 | package protoscope 16 | 17 | import ( 18 | "encoding/binary" 19 | "fmt" 20 | "math" 21 | "reflect" 22 | "testing" 23 | 24 | "github.com/google/go-cmp/cmp" 25 | ) 26 | 27 | func num2le(x any) (b []byte) { 28 | switch i := x.(type) { 29 | case float32: 30 | b = make([]byte, 4) 31 | binary.LittleEndian.PutUint32(b, math.Float32bits(i)) 32 | case float64: 33 | b = make([]byte, 8) 34 | binary.LittleEndian.PutUint64(b, math.Float64bits(i)) 35 | default: 36 | panic(fmt.Sprintf("int2le: unsupported: '%s'", reflect.TypeOf(x))) 37 | } 38 | 39 | return 40 | } 41 | 42 | func concat(chunks ...any) (out []byte) { 43 | for _, c := range chunks { 44 | switch x := c.(type) { 45 | case int: 46 | out = append(out, byte(x)) 47 | case []byte: 48 | out = append(out, x...) 49 | case string: 50 | out = append(out, []byte(x)...) 51 | } 52 | } 53 | 54 | return 55 | } 56 | 57 | func TestScan(t *testing.T) { 58 | tests := []struct { 59 | name, text string 60 | // If `output` is `nil`, expects scanning to fail. 
61 | want []byte 62 | }{ 63 | { 64 | name: "empty", 65 | text: "", 66 | want: []byte{}, 67 | }, 68 | { 69 | name: "comment", 70 | text: "#hello", 71 | want: []byte{}, 72 | }, 73 | { 74 | name: "comment with content", 75 | text: "#hello\n`abcd`", 76 | want: []byte{0xab, 0xcd}, 77 | }, 78 | { 79 | text: "garbage", 80 | }, 81 | 82 | { 83 | name: "empty hex", 84 | text: "``", 85 | want: []byte{}, 86 | }, 87 | { 88 | name: "hex", 89 | text: "`0123456789abcdefABCDEFAbCdEfaBcDeF0a1b3c4d5e6f`", 90 | want: []byte{ 91 | 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 92 | 0xAB, 0xCD, 0xEF, 0xAb, 0xCd, 0xEf, 0xaB, 0xcD, 0xeF, 93 | 0x0a, 0x1b, 0x3c, 0x4d, 0x5e, 0x6f, 94 | }, 95 | }, 96 | { 97 | name: "broken hex", 98 | text: "`abcd", 99 | }, 100 | { 101 | name: "single hex", 102 | text: "`a`", 103 | }, 104 | { 105 | name: "odd hex", 106 | text: "`abc`", 107 | }, 108 | { 109 | name: "non-hex in hex", 110 | text: "`bear`", 111 | }, 112 | 113 | { 114 | name: "empty quotes", 115 | text: `""`, 116 | want: []byte{}, 117 | }, 118 | { 119 | name: "quotes", 120 | text: `"hello!"`, 121 | want: []byte("hello!"), 122 | }, 123 | { 124 | name: "quotes concat", 125 | text: `"hello," " world!"`, 126 | want: []byte("hello, world!"), 127 | }, 128 | { 129 | name: "quotes with non-latin", 130 | text: `"施氏食獅史🐈‍⬛🖤"`, 131 | want: []byte("施氏食獅史🐈‍⬛🖤"), 132 | }, 133 | { 134 | name: "quotes with escapes", 135 | text: `"\\\"\ntext\x00\xff"`, 136 | want: []byte("\\\"\ntext\x00\xff"), 137 | }, 138 | { 139 | name: "quotes with whitespace", 140 | text: `" 141 | "`, 142 | want: []byte(" \n\t\t\t "), 143 | }, 144 | { 145 | name: "broken quotes", 146 | text: `"hello!`, 147 | }, 148 | { 149 | name: "broken quotes by escape", 150 | text: `"hello!\"`, 151 | }, 152 | { 153 | name: "bad escape", 154 | text: `"\a"`, 155 | }, 156 | 157 | { 158 | name: "zero", 159 | text: "0", 160 | want: []byte{0x00}, 161 | }, 162 | { 163 | name: "minus zero", 164 | text: "-0", 165 | want: []byte{0x00}, 166 | }, 167 | { 168 | name: 
"long-form:0 zero", 169 | text: "long-form:0 0", 170 | want: []byte{0x00}, 171 | }, 172 | { 173 | name: "long zero", 174 | text: "long-form:5 0", 175 | want: []byte{0x80, 0x80, 0x80, 0x80, 0x80, 0x00}, 176 | }, 177 | 178 | { 179 | name: "one byte", 180 | text: "42", 181 | want: []byte{42}, 182 | }, 183 | { 184 | name: "three byte", 185 | text: "100000", 186 | want: []byte{0xa0, 0x8d, 0x06}, 187 | }, 188 | { 189 | name: "ten byte", 190 | text: "-1", 191 | want: []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01}, 192 | }, 193 | { 194 | name: "one hex byte", 195 | text: "0x5a", 196 | want: []byte{0x5a}, 197 | }, 198 | { 199 | name: "two hex byte", 200 | text: "0xa5", 201 | want: []byte{0xa5, 0x01}, 202 | }, 203 | { 204 | name: "one zig", 205 | text: "-1z", 206 | want: []byte{0x01}, 207 | }, 208 | { 209 | name: "zig 42", 210 | text: "42z", 211 | want: []byte{42 * 2}, 212 | }, 213 | { 214 | name: "long answer", 215 | text: "long-form:5 -42z", 216 | want: []byte{ 217 | 0xd3, 0x80, 0x80, 0x80, 0x80, 0x00, 218 | }, 219 | }, 220 | 221 | { 222 | name: "long eof", 223 | text: "long-form:5", 224 | }, 225 | { 226 | name: "double long", 227 | text: "long-form:3 long-form:4 5", 228 | }, 229 | { 230 | name: "negative long", 231 | text: "long-form:-3 5", 232 | }, 233 | { 234 | name: "hex long", 235 | text: "long-form:0x3 5", 236 | }, 237 | 238 | { 239 | name: "int too big", 240 | text: "18446744073709551616", 241 | }, 242 | { 243 | name: "negative int too big", 244 | text: "-9223372036854775809", 245 | }, 246 | 247 | { 248 | name: "fixed32", 249 | text: "0xaaai32", 250 | want: []byte{ 251 | 0xaa, 0x0a, 0x00, 0x00, 252 | }, 253 | }, 254 | { 255 | name: "-fixed32", 256 | text: "-0xaaai32", 257 | want: []byte{ 258 | 0x56, 0xf5, 0xff, 0xff, 259 | }, 260 | }, 261 | { 262 | name: "fixed64", 263 | text: "0xaaai64", 264 | want: []byte{ 265 | 0xaa, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 266 | }, 267 | }, 268 | { 269 | name: "-fixed64", 270 | text: "-0xaaai64", 271 | want: 
[]byte{ 272 | 0x56, 0xf5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 273 | }, 274 | }, 275 | { 276 | name: "biggest fixed", 277 | text: ` 278 | 18446744073709551615i64 279 | -9223372036854775808i64 280 | 4294967295i32 281 | -2147483648i32 282 | `, 283 | want: []byte{ 284 | 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 285 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 286 | 0xff, 0xff, 0xff, 0xff, 287 | 0x00, 0x00, 0x00, 0x80, 288 | }, 289 | }, 290 | { 291 | name: "fixed32 too big", 292 | text: "4294967296i32", 293 | }, 294 | { 295 | name: "fixed32 too small", 296 | text: "-2147483649i32", 297 | }, 298 | { 299 | name: "long fixed", 300 | text: "long-form:1 1i32", 301 | }, 302 | 303 | { 304 | name: "bools", 305 | text: "true false", 306 | want: []byte{1, 0}, 307 | }, 308 | 309 | { 310 | name: "fp zero", 311 | text: "0.0 -0.0 0.0i32 -0.0i32", 312 | want: concat( 313 | num2le(0.0), 314 | // Fun fact! -0.0 as a Go constant is *not* IEEE -0.0! 315 | num2le(math.Copysign(0, -1)), 316 | num2le(float32(0.0)), 317 | num2le(float32(math.Copysign(0, -1))), 318 | ), 319 | }, 320 | { 321 | name: "infinity", 322 | text: "inf64 -inf64 inf32 -inf32", 323 | want: concat( 324 | num2le(math.Inf(1)), 325 | num2le(math.Inf(-1)), 326 | num2le(float32(math.Inf(1))), 327 | num2le(float32(math.Inf(-1))), 328 | ), 329 | }, 330 | 331 | { 332 | name: "plank", 333 | text: "6.62607015e-34", 334 | want: num2le(6.62607015e-34), 335 | }, 336 | { 337 | name: "speed of light", 338 | text: "-3.0e9i32", 339 | want: num2le(float32(-3e9)), 340 | }, 341 | 342 | { 343 | name: "hex floats", 344 | text: ` 345 | -0xf.0 346 | 0xabcd.efp-10 347 | 0x1.8p5i32 348 | `, 349 | want: concat( 350 | num2le(-0xf.0p0), 351 | num2le(0xabcd.efp-10), 352 | num2le(float32(0x1.8p5)), 353 | ), 354 | }, 355 | 356 | { 357 | name: "oct null", 358 | text: `"\0"`, 359 | want: []byte{ 360 | 0x00, 361 | }, 362 | }, 363 | { 364 | name: "oct null 8", 365 | text: `"\08"`, 366 | want: []byte{ 367 | 0x00, '8', 368 | }, 369 | }, 370 | { 371 
| name: "oct double", 372 | text: `"\13"`, 373 | want: []byte{ 374 | 0x0b, 375 | }, 376 | }, 377 | { 378 | name: "oct double X", 379 | text: `"\13X"`, 380 | want: []byte{ 381 | 0x0b, 'X', 382 | }, 383 | }, 384 | { 385 | name: "oct Y double", 386 | text: `"Y\13"`, 387 | want: []byte{ 388 | 'Y', 0x0b, 389 | }, 390 | }, 391 | { 392 | name: "oct triple", 393 | text: `"\007"`, 394 | want: []byte{ 395 | 0x07, 396 | }, 397 | }, 398 | { 399 | name: "oct WoW", 400 | text: `"\127o\127"`, 401 | want: []byte{ 402 | 0x57, 'o', 0x57, 403 | }, 404 | }, 405 | { 406 | name: "oct hex oct", 407 | text: `"\127\x40\127"`, 408 | want: []byte{ 409 | 0x57, 0x40, 0x57, 410 | }, 411 | }, 412 | { 413 | name: "oct quad", 414 | text: `"\1234"`, 415 | want: []byte{ 416 | 0x53, '4', 417 | }, 418 | }, 419 | { 420 | name: "oct 8", 421 | text: `"\8"`, 422 | }, 423 | { 424 | name: "oct 008", 425 | text: `"\008"`, 426 | want: []byte{ 427 | 0x00, '8', 428 | }, 429 | }, 430 | { 431 | name: "oct nullx3", 432 | text: `"\0\0\0"`, 433 | want: []byte{ 434 | 0x00, 0x00, 0x00, 435 | }, 436 | }, 437 | { 438 | name: "oct max", 439 | text: `"\377"`, 440 | want: []byte{ 441 | 0xff, 442 | }, 443 | }, 444 | { 445 | name: "oct overflow", 446 | text: `"\400"`, 447 | }, 448 | { 449 | name: "oct overflow 2: electric boogaloo", 450 | text: `"\777"`, 451 | }, 452 | 453 | { 454 | name: "no fraction float", 455 | text: "1.", 456 | }, 457 | { 458 | name: "no fraction float w/ exponent", 459 | text: "1e1", 460 | }, 461 | { 462 | name: "plus exponent", 463 | text: "1.0e+1", 464 | }, 465 | { 466 | name: "long float", 467 | text: "long-form:1 1.0", 468 | }, 469 | { 470 | name: "float64 too big", 471 | text: "1.7976931348623157e309", 472 | }, 473 | { 474 | name: "float32 too big", 475 | text: "3.40282347e39i32", 476 | }, 477 | 478 | { 479 | name: "tags", 480 | text: ` 481 | 1:VARINT # A varint. 482 | 2:I64 # A fixed-width, 64-bit blob. 483 | 3:LEN # A length-prefixed blob. 484 | 4:SGROUP # A start-group marker. 
485 | 5:EGROUP # An end-group marker. 486 | 6:I32 # A fixed-width, 32-bit blob. 487 | `, 488 | want: []byte{ 489 | 1<<3 | 0, 490 | 2<<3 | 1, 491 | 3<<3 | 2, 492 | 4<<3 | 3, 493 | 5<<3 | 4, 494 | 6<<3 | 5, 495 | }, 496 | }, 497 | { 498 | name: "unusual field numbers", 499 | text: "-5:6 9z:7 0x22:1 0:0", 500 | want: []byte{ 501 | 0xde, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 502 | 0x9e, 0x01, 503 | 0x91, 0x02, 504 | 0x00, 505 | }, 506 | }, 507 | 508 | { 509 | name: "max field number", 510 | text: "0x1fffffffffffffff:0", 511 | want: []byte{ 512 | 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 513 | }, 514 | }, 515 | { 516 | name: "bad named wire type", 517 | text: "1:LMAO", 518 | }, 519 | { 520 | name: "wire type not a u3", 521 | text: "1:8", 522 | }, 523 | { 524 | name: "field number too big", 525 | text: "0x2000000000000000:0", 526 | }, 527 | 528 | { 529 | name: "wire type inference", 530 | text: ` 531 | 1: 42z 532 | 22: {} 533 | 333: 42i32 534 | 4444: -42i64 535 | 55555: 42.0i32 536 | 666666: 0x42.0 537 | 7777777: inf64 538 | `, 539 | want: []byte{ 540 | 0x08, 0x54, 541 | 0xb2, 0x01, 0x00, 542 | 0xed, 0x14, 0x2a, 0x00, 0x00, 0x00, 543 | 0xe1, 0x95, 0x02, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 544 | 0x9d, 0x90, 0x1b, 0x00, 0x00, 0x28, 0x42, 545 | 0xd1, 0xc2, 0xc5, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x50, 0x40, 546 | 0x89, 0xdf, 0xd5, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f, 547 | }, 548 | }, 549 | { 550 | name: "long-form inference", 551 | text: ` 552 | 1: long-form:2 5 553 | 2: long-form:3 {} 554 | `, 555 | want: []byte{ 556 | 0x08, 0x85, 0x80, 0x00, 557 | 0x12, 0x80, 0x80, 0x80, 0x00, 558 | }, 559 | }, 560 | { 561 | name: "eof inference", 562 | text: "1:", 563 | want: []byte{0x08}, 564 | }, 565 | 566 | { 567 | name: "string field", 568 | text: `23: {"my cool string"}`, 569 | want: concat( 570 | 0xba, 0x01, 14, 571 | "my cool string", 572 | ), 573 | }, 574 | { 575 | name: "message field", 576 | text: `24: { 
577 | 1: 5 578 | 2: {"nested string"} 579 | }`, 580 | want: concat( 581 | 0xc2, 0x01, 0x11, 582 | 0x08, 0x05, 583 | 0x12, 0x0d, 584 | "nested string", 585 | ), 586 | }, 587 | { 588 | name: "repeated varints", 589 | text: `25: { 1 2 3 4 5 6 7 }`, 590 | want: []byte{ 591 | 0xca, 0x01, 0x07, 592 | 1, 2, 3, 4, 5, 6, 7, 593 | }, 594 | }, 595 | { 596 | name: "long prefix", 597 | text: `23: long-form:2 {"non-minimally-prefixed"}`, 598 | want: concat( 599 | 0xba, 0x01, 0x96, 0x80, 0x00, 600 | "non-minimally-prefixed", 601 | ), 602 | }, 603 | 604 | { 605 | name: "unclosed prefix", 606 | text: "{", 607 | }, 608 | { 609 | name: "unclosed group", 610 | text: "1: !{", 611 | }, 612 | { 613 | name: "unopened prefix", 614 | text: "}", 615 | }, 616 | { 617 | name: "long end-of-prefix", 618 | text: "{long-form:2}", 619 | }, 620 | 621 | { 622 | name: "empty group", 623 | text: "1: !{}", 624 | want: []byte{0x0b, 0x0c}, 625 | }, 626 | { 627 | name: "group with stuff", 628 | text: `5: !{1: 5 "foo"}`, 629 | want: concat( 630 | 0x2b, 631 | 0x08, 0x05, 632 | "foo", 633 | 0x2c, 634 | ), 635 | }, 636 | { 637 | name: "nested groups", 638 | text: `1:!{2:!{3:!{"lmao"}}}`, 639 | want: concat( 640 | 0x0b, 641 | 0x13, 642 | 0x1b, 643 | "lmao", 644 | 0x1c, 645 | 0x14, 646 | 0x0c, 647 | ), 648 | }, 649 | 650 | { 651 | name: "nested groups and length prefixes", 652 | text: `1:!{2:{3:!{{"lmao"}}}}`, 653 | want: concat( 654 | 0x0b, 655 | 0x12, 0x07, 656 | 0x1b, 657 | 0x04, "lmao", 658 | 0x1c, 659 | 0x0c, 660 | ), 661 | }, 662 | 663 | { 664 | name: "bare group", 665 | text: "!{}", 666 | }, 667 | { 668 | name: "typed group", 669 | text: "1:SGROUP !{}", 670 | }, 671 | 672 | { 673 | name: "language.txt", 674 | text: LanguageTxt, 675 | want: concat( 676 | "Quoted strings are delimited by double quotes. Backslash denotes escape\n", 677 | "sequences. Legal escape sequences are: \\ \" \x00 \000 \n. \x00 consumes two\n", 678 | "hex digits and emits a byte. 
\000 consumes one to three octal digits and emits\n", 679 | "a byte (rejecting values that do not fit in a single octet). Otherwise, any\n", 680 | "byte before the closing quote, including a newline, is emitted as-is.", 681 | 682 | "hello world", 683 | "hello world", 684 | 685 | 0x00, 0xab, 0xcd, 0xef, 0xab, 0xcd, 0xef, 686 | 0xc8, 0x03, 687 | 0x81, 0x80, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01, 688 | 0x03, 0x03, 689 | 690 | 0x00, 0x00, 0x00, 0x00, 691 | 0xe9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 692 | 693 | 0x83, 0x80, 0x80, 0x00, 694 | 0x83, 0x80, 0x80, 0x00, 695 | 696 | 0x01, 697 | 0x00, 698 | 699 | num2le(1.0), 700 | num2le(9.423e-2), 701 | num2le(-0x1.ffp52), 702 | num2le(float32(1.5)), 703 | num2le(0xf.fp0), 704 | num2le(float32(math.Inf(1))), 705 | num2le(math.Inf(-1)), 706 | 707 | 0x08, 0x11, 0x1a, 0x23, 0x2c, 0x35, 708 | 0x80, 0x01, 709 | 0x46, 710 | 711 | 0x08, 55*2, 712 | 0x11, num2le(1.23), 713 | 0x1a, 0x04, "text", 714 | 0x35, 0xff, 0xff, 0xff, 0xff, 715 | 0x43, 42, 0x44, 716 | 717 | 0xba, 0x01, 14, "my cool string", 718 | 719 | 0xc2, 0x01, 0x11, 720 | 0x08, 0x05, 721 | 0x12, 0x0d, "nested string", 722 | 723 | 0xca, 0x01, 0x07, 1, 2, 3, 4, 5, 6, 7, 724 | 725 | 0xba, 0x01, 0x96, 0x80, 0x00, "non-minimally-prefixed", 726 | 727 | 0xd3, 0x01, 728 | 0x08, 0x6e, 729 | 0x11, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0xf6, 0x3f, 730 | 0x1a, 0x04, "abcd", 731 | 0xd4, 0x01, 732 | 733 | 0xdb, 0x01, 734 | 0xdc, 0x81, 0x80, 0x80, 0x00, 735 | 736 | 0x12, 0x04, "abcd", 737 | 0x12, 0x05, "abcd", 738 | 0x29, "stuff", 739 | ), 740 | }, 741 | } 742 | 743 | for _, tt := range tests { 744 | if tt.name == "" { 745 | tt.name = fmt.Sprintf("%q", tt.text) 746 | } 747 | t.Run(tt.name, func(t *testing.T) { 748 | got, err := NewScanner(tt.text).Exec() 749 | if got == nil { 750 | got = []byte{} 751 | } 752 | 753 | if tt.want == nil { 754 | if err == nil { 755 | t.Fatal("expected an error but didn't get one") 756 | } 757 | } else if err != nil { 758 | t.Fatal("unexpected 
error", err) 759 | } else if d := cmp.Diff(tt.want, got); d != "" { 760 | t.Fatal("output mismatch (-want, +got):", d) 761 | } 762 | }) 763 | } 764 | } 765 | -------------------------------------------------------------------------------- /testdata/explicit-wire-types.pb.golden: -------------------------------------------------------------------------------- 1 | # message.pb ExplicitWireTypes 2 | 1:VARINT 101 3 | 2:VARINT 102 4 | 3:VARINT 103 5 | 4:VARINT 104 6 | 5:VARINT 210 7 | 6:VARINT 212 8 | 7:I32 107i32 9 | 8:I64 108i64 10 | 9:I32 109i32 11 | 10:I64 110i64 12 | 11:I32 111.0i32 # 0x42de0000i32 13 | 12:I64 112.0 # 0x405c000000000000i64 14 | 13:VARINT 1 15 | 14:LEN {"115"} 16 | 15:LEN {"116"} 17 | 16:SGROUP 18 | 17:VARINT 117 19 | 16:EGROUP 20 | 18:LEN {1:VARINT 118} 21 | 19:LEN {1:VARINT 119} 22 | 20:LEN {1:VARINT 120} 23 | 21:VARINT 3 24 | 22:VARINT 6 25 | 23:VARINT 9 26 | 24:LEN {"124"} 27 | 25:LEN {"125"} 28 | 26:LEN {1:VARINT 126} 29 | 27:LEN {1:VARINT 127} 30 | 28:LEN {1:VARINT 128} 31 | 31:VARINT 201 32 | 31:VARINT 301 33 | 32:VARINT 202 34 | 32:VARINT 302 35 | 33:VARINT 203 36 | 33:VARINT 303 37 | 34:VARINT 204 38 | 34:VARINT 304 39 | 35:VARINT 410 40 | 35:VARINT 610 41 | 36:VARINT 412 42 | 36:VARINT 612 43 | 37:I32 207i32 44 | 37:I32 307i32 45 | 38:I64 208i64 46 | 38:I64 308i64 47 | 39:I32 209i32 48 | 39:I32 309i32 49 | 40:I64 210i64 50 | 40:I64 310i64 51 | 41:I32 211.0i32 # 0x43530000i32 52 | 41:I32 311.0i32 # 0x439b8000i32 53 | 42:I64 212.0 # 0x406a800000000000i64 54 | 42:I64 312.0 # 0x4073800000000000i64 55 | 43:VARINT 1 56 | 43:VARINT 0 57 | 44:LEN {"215"} 58 | 44:LEN {"315"} 59 | 45:LEN {"216"} 60 | 45:LEN {"316"} 61 | 46:SGROUP 62 | 47:VARINT 217 63 | 46:EGROUP 64 | 46:SGROUP 65 | 47:VARINT 317 66 | 46:EGROUP 67 | 48:LEN {1:VARINT 218} 68 | 48:LEN {1:VARINT 318} 69 | 49:LEN {1:VARINT 219} 70 | 49:LEN {1:VARINT 319} 71 | 50:LEN {1:VARINT 220} 72 | 50:LEN {1:VARINT 320} 73 | 51:VARINT 2 74 | 51:VARINT 3 75 | 52:VARINT 5 76 | 52:VARINT 6 77 | 
53:VARINT 8 78 | 53:VARINT 9 79 | 54:LEN {"224"} 80 | 54:LEN {"324"} 81 | 55:LEN {"225"} 82 | 55:LEN {"325"} 83 | 57:LEN {1:VARINT 227} 84 | 57:LEN {1:VARINT 327} 85 | 61:VARINT 401 86 | 62:VARINT 402 87 | 63:VARINT 403 88 | 64:VARINT 404 89 | 65:VARINT 810 90 | 66:VARINT 812 91 | 67:I32 407i32 92 | 68:I64 408i64 93 | 69:I32 409i32 94 | 70:I64 410i64 95 | 71:I32 411.0i32 # 0x43cd8000i32 96 | 72:I64 412.0 # 0x4079c00000000000i64 97 | 73:VARINT 0 98 | 74:LEN {"415"} 99 | 75:LEN {"416"} 100 | 81:VARINT 1 101 | 82:VARINT 4 102 | 83:VARINT 7 103 | 84:LEN {"424"} 104 | 85:LEN {"425"} 105 | 111:VARINT 601 106 | 112:LEN {1:VARINT 602} 107 | 113:LEN {"603"} 108 | 114:LEN {"604"} 109 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/281cfe7fa226ed9ce863770b1b0e0df36b5d3a7def5b8d8f8793ae2a72e0721b: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("2\x01000000") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/47e286a415f65888c5e703a40db2915f8cc40f64e1fad8473699b77066f2c57a: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("00\xf9\x00") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/50640a7a67794863f262b749f288aff8610f2cac65b7cf65b8708eb0ea104519: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("001") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/83cdd88140a480b3df36ede77b3b6c5e417419cdf7a06816e4f2b8847b0e8e58: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("2\v\x91\xcd000000000") 3 | -------------------------------------------------------------------------------- 
/testdata/fuzz/FuzzRoundTrip/a57870730bd74cb4dc5f42974a51f21aa30f472685651cc6b209c105e549ee02: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("1000000\xff\xff") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/d0cf2d61f6fd4befd99d5b5e27a86b2e483c755f9f5888e0637f230d27e7fe63: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("2\x01\x9f") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/e9e048e13dff8ac2c7f1621cfe916924dbfd27336808d43fd4c4330ac9d15856: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("2\x00") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/f81d19afdbd7367e5511f623c72fb0fd04fb25450cf44393246e697f27744d89: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("%00\xac\xff0000") 3 | -------------------------------------------------------------------------------- /testdata/fuzz/FuzzRoundTrip/fbab04f43516aa5e2b159a4081fcca5a99a50f57e96df8ce95d58ffff75b9902: -------------------------------------------------------------------------------- 1 | go test fuzz v1 2 | []byte("10") 3 | -------------------------------------------------------------------------------- /testdata/groups.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/groups.pb -------------------------------------------------------------------------------- /testdata/groups.pb.golden: -------------------------------------------------------------------------------- 1 | # groups.pb 2 | 1: !{ 3 | 1: 101 4 | 2: 202i32 5 | 3: 
{12: 7.2232605e28i32} # 0x6f696569i32 6 | } 7 | 2:SGROUP 8 | 3:EGROUP 9 | 4:EGROUP 10 | 5: !{6: !{}} 11 | 6: !{long-form:5} 12 | 7: !{ 13 | 1: 1 14 | long-form:5 15 | } 16 | 7: !{ 17 | 1: 1 18 | 1: 1 19 | long-form:5 20 | } 21 | 10:SGROUP 22 | -------------------------------------------------------------------------------- /testdata/maps.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/maps.pb -------------------------------------------------------------------------------- /testdata/message-explicit-prefixes.pb.golden: -------------------------------------------------------------------------------- 1 | # message.pb ExplicitLengthPrefixes 2 | 1: 101 3 | 2: 102 4 | 3: 103 5 | 4: 104 6 | 5: 210 7 | 6: 212 8 | 7: 107i32 9 | 8: 108i64 10 | 9: 109i32 11 | 10: 110i64 12 | 11: 111.0i32 # 0x42de0000i32 13 | 12: 112.0 # 0x405c000000000000i64 14 | 13: 1 15 | 14:LEN 3 "115" 16 | 15:LEN 3 "116" 17 | 16: !{17: 117} 18 | 18:LEN 2 1: 118 19 | 19:LEN 2 1: 119 20 | 20:LEN 2 1: 120 21 | 21: 3 22 | 22: 6 23 | 23: 9 24 | 24:LEN 3 "124" 25 | 25:LEN 3 "125" 26 | 26:LEN 2 1: 126 27 | 27:LEN 2 1: 127 28 | 28:LEN 3 1: 128 29 | 31: 201 30 | 31: 301 31 | 32: 202 32 | 32: 302 33 | 33: 203 34 | 33: 303 35 | 34: 204 36 | 34: 304 37 | 35: 410 38 | 35: 610 39 | 36: 412 40 | 36: 612 41 | 37: 207i32 42 | 37: 307i32 43 | 38: 208i64 44 | 38: 308i64 45 | 39: 209i32 46 | 39: 309i32 47 | 40: 210i64 48 | 40: 310i64 49 | 41: 211.0i32 # 0x43530000i32 50 | 41: 311.0i32 # 0x439b8000i32 51 | 42: 212.0 # 0x406a800000000000i64 52 | 42: 312.0 # 0x4073800000000000i64 53 | 43: 1 54 | 43: 0 55 | 44:LEN 3 "215" 56 | 44:LEN 3 "315" 57 | 45:LEN 3 "216" 58 | 45:LEN 3 "316" 59 | 46: !{47: 217} 60 | 46: !{47: 317} 61 | 48:LEN 3 1: 218 62 | 48:LEN 3 1: 318 63 | 49:LEN 3 1: 219 64 | 49:LEN 3 1: 319 65 | 50:LEN 3 1: 220 66 | 50:LEN 3 1: 320 67 | 51: 2 68 | 51: 3 69 | 52: 5 70 | 
52: 6 71 | 53: 8 72 | 53: 9 73 | 54:LEN 3 "224" 74 | 54:LEN 3 "324" 75 | 55:LEN 3 "225" 76 | 55:LEN 3 "325" 77 | 57:LEN 3 1: 227 78 | 57:LEN 3 1: 327 79 | 61: 401 80 | 62: 402 81 | 63: 403 82 | 64: 404 83 | 65: 810 84 | 66: 812 85 | 67: 407i32 86 | 68: 408i64 87 | 69: 409i32 88 | 70: 410i64 89 | 71: 411.0i32 # 0x43cd8000i32 90 | 72: 412.0 # 0x4079c00000000000i64 91 | 73: 0 92 | 74:LEN 3 "415" 93 | 75:LEN 3 "416" 94 | 81: 1 95 | 82: 4 96 | 83: 7 97 | 84:LEN 3 "424" 98 | 85:LEN 3 "425" 99 | 111: 601 100 | 112:LEN 3 1: 602 101 | 113:LEN 3 "603" 102 | 114:LEN 3 "604" 103 | -------------------------------------------------------------------------------- /testdata/message-fields.pb.golden: -------------------------------------------------------------------------------- 1 | # message.pb Schema=unittest.TestAllTypes PrintFieldNames 2 | 1: 101 # optional_int32 3 | 2: 102 # optional_int64 4 | 3: 103 # optional_uint32 5 | 4: 104 # optional_uint64 6 | 5: 105z # optional_sint32 7 | 6: 106z # optional_sint64 8 | 7: 107i32 # optional_fixed32 9 | 8: 108i64 # optional_fixed64 10 | 9: 109i32 # optional_sfixed32 11 | 10: 110i64 # optional_sfixed64 12 | 11: 111.0i32 # optional_float, 0x42de0000i32 13 | 12: 112.0 # optional_double, 0x405c000000000000i64 14 | 13: true # optional_bool 15 | 14: {"115"} # optional_string 16 | 15: {"116"} # optional_bytes 17 | 16: !{ # optionalgroup 18 | 17: 117 # a 19 | } 20 | 18: { # optional_nested_message 21 | 1: 118 # bb 22 | } 23 | 19: { # optional_foreign_message 24 | 1: 119 # c 25 | } 26 | 20: {1: 120} # optional_int32 27 | 21: 3 # optional_nested_enum 28 | 22: 6 # optional_foreign_enum 29 | 23: 9 30 | 24: {"124"} # optional_string_piece 31 | 25: {"125"} # optional_cord 32 | 26: {1: 126} # optional_int32 33 | 27: { # optional_lazy_message 34 | 1: 127 # bb 35 | } 36 | 28: {1: 128} # optional_int32 37 | 31: 201 # repeated_int32 38 | 31: 301 # repeated_int32 39 | 32: 202 # repeated_int64 40 | 32: 302 # repeated_int64 41 | 33: 203 # repeated_uint32 42 | 
33: 303 # repeated_uint32 43 | 34: 204 # repeated_uint64 44 | 34: 304 # repeated_uint64 45 | 35: 205z # repeated_sint32 46 | 35: 305z # repeated_sint32 47 | 36: 206z # repeated_sint64 48 | 36: 306z # repeated_sint64 49 | 37: 207i32 # repeated_fixed32 50 | 37: 307i32 # repeated_fixed32 51 | 38: 208i64 # repeated_fixed64 52 | 38: 308i64 # repeated_fixed64 53 | 39: 209i32 # repeated_sfixed32 54 | 39: 309i32 # repeated_sfixed32 55 | 40: 210i64 # repeated_sfixed64 56 | 40: 310i64 # repeated_sfixed64 57 | 41: 211.0i32 # repeated_float, 0x43530000i32 58 | 41: 311.0i32 # repeated_float, 0x439b8000i32 59 | 42: 212.0 # repeated_double, 0x406a800000000000i64 60 | 42: 312.0 # repeated_double, 0x4073800000000000i64 61 | 43: true # repeated_bool 62 | 43: false # repeated_bool 63 | 44: {"215"} # repeated_string 64 | 44: {"315"} # repeated_string 65 | 45: {"216"} # repeated_bytes 66 | 45: {"316"} # repeated_bytes 67 | 46: !{ # repeatedgroup 68 | 47: 217 # a 69 | } 70 | 46: !{ # repeatedgroup 71 | 47: 317 # a 72 | } 73 | 48: { # repeated_nested_message 74 | 1: 218 # bb 75 | } 76 | 48: { # repeated_nested_message 77 | 1: 318 # bb 78 | } 79 | 49: { # repeated_foreign_message 80 | 1: 219 # c 81 | } 82 | 49: { # repeated_foreign_message 83 | 1: 319 # c 84 | } 85 | 50: {1: 220} # optional_int32 86 | 50: {1: 320} # optional_int32 87 | 51: 2 # repeated_nested_enum 88 | 51: 3 # repeated_nested_enum 89 | 52: 5 # repeated_foreign_enum 90 | 52: 6 # repeated_foreign_enum 91 | 53: 8 92 | 53: 9 93 | 54: {"224"} # repeated_string_piece 94 | 54: {"324"} # repeated_string_piece 95 | 55: {"225"} # repeated_cord 96 | 55: {"325"} # repeated_cord 97 | 57: { # repeated_lazy_message 98 | 1: 227 # bb 99 | } 100 | 57: { # repeated_lazy_message 101 | 1: 327 # bb 102 | } 103 | 61: 401 # default_int32 104 | 62: 402 # default_int64 105 | 63: 403 # default_uint32 106 | 64: 404 # default_uint64 107 | 65: 405z # default_sint32 108 | 66: 406z # default_sint64 109 | 67: 407i32 # default_fixed32 110 | 68: 408i64 # 
default_fixed64 111 | 69: 409i32 # default_sfixed32 112 | 70: 410i64 # default_sfixed64 113 | 71: 411.0i32 # default_float, 0x43cd8000i32 114 | 72: 412.0 # default_double, 0x4079c00000000000i64 115 | 73: false # default_bool 116 | 74: {"415"} # default_string 117 | 75: {"416"} # default_bytes 118 | 81: 1 # default_nested_enum 119 | 82: 4 # default_foreign_enum 120 | 83: 7 121 | 84: {"424"} # default_string_piece 122 | 85: {"425"} # default_cord 123 | 111: 601 # oneof_uint32 124 | 112: { # oneof_nested_message 125 | 1: 602 # bb 126 | } 127 | 113: {"603"} # oneof_string 128 | 114: {"604"} # oneof_bytes 129 | -------------------------------------------------------------------------------- /testdata/message.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/message.pb -------------------------------------------------------------------------------- /testdata/message.pb.golden: -------------------------------------------------------------------------------- 1 | # message.pb 2 | 1: 101 3 | 2: 102 4 | 3: 103 5 | 4: 104 6 | 5: 210 7 | 6: 212 8 | 7: 107i32 9 | 8: 108i64 10 | 9: 109i32 11 | 10: 110i64 12 | 11: 111.0i32 # 0x42de0000i32 13 | 12: 112.0 # 0x405c000000000000i64 14 | 13: 1 15 | 14: {"115"} 16 | 15: {"116"} 17 | 16: !{17: 117} 18 | 18: {1: 118} 19 | 19: {1: 119} 20 | 20: {1: 120} 21 | 21: 3 22 | 22: 6 23 | 23: 9 24 | 24: {"124"} 25 | 25: {"125"} 26 | 26: {1: 126} 27 | 27: {1: 127} 28 | 28: {1: 128} 29 | 31: 201 30 | 31: 301 31 | 32: 202 32 | 32: 302 33 | 33: 203 34 | 33: 303 35 | 34: 204 36 | 34: 304 37 | 35: 410 38 | 35: 610 39 | 36: 412 40 | 36: 612 41 | 37: 207i32 42 | 37: 307i32 43 | 38: 208i64 44 | 38: 308i64 45 | 39: 209i32 46 | 39: 309i32 47 | 40: 210i64 48 | 40: 310i64 49 | 41: 211.0i32 # 0x43530000i32 50 | 41: 311.0i32 # 0x439b8000i32 51 | 42: 212.0 # 0x406a800000000000i64 52 | 42: 312.0 # 0x4073800000000000i64 53 
| 43: 1 54 | 43: 0 55 | 44: {"215"} 56 | 44: {"315"} 57 | 45: {"216"} 58 | 45: {"316"} 59 | 46: !{47: 217} 60 | 46: !{47: 317} 61 | 48: {1: 218} 62 | 48: {1: 318} 63 | 49: {1: 219} 64 | 49: {1: 319} 65 | 50: {1: 220} 66 | 50: {1: 320} 67 | 51: 2 68 | 51: 3 69 | 52: 5 70 | 52: 6 71 | 53: 8 72 | 53: 9 73 | 54: {"224"} 74 | 54: {"324"} 75 | 55: {"225"} 76 | 55: {"325"} 77 | 57: {1: 227} 78 | 57: {1: 327} 79 | 61: 401 80 | 62: 402 81 | 63: 403 82 | 64: 404 83 | 65: 810 84 | 66: 812 85 | 67: 407i32 86 | 68: 408i64 87 | 69: 409i32 88 | 70: 410i64 89 | 71: 411.0i32 # 0x43cd8000i32 90 | 72: 412.0 # 0x4079c00000000000i64 91 | 73: 0 92 | 74: {"415"} 93 | 75: {"416"} 94 | 81: 1 95 | 82: 4 96 | 83: 7 97 | 84: {"424"} 98 | 85: {"425"} 99 | 111: 601 100 | 112: {1: 602} 101 | 113: {"603"} 102 | 114: {"604"} 103 | -------------------------------------------------------------------------------- /testdata/no-groups.pb.golden: -------------------------------------------------------------------------------- 1 | # groups.pb NoGroups 2 | 1:SGROUP 3 | 1: 101 4 | 2: 202i32 5 | 3: {12: 7.2232605e28i32} # 0x6f696569i32 6 | 1:EGROUP 7 | 2:SGROUP 8 | 3:EGROUP 9 | 4:EGROUP 10 | 5:SGROUP 11 | 6:SGROUP 6:EGROUP 12 | 5:EGROUP 13 | 6:SGROUP long-form:5 6:EGROUP 14 | 7:SGROUP 15 | 1: 1 16 | long-form:5 7:EGROUP 17 | 7:SGROUP 18 | 1: 1 19 | 1: 1 20 | long-form:5 7:EGROUP 21 | 10:SGROUP 22 | -------------------------------------------------------------------------------- /testdata/oneof.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/oneof.pb -------------------------------------------------------------------------------- /testdata/oneof.pb.golden: -------------------------------------------------------------------------------- 1 | # oneof.pb 2 | 1: 101 3 | 2: 102 4 | 3: 103 5 | 4: 104 6 | 5: 210 7 | 6: 212 8 | 7: 107i32 9 | 8: 108i64 10 | 9: 109i32 11 | 
10: 110i64 12 | 11: 111.0i32 # 0x42de0000i32 13 | 12: 112.0 # 0x405c000000000000i64 14 | 13: 1 15 | 14: {"115"} 16 | 15: {"116"} 17 | 16: !{17: 117} 18 | 18: {1: 118} 19 | 19: {1: 119} 20 | 20: {1: 120} 21 | 21: 3 22 | 22: 6 23 | 23: 9 24 | 24: {"124"} 25 | 25: {"125"} 26 | 26: {1: 126} 27 | 27: {1: 127} 28 | 28: {1: 128} 29 | 31: 201 30 | 31: 301 31 | 32: 202 32 | 32: 302 33 | 33: 203 34 | 33: 303 35 | 34: 204 36 | 34: 304 37 | 35: 410 38 | 35: 610 39 | 36: 412 40 | 36: 612 41 | 37: 207i32 42 | 37: 307i32 43 | 38: 208i64 44 | 38: 308i64 45 | 39: 209i32 46 | 39: 309i32 47 | 40: 210i64 48 | 40: 310i64 49 | 41: 211.0i32 # 0x43530000i32 50 | 41: 311.0i32 # 0x439b8000i32 51 | 42: 212.0 # 0x406a800000000000i64 52 | 42: 312.0 # 0x4073800000000000i64 53 | 43: 1 54 | 43: 0 55 | 44: {"215"} 56 | 44: {"315"} 57 | 45: {"216"} 58 | 45: {"316"} 59 | 46: !{47: 217} 60 | 46: !{47: 317} 61 | 48: {1: 218} 62 | 48: {1: 318} 63 | 49: {1: 219} 64 | 49: {1: 319} 65 | 50: {1: 220} 66 | 50: {1: 320} 67 | 51: 2 68 | 51: 3 69 | 52: 5 70 | 52: 6 71 | 53: 8 72 | 53: 9 73 | 54: {"224"} 74 | 54: {"324"} 75 | 55: {"225"} 76 | 55: {"325"} 77 | 57: {1: 227} 78 | 57: {1: 327} 79 | 61: 401 80 | 62: 402 81 | 63: 403 82 | 64: 404 83 | 65: 810 84 | 66: 812 85 | 67: 407i32 86 | 68: 408i64 87 | 69: 409i32 88 | 70: 410i64 89 | 71: 411.0i32 # 0x43cd8000i32 90 | 72: 412.0 # 0x4079c00000000000i64 91 | 73: 0 92 | 74: {"415"} 93 | 75: {"416"} 94 | 81: 1 95 | 82: 4 96 | 83: 7 97 | 84: {"424"} 98 | 85: {"425"} 99 | 114: {"604"} 100 | -------------------------------------------------------------------------------- /testdata/packed-big.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/packed-big.pb -------------------------------------------------------------------------------- /testdata/packed-big.pb.golden: 
-------------------------------------------------------------------------------- 1 | # packed-big.pb Schema=unittest.TestPackedTypes 2 | 90: { 3 | 42 42 42 42 42 42 42 42 4 | 42 42 42 42 42 42 42 42 5 | 42 42 42 42 42 42 42 42 6 | 42 42 42 42 42 42 42 42 7 | 42 42 42 42 42 42 42 42 8 | 42 42 42 42 42 42 42 42 9 | 42 42 10 | } 11 | 90: { 12 | 2 3 5 7 11 13 17 19 13 | 23 29 31 37 41 43 47 53 14 | 59 61 67 71 73 79 83 89 15 | 97 101 103 107 109 113 127 131 16 | 137 139 149 151 157 163 167 173 17 | 179 181 191 193 197 199 211 223 18 | 227 229 233 239 241 251 257 263 19 | 269 271 277 281 283 293 307 311 20 | 313 317 331 337 347 349 353 359 21 | 367 373 379 383 389 397 401 409 22 | 419 421 431 433 439 443 449 457 23 | 461 463 467 479 487 491 499 503 24 | 509 521 523 541 547 557 563 569 25 | 571 577 587 593 599 601 607 613 26 | 617 619 631 641 643 647 653 659 27 | 661 673 677 683 691 701 709 719 28 | 727 733 739 743 751 757 761 769 29 | 773 787 797 809 811 821 823 827 30 | 829 839 853 857 859 863 877 881 31 | 883 887 907 911 919 929 937 941 32 | 947 953 967 971 977 983 991 997 33 | 1009 1013 1019 1021 1031 1033 1039 1049 34 | 1051 1061 1063 1069 1087 1091 1093 1097 35 | 1103 1109 1117 1123 1129 1151 1153 1163 36 | 1171 1181 1187 1193 1201 1213 1217 1223 37 | } 38 | 101: { 39 | 19.97 # 0x4033f851eb851eb8i64 40 | 19.97 # 0x4033f851eb851eb8i64 41 | 19.97 # 0x4033f851eb851eb8i64 42 | 19.97 # 0x4033f851eb851eb8i64 43 | 19.97 # 0x4033f851eb851eb8i64 44 | 19.97 # 0x4033f851eb851eb8i64 45 | 19.97 # 0x4033f851eb851eb8i64 46 | 19.97 # 0x4033f851eb851eb8i64 47 | 19.97 # 0x4033f851eb851eb8i64 48 | 19.97 # 0x4033f851eb851eb8i64 49 | 19.97 # 0x4033f851eb851eb8i64 50 | 19.97 # 0x4033f851eb851eb8i64 51 | 19.97 # 0x4033f851eb851eb8i64 52 | 19.97 # 0x4033f851eb851eb8i64 53 | 19.97 # 0x4033f851eb851eb8i64 54 | 19.97 # 0x4033f851eb851eb8i64 55 | 19.97 # 0x4033f851eb851eb8i64 56 | 19.97 # 0x4033f851eb851eb8i64 57 | 19.97 # 0x4033f851eb851eb8i64 58 | 19.97 # 0x4033f851eb851eb8i64 59 | 19.97 # 
0x4033f851eb851eb8i64 60 | 19.97 # 0x4033f851eb851eb8i64 61 | 19.97 # 0x4033f851eb851eb8i64 62 | 19.97 # 0x4033f851eb851eb8i64 63 | 19.97 # 0x4033f851eb851eb8i64 64 | 19.97 # 0x4033f851eb851eb8i64 65 | 19.97 # 0x4033f851eb851eb8i64 66 | } 67 | -------------------------------------------------------------------------------- /testdata/packed-schema.pb.golden: -------------------------------------------------------------------------------- 1 | # packed.pb Schema=unittest.TestPackedTypes 2 | 90: {601 701} 3 | 91: {602 702} 4 | 92: {603 703} 5 | 93: {604 704} 6 | 94: {605z 705z} 7 | 95: {606z 706z} 8 | 96: {607i32 707i32} 9 | 97: {608i64 708i64} 10 | 98: {609i32 709i32} 11 | 99: {610i64 710i64} 12 | 100: { 13 | 611.0i32 # 0x4418c000i32 14 | 711.0i32 # 0x4431c000i32 15 | } 16 | 101: { 17 | 612.0 # 0x4083200000000000i64 18 | 712.0 # 0x4086400000000000i64 19 | } 20 | 102: {true false} 21 | 103: {5 6} 22 | -------------------------------------------------------------------------------- /testdata/packed.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/packed.pb -------------------------------------------------------------------------------- /testdata/packed.pb.golden: -------------------------------------------------------------------------------- 1 | # packed.pb 2 | 90: {`d904bd05`} 3 | 91: {`da04be05`} 4 | 92: {`db04bf05`} 5 | 93: {`dc04c005`} 6 | 94: {`ba09820b`} 7 | 95: { 8 | 151:EGROUP 9 | 176:EGROUP 10 | } 11 | 96: {`5f020000c3020000`} 12 | 97: {`6002000000000000c402000000000000`} 13 | 98: {`61020000c5020000`} 14 | 99: {`6202000000000000c602000000000000`} 15 | 100: {`00c0184400c03144`} 16 | 101: {`00000000002083400000000000408640`} 17 | 102: {`0100`} 18 | 103: {`0506`} 19 | -------------------------------------------------------------------------------- /testdata/proto3.pb: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/proto3.pb -------------------------------------------------------------------------------- /testdata/proto3.pb.golden: -------------------------------------------------------------------------------- 1 | # proto3.pb 2 | 31: {`65ad02`} 3 | 32: {`ca01ae02`} 4 | 33: {`cb01af02`} 5 | 34: {`cc01b002`} 6 | 35: {`9a03e204`} 7 | 36: { 8 | 51:EGROUP 9 | 76:EGROUP 10 | } 11 | 37: {`cf00000033010000`} 12 | 38: {`d0000000000000003401000000000000`} 13 | 39: {`d100000035010000`} 14 | 40: {`d2000000000000003601000000000000`} 15 | 41: {`0000534300809b43`} 16 | 42: {`0000000000806a400000000000807340`} 17 | 43: {`0100`} 18 | 44: {"215"} 19 | 44: {"315"} 20 | 45: {"216"} 21 | 45: {"316"} 22 | 48: {1: 218} 23 | 48: {1: 318} 24 | 49: {1: 219} 25 | 49: {1: 319} 26 | 50: {1: 220} 27 | 50: {1: 320} 28 | 51: {`0203`} 29 | 52: {`0506`} 30 | 54: {"224"} 31 | 54: {"324"} 32 | 55: {"225"} 33 | 55: {"325"} 34 | 57: {1: 227} 35 | 57: {1: 327} 36 | 114: {"604"} 37 | -------------------------------------------------------------------------------- /testdata/unittest.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | // See 16 | // https://github.com/protocolbuffers/protobuf/blob/main/src/google/protobuf/unittest.proto 17 | 18 | syntax = "proto2"; 19 | 20 | package unittest; 21 | 22 | option optimize_for = SPEED; 23 | 24 | // This proto includes every type of field in both singular and repeated 25 | // forms. 26 | message TestAllTypes { 27 | message NestedMessage { 28 | // The field name "b" fails to compile in proto1 because it conflicts with 29 | // a local variable named "b" in one of the generated methods. Doh. 30 | // This file needs to compile in proto1 to test backwards-compatibility. 31 | optional int32 bb = 1; 32 | } 33 | 34 | enum NestedEnum { 35 | FOO = 1; 36 | BAR = 2; 37 | BAZ = 3; 38 | NEG = -1; // Intentionally negative. 39 | } 40 | 41 | // Singular 42 | optional int32 optional_int32 = 1; 43 | optional int64 optional_int64 = 2; 44 | optional uint32 optional_uint32 = 3; 45 | optional uint64 optional_uint64 = 4; 46 | optional sint32 optional_sint32 = 5; 47 | optional sint64 optional_sint64 = 6; 48 | optional fixed32 optional_fixed32 = 7; 49 | optional fixed64 optional_fixed64 = 8; 50 | optional sfixed32 optional_sfixed32 = 9; 51 | optional sfixed64 optional_sfixed64 = 10; 52 | optional float optional_float = 11; 53 | optional double optional_double = 12; 54 | optional bool optional_bool = 13; 55 | optional string optional_string = 14; 56 | optional bytes optional_bytes = 15; 57 | 58 | optional group OptionalGroup = 16 { 59 | optional int32 a = 17; 60 | } 61 | 62 | optional NestedMessage optional_nested_message = 18; 63 | optional ForeignMessage optional_foreign_message = 19; 64 | 65 | optional NestedEnum optional_nested_enum = 21; 66 | optional ForeignEnum optional_foreign_enum = 22; 67 | 68 | optional string optional_string_piece = 24 [ctype = STRING_PIECE]; 69 | optional string optional_cord = 25 [ctype = CORD]; 70 | 71 | optional NestedMessage optional_lazy_message = 27 [lazy = true]; 72 | 73 | // Repeated 74 | repeated int32 repeated_int32 = 31; 75 | 
repeated int64 repeated_int64 = 32; 76 | repeated uint32 repeated_uint32 = 33; 77 | repeated uint64 repeated_uint64 = 34; 78 | repeated sint32 repeated_sint32 = 35; 79 | repeated sint64 repeated_sint64 = 36; 80 | repeated fixed32 repeated_fixed32 = 37; 81 | repeated fixed64 repeated_fixed64 = 38; 82 | repeated sfixed32 repeated_sfixed32 = 39; 83 | repeated sfixed64 repeated_sfixed64 = 40; 84 | repeated float repeated_float = 41; 85 | repeated double repeated_double = 42; 86 | repeated bool repeated_bool = 43; 87 | repeated string repeated_string = 44; 88 | repeated bytes repeated_bytes = 45; 89 | 90 | repeated group RepeatedGroup = 46 { 91 | optional int32 a = 47; 92 | } 93 | 94 | repeated NestedMessage repeated_nested_message = 48; 95 | repeated ForeignMessage repeated_foreign_message = 49; 96 | 97 | repeated NestedEnum repeated_nested_enum = 51; 98 | repeated ForeignEnum repeated_foreign_enum = 52; 99 | 100 | repeated string repeated_string_piece = 54 [ctype = STRING_PIECE]; 101 | repeated string repeated_cord = 55 [ctype = CORD]; 102 | 103 | repeated NestedMessage repeated_lazy_message = 57 [lazy = true]; 104 | 105 | // Singular with defaults 106 | optional int32 default_int32 = 61 [default = 41]; 107 | optional int64 default_int64 = 62 [default = 42]; 108 | optional uint32 default_uint32 = 63 [default = 43]; 109 | optional uint64 default_uint64 = 64 [default = 44]; 110 | optional sint32 default_sint32 = 65 [default = -45]; 111 | optional sint64 default_sint64 = 66 [default = 46]; 112 | optional fixed32 default_fixed32 = 67 [default = 47]; 113 | optional fixed64 default_fixed64 = 68 [default = 48]; 114 | optional sfixed32 default_sfixed32 = 69 [default = 49]; 115 | optional sfixed64 default_sfixed64 = 70 [default = -50]; 116 | optional float default_float = 71 [default = 51.5]; 117 | optional double default_double = 72 [default = 52e3]; 118 | optional bool default_bool = 73 [default = true]; 119 | optional string default_string = 74 [default = "hello"]; 120 | 
optional bytes default_bytes = 75 [default = "world"]; 121 | 122 | optional NestedEnum default_nested_enum = 81 [default = BAR]; 123 | optional ForeignEnum default_foreign_enum = 82 [default = FOREIGN_BAR]; 124 | 125 | optional string default_string_piece = 84 126 | [ctype = STRING_PIECE, default = "abc"]; 127 | optional string default_cord = 85 [ctype = CORD, default = "123"]; 128 | 129 | // For oneof test 130 | oneof oneof_field { 131 | uint32 oneof_uint32 = 111; 132 | NestedMessage oneof_nested_message = 112; 133 | string oneof_string = 113; 134 | bytes oneof_bytes = 114; 135 | } 136 | } 137 | 138 | // This proto includes a recursively nested message. 139 | message NestedTestAllTypes { 140 | optional NestedTestAllTypes child = 1; 141 | optional TestAllTypes payload = 2; 142 | repeated NestedTestAllTypes repeated_child = 3; 143 | optional NestedTestAllTypes lazy_child = 4 [lazy = true]; 144 | optional TestAllTypes eager_child = 5 [lazy = false]; 145 | } 146 | 147 | message TestDeprecatedFields { 148 | optional int32 deprecated_int32 = 1 [deprecated = true]; 149 | oneof oneof_fields { 150 | int32 deprecated_int32_in_oneof = 2 [deprecated = true]; 151 | } 152 | } 153 | 154 | message TestDeprecatedMessage { 155 | option deprecated = true; 156 | } 157 | 158 | // Define these after TestAllTypes to make sure the compiler can handle 159 | // that. 
160 | message ForeignMessage { 161 | optional int32 c = 1; 162 | optional int32 d = 2; 163 | } 164 | 165 | enum ForeignEnum { 166 | FOREIGN_FOO = 4; 167 | FOREIGN_BAR = 5; 168 | FOREIGN_BAZ = 6; 169 | } 170 | 171 | message TestReservedFields { 172 | reserved 2, 15, 9 to 11; 173 | reserved "bar", "baz"; 174 | } 175 | 176 | enum TestReservedEnumFields { 177 | UNKNOWN = 0; 178 | reserved 2, 15, 9 to 11; 179 | reserved "bar", "baz"; 180 | } 181 | 182 | message TestAllExtensions { 183 | extensions 1 to max; 184 | } 185 | 186 | extend TestAllExtensions { 187 | // Singular 188 | optional int32 optional_int32_extension = 1; 189 | optional int64 optional_int64_extension = 2; 190 | optional uint32 optional_uint32_extension = 3; 191 | optional uint64 optional_uint64_extension = 4; 192 | optional sint32 optional_sint32_extension = 5; 193 | optional sint64 optional_sint64_extension = 6; 194 | optional fixed32 optional_fixed32_extension = 7; 195 | optional fixed64 optional_fixed64_extension = 8; 196 | optional sfixed32 optional_sfixed32_extension = 9; 197 | optional sfixed64 optional_sfixed64_extension = 10; 198 | optional float optional_float_extension = 11; 199 | optional double optional_double_extension = 12; 200 | optional bool optional_bool_extension = 13; 201 | optional string optional_string_extension = 14; 202 | optional bytes optional_bytes_extension = 15; 203 | 204 | optional group OptionalGroup_extension = 16 { 205 | optional int32 a = 17; 206 | } 207 | 208 | optional TestAllTypes.NestedMessage optional_nested_message_extension = 18; 209 | optional ForeignMessage optional_foreign_message_extension = 19; 210 | 211 | optional TestAllTypes.NestedEnum optional_nested_enum_extension = 21; 212 | optional ForeignEnum optional_foreign_enum_extension = 22; 213 | 214 | optional string optional_string_piece_extension = 24 [ctype = STRING_PIECE]; 215 | optional string optional_cord_extension = 25 [ctype = CORD]; 216 | 217 | optional TestAllTypes.NestedMessage 
optional_lazy_message_extension = 27 218 | [lazy = true]; 219 | 220 | // Repeated 221 | repeated int32 repeated_int32_extension = 31; 222 | repeated int64 repeated_int64_extension = 32; 223 | repeated uint32 repeated_uint32_extension = 33; 224 | repeated uint64 repeated_uint64_extension = 34; 225 | repeated sint32 repeated_sint32_extension = 35; 226 | repeated sint64 repeated_sint64_extension = 36; 227 | repeated fixed32 repeated_fixed32_extension = 37; 228 | repeated fixed64 repeated_fixed64_extension = 38; 229 | repeated sfixed32 repeated_sfixed32_extension = 39; 230 | repeated sfixed64 repeated_sfixed64_extension = 40; 231 | repeated float repeated_float_extension = 41; 232 | repeated double repeated_double_extension = 42; 233 | repeated bool repeated_bool_extension = 43; 234 | repeated string repeated_string_extension = 44; 235 | repeated bytes repeated_bytes_extension = 45; 236 | 237 | repeated group RepeatedGroup_extension = 46 { 238 | optional int32 a = 47; 239 | } 240 | 241 | repeated TestAllTypes.NestedMessage repeated_nested_message_extension = 48; 242 | repeated ForeignMessage repeated_foreign_message_extension = 49; 243 | 244 | repeated TestAllTypes.NestedEnum repeated_nested_enum_extension = 51; 245 | repeated ForeignEnum repeated_foreign_enum_extension = 52; 246 | 247 | repeated string repeated_string_piece_extension = 54 [ctype = STRING_PIECE]; 248 | repeated string repeated_cord_extension = 55 [ctype = CORD]; 249 | 250 | repeated TestAllTypes.NestedMessage repeated_lazy_message_extension = 57 251 | [lazy = true]; 252 | 253 | // Singular with defaults 254 | optional int32 default_int32_extension = 61 [default = 41]; 255 | optional int64 default_int64_extension = 62 [default = 42]; 256 | optional uint32 default_uint32_extension = 63 [default = 43]; 257 | optional uint64 default_uint64_extension = 64 [default = 44]; 258 | optional sint32 default_sint32_extension = 65 [default = -45]; 259 | optional sint64 default_sint64_extension = 66 [default = 46]; 
260 | optional fixed32 default_fixed32_extension = 67 [default = 47]; 261 | optional fixed64 default_fixed64_extension = 68 [default = 48]; 262 | optional sfixed32 default_sfixed32_extension = 69 [default = 49]; 263 | optional sfixed64 default_sfixed64_extension = 70 [default = -50]; 264 | optional float default_float_extension = 71 [default = 51.5]; 265 | optional double default_double_extension = 72 [default = 52e3]; 266 | optional bool default_bool_extension = 73 [default = true]; 267 | optional string default_string_extension = 74 [default = "hello"]; 268 | optional bytes default_bytes_extension = 75 [default = "world"]; 269 | 270 | optional TestAllTypes.NestedEnum default_nested_enum_extension = 81 271 | [default = BAR]; 272 | optional ForeignEnum default_foreign_enum_extension = 82 273 | [default = FOREIGN_BAR]; 274 | 275 | optional string default_string_piece_extension = 84 276 | [ctype = STRING_PIECE, default = "abc"]; 277 | optional string default_cord_extension = 85 [ctype = CORD, default = "123"]; 278 | 279 | // For oneof test 280 | optional uint32 oneof_uint32_extension = 111; 281 | optional TestAllTypes.NestedMessage oneof_nested_message_extension = 112; 282 | optional string oneof_string_extension = 113; 283 | optional bytes oneof_bytes_extension = 114; 284 | } 285 | 286 | message TestGroup { 287 | optional group OptionalGroup = 16 { 288 | optional int32 a = 17; 289 | } 290 | optional ForeignEnum optional_foreign_enum = 22; 291 | } 292 | 293 | message TestGroupExtension { 294 | extensions 1 to max; 295 | } 296 | 297 | message TestNestedExtension { 298 | extend TestAllExtensions { 299 | // Check for bug where string extensions declared in tested scope did not 300 | // compile. 301 | optional string test = 1002 [default = "test"]; 302 | // Used to test if generated extension name is correct when there are 303 | // underscores. 
304 | optional string nested_string_extension = 1003; 305 | } 306 | 307 | extend TestGroupExtension { 308 | optional group OptionalGroup_extension = 16 { 309 | optional int32 a = 17; 310 | } 311 | optional ForeignEnum optional_foreign_enum_extension = 22; 312 | } 313 | } 314 | 315 | message TestChildExtension { 316 | optional string a = 1; 317 | optional string b = 2; 318 | optional TestAllExtensions optional_extension = 3; 319 | } 320 | 321 | // Emulates wireformat data of TestChildExtension with dynamic extension 322 | // (DynamicExtension). 323 | message TestChildExtensionData { 324 | message NestedTestAllExtensionsData { 325 | message NestedDynamicExtensions { 326 | optional int32 a = 1; 327 | optional int32 b = 2; 328 | } 329 | optional NestedDynamicExtensions dynamic = 409707008; 330 | } 331 | optional string a = 1; 332 | optional string b = 2; 333 | optional NestedTestAllExtensionsData optional_extension = 3; 334 | } 335 | 336 | message TestNestedChildExtension { 337 | optional int32 a = 1; 338 | optional TestChildExtension child = 2; 339 | } 340 | 341 | // Emulates wireformat data of TestNestedChildExtension with dynamic extension 342 | // (DynamicExtension). 343 | message TestNestedChildExtensionData { 344 | optional int32 a = 1; 345 | optional TestChildExtensionData child = 2; 346 | } 347 | 348 | // We have separate messages for testing required fields because it's 349 | // annoying to have to fill in required fields in TestProto in order to 350 | // do anything with it. Note that we don't need to test every type of 351 | // required field because the code output is basically identical to 352 | // optional fields for all types.
353 | message TestRequired { 354 | required int32 a = 1; 355 | optional int32 dummy2 = 2; 356 | required int32 b = 3; 357 | 358 | extend TestAllExtensions { 359 | optional TestRequired single = 1000; 360 | repeated TestRequired multi = 1001; 361 | } 362 | 363 | // Pad the field count to 32 so that we can test that IsInitialized() 364 | // properly checks multiple elements of has_bits_. 365 | optional int32 dummy4 = 4; 366 | optional int32 dummy5 = 5; 367 | optional int32 dummy6 = 6; 368 | optional int32 dummy7 = 7; 369 | optional int32 dummy8 = 8; 370 | optional int32 dummy9 = 9; 371 | optional int32 dummy10 = 10; 372 | optional int32 dummy11 = 11; 373 | optional int32 dummy12 = 12; 374 | optional int32 dummy13 = 13; 375 | optional int32 dummy14 = 14; 376 | optional int32 dummy15 = 15; 377 | optional int32 dummy16 = 16; 378 | optional int32 dummy17 = 17; 379 | optional int32 dummy18 = 18; 380 | optional int32 dummy19 = 19; 381 | optional int32 dummy20 = 20; 382 | optional int32 dummy21 = 21; 383 | optional int32 dummy22 = 22; 384 | optional int32 dummy23 = 23; 385 | optional int32 dummy24 = 24; 386 | optional int32 dummy25 = 25; 387 | optional int32 dummy26 = 26; 388 | optional int32 dummy27 = 27; 389 | optional int32 dummy28 = 28; 390 | optional int32 dummy29 = 29; 391 | optional int32 dummy30 = 30; 392 | optional int32 dummy31 = 31; 393 | optional int32 dummy32 = 32; 394 | 395 | required int32 c = 33; 396 | 397 | // Add an optional child message to make this non-trivial for go/pdlazy. 398 | optional ForeignMessage optional_foreign = 34; 399 | } 400 | 401 | message TestRequiredForeign { 402 | optional TestRequired optional_message = 1; 403 | repeated TestRequired repeated_message = 2; 404 | optional int32 dummy = 3; 405 | 406 | // Missing required fields must not affect verification of child messages. 
407 | optional NestedTestAllTypes optional_lazy_message = 4 [lazy = true]; 408 | } 409 | 410 | message TestRequiredMessage { 411 | optional TestRequired optional_message = 1; 412 | repeated TestRequired repeated_message = 2; 413 | required TestRequired required_message = 3; 414 | } 415 | 416 | message TestNestedRequiredForeign { 417 | optional TestNestedRequiredForeign child = 1; 418 | optional TestRequiredForeign payload = 2; 419 | optional int32 dummy = 3; 420 | } 421 | 422 | // Test that we can use NestedMessage from outside TestAllTypes. 423 | message TestForeignNested { 424 | optional TestAllTypes.NestedMessage foreign_nested = 1; 425 | } 426 | 427 | // TestEmptyMessage is used to test unknown field support. 428 | message TestEmptyMessage {} 429 | 430 | // Like above, but declare all field numbers as potential extensions. No 431 | // actual extensions should ever be defined for this type. 432 | message TestEmptyMessageWithExtensions { 433 | extensions 1 to max; 434 | } 435 | 436 | // Needed for a Python test. 437 | message TestPickleNestedMessage { 438 | message NestedMessage { 439 | optional int32 bb = 1; 440 | message NestedNestedMessage { 441 | optional int32 cc = 1; 442 | } 443 | } 444 | } 445 | 446 | message TestMultipleExtensionRanges { 447 | extensions 42; 448 | extensions 4143 to 4243; 449 | extensions 65536 to max; 450 | } 451 | 452 | // Test that really large tag numbers don't break anything. 453 | message TestReallyLargeTagNumber { 454 | // The largest possible tag number is 2^28 - 1, since the wire format uses 455 | // three bits to communicate wire type. 456 | optional int32 a = 1; 457 | optional int32 bb = 268435455; 458 | } 459 | 460 | message TestRecursiveMessage { 461 | optional TestRecursiveMessage a = 1; 462 | optional int32 i = 2; 463 | } 464 | 465 | // Test that mutual recursion works. 
466 | message TestMutualRecursionA { 467 | message SubMessage { 468 | optional TestMutualRecursionB b = 1; 469 | } 470 | optional TestMutualRecursionB bb = 1; 471 | optional group SubGroup = 2 { 472 | optional SubMessage sub_message = 3; // Needed because of bug in javatest 473 | optional TestAllTypes not_in_this_scc = 4; 474 | } 475 | } 476 | 477 | message TestMutualRecursionB { 478 | optional TestMutualRecursionA a = 1; 479 | optional int32 optional_int32 = 2; 480 | } 481 | 482 | message TestIsInitialized { 483 | message SubMessage { 484 | optional group SubGroup = 1 { 485 | required int32 i = 2; 486 | } 487 | } 488 | optional SubMessage sub_message = 1; 489 | } 490 | 491 | // Test that groups have disjoint field numbers from their siblings and 492 | // parents. This is NOT possible in proto1; only google.protobuf. When 493 | // attempting to compile with proto1, this will emit an error; so we only 494 | // include it in protobuf_unittest_proto. 495 | message TestDupFieldNumber { // NO_PROTO1 496 | optional int32 a = 1; // NO_PROTO1 497 | optional group Foo = 2 { 498 | optional int32 a = 1; 499 | } // NO_PROTO1 500 | optional group Bar = 3 { 501 | optional int32 a = 1; 502 | } // NO_PROTO1 503 | } // NO_PROTO1 504 | 505 | // Additional messages for testing lazy fields. 506 | message TestEagerMessage { 507 | optional TestAllTypes sub_message = 1 [lazy = false]; 508 | } 509 | message TestLazyMessage { 510 | optional TestAllTypes sub_message = 1 [lazy = true]; 511 | } 512 | message TestEagerMaybeLazy { 513 | message NestedMessage { 514 | optional TestPackedTypes packed = 1; 515 | } 516 | optional TestAllTypes message_foo = 1; 517 | optional TestAllTypes message_bar = 2; 518 | optional NestedMessage message_baz = 3; 519 | } 520 | // Needed for a Python test. 
521 | message TestNestedMessageHasBits { 522 | message NestedMessage { 523 | repeated int32 nestedmessage_repeated_int32 = 1; 524 | repeated ForeignMessage nestedmessage_repeated_foreignmessage = 2; 525 | } 526 | optional NestedMessage optional_nested_message = 1; 527 | } 528 | 529 | // Test an enum that has multiple values with the same number. 530 | enum TestEnumWithDupValue { 531 | option allow_alias = true; 532 | 533 | FOO1 = 1; 534 | BAR1 = 2; 535 | BAZ = 3; 536 | FOO2 = 1; 537 | BAR2 = 2; 538 | } 539 | 540 | // Test an enum with large, unordered values. 541 | enum TestSparseEnum { 542 | SPARSE_A = 123; 543 | SPARSE_B = 62374; 544 | SPARSE_C = 12589234; 545 | SPARSE_D = -15; 546 | SPARSE_E = -53452; 547 | SPARSE_F = 0; 548 | SPARSE_G = 2; 549 | } 550 | 551 | // Test message with CamelCase field names. This violates Protocol Buffer 552 | // standard style. 553 | message TestCamelCaseFieldNames { 554 | optional int32 PrimitiveField = 1; 555 | optional string StringField = 2; 556 | optional ForeignEnum EnumField = 3; 557 | optional ForeignMessage MessageField = 4; 558 | optional string StringPieceField = 5 [ctype = STRING_PIECE]; 559 | optional string CordField = 6 [ctype = CORD]; 560 | 561 | repeated int32 RepeatedPrimitiveField = 7; 562 | repeated string RepeatedStringField = 8; 563 | repeated ForeignEnum RepeatedEnumField = 9; 564 | repeated ForeignMessage RepeatedMessageField = 10; 565 | repeated string RepeatedStringPieceField = 11 [ctype = STRING_PIECE]; 566 | repeated string RepeatedCordField = 12 [ctype = CORD]; 567 | } 568 | 569 | // We list fields out of order, to ensure that we're using field number and not 570 | // field index to determine serialization order. 
571 | message TestFieldOrderings { 572 | optional string my_string = 11; 573 | extensions 2 to 10; 574 | optional int64 my_int = 1; 575 | extensions 12 to 100; 576 | optional float my_float = 101; 577 | message NestedMessage { 578 | optional int64 oo = 2; 579 | // The field name "b" fails to compile in proto1 because it conflicts with 580 | // a local variable named "b" in one of the generated methods. Doh. 581 | // This file needs to compile in proto1 to test backwards-compatibility. 582 | optional int32 bb = 1; 583 | } 584 | 585 | optional NestedMessage optional_nested_message = 200; 586 | } 587 | 588 | extend TestFieldOrderings { 589 | optional string my_extension_string = 50; 590 | optional int32 my_extension_int = 5; 591 | } 592 | 593 | message TestExtensionOrderings1 { 594 | extend TestFieldOrderings { 595 | optional TestExtensionOrderings1 test_ext_orderings1 = 13; 596 | } 597 | optional string my_string = 1; 598 | } 599 | 600 | message TestExtensionOrderings2 { 601 | extend TestFieldOrderings { 602 | optional TestExtensionOrderings2 test_ext_orderings2 = 12; 603 | } 604 | message TestExtensionOrderings3 { 605 | extend TestFieldOrderings { 606 | optional TestExtensionOrderings3 test_ext_orderings3 = 14; 607 | } 608 | optional string my_string = 1; 609 | } 610 | optional string my_string = 1; 611 | } 612 | 613 | message TestExtremeDefaultValues { 614 | optional bytes escaped_bytes = 1 [default = "\0\001\a\b\f\n\r\t\v\\\'\"\xfe"]; 615 | optional uint32 large_uint32 = 2 [default = 0xFFFFFFFF]; 616 | optional uint64 large_uint64 = 3 [default = 0xFFFFFFFFFFFFFFFF]; 617 | optional int32 small_int32 = 4 [default = -0x7FFFFFFF]; 618 | optional int64 small_int64 = 5 [default = -0x7FFFFFFFFFFFFFFF]; 619 | optional int32 really_small_int32 = 21 [default = -0x80000000]; 620 | optional int64 really_small_int64 = 22 [default = -0x8000000000000000]; 621 | 622 | // The default value here is UTF-8 for "\u1234". 
(We could also just type 623 | // the UTF-8 text directly into this text file rather than escape it, but 624 | // lots of people use editors that would be confused by this.) 625 | optional string utf8_string = 6 [default = "\341\210\264"]; 626 | 627 | // Tests for single-precision floating-point values. 628 | optional float zero_float = 7 [default = 0]; 629 | optional float one_float = 8 [default = 1]; 630 | optional float small_float = 9 [default = 1.5]; 631 | optional float negative_one_float = 10 [default = -1]; 632 | optional float negative_float = 11 [default = -1.5]; 633 | // Using exponents 634 | optional float large_float = 12 [default = 2E8]; 635 | optional float small_negative_float = 13 [default = -8e-28]; 636 | 637 | // Text for nonfinite floating-point values. 638 | optional double inf_double = 14 [default = inf]; 639 | optional double neg_inf_double = 15 [default = -inf]; 640 | optional double nan_double = 16 [default = nan]; 641 | optional float inf_float = 17 [default = inf]; 642 | optional float neg_inf_float = 18 [default = -inf]; 643 | optional float nan_float = 19 [default = nan]; 644 | 645 | // Tests for C++ trigraphs. 646 | // Trigraphs should be escaped in C++ generated files, but they should not be 647 | // escaped for other languages. 648 | // Note that in .proto file, "\?" is a valid way to escape ? in string 649 | // literals. 650 | optional string cpp_trigraph = 20 [default = "? \? ?? \?? \??? 
??/ ?\?-"]; 651 | 652 | // String defaults containing the character '\000' 653 | optional string string_with_zero = 23 [default = "hel\000lo"]; 654 | optional bytes bytes_with_zero = 24 [default = "wor\000ld"]; 655 | optional string string_piece_with_zero = 25 656 | [ctype = STRING_PIECE, default = "ab\000c"]; 657 | optional string cord_with_zero = 26 [ctype = CORD, default = "12\0003"]; 658 | optional string replacement_string = 27 [default = "${unknown}"]; 659 | } 660 | 661 | message SparseEnumMessage { 662 | optional TestSparseEnum sparse_enum = 1; 663 | } 664 | 665 | // Test String and Bytes: string is for valid UTF-8 strings 666 | message OneString { 667 | optional string data = 1; 668 | } 669 | 670 | message MoreString { 671 | repeated string data = 1; 672 | } 673 | 674 | message OneBytes { 675 | optional bytes data = 1; 676 | } 677 | 678 | message MoreBytes { 679 | repeated bytes data = 1; 680 | } 681 | 682 | message ManyOptionalString { 683 | optional string str1 = 1; 684 | optional string str2 = 2; 685 | optional string str3 = 3; 686 | optional string str4 = 4; 687 | optional string str5 = 5; 688 | optional string str6 = 6; 689 | optional string str7 = 7; 690 | optional string str8 = 8; 691 | optional string str9 = 9; 692 | optional string str10 = 10; 693 | optional string str11 = 11; 694 | optional string str12 = 12; 695 | optional string str13 = 13; 696 | optional string str14 = 14; 697 | optional string str15 = 15; 698 | optional string str16 = 16; 699 | optional string str17 = 17; 700 | optional string str18 = 18; 701 | optional string str19 = 19; 702 | optional string str20 = 20; 703 | optional string str21 = 21; 704 | optional string str22 = 22; 705 | optional string str23 = 23; 706 | optional string str24 = 24; 707 | optional string str25 = 25; 708 | optional string str26 = 26; 709 | optional string str27 = 27; 710 | optional string str28 = 28; 711 | optional string str29 = 29; 712 | optional string str30 = 30; 713 | optional string str31 = 31; 714 
| optional string str32 = 32; 715 | } 716 | 717 | // Test int32, uint32, int64, uint64, and bool are all compatible 718 | message Int32Message { 719 | optional int32 data = 1; 720 | } 721 | 722 | message Uint32Message { 723 | optional uint32 data = 1; 724 | } 725 | 726 | message Int64Message { 727 | optional int64 data = 1; 728 | } 729 | 730 | message Uint64Message { 731 | optional uint64 data = 1; 732 | } 733 | 734 | message BoolMessage { 735 | optional bool data = 1; 736 | } 737 | 738 | // Test oneofs. 739 | message TestOneof { 740 | oneof foo { 741 | int32 foo_int = 1; 742 | string foo_string = 2; 743 | TestAllTypes foo_message = 3; 744 | group FooGroup = 4 { 745 | optional int32 a = 5; 746 | optional string b = 6; 747 | } 748 | } 749 | } 750 | 751 | message TestOneofBackwardsCompatible { 752 | optional int32 foo_int = 1; 753 | optional string foo_string = 2; 754 | optional TestAllTypes foo_message = 3; 755 | optional group FooGroup = 4 { 756 | optional int32 a = 5; 757 | optional string b = 6; 758 | } 759 | } 760 | 761 | message TestOneof2 { 762 | oneof foo { 763 | int32 foo_int = 1; 764 | string foo_string = 2; 765 | string foo_cord = 3 [ctype = CORD]; 766 | string foo_string_piece = 4 [ctype = STRING_PIECE]; 767 | bytes foo_bytes = 5; 768 | NestedEnum foo_enum = 6; 769 | NestedMessage foo_message = 7; 770 | group FooGroup = 8 { 771 | optional int32 a = 9; 772 | optional string b = 10; 773 | } 774 | NestedMessage foo_lazy_message = 11 [lazy = true]; 775 | } 776 | 777 | oneof bar { 778 | int32 bar_int = 12 [default = 5]; 779 | string bar_string = 13 [default = "STRING"]; 780 | string bar_cord = 14 [ctype = CORD, default = "CORD"]; 781 | string bar_string_piece = 15 [ctype = STRING_PIECE, default = "SPIECE"]; 782 | bytes bar_bytes = 16 [default = "BYTES"]; 783 | NestedEnum bar_enum = 17 [default = BAR]; 784 | string bar_string_with_empty_default = 20 [default = ""]; 785 | string bar_cord_with_empty_default = 21 [ctype = CORD, default = ""]; 786 | string 
bar_string_piece_with_empty_default = 22 787 | [ctype = STRING_PIECE, default = ""]; 788 | bytes bar_bytes_with_empty_default = 23 [default = ""]; 789 | } 790 | 791 | optional int32 baz_int = 18; 792 | optional string baz_string = 19 [default = "BAZ"]; 793 | 794 | message NestedMessage { 795 | optional int64 moo_int = 1; 796 | repeated int32 corge_int = 2; 797 | } 798 | 799 | enum NestedEnum { 800 | FOO = 1; 801 | BAR = 2; 802 | BAZ = 3; 803 | } 804 | } 805 | 806 | message TestRequiredOneof { 807 | oneof foo { 808 | int32 foo_int = 1; 809 | string foo_string = 2; 810 | NestedMessage foo_message = 3; 811 | } 812 | message NestedMessage { 813 | required double required_double = 1; 814 | } 815 | } 816 | 817 | // Test messages for packed fields 818 | 819 | message TestPackedTypes { 820 | repeated int32 packed_int32 = 90 [packed = true]; 821 | repeated int64 packed_int64 = 91 [packed = true]; 822 | repeated uint32 packed_uint32 = 92 [packed = true]; 823 | repeated uint64 packed_uint64 = 93 [packed = true]; 824 | repeated sint32 packed_sint32 = 94 [packed = true]; 825 | repeated sint64 packed_sint64 = 95 [packed = true]; 826 | repeated fixed32 packed_fixed32 = 96 [packed = true]; 827 | repeated fixed64 packed_fixed64 = 97 [packed = true]; 828 | repeated sfixed32 packed_sfixed32 = 98 [packed = true]; 829 | repeated sfixed64 packed_sfixed64 = 99 [packed = true]; 830 | repeated float packed_float = 100 [packed = true]; 831 | repeated double packed_double = 101 [packed = true]; 832 | repeated bool packed_bool = 102 [packed = true]; 833 | repeated ForeignEnum packed_enum = 103 [packed = true]; 834 | } 835 | 836 | // A message with the same fields as TestPackedTypes, but without packing. Used 837 | // to test packed <-> unpacked wire compatibility. 
838 | message TestUnpackedTypes { 839 | repeated int32 unpacked_int32 = 90 [packed = false]; 840 | repeated int64 unpacked_int64 = 91 [packed = false]; 841 | repeated uint32 unpacked_uint32 = 92 [packed = false]; 842 | repeated uint64 unpacked_uint64 = 93 [packed = false]; 843 | repeated sint32 unpacked_sint32 = 94 [packed = false]; 844 | repeated sint64 unpacked_sint64 = 95 [packed = false]; 845 | repeated fixed32 unpacked_fixed32 = 96 [packed = false]; 846 | repeated fixed64 unpacked_fixed64 = 97 [packed = false]; 847 | repeated sfixed32 unpacked_sfixed32 = 98 [packed = false]; 848 | repeated sfixed64 unpacked_sfixed64 = 99 [packed = false]; 849 | repeated float unpacked_float = 100 [packed = false]; 850 | repeated double unpacked_double = 101 [packed = false]; 851 | repeated bool unpacked_bool = 102 [packed = false]; 852 | repeated ForeignEnum unpacked_enum = 103 [packed = false]; 853 | } 854 | 855 | message TestPackedExtensions { 856 | extensions 1 to max; 857 | } 858 | 859 | extend TestPackedExtensions { 860 | repeated int32 packed_int32_extension = 90 [packed = true]; 861 | repeated int64 packed_int64_extension = 91 [packed = true]; 862 | repeated uint32 packed_uint32_extension = 92 [packed = true]; 863 | repeated uint64 packed_uint64_extension = 93 [packed = true]; 864 | repeated sint32 packed_sint32_extension = 94 [packed = true]; 865 | repeated sint64 packed_sint64_extension = 95 [packed = true]; 866 | repeated fixed32 packed_fixed32_extension = 96 [packed = true]; 867 | repeated fixed64 packed_fixed64_extension = 97 [packed = true]; 868 | repeated sfixed32 packed_sfixed32_extension = 98 [packed = true]; 869 | repeated sfixed64 packed_sfixed64_extension = 99 [packed = true]; 870 | repeated float packed_float_extension = 100 [packed = true]; 871 | repeated double packed_double_extension = 101 [packed = true]; 872 | repeated bool packed_bool_extension = 102 [packed = true]; 873 | repeated ForeignEnum packed_enum_extension = 103 [packed = true]; 874 | } 875 | 
876 | message TestUnpackedExtensions { 877 | extensions 1 to max; 878 | } 879 | 880 | extend TestUnpackedExtensions { 881 | repeated int32 unpacked_int32_extension = 90 [packed = false]; 882 | repeated int64 unpacked_int64_extension = 91 [packed = false]; 883 | repeated uint32 unpacked_uint32_extension = 92 [packed = false]; 884 | repeated uint64 unpacked_uint64_extension = 93 [packed = false]; 885 | repeated sint32 unpacked_sint32_extension = 94 [packed = false]; 886 | repeated sint64 unpacked_sint64_extension = 95 [packed = false]; 887 | repeated fixed32 unpacked_fixed32_extension = 96 [packed = false]; 888 | repeated fixed64 unpacked_fixed64_extension = 97 [packed = false]; 889 | repeated sfixed32 unpacked_sfixed32_extension = 98 [packed = false]; 890 | repeated sfixed64 unpacked_sfixed64_extension = 99 [packed = false]; 891 | repeated float unpacked_float_extension = 100 [packed = false]; 892 | repeated double unpacked_double_extension = 101 [packed = false]; 893 | repeated bool unpacked_bool_extension = 102 [packed = false]; 894 | repeated ForeignEnum unpacked_enum_extension = 103 [packed = false]; 895 | } 896 | 897 | // Used by ExtensionSetTest/DynamicExtensions. The test actually builds 898 | // a set of extensions to TestAllExtensions dynamically, based on the fields 899 | // of this message type. 
900 | message TestDynamicExtensions { 901 | enum DynamicEnumType { 902 | DYNAMIC_FOO = 2200; 903 | DYNAMIC_BAR = 2201; 904 | DYNAMIC_BAZ = 2202; 905 | } 906 | message DynamicMessageType { 907 | optional int32 dynamic_field = 2100; 908 | } 909 | 910 | optional fixed32 scalar_extension = 2000; 911 | optional ForeignEnum enum_extension = 2001; 912 | optional DynamicEnumType dynamic_enum_extension = 2002; 913 | 914 | optional ForeignMessage message_extension = 2003; 915 | optional DynamicMessageType dynamic_message_extension = 2004; 916 | 917 | repeated string repeated_extension = 2005; 918 | repeated sint32 packed_extension = 2006 [packed = true]; 919 | } 920 | 921 | message TestRepeatedScalarDifferentTagSizes { 922 | // Parsing repeated fixed size values used to fail. This message needs to be 923 | // used in order to get a tag of the right size; all of the repeated fields 924 | // in TestAllTypes didn't trigger the check. 925 | repeated fixed32 repeated_fixed32 = 12; 926 | // Check for a varint type, just for good measure. 927 | repeated int32 repeated_int32 = 13; 928 | 929 | // These have two-byte tags. 930 | repeated fixed64 repeated_fixed64 = 2046; 931 | repeated int64 repeated_int64 = 2047; 932 | 933 | // Three byte tags. 934 | repeated float repeated_float = 262142; 935 | repeated uint64 repeated_uint64 = 262143; 936 | } 937 | 938 | // Test that if an optional or required message/group field appears multiple 939 | // times in the input, they need to be merged. 940 | message TestParsingMerge { 941 | // RepeatedFieldsGenerator defines matching field types as TestParsingMerge, 942 | // except that all fields are repeated. In the tests, we will serialize the 943 | // RepeatedFieldsGenerator to bytes, and parse the bytes to TestParsingMerge. 944 | // Repeated fields in RepeatedFieldsGenerator are expected to be merged into 945 | // the corresponding required/optional fields in TestParsingMerge. 
946 | message RepeatedFieldsGenerator { 947 | repeated TestAllTypes field1 = 1; 948 | repeated TestAllTypes field2 = 2; 949 | repeated TestAllTypes field3 = 3; 950 | repeated group Group1 = 10 { 951 | optional TestAllTypes field1 = 11; 952 | } 953 | repeated group Group2 = 20 { 954 | optional TestAllTypes field1 = 21; 955 | } 956 | repeated TestAllTypes ext1 = 1000; 957 | repeated TestAllTypes ext2 = 1001; 958 | } 959 | required TestAllTypes required_all_types = 1; 960 | optional TestAllTypes optional_all_types = 2; 961 | repeated TestAllTypes repeated_all_types = 3; 962 | optional group OptionalGroup = 10 { 963 | optional TestAllTypes optional_group_all_types = 11; 964 | } 965 | repeated group RepeatedGroup = 20 { 966 | optional TestAllTypes repeated_group_all_types = 21; 967 | } 968 | extensions 1000 to max; 969 | extend TestParsingMerge { 970 | optional TestAllTypes optional_ext = 1000; 971 | repeated TestAllTypes repeated_ext = 1001; 972 | } 973 | } 974 | 975 | // Test that the correct exception is thrown by parseFrom in a corner case 976 | // involving merging, extensions, and required fields. 977 | message TestMergeException { 978 | optional TestAllExtensions all_extensions = 1; 979 | } 980 | 981 | message TestCommentInjectionMessage { 982 | // */ <- This should not close the generated doc comment 983 | optional string a = 1 [default = "*/ <- Neither should this."]; 984 | } 985 | 986 | // Used to check that the c++ code generator re-orders messages to reduce 987 | // padding. 988 | message TestMessageSize { 989 | optional bool m1 = 1; 990 | optional int64 m2 = 2; 991 | optional bool m3 = 3; 992 | optional string m4 = 4; 993 | optional int32 m5 = 5; 994 | optional int64 m6 = 6; 995 | } 996 | 997 | // Test that RPC services work. 
998 | message FooRequest {} 999 | message FooResponse {} 1000 | 1001 | message FooClientMessage {} 1002 | message FooServerMessage {} 1003 | 1004 | service TestService { 1005 | rpc Foo(FooRequest) returns (FooResponse); 1006 | rpc Bar(BarRequest) returns (BarResponse); 1007 | } 1008 | 1009 | message BarRequest {} 1010 | message BarResponse {} 1011 | 1012 | message TestJsonName { 1013 | optional int32 field_name1 = 1; 1014 | optional int32 fieldName2 = 2; 1015 | optional int32 FieldName3 = 3; 1016 | optional int32 _field_name4 = 4; 1017 | optional int32 FIELD_NAME5 = 5; 1018 | optional int32 field_name6 = 6 [json_name = "@type"]; 1019 | optional int32 fieldname7 = 7; 1020 | } 1021 | 1022 | message TestHugeFieldNumbers { 1023 | optional int32 optional_int32 = 536870000; 1024 | optional int32 fixed_32 = 536870001; 1025 | repeated int32 repeated_int32 = 536870002 [packed = false]; 1026 | repeated int32 packed_int32 = 536870003 [packed = true]; 1027 | 1028 | optional ForeignEnum optional_enum = 536870004; 1029 | optional string optional_string = 536870005; 1030 | optional bytes optional_bytes = 536870006; 1031 | optional ForeignMessage optional_message = 536870007; 1032 | 1033 | optional group OptionalGroup = 536870008 { 1034 | optional int32 group_a = 536870009; 1035 | } 1036 | 1037 | map<string, string> string_string_map = 536870010; 1038 | 1039 | oneof oneof_field { 1040 | uint32 oneof_uint32 = 536870011; 1041 | TestAllTypes oneof_test_all_types = 536870012; 1042 | string oneof_string = 536870013; 1043 | bytes oneof_bytes = 536870014; 1044 | } 1045 | 1046 | extensions 536860000 to 536869999; 1047 | } 1048 | 1049 | extend TestHugeFieldNumbers { 1050 | optional TestAllTypes test_all_types = 536860000; 1051 | } 1052 | 1053 | message TestExtensionInsideTable { 1054 | optional int32 field1 = 1; 1055 | optional int32 field2 = 2; 1056 | optional int32 field3 = 3; 1057 | optional int32 field4 = 4; 1058 | extensions 5 to 5; 1059 | optional int32 field6 = 6; 1060 | optional int32 field7 = 7;
1061 | optional int32 field8 = 8; 1062 | optional int32 field9 = 9; 1063 | optional int32 field10 = 10; 1064 | } 1065 | 1066 | extend TestExtensionInsideTable { 1067 | optional int32 test_extension_inside_table_extension = 5; 1068 | } 1069 | 1070 | message TestNestedGroupExtensionOuter { 1071 | optional group Layer1OptionalGroup = 1 { 1072 | repeated group Layer2RepeatedGroup = 2 { 1073 | extensions 3; 1074 | optional string another_field = 6; 1075 | } 1076 | repeated group Layer2AnotherOptionalRepeatedGroup = 4 { 1077 | optional string but_why_tho = 5; 1078 | } 1079 | } 1080 | } 1081 | 1082 | message TestNestedGroupExtensionInnerExtension { 1083 | optional string inner_name = 1; 1084 | } 1085 | 1086 | extend TestNestedGroupExtensionOuter.Layer1OptionalGroup.Layer2RepeatedGroup { 1087 | optional TestNestedGroupExtensionInnerExtension inner = 3; 1088 | } 1089 | 1090 | enum VeryLargeEnum { 1091 | ENUM_LABEL_DEFAULT = 0; 1092 | ENUM_LABEL_1 = 1; 1093 | ENUM_LABEL_2 = 2; 1094 | ENUM_LABEL_3 = 3; 1095 | ENUM_LABEL_4 = 4; 1096 | ENUM_LABEL_5 = 5; 1097 | ENUM_LABEL_6 = 6; 1098 | ENUM_LABEL_7 = 7; 1099 | ENUM_LABEL_8 = 8; 1100 | ENUM_LABEL_9 = 9; 1101 | ENUM_LABEL_10 = 10; 1102 | ENUM_LABEL_11 = 11; 1103 | ENUM_LABEL_12 = 12; 1104 | ENUM_LABEL_13 = 13; 1105 | ENUM_LABEL_14 = 14; 1106 | ENUM_LABEL_15 = 15; 1107 | ENUM_LABEL_16 = 16; 1108 | ENUM_LABEL_17 = 17; 1109 | ENUM_LABEL_18 = 18; 1110 | ENUM_LABEL_19 = 19; 1111 | ENUM_LABEL_20 = 20; 1112 | ENUM_LABEL_21 = 21; 1113 | ENUM_LABEL_22 = 22; 1114 | ENUM_LABEL_23 = 23; 1115 | ENUM_LABEL_24 = 24; 1116 | ENUM_LABEL_25 = 25; 1117 | ENUM_LABEL_26 = 26; 1118 | ENUM_LABEL_27 = 27; 1119 | ENUM_LABEL_28 = 28; 1120 | ENUM_LABEL_29 = 29; 1121 | ENUM_LABEL_30 = 30; 1122 | ENUM_LABEL_31 = 31; 1123 | ENUM_LABEL_32 = 32; 1124 | ENUM_LABEL_33 = 33; 1125 | ENUM_LABEL_34 = 34; 1126 | ENUM_LABEL_35 = 35; 1127 | ENUM_LABEL_36 = 36; 1128 | ENUM_LABEL_37 = 37; 1129 | ENUM_LABEL_38 = 38; 1130 | ENUM_LABEL_39 = 39; 1131 | ENUM_LABEL_40 = 40; 1132 
| ENUM_LABEL_41 = 41; 1133 | ENUM_LABEL_42 = 42; 1134 | ENUM_LABEL_43 = 43; 1135 | ENUM_LABEL_44 = 44; 1136 | ENUM_LABEL_45 = 45; 1137 | ENUM_LABEL_46 = 46; 1138 | ENUM_LABEL_47 = 47; 1139 | ENUM_LABEL_48 = 48; 1140 | ENUM_LABEL_49 = 49; 1141 | ENUM_LABEL_50 = 50; 1142 | ENUM_LABEL_51 = 51; 1143 | ENUM_LABEL_52 = 52; 1144 | ENUM_LABEL_53 = 53; 1145 | ENUM_LABEL_54 = 54; 1146 | ENUM_LABEL_55 = 55; 1147 | ENUM_LABEL_56 = 56; 1148 | ENUM_LABEL_57 = 57; 1149 | ENUM_LABEL_58 = 58; 1150 | ENUM_LABEL_59 = 59; 1151 | ENUM_LABEL_60 = 60; 1152 | ENUM_LABEL_61 = 61; 1153 | ENUM_LABEL_62 = 62; 1154 | ENUM_LABEL_63 = 63; 1155 | ENUM_LABEL_64 = 64; 1156 | ENUM_LABEL_65 = 65; 1157 | ENUM_LABEL_66 = 66; 1158 | ENUM_LABEL_67 = 67; 1159 | ENUM_LABEL_68 = 68; 1160 | ENUM_LABEL_69 = 69; 1161 | ENUM_LABEL_70 = 70; 1162 | ENUM_LABEL_71 = 71; 1163 | ENUM_LABEL_72 = 72; 1164 | ENUM_LABEL_73 = 73; 1165 | ENUM_LABEL_74 = 74; 1166 | ENUM_LABEL_75 = 75; 1167 | ENUM_LABEL_76 = 76; 1168 | ENUM_LABEL_77 = 77; 1169 | ENUM_LABEL_78 = 78; 1170 | ENUM_LABEL_79 = 79; 1171 | ENUM_LABEL_80 = 80; 1172 | ENUM_LABEL_81 = 81; 1173 | ENUM_LABEL_82 = 82; 1174 | ENUM_LABEL_83 = 83; 1175 | ENUM_LABEL_84 = 84; 1176 | ENUM_LABEL_85 = 85; 1177 | ENUM_LABEL_86 = 86; 1178 | ENUM_LABEL_87 = 87; 1179 | ENUM_LABEL_88 = 88; 1180 | ENUM_LABEL_89 = 89; 1181 | ENUM_LABEL_90 = 90; 1182 | ENUM_LABEL_91 = 91; 1183 | ENUM_LABEL_92 = 92; 1184 | ENUM_LABEL_93 = 93; 1185 | ENUM_LABEL_94 = 94; 1186 | ENUM_LABEL_95 = 95; 1187 | ENUM_LABEL_96 = 96; 1188 | ENUM_LABEL_97 = 97; 1189 | ENUM_LABEL_98 = 98; 1190 | ENUM_LABEL_99 = 99; 1191 | ENUM_LABEL_100 = 100; 1192 | } 1193 | 1194 | message TestExtensionRangeSerialize { 1195 | optional int32 foo_one = 1; 1196 | 1197 | extensions 2 to 2; 1198 | extensions 3 to 4; 1199 | 1200 | optional int32 foo_two = 6; 1201 | optional int32 foo_three = 7; 1202 | 1203 | extensions 9 to 10; 1204 | 1205 | optional int32 foo_four = 13; 1206 | 1207 | extensions 15 to 15; 1208 | extensions 17 to 17; 1209 | 
extensions 19 to 19; 1210 | 1211 | extend TestExtensionRangeSerialize { 1212 | optional int32 bar_one = 2; 1213 | optional int32 bar_two = 4; 1214 | 1215 | optional int32 bar_three = 10; 1216 | 1217 | optional int32 bar_four = 15; 1218 | optional int32 bar_five = 19; 1219 | } 1220 | } 1221 | 1222 | message TestVerifyInt32Simple { 1223 | optional int32 optional_int32_1 = 1; 1224 | optional int32 optional_int32_2 = 2; 1225 | optional int32 optional_int32_63 = 63; 1226 | optional int32 optional_int32_64 = 64; 1227 | } 1228 | 1229 | message TestVerifyInt32 { 1230 | optional int32 optional_int32_1 = 1; 1231 | optional int32 optional_int32_2 = 2; 1232 | optional int32 optional_int32_63 = 63; 1233 | optional int32 optional_int32_64 = 64; 1234 | 1235 | optional TestAllTypes optional_all_types = 9; 1236 | repeated TestAllTypes repeated_all_types = 10; 1237 | } 1238 | 1239 | message TestVerifyMostlyInt32 { 1240 | optional int64 optional_int64_30 = 30; 1241 | 1242 | optional int32 optional_int32_1 = 1; 1243 | optional int32 optional_int32_2 = 2; 1244 | optional int32 optional_int32_3 = 3; 1245 | optional int32 optional_int32_4 = 4; 1246 | optional int32 optional_int32_63 = 63; 1247 | optional int32 optional_int32_64 = 64; 1248 | 1249 | optional TestAllTypes optional_all_types = 9; 1250 | repeated TestAllTypes repeated_all_types = 10; 1251 | } 1252 | 1253 | message TestVerifyMostlyInt32BigFieldNumber { 1254 | optional int64 optional_int64_30 = 30; 1255 | optional int32 optional_int32_300 = 300; 1256 | 1257 | optional int32 optional_int32_1 = 1; 1258 | optional int32 optional_int32_2 = 2; 1259 | optional int32 optional_int32_3 = 3; 1260 | optional int32 optional_int32_4 = 4; 1261 | optional int32 optional_int32_63 = 63; 1262 | optional int32 optional_int32_64 = 64; 1263 | 1264 | optional TestAllTypes optional_all_types = 9; 1265 | repeated TestAllTypes repeated_all_types = 10; 1266 | } 1267 | 1268 | message TestVerifyUint32Simple { 1269 | optional uint32 optional_uint32_1 = 1; 
1270 | optional uint32 optional_uint32_2 = 2; 1271 | optional uint32 optional_uint32_63 = 63; 1272 | optional uint32 optional_uint32_64 = 64; 1273 | } 1274 | 1275 | message TestVerifyUint32 { 1276 | optional uint32 optional_uint32_1 = 1; 1277 | optional uint32 optional_uint32_2 = 2; 1278 | optional uint32 optional_uint32_63 = 63; 1279 | optional uint32 optional_uint32_64 = 64; 1280 | 1281 | optional TestAllTypes optional_all_types = 9; 1282 | repeated TestAllTypes repeated_all_types = 10; 1283 | } 1284 | 1285 | message TestVerifyOneUint32 { 1286 | optional uint32 optional_uint32_1 = 1; 1287 | optional int32 optional_int32_2 = 2; 1288 | optional int32 optional_int32_63 = 63; 1289 | optional int32 optional_int32_64 = 64; 1290 | 1291 | optional TestAllTypes optional_all_types = 9; 1292 | repeated TestAllTypes repeated_all_types = 10; 1293 | } 1294 | 1295 | message TestVerifyOneInt32BigFieldNumber { 1296 | optional int32 optional_int32_65 = 65; 1297 | 1298 | optional int64 optional_int64_1 = 1; 1299 | optional int64 optional_int64_2 = 2; 1300 | optional int64 optional_int64_63 = 63; 1301 | optional int64 optional_int64_64 = 64; 1302 | 1303 | optional TestAllTypes optional_all_types = 9; 1304 | repeated TestAllTypes repeated_all_types = 10; 1305 | } 1306 | 1307 | message TestVerifyInt32BigFieldNumber { 1308 | optional int32 optional_int32_1000 = 1000; 1309 | optional int32 optional_int32_65 = 65; 1310 | 1311 | optional int32 optional_int32_1 = 1; 1312 | optional int32 optional_int32_2 = 2; 1313 | optional int32 optional_int32_63 = 63; 1314 | optional int32 optional_int32_64 = 64; 1315 | 1316 | optional TestAllTypes optional_all_types = 9; 1317 | repeated TestAllTypes repeated_all_types = 10; 1318 | } 1319 | 1320 | message TestVerifyUint32BigFieldNumber { 1321 | optional uint32 optional_uint32_1000 = 1000; 1322 | optional uint32 optional_uint32_65 = 65; 1323 | 1324 | optional uint32 optional_uint32_1 = 1; 1325 | optional uint32 optional_uint32_2 = 2; 1326 | optional 
uint32 optional_uint32_63 = 63; 1327 | optional uint32 optional_uint32_64 = 64; 1328 | 1329 | optional TestAllTypes optional_all_types = 9; 1330 | repeated TestAllTypes repeated_all_types = 10; 1331 | } 1332 | 1333 | message TestVerifyBigFieldNumberUint32 { 1334 | message Nested { 1335 | optional uint32 optional_uint32_5000 = 5000; 1336 | optional uint32 optional_uint32_1000 = 1000; 1337 | optional uint32 optional_uint32_66 = 66; 1338 | optional uint32 optional_uint32_65 = 65; 1339 | 1340 | optional uint32 optional_uint32_1 = 1; 1341 | optional uint32 optional_uint32_2 = 2; 1342 | optional uint32 optional_uint32_63 = 63; 1343 | optional uint32 optional_uint32_64 = 64; 1344 | 1345 | optional Nested optional_nested = 9; 1346 | repeated Nested repeated_nested = 10; 1347 | } 1348 | optional Nested optional_nested = 1; 1349 | } 1350 | 1351 | // This message contains different kind of enums to exercise the different 1352 | // parsers in table-driven. 1353 | message EnumParseTester { 1354 | enum SeqSmall0 { 1355 | SEQ_SMALL_0_DEFAULT = 0; 1356 | SEQ_SMALL_0_1 = 1; 1357 | SEQ_SMALL_0_2 = 2; 1358 | } 1359 | optional SeqSmall0 optional_seq_small_0_lowfield = 1; 1360 | optional SeqSmall0 optional_seq_small_0_midfield = 1001; 1361 | optional SeqSmall0 optional_seq_small_0_hifield = 1000001; 1362 | repeated SeqSmall0 repeated_seq_small_0_lowfield = 2; 1363 | repeated SeqSmall0 repeated_seq_small_0_midfield = 1002; 1364 | repeated SeqSmall0 repeated_seq_small_0_hifield = 1000002; 1365 | repeated SeqSmall0 packed_seq_small_0_lowfield = 3 [packed = true]; 1366 | repeated SeqSmall0 packed_seq_small_0_midfield = 1003 [packed = true]; 1367 | repeated SeqSmall0 packed_seq_small_0_hifield = 1000003 [packed = true]; 1368 | 1369 | enum SeqSmall1 { 1370 | SEQ_SMALL_1_DEFAULT = 1; 1371 | SEQ_SMALL_1_2 = 2; 1372 | SEQ_SMALL_1_3 = 3; 1373 | } 1374 | optional SeqSmall1 optional_seq_small_1_lowfield = 4; 1375 | optional SeqSmall1 optional_seq_small_1_midfield = 1004; 1376 | optional SeqSmall1 
optional_seq_small_1_hifield = 1000004; 1377 | repeated SeqSmall1 repeated_seq_small_1_lowfield = 5; 1378 | repeated SeqSmall1 repeated_seq_small_1_midfield = 1005; 1379 | repeated SeqSmall1 repeated_seq_small_1_hifield = 1000005; 1380 | repeated SeqSmall1 packed_seq_small_1_lowfield = 6 [packed = true]; 1381 | repeated SeqSmall1 packed_seq_small_1_midfield = 1006 [packed = true]; 1382 | repeated SeqSmall1 packed_seq_small_1_hifield = 1000006 [packed = true]; 1383 | 1384 | enum SeqLarge { 1385 | SEQ_LARGE_DEFAULT = -1; 1386 | SEQ_LARGE_0 = 0; 1387 | SEQ_LARGE_1 = 1; 1388 | SEQ_LARGE_2 = 2; 1389 | SEQ_LARGE_3 = 3; 1390 | SEQ_LARGE_4 = 4; 1391 | SEQ_LARGE_5 = 5; 1392 | SEQ_LARGE_6 = 6; 1393 | SEQ_LARGE_7 = 7; 1394 | SEQ_LARGE_8 = 8; 1395 | SEQ_LARGE_9 = 9; 1396 | SEQ_LARGE_10 = 10; 1397 | SEQ_LARGE_11 = 11; 1398 | SEQ_LARGE_12 = 12; 1399 | SEQ_LARGE_13 = 13; 1400 | SEQ_LARGE_14 = 14; 1401 | SEQ_LARGE_15 = 15; 1402 | SEQ_LARGE_16 = 16; 1403 | SEQ_LARGE_17 = 17; 1404 | SEQ_LARGE_18 = 18; 1405 | SEQ_LARGE_19 = 19; 1406 | SEQ_LARGE_20 = 20; 1407 | SEQ_LARGE_21 = 21; 1408 | SEQ_LARGE_22 = 22; 1409 | SEQ_LARGE_23 = 23; 1410 | SEQ_LARGE_24 = 24; 1411 | SEQ_LARGE_25 = 25; 1412 | SEQ_LARGE_26 = 26; 1413 | SEQ_LARGE_27 = 27; 1414 | SEQ_LARGE_28 = 28; 1415 | SEQ_LARGE_29 = 29; 1416 | SEQ_LARGE_30 = 30; 1417 | SEQ_LARGE_31 = 31; 1418 | SEQ_LARGE_32 = 32; 1419 | SEQ_LARGE_33 = 33; 1420 | } 1421 | optional SeqLarge optional_seq_large_lowfield = 7; 1422 | optional SeqLarge optional_seq_large_midfield = 1007; 1423 | optional SeqLarge optional_seq_large_hifield = 1000007; 1424 | repeated SeqLarge repeated_seq_large_lowfield = 8; 1425 | repeated SeqLarge repeated_seq_large_midfield = 1008; 1426 | repeated SeqLarge repeated_seq_large_hifield = 1000008; 1427 | repeated SeqLarge packed_seq_large_lowfield = 9 [packed = true]; 1428 | repeated SeqLarge packed_seq_large_midfield = 1009 [packed = true]; 1429 | repeated SeqLarge packed_seq_large_hifield = 1000009 [packed = true]; 1430 | 1431 | 
enum Arbitrary { 1432 | ARBITRARY_DEFAULT = -123123; 1433 | ARBITRARY_1 = -123; 1434 | ARBITRARY_2 = 213; 1435 | ARBITRARY_3 = 213213; 1436 | ARBITRARY_MIN = -2147483648; 1437 | ARBITRARY_MAX = 2147483647; 1438 | } 1439 | optional Arbitrary optional_arbitrary_lowfield = 10; 1440 | optional Arbitrary optional_arbitrary_midfield = 1010; 1441 | optional Arbitrary optional_arbitrary_hifield = 1000010; 1442 | repeated Arbitrary repeated_arbitrary_lowfield = 11; 1443 | repeated Arbitrary repeated_arbitrary_midfield = 1011; 1444 | repeated Arbitrary repeated_arbitrary_hifield = 1000011; 1445 | repeated Arbitrary packed_arbitrary_lowfield = 12 [packed = true]; 1446 | repeated Arbitrary packed_arbitrary_midfield = 1012 [packed = true]; 1447 | repeated Arbitrary packed_arbitrary_hifield = 1000012 [packed = true]; 1448 | 1449 | // An arbitrary field we can append to to break the runs of repeated fields. 1450 | optional int32 other_field = 99; 1451 | } 1452 | -------------------------------------------------------------------------------- /testdata/unittest.proto.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/protocolbuffers/protoscope/8e7a6aafa2c9958527b1e0747e66e1bfff045819/testdata/unittest.proto.pb -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | package protoscope 16 | 17 | import ( 18 | "encoding/binary" 19 | "math" 20 | "strconv" 21 | "strings" 22 | "unicode" 23 | "unicode/utf8" 24 | 25 | "google.golang.org/protobuf/encoding/protowire" 26 | "google.golang.org/protobuf/reflect/protoreflect" 27 | 28 | "github.com/protocolbuffers/protoscope/internal/print" 29 | ) 30 | 31 | // WriterOptions represents options that can be passed to control the writer's 32 | // decoding heuristics. 33 | type WriterOptions struct { 34 | // Disables treating any fields as containing UTF-8. 35 | NoQuotedStrings bool 36 | // Treats every length-prefixed field as being a message, printing hex if 37 | // an error is hit. 38 | AllFieldsAreMessages bool 39 | // Disables emitting !{}. 40 | NoGroups bool 41 | // Always prints the wire type of a field. Also disables !{} group syntax, 42 | // like NoGroups 43 | ExplicitWireTypes bool 44 | // Never prints {}; instead, prints out an explicit length prefix (but still 45 | // indents the contents of delimited things. 46 | ExplicitLengthPrefixes bool 47 | 48 | // Schema is a Descriptor that describes the message type we're expecting to 49 | // disassemble, if any. 50 | Schema protoreflect.MessageDescriptor 51 | // Prints field names, using Schema as the source of names. 52 | PrintFieldNames bool 53 | // Prints enum value names, using Schema as the source of names. 
54 | PrintEnumNames bool 55 | } 56 | 57 | func Write(src []byte, opts WriterOptions) string { 58 | w := writer{WriterOptions: opts} 59 | w.Indent = 2 60 | w.MaxFolds = 3 61 | 62 | if opts.Schema != nil { 63 | w.descs.Push(opts.Schema) 64 | } 65 | 66 | for len(src) > 0 { 67 | w.NewLine() 68 | rest, ok := w.decodeField(src) 69 | if !ok { 70 | w.DiscardLine() 71 | break 72 | } 73 | src = rest 74 | } 75 | 76 | // Order does not matter for fixing up unclosed groups 77 | for _ = range w.groups { 78 | w.resetGroup() 79 | } 80 | 81 | w.dumpHexString(src) 82 | return string(w.Finish()) 83 | } 84 | 85 | type line struct { 86 | text *strings.Builder 87 | comments []string 88 | 89 | // indent is how much the *next* line should be indented compared to this 90 | // one. 91 | indent int 92 | } 93 | 94 | type group struct { 95 | number uint64 96 | hasDesc bool 97 | } 98 | 99 | type writer struct { 100 | WriterOptions 101 | print.Printer 102 | 103 | groups print.Stack[group] 104 | descs print.Stack[protoreflect.MessageDescriptor] 105 | } 106 | 107 | func (w *writer) dumpHexString(src []byte) { 108 | if len(src) == 0 { 109 | return 110 | } 111 | 112 | w.NewLine() 113 | w.Write("`") 114 | for i, b := range src { 115 | if i > 0 && i%40 == 0 { 116 | w.Write("`") 117 | w.NewLine() 118 | w.Write("`") 119 | } 120 | w.Writef("%02x", b) 121 | } 122 | w.Write("`") 123 | } 124 | 125 | func (w *writer) resetGroup() { 126 | // Do some surgery on the line with the !{ to replace it with an SGROUP. 
127 | start := w.DropBlock() 128 | 129 | if !w.NoGroups { 130 | // Remove the trailing " !{" 131 | start.Truncate(start.Len() - 3) 132 | start.WriteString("SGROUP") 133 | } 134 | } 135 | 136 | func (w *writer) decodeVarint(src []byte, fd protoreflect.FieldDescriptor) ([]byte, bool) { 137 | rest, value, extra, ok := decodeVarint(src) 138 | if !ok { 139 | return nil, false 140 | } 141 | src = rest 142 | 143 | if extra > 0 { 144 | w.Writef("long-form:%d ", extra) 145 | } 146 | 147 | ftype := protoreflect.Int64Kind 148 | if fd != nil { 149 | ftype = fd.Kind() 150 | } 151 | 152 | // Pick a deserialization based on the type. If the type doesn't really 153 | // make sense (like a double), we fall back on int64. We ignore 32-bit-ness: 154 | // everything is 64 bit here. 155 | switch ftype { 156 | case protoreflect.BoolKind: 157 | switch value { 158 | case 0: 159 | w.Write("false") 160 | return src, true 161 | case 1: 162 | w.Write("true") 163 | return src, true 164 | } 165 | fallthrough 166 | case protoreflect.Uint32Kind, protoreflect.Uint64Kind, 167 | protoreflect.Fixed32Kind, protoreflect.Fixed64Kind: 168 | w.Write(value) 169 | case protoreflect.Sint32Kind, protoreflect.Sint64Kind: 170 | // Undo ZigZag encoding, then print as signed. 171 | value = (value >> 1) ^ -(value & 1) 172 | w.Writef("%dz", int64(value)) 173 | case protoreflect.EnumKind: 174 | if w.PrintEnumNames && value < math.MaxInt32 { 175 | ed := fd.Enum().Values() 176 | edv := ed.ByNumber(protoreflect.EnumNumber(value)) 177 | if edv != nil { 178 | w.Remark(string(edv.Name())) 179 | } 180 | } 181 | fallthrough 182 | default: 183 | w.Write(int64(value)) 184 | } 185 | 186 | return src, true 187 | } 188 | 189 | // decodeFixed prints out a single fixed-length value. 190 | // 191 | // This monster of a generic function exists to reduce keeping the two copies of 192 | // 32-bit and 64-bit logic in sync. 
193 | func printFixed[ 194 | U uint32 | uint64, 195 | I int32 | int64, 196 | F float32 | float64, 197 | ]( 198 | w *writer, 199 | value U, 200 | suffix string, 201 | itof func(U) F, 202 | src []byte, 203 | fd protoreflect.FieldDescriptor, 204 | ) ([]byte, bool) { 205 | 206 | var ftype protoreflect.Kind 207 | if fd != nil { 208 | ftype = fd.Kind() 209 | } 210 | 211 | switch ftype { 212 | case protoreflect.Uint32Kind, protoreflect.Uint64Kind, 213 | protoreflect.Fixed32Kind, protoreflect.Fixed64Kind: 214 | w.Writef("%di%s", value, suffix) 215 | case protoreflect.EnumKind: 216 | if w.PrintEnumNames && value < math.MaxInt32 { 217 | ed := fd.Enum().Values() 218 | edv := ed.ByNumber(protoreflect.EnumNumber(value)) 219 | if edv != nil { 220 | w.Remark(string(edv.Name())) 221 | } 222 | } 223 | fallthrough 224 | case protoreflect.Int32Kind, protoreflect.Int64Kind, 225 | protoreflect.Sint32Kind, protoreflect.Sint64Kind, 226 | protoreflect.Sfixed32Kind, protoreflect.Sfixed64Kind, 227 | protoreflect.BoolKind: 228 | w.Writef("%di%s", I(value), suffix) 229 | default: 230 | // Assume this is a float by default. 231 | fvalue := float64(itof(value)) 232 | if math.IsInf(fvalue, 1) { 233 | w.Writef("inf%s", suffix) 234 | } else if math.IsInf(fvalue, -1) { 235 | w.Writef("-inf%s", suffix) 236 | } else if math.IsNaN(fvalue) { 237 | // NaNs always print as bits, because there are many NaNs. 238 | w.Writef("0x%xi%s", value, suffix) 239 | } else { 240 | if s := ftoa(value, ftype == protoreflect.DoubleKind || ftype == protoreflect.FloatKind); s != "" { 241 | // For floats, i64 is actually implied. 
242 | if suffix == "64" { 243 | w.Write(s) 244 | } else { 245 | w.Writef("%si%s", s, suffix) 246 | } 247 | w.Remarkf("%#xi%s", U(value), suffix) 248 | } else { 249 | w.Writef("%di%s", I(value), suffix) 250 | } 251 | } 252 | } 253 | 254 | return src, true 255 | } 256 | 257 | func (w *writer) decodeI32(src []byte, fd protoreflect.FieldDescriptor) ([]byte, bool) { 258 | if len(src) < 4 { 259 | return nil, false 260 | } 261 | value := binary.LittleEndian.Uint32(src) 262 | src = src[4:] 263 | 264 | return printFixed[uint32, int32, float32](w, value, "32", math.Float32frombits, src, fd) 265 | } 266 | 267 | func (w *writer) decodeI64(src []byte, fd protoreflect.FieldDescriptor) ([]byte, bool) { 268 | if len(src) < 8 { 269 | return nil, false 270 | } 271 | value := binary.LittleEndian.Uint64(src) 272 | src = src[8:] 273 | 274 | return printFixed[uint64, int64, float64](w, value, "64", math.Float64frombits, src, fd) 275 | } 276 | 277 | func (w *writer) decodeField(src []byte) ([]byte, bool) { 278 | rest, value, extra, ok := decodeVarint(src) 279 | if !ok { 280 | return nil, false 281 | } 282 | src = rest 283 | 284 | // 0 is never a valid field number, so this probably isn't a message. 
285 | if value>>3 == 0 && !w.AllFieldsAreMessages { 286 | return nil, false 287 | } 288 | 289 | if extra > 0 { 290 | w.Writef("long-form:%d ", extra) 291 | } 292 | number := value >> 3 293 | w.Writef("%d:", number) 294 | 295 | var fd protoreflect.FieldDescriptor 296 | if d := w.descs.Peek(); d != nil && *d != nil { 297 | fd = (*d).Fields().ByNumber(protowire.Number(number)) 298 | } 299 | 300 | if w.PrintFieldNames && fd != nil { 301 | w.Remark(fd.Name()) 302 | } 303 | 304 | switch value & 0x7 { 305 | case 0: 306 | if w.ExplicitWireTypes { 307 | w.Write("VARINT") 308 | } 309 | w.Write(" ") 310 | return w.decodeVarint(src, fd) 311 | 312 | case 1: 313 | if w.ExplicitWireTypes { 314 | w.Write("I64") 315 | } 316 | w.Write(" ") 317 | return w.decodeI64(src, fd) 318 | 319 | case 5: 320 | if w.ExplicitWireTypes { 321 | w.Write("I32") 322 | } 323 | w.Write(" ") 324 | return w.decodeI32(src, fd) 325 | 326 | case 3: 327 | if fd != nil { 328 | w.descs.Push(fd.Message()) 329 | } 330 | 331 | if w.ExplicitWireTypes || w.NoGroups { 332 | w.Write("SGROUP") 333 | w.StartBlock(print.BlockInfo{ 334 | HasDelimiters: false, 335 | HeightToFoldAt: 2, 336 | UnindentAt: 1, 337 | }) 338 | } else { 339 | w.Write(" !{") 340 | w.StartBlock(print.BlockInfo{ 341 | HasDelimiters: true, 342 | HeightToFoldAt: 3, 343 | UnindentAt: 1, 344 | }) 345 | } 346 | w.groups.Push(group{number, fd != nil}) 347 | 348 | case 4: 349 | if len(w.groups) == 0 { 350 | w.Write("EGROUP") 351 | } else { 352 | lastGroup := w.groups.Pop() 353 | if lastGroup.hasDesc { 354 | _ = w.descs.Pop() 355 | } 356 | 357 | if lastGroup.number == number { 358 | if w.ExplicitWireTypes || w.NoGroups { 359 | w.Write("EGROUP") 360 | } else { 361 | w.Current().Reset() 362 | /*if w.PrintFieldNames && fd != nil { 363 | // Drop the field comment for this line. 
364 | w.line(-1).comments = w.line(-1).comments[1:] 365 | }*/ 366 | 367 | if extra > 0 { 368 | w.Writef("long-form:%d", extra) 369 | w.NewLine() 370 | } 371 | w.Write("}") 372 | } 373 | w.EndBlock() 374 | } else { 375 | w.resetGroup() 376 | w.Write("EGROUP") 377 | } 378 | } 379 | 380 | case 2: 381 | if w.ExplicitWireTypes || w.ExplicitLengthPrefixes { 382 | w.Write("LEN") 383 | } 384 | w.Write(" ") 385 | 386 | rest, value, extra, ok := decodeVarint(src) 387 | if !ok { 388 | return nil, false 389 | } 390 | src = rest 391 | 392 | if uint64(len(src)) < value { 393 | return nil, false 394 | } 395 | 396 | delimited := src[:int(value)] 397 | src = src[int(value):] 398 | 399 | if extra > 0 { 400 | w.Writef("long-form:%d ", extra) 401 | } 402 | if w.ExplicitLengthPrefixes { 403 | w.Write(int64(value)) 404 | w.StartBlock(print.BlockInfo{ 405 | HasDelimiters: false, 406 | HeightToFoldAt: 2, 407 | UnindentAt: 0, 408 | }) 409 | } else { 410 | w.Write("{") 411 | w.StartBlock(print.BlockInfo{ 412 | HasDelimiters: true, 413 | HeightToFoldAt: 3, 414 | UnindentAt: 1, 415 | }) 416 | } 417 | 418 | ftype := protoreflect.MessageKind 419 | if fd != nil { 420 | ftype = fd.Kind() 421 | } 422 | 423 | decodePacked := func(decode func([]byte, protoreflect.FieldDescriptor) ([]byte, bool)) { 424 | count := 0 425 | for ; ; count++ { 426 | w.NewLine() 427 | s, ok := decode(delimited, fd) 428 | if !ok { 429 | w.DiscardLine() 430 | break 431 | } 432 | delimited = s 433 | } 434 | 435 | w.FoldIntoColumns(8, count) 436 | } 437 | 438 | decodeBytes := func() ([]byte, bool) { 439 | w.dumpHexString(delimited) 440 | if !w.ExplicitLengthPrefixes { 441 | w.NewLine() 442 | w.Write("}") 443 | } 444 | w.EndBlock() 445 | return src, true 446 | } 447 | 448 | switch ftype { 449 | case protoreflect.BoolKind, protoreflect.EnumKind, 450 | protoreflect.Int32Kind, protoreflect.Int64Kind, 451 | protoreflect.Uint32Kind, protoreflect.Uint64Kind, 452 | protoreflect.Sint32Kind, protoreflect.Sint64Kind: 453 | 
decodePacked(w.decodeVarint) 454 | return decodeBytes() 455 | 456 | case protoreflect.Fixed32Kind, protoreflect.Sfixed32Kind, 457 | protoreflect.FloatKind: 458 | decodePacked(w.decodeI32) 459 | return decodeBytes() 460 | 461 | case protoreflect.Fixed64Kind, protoreflect.Sfixed64Kind, 462 | protoreflect.DoubleKind: 463 | decodePacked(w.decodeI64) 464 | return decodeBytes() 465 | 466 | case protoreflect.StringKind, protoreflect.BytesKind: 467 | goto decodeUtf8 468 | } 469 | 470 | // This is in a block so that the gotos can jump over the declarations 471 | // safely. 472 | { 473 | startLine := w.Mark() 474 | src2 := delimited 475 | outerGroups := w.groups 476 | w.groups = nil 477 | if fd != nil { 478 | w.descs.Push(fd.Message()) 479 | } 480 | for len(src2) > 0 { 481 | w.NewLine() 482 | s, ok := w.decodeField(src2) 483 | if !ok { 484 | // Clip off an incompletely printed line. 485 | w.DiscardLine() 486 | break 487 | } 488 | src2 = s 489 | } 490 | if fd != nil { 491 | w.descs.Pop() 492 | } 493 | 494 | // Order does not matter for fixing up unclosed groups 495 | for range w.groups { 496 | w.resetGroup() 497 | } 498 | w.groups = outerGroups 499 | 500 | // If we consumed all the bytes, we're done and can wrap up. However, if we 501 | // consumed *some* bytes, and the user requested unconditional message 502 | // parsing, we'll continue regardless. We don't bother in the case where we 503 | // failed at the start because the `...` case below will do a cleaner job. 504 | if len(src2) == 0 || (w.AllFieldsAreMessages && len(src2) < len(delimited)) { 505 | delimited = src2 506 | return decodeBytes() 507 | } else { 508 | w.Reset(startLine) 509 | } 510 | } 511 | 512 | // Otherwise, maybe it's a UTF-8 string. 
513 | decodeUtf8: 514 | if !w.NoQuotedStrings && utf8.Valid(delimited) { 515 | runes := utf8.RuneCount(delimited) 516 | 517 | s := string(delimited) 518 | unprintable := 0 519 | for _, r := range s { 520 | if !unicode.IsGraphic(r) { 521 | unprintable++ 522 | } 523 | } 524 | if float64(unprintable)/float64(runes) > 0.3 { 525 | return decodeBytes() 526 | } 527 | 528 | w.NewLine() 529 | w.Write("\"") 530 | for i, r := range s { 531 | if i != 0 && i%80 == 0 { 532 | w.Write("\"") 533 | w.NewLine() 534 | w.Write("\"") 535 | } 536 | 537 | switch r { 538 | case '\n': 539 | w.Write("\\n") 540 | case '\\': 541 | w.Write("\\\\") 542 | case '"': 543 | w.Write("\\\"") 544 | default: 545 | if !unicode.IsGraphic(r) { 546 | enc := make([]byte, 4) 547 | enc = enc[:utf8.EncodeRune(enc, r)] 548 | for _, b := range enc { 549 | w.Writef("\\x%02x", b) 550 | } 551 | } else { 552 | w.Writef("%c", r) 553 | } 554 | } 555 | } 556 | w.Write("\"") 557 | delimited = nil 558 | } 559 | 560 | // Who knows what it is? Bytes or something. 
561 | return decodeBytes() 562 | case 6, 7: 563 | return nil, false 564 | } 565 | return src, true 566 | } 567 | 568 | func ftoa[I uint32 | uint64](bits I, floatForSure bool) string { 569 | var mantLen, expLen, bitLen int 570 | var value float64 571 | switch b := any(bits).(type) { 572 | case uint32: 573 | bitLen = 32 574 | expLen = 8 575 | value = float64(math.Float32frombits(b)) 576 | case uint64: 577 | bitLen = 64 578 | expLen = 11 579 | value = math.Float64frombits(b) 580 | } 581 | mantLen = bitLen - expLen - 1 582 | 583 | if bits == 0 { 584 | return "0.0" 585 | } else if bits == 1<<(bitLen-1) { 586 | return "-0.0" 587 | } 588 | 589 | exp := int64((bits >> mantLen) & ((1 << expLen) - 1)) 590 | exp -= (1 << (expLen - 1)) - 1 591 | absExp := exp 592 | if absExp < 0 { 593 | absExp = -absExp 594 | } 595 | bigExp := int64(1)<<(expLen-1) - 1 596 | 597 | if absExp >= bigExp && !floatForSure { 598 | // Very large or very small exponents indicate this probably isn't actually 599 | // a float. 600 | return "" 601 | } 602 | 603 | // Only print floats in decimal if it can be round-tripped. 604 | decimal := strconv.FormatFloat(value, 'g', -1, bitLen) 605 | 606 | roundtrip, _ := strconv.ParseFloat(decimal, bitLen) 607 | var bits2 I 608 | switch any(bits).(type) { 609 | case uint32: 610 | bits2 = I(math.Float32bits(float32(roundtrip))) 611 | case uint64: 612 | bits2 = I(math.Float64bits(roundtrip)) 613 | } 614 | 615 | if bits2 != bits { 616 | decimal = strconv.FormatFloat(value, 'x', -1, bitLen) 617 | } 618 | 619 | // Discard a + after the exponent. 620 | decimal = strings.Replace(decimal, "+", "", -1) 621 | 622 | // Insert a decimal point if necessary. 
623 | if !strings.Contains(decimal, ".") { 624 | if strings.Contains(decimal, "e") { 625 | decimal = strings.Replace(decimal, "e", ".0e", -1) 626 | } else { 627 | decimal += ".0" 628 | } 629 | } 630 | 631 | return decimal 632 | } 633 | 634 | func decodeVarint(src []byte) (rest []byte, value uint64, extraBytes int, ok bool) { 635 | count := 0 636 | for { 637 | if len(src) == 0 { 638 | ok = false 639 | return 640 | } 641 | 642 | var b byte 643 | b, src = src[0], src[1:] 644 | if count == 9 && b > 1 { 645 | // The tenth byte has a special upper limit: it may only be 0 or 1. 646 | ok = false 647 | return 648 | } 649 | 650 | value |= uint64(b&0x7f) << (count * 7) 651 | count++ 652 | 653 | if b&0x7f == 0 { 654 | extraBytes++ 655 | } else { 656 | extraBytes = 0 657 | } 658 | 659 | if b&0x80 == 0 { 660 | break 661 | } 662 | } 663 | 664 | if value == 0 { 665 | extraBytes-- 666 | } 667 | rest = src 668 | ok = true 669 | return 670 | } 671 | -------------------------------------------------------------------------------- /writer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | package protoscope 16 | 17 | import ( 18 | "embed" 19 | "fmt" 20 | "os" 21 | "reflect" 22 | "strings" 23 | "testing" 24 | 25 | "github.com/google/go-cmp/cmp" 26 | 27 | descpb "google.golang.org/protobuf/types/descriptorpb" 28 | 29 | "google.golang.org/protobuf/proto" 30 | "google.golang.org/protobuf/reflect/protodesc" 31 | "google.golang.org/protobuf/reflect/protoreflect" 32 | "google.golang.org/protobuf/reflect/protoregistry" 33 | ) 34 | 35 | //go:embed testdata/* 36 | var testdata embed.FS 37 | 38 | var fileset = ParseFileSet() 39 | 40 | func ParseFileSet() *protoregistry.Files { 41 | data, err := testdata.ReadFile("testdata/unittest.proto.pb") 42 | if err != nil { 43 | panic(err) 44 | } 45 | 46 | fds := new(descpb.FileDescriptorSet) 47 | if err := proto.Unmarshal(data, fds); err != nil { 48 | panic(err) 49 | } 50 | 51 | files, err := protodesc.NewFiles(fds) 52 | if err != nil { 53 | panic(err) 54 | } 55 | 56 | return files 57 | } 58 | 59 | func GetDesc(name string) protoreflect.MessageDescriptor { 60 | desc, err := protoregistry.GlobalFiles.FindDescriptorByName(protoreflect.FullName(name)) 61 | if err != nil { 62 | desc, err = fileset.FindDescriptorByName(protoreflect.FullName(name)) 63 | } 64 | if err != nil { 65 | panic(err) 66 | } 67 | 68 | return desc.(protoreflect.MessageDescriptor) 69 | } 70 | 71 | func TestGoldens(t *testing.T) { 72 | type golden struct { 73 | name string 74 | pb []byte 75 | want string 76 | config string 77 | opts WriterOptions 78 | } 79 | 80 | var tests []golden 81 | dir, err := testdata.ReadDir("testdata") 82 | if err != nil { 83 | t.Fatal(err) 84 | } 85 | for _, d := range dir { 86 | if !strings.HasSuffix(d.Name(), ".golden") { 87 | continue 88 | } 89 | 90 | goldenBytes, err := testdata.ReadFile("testdata/" + d.Name()) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | goldenText := string(goldenBytes) 95 | 96 | // Pull off the first line, which must be a comment. 
97 | comment, rest, _ := strings.Cut(goldenText, "\n") 98 | goldenText = rest 99 | 100 | config := strings.Fields(strings.TrimPrefix(comment, "#")) 101 | 102 | pb, err := testdata.ReadFile("testdata/" + config[0]) 103 | if err != nil { 104 | t.Fatal(err) 105 | } 106 | 107 | opts := WriterOptions{} 108 | v := reflect.ValueOf(&opts).Elem() 109 | for _, opt := range config[1:] { 110 | if name := strings.TrimPrefix(opt, "Schema="); name != opt { 111 | opts.Schema = GetDesc(name) 112 | continue 113 | } 114 | 115 | v.FieldByName(opt).SetBool(true) 116 | } 117 | 118 | tests = append(tests, golden{ 119 | name: d.Name(), 120 | pb: pb, 121 | want: goldenText, 122 | config: comment, 123 | opts: opts, 124 | }) 125 | } 126 | 127 | if _, ok := os.LookupEnv("REGEN_GOLDENS"); ok { 128 | for _, tt := range tests { 129 | got := Write(tt.pb, tt.opts) 130 | f, _ := os.Create("testdata/" + tt.name) 131 | defer f.Close() 132 | 133 | fmt.Fprintln(f, tt.config) 134 | fmt.Fprint(f, got) 135 | } 136 | return 137 | } 138 | 139 | for _, tt := range tests { 140 | t.Run(tt.name, func(t *testing.T) { 141 | got := Write(tt.pb, tt.opts) 142 | if d := cmp.Diff(tt.want, got); d != "" { 143 | t.Fatal("output mismatch (-want, +got):", d) 144 | } 145 | }) 146 | } 147 | } 148 | --------------------------------------------------------------------------------