├── .github └── workflows │ ├── build.yml │ └── release.yml ├── .gitignore ├── Cargo.toml ├── Foo.class ├── LICENSE.TXT ├── README.md ├── docs ├── assembly_specification.md └── assembly_tutorial.md ├── examples ├── greet1.j ├── greet2.j ├── hello.j └── minimal.j ├── rustfmt.toml └── src ├── ass_main.rs ├── dis_main.rs ├── file_input_util.rs ├── file_output_util.rs ├── lib ├── assemble │ ├── base_parser.rs │ ├── class_parser.rs │ ├── cpool │ │ ├── builder.rs │ │ ├── mod.rs │ │ ├── raw_const.rs │ │ ├── sym_ref_resolver.rs │ │ └── types.rs │ ├── flags.rs │ ├── label.rs │ ├── mod.rs │ ├── parse_attr.rs │ ├── parse_class.rs │ ├── parse_code.rs │ ├── parse_literal.rs │ ├── span.rs │ ├── string.rs │ ├── tokenize.rs │ └── writer.rs ├── classfile │ ├── attrs.rs │ ├── code.rs │ ├── cpool.rs │ ├── mod.rs │ ├── parse.rs │ └── reader.rs ├── disassemble │ ├── disassembler.rs │ ├── flags.rs │ ├── mod.rs │ ├── refprinter.rs │ └── string.rs ├── mhtags.rs ├── mod.rs └── util.rs └── main.rs /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/release.yml 2 | 3 | on: push 4 | 5 | jobs: 6 | release: 7 | name: release ${{ matrix.target }} 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | include: 13 | - target: x86_64-pc-windows-gnu 14 | archive: zip 15 | - target: x86_64-unknown-linux-musl 16 | archive: tar.gz 17 | steps: 18 | - uses: actions/checkout@master 19 | - name: Compile and release 20 | uses: rust-build/rust-build.action@v1.4.5 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | RUSTTARGET: ${{ matrix.target }} 25 | ARCHIVE_TYPES: ${{ matrix.archive }} 26 | msys2-build-test: 27 | strategy: 28 | fail-fast: false 29 | matrix: 30 | sys: [MINGW64, UCRT64,CLANG64] 31 | runs-on: windows-latest 32 | steps: 33 | - uses: actions/checkout@v4 34 | - name: Setup MSYS2 35 | uses: msys2/setup-msys2@v2 36 | with: 37 | msystem: ${{ matrix.sys }} 38 | update: 
true 39 | install: base-devel git 40 | pacboy: >- 41 | rust:p 42 | - name: Run cargo 43 | shell: msys2 {0} 44 | run: cargo build --release 45 | - name: Run tests 46 | shell: msys2 {0} 47 | run: cargo test --release 48 | - name: Rename Compile Output 49 | shell: msys2 {0} 50 | run: mv target/release/krak2.exe target/release/${{ matrix.sys }}_krak2.exe 51 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # .github/workflows/release.yml 2 | 3 | on: 4 | release: 5 | types: [created] 6 | 7 | jobs: 8 | release: 9 | name: release ${{ matrix.target }} 10 | runs-on: ubuntu-latest 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | include: 15 | - target: x86_64-pc-windows-gnu 16 | archive: zip 17 | - target: x86_64-unknown-linux-musl 18 | archive: tar.gz 19 | steps: 20 | - uses: actions/checkout@master 21 | - name: Compile and release 22 | uses: rust-build/rust-build.action@v1.4.5 23 | env: 24 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 25 | with: 26 | RUSTTARGET: ${{ matrix.target }} 27 | ARCHIVE_TYPES: ${{ matrix.archive }} 28 | msys2-build-test: 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | sys: [MINGW64, UCRT64,CLANG64] 33 | runs-on: windows-latest 34 | steps: 35 | - uses: actions/checkout@v4 36 | - name: Setup MSYS2 37 | uses: msys2/setup-msys2@v2 38 | with: 39 | msystem: ${{ matrix.sys }} 40 | update: true 41 | install: base-devel git 42 | pacboy: >- 43 | rust:p 44 | - name: Run cargo 45 | shell: msys2 {0} 46 | run: cargo build --release 47 | - name: Run tests 48 | shell: msys2 {0} 49 | run: cargo test --release 50 | - name: Rename Compile Output 51 | shell: msys2 {0} 52 | run: mv target/release/krak2.exe target/release/${{ matrix.sys }}_krak2.exe 53 | - name: Release with Notes 54 | uses: softprops/action-gh-release@v1 55 | with: 56 | files: | 57 | target/release/${{ matrix.sys }}_krak2.exe 58 | env: 59 | 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 60 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /out 3 | out.* 4 | temp 5 | temp.* 6 | Cargo.lock 7 | 8 | target/ 9 | !.mvn/wrapper/maven-wrapper.jar 10 | !**/src/main/**/target/ 11 | !**/src/test/**/target/ 12 | 13 | ### IntelliJ IDEA ### 14 | .idea/ 15 | *.iws 16 | *.iml 17 | *.ipr 18 | 19 | ### Eclipse ### 20 | .apt_generated 21 | .classpath 22 | .factorypath 23 | .project 24 | .settings 25 | .springBeans 26 | .sts4-cache 27 | 28 | ### NetBeans ### 29 | /nbproject/private/ 30 | /nbbuild/ 31 | /dist/ 32 | /nbdist/ 33 | /.nb-gradle/ 34 | build/ 35 | !**/src/main/**/build/ 36 | !**/src/test/**/build/ 37 | 38 | ### VS Code ### 39 | .vscode/ 40 | 41 | ### Mac OS ### 42 | .DS_Store -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "krakatau2" 3 | version = "2.0.0-alpha" 4 | edition = "2021" 5 | 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | clap = { version = "3.1.18", features = ["derive"] } 11 | lazy_static = "1.4.0" 12 | regex = "1.4.3" 13 | zip = { git = "https://github.com/Storyyeller/zip.git", default-features = false, features=["deflate"] } 14 | hexf-parse = "0.2.1" 15 | typed-arena = "2.0.1" 16 | anyhow = "1.0.70" 17 | 18 | 19 | [[bin]] 20 | name = "krak2" 21 | path = "src/main.rs" 22 | -------------------------------------------------------------------------------- /Foo.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Storyyeller/Krakatau/6da0abc20603fecaaa0e3300ebd97e04f07c2fb6/Foo.class -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | Krakatau provides an assembler and disassembler for Java bytecode, which allows you to convert binary classfiles to a human readable text format, make changes, and convert it back to a classfile, even for obfuscated code. You can also create your own classfiles from scratch by writing bytecode manually, and can examine and compare low level details of Java binaries. Unlike `javap`, the Krakatau disassembler can handle even highly obfuscated code, and the disassembled output can be reassembled into a classfile. 2 | 3 | Krakatau also provides a decompiler for converting Java binaries to readable source code. Unlike other decompilers, the Krakatau decompiler was specifically designed for working with obfuscated code and can easily handle tricks that break other decompilers. However, the Krakatau decompiler does not support some Java 8+ features such as lambdas, so it works best on older code. 4 | 5 | Krakatau's assembler syntax is mostly a superset of Jasmin syntax with some minor incompatibilities, but unlike Jasmin, Krakatau has full support for the Java 19 bytecode specification and even supports some undocumented features found in old versions of the JVM. For an overview of the assembler syntax, see the [tutorial](docs/assembly_tutorial.md) or [complete specification](docs/assembly_specification.md). 6 | 7 | ## Installation 8 | 9 | First, you will need [to install Rust and Cargo](https://www.rust-lang.org/tools/install). Then clone this repo and run `cargo build --release`. This will produce a binary in `target/release/krak2`, which you can call directly, add to PATH, symlink, etc. 10 | 11 | 12 | ## Disassembly 13 | 14 | The disassembler has two modes: default and roundtrip. The default mode is optimized for readability and ease of modification of the resulting assembly files. 
When the output is reassembled, it will result in classfiles that are equivalent in behavior to the original from the perspective of the JVM specification, but not necessarily bit for bit identical (for example, the constant pool entries may be reordered). Roundtrip mode produces output that will reassemble into classfiles that are bit for bit identical to the original, but this means that the assembly files preserve low level encoding information that makes them harder to read, such as the exact order of constant pool entries. **It is recommended to use roundtrip mode when working with code that relies on non-standard attributes, such as CLDC code or Scala code**. 15 | 16 | Example usage: 17 | 18 | krak2 dis --out temp RecordTest.class 19 | 20 | krak2 dis --out disassembled.zip --roundtrip r0lling-challenge.jar 21 | 22 | You can either disassemble an individual classfile or an entire jar file. If the input filename ends in `.jar` or `.zip`, it will be treated as a jar file and every `.class` file inside the jar will be disassembled. 23 | 24 | The `--out` option controls the output location. If the `--out` value is a directory the output will be placed in individual files under that directory. Otherwise, if the `--out` value ends in `.jar` or `.zip`, the output will be placed in a single zipfile at that location. If the `--out` value ends with `.j`, output will be written to that file (note that a single `.j` file can contain multiple classes - all the classes will be disassembled and written to the same file, one after another.) 25 | 26 | To disassemble in roundtrip mode as described above, pass the `--roundtrip` option (or `-r` for short). 27 | 28 | ## Assembly 29 | 30 | The Krakatau assembler allows you to write Java bytecode in a human friendly text based format and convert it into binary Java classfiles. 31 | 32 | krak2 asm --out temp Krakatau/tests/assembler/good/strictfp.j 33 | 34 | You can either assemble an individual `.j` file or an entire jar file.
If the input filename ends in `.jar` or `.zip`, it will be treated as a zip archive and every `.j` file inside will be assembled. 35 | 36 | The `--out` option controls the output location. If the `--out` value is a directory the output will be placed in individual files under that directory. Otherwise, if the `--out` value ends in `.jar` or `.zip`, the output will be placed in a single zipfile at that location. If the `--out` value ends with `.class`, the single output classfile will be written to that file (if the input has multiple classes, it will error out in this case.) 37 | 38 | ## Decompilation 39 | 40 | The v2 decompiler is still a work in progress. For decompilation, you currently need to use [Krakatau v1](https://github.com/Storyyeller/Krakatau/tree/master). 41 | -------------------------------------------------------------------------------- /docs/assembly_specification.md: -------------------------------------------------------------------------------- 1 | ## Krakatau assembly syntax 2 | 3 | This is a low level specification of the Krakatau assembler syntax. [For a high level introduction to the Krakatau assembler, click here](assembly_tutorial.md). 4 | 5 | ## Tokens 6 | 7 | `NL` represents one or more newlines, with optional comments or other whitespace. Comments begin with `;` and continue until the end of the line. Places where `NL` appears in the grammar *must* have a newline. All other tokens are implicitly separated by non-NL whitespace (you can't break lines except where permitted by the grammar). 8 | 9 | ``` 10 | WORD: 11 | (?:[a-zA-Z_$\(<]|\[[A-Z\[])[\w$;/\[\(\)<>*+-]* 12 | 13 | REF: 14 | \[[a-z0-9_]+\] 15 | 16 | BSREF: 17 | \[bs:[a-z0-9_]+\] 18 | 19 | LABEL_DEF: 20 | L\w+: 21 | 22 | STRING_LITERAL: 23 | b?"[^"\n\\]*(?:\\.[^"\n\\]*)*" 24 | b?'[^'\n\\]*(?:\\.[^'\n\\]*)*' 25 | 26 | INT_LITERAL: 27 | [+-]?(?:0x[0-9a-fA-F]+|[1-9][0-9]*|0) 28 | 29 | DOUBLE_LITERAL: 30 | [+-]Infinity 31 | [+-]NaN(?:<0x[0-9a-fA-F]+>)? 32 | [+-]?\d+\.\d+(?:e[+-]?\d+)? 
// decimal float 33 | [+-]?\d+(?:e[+-]?\d+) // decimal float without fraction (exponent mandatory) 34 | [+-]?0x[0-9a-fA-F]+(?:\.[0-9a-fA-F]+)?(?:p[+-]?\d+) // hex float 35 | 36 | ``` 37 | 38 | A `WORD` consists of one or more ascii letters, digits, `_`, `$`, `;`, `/`, `[`, `(`, `)`, `<`, `>`, `*`, `+`, or `-`, except that it must start with a letter, `_`, `$`, `[`, `<`, or `(`, and if it starts with `[`, the second character must be `A-Z` (upper case) or `[`. (A token with `[` followed by a digit or lowercase letter is instead parsed as the `BSREF` or `REF` token type). 39 | 40 | `LONG_LITERAL` has the same format as `INT_LITERAL` followed by an uppercase `L`. `FLOAT_LITERAL` has the same format as `DOUBLE_LITERAL` followed by a lowercase `f`. 41 | 42 | NaNs with a specific binary representation can be represented by suffixing with the hexadecimal value in angle brackets. For example, `-NaN<0x7ff0123456789abc>` or `+NaN<0xFFABCDEF>f`. This must have exactly 8 or 16 hex digits for float and double literals respectively. 43 | 44 | As an example of hexadecimal float literals, the minimum positive float is `0x0.000001p-125F` and the maximum negative double is `-0x0.0000000000001p-1022`. Likewise, the maximum denormal is `0x0.fffffffffffffp-1022`. 45 | 46 | String literals may be double or single quoted, and may be preceded by `b` to indicate a raw byte string. Additionally, the permitted escape sequences are as follows: `\\`, `\n`, `\r`, `\t`, `\"`, `\'`, `\uDDDD`, `\U00DDDDDD`, `\xDD`. `\u` and `\U` are 16 and 32 bit unicode escapes respectively, and must be followed by the appropriate hex digits. `\U` escapes must be a legal unicode code point. `\x` is a byte escape and can be used to represent non-ascii byte values in raw byte strings. 47 | 48 | 49 | 50 | ## Grammar 51 | 52 | The productions `u8`, `u16`, etc. represent `INTEGER_LITERAL`s with value constrained to be an 8, 16, etc. bit unsigned integer. Likewise, `i8`, `i16`, etc.
represent signed integer literals. 53 | 54 | The `flags` production represents zero or more of the tokens `"abstract"`, `"annotation"`, `"bridge"`, `"enum"`, `"final"`, `"interface"`, `"mandated"`, `"module"`, `"native"`, `"open"`, `"private"`, `"protected"`, `"public"`, `"static"`, `"static_phase"`, `"strict"`, `"strictfp"`, `"super"`, `"synchronized"`, `"synthetic"`, `"transient"`, `"transitive"`, `"varargs"`, `"volatile"`. Additionally, if the token following `flags` is a `WORD`, it must not be one of these values (in other words, `flags` is greedy). 55 | 56 | The `lbl` production represents a `WORD` that begins with an uppercase `L`. 57 | 58 | 59 | The top most production is `source_file`. A Krakatau assembly file can contain any number of class definitions. 60 | 61 | ``` 62 | source_file: 63 | NL? class_def* 64 | 65 | class_def: 66 | (".version" u16 u16 NL)? 67 | ".class" flags clsref NL 68 | ".super" clsref NL 69 | interface* 70 | clsitem* 71 | ".end" "class" NL 72 | 73 | interface: 74 | ".implements" clsref NL 75 | 76 | clsitem: 77 | ".bootstrap" BSREF "=" ref_or_tagged_bootstrap NL 78 | ".const" REF "=" ref_or_tagged_const NL 79 | field NL 80 | method NL 81 | attribute NL 82 | 83 | field: 84 | ".field" flags utfref utfref ("=" ldc_rhs)? fieldattrs? 85 | 86 | fieldattrs: 87 | ".fieldattributes" NL 88 | (attribute NL)* 89 | ".end" "fieldattributes" 90 | 91 | method: 92 | ".method" flags utfref ":" utfref NL 93 | (attribute NL)* 94 | ".end" "method" 95 | 96 | attribute: 97 | ".attribute" utfref ("length" u32)? STRING_LITERAL 98 | ".attribute" utfref ("length" u32)? 
attrbody 99 | attrbody 100 | ``` 101 | 102 | Attributes: 103 | ``` 104 | attrbody: 105 | ".annotationdefault" element_value 106 | ".bootstrapmethods" 107 | ".code" code_attr 108 | ".constantvalue" ldc_rhs 109 | ".deprecated" 110 | ".enclosing" "method" clsref natref 111 | ".exceptions" clsref* 112 | ".innerclasses" NL (clsref clsref utfref flags NL)* ".end" "innerclasses" 113 | ".linenumbertable" NL (lbl u16 NL)* ".end" "linenumbertable" 114 | ".localvariabletable" NL (local_var_table_item NL)* ".end" "localvariabletable" 115 | ".localvariabletypetable" NL (local_var_table_item NL)* ".end" "localvariabletypetable" 116 | ".methodparameters" NL (utfref flags NL)* ".end" "methodparameters" 117 | ".module" module 118 | ".modulemainclass" clsref 119 | ".modulepackages" single* 120 | ".nesthost" clsref 121 | ".nestmembers" clsref* 122 | ".permittedsubclasses" clsref* 123 | ".record" NL (record_item NL)* ".end" "record" 124 | ".runtime" runtime_visibility runtime_attr 125 | ".signature" utfref 126 | ".sourcedebugextension" STRING_LITERAL 127 | ".sourcefile" utfref 128 | ".stackmaptable" 129 | ".synthetic" 130 | 131 | annotation: 132 | annotation_sub "annotation" 133 | 134 | annotation_sub: 135 | utfref NL (utfref "=" element_value NL)* ".end" 136 | 137 | element_value: 138 | "annotation" annotation 139 | "array" NL (element_value NL)* ".end" "array" 140 | "boolean" ldc_rhs 141 | "byte" ldc_rhs 142 | "char" ldc_rhs 143 | "class" utfref 144 | "double" ldc_rhs 145 | "enum" utfref utfref 146 | "float" ldc_rhs 147 | "int" ldc_rhs 148 | "long" ldc_rhs 149 | "short" ldc_rhs 150 | "string" utfref 151 | 152 | local_var_table_item: 153 | u16 "is" utfref utfref "from" lbl "to" lbl 154 | 155 | module: 156 | utfref flags "version" utfref NL 157 | (".requires" single flags "version" utfref NL)* 158 | (".exports" exports_item NL)* 159 | (".opens" exports_item NL)* 160 | (".uses" clsref NL)* 161 | (".provides" clsref "with" (clsref NL)* NL)* 162 | ".end" "module" 163 | 164 |
exports_item: 165 | single flags ("to" (single NL)*)? 166 | 167 | record_item: 168 | utfref utfref record_attrs? NL 169 | 170 | record_attrs: 171 | ".attributes" (attribute NL)* ".end" "attributes" 172 | 173 | runtime_visibility: 174 | "visible" 175 | "invisible" 176 | 177 | runtime_attr: 178 | "annotations" NL (annotation NL)* ".end" "annotations" 179 | "paramannotations" NL (param_annotation NL)* ".end" "paramannotations" 180 | "typeannotations" NL (type_annotation NL)* ".end" "typeannotations" 181 | 182 | param_annotation: 183 | ".paramannotation" NL 184 | (annotation NL)* 185 | ".end" "paramannotation" 186 | 187 | type_annotation: 188 | ".typeannotation" ta_target_info ta_target_path annotation_sub "typeannotation" 189 | 190 | ta_target_info: 191 | u8 ta_target_info_body NL 192 | 193 | ta_target_info_body: 194 | "typeparam" u8 195 | "super" u16 196 | "typeparambound" u8 u8 197 | "empty" 198 | "methodparam" u8 199 | "throws" u16 200 | "localvar" NL (localvar_info NL)* ".end" "localvar" 201 | "catch" u16 202 | "offset" lbl 203 | "typearg" lbl u8 204 | 205 | localvar_info: 206 | "nowhere" 207 | "from" lbl "to" lbl 208 | 209 | ta_target_path: 210 | ".typepath" NL (u8 u8 NL)* ".end" "typepath" NL 211 | 212 | ``` 213 | 214 | Code: 215 | ``` 216 | code_attr: 217 | "long"? "stack" u16 "locals" u16 NL 218 | (code_item NL)* 219 | (attribute NL)* 220 | ".end" "code" 221 | 222 | code_item: 223 | LABEL_DEF instruction? 
224 | instruction 225 | code_directive 226 | 227 | code_directive: 228 | ".catch" clsref "from" lbl "to" lbl "using" lbl 229 | ".stack" stack_map_item 230 | 231 | stack_map_item: 232 | "same" 233 | "stack_1" vtype 234 | "stack_1_extended" vtype 235 | "chop" u8 236 | "same_extended" 237 | "append" vtype+ 238 | "full" NL "locals" vtype* NL "stack" vtype* NL ".end" "stack" 239 | 240 | vtype: 241 | "Float" 242 | "Integer" 243 | "Long" 244 | "Null" 245 | "Object" clsref 246 | "Top" 247 | "Uninitialized" lbl 248 | "UninitializedThis" 249 | ``` 250 | 251 | Bytecode instructions: 252 | ``` 253 | instruction: 254 | "aaload" 255 | "aastore" 256 | "aconst_null" 257 | "aload" u8 258 | "aload_0" 259 | "aload_1" 260 | "aload_2" 261 | "aload_3" 262 | "anewarray" clsref 263 | "areturn" 264 | "arraylength" 265 | "astore" u8 266 | "astore_0" 267 | "astore_1" 268 | "astore_2" 269 | "astore_3" 270 | "athrow" 271 | "baload" 272 | "bastore" 273 | "bipush" i8 274 | "caload" 275 | "castore" 276 | "checkcast" clsref 277 | "d2f" 278 | "d2i" 279 | "d2l" 280 | "dadd" 281 | "daload" 282 | "dastore" 283 | "dcmpg" 284 | "dcmpl" 285 | "dconst_0" 286 | "dconst_1" 287 | "ddiv" 288 | "dload" u8 289 | "dload_0" 290 | "dload_1" 291 | "dload_2" 292 | "dload_3" 293 | "dmul" 294 | "dneg" 295 | "drem" 296 | "dreturn" 297 | "dstore" u8 298 | "dstore_0" 299 | "dstore_1" 300 | "dstore_2" 301 | "dstore_3" 302 | "dsub" 303 | "dup" 304 | "dup2" 305 | "dup2_x1" 306 | "dup2_x2" 307 | "dup_x1" 308 | "dup_x2" 309 | "f2d" 310 | "f2i" 311 | "f2l" 312 | "fadd" 313 | "faload" 314 | "fastore" 315 | "fcmpg" 316 | "fcmpl" 317 | "fconst_0" 318 | "fconst_1" 319 | "fconst_2" 320 | "fdiv" 321 | "fload" u8 322 | "fload_0" 323 | "fload_1" 324 | "fload_2" 325 | "fload_3" 326 | "fmul" 327 | "fneg" 328 | "frem" 329 | "freturn" 330 | "fstore" u8 331 | "fstore_0" 332 | "fstore_1" 333 | "fstore_2" 334 | "fstore_3" 335 | "fsub" 336 | "getfield" ref_or_tagged_const 337 | "getstatic" ref_or_tagged_const 338 | "goto" lbl 339 | "goto_w" 
lbl 340 | "i2b" 341 | "i2c" 342 | "i2d" 343 | "i2f" 344 | "i2l" 345 | "i2s" 346 | "iadd" 347 | "iaload" 348 | "iand" 349 | "iastore" 350 | "iconst_0" 351 | "iconst_1" 352 | "iconst_2" 353 | "iconst_3" 354 | "iconst_4" 355 | "iconst_5" 356 | "iconst_m1" 357 | "idiv" 358 | "if_acmpeq" lbl 359 | "if_acmpne" lbl 360 | "if_icmpeq" lbl 361 | "if_icmpge" lbl 362 | "if_icmpgt" lbl 363 | "if_icmple" lbl 364 | "if_icmplt" lbl 365 | "if_icmpne" lbl 366 | "ifeq" lbl 367 | "ifge" lbl 368 | "ifgt" lbl 369 | "ifle" lbl 370 | "iflt" lbl 371 | "ifne" lbl 372 | "ifnonnull" lbl 373 | "ifnull" lbl 374 | "iinc" u8 i8 375 | "iload" u8 376 | "iload_0" 377 | "iload_1" 378 | "iload_2" 379 | "iload_3" 380 | "imul" 381 | "ineg" 382 | "instanceof" clsref 383 | "invokedynamic" ref_or_tagged_const 384 | "invokeinterface" ref_or_tagged_const u8? 385 | "invokespecial" ref_or_tagged_const 386 | "invokestatic" ref_or_tagged_const 387 | "invokevirtual" ref_or_tagged_const 388 | "ior" 389 | "irem" 390 | "ireturn" 391 | "ishl" 392 | "ishr" 393 | "istore" u8 394 | "istore_0" 395 | "istore_1" 396 | "istore_2" 397 | "istore_3" 398 | "isub" 399 | "iushr" 400 | "ixor" 401 | "jsr" lbl 402 | "jsr_w" lbl 403 | "l2d" 404 | "l2f" 405 | "l2i" 406 | "ladd" 407 | "laload" 408 | "land" 409 | "lastore" 410 | "lcmp" 411 | "lconst_0" 412 | "lconst_1" 413 | "ldc" ldc_rhs 414 | "ldc2_w" ldc_rhs 415 | "ldc_w" ldc_rhs 416 | "ldiv" 417 | "lload" u8 418 | "lload_0" 419 | "lload_1" 420 | "lload_2" 421 | "lload_3" 422 | "lmul" 423 | "lneg" 424 | "lookupswitch" lookupswitch 425 | "lor" 426 | "lrem" 427 | "lreturn" 428 | "lshl" 429 | "lshr" 430 | "lstore" u8 431 | "lstore_0" 432 | "lstore_1" 433 | "lstore_2" 434 | "lstore_3" 435 | "lsub" 436 | "lushr" 437 | "lxor" 438 | "monitorenter" 439 | "monitorexit" 440 | "multianewarray" clsref u8 441 | "new" clsref 442 | "newarray" ("boolean" | "char" | "float" | "double" | "byte" | "short" | "int" | "long") 443 | "nop" 444 | "pop" 445 | "pop2" 446 | "putfield" ref_or_tagged_const 447 | 
"putstatic" ref_or_tagged_const 448 | "ret" u8 449 | "return" 450 | "saload" 451 | "sastore" 452 | "sipush" i16 453 | "swap" 454 | "tableswitch" tableswitch 455 | "wide" wide_instruction 456 | 457 | lookupswitch: 458 | NL 459 | (i32 ":" lbl NL)* 460 | "default" ":" lbl 461 | 462 | tableswitch: 463 | i32 NL 464 | (lbl NL)+ 465 | "default" ":" lbl 466 | 467 | wide_instruction: 468 | "aload" u16 469 | "astore" u16 470 | "dload" u16 471 | "dstore" u16 472 | "fload" u16 473 | "fstore" u16 474 | "iinc" u16 i16 475 | "iload" u16 476 | "istore" u16 477 | "lload" u16 478 | "lstore" u16 479 | "ret" u16 480 | ``` 481 | 482 | 483 | Constants: 484 | ``` 485 | utf: 486 | WORD 487 | STRING_LITERAL 488 | 489 | utfref: 490 | REF 491 | utf 492 | 493 | clsref: 494 | REF 495 | utf 496 | 497 | single: 498 | REF 499 | utf 500 | 501 | natref: 502 | REF 503 | utf utfref 504 | 505 | mhnotref: 506 | mhtag ref_or_tagged_const 507 | 508 | mhtag: 509 | "getField" 510 | "getStatic" 511 | "putField" 512 | "putStatic" 513 | "invokeVirtual" 514 | "invokeStatic" 515 | "invokeSpecial" 516 | "newInvokeSpecial" 517 | "invokeInterface" 518 | 519 | tagged_const: 520 | "Utf8" utf 521 | "Int" i32 522 | "Float" FLOAT_LITERAL 523 | "Long" LONG_LITERAL 524 | "Double" DOUBLE_LITERAL 525 | "Class" utfref 526 | "String" utfref 527 | "MethodType" utfref 528 | "Module" utfref 529 | "Package" utfref 530 | "Field" clsref natref 531 | "Method" clsref natref 532 | "InterfaceMethod" clsref natref 533 | "NameAndType" utfref utfref 534 | "MethodHandle" mhnotref 535 | "Dynamic" bsref natref 536 | "InvokeDynamic" bsref natref 537 | 538 | ref_or_tagged_const: 539 | REF 540 | tagged_const 541 | 542 | bs_args: 543 | ref_or_tagged_const* ":" 544 | 545 | bsref: 546 | BSREF 547 | mhnotref bs_args 548 | 549 | ref_or_tagged_bootstrap: 550 | BSREF 551 | "Bootstrap" REF bs_args 552 | "Bootstrap" mhnotref bs_args 553 | 554 | ldc_rhs: 555 | INTEGER_LITERAL 556 | FLOAT_LITERAL 557 | LONG_LITERAL 558 | DOUBLE_LITERAL 559 | 
STRING_LITERAL 560 | REF 561 | tagged_const 562 | 563 | ``` 564 | -------------------------------------------------------------------------------- /docs/assembly_tutorial.md: -------------------------------------------------------------------------------- 1 | ## Krakatau assembly tutorial 2 | 3 | This is a high level introduction to Krakatau assembler syntax. [For a complete, low level specification of the syntax, click here](assembly_specification.md). 4 | 5 | 6 | _Note: This tutorial assumes that you already understand the classfile format and how Java bytecode works. To learn about bytecode, consult the JVM specification._ 7 | 8 | 9 | ## A minimal classfile 10 | 11 | Technically speaking, the simplest Krakatau assembly file is just an empty file, since one `.j` file can contain any number of class definitions, including zero. But that's boring, so let's try a minimal class definition: 12 | 13 | ``` 14 | ; This is a comment. Comments start with ; and go until end of the line 15 | .class public Foo 16 | .super java/lang/Object ; Java bytecode requires us to explicitly inherit from java.lang.Object 17 | .end class 18 | ``` 19 | 20 | This defines a class with no fields or methods. We can now assemble this `.j` file and try to run the resulting classfile: 21 | 22 | ``` 23 | > krak2 asm -o Foo.class examples/minimal.j 24 | got 1 classes 25 | Wrote 55 bytes to Foo.class 26 | > java Foo 27 | Error: Main method not found in class Foo, please define the main method as: 28 | public static void main(String[] args) 29 | or a JavaFX application class must extend javafx.application.Application 30 | 31 | ``` 32 | 33 | Unfortunately, since it has no main method, Java can't run it. 
Let's make a class with a `main` method that prints "Hello World!": 34 | 35 | ``` 36 | .class public Foo 37 | .super java/lang/Object 38 | 39 | ; ([Ljava/lang/String;)V means "takes a single String[] argument and returns void" 40 | .method public static main : ([Ljava/lang/String;)V 41 | ; We have to put an upper bound on the number of locals and the operand stack 42 | ; Machine generated code will usually calculate the exact limits, but that's a pain to do 43 | ; when writing bytecode by hand, especially as we'll be making changes to the code. 44 | ; Therefore, we'll just set a value that's way more than we're using, 13 in this case 45 | .code stack 13 locals 13 46 | ; Equivalent to "System.out" in Java code 47 | getstatic Field java/lang/System out Ljava/io/PrintStream; 48 | ; put our argument on the operand stack 49 | ldc "Hello World!" 50 | ; now invoke println() 51 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 52 | return 53 | .end code 54 | .end method 55 | .end class 56 | 57 | ``` 58 | 59 | Now we can assemble and run our class successfully! 60 | 61 | ``` 62 | > krak2 asm -o Foo.class examples/hello.j 63 | got 1 classes 64 | Wrote 278 bytes to Foo.class 65 | > java Foo 66 | Hello World! 67 | ``` 68 | 69 | Now let's try greeting the user by name, assuming that they supply their name as a command line parameter. 
Java includes the command line parameters in the `String[]` array passed to `main()`, so we just need to access the first element: 70 | 71 | ``` 72 | .class public Foo 73 | .super java/lang/Object 74 | 75 | .method public static main : ([Ljava/lang/String;)V 76 | .code stack 13 locals 13 77 | getstatic Field java/lang/System out Ljava/io/PrintStream; 78 | ldc "Hello, " 79 | 80 | ; Access args[0] 81 | aload_0 82 | iconst_0 83 | aaload 84 | 85 | ; Concat the strings 86 | invokevirtual Method java/lang/String concat (Ljava/lang/String;)Ljava/lang/String; 87 | 88 | ; Now print like normal 89 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 90 | return 91 | .end code 92 | .end method 93 | .end class 94 | ``` 95 | 96 | Running it shows that our program correctly greets different people by name: 97 | 98 | ``` 99 | > krak2 asm -o Foo.class examples/greet1.j 100 | got 1 classes 101 | Wrote 361 bytes to Foo.class 102 | > java Foo Alice 103 | Hello, Alice 104 | > java Foo Bob 105 | Hello, Bob 106 | 107 | ``` 108 | 109 | ## Control flow 110 | 111 | Suppose we want to print a different message depending on the user's name. For example, we would like our program to print "Fuck you" if the name contains "Bob" and otherwise say "Hello" like normal. 112 | 113 | In order to have control flow, we need to use *labels*. A label can be any word starting with an uppercase `L`. In this case, we call `String.contains()` to see if the name contains "Bob". `contains()` returns `1` if the string does contain "Bob" and `0` otherwise (booleans are just ordinary ints at the bytecode level). 114 | 115 | We then use the `ifeq` instruction, which compares this value to `0`. If it is `0`, we jump to our `LELSE` label, otherwise, we fall through to the main branch, push "Fuck you, " onto the stack, and then `goto` `LEND`. The `LELSE` label then pushes "Hello, " onto the stack instead.
116 | 117 | 118 | ``` 119 | .class public Foo 120 | .super java/lang/Object 121 | 122 | .method public static main : ([Ljava/lang/String;)V 123 | .code stack 13 locals 13 124 | getstatic Field java/lang/System out Ljava/io/PrintStream; 125 | 126 | ; Access the user's name 127 | aload_0 128 | iconst_0 129 | aaload 130 | 131 | ; Store name in the first variable slot for later 132 | astore_0 133 | 134 | ; See if name contains "Bob" 135 | aload_0 136 | ldc "Bob" 137 | invokevirtual Method java/lang/String contains (Ljava/lang/CharSequence;)Z 138 | 139 | ifeq LELSE 140 | ldc "Fuck you, " 141 | goto LEND 142 | LELSE: 143 | ldc "Hello, " 144 | LEND: 145 | ; Load name again so we can concat it to the prefix above 146 | aload_0 147 | invokevirtual Method java/lang/String concat (Ljava/lang/String;)Ljava/lang/String; 148 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 149 | return 150 | .end code 151 | .end method 152 | .end class 153 | 154 | ``` 155 | 156 | As expected, our new class works like a charm. 157 | 158 | 159 | ``` 160 | > krak2 asm -o Foo.class examples/greet2.j 161 | got 1 classes 162 | Wrote 453 bytes to Foo.class 163 | > java Foo Alice 164 | Hello, Alice 165 | > java Foo Bob 166 | Fuck you, Bob 167 | > java Foo "Alice Margatroid" 168 | Hello, Alice Margatroid 169 | > java Foo "Totally Not Bob" 170 | Fuck you, Totally Not Bob 171 | ``` 172 | 173 | ## Conclusion 174 | 175 | That's the end of the tutorial for now. Hopefully, this at least gives you a very basic introduction to bytecode. 
176 | 177 | _Tip: If you aren't sure how to do something, try compiling a Java class with code to do what you want, and then disassembling it with the Krakatau disassembler to see what the bytecode looks like._ 178 | -------------------------------------------------------------------------------- /examples/greet1.j: -------------------------------------------------------------------------------- 1 | .class public Foo 2 | .super java/lang/Object 3 | 4 | .method public static main : ([Ljava/lang/String;)V 5 | .code stack 13 locals 13 6 | getstatic Field java/lang/System out Ljava/io/PrintStream; 7 | ldc "Hello, " 8 | 9 | ; Access args[0] 10 | aload_0 11 | iconst_0 12 | aaload 13 | 14 | ; Concat the strings 15 | invokevirtual Method java/lang/String concat (Ljava/lang/String;)Ljava/lang/String; 16 | 17 | ; Now print like normal 18 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 19 | return 20 | .end code 21 | .end method 22 | .end class 23 | -------------------------------------------------------------------------------- /examples/greet2.j: -------------------------------------------------------------------------------- 1 | .class public Foo 2 | .super java/lang/Object 3 | 4 | .method public static main : ([Ljava/lang/String;)V 5 | .code stack 13 locals 13 6 | getstatic Field java/lang/System out Ljava/io/PrintStream; 7 | 8 | ; Access the user's name 9 | aload_0 10 | iconst_0 11 | aaload 12 | 13 | ; Store name in the first variable slot for later 14 | astore_0 15 | 16 | ; See if name contains "Bob" 17 | aload_0 18 | ldc "Bob" 19 | invokevirtual Method java/lang/String contains (Ljava/lang/CharSequence;)Z 20 | 21 | ifeq LELSE 22 | ldc "Fuck you, " 23 | goto LEND 24 | LELSE: 25 | ldc "Hello, " 26 | LEND: 27 | ; Load name again so we can concat it to the prefix above 28 | aload_0 29 | invokevirtual Method java/lang/String concat (Ljava/lang/String;)Ljava/lang/String; 30 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 31 | 
return 32 | .end code 33 | .end method 34 | .end class 35 | -------------------------------------------------------------------------------- /examples/hello.j: -------------------------------------------------------------------------------- 1 | .class public Foo 2 | .super java/lang/Object 3 | 4 | ; ([Ljava/lang/String;)V means "takes a single String[] argument and returns void" 5 | .method public static main : ([Ljava/lang/String;)V 6 | ; We have to put an upper bound on the number of locals and the operand stack 7 | ; Machine generated code will usually calculate the exact limits, but that's a pain to do 8 | ; when writing bytecode by hand, especially as we'll be making changes to the code. 9 | ; Therefore, we'll just set a value that's way more than we're using, 13 in this case 10 | .code stack 13 locals 13 11 | ; Equivalent to "System.out" in Java code 12 | getstatic Field java/lang/System out Ljava/io/PrintStream; 13 | ; put our argument on the operand stack 14 | ldc "Hello World!" 15 | ; now invoke println() 16 | invokevirtual Method java/io/PrintStream println (Ljava/lang/Object;)V 17 | return 18 | .end code 19 | .end method 20 | .end class 21 | -------------------------------------------------------------------------------- /examples/minimal.j: -------------------------------------------------------------------------------- 1 | ; This is a comment. 
Comments start with ; and go until end of the line 2 | .class public Foo 3 | .super java/lang/Object ; Java bytecode requires us to explicitly inherit from java.lang.Object 4 | .end class 5 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 125 2 | fn_call_width = 90 3 | -------------------------------------------------------------------------------- /src/ass_main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | use anyhow::Result; 3 | use clap::Parser; 4 | use std::path::PathBuf; 5 | 6 | use crate::file_input_util; 7 | use crate::file_output_util::Writer; 8 | use crate::lib::assemble; 9 | use crate::lib::AssemblerOptions; 10 | 11 | #[derive(Parser)] 12 | pub struct AssemblerCli { 13 | input: PathBuf, 14 | #[clap(short, long, parse(from_os_str))] 15 | out: PathBuf, 16 | } 17 | 18 | pub fn assembler_main(cli: AssemblerCli) -> Result<()> { 19 | let opts = AssemblerOptions {}; 20 | 21 | let mut writer = Writer::new(&cli.out)?; 22 | let mut error_count = 0; 23 | file_input_util::read_files(&cli.input, "j", |fname, data| { 24 | let data = std::str::from_utf8(data).expect(".j files must be utf8-encoded"); 25 | // let classes = assemble(&data, opts)?; 26 | let res = assemble(&data, opts); 27 | let classes = match res { 28 | Ok(classes) => classes, 29 | Err(err) => { 30 | err.display(fname, data); 31 | error_count += 1; 32 | return Ok(()); 33 | } 34 | }; 35 | println!("got {} classes", classes.len()); 36 | 37 | for (name, out) in classes { 38 | let name = name.map(|name| format!("{}.class", name)); 39 | writer.write(name.as_deref(), &out)?; 40 | } 41 | Ok(()) 42 | })?; 43 | 44 | if error_count > 0 { 45 | bail!("Finished with {} errors", error_count); 46 | } 47 | Ok(()) 48 | } 49 | -------------------------------------------------------------------------------- 
/src/dis_main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | use anyhow::Result; 3 | use clap::Parser; 4 | use std::path::PathBuf; 5 | 6 | use crate::file_input_util; 7 | use crate::file_output_util::Writer; 8 | use crate::lib::disassemble; 9 | use crate::lib::DisassemblerOptions; 10 | use crate::lib::ParserOptions; 11 | 12 | #[derive(Parser)] 13 | pub struct DisassemblerCli { 14 | input: PathBuf, 15 | #[clap(short, long, parse(from_os_str))] 16 | out: PathBuf, 17 | 18 | #[clap(short, long)] 19 | roundtrip: bool, 20 | 21 | #[clap(long)] 22 | no_short_code_attr: bool, 23 | } 24 | 25 | pub fn disassembler_main(cli: DisassemblerCli) -> Result<()> { 26 | let opts = DisassemblerOptions { 27 | roundtrip: cli.roundtrip, 28 | }; 29 | let parse_opts = ParserOptions { 30 | no_short_code_attr: cli.no_short_code_attr, 31 | }; 32 | 33 | let mut writer = Writer::new(&cli.out)?; 34 | let mut error_count = 0; 35 | file_input_util::read_files(&cli.input, "class", |fname, data| { 36 | println!("disassemble {}", fname); 37 | let (name, out) = match disassemble(&data, parse_opts, opts) { 38 | Ok(v) => v, 39 | Err(err) => { 40 | eprintln!("Parse error in {}: {}", fname, err.0); 41 | error_count += 1; 42 | return Ok(()); 43 | } 44 | }; 45 | let name = name.map(|name| format!("{}.j", name)); 46 | writer.write(name.as_deref(), &out)?; 47 | Ok(()) 48 | })?; 49 | 50 | if error_count > 0 { 51 | bail!("Finished with {} errors", error_count); 52 | } 53 | Ok(()) 54 | } 55 | -------------------------------------------------------------------------------- /src/file_input_util.rs: -------------------------------------------------------------------------------- 1 | use anyhow::anyhow; 2 | use anyhow::bail; 3 | use anyhow::Result; 4 | use std::fs; 5 | use std::io::Read; 6 | use std::path::Path; 7 | 8 | // pub fn read_files(p: &Path, ext: &str, mut cb: impl FnMut(&[u8]) -> Result<(), E>) -> Result<(), E> { 9 | pub fn read_files(p: &Path, 
ext: &str, mut cb: impl FnMut(&str, &[u8]) -> Result<()>) -> Result<()> { 10 | let input_ext = p 11 | .extension() 12 | .and_then(|s| s.to_str()) 13 | .ok_or_else(|| anyhow!("Missing input file extension for '{}'", p.display()))?; 14 | let input_ext = input_ext.to_ascii_lowercase(); 15 | 16 | if input_ext == ext { 17 | let data = fs::read(p)?; 18 | cb(&p.to_string_lossy(), &data)?; 19 | } else if input_ext == "jar" || input_ext == "zip" { 20 | let mut inbuf = Vec::new(); 21 | let file = fs::File::open(p)?; 22 | let mut zip = zip::ZipArchive::new(file)?; 23 | let ext = format!(".{}", ext); // temp hack 24 | 25 | for i in 0..zip.len() { 26 | let mut file = zip.by_index(i)?; 27 | // println!("found {} {:?} {} {}", i, file.name(), file.size(), file.compressed_size()); 28 | 29 | let name = file.name().to_owned(); 30 | if !name.trim_end_matches('/').ends_with(&ext) { 31 | continue; 32 | } 33 | 34 | inbuf.clear(); 35 | inbuf.reserve(file.size() as usize); 36 | file.read_to_end(&mut inbuf)?; 37 | // println!("read {} bytes", inbuf.len()); 38 | 39 | cb(&name, &inbuf)?; 40 | } 41 | } else { 42 | bail!("Unsupported input extension {}", input_ext) 43 | } 44 | Ok(()) 45 | } 46 | -------------------------------------------------------------------------------- /src/file_output_util.rs: -------------------------------------------------------------------------------- 1 | use std::fs; 2 | use std::io::Write; 3 | use std::path::Path; 4 | use std::path::PathBuf; 5 | 6 | use anyhow::anyhow; 7 | use anyhow::bail; 8 | use anyhow::Context; 9 | use anyhow::Result; 10 | 11 | pub enum Writer<'a> { 12 | Dir(PathBuf), 13 | Jar(&'a Path, zip::ZipWriter), 14 | Merged(&'a Path, fs::File), 15 | Single(&'a Path, fs::File, bool), 16 | } 17 | impl<'a> Writer<'a> { 18 | pub fn new(p: &'a Path) -> Result { 19 | create_parent(p)?; 20 | if p.is_dir() { 21 | return Ok(Self::Dir(p.into())); 22 | } 23 | 24 | let f = create_file(p)?; 25 | 26 | let ext = p.extension().and_then(|s| s.to_str()); 27 | Ok(if let 
Some(s) = ext { 28 | match s.to_ascii_lowercase().as_str() { 29 | "jar" | "zip" => Self::Jar(p, zip::ZipWriter::new(f)), 30 | "j" => Self::Merged(p, f), 31 | "class" => Self::Single(p, f, false), 32 | _ => bail!( 33 | "Unsupported output extension {} for {}, expected directory, .jar, .zip, .j, or .class", 34 | s, 35 | p.display() 36 | ), 37 | } 38 | } else { 39 | bail!( 40 | "Unsupported output extension None for {}, expected directory, .jar, .zip, .j, or .class", 41 | p.display() 42 | ) 43 | }) 44 | } 45 | 46 | pub fn write(&mut self, name: Option<&str>, data: &[u8]) -> Result<()> { 47 | use Writer::*; 48 | match self { 49 | Dir(dir) => { 50 | let name = name.ok_or_else(|| { 51 | anyhow!("Class has missing or invalid name. Try specifying a single file output name explicitly.") 52 | })?; 53 | if name.contains("..") { 54 | bail!("Invalid path {}. Try outputting to a zip file instead.", name) // name comes from the input class/archive: return an error rather than panicking 55 | } else { 56 | let p = dir.join(name); 57 | println!("Writing to {}", p.display()); 58 | create_parent(&p)?; 59 | let mut f = create_file(&p)?; 60 | f.write_all(data)?; 61 | } 62 | } 63 | Jar(p, zw) => { 64 | let name = name.ok_or_else(|| { 65 | anyhow!("Class has missing or invalid name. Try specifying a single file output name explicitly.") 66 | })?; 67 | let options = zip::write::FileOptions::default() 68 | .compression_method(zip::CompressionMethod::Stored) 69 | .last_modified_time(zip::DateTime::default()); 70 | 71 | zw.start_file(name, options)?; 72 | zw.write_all(data)?; 73 | println!("Wrote {} bytes to {} in {}", data.len(), name, p.display()); 74 | } 75 | Merged(p, f) => { 76 | write(p, f, data)?; 77 | } 78 | Single(p, f, used) => { 79 | if *used { 80 | bail!( 81 | "Error: Attempting to write multiple classes to single file. Try outputting to a zip file instead."
82 | ) 83 | } 84 | write(p, f, data)?; 85 | *used = true; 86 | } 87 | } 88 | Ok(()) 89 | } 90 | } 91 | 92 | fn create_parent(p: &Path) -> Result<()> { 93 | let parent = p 94 | .parent() 95 | .ok_or_else(|| anyhow!("Unable to determine parent directory for {}", p.display()))?; 96 | fs::create_dir_all(parent) 97 | .with_context(|| format!("Failed to create parent directory {} for {}", parent.display(), p.display())) 98 | } 99 | 100 | fn create_file(p: &Path) -> Result { 101 | fs::File::create(p).with_context(|| format!("Failed to create output file {}", p.display())) 102 | } 103 | 104 | fn write(p: &Path, f: &mut std::fs::File, data: &[u8]) -> Result<()> { 105 | f.write_all(data) 106 | .with_context(|| format!("Failed to write output to {}", p.display()))?; 107 | println!("Wrote {} bytes to {}", data.len(), p.display()); 108 | Ok(()) 109 | } 110 | -------------------------------------------------------------------------------- /src/lib/assemble/base_parser.rs: -------------------------------------------------------------------------------- 1 | use super::parse_literal; 2 | use super::span::Error; 3 | use super::span::ErrorMaker; 4 | use super::span::Span; 5 | use super::tokenize::Token; 6 | use super::tokenize::TokenType; 7 | 8 | type Iter<'a> = std::iter::Peekable>>; 9 | 10 | macro_rules! 
define_int_parse { 11 | ($t:ident) => { 12 | pub fn $t(&mut self) -> Result<$t, Error> { 13 | let tok = self.int()?; 14 | parse_literal::int(tok.1 .0).ok_or_else(|| { 15 | self.error1(&format!("Value must be in range {} <= {} <= {}", $t::MIN, tok.1 .0, $t::MAX), tok.1) 16 | }) 17 | } 18 | }; 19 | } 20 | 21 | pub struct BaseParser<'a> { 22 | error_maker: ErrorMaker<'a>, 23 | source: &'a str, 24 | tokens: Iter<'a>, 25 | } 26 | impl<'a> BaseParser<'a> { 27 | pub fn new(source: &'a str, tokens: Vec>) -> Self { 28 | Self { 29 | error_maker: ErrorMaker::new(source), 30 | source, 31 | tokens: tokens.into_iter().peekable(), 32 | } 33 | } 34 | 35 | pub fn has_tokens_left(&mut self) -> bool { 36 | self.tokens.peek().is_some() 37 | } 38 | 39 | pub fn next(&mut self) -> Result, Error> { 40 | self.tokens.next().ok_or_else(|| { 41 | let tok = Token(TokenType::Newlines, Span(&self.source[self.source.len()..])); 42 | self.error1("Error: Unexpected end of file", tok.1) 43 | }) 44 | } 45 | 46 | pub fn peek(&mut self) -> Result, Error> { 47 | self.tokens.peek().copied().ok_or_else(|| { 48 | let tok = Token(TokenType::Newlines, Span(&self.source[self.source.len()..])); 49 | self.error1("Error: Unexpected end of file", tok.1) 50 | }) 51 | } 52 | 53 | pub fn fail(&mut self) -> Result { 54 | let tok = self.next()?; 55 | self.err1("Error: Unexpected token", tok.1) 56 | } 57 | 58 | pub fn tryv(&mut self, v: &str) -> bool { 59 | self.tokens.next_if(|tok| tok.1 .0 == v).is_some() 60 | } 61 | 62 | pub fn tryv2(&mut self, v: &str) -> Option> { 63 | self.tokens.next_if(|tok| tok.1 .0 == v).map(|tok| tok.1) 64 | } 65 | 66 | pub fn has_type(&mut self, ty: TokenType) -> bool { 67 | if let Some(tok) = self.tokens.peek() { 68 | tok.0 == ty 69 | } else { 70 | false 71 | } 72 | } 73 | 74 | pub fn val(&mut self, v: &str) -> Result<(), Error> { 75 | if self.tryv(v) { 76 | Ok(()) 77 | } else { 78 | let span = self.next()?.1; 79 | self.err1str(format!("Expected {}", v), span) 80 | } 81 | } 82 | 83 | pub 
fn assert_type(&mut self, ty: TokenType) -> Result, Error> { 84 | let tok = self.next()?; 85 | if tok.0 == ty { 86 | Ok(tok.1) 87 | } else { 88 | self.fail() 89 | } 90 | } 91 | 92 | pub fn eol(&mut self) -> Result<(), Error> { 93 | let tok = self.next()?; 94 | if tok.0 == TokenType::Newlines { 95 | Ok(()) 96 | } else { 97 | self.err1("Error: Expected end of line", tok.1) 98 | } 99 | } 100 | 101 | /////////////////////////////////////////////////////////////////////////// 102 | pub fn int(&mut self) -> Result, Error> { 103 | let tok = self.next()?; 104 | if tok.0 != TokenType::IntLit { 105 | self.err1("Expected integer", tok.1) 106 | } else { 107 | Ok(tok) 108 | } 109 | } 110 | 111 | define_int_parse!(u8); 112 | define_int_parse!(u16); 113 | define_int_parse!(u32); 114 | 115 | define_int_parse!(i8); 116 | define_int_parse!(i16); 117 | define_int_parse!(i32); 118 | // define_int_parse!(i64); 119 | /////////////////////////////////////////////////////////////////////////// 120 | } 121 | impl<'a> std::ops::Deref for BaseParser<'a> { 122 | type Target = ErrorMaker<'a>; 123 | 124 | fn deref(&self) -> &Self::Target { 125 | &self.error_maker 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/lib/assemble/class_parser.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use regex::Regex; 3 | use std::collections::HashMap; 4 | use std::convert::TryFrom; 5 | use std::fmt::Display; 6 | use typed_arena::Arena; 7 | 8 | use super::base_parser::BaseParser; 9 | use super::cpool; 10 | use super::cpool::types; 11 | use super::cpool::InlineConst; 12 | use super::cpool::Or; 13 | use super::cpool::Pool; 14 | use super::flags::FlagList; 15 | use super::label::Pos; 16 | use super::parse_literal; 17 | use super::span::Error; 18 | use super::span::Span; 19 | use super::string; 20 | use super::tokenize::Token; 21 | use super::tokenize::TokenType; 22 | use 
super::writer::Writer; 23 | use crate::lib::assemble::span::Spanned; 24 | use crate::lib::mhtags; 25 | use crate::lib::util::BStr; 26 | 27 | /// Shorthand function to convert spanned const ref to non-spanned version 28 | pub fn ns<'a, T, U>(r: Or>) -> Or { 29 | r.map_b(|c| c.v) 30 | } 31 | 32 | pub struct ClassParser<'a> { 33 | pub parser: BaseParser<'a>, 34 | arena: &'a Arena>, 35 | 36 | pub version: (u16, u16), 37 | pub pool: cpool::Pool<'a>, 38 | // Temporary values only set during parsing of Code attributes 39 | pub labels: HashMap<&'a str, Pos>, 40 | pub stack_map_table: Option<(u16, Writer<'a>)>, 41 | } 42 | impl<'a> std::ops::Deref for ClassParser<'a> { 43 | type Target = BaseParser<'a>; 44 | 45 | fn deref(&self) -> &Self::Target { 46 | &self.parser 47 | } 48 | } 49 | impl<'a> std::ops::DerefMut for ClassParser<'a> { 50 | fn deref_mut(&mut self) -> &mut Self::Target { 51 | &mut self.parser 52 | } 53 | } 54 | impl<'a> ClassParser<'a> { 55 | pub fn new(parser: BaseParser<'a>, arena: &'a Arena>) -> Self { 56 | let pool = Pool::new(*parser); 57 | Self { 58 | parser, 59 | arena, 60 | version: (49, 0), 61 | pool, 62 | labels: HashMap::new(), 63 | stack_map_table: None, 64 | } 65 | } 66 | 67 | pub fn ref_type(&self, span: Span<'a>) -> Result, Error> { 68 | lazy_static! 
{ 69 | static ref DIGITS_RE: Regex = Regex::new(r"\A[0-9]+\z").unwrap(); 70 | } 71 | 72 | let s = span.0; 73 | let mut s = &s[1..s.len() - 1]; 74 | if s.starts_with("bs:") { 75 | s = &s[3..]; 76 | } 77 | 78 | Ok(if DIGITS_RE.is_match(s) { 79 | let ind = s.parse().map_err(|_| self.error1("Invalid numeric reference", span))?; 80 | types::RefType::Raw(ind) 81 | } else { 82 | types::RefType::Sym(span.of(s)) 83 | }) 84 | } 85 | 86 | pub fn ref_from(&self, span: Span<'a>) -> Result, Error> { 87 | Ok(Or::A(self.ref_type(span)?)) 88 | } 89 | 90 | pub fn make_utf_ref(&self, b: &'a [u8], span: Span<'a>) -> Result, Error> { 91 | if b.len() > u16::MAX as usize { 92 | self.err1("Constant strings must be at most 65535 bytes in MUTF8 encoding.", span) 93 | } else { 94 | Ok(Or::B(span.of(types::InlineUtf8(BStr(b))))) 95 | } 96 | } 97 | 98 | pub fn utf_from(&self, tok: Token<'a>) -> Result, Error> { 99 | use TokenType::*; 100 | match tok.0 { 101 | Word => self.make_utf_ref(tok.1 .0.as_bytes(), tok.1), 102 | Ref => self.ref_from(tok.1), 103 | StringLit => { 104 | // let bs = string::unescape(tok.1 .0).ok_or_else(|| self.error1("Invalid string literal", tok.1))?; 105 | let bs = string::unescape(tok.1 .0).map_err(|(msg, s)| self.error1(msg, Span(s)))?; 106 | let bs = self.arena.alloc(bs); 107 | self.make_utf_ref(bs, tok.1) 108 | } 109 | _ => self.err1("Expected identifier or constant pool ref", tok.1), 110 | } 111 | } 112 | 113 | pub fn utf(&mut self) -> Result, Error> { 114 | let tok = self.next()?; 115 | self.utf_from(tok) 116 | } 117 | 118 | pub fn cls_from(&self, tok: Token<'a>) -> Result, Error> { 119 | match tok.0 { 120 | TokenType::Ref => self.ref_from(tok.1), 121 | _ => Ok(Or::B(tok.1.of(types::InlineClass(ns(self.utf_from(tok)?))))), 122 | } 123 | } 124 | 125 | pub fn cls(&mut self) -> Result, Error> { 126 | let tok = self.next()?; 127 | self.cls_from(tok) 128 | } 129 | 130 | pub fn single( 131 | &mut self, 132 | f: fn(types::SymUtf8Ref<'a>) -> types::SymConstInline<'a>, 133 
| ) -> Result, Error> { 134 | let tok = self.next()?; 135 | match tok.0 { 136 | TokenType::Ref => self.ref_from(tok.1), 137 | _ => Ok(Or::B(tok.1.of(f(ns(self.utf_from(tok)?))))), 138 | } 139 | } 140 | 141 | pub fn nat(&mut self) -> Result, Error> { 142 | let tok = self.next()?; 143 | if tok.0 == TokenType::Ref { 144 | self.ref_from(tok.1) 145 | } else { 146 | let name = ns(self.utf_from(tok)?); 147 | let desc = ns(self.utf()?); 148 | Ok(Or::B(tok.1.of(types::InlineNat(name, desc)))) 149 | } 150 | } 151 | 152 | pub fn mhnotref(&mut self, tag_span: Span<'a>) -> Result, Error> { 153 | let tag = mhtags::parse(tag_span.0).ok_or_else(|| self.error1("Invalid method handle tag", tag_span))?; 154 | let body = Box::new(ns(self.ref_or_tagged_const()?)); 155 | Ok(tag_span.of(InlineConst::MethodHandle(tag, body))) 156 | } 157 | 158 | pub fn bs_args(&mut self, mh: types::SymSpanConst<'a>) -> Result, Error> { 159 | let mut bsargs = vec![mh]; 160 | while !self.tryv(":") { 161 | if bsargs.len() >= 65536 { 162 | // Can have up to 65536 elements because initial mh doesn't count towards length 163 | // todo - add test 164 | let next_span = self.peek()?.1; 165 | return self.err1("Maximum number of arguments to bootstrap method (65535) exceeded", next_span); 166 | } 167 | bsargs.push(self.ref_or_tagged_const()?); 168 | } 169 | 170 | Ok(types::InlineBs(bsargs)) 171 | } 172 | 173 | pub fn bsref(&mut self) -> Result, Error> { 174 | let tok = self.next()?; 175 | match tok.0 { 176 | TokenType::BsRef => self.ref_from(tok.1), 177 | TokenType::Word => { 178 | let mh = Or::B(self.mhnotref(tok.1)?); 179 | Ok(Or::B(self.bs_args(mh)?)) 180 | } 181 | _ => self.err1("Expected methodhandle tag or bootstrap ref", tok.1), 182 | } 183 | } 184 | 185 | pub fn float_from(&mut self, span: Span<'a>) -> Result, Error> { 186 | let s = span.0.trim_end_matches('f'); 187 | Ok(InlineConst::Float( 188 | parse_literal::float(s).ok_or_else(|| self.error1("Invalid float literal", span))?, 189 | )) 190 | } 191 | pub fn 
double_from(&mut self, span: Span<'a>) -> Result, Error> { 192 | Ok(InlineConst::Double( 193 | parse_literal::double(span.0).ok_or_else(|| self.error1("Invalid double literal", span))?, 194 | )) 195 | } 196 | 197 | pub fn long_from(&mut self, span: Span<'a>) -> Result, Error> { 198 | let s = span.0.trim_end_matches('L'); 199 | let i = parse_literal::int::(s).ok_or_else(|| self.error1("Invalid long literal", span))?; 200 | Ok(InlineConst::Long(i as u64)) 201 | } 202 | pub fn int_from(&mut self, span: Span<'a>) -> Result, Error> { 203 | let i = parse_literal::int::(span.0).ok_or_else(|| self.error1("Invalid integer literal", span))?; 204 | Ok(InlineConst::Int(i as u32)) 205 | } 206 | 207 | pub fn tagged_const_from(&mut self, span: Span<'a>) -> Result, Error> { 208 | use InlineConst::*; 209 | 210 | Ok(match span.0 { 211 | "Utf8" => { 212 | let tok = self.next()?; 213 | Utf8(match self.utf_from(tok)? { 214 | Or::A(_) => return self.err1("Expected identifier or string, not ref", span), 215 | Or::B(b) => b.v.0, 216 | }) 217 | } 218 | "Int" => Int(self.i32()? as u32), 219 | "Float" => { 220 | let span = self.assert_type(TokenType::FloatLit)?; 221 | self.float_from(span)? 222 | } 223 | "Long" => { 224 | let span = self.assert_type(TokenType::LongLit)?; 225 | self.long_from(span)? 226 | } 227 | "Double" => { 228 | let span = self.assert_type(TokenType::DoubleLit)?; 229 | self.double_from(span)? 
230 | } 231 | 232 | "Class" => Class(ns(self.utf()?)), 233 | "String" => Str(ns(self.utf()?)), 234 | "MethodType" => MethodType(ns(self.utf()?)), 235 | "Module" => Module(ns(self.utf()?)), 236 | "Package" => Package(ns(self.utf()?)), 237 | 238 | "Field" => Field(ns(self.cls()?), ns(self.nat()?)), 239 | "Method" => Method(ns(self.cls()?), ns(self.nat()?)), 240 | "InterfaceMethod" => InterfaceMethod(ns(self.cls()?), ns(self.nat()?)), 241 | 242 | "NameAndType" => NameAndType(ns(self.utf()?), ns(self.utf()?)), 243 | "MethodHandle" => { 244 | let tag_span = self.assert_type(TokenType::Word)?; 245 | self.mhnotref(tag_span)?.v 246 | } 247 | 248 | "Dynamic" => Dynamic(self.bsref()?, ns(self.nat()?)), 249 | "InvokeDynamic" => InvokeDynamic(self.bsref()?, ns(self.nat()?)), 250 | 251 | _ => return self.err1("Unrecognized constant tag", span), 252 | }) 253 | } 254 | 255 | pub fn ref_or_tagged_const(&mut self) -> Result, Error> { 256 | use TokenType::*; 257 | let tok = self.next()?; 258 | match tok.0 { 259 | Ref => self.ref_from(tok.1), 260 | Word => Ok(Or::B(tok.1.of(self.tagged_const_from(tok.1)?))), 261 | _ => self.err1("Expected constant pool tag (Utf8, Int, String, NameAndType, etc.) 
or reference", tok.1), 262 | } 263 | } 264 | 265 | pub fn ref_or_tagged_bootstrap(&mut self) -> Result, Error> { 266 | use TokenType::*; 267 | let tok = self.next()?; 268 | 269 | match tok.0 { 270 | BsRef => self.ref_from(tok.1), 271 | Word => { 272 | let tag_span = tok.1; 273 | if tag_span.0 != "Bootstrap" { 274 | return self.err1("Expected 'Bootstrap' or bootstrap reference", tok.1); 275 | } 276 | 277 | let tok = self.next()?; 278 | let mh = match tok.0 { 279 | Ref => self.ref_from(tok.1)?, 280 | Word => Or::B(self.mhnotref(tok.1)?), 281 | _ => return self.err1("Expected methodhandle tag or ref", tok.1), 282 | }; 283 | Ok(Or::B(tag_span.of(self.bs_args(mh)?))) 284 | } 285 | _ => self.err1("Expected 'Bootstrap' or bootstrap reference", tok.1), 286 | } 287 | } 288 | 289 | pub fn ldc_rhs(&mut self) -> Result, Error> { 290 | use TokenType::*; 291 | 292 | let tok = self.next()?; 293 | match tok.0 { 294 | IntLit => Ok(Or::B(tok.1.of(self.int_from(tok.1)?))), 295 | FloatLit => Ok(Or::B(tok.1.of(self.float_from(tok.1)?))), 296 | LongLit => Ok(Or::B(tok.1.of(self.long_from(tok.1)?))), 297 | DoubleLit => Ok(Or::B(tok.1.of(self.double_from(tok.1)?))), 298 | StringLit => Ok(Or::B(tok.1.of(types::InlineConst::Str(ns(self.utf_from(tok)?))))), 299 | 300 | Ref => self.ref_from(tok.1), 301 | Word => Ok(Or::B(tok.1.of(self.tagged_const_from(tok.1)?))), 302 | _ => self.err1("Expected constant pool tag (Utf8, Int, String, NameAndType, etc.) 
or reference", tok.1), 303 | } 304 | } 305 | 306 | pub fn static_utf(name: &'static str, span: Span<'a>) -> types::SymSpanUtf8<'a> { 307 | Or::B(span.of(types::InlineUtf8(BStr(name.as_bytes())))) 308 | } 309 | 310 | pub fn flags(&mut self) -> Result { 311 | let mut flags = FlagList::new(); 312 | while self.has_type(TokenType::Word) { 313 | if flags.push(self.peek()?.1).is_ok() { 314 | self.next()?; 315 | } else { 316 | break; 317 | } 318 | } 319 | Ok(flags.flush()) 320 | } 321 | 322 | ///////////////////////////////////////////////////////////////////////////////////// 323 | pub fn lbl(&mut self) -> Result, Error> { 324 | let tok = self.next()?; 325 | if tok.0 != TokenType::Word || !tok.1 .0.starts_with('L') { 326 | self.err1("Expected label", tok.1) 327 | } else { 328 | Ok(tok.1) 329 | } 330 | } 331 | 332 | pub fn lbl_to_pos(&self, span: Span<'a>) -> Result, Error> { 333 | self.labels 334 | .get(span.0) 335 | .map(|p| span.of(*p)) 336 | .ok_or_else(|| self.error1("Undefined label", span)) 337 | } 338 | 339 | pub fn lblpos(&mut self) -> Result, Error> { 340 | let span = self.lbl()?; 341 | self.labels 342 | .get(span.0) 343 | .map(|p| span.of(*p)) 344 | .ok_or_else(|| self.error1("Undefined label", span)) 345 | } 346 | 347 | fn convert_pos + Display>(&self, span: Span<'a>, v: i64, min: T, max: T) -> Result { 348 | v.try_into() 349 | .map_err(|_| self.error1str(format!("Bytecode offset must be {} <= x <= {}, found {}", min, max, v), span)) 350 | } 351 | 352 | pub fn pos_to_u16(&self, pos: Spanned<'a, Pos>) -> Result { 353 | self.convert_pos(pos.span, pos.v.0 as i64, u16::MIN, u16::MAX) 354 | } 355 | 356 | pub fn pos_diff_to_u16(&self, base: Pos, pos: Spanned<'a, Pos>) -> Result { 357 | self.convert_pos(pos.span, pos.v.0 as i64 - base.0 as i64, u16::MIN, u16::MAX) 358 | } 359 | pub fn pos_diff_to_i16(&self, base: Pos, pos: Spanned<'a, Pos>) -> Result { 360 | self.convert_pos(pos.span, pos.v.0 as i64 - base.0 as i64, i16::MIN, i16::MAX) 361 | } 362 | pub fn 
pos_diff_to_i32(&self, base: Pos, pos: Spanned<'a, Pos>) -> Result { 363 | self.convert_pos(pos.span, pos.v.0 as i64 - base.0 as i64, i32::MIN, i32::MAX) 364 | } 365 | 366 | pub fn lbl16(&mut self) -> Result { 367 | let pos = self.lblpos()?; 368 | self.pos_to_u16(pos) 369 | } 370 | } 371 | -------------------------------------------------------------------------------- /src/lib/assemble/cpool/builder.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use super::raw_const::RawBsMeth; 4 | use super::raw_const::RawConst; 5 | use super::types::*; 6 | use crate::lib::assemble::span::Error; 7 | use crate::lib::assemble::span::ErrorMaker; 8 | use crate::lib::assemble::span::Span; 9 | use crate::lib::assemble::span::Spanned; 10 | use crate::lib::assemble::writer::BufWriter; 11 | use crate::lib::util::BStr; 12 | 13 | #[derive(Debug)] 14 | struct Range { 15 | first: u16, 16 | last: u16, 17 | } 18 | impl Range { 19 | fn len(&self) -> usize { 20 | (1 + self.last as usize) - self.first as usize 21 | } 22 | } 23 | 24 | struct ConstSlotAllocator { 25 | ranges: Vec, 26 | // indexes into the ranges array 27 | odd_ptr: usize, 28 | wide_ptr: usize, 29 | ptr: usize, 30 | } 31 | impl ConstSlotAllocator { 32 | fn new(ranges: Vec) -> Self { 33 | Self { 34 | ranges, 35 | odd_ptr: 0, 36 | wide_ptr: 0, 37 | ptr: 0, 38 | } 39 | } 40 | 41 | fn alloc(&mut self, is_ldc: bool) -> Option { 42 | while let Some(r) = self.ranges.get_mut(self.odd_ptr) { 43 | if r.len() % 2 == 0 { 44 | self.odd_ptr += 1; 45 | } else if is_ldc && r.first > 255 { 46 | break; 47 | } else { 48 | self.odd_ptr += 1; 49 | let chosen = r.first; 50 | r.first += 1; 51 | return Some(chosen); 52 | } 53 | } 54 | 55 | while let Some(r) = self.ranges.get_mut(self.ptr) { 56 | if is_ldc && r.first > 255 { 57 | return None; 58 | } 59 | 60 | if r.len() >= 1 { 61 | let chosen = r.first; 62 | r.first += 1; 63 | return Some(chosen); 64 | } else { 65 | self.ptr += 
1; 66 | } 67 | } 68 | 69 | None 70 | } 71 | 72 | fn alloc_wide(&mut self) -> Option { 73 | while let Some(r) = self.ranges.get_mut(self.wide_ptr) { 74 | if r.len() >= 2 { 75 | let chosen = r.first; 76 | r.first += 2; 77 | return Some(chosen); 78 | } else { 79 | self.wide_ptr += 1; 80 | } 81 | } 82 | 83 | None 84 | } 85 | } 86 | 87 | pub enum BsAttrNameNeeded { 88 | Always, 89 | IfPresent, 90 | Never, 91 | } 92 | 93 | #[derive(Debug)] 94 | pub struct BsAttrInfo { 95 | pub buf: Vec, 96 | pub num_bs: u16, 97 | pub name: Option, 98 | } 99 | impl BsAttrInfo { 100 | pub fn data_len(&self) -> Option { 101 | (2 + self.buf.len() as u64).try_into().ok() 102 | } 103 | } 104 | 105 | pub struct PoolBuilder<'a> { 106 | error_maker: ErrorMaker<'a>, 107 | 108 | pending: Vec<(u16, Spanned<'a, RawConstInline<'a>>)>, 109 | allocated: HashMap, u16>, 110 | allocator: ConstSlotAllocator, 111 | raw_bs_defs: Vec<(u16, Spanned<'a, RawBsInline<'a>>)>, 112 | } 113 | impl<'a> PoolBuilder<'a> { 114 | pub fn new( 115 | error_maker: ErrorMaker<'a>, 116 | mut raw_defs: Vec<(u16, Spanned<'a, RawConstInline<'a>>)>, 117 | mut raw_bs_defs: Vec<(u16, Spanned<'a, RawBsInline<'a>>)>, 118 | ) -> Self { 119 | raw_defs.sort_unstable_by_key(|p| p.0); 120 | raw_bs_defs.sort_unstable_by_key(|p| !p.0); // use bitwise not to reverse sort order 121 | 122 | let mut ranges = Vec::new(); 123 | let mut first = 1; 124 | for &(ind, ref c) in &raw_defs { 125 | if ind > first { 126 | ranges.push(Range { first, last: ind - 1 }); 127 | } 128 | first = ind + if c.v.is_long() { 2 } else { 1 }; 129 | } 130 | if first <= 65534 { 131 | ranges.push(Range { first, last: 65534 }); 132 | } 133 | 134 | Self { 135 | error_maker, 136 | pending: raw_defs, 137 | allocated: HashMap::new(), 138 | allocator: ConstSlotAllocator::new(ranges), 139 | raw_bs_defs, 140 | } 141 | } 142 | 143 | pub fn allocate(&mut self, span: Span<'a>, c: RawConstInline<'a>, is_ldc: bool) -> Result { 144 | // println!("allocate {} {:?} {}", span.0, c, is_ldc); 
145 | self.allocated 146 | .get(&c) 147 | .copied() 148 | .or_else(|| { 149 | let slot = if c.is_long() { 150 | self.allocator.alloc_wide() 151 | } else { 152 | self.allocator.alloc(is_ldc) 153 | }; 154 | if let Some(ind) = slot { 155 | self.allocated.insert(c.clone(), ind); 156 | self.pending.push((ind, span.of(c))); 157 | } 158 | slot 159 | }) 160 | .ok_or_else(|| { 161 | self.error_maker 162 | .error1("Exceeded maximum 65534 constants per class in constant pool", span) 163 | }) 164 | } 165 | 166 | fn fix(&mut self, span: Span<'a>, r: Or>) -> Result { 167 | match r { 168 | Or::A(ind) => Ok(ind), 169 | Or::B(u) => self.allocate(span, u.to_const(), false), 170 | } 171 | } 172 | 173 | fn fix2(&mut self, r: Or>>) -> Result { 174 | match r { 175 | Or::A(ind) => Ok(ind), 176 | Or::B(u) => self.allocate(u.span, u.v.to_const(), false), 177 | } 178 | } 179 | 180 | fn fix_bs(&mut self, span: Span<'a>, r: RawBsRef<'a>, bs_table: &mut BootstrapBuilder) -> Result { 181 | match r { 182 | Or::A(ind) => Ok(ind), 183 | Or::B(bs) => { 184 | let bs = self.resolve_bs_sub_refs(span.of(bs))?; 185 | bs_table.allocate(bs).ok_or_else(|| { 186 | self.error_maker 187 | .error1("Exceeded maximum 65535 bootstrap methods per class", span) 188 | }) 189 | } 190 | } 191 | } 192 | 193 | fn resolve_cp_sub_refs( 194 | &mut self, 195 | c: Spanned<'a, RawConstInline<'a>>, 196 | bs_table: &mut BootstrapBuilder, 197 | ) -> Result, Error> { 198 | let span = c.span; 199 | Ok(match c.v { 200 | InlineConst::Utf8(v) => RawConst::Utf8(v.0), 201 | 202 | InlineConst::Int(v) => RawConst::Int(v), 203 | InlineConst::Float(v) => RawConst::Float(v), 204 | InlineConst::Long(v) => RawConst::Long(v), 205 | InlineConst::Double(v) => RawConst::Double(v), 206 | InlineConst::Class(v) => RawConst::Class(self.fix(span, v)?), 207 | InlineConst::Str(v) => RawConst::Str(self.fix(span, v)?), 208 | InlineConst::Field(cls, nat) => RawConst::Field(self.fix(span, cls)?, self.fix(span, nat)?), 209 | InlineConst::Method(cls, nat) => 
RawConst::Method(self.fix(span, cls)?, self.fix(span, nat)?), 210 | InlineConst::InterfaceMethod(cls, nat) => RawConst::InterfaceMethod(self.fix(span, cls)?, self.fix(span, nat)?), 211 | InlineConst::NameAndType(u1, u2) => RawConst::NameAndType(self.fix(span, u1)?, self.fix(span, u2)?), 212 | 213 | InlineConst::MethodHandle(tag, val) => RawConst::MethodHandle(tag, self.fix(span, *val)?), 214 | InlineConst::MethodType(v) => RawConst::MethodType(self.fix(span, v)?), 215 | InlineConst::Dynamic(bs, nat) => RawConst::Dynamic(self.fix_bs(span, bs, bs_table)?, self.fix(span, nat)?), 216 | InlineConst::InvokeDynamic(bs, nat) => { 217 | RawConst::InvokeDynamic(self.fix_bs(span, bs, bs_table)?, self.fix(span, nat)?) 218 | } 219 | InlineConst::Module(v) => RawConst::Module(self.fix(span, v)?), 220 | InlineConst::Package(v) => RawConst::Package(self.fix(span, v)?), 221 | }) 222 | } 223 | 224 | fn resolve_bs_sub_refs(&mut self, bs: Spanned<'a, RawBsInline<'a>>) -> Result { 225 | let mut iter = bs.v.0.into_iter(); 226 | let mh = iter.next().unwrap(); 227 | let mhref = self.fix2(mh)?; 228 | let args = iter.map(|c| self.fix2(c)).collect::>()?; 229 | Ok(RawBsMeth { mhref, args }) 230 | } 231 | 232 | pub fn build( 233 | mut self, 234 | cpwriter: &mut BufWriter, 235 | bs_name_needed: BsAttrNameNeeded, 236 | class_name_ind: u16, 237 | ) -> Result<(BsAttrInfo, Option<&'a [u8]>), Error> { 238 | let raw_bs_defs = std::mem::take(&mut self.raw_bs_defs); 239 | let resolved_bs_defs = raw_bs_defs 240 | .into_iter() 241 | .map(|(ind, bs)| Ok((ind, self.resolve_bs_sub_refs(bs)?))) 242 | .collect::>()?; 243 | let mut bs_table = BootstrapBuilder::new(resolved_bs_defs); 244 | 245 | // Just choose a span arbitrarily to use for the case where we can't allocate 246 | // a name for an implicit BootstrapMethods attribute later. 
247 | let bs_name_span = self.pending.last().map(|(_, c)| c.span); 248 | 249 | let filler_const = RawConst::Utf8(b""); 250 | let mut table = [Some(filler_const); 65535]; 251 | table[0] = None; 252 | 253 | while let Some((ind, c)) = self.pending.pop() { 254 | let is_long = c.v.is_long(); 255 | assert!(table[ind as usize] == Some(filler_const)); 256 | table[ind as usize] = Some(self.resolve_cp_sub_refs(c, &mut bs_table)?); 257 | if is_long { 258 | assert!(table[ind as usize + 1] == Some(filler_const)); 259 | table[ind as usize + 1] = None; 260 | } 261 | } 262 | 263 | let (buf, num_bs) = bs_table.finish(); 264 | let name_needed = match bs_name_needed { 265 | BsAttrNameNeeded::Always => true, 266 | BsAttrNameNeeded::IfPresent => num_bs > 0, 267 | BsAttrNameNeeded::Never => false, 268 | }; 269 | 270 | let name = if name_needed { 271 | let s = b"BootstrapMethods"; 272 | let c = InlineConst::Utf8(BStr(s)); 273 | let slot = self.allocate(bs_name_span.unwrap(), c, false)?; 274 | table[slot as usize] = Some(RawConst::Utf8(s)); 275 | Some(slot) 276 | } else { 277 | None 278 | }; 279 | let bs_info = BsAttrInfo { buf, num_bs, name }; 280 | 281 | let num_consts = if let Some(range) = self.allocator.ranges.last() { 282 | // todo - test this 283 | if range.last == 65534 { 284 | range.first 285 | } else { 286 | 65535 287 | } 288 | } else { 289 | 65535 290 | }; 291 | 292 | let class_name = match table[class_name_ind as usize] { 293 | Some(RawConst::Class(utf_ind)) => match table[utf_ind as usize] { 294 | Some(RawConst::Utf8(s)) => Some(s), 295 | _ => None, 296 | }, 297 | _ => None, 298 | }; 299 | 300 | let w = cpwriter; 301 | w.u16(num_consts); 302 | for c in &table[1..num_consts as usize] { 303 | // for (i, c) in table[..num_consts as usize].into_iter().enumerate() { 304 | // println!("[{}] {:?}", i, c); 305 | if let Some(c) = c { 306 | c.write(w); 307 | } 308 | } 309 | 310 | Ok((bs_info, class_name)) 311 | } 312 | } 313 | 314 | struct BootstrapBuilder { 315 | w: BufWriter, 316 | 
317 | pending_bs_defs: Vec<(u16, RawBsMeth)>, 318 | next_bs_ind: u16, 319 | allocated: HashMap, 320 | } 321 | impl BootstrapBuilder { 322 | fn new(pending_bs_defs: Vec<(u16, RawBsMeth)>) -> Self { 323 | Self { 324 | w: BufWriter::default(), 325 | pending_bs_defs, 326 | next_bs_ind: 0, 327 | allocated: HashMap::new(), 328 | } 329 | } 330 | 331 | fn allocate(&mut self, bs: RawBsMeth) -> Option { 332 | self.allocated.get(&bs).copied().or_else(|| { 333 | while self.pending_bs_defs.last().map(|&(ind, _)| ind) == Some(self.next_bs_ind) { 334 | let (_, bs) = self.pending_bs_defs.pop().unwrap(); 335 | bs.write(&mut self.w); 336 | assert!(self.next_bs_ind < u16::MAX); 337 | self.next_bs_ind += 1; 338 | } 339 | 340 | if self.next_bs_ind == u16::MAX { 341 | return None; 342 | } 343 | let slot = self.next_bs_ind; 344 | bs.write(&mut self.w); 345 | self.next_bs_ind += 1; 346 | self.allocated.insert(bs, slot); 347 | 348 | Some(slot) 349 | }) 350 | } 351 | 352 | fn finish(mut self) -> (Vec, u16) { 353 | while let Some((ind, bs)) = self.pending_bs_defs.pop() { 354 | assert!(ind >= self.next_bs_ind); 355 | while self.next_bs_ind <= ind { 356 | bs.write(&mut self.w); 357 | self.next_bs_ind += 1; 358 | } 359 | } 360 | 361 | (self.w.into_buf(), self.next_bs_ind) 362 | } 363 | } 364 | -------------------------------------------------------------------------------- /src/lib/assemble/cpool/mod.rs: -------------------------------------------------------------------------------- 1 | mod builder; 2 | mod raw_const; 3 | mod sym_ref_resolver; 4 | pub mod types; 5 | use std::collections::HashMap; 6 | 7 | use crate::lib::assemble::span::Error; 8 | use crate::lib::assemble::span::ErrorMaker; 9 | use crate::lib::assemble::span::Span; 10 | use crate::lib::assemble::span::Spanned; 11 | pub use builder::BsAttrNameNeeded; 12 | use builder::PoolBuilder; 13 | use sym_ref_resolver::PoolSymDefs; 14 | pub use types::*; 15 | 16 | pub type RefOr<'a, T> = Or, T>; 17 | 18 | pub struct Pool<'a> { 19 | 
sym_defs: PoolSymDefs<'a>, 20 | raw_defs: HashMap, Option>)>, 21 | bs_raw_defs: HashMap, SymSpanBsInline<'a>)>, 22 | } 23 | // Real instance methods 24 | impl<'a> Pool<'a> { 25 | pub fn new(error_maker: ErrorMaker<'a>) -> Self { 26 | Self { 27 | sym_defs: PoolSymDefs::new(error_maker), 28 | raw_defs: HashMap::new(), 29 | bs_raw_defs: HashMap::new(), 30 | } 31 | } 32 | 33 | pub fn add_sym_def(&mut self, name: Spanned<'a, &'a str>, r: SymConstRef<'a>) -> Result<(), Error> { 34 | self.sym_defs.add_def(name, r) 35 | } 36 | 37 | pub fn add_bs_sym_def(&mut self, name: Spanned<'a, &'a str>, r: SymBsRef<'a>) -> Result<(), Error> { 38 | self.sym_defs.add_bs_def(name, r) 39 | } 40 | 41 | pub fn add_raw_def(&mut self, ind: u16, new_span: Span<'a>, r: SymSpanConstInline<'a>) -> Result<(), Error> { 42 | let is_long = r.v.is_long(); 43 | 44 | if ind == 0 || ind == 0xFFFF || (is_long && ind == 0xFFFE) { 45 | return self.err1("Invalid constant pool index", new_span); 46 | } 47 | 48 | if is_long { 49 | if let Some((old_span, _old)) = self.raw_defs.insert(ind + 1, (new_span, None)) { 50 | return self.err2( 51 | "Conflicting raw const definition", 52 | old_span, 53 | "Note: Conflicts with wide const definition here", 54 | new_span, 55 | ); 56 | } 57 | } 58 | 59 | if let Some((old_span, old)) = self.raw_defs.insert(ind, (new_span, Some(r))) { 60 | if old.is_some() { 61 | self.err2("Duplicate raw const definition", new_span, "Note: Previously defined here", old_span) 62 | } else { 63 | self.err2( 64 | "Conflicting raw const definition", 65 | new_span, 66 | "Note: Conflicts with wide const definition here", 67 | old_span, 68 | ) 69 | } 70 | } else { 71 | Ok(()) 72 | } 73 | } 74 | 75 | pub fn add_bs_raw_def(&mut self, ind: u16, new_span: Span<'a>, r: SymSpanBsInline<'a>) -> Result<(), Error> { 76 | if ind == 0xFFFF { 77 | return self.err1("Bootstrap method index must be <= 65534.", new_span); 78 | } 79 | 80 | if let Some((old_span, _)) = self.bs_raw_defs.insert(ind, (new_span, r)) { 81 | 
self.err2("Duplicate raw bootstrap definition", new_span, "Note: Previously defined here", old_span) 82 | } else { 83 | Ok(()) 84 | } 85 | } 86 | 87 | pub fn finish_defs(mut self) -> Result, Error> { 88 | let error_maker = *self; 89 | let raw_defs = self 90 | .raw_defs 91 | .into_iter() 92 | .filter_map(|(ind, (_span, slot))| slot.map(|c| Ok((ind, self.sym_defs.resolve_const2(c)?)))) 93 | .collect::>()?; 94 | let bs_raw_defs = self 95 | .bs_raw_defs 96 | .into_iter() 97 | .map(|(ind, (_span, bs))| Ok((ind, self.sym_defs.resolve_bsmeth2(bs)?))) 98 | .collect::>()?; 99 | 100 | Ok(PoolResolver { 101 | sym_defs: self.sym_defs, 102 | builder: PoolBuilder::new(error_maker, raw_defs, bs_raw_defs), 103 | }) 104 | } 105 | } 106 | impl<'a> std::ops::Deref for Pool<'a> { 107 | type Target = ErrorMaker<'a>; 108 | 109 | fn deref(&self) -> &Self::Target { 110 | &self.sym_defs 111 | } 112 | } 113 | 114 | pub struct PoolResolver<'a> { 115 | sym_defs: PoolSymDefs<'a>, 116 | builder: PoolBuilder<'a>, 117 | } 118 | impl<'a> PoolResolver<'a> { 119 | pub fn resolve(&mut self, c: SymSpanConst<'a>) -> Result { 120 | let c = self.sym_defs.resolve_ref2(c)?; 121 | match c { 122 | Or::A(ind) => Ok(ind), 123 | Or::B(c) => self.builder.allocate(c.span, c.v, false), 124 | } 125 | } 126 | 127 | pub fn resolve_ldc(&mut self, c: SymSpanConst<'a>, span: Span<'a>) -> Result { 128 | let c = self.sym_defs.resolve_ref2(c)?; 129 | let ind = match c { 130 | Or::A(ind) => Ok(ind), 131 | Or::B(c) => self.builder.allocate(c.span, c.v, true), 132 | }?; 133 | 134 | ind.try_into().map_err(|_| { 135 | self.sym_defs 136 | .error1("ldc constant index must be <= 255. 
Try using ldc_w instead.", span) 137 | }) 138 | } 139 | 140 | pub fn end(self) -> PoolBuilder<'a> { 141 | self.builder 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/lib/assemble/cpool/raw_const.rs: -------------------------------------------------------------------------------- 1 | use crate::lib::assemble::writer::BufWriter; 2 | 3 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 4 | pub(super) enum RawConst<'a> { 5 | Utf8(&'a [u8]), 6 | 7 | Int(u32), 8 | Float(u32), 9 | Long(u64), 10 | Double(u64), 11 | Class(u16), 12 | Str(u16), 13 | Field(u16, u16), 14 | Method(u16, u16), 15 | InterfaceMethod(u16, u16), 16 | NameAndType(u16, u16), 17 | 18 | MethodHandle(u8, u16), 19 | MethodType(u16), 20 | Dynamic(u16, u16), 21 | InvokeDynamic(u16, u16), 22 | Module(u16), 23 | Package(u16), 24 | } 25 | impl RawConst<'_> { 26 | pub(super) fn write(self, w: &mut BufWriter) { 27 | use RawConst::*; 28 | match self { 29 | Utf8(s) => { 30 | w.u8(1); 31 | w.u16(s.len().try_into().unwrap()); 32 | w.write(s); 33 | } 34 | Int(v) => { 35 | w.u8(3); 36 | w.u32(v); 37 | } 38 | Float(v) => { 39 | w.u8(4); 40 | w.u32(v); 41 | } 42 | Long(v) => { 43 | w.u8(5); 44 | w.u64(v); 45 | } 46 | Double(v) => { 47 | w.u8(6); 48 | w.u64(v); 49 | } 50 | Class(v) => { 51 | w.u8(7); 52 | w.u16(v); 53 | } 54 | Str(v) => { 55 | w.u8(8); 56 | w.u16(v); 57 | } 58 | Field(cls, nat) => { 59 | w.u8(9); 60 | w.u16(cls); 61 | w.u16(nat); 62 | } 63 | Method(cls, nat) => { 64 | w.u8(10); 65 | w.u16(cls); 66 | w.u16(nat); 67 | } 68 | InterfaceMethod(cls, nat) => { 69 | w.u8(11); 70 | w.u16(cls); 71 | w.u16(nat); 72 | } 73 | NameAndType(n, t) => { 74 | w.u8(12); 75 | w.u16(n); 76 | w.u16(t); 77 | } 78 | MethodHandle(tag, val) => { 79 | w.u8(15); 80 | w.u8(tag); 81 | w.u16(val); 82 | } 83 | MethodType(v) => { 84 | w.u8(16); 85 | w.u16(v); 86 | } 87 | Dynamic(bs, nat) => { 88 | w.u8(17); 89 | w.u16(bs); 90 | w.u16(nat); 91 | } 92 | InvokeDynamic(bs, nat) => { 93 | 
w.u8(18); 94 | w.u16(bs); 95 | w.u16(nat); 96 | } 97 | Module(v) => { 98 | w.u8(19); 99 | w.u16(v); 100 | } 101 | Package(v) => { 102 | w.u8(20); 103 | w.u16(v); 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[derive(Debug, Hash, PartialEq, Eq)] 110 | pub(super) struct RawBsMeth { 111 | pub(super) mhref: u16, 112 | pub(super) args: Vec, 113 | } 114 | impl RawBsMeth { 115 | pub(super) fn write(&self, w: &mut BufWriter) { 116 | w.u16(self.mhref); 117 | w.u16(self.args.len().try_into().unwrap()); 118 | for v in self.args.iter().copied() { 119 | w.u16(v); 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/lib/assemble/cpool/sym_ref_resolver.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use super::types::*; 4 | use crate::lib::assemble::span::Error; 5 | use crate::lib::assemble::span::ErrorMaker; 6 | use crate::lib::assemble::span::Span; 7 | use crate::lib::assemble::span::Spanned; 8 | 9 | enum ResolveState { 10 | Unresolved(Option), 11 | Resolved(Resolved), 12 | } 13 | 14 | pub struct PoolSymDefs<'a> { 15 | error_maker: ErrorMaker<'a>, 16 | 17 | sym_defs: HashMap<&'a str, (Span<'a>, ResolveState, RawConstRef<'a>>)>, 18 | bs_sym_defs: HashMap<&'a str, (Span<'a>, ResolveState, RawBsRef<'a>>)>, 19 | } 20 | impl<'a> PoolSymDefs<'a> { 21 | pub fn new(error_maker: ErrorMaker<'a>) -> Self { 22 | Self { 23 | error_maker, 24 | sym_defs: HashMap::new(), 25 | bs_sym_defs: HashMap::new(), 26 | } 27 | } 28 | 29 | fn add_def_generic( 30 | &mut self, 31 | name: Spanned<'a, &'a str>, 32 | r: Before, 33 | get_map: fn(&mut Self) -> &mut HashMap<&'a str, (Span<'a>, ResolveState)>, 34 | ) -> Result<(), Error> { 35 | let new_val = (name.span, ResolveState::Unresolved(Some(r))); 36 | if let Some((prev_span, _)) = get_map(self).insert(name.v, new_val) { 37 | self.err2( 38 | "Error: Duplicate definition of symbolic ref", 39 | name.span, 40 | "Note: 
Previous definition was here", 41 | prev_span, 42 | ) 43 | } else { 44 | Ok(()) 45 | } 46 | } 47 | 48 | pub fn add_def(&mut self, name: Spanned<'a, &'a str>, r: SymConstRef<'a>) -> Result<(), Error> { 49 | self.add_def_generic(name, r, |this| &mut this.sym_defs) 50 | } 51 | 52 | pub fn add_bs_def(&mut self, name: Spanned<'a, &'a str>, r: SymBsRef<'a>) -> Result<(), Error> { 53 | self.add_def_generic(name, r, |this| &mut this.bs_sym_defs) 54 | } 55 | 56 | fn resolve_sym_generic( 57 | &mut self, 58 | name: Spanned<'_, &str>, 59 | get_map: fn(&mut Self) -> &mut HashMap<&'a str, (Span<'a>, ResolveState)>, 60 | resolve: fn(&mut Self, Before) -> Result, 61 | ) -> Result { 62 | if let Some((_, v)) = get_map(self).get_mut(name.v) { 63 | use ResolveState::*; 64 | let to_resolve = match v { 65 | Unresolved(r) => r.take(), 66 | Resolved(r) => return Ok(r.clone()), 67 | }; 68 | 69 | if let Some(r) = to_resolve { 70 | let r = resolve(self, r)?; 71 | get_map(self).get_mut(name.v).unwrap().1 = Resolved(r.clone()); 72 | Ok(r) 73 | } else { 74 | self.err1("Circular definition of symbolic reference", name.span) 75 | } 76 | } else { 77 | self.err1("Undefined symbolic reference", name.span) 78 | } 79 | } 80 | 81 | fn resolve_sym(&mut self, name: Spanned<'_, &str>) -> Result, Error> { 82 | self.resolve_sym_generic(name, |this| &mut this.sym_defs, Self::resolve_ref) 83 | } 84 | 85 | fn resolve_bs_sym(&mut self, name: Spanned<'_, &str>) -> Result, Error> { 86 | self.resolve_sym_generic(name, |this| &mut this.bs_sym_defs, Self::resolve_bs_ref) 87 | } 88 | 89 | fn resolve_utf8(&mut self, r: SymUtf8Ref<'a>) -> Result, Error> { 90 | Ok(match r { 91 | Or::A(RefType::Raw(r)) => Or::A(r), 92 | Or::A(RefType::Sym(name)) => { 93 | let resolved = self.resolve_sym(name)?; 94 | match resolved { 95 | Or::A(r) => Or::A(r), 96 | Or::B(InlineConst::Utf8(sym)) => Or::B(InlineUtf8(sym)), 97 | _ => self.err1("Reference must resolve to raw or Utf8 reference.", name.span)?, 98 | } 99 | } 100 | Or::B(sym) => 
Or::B(sym), 101 | }) 102 | } 103 | 104 | fn resolve_class(&mut self, r: SymClassRef<'a>) -> Result, Error> { 105 | Ok(match r { 106 | Or::A(RefType::Raw(r)) => Or::A(r), 107 | Or::A(RefType::Sym(name)) => { 108 | let resolved = self.resolve_sym(name)?; 109 | match resolved { 110 | Or::A(r) => Or::A(r), 111 | Or::B(InlineConst::Class(r)) => Or::B(InlineClass(r)), 112 | _ => self.err1("Reference must resolve to raw or Class reference.", name.span)?, 113 | } 114 | } 115 | Or::B(InlineClass(u)) => Or::B(InlineClass(self.resolve_utf8(u)?)), 116 | }) 117 | } 118 | 119 | fn resolve_nat(&mut self, r: SymNatRef<'a>) -> Result, Error> { 120 | Ok(match r { 121 | Or::A(RefType::Raw(r)) => Or::A(r), 122 | Or::A(RefType::Sym(name)) => { 123 | let resolved = self.resolve_sym(name)?; 124 | match resolved { 125 | Or::A(r) => Or::A(r), 126 | Or::B(InlineConst::NameAndType(r1, r2)) => Or::B(InlineNat(r1, r2)), 127 | _ => self.err1("Reference must resolve to raw or NameAndType reference.", name.span)?, 128 | } 129 | } 130 | Or::B(InlineNat(r1, r2)) => Or::B(InlineNat(self.resolve_utf8(r1)?, self.resolve_utf8(r2)?)), 131 | }) 132 | } 133 | 134 | pub fn resolve_bsmeth(&mut self, bs: SymBsInline<'a>) -> Result, Error> { 135 | Ok(InlineBs(bs.0.into_iter().map(|r| self.resolve_ref2(r)).collect::>()?)) 136 | } 137 | 138 | fn resolve_bs_ref(&mut self, r: SymBsRef<'a>) -> Result, Error> { 139 | Ok(match r { 140 | Or::A(RefType::Raw(r)) => Or::A(r), 141 | Or::A(RefType::Sym(name)) => self.resolve_bs_sym(name)?, 142 | Or::B(bs) => Or::B(self.resolve_bsmeth(bs)?), 143 | }) 144 | } 145 | 146 | pub fn resolve_const(&mut self, c: SymConstInline<'a>) -> Result, Error> { 147 | use InlineConst::*; 148 | 149 | Ok(match c { 150 | Utf8(v) => Utf8(v), 151 | Int(v) => Int(v), 152 | Float(v) => Float(v), 153 | Long(v) => Long(v), 154 | Double(v) => Double(v), 155 | 156 | Class(v) => Class(self.resolve_utf8(v)?), 157 | Str(v) => Str(self.resolve_utf8(v)?), 158 | Field(clsr, natr) => 
Field(self.resolve_class(clsr)?, self.resolve_nat(natr)?), 159 | Method(clsr, natr) => Method(self.resolve_class(clsr)?, self.resolve_nat(natr)?), 160 | InterfaceMethod(clsr, natr) => InterfaceMethod(self.resolve_class(clsr)?, self.resolve_nat(natr)?), 161 | NameAndType(r1, r2) => NameAndType(self.resolve_utf8(r1)?, self.resolve_utf8(r2)?), 162 | MethodHandle(tag, r) => MethodHandle(tag, Box::new(self.resolve_ref(*r)?)), 163 | MethodType(v) => MethodType(self.resolve_utf8(v)?), 164 | Dynamic(dynr, natr) => Dynamic(self.resolve_bs_ref(dynr)?, self.resolve_nat(natr)?), 165 | InvokeDynamic(dynr, natr) => InvokeDynamic(self.resolve_bs_ref(dynr)?, self.resolve_nat(natr)?), 166 | Module(v) => Module(self.resolve_utf8(v)?), 167 | Package(v) => Package(self.resolve_utf8(v)?), 168 | }) 169 | } 170 | 171 | fn resolve_ref(&mut self, r: SymConstRef<'a>) -> Result, Error> { 172 | Ok(match r { 173 | Or::A(RefType::Raw(r)) => Or::A(r), 174 | Or::A(RefType::Sym(name)) => self.resolve_sym(name)?, 175 | Or::B(c) => Or::B(self.resolve_const(c)?), 176 | }) 177 | } 178 | 179 | // temp hack 180 | pub fn resolve_ref2(&mut self, r: SymSpanConst<'a>) -> Result, Error> { 181 | Ok(match r { 182 | Or::A(RefType::Raw(r)) => Or::A(r), 183 | Or::A(RefType::Sym(name)) => self.resolve_sym(name)?.map_b(|c| name.span.of(c)), 184 | Or::B(c) => Or::B(c.span.of(self.resolve_const(c.v)?)), 185 | }) 186 | } 187 | 188 | pub fn resolve_bsmeth2(&mut self, bs: SymSpanBsInline<'a>) -> Result, Error> { 189 | Ok(bs.span.of(self.resolve_bsmeth(bs.v)?)) 190 | } 191 | 192 | pub fn resolve_const2(&mut self, c: SymSpanConstInline<'a>) -> Result, Error> { 193 | Ok(c.span.of(self.resolve_const(c.v)?)) 194 | } 195 | } 196 | impl<'a> std::ops::Deref for PoolSymDefs<'a> { 197 | type Target = ErrorMaker<'a>; 198 | 199 | fn deref(&self) -> &Self::Target { 200 | &self.error_maker 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /src/lib/assemble/cpool/types.rs: 
-------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use crate::lib::assemble::span::Spanned; 4 | use crate::lib::util::BStr; 5 | 6 | #[derive(Hash, PartialEq, Eq, Clone, Copy)] 7 | pub enum Or { 8 | A(A), 9 | B(B), 10 | } 11 | impl Debug for Or { 12 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 13 | match self { 14 | Or::A(v) => v.fmt(f), 15 | Or::B(v) => v.fmt(f), 16 | } 17 | } 18 | } 19 | impl Or { 20 | pub fn map_b(self, f: impl FnOnce(B) -> B2) -> Or { 21 | match self { 22 | Or::A(v) => Or::A(v), 23 | Or::B(v) => Or::B(f(v)), 24 | } 25 | } 26 | 27 | // pub fn try_map_b(self, f: impl FnOnce(B) -> Result) -> Result, E> { 28 | // Ok(match self { 29 | // Or::A(v) => Or::A(v), 30 | // Or::B(v) => Or::B(f(v)?), 31 | // }) 32 | // } 33 | 34 | pub fn as_b(&self) -> Option<&B> { 35 | if let Or::B(v) = self { 36 | Some(v) 37 | } else { 38 | None 39 | } 40 | } 41 | } 42 | 43 | pub type Utf8<'a, Ref> = Or>; 44 | #[derive(Debug, PartialEq, Eq, Hash, Clone)] 45 | pub struct InlineUtf8<'a>(pub BStr<'a>); 46 | 47 | pub type Class<'a, Ref> = Or>; 48 | #[derive(Debug, PartialEq, Eq, Hash, Clone)] 49 | pub struct InlineClass<'a, Ref>(pub Utf8<'a, Ref>); 50 | 51 | pub type Nat<'a, Ref> = Or>; 52 | #[derive(Debug, PartialEq, Eq, Hash, Clone)] 53 | pub struct InlineNat<'a, Ref>(pub Utf8<'a, Ref>, pub Utf8<'a, Ref>); 54 | 55 | pub type Bs<'a, Ref> = Or>; 56 | #[derive(Debug, PartialEq, Eq, Hash, Clone)] 57 | pub struct InlineBs<'a, Ref>(pub Vec>); 58 | 59 | pub type Const<'a, Ref> = Or>; 60 | pub type SpanConst<'a, Ref> = Or>>; 61 | 62 | #[derive(Debug, PartialEq, Eq, Hash, Clone)] 63 | pub enum InlineConst<'a, Ref> { 64 | Utf8(BStr<'a>), 65 | 66 | Int(u32), 67 | Float(u32), 68 | Long(u64), 69 | Double(u64), 70 | Class(Utf8<'a, Ref>), 71 | Str(Utf8<'a, Ref>), 72 | Field(Class<'a, Ref>, Nat<'a, Ref>), 73 | Method(Class<'a, Ref>, Nat<'a, Ref>), 74 | InterfaceMethod(Class<'a, Ref>, Nat<'a, Ref>), 75 | 
NameAndType(Utf8<'a, Ref>, Utf8<'a, Ref>), 76 | 77 | MethodHandle(u8, Box>), 78 | MethodType(Utf8<'a, Ref>), 79 | Dynamic(Bs<'a, Ref>, Nat<'a, Ref>), 80 | InvokeDynamic(Bs<'a, Ref>, Nat<'a, Ref>), 81 | Module(Utf8<'a, Ref>), 82 | Package(Utf8<'a, Ref>), 83 | } 84 | impl<'a, Ref> InlineConst<'a, Ref> { 85 | pub fn is_long(&self) -> bool { 86 | match self { 87 | InlineConst::Long(_) | InlineConst::Double(_) => true, 88 | _ => false, 89 | } 90 | } 91 | } 92 | 93 | pub trait ToConst<'a, Ref> { 94 | fn to_const(self) -> InlineConst<'a, Ref>; 95 | } 96 | impl<'a, Ref> ToConst<'a, Ref> for InlineUtf8<'a> { 97 | fn to_const(self) -> InlineConst<'a, Ref> { 98 | InlineConst::Utf8(self.0) 99 | } 100 | } 101 | impl<'a, Ref> ToConst<'a, Ref> for InlineClass<'a, Ref> { 102 | fn to_const(self) -> InlineConst<'a, Ref> { 103 | InlineConst::Class(self.0) 104 | } 105 | } 106 | impl<'a, Ref> ToConst<'a, Ref> for InlineNat<'a, Ref> { 107 | fn to_const(self) -> InlineConst<'a, Ref> { 108 | InlineConst::NameAndType(self.0, self.1) 109 | } 110 | } 111 | impl<'a, Ref> ToConst<'a, Ref> for InlineConst<'a, Ref> { 112 | fn to_const(self) -> InlineConst<'a, Ref> { 113 | self 114 | } 115 | } 116 | 117 | #[derive(Debug)] 118 | pub enum RefType<'a> { 119 | Raw(u16), 120 | Sym(Spanned<'a, &'a str>), 121 | } 122 | 123 | pub type SymUtf8Ref<'a> = Utf8<'a, RefType<'a>>; 124 | pub type SymSpanUtf8<'a> = Or, Spanned<'a, InlineUtf8<'a>>>; 125 | pub type RawUtf8Ref<'a> = Utf8<'a, u16>; 126 | 127 | pub type SymClassRef<'a> = Class<'a, RefType<'a>>; 128 | pub type SymSpanClass<'a> = Or, Spanned<'a, InlineClass<'a, RefType<'a>>>>; 129 | // pub type SymClassInline<'a> = InlineClass<'a, RefType<'a>>; 130 | pub type RawClassRef<'a> = Class<'a, u16>; 131 | // pub type RawClassInline<'a> = InlineClass<'a, u16>; 132 | 133 | pub type SymNatRef<'a> = Nat<'a, RefType<'a>>; 134 | pub type SymSpanNat<'a> = Or, Spanned<'a, InlineNat<'a, RefType<'a>>>>; 135 | // pub type SymNatInline<'a> = InlineNat<'a, RefType<'a>>; 
136 | pub type RawNatRef<'a> = Nat<'a, u16>; 137 | // pub type RawNatInline<'a> = InlineNat<'a, u16>; 138 | 139 | pub type SymConstRef<'a> = Const<'a, RefType<'a>>; 140 | pub type SymSpanConst<'a> = Or, Spanned<'a, InlineConst<'a, RefType<'a>>>>; 141 | pub type SymConstInline<'a> = InlineConst<'a, RefType<'a>>; 142 | pub type SymSpanConstInline<'a> = Spanned<'a, InlineConst<'a, RefType<'a>>>; 143 | pub type RawConstRef<'a> = Const<'a, u16>; 144 | pub type RawSpanConst<'a> = Or>>; 145 | pub type RawConstInline<'a> = InlineConst<'a, u16>; 146 | pub type RawSpanConstInline<'a> = Spanned<'a, InlineConst<'a, u16>>; 147 | 148 | pub type SymBsRef<'a> = Bs<'a, RefType<'a>>; 149 | pub type SymSpanBs<'a> = Or, Spanned<'a, InlineBs<'a, RefType<'a>>>>; 150 | pub type SymBsInline<'a> = InlineBs<'a, RefType<'a>>; 151 | pub type SymSpanBsInline<'a> = Spanned<'a, InlineBs<'a, RefType<'a>>>; 152 | pub type RawBsRef<'a> = Bs<'a, u16>; 153 | pub type RawBsInline<'a> = InlineBs<'a, u16>; 154 | pub type RawSpanBsInline<'a> = Spanned<'a, InlineBs<'a, u16>>; 155 | -------------------------------------------------------------------------------- /src/lib/assemble/flags.rs: -------------------------------------------------------------------------------- 1 | // use std::collections::HashMap; 2 | 3 | use super::span::Span; 4 | 5 | const FLAG_PAIRS: [(&str, u16); 24] = [ 6 | ("abstract", 0x0400), 7 | ("annotation", 0x2000), 8 | ("bridge", 0x0040), 9 | ("enum", 0x4000), 10 | ("final", 0x0010), 11 | ("interface", 0x0200), 12 | ("mandated", 0x8000), 13 | ("module", 0x8000), 14 | ("native", 0x0100), 15 | ("open", 0x0020), 16 | ("private", 0x0002), 17 | ("protected", 0x0004), 18 | ("public", 0x0001), 19 | ("static", 0x0008), 20 | ("static_phase", 0x0040), 21 | ("strict", 0x0800), 22 | ("strictfp", 0x0800), 23 | ("super", 0x0020), 24 | ("synchronized", 0x0020), 25 | ("synthetic", 0x1000), 26 | ("transient", 0x0080), 27 | ("transitive", 0x0020), 28 | ("varargs", 0x0080), 29 | ("volatile", 0x0040), 30 
| ]; 31 | 32 | // fn parse_flag(s: &str) -> Option { 33 | // lazy_static! { 34 | // static ref FLAG_MAP: HashMap<&'static str, u16> = FLAG_PAIRS.iter().copied().collect(); 35 | // } 36 | // FLAG_MAP.get(s).copied() 37 | // } 38 | 39 | /// Accumulate a bitset of flags while holding on to the last token in case it was meant to not be a flag 40 | pub struct FlagList { 41 | flags: u16, 42 | } 43 | impl FlagList { 44 | pub fn new() -> Self { 45 | Self { flags: 0 } 46 | } 47 | 48 | pub fn push<'a>(&mut self, span: Span<'a>) -> Result<(), ()> { 49 | let ind = FLAG_PAIRS.binary_search_by_key(&span.0, |t| t.0).map_err(|_| ())?; 50 | let flag = FLAG_PAIRS[ind].1; 51 | self.flags |= flag; 52 | Ok(()) 53 | } 54 | 55 | pub fn flush(self) -> u16 { 56 | self.flags 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/lib/assemble/label.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy)] 2 | pub struct Pos(pub u32); 3 | -------------------------------------------------------------------------------- /src/lib/assemble/mod.rs: -------------------------------------------------------------------------------- 1 | mod base_parser; 2 | mod class_parser; 3 | mod cpool; 4 | mod flags; 5 | mod label; 6 | mod parse_attr; 7 | mod parse_class; 8 | mod parse_code; 9 | mod parse_literal; 10 | mod span; 11 | mod string; 12 | mod tokenize; 13 | mod writer; 14 | 15 | use typed_arena::Arena; 16 | 17 | use crate::lib::disassemble::string::parse_utf8; 18 | use base_parser::BaseParser; 19 | use class_parser::ClassParser; 20 | pub use span::Error; 21 | use tokenize::tokenize; 22 | 23 | #[derive(Debug, Clone, Copy)] 24 | pub struct AssemblerOptions {} 25 | 26 | pub fn assemble(source: &str, _opts: AssemblerOptions) -> Result, Vec)>, Error> { 27 | let tokens = tokenize(source)?; 28 | // for tok in &tokens { 29 | // println!("{:?}", tok); 30 | // } 31 | 32 | let arena = Arena::new(); 33 | 
let mut base_parser = BaseParser::new(source, tokens); 34 | let mut results = Vec::new(); 35 | 36 | while base_parser.has_tokens_left() { 37 | let parser = ClassParser::new(base_parser, &arena); 38 | let (parser, (class_name, data)) = parser.parse()?; 39 | // let class_name = class_name.and_then(|bs| std::str::from_utf8(bs).ok().map(str::to_owned)); 40 | let class_name = class_name.and_then(parse_utf8); 41 | results.push((class_name, data)); 42 | 43 | base_parser = parser; 44 | if writer::UNUSED_PH.load(std::sync::atomic::Ordering::Relaxed) { 45 | panic!("Unused placeholder!"); 46 | } 47 | } 48 | 49 | Ok(results) 50 | } 51 | -------------------------------------------------------------------------------- /src/lib/assemble/parse_attr.rs: -------------------------------------------------------------------------------- 1 | use super::class_parser::ClassParser; 2 | use super::cpool::types; 3 | use super::cpool::InlineConst; 4 | use super::span::Error; 5 | use super::span::Span; 6 | use super::string; 7 | use super::tokenize::TokenType; 8 | use super::writer::Writer; 9 | 10 | pub enum AttrResult<'a> { 11 | Normal, 12 | ImplicitBootstrap { 13 | name: Option>, 14 | len: Option, 15 | span: Span<'a>, 16 | }, 17 | } 18 | 19 | type ParseFn<'a> = fn(&mut ClassParser<'a>, &mut Writer<'a>) -> Result<(), Error>; 20 | 21 | enum ListKind { 22 | Line, 23 | DotEnd, 24 | Greedy(&'static str), 25 | } 26 | impl ListKind { 27 | fn at_end(&self, p: &mut ClassParser<'_>) -> bool { 28 | use ListKind::*; 29 | match self { 30 | Line => p.has_type(TokenType::Newlines), 31 | DotEnd => p.tryv(".end"), 32 | Greedy(dir) => !p.tryv(dir), 33 | } 34 | } 35 | } 36 | 37 | macro_rules! line_list { 38 | ($p: ident, $w:ident, $( $s:expr );+) => { 39 | |p, w| p.list(w, ListKind::Line, |$p, $w| {$( $s );+; Ok(())}) 40 | }; 41 | } 42 | 43 | macro_rules! 
dotend_list { 44 | ($tag:expr, $p: ident, $w:ident, $( $s:expr );+) => { 45 | |p, w| {p.eol()?; p.list(w, ListKind::DotEnd, |$p, $w| {$( $s );+; Ok(())})?; p.val($tag)} 46 | }; 47 | } 48 | 49 | macro_rules! dotend_list8 { 50 | ($tag:expr, $p: ident, $w:ident, $( $s:expr );+) => { 51 | |p, w| {p.eol()?; p.list8(w, ListKind::DotEnd, |$p, $w| {$( $s );+; Ok(())})?; p.val($tag)} 52 | }; 53 | } 54 | 55 | impl<'a> ClassParser<'a> { 56 | fn get_parse_attr_body_fn(&mut self, directive: Span<'a>) -> Result<(&'static str, ParseFn<'a>), Error> { 57 | Ok(match directive.0 { 58 | ".annotationdefault" => ("AnnotationDefault", Self::element_value), 59 | ".code" => ("Code", Self::parse_code), 60 | ".constantvalue" => ("ConstantValue", |p, w| { 61 | w.cp(p.ldc_rhs()?); 62 | Ok(()) 63 | }), 64 | ".deprecated" => ("Deprecated", |_p, _w| Ok(())), 65 | ".enclosing" => ("EnclosingMethod", |p, w| { 66 | p.val("method")?; 67 | w.cp(p.cls()?); 68 | w.cp(p.nat()?); 69 | Ok(()) 70 | }), 71 | ".exceptions" => ("Exceptions", line_list! {p, w, w.cp(p.cls()?)}), 72 | ".innerclasses" => ( 73 | "InnerClasses", 74 | dotend_list! {"innerclasses", p, w, 75 | w.cp(p.cls()?); w.cp(p.cls()?); w.cp(p.utf()?); w.u16(p.flags()?); p.eol()? 76 | }, 77 | ), 78 | ".linenumbertable" => ( 79 | "LineNumberTable", 80 | dotend_list! {"linenumbertable", p, w, w.u16(p.lbl16()?); w.u16(p.u16()?); p.eol()?}, 81 | ), 82 | ".localvariabletable" => ( 83 | "LocalVariableTable", 84 | dotend_list! {"localvariabletable", p, w, p.local_var_table_item(w)?}, 85 | ), 86 | ".localvariabletypetable" => ( 87 | "LocalVariableTypeTable", 88 | dotend_list! {"localvariabletypetable", p, w, p.local_var_table_item(w)?}, 89 | ), 90 | ".methodparameters" => ( 91 | "MethodParameters", 92 | dotend_list8! 
{"methodparameters", p, w, w.cp(p.utf()?); w.u16(p.flags()?); p.eol()?}, 93 | ), 94 | ".module" => ("Module", Self::module), 95 | ".modulemainclass" => ("ModuleMainClass", |p, w| { 96 | w.cp(p.cls()?); 97 | Ok(()) 98 | }), 99 | ".modulepackages" => ("ModulePackages", line_list! {p, w, w.cp(p.single(InlineConst::Package)?)}), 100 | ".nesthost" => ("NestHost", |p, w| { 101 | w.cp(p.cls()?); 102 | Ok(()) 103 | }), 104 | ".nestmembers" => ("NestMembers", line_list! {p, w, w.cp(p.cls()?)}), 105 | ".permittedsubclasses" => ("PermittedSubclasses", line_list! {p, w, w.cp(p.cls()?)}), 106 | ".record" => ("Record", dotend_list! {"record", p, w, p.record_item(w)?}), 107 | ".runtime" => { 108 | let tok = self.next()?; 109 | let visible = match tok.1 .0 { 110 | "visible" => true, 111 | "invisible" => false, 112 | _ => return self.err1("Expected visible or invisible", tok.1), 113 | }; 114 | 115 | let tok = self.next()?; 116 | match tok.1 .0 { 117 | "annotations" => ( 118 | if visible { 119 | "RuntimeVisibleAnnotations" 120 | } else { 121 | "RuntimeInvisibleAnnotations" 122 | }, 123 | dotend_list! {"runtime", p, w, p.val(".annotation")?; p.annotation(w, false)?; p.eol()?}, 124 | ), 125 | "paramannotations" => ( 126 | if visible { 127 | "RuntimeVisibleParameterAnnotations" 128 | } else { 129 | "RuntimeInvisibleParameterAnnotations" 130 | }, 131 | dotend_list8! {"runtime", p, w, 132 | p.val(".paramannotation")?; 133 | p.eol()?; 134 | p.list(w, ListKind::DotEnd, |p, w| {p.val(".annotation")?; p.annotation(w, false)?; p.eol()})?; 135 | p.val("paramannotation")?; 136 | p.eol()? 137 | }, 138 | ), 139 | "typeannotations" => ( 140 | if visible { 141 | "RuntimeVisibleTypeAnnotations" 142 | } else { 143 | "RuntimeInvisibleTypeAnnotations" 144 | }, 145 | dotend_list! 
{"runtime", p, w, p.val(".typeannotation")?; p.ta_target_info(w)?; p.ta_target_path(w)?; p.annotation(w, true)?; p.eol()?}, 146 | ), 147 | _ => return self.err1("Expected annotations, paramannotations, or typeannotations", tok.1), 148 | } 149 | } 150 | 151 | ".signature" => ("Signature", |p, w| { 152 | w.cp(p.utf()?); 153 | Ok(()) 154 | }), 155 | ".sourcedebugextension" => ("SourceDebugExtension", |p, w| { 156 | let span = p.assert_type(TokenType::StringLit)?; 157 | let bs = string::unescape(span.0).map_err(|(msg, s)| p.error1(msg, Span(s)))?; 158 | w.write(&bs); 159 | Ok(()) 160 | }), 161 | ".sourcefile" => ("SourceFile", |p, w| { 162 | w.cp(p.utf()?); 163 | Ok(()) 164 | }), 165 | ".stackmaptable" => ("StackMapTable", |p, w| { 166 | if let Some((count, buf)) = p.stack_map_table.take() { 167 | w.u16(count); 168 | w.extend_from_writer(buf); 169 | Ok(()) 170 | } else { 171 | let span = p.next()?.1; 172 | p.err1( 173 | "StackMapTable attribute may only be used inside Code attributes, and only once per method", 174 | span, 175 | ) 176 | } 177 | }), 178 | ".synthetic" => ("Synthetic", |_p, _w| Ok(())), 179 | _ => return self.err1("Unrecognized attribute directive", directive), 180 | }) 181 | } 182 | 183 | fn parse_attr_sub(&mut self, w: &mut Writer<'a>) -> Result, Error> { 184 | let (name, len) = if self.tryv(".attribute") { 185 | (Some(self.utf()?), if self.tryv("length") { Some(self.u32()?) 
} else { None }) 186 | } else { 187 | (None, None) 188 | }; 189 | 190 | if self.has_type(TokenType::StringLit) { 191 | if let Some(name) = name { 192 | let span = self.next()?.1; 193 | let bs = string::unescape(span.0).map_err(|(msg, s)| self.error1(msg, Span(s)))?; 194 | w.cp(name); 195 | w.u32(len.unwrap_or(bs.len() as u32)); 196 | w.write(&bs); 197 | return Ok(AttrResult::Normal); 198 | } 199 | } 200 | 201 | let directive = self.next()?.1; 202 | if directive.0 == ".bootstrapmethods" { 203 | return Ok(AttrResult::ImplicitBootstrap { 204 | name, 205 | len, 206 | span: directive, 207 | }); 208 | } 209 | 210 | let (name_str, body_cb) = self.get_parse_attr_body_fn(directive)?; 211 | w.cp(name.unwrap_or(Self::static_utf(name_str, directive))); 212 | let ph = w.ph32(); 213 | let start_buf_len = w.len(); 214 | 215 | body_cb(&mut *self, &mut *w)?; 216 | let end_buf_len = w.len(); 217 | 218 | let len = len.unwrap_or( 219 | (end_buf_len - start_buf_len) 220 | .try_into() 221 | .map_err(|_| self.error1("Exceeded maximum attribute length (2^32-1 bytes)", directive))?, 222 | ); 223 | w.fill32(ph, len); 224 | 225 | Ok(AttrResult::Normal) 226 | } 227 | 228 | pub fn parse_attr_allow_bsm(&mut self, w: &mut Writer<'a>, count: &mut u16) -> Result, Error> { 229 | let span = self.peek()?.1; 230 | let res = self.parse_attr_sub(w)?; 231 | if *count == u16::MAX { 232 | self.err1("Maximum number of attributes (65535) exceeded", span) 233 | } else { 234 | *count += 1; 235 | Ok(res) 236 | } 237 | } 238 | 239 | pub fn parse_attr(&mut self, w: &mut Writer<'a>, count: &mut u16) -> Result<(), Error> { 240 | match self.parse_attr_allow_bsm(w, count)? { 241 | AttrResult::Normal => Ok(()), 242 | AttrResult::ImplicitBootstrap { span, .. 
} => { 243 | self.err1("Implicit bootstrap method attributes can only be used at class level.", span) 244 | } 245 | } 246 | } 247 | 248 | /////////////////////////////////////////////////////////////////////////////// 249 | fn annotation(&mut self, w: &mut Writer<'a>, is_type: bool) -> Result<(), Error> { 250 | w.cp(self.utf()?); 251 | self.eol()?; 252 | let ph = w.ph(); 253 | let mut count = 0; 254 | while !self.tryv(".end") { 255 | if count == u16::MAX { 256 | let span = self.peek()?.1; 257 | return self.err1("Maximum number of annotations elements (65535) exceeded", span); 258 | } 259 | count += 1; 260 | w.cp(self.utf()?); 261 | self.val("=")?; 262 | self.element_value(w)?; 263 | self.eol()?; 264 | } 265 | if is_type { 266 | self.val("typeannotation")?; 267 | } else { 268 | self.val("annotation")?; 269 | } 270 | w.fill(ph, count); 271 | Ok(()) 272 | } 273 | 274 | fn element_value(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 275 | let tok = self.next()?; 276 | match tok.1 .0 { 277 | "annotation" => { 278 | w.u8(64); 279 | self.annotation(w, false)?; 280 | } 281 | "array" => { 282 | w.u8(91); 283 | self.eol()?; 284 | let ph = w.ph(); 285 | let mut count = 0; 286 | while !self.tryv(".end") { 287 | if count == u16::MAX { 288 | let span = self.peek()?.1; 289 | return self.err1("Maximum number of annotations in array element (65535) exceeded", span); 290 | } 291 | count += 1; 292 | self.element_value(w)?; 293 | self.eol()?; 294 | } 295 | self.val("array")?; 296 | w.fill(ph, count); 297 | } 298 | "boolean" => { 299 | w.u8(90); 300 | w.cp(self.ldc_rhs()?); 301 | } 302 | "byte" => { 303 | w.u8(66); 304 | w.cp(self.ldc_rhs()?); 305 | } 306 | "char" => { 307 | w.u8(67); 308 | w.cp(self.ldc_rhs()?); 309 | } 310 | "class" => { 311 | w.u8(99); 312 | w.cp(self.utf()?); 313 | } 314 | "double" => { 315 | w.u8(68); 316 | w.cp(self.ldc_rhs()?); 317 | } 318 | "enum" => { 319 | w.u8(101); 320 | w.cp(self.utf()?); 321 | w.cp(self.utf()?); 322 | } 323 | "float" => { 324 | 
w.u8(70); 325 | w.cp(self.ldc_rhs()?); 326 | } 327 | "int" => { 328 | w.u8(73); 329 | w.cp(self.ldc_rhs()?); 330 | } 331 | "long" => { 332 | w.u8(74); 333 | w.cp(self.ldc_rhs()?); 334 | } 335 | "short" => { 336 | w.u8(83); 337 | w.cp(self.ldc_rhs()?); 338 | } 339 | "string" => { 340 | w.u8(115); 341 | w.cp(self.utf()?); 342 | } 343 | _ => return self.err1("Unrecognized element value tag", tok.1), 344 | }; 345 | Ok(()) 346 | } 347 | 348 | fn local_var_table_item(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 349 | let (ind, _, name, desc, _, start, _, end) = ( 350 | self.u16()?, 351 | self.val("is")?, 352 | self.utf()?, 353 | self.utf()?, 354 | self.val("from")?, 355 | self.lblpos()?, 356 | self.val("to")?, 357 | self.lblpos()?, 358 | ); 359 | w.u16(self.pos_to_u16(start)?); 360 | w.u16(self.pos_diff_to_u16(start.v, end)?); 361 | w.cp(name); 362 | w.cp(desc); 363 | w.u16(ind); 364 | self.eol() 365 | } 366 | 367 | fn list(&mut self, w: &mut Writer<'a>, kind: ListKind, f: ParseFn<'a>) -> Result<(), Error> { 368 | let ph = w.ph(); 369 | let mut count = 0; 370 | while !kind.at_end(self) { 371 | if count == u16::MAX { 372 | let span = self.peek()?.1; 373 | return self.err1("Maximum number of elements (65535) exceeded", span); 374 | } 375 | count += 1; 376 | f(self, w)?; 377 | } 378 | w.fill(ph, count); 379 | Ok(()) 380 | } 381 | 382 | fn list8(&mut self, w: &mut Writer<'a>, kind: ListKind, f: ParseFn<'a>) -> Result<(), Error> { 383 | let ph = w.ph8(); 384 | let mut count = 0; 385 | while !kind.at_end(self) { 386 | if count == u8::MAX { 387 | let span = self.peek()?.1; 388 | return self.err1("Maximum number of elements (255) exceeded", span); 389 | } 390 | count += 1; 391 | f(self, w)?; 392 | } 393 | w.fill8(ph, count); 394 | Ok(()) 395 | } 396 | 397 | fn module(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 398 | use ListKind::Greedy; 399 | w.cp(self.utf()?); 400 | w.u16(self.flags()?); 401 | self.val("version")?; 402 | w.cp(self.utf()?); 403 | self.eol()?; 
404 | 405 | let exports_item = |p: &mut Self, w: &mut Writer<'a>| { 406 | w.cp(p.single(InlineConst::Package)?); 407 | w.u16(p.flags()?); 408 | if p.tryv("to") { 409 | p.list(w, ListKind::Line, |p, w| Ok(w.cp(p.single(InlineConst::Package)?)))?; 410 | } else { 411 | w.u16(0); // count of 0 targets 412 | } 413 | p.eol() 414 | }; 415 | 416 | self.list(w, Greedy(".requires"), |p, w| { 417 | w.cp(p.single(InlineConst::Module)?); 418 | w.u16(p.flags()?); 419 | p.val("version")?; 420 | w.cp(p.utf()?); 421 | p.eol() 422 | })?; 423 | self.list(w, Greedy(".exports"), exports_item)?; 424 | self.list(w, Greedy(".opens"), exports_item)?; 425 | self.list(w, Greedy(".uses"), |p, w| { 426 | w.cp(p.cls()?); 427 | p.eol() 428 | })?; 429 | self.list(w, Greedy(".provides"), |p, w| { 430 | w.cp(p.cls()?); 431 | p.val("with")?; 432 | p.list(w, ListKind::Line, |p, w| Ok(w.cp(p.cls()?)))?; 433 | p.eol() 434 | })?; 435 | 436 | self.val(".end")?; 437 | self.val("module") 438 | } 439 | 440 | fn record_item(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 441 | w.cp(self.utf()?); 442 | w.cp(self.utf()?); 443 | if self.tryv(".attributes") { 444 | self.eol()?; 445 | 446 | let ph = w.ph(); 447 | let mut attr_count = 0; 448 | 449 | while !self.tryv(".end") { 450 | self.parse_attr(w, &mut attr_count)?; 451 | self.eol()?; 452 | } 453 | self.val("attributes")?; 454 | w.fill(ph, attr_count); 455 | } else { 456 | w.u16(0); 457 | } 458 | self.eol() 459 | } 460 | 461 | fn ta_target_info(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 462 | w.u8(self.u8()?); 463 | let span = self.next()?.1; 464 | match span.0 { 465 | "typeparam" => w.u8(self.u8()?), 466 | "super" => w.u16(self.u16()?), 467 | "typeparambound" => { 468 | w.u8(self.u8()?); 469 | w.u8(self.u8()?) 
470 | } 471 | "empty" => (), 472 | "methodparam" => w.u8(self.u8()?), 473 | "throws" => w.u16(self.u16()?), 474 | "localvar" => { 475 | self.eol()?; 476 | self.list(w, ListKind::DotEnd, |p, w| { 477 | if p.tryv("nowhere") { 478 | w.u16(0xFFFF); 479 | w.u16(0xFFFF); 480 | } else { 481 | let (_, start, _, end) = (p.val("from")?, p.lblpos()?, p.val("to")?, p.lblpos()?); 482 | w.u16(p.pos_to_u16(start)?); 483 | w.u16(p.pos_diff_to_u16(start.v, end)?); 484 | } 485 | w.u16(p.u16()?); 486 | p.eol() 487 | })?; 488 | self.val("localvar")?; 489 | } 490 | "catch" => w.u16(self.u16()?), 491 | "offset" => w.u16(self.lbl16()?), 492 | "typearg" => { 493 | w.u16(self.lbl16()?); 494 | w.u8(self.u8()?) 495 | } 496 | 497 | _ => return self.err1("Expected type annotation target info type", span), 498 | } 499 | 500 | self.eol() 501 | } 502 | 503 | fn ta_target_path(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 504 | self.val(".typepath")?; 505 | self.eol()?; 506 | self.list8(w, ListKind::DotEnd, |p, w| { 507 | w.u8(p.u8()?); 508 | w.u8(p.u8()?); 509 | p.eol() 510 | })?; 511 | self.val("typepath")?; 512 | self.eol() 513 | } 514 | } 515 | -------------------------------------------------------------------------------- /src/lib/assemble/parse_class.rs: -------------------------------------------------------------------------------- 1 | use super::base_parser::BaseParser; 2 | use super::class_parser::ns; 3 | use super::class_parser::ClassParser; 4 | use super::cpool::types; 5 | use super::cpool::BsAttrNameNeeded; 6 | use super::cpool::Or; 7 | use super::parse_attr::AttrResult; 8 | use super::span::Error; 9 | use super::tokenize::TokenType; 10 | use super::writer::BufWriter; 11 | use super::writer::Writer; 12 | 13 | impl<'a> ClassParser<'a> { 14 | fn parse_const_def(&mut self) -> Result<(), Error> { 15 | self.val(".const")?; 16 | let lhs_span = self.assert_type(TokenType::Ref)?; 17 | let lhs = self.ref_type(lhs_span)?; 18 | self.val("=")?; 19 | let rhs_span = self.peek()?.1; 20 | 
let rhs = self.ref_or_tagged_const()?; 21 | 22 | match lhs { 23 | types::RefType::Raw(ind) => { 24 | let rhs = match rhs { 25 | Or::A(_) => return self.err1("Raw refs cannot be defined by another ref", rhs_span), 26 | Or::B(b) => b, 27 | }; 28 | self.pool.add_raw_def(ind, lhs_span, rhs)? 29 | } 30 | types::RefType::Sym(name) => self.pool.add_sym_def(name, ns(rhs))?, 31 | }; 32 | 33 | self.eol() 34 | } 35 | 36 | fn parse_bootstrap_def(&mut self) -> Result<(), Error> { 37 | self.val(".bootstrap")?; 38 | let lhs_span = self.assert_type(TokenType::BsRef)?; 39 | let lhs = self.ref_type(lhs_span)?; 40 | self.val("=")?; 41 | let rhs_span = self.peek()?.1; 42 | let rhs = self.ref_or_tagged_bootstrap()?; 43 | 44 | match lhs { 45 | types::RefType::Raw(ind) => { 46 | let rhs = match rhs { 47 | Or::A(_) => return self.err1("Raw refs cannot be defined by another ref", rhs_span), 48 | Or::B(b) => b, 49 | }; 50 | self.pool.add_bs_raw_def(ind, lhs_span, rhs)? 51 | } 52 | types::RefType::Sym(name) => self.pool.add_bs_sym_def(name, ns(rhs))?, 53 | }; 54 | 55 | self.eol() 56 | } 57 | /////////////////////////////////////////////////////////////////////////// 58 | 59 | fn parse_field_def(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 60 | self.val(".field")?; 61 | w.u16(self.flags()?); 62 | w.cp(self.utf()?); 63 | w.cp(self.utf()?); 64 | 65 | let ph = w.ph(); 66 | let mut attr_count = 0; 67 | 68 | if let Some(span) = self.tryv2("=") { 69 | w.cp(Self::static_utf("ConstantValue", span)); 70 | w.u32(2); 71 | w.cp(self.ldc_rhs()?); 72 | attr_count += 1; 73 | } 74 | 75 | if self.tryv(".fieldattributes") { 76 | self.eol()?; 77 | 78 | while !self.tryv(".end") { 79 | self.parse_attr(w, &mut attr_count)?; 80 | self.eol()?; 81 | } 82 | 83 | self.val("fieldattributes")?; 84 | } 85 | 86 | w.fill(ph, attr_count); 87 | self.eol()?; 88 | Ok(()) 89 | } 90 | 91 | fn parse_method_def(&mut self, w: &mut Writer<'a>) -> Result<(), Error> { 92 | self.val(".method")?; 93 | w.u16(self.flags()?); 94 | 
w.cp(self.utf()?); 95 | self.val(":")?; 96 | w.cp(self.utf()?); 97 | self.eol()?; 98 | 99 | let ph = w.ph(); 100 | let mut attr_count = 0; 101 | 102 | if self.peek()?.1 .0 == ".limit" { 103 | self.parse_legacy_method_body(w)?; 104 | attr_count = 1; 105 | } else { 106 | while !self.tryv(".end") { 107 | self.parse_attr(w, &mut attr_count)?; 108 | self.eol()?; 109 | } 110 | } 111 | self.val("method")?; 112 | self.eol()?; 113 | 114 | w.fill(ph, attr_count); 115 | Ok(()) 116 | } 117 | 118 | /////////////////////////////////////////////////////////////////////////// 119 | 120 | pub fn parse(mut self) -> Result<(BaseParser<'a>, (Option<&'a [u8]>, Vec)), Error> { 121 | if self.tryv(".version") { 122 | self.version = (self.u16()?, self.u16()?); 123 | self.eol()?; 124 | }; 125 | 126 | // todo 127 | let debug_span = self.peek()?.1; 128 | 129 | let mut w = Writer::default(); 130 | self.val(".class")?; 131 | w.u16(self.flags()?); 132 | 133 | w.cp(self.cls()?); 134 | self.eol()?; 135 | 136 | self.val(".super")?; 137 | w.cp(self.cls()?); 138 | self.eol()?; 139 | 140 | let ph = w.ph(); 141 | let mut interface_count = 0; 142 | while let Some(span) = self.tryv2(".implements") { 143 | if interface_count == u16::MAX { 144 | return self.err1("Maximum number of interfaces (65535) exceeded", span); 145 | } 146 | interface_count += 1; 147 | 148 | w.cp(self.cls()?); 149 | self.eol()?; 150 | } 151 | w.fill(ph, interface_count); 152 | 153 | let mut field_w = w; 154 | let field_ph = field_w.ph(); 155 | let mut field_count = 0; 156 | 157 | let mut method_w = Writer::default(); 158 | let mut method_count = 0; 159 | 160 | // We won't know how the contents of the bootstrap attr, and thus how long it should be, 161 | // until the constant pool has been fully resolved at the end. 
Therefore, we use two 162 | // writers, one for attributes before the .bootstrapmethods attr (if any), including the 163 | // name of the latter, and a second writer for any attributes that appear after the 164 | // .bootstrapmethods attr if one is present. Once we put all the pieces of the classfile together 165 | // at the end, we'll fill in the actual data for the bootstrap methods table in between. 166 | let mut attr_w1 = Writer::default(); 167 | let mut bs_attr_placeholder_info = None; 168 | let mut attr_w2 = Writer::default(); 169 | let mut attr_count = 0; 170 | 171 | while let Ok(tok) = self.peek() { 172 | match tok.1 .0 { 173 | ".bootstrap" => self.parse_bootstrap_def()?, 174 | ".const" => self.parse_const_def()?, 175 | ".field" => { 176 | if field_count == u16::MAX { 177 | return self.err1("Maximum number of fields (65535) exceeded", tok.1); 178 | } 179 | field_count += 1; 180 | self.parse_field_def(&mut field_w)?; 181 | } 182 | ".method" => { 183 | if method_count == u16::MAX { 184 | return self.err1("Maximum number of methods (65535) exceeded", tok.1); 185 | } 186 | method_count += 1; 187 | self.parse_method_def(&mut method_w)?; 188 | } 189 | ".end" => { 190 | self.val(".end")?; 191 | self.val("class")?; 192 | self.eol()?; 193 | break; 194 | } 195 | _ => { 196 | if bs_attr_placeholder_info.is_none() { 197 | match self.parse_attr_allow_bsm(&mut attr_w1, &mut attr_count)? 
{ 198 | AttrResult::Normal => (), 199 | AttrResult::ImplicitBootstrap { name, len, span: _ } => { 200 | let bs_name_ph = match name { 201 | Some(name) => { 202 | // attr has name explicitly provided, so just write it and don't store a placeholder 203 | attr_w1.cp(name); 204 | None 205 | } 206 | None => { 207 | // attr has no explicit name, so store a placeholder 208 | // to be filled in with the implicitly created name later 209 | Some(attr_w1.ph()) 210 | } 211 | }; 212 | 213 | bs_attr_placeholder_info = Some((bs_name_ph, len)); 214 | } 215 | } 216 | } else { 217 | match self.parse_attr_allow_bsm(&mut attr_w2, &mut attr_count)? { 218 | AttrResult::Normal => (), 219 | AttrResult::ImplicitBootstrap { span, .. } => { 220 | return self.err1("Duplicate .bootstrapmethods attribute", span); 221 | } 222 | } 223 | } 224 | self.eol()?; 225 | 226 | // return self.err1("Expected .field, .method, .const, .bootstrap, .end class, or attribute directive", tok.1) 227 | } 228 | } 229 | } 230 | field_w.fill(field_ph, field_count); 231 | 232 | let mut pool = self.pool.finish_defs()?; 233 | // println!("data {:?}", field_w); 234 | 235 | field_w.resolve_ldc_refs(|r, s| pool.resolve_ldc(r, s))?; 236 | method_w.resolve_ldc_refs(|r, s| pool.resolve_ldc(r, s))?; 237 | attr_w1.resolve_ldc_refs(|r, s| pool.resolve_ldc(r, s))?; 238 | attr_w2.resolve_ldc_refs(|r, s| pool.resolve_ldc(r, s))?; 239 | 240 | let field_w = field_w.resolve_refs(|r| pool.resolve(r))?; 241 | let method_w = method_w.resolve_refs(|r| pool.resolve(r))?; 242 | let mut attr_w1 = attr_w1.resolve_refs(|r| pool.resolve(r))?; 243 | let attr_w2 = attr_w2.resolve_refs(|r| pool.resolve(r))?; 244 | 245 | // println!("data {:?}", field_w); 246 | 247 | let mut w = BufWriter::default(); 248 | w.u32(0xCAFEBABE); 249 | w.u16(self.version.1); 250 | w.u16(self.version.0); 251 | 252 | let bs_attr_needed = match bs_attr_placeholder_info { 253 | Some((Some(_), _)) => BsAttrNameNeeded::Always, 254 | Some((None, _)) => BsAttrNameNeeded::Never, 
255 | None => BsAttrNameNeeded::IfPresent, 256 | }; 257 | let class_name_ind = field_w.read_u16(2); 258 | let (assembled_bs_attr_info, class_name) = pool.end().build(&mut w, bs_attr_needed, class_name_ind)?; 259 | 260 | w.extend(&field_w); 261 | w.u16(method_count); 262 | w.extend(&method_w); 263 | 264 | // println!("bsattr info {:?}", bs_attr_placeholder_info); 265 | if let Some((name_ph, len)) = bs_attr_placeholder_info { 266 | if let Some(ph) = name_ph { 267 | attr_w1.fill(ph, assembled_bs_attr_info.name.unwrap()); 268 | } 269 | 270 | w.u16(attr_count); 271 | w.extend(&attr_w1); 272 | 273 | let actual_length = assembled_bs_attr_info.data_len().ok_or_else(|| { 274 | self.parser 275 | .error1("BootstrapMethods table exceeds maximum attribute length", debug_span) 276 | })?; 277 | 278 | w.u32(len.unwrap_or(actual_length)); 279 | w.u16(assembled_bs_attr_info.num_bs); 280 | w.write(&assembled_bs_attr_info.buf); 281 | 282 | w.extend(&attr_w2); 283 | } else { 284 | // check if we need to add an implicit BootstrapMethods attribute at the end 285 | if let Some(name) = assembled_bs_attr_info.name { 286 | if attr_count == u16::MAX { 287 | return self.parser.err1( 288 | "Exceeded maximum class attribute count due to implicit BootstrapMethods attribute", 289 | debug_span, 290 | ); 291 | } 292 | let actual_length = assembled_bs_attr_info.data_len().ok_or_else(|| { 293 | self.parser 294 | .error1("BootstrapMethods table exceeds maximum attribute length", debug_span) 295 | })?; 296 | 297 | w.u16(attr_count + 1); 298 | w.extend(&attr_w1); 299 | w.u16(name); 300 | w.u32(actual_length); 301 | w.u16(assembled_bs_attr_info.num_bs); 302 | w.write(&assembled_bs_attr_info.buf); 303 | } else { 304 | w.u16(attr_count); 305 | w.extend(&attr_w1); 306 | } 307 | } 308 | // println!("finish bs stuff"); 309 | 310 | // println!("data {:?}", w); 311 | Ok((self.parser, (class_name, w.into_buf()))) 312 | } 313 | } 314 | 
-------------------------------------------------------------------------------- /src/lib/assemble/parse_literal.rs: -------------------------------------------------------------------------------- 1 | use hexf_parse::parse_hexf32; 2 | use hexf_parse::parse_hexf64; 3 | 4 | pub fn int(s: &str) -> Option 5 | where 6 | T: std::str::FromStr, 7 | T: TryFrom, 8 | { 9 | let s = s.trim_start_matches('+'); 10 | // Hack to support parsing '-0' as unsigned types 11 | let s = if s == "-0" { "0" } else { s }; 12 | 13 | if s.starts_with("-0x") { 14 | let m = u64::from_str_radix(&s[3..], 16).ok()?; 15 | if m > 1 << 63 { 16 | return None; 17 | } 18 | let m = (m as i64).wrapping_neg(); 19 | m.try_into().ok() 20 | } else if s.starts_with("0x") { 21 | let m = i64::from_str_radix(&s[2..], 16).ok()?; 22 | m.try_into().ok() 23 | } else { 24 | s.parse().ok() 25 | } 26 | } 27 | 28 | pub fn float(s: &str) -> Option { 29 | let mut s = s.trim_start_matches('+'); 30 | if s.starts_with("-NaN") { 31 | s = &s[1..]; 32 | } 33 | 34 | if s.ends_with(">") { 35 | // todo - test -NaN<...> 36 | assert!(s.starts_with("NaN<0x")); 37 | let hex_part = &s[6..s.len() - 1]; 38 | return u32::from_str_radix(hex_part, 16).ok(); 39 | } 40 | 41 | let f = if s.starts_with("0x") || s.starts_with("-0x") { 42 | parse_hexf32(s, false).ok() 43 | } else { 44 | s.parse().ok() 45 | }?; 46 | 47 | Some(f.to_bits()) 48 | } 49 | 50 | pub fn double(s: &str) -> Option { 51 | let mut s = s.trim_start_matches('+'); 52 | if s.starts_with("-NaN") { 53 | s = &s[1..]; 54 | } 55 | 56 | if s.ends_with(">") { 57 | assert!(s.starts_with("NaN<0x")); 58 | let hex_part = &s[6..s.len() - 1]; 59 | return u64::from_str_radix(hex_part, 16).ok(); 60 | } 61 | 62 | let f = if s.starts_with("0x") || s.starts_with("-0x") { 63 | parse_hexf64(s, false).ok() 64 | } else { 65 | s.parse().ok() 66 | }?; 67 | 68 | Some(f.to_bits()) 69 | } 70 | -------------------------------------------------------------------------------- /src/lib/assemble/span.rs: 
-------------------------------------------------------------------------------- 1 | use std::cmp::Eq; 2 | use std::cmp::PartialEq; 3 | use std::fmt::Debug; 4 | use std::hash::Hash; 5 | use std::hash::Hasher; 6 | 7 | #[derive(Debug, Clone, Copy)] 8 | pub struct Span<'a>(pub &'a str); 9 | impl<'a> Span<'a> { 10 | pub fn of(self, val: T) -> Spanned<'a, T> { 11 | Spanned { v: val, span: self } 12 | } 13 | } 14 | 15 | #[derive(Clone, Copy)] 16 | pub struct Spanned<'a, T> { 17 | pub v: T, 18 | pub span: Span<'a>, 19 | } 20 | impl Hash for Spanned<'_, T> { 21 | fn hash(&self, state: &mut H) { 22 | self.v.hash(state); 23 | } 24 | } 25 | impl PartialEq for Spanned<'_, T> { 26 | fn eq(&self, other: &Self) -> bool { 27 | self.v == other.v 28 | } 29 | } 30 | impl Eq for Spanned<'_, T> {} 31 | impl Debug for Spanned<'_, T> { 32 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 33 | self.v.fmt(f) 34 | } 35 | } 36 | 37 | #[derive(Debug, Clone, Copy)] 38 | pub struct SpanBounds { 39 | start: usize, 40 | end: usize, 41 | } 42 | impl SpanBounds { 43 | fn new(span: Span, source: &str) -> Self { 44 | let start = span.0.as_ptr() as usize - source.as_ptr() as usize; 45 | assert!(start <= source.len()); 46 | let end = start + span.0.len(); 47 | assert!(end <= source.len()); 48 | Self { start, end } 49 | } 50 | } 51 | 52 | struct ErrorPrinter<'a> { 53 | fname: &'a str, 54 | lines: Vec<(Span<'a>, SpanBounds)>, 55 | } 56 | impl<'a> ErrorPrinter<'a> { 57 | fn new(fname: &'a str, source: &'a str) -> Self { 58 | // get line offsets 59 | // let mut pos = 0; 60 | let lines: Vec<_> = source 61 | .lines() 62 | .map(|line| { 63 | // println!("{line:?}"); 64 | // let start = pos; 65 | // pos += line.len(); 66 | // (start, pos) 67 | let span = Span(line); 68 | (span, SpanBounds::new(span, source)) 69 | }) 70 | .collect(); 71 | // offsets.push(pos); 72 | 73 | // println!("offsets {:?}", lines); 74 | // for sb in offsets { 75 | // println!("{:?}", &source[sb.start..sb.end]); 76 | 
// } 77 | // dbg!(lines.partition_point(|sb| sb.start <= 0)); 78 | Self { fname, lines } 79 | } 80 | 81 | fn print(&self, is_first: bool, msg: &str, span: SpanBounds) { 82 | // const MAXLINELEN: usize = 80; // todo 83 | const TABWIDTH: usize = 8; 84 | 85 | let line_no = self.lines.partition_point(|(_, bounds)| bounds.end < span.start); 86 | let (Span(line), line_bounds) = self.lines[line_no]; 87 | 88 | // convert byte positions to character positions (within the line) 89 | let mut start_ci = None; 90 | let mut end_ci = None; 91 | let mut ci = 0; 92 | // println!("{span:?}"); 93 | for (byte_offset, c) in line.char_indices() { 94 | let bpos = line_bounds.start + byte_offset; 95 | // println!("{bpos} {ci} {c}"); 96 | if span.start == bpos { 97 | start_ci = Some(ci); 98 | } 99 | if span.end == bpos { 100 | end_ci = Some(ci); 101 | } 102 | ci += if c == '\t' { TABWIDTH } else { 1 }; 103 | } 104 | let start_ci = start_ci.unwrap_or(ci); 105 | let end_ci = end_ci.unwrap_or(ci); 106 | 107 | let underline: String = (0..ci + 1) 108 | .map(|i| { 109 | if i == start_ci { 110 | '^' 111 | } else if i > start_ci && i < end_ci { 112 | '~' 113 | } else { 114 | ' ' 115 | } 116 | }) 117 | .collect(); 118 | 119 | // if is_first { 120 | // eprintln!("{}:{}:{} {}", self.fname, line_no + 1, start_ci + 1, msg); 121 | // } else { 122 | // eprintln!("{}:{}:{} {}", self.fname, line_no + 1, start_ci + 1, msg); 123 | // } 124 | 125 | // todo - better line limit 126 | fn trim(s: &str) -> &str { 127 | &s[0..std::cmp::min(115, s.len())] 128 | } 129 | 130 | eprintln!("{}:{}:{} {}", self.fname, line_no + 1, start_ci + 1, msg); 131 | // eprintln!( 132 | // "{}:{}:{} {} {}", 133 | // self.fname, 134 | // line_no + 1, 135 | // start_ci + 1, 136 | // msg, 137 | // trim(&self.source[span.start..span.end]) 138 | // ); 139 | if is_first && line_no > 0 { 140 | eprintln!("{}", trim(self.lines[line_no - 1].0 .0)); 141 | } 142 | eprintln!("{}", trim(line)); 143 | eprintln!("{}", trim(&underline)); 144 | if 
is_first && line_no + 1 < self.lines.len() { 145 | eprintln!("{}", trim(self.lines[line_no + 1].0 .0)); 146 | } 147 | } 148 | } 149 | 150 | #[derive(Debug)] 151 | pub struct Error(Vec<(String, SpanBounds)>); 152 | impl Error { 153 | pub fn new(source: &str, pairs: Vec<(&str, Span<'_>)>) -> Self { 154 | Self( 155 | pairs 156 | .into_iter() 157 | .map(|(msg, span)| (msg.to_owned(), SpanBounds::new(span, source))) 158 | .collect(), 159 | ) 160 | } 161 | 162 | pub fn display(&self, fname: &str, source: &str) { 163 | let printer = ErrorPrinter::new(fname, source); 164 | let mut is_first = true; 165 | for (msg, span) in self.0.iter() { 166 | printer.print(is_first, msg, *span); 167 | is_first = false; 168 | } 169 | } 170 | } 171 | 172 | #[derive(Clone, Copy)] 173 | pub struct ErrorMaker<'a> { 174 | source: &'a str, 175 | } 176 | impl<'a> ErrorMaker<'a> { 177 | pub fn new(source: &'a str) -> Self { 178 | Self { source } 179 | } 180 | 181 | pub fn error1(&self, msg: &str, span: Span<'_>) -> Error { 182 | Error::new(self.source, vec![(msg, span)]) 183 | } 184 | 185 | pub fn error2(&self, msg: &str, span: Span<'_>, msg2: &str, span2: Span<'_>) -> Error { 186 | Error::new(self.source, vec![(msg, span), (msg2, span2)]) 187 | } 188 | 189 | pub fn err1(&self, msg: &str, span: Span<'_>) -> Result { 190 | Err(self.error1(msg, span)) 191 | } 192 | 193 | pub fn err2(&self, msg: &str, span: Span<'_>, msg2: &str, span2: Span<'_>) -> Result { 194 | Err(self.error2(msg, span, msg2, span2)) 195 | } 196 | 197 | pub fn error1str(&self, msg: String, span: Span<'_>) -> Error { 198 | Error(vec![(msg, SpanBounds::new(span, self.source))]) 199 | } 200 | 201 | pub fn err1str(&self, msg: String, span: Span<'_>) -> Result { 202 | Err(self.error1str(msg, span)) 203 | } 204 | } 205 | -------------------------------------------------------------------------------- /src/lib/assemble/string.rs: -------------------------------------------------------------------------------- 1 | fn mutf8_codepoint(out: 
&mut Vec, c: u16) { 2 | match c { 3 | 1..=127 => out.push(c as u8), 4 | 0 | 0x80..=0x7FF => { 5 | out.push(0xC0 ^ ((c >> 6) as u8)); 6 | out.push(0x80 ^ ((c & 63) as u8)); 7 | } 8 | 0x800..=0xFFFF => { 9 | out.push(0xE0 ^ ((c >> 12) as u8)); 10 | out.push(0x80 ^ (((c >> 6) & 63) as u8)); 11 | out.push(0x80 ^ ((c & 63) as u8)); 12 | } 13 | } 14 | } 15 | 16 | fn mutf8_char(out: &mut Vec, c: char) { 17 | let c = c as u32; 18 | if c >= 0x10000 { 19 | let c = c - 0x10000; 20 | let high = 0xD800 + ((c >> 10) as u16); 21 | let low = 0xDC00 + ((c & 0x3FF) as u16); 22 | mutf8_codepoint(out, high); 23 | mutf8_codepoint(out, low); 24 | } else { 25 | mutf8_codepoint(out, c as u16); 26 | } 27 | } 28 | 29 | pub fn unescape(s: &str) -> Result, (&'static str, &str)> { 30 | let mut out = Vec::with_capacity(s.len() - 2); 31 | 32 | let is_binary = s.starts_with('b'); 33 | let s = s.trim_start_matches('b'); 34 | let mut chars = s.chars(); 35 | let quote = chars.next().unwrap(); 36 | assert!(quote == '"' || quote == '\''); 37 | 38 | while let Some(c) = chars.next() { 39 | if c == quote { 40 | break; 41 | } else if c == '\\' { 42 | let rest = chars.as_str(); 43 | 44 | match chars.next().ok_or(("Premature end of input", rest))? 
{ 45 | '\\' => out.push('\\' as u8), 46 | 'n' => out.push('\n' as u8), 47 | 'r' => out.push('\r' as u8), 48 | 't' => out.push('\t' as u8), 49 | '"' => out.push('\"' as u8), 50 | '\'' => out.push('\'' as u8), 51 | 'u' => { 52 | let hex = chars.as_str().get(..4).ok_or(("Illegal unicode escape", rest))?; 53 | let c = u16::from_str_radix(hex, 16).map_err(|_| ("Illegal unicode escape", hex))?; 54 | mutf8_codepoint(&mut out, c); 55 | chars = rest[5..].chars(); 56 | } 57 | 'U' => { 58 | let hex = chars.as_str().get(..8).ok_or(("Illegal unicode escape", rest))?; 59 | let c = u32::from_str_radix(hex, 16).map_err(|_| ("Illegal unicode escape", hex))?; 60 | let c = c.try_into().map_err(|_| ("Illegal unicode code point value", hex))?; 61 | mutf8_char(&mut out, c); 62 | chars = rest[9..].chars(); 63 | } 64 | 'x' => { 65 | let hex = chars.as_str().get(..2).ok_or(("Illegal hex escape", rest))?; 66 | let c = u8::from_str_radix(hex, 16).map_err(|_| ("Illegal hex escape", hex))?; 67 | if is_binary { 68 | out.push(c); 69 | } else { 70 | // workaround for backwards compat with Krakatau 1 71 | mutf8_codepoint(&mut out, c as u16); 72 | } 73 | chars = rest[3..].chars(); 74 | } 75 | _ => return Err(("Illegal string escape", rest)), 76 | } 77 | } else { 78 | mutf8_char(&mut out, c); 79 | } 80 | } 81 | 82 | Ok(out) 83 | } 84 | -------------------------------------------------------------------------------- /src/lib/assemble/tokenize.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | use regex::Regex; 3 | use regex::RegexSet; 4 | 5 | use super::span::Error; 6 | use super::span::Span; 7 | 8 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 9 | pub enum TokenType { 10 | Newlines, 11 | Colon, 12 | Equals, 13 | Directive, 14 | Word, 15 | Ref, 16 | BsRef, 17 | LabelDef, 18 | StringLit, 19 | IntLit, 20 | LongLit, 21 | FloatLit, 22 | DoubleLit, 23 | } 24 | 25 | #[derive(Debug, Clone, Copy)] 26 | pub struct Token<'a>(pub TokenType, 
pub Span<'a>); 27 | 28 | pub fn tokenize(source: &str) -> Result, Error> { 29 | use TokenType::*; 30 | 31 | static SET_PATTERNS: &[&str] = &[ 32 | r"\A(?:;.*)?\s+", 33 | // COLON 34 | r"\A:($|\s)", 35 | // EQUALS 36 | r"\A=($|\s)", 37 | // DIRECTIVE 38 | r"\A\.[a-z]+($|\s)", 39 | // WORD 40 | r"\A(?-u)(?:[a-zA-Z_$\(<]|\[[A-Z\[])[\w$;/\[\(\)<>*+-]*($|\s)", 41 | // REF 42 | r"\A\[[a-z0-9_]+\]($|\s)", 43 | r"\A\[bs:[a-z0-9_]+\]($|\s)", 44 | // LABEL_DEF 45 | r"\AL\w+:($|\s)", 46 | // STRING_LITERAL 47 | r#"\Ab?"[^"\n\\]*(?:\\.[^"\n\\]*)*"($|\s)"#, 48 | r#"\Ab?'[^'\n\\]*(?:\\.[^'\n\\]*)*'($|\s)"#, 49 | // INT_LITERAL 50 | r#"\A[+-]?(?:0x[0-9a-fA-F]+|[1-9][0-9]*|0)L?($|\s)"#, 51 | // FLOAT_LITERAL 52 | r#"\A[+-]Infinityf?($|\s)"#, 53 | r#"\A[+-]NaN(?:<0x[0-9a-fA-F]+>)?f?($|\s)"#, 54 | r#"\A(?-u)[+-]?\d+\.\d+(?:e[+-]?\d+)?f?($|\s)"#, // decimal float 55 | r#"\A(?-u)[+-]?\d+(?:e[+-]?\d+)f?($|\s)"#, // decimal float without fraction (exponent mandatory) 56 | r#"\A(?-u)[+-]?0x[0-9a-fA-F]+(?:\.[0-9a-fA-F]+)?(?:p[+-]?\d+)f?($|\s)"#, // hex float 57 | ]; 58 | 59 | lazy_static! { 60 | static ref RE_SET: RegexSet = RegexSet::new(SET_PATTERNS).unwrap(); 61 | static ref RE_VEC: Vec = SET_PATTERNS.iter().map(|pat| Regex::new(pat).unwrap()).collect(); 62 | } 63 | 64 | let error1 = |msg, tok| Err(Error::new(source, vec![(msg, tok)])); 65 | let error2 = |msg, tok, msg2, tok2| Err(Error::new(source, vec![(msg, tok), (msg2, tok2)])); 66 | 67 | let mut tokens = Vec::new(); 68 | let mut s = source.trim_end(); 69 | let mut has_newline = true; 70 | 71 | while s.len() > 0 { 72 | let matches: Vec<_> = RE_SET.matches(s).iter().collect(); 73 | 74 | // Invalid token 75 | if matches.len() == 0 { 76 | const SUFFIX_LEN: usize = r"($|\s)".len(); 77 | let trimmed_res: Vec<_> = SET_PATTERNS[1..] 
78 | .iter() 79 | .map(|p| Regex::new(&p[..p.len() - SUFFIX_LEN]).unwrap()) 80 | .collect(); 81 | 82 | let best = trimmed_res.into_iter().filter_map(|re| re.find(s)).max_by_key(|m| m.end()); 83 | if let Some(best) = best { 84 | let size = best.end(); 85 | let tok = Span(&s[..size]); 86 | let tok2 = Span(&s[size..size]); 87 | return error2("Error: Invalid token", tok, "Hint: Try adding a space here.", tok2); 88 | } else if s.starts_with('"') || s.starts_with("'") { 89 | let tok = Span(&s[..1]); 90 | return error1("Error: Unclosed string literal", tok); 91 | } else { 92 | let tok = Span(s.split_whitespace().next().unwrap()); 93 | return error1("Error: Invalid token", tok); 94 | } 95 | } 96 | 97 | assert!(matches.len() == 1); 98 | let m_i = matches[0]; 99 | let m = RE_VEC[m_i].find(s).unwrap(); 100 | assert!(m.start() == 0); 101 | 102 | let (tok, rest) = s.split_at(m.end()); 103 | 104 | if m_i == 0 { 105 | // whitespace 106 | if !has_newline && tok.contains('\n') { 107 | tokens.push(Token(Newlines, Span(tok))); 108 | has_newline = true; 109 | } 110 | } else { 111 | has_newline = tok.ends_with('\n'); 112 | let tok = tok.trim_end(); 113 | 114 | let ty = match m_i { 115 | 1 => Colon, 116 | 2 => Equals, 117 | 3 => Directive, 118 | 4 => Word, 119 | 5 => Ref, 120 | 6 => BsRef, 121 | 7 => LabelDef, 122 | 8..=9 => StringLit, 123 | 10 => { 124 | if tok.ends_with('L') { 125 | LongLit 126 | } else { 127 | IntLit 128 | } 129 | } 130 | 11..=15 => { 131 | if tok.ends_with('f') { 132 | FloatLit 133 | } else { 134 | DoubleLit 135 | } 136 | } 137 | _ => panic!("Internal error, please report this"), 138 | }; 139 | 140 | tokens.push(Token(ty, Span(tok))); 141 | if has_newline { 142 | tokens.push(Token(Newlines, Span(&s[tok.len()..tok.len() + 1]))); 143 | } 144 | } 145 | s = rest; 146 | } 147 | if !has_newline { 148 | tokens.push(Token(Newlines, Span(s))); 149 | } 150 | 151 | Ok(tokens) 152 | } 153 | -------------------------------------------------------------------------------- 
/src/lib/assemble/writer.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | use std::sync::atomic::AtomicBool; 3 | 4 | use super::cpool::types; 5 | use super::cpool::Or; 6 | use crate::lib::assemble::span::Span; 7 | use crate::lib::assemble::span::Spanned; 8 | 9 | #[derive(Debug)] 10 | pub struct Placeholder(usize); 11 | impl Placeholder { 12 | fn new(off: usize) -> Self { 13 | Self(off) 14 | } 15 | 16 | fn increment(mut self, off: usize) -> Self { 17 | self.0 += off; 18 | self 19 | } 20 | 21 | fn into_range(self) -> std::ops::Range { 22 | let off = self.0; 23 | std::mem::forget(self); // avoid drop check 24 | off..(off + N) 25 | } 26 | } 27 | // temporary debug check to make sure placeholders are all used 28 | pub static UNUSED_PH: AtomicBool = AtomicBool::new(false); 29 | impl Drop for Placeholder { 30 | fn drop(&mut self) { 31 | UNUSED_PH.store(true, std::sync::atomic::Ordering::Relaxed); 32 | } 33 | } 34 | fn assert_zero(buf: &mut [u8]) -> &mut [u8] { 35 | assert!(buf.into_iter().all(|b| *b == 0)); 36 | buf 37 | } 38 | 39 | #[derive(Default)] 40 | pub struct BufWriter { 41 | buf: Vec, 42 | } 43 | impl BufWriter { 44 | pub fn into_buf(self) -> Vec { 45 | self.buf 46 | } 47 | 48 | pub fn len(&self) -> usize { 49 | self.buf.len() 50 | } 51 | 52 | // pub fn len(&self) -> usize {self.buf.len()} 53 | pub fn extend(&mut self, v: &BufWriter) { 54 | self.buf.extend_from_slice(&v.buf) 55 | } 56 | pub fn write(&mut self, v: &[u8]) { 57 | self.buf.extend_from_slice(v) 58 | } 59 | pub fn u8(&mut self, v: u8) { 60 | self.buf.push(v) 61 | } 62 | pub fn u16(&mut self, v: u16) { 63 | self.write(&v.to_be_bytes()) 64 | } 65 | pub fn u32(&mut self, v: u32) { 66 | self.write(&v.to_be_bytes()) 67 | } 68 | pub fn u64(&mut self, v: u64) { 69 | self.write(&v.to_be_bytes()) 70 | } 71 | /////////////////////////////////////////////////////////////////////////// 72 | pub fn ph(&mut self) -> Placeholder<2> { 73 | let i = 
self.buf.len(); 74 | self.u16(0); 75 | Placeholder::new(i) 76 | } 77 | 78 | pub fn ph8(&mut self) -> Placeholder<1> { 79 | let i = self.buf.len(); 80 | self.u8(0); 81 | Placeholder::new(i) 82 | } 83 | 84 | pub fn ph32(&mut self) -> Placeholder<4> { 85 | let i = self.buf.len(); 86 | self.u32(0); 87 | Placeholder::new(i) 88 | } 89 | 90 | pub fn fill(&mut self, ph: Placeholder<2>, v: u16) { 91 | assert_zero(&mut self.buf[ph.into_range()]).copy_from_slice(&v.to_be_bytes()); 92 | } 93 | 94 | pub fn fill8(&mut self, ph: Placeholder<1>, v: u8) { 95 | assert_zero(&mut self.buf[ph.into_range()]).copy_from_slice(&v.to_be_bytes()); 96 | } 97 | 98 | pub fn fill32(&mut self, ph: Placeholder<4>, v: u32) { 99 | assert_zero(&mut self.buf[ph.into_range()]).copy_from_slice(&v.to_be_bytes()); 100 | } 101 | 102 | /////////////////////////////////////////////////////////////////////////// 103 | pub fn read_u16(&self, ind: usize) -> u16 { 104 | u16::from_be_bytes(self.buf[ind..ind + 2].try_into().unwrap()) 105 | } 106 | } 107 | impl Debug for BufWriter { 108 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 109 | // self.buf.fmt(f) 110 | f.write_fmt(format_args!("{:02X?}", self.buf)) 111 | } 112 | } 113 | 114 | #[derive(Debug, Default)] 115 | pub struct Writer<'a> { 116 | w: BufWriter, 117 | ldc_refs: Vec<(Placeholder<1>, types::SymSpanConst<'a>, Span<'a>)>, 118 | refs: Vec<(Placeholder<2>, types::SymSpanConst<'a>)>, 119 | } 120 | impl<'a> Writer<'a> { 121 | pub fn cp(&mut self, r: Or, Spanned<'a, impl types::ToConst<'a, types::RefType<'a>>>>) { 122 | let ph = self.ph(); 123 | // self.refs.push((ph, r.map_b(types::ToConst::to_const))); 124 | self.refs.push((ph, r.map_b(|c| c.span.of(c.v.to_const())))); 125 | } 126 | 127 | pub fn cp_ldc(&mut self, r: types::SymSpanConst<'a>, ldc_span: Span<'a>) { 128 | let ph = self.ph8(); 129 | self.ldc_refs.push((ph, r, ldc_span)); 130 | } 131 | 132 | pub fn resolve_ldc_refs( 133 | &mut self, 134 | mut f: impl 
FnMut(types::SymSpanConst<'a>, Span<'a>) -> Result, 135 | ) -> Result<(), E> { 136 | for (ph, r, ldc_span) in self.ldc_refs.drain(..) { 137 | self.w.fill8(ph, f(r, ldc_span)?); 138 | } 139 | Ok(()) 140 | } 141 | 142 | pub fn resolve_refs(mut self, mut f: impl FnMut(types::SymSpanConst<'a>) -> Result) -> Result { 143 | assert!(self.ldc_refs.is_empty()); 144 | for (ph, r) in self.refs.drain(..) { 145 | self.w.fill(ph, f(r)?); 146 | } 147 | Ok(self.w) 148 | } 149 | 150 | pub fn extend_from_writer(&mut self, w: Writer<'a>) { 151 | let off = self.len(); 152 | self.buf.extend_from_slice(&w.buf); 153 | self.ldc_refs 154 | .extend(w.ldc_refs.into_iter().map(|(ph, cp, span)| (ph.increment(off), cp, span))); 155 | self.refs.extend(w.refs.into_iter().map(|(ph, cp)| (ph.increment(off), cp))); 156 | } 157 | } 158 | impl<'a> std::ops::Deref for Writer<'a> { 159 | type Target = BufWriter; 160 | 161 | fn deref(&self) -> &Self::Target { 162 | &self.w 163 | } 164 | } 165 | impl<'a> std::ops::DerefMut for Writer<'a> { 166 | fn deref_mut(&mut self) -> &mut Self::Target { 167 | &mut self.w 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/lib/classfile/attrs.rs: -------------------------------------------------------------------------------- 1 | use super::code; 2 | use super::code::MaybePosSet; 3 | use super::code::Pos; 4 | use super::code::PosSet; 5 | use super::cpool::ConstPool; 6 | use super::reader::ParseError; 7 | use super::reader::Reader; 8 | use crate::lib::util::BStr; 9 | 10 | /////////////////////////////////////////////////////////////////////////////// 11 | #[derive(Debug)] 12 | pub struct BootstrapMethod { 13 | pub bsref: u16, 14 | pub args: Vec, 15 | } 16 | impl BootstrapMethod { 17 | fn new(r: &mut Reader) -> Result { 18 | Ok(Self { 19 | bsref: r.u16()?, 20 | args: r.parse_list(Reader::u16)?, 21 | }) 22 | } 23 | } 24 | /////////////////////////////////////////////////////////////////////////////// 25 | 
#[derive(Debug)] 26 | pub enum ElementValue { 27 | Anno(Annotation), 28 | Array(Vec), 29 | Enum(u16, u16), 30 | 31 | Class(u16), 32 | Str(u16), 33 | 34 | Byte(u16), 35 | Boolean(u16), 36 | Char(u16), 37 | Short(u16), 38 | Int(u16), 39 | Float(u16), 40 | Long(u16), 41 | Double(u16), 42 | } 43 | impl ElementValue { 44 | fn new(r: &mut Reader) -> Result { 45 | use ElementValue::*; 46 | Ok(match r.u8()? { 47 | 64 => Anno(Annotation::new(r)?), 48 | 66 => Byte(r.u16()?), 49 | 67 => Char(r.u16()?), 50 | 68 => Double(r.u16()?), 51 | 70 => Float(r.u16()?), 52 | 73 => Int(r.u16()?), 53 | 74 => Long(r.u16()?), 54 | 83 => Short(r.u16()?), 55 | 90 => Boolean(r.u16()?), 56 | 91 => Array(r.parse_list(ElementValue::new)?), 57 | 99 => Class(r.u16()?), 58 | 101 => Enum(r.u16()?, r.u16()?), 59 | 115 => Str(r.u16()?), 60 | _ => return ParseError::s("Invalid element value tag"), 61 | }) 62 | } 63 | } 64 | 65 | #[derive(Debug)] 66 | pub struct Annotation(pub u16, pub Vec<(u16, ElementValue)>); 67 | impl Annotation { 68 | fn new(r: &mut Reader) -> Result { 69 | let desc = r.u16()?; 70 | let vals = r.parse_list(|r| Ok((r.u16()?, ElementValue::new(r)?)))?; 71 | Ok(Self(desc, vals)) 72 | } 73 | } 74 | 75 | #[derive(Debug, Clone, Copy)] 76 | pub struct LocalVarTargetInfo { 77 | pub range: Option<(Pos, Pos)>, 78 | pub index: u16, 79 | } 80 | impl LocalVarTargetInfo { 81 | fn new(r: &mut Reader, pset: Option<&PosSet>) -> Result { 82 | let (start, length, index) = (r.u16()?, r.u16()?, r.u16()?); 83 | // WTF, Java? 
84 | let range = if start == 0xFFFF && length == 0xFFFF { 85 | None 86 | } else { 87 | let start = pset.make(start)?; 88 | let end = pset.make_off(start, length)?; 89 | Some((start, end)) 90 | }; 91 | Ok(Self { range, index }) 92 | } 93 | } 94 | 95 | type TargetInfo = (u8, TargetInfoData); 96 | #[derive(Debug)] 97 | pub enum TargetInfoData { 98 | TypeParam(u8), 99 | Super(u16), 100 | TypeParamBound(u8, u8), 101 | Empty, 102 | FormalParam(u8), 103 | Throws(u16), 104 | LocalVar(Vec), 105 | Catch(u16), 106 | Offset(Pos), 107 | TypeArgument(Pos, u8), 108 | } 109 | impl TargetInfoData { 110 | fn new(r: &mut Reader, pset: Option<&PosSet>) -> Result { 111 | use TargetInfoData::*; 112 | let tag = r.u8()?; 113 | let body = match tag { 114 | 0x00 => TypeParam(r.u8()?), 115 | 0x01 => TypeParam(r.u8()?), 116 | 0x10 => Super(r.u16()?), 117 | 0x11 => TypeParamBound(r.u8()?, r.u8()?), 118 | 0x12 => TypeParamBound(r.u8()?, r.u8()?), 119 | 0x13 => Empty, 120 | 0x14 => Empty, 121 | 0x15 => Empty, 122 | 0x16 => FormalParam(r.u8()?), 123 | 0x17 => Throws(r.u16()?), 124 | 125 | 0x40 => LocalVar(r.parse_list(|r| LocalVarTargetInfo::new(r, pset))?), 126 | 0x41 => LocalVar(r.parse_list(|r| LocalVarTargetInfo::new(r, pset))?), 127 | 0x42 => Catch(r.u16()?), 128 | 0x43 => Offset(pset.make(r.u16()?)?), 129 | 0x44 => Offset(pset.make(r.u16()?)?), 130 | 0x45 => Offset(pset.make(r.u16()?)?), 131 | 0x46 => Offset(pset.make(r.u16()?)?), 132 | 0x47 => TypeArgument(pset.make(r.u16()?)?, r.u8()?), 133 | 0x48 => TypeArgument(pset.make(r.u16()?)?, r.u8()?), 134 | 0x49 => TypeArgument(pset.make(r.u16()?)?, r.u8()?), 135 | 0x4A => TypeArgument(pset.make(r.u16()?)?, r.u8()?), 136 | 0x4B => TypeArgument(pset.make(r.u16()?)?, r.u8()?), 137 | _ => return ParseError::s("Invalid target info tag"), 138 | }; 139 | Ok((tag, body)) 140 | } 141 | } 142 | 143 | #[derive(Debug)] 144 | pub struct ParameterAnnotation(pub Vec); 145 | impl ParameterAnnotation { 146 | fn new(r: &mut Reader) -> Result { 147 | 
Ok(Self(r.parse_list(Annotation::new)?)) 148 | } 149 | } 150 | 151 | #[derive(Debug)] 152 | pub struct TypeAnnotation { 153 | pub info: TargetInfo, 154 | pub path: Vec<(u8, u8)>, 155 | pub anno: Annotation, 156 | } 157 | impl TypeAnnotation { 158 | fn new(r: &mut Reader, pset: Option<&PosSet>) -> Result { 159 | let info = TargetInfoData::new(r, pset)?; 160 | let path = r.parse_list_bytelen(|r| Ok((r.u8()?, r.u8()?)))?; 161 | let anno = Annotation::new(r)?; 162 | Ok(Self { info, path, anno }) 163 | } 164 | } 165 | 166 | /////////////////////////////////////////////////////////////////////////////// 167 | #[derive(Debug)] 168 | pub struct RecordComponent<'a> { 169 | pub name: u16, 170 | pub desc: u16, 171 | pub attrs: Vec>, 172 | } 173 | impl<'a> RecordComponent<'a> { 174 | fn new(r: &mut Reader<'a>, cp: &ConstPool<'a>, pset: Option<&PosSet>) -> Result { 175 | let name = r.u16()?; 176 | let desc = r.u16()?; 177 | let attrs = Attribute::new_list(r, cp, pset, code::CodeOptions::default())?; 178 | Ok(Self { name, desc, attrs }) 179 | } 180 | } 181 | 182 | /////////////////////////////////////////////////////////////////////////////// 183 | #[derive(Debug)] 184 | pub struct Requires { 185 | pub module: u16, 186 | pub flags: u16, 187 | pub version: u16, 188 | } 189 | impl Requires { 190 | fn new(r: &mut Reader) -> Result { 191 | Ok(Self { 192 | module: r.u16()?, 193 | flags: r.u16()?, 194 | version: r.u16()?, 195 | }) 196 | } 197 | } 198 | 199 | #[derive(Debug)] 200 | pub struct ModPackage { 201 | pub package: u16, 202 | pub flags: u16, 203 | pub modules: Vec, 204 | } 205 | impl ModPackage { 206 | fn new(r: &mut Reader) -> Result { 207 | Ok(Self { 208 | package: r.u16()?, 209 | flags: r.u16()?, 210 | modules: r.parse_list(Reader::u16)?, 211 | }) 212 | } 213 | } 214 | 215 | #[derive(Debug)] 216 | pub struct Provides { 217 | pub cls: u16, 218 | pub provides_with: Vec, 219 | } 220 | impl Provides { 221 | fn new(r: &mut Reader) -> Result { 222 | Ok(Self { 223 | cls: r.u16()?, 
224 | provides_with: r.parse_list(Reader::u16)?, 225 | }) 226 | } 227 | } 228 | 229 | #[derive(Debug)] 230 | pub struct ModuleAttr { 231 | pub module: u16, 232 | pub flags: u16, 233 | pub version: u16, 234 | pub requires: Vec, 235 | pub exports: Vec, 236 | pub opens: Vec, 237 | pub uses: Vec, 238 | pub provides: Vec, 239 | } 240 | impl ModuleAttr { 241 | fn new(r: &mut Reader) -> Result { 242 | Ok(Self { 243 | module: r.u16()?, 244 | flags: r.u16()?, 245 | version: r.u16()?, 246 | requires: r.parse_list(Requires::new)?, 247 | exports: r.parse_list(ModPackage::new)?, 248 | opens: r.parse_list(ModPackage::new)?, 249 | uses: r.parse_list(Reader::u16)?, 250 | provides: r.parse_list(Provides::new)?, 251 | }) 252 | } 253 | } 254 | 255 | #[derive(Debug, Clone, Copy)] 256 | pub struct LocalVarLine { 257 | pub start: Pos, 258 | pub end: Pos, 259 | pub name: u16, 260 | pub desc: u16, 261 | pub ind: u16, 262 | } 263 | impl LocalVarLine { 264 | fn new(r: &mut Reader, pset: Option<&PosSet>) -> Result { 265 | let start = pset.make(r.u16()?)?; 266 | let length = r.u16()?; 267 | let end = pset.make_off(start, length)?; 268 | 269 | Ok(Self { 270 | start, 271 | end, 272 | name: r.u16()?, 273 | desc: r.u16()?, 274 | ind: r.u16()?, 275 | }) 276 | } 277 | } 278 | /////////////////////////////////////////////////////////////////////////////// 279 | 280 | #[derive(Debug)] 281 | pub enum AttrBody<'a> { 282 | AnnotationDefault(Box), 283 | BootstrapMethods(Vec), 284 | Code((Box>, Option>>)), 285 | ConstantValue(u16), 286 | Deprecated, 287 | EnclosingMethod(u16, u16), 288 | Exceptions(Vec), 289 | InnerClasses(Vec<(u16, u16, u16, u16)>), 290 | LineNumberTable(Vec<(Pos, u16)>), 291 | LocalVariableTable(Vec), 292 | LocalVariableTypeTable(Vec), 293 | MethodParameters(Vec<(u16, u16)>), 294 | Module(Box), 295 | ModuleMainClass(u16), 296 | ModulePackages(Vec), 297 | NestHost(u16), 298 | NestMembers(Vec), 299 | PermittedSubclasses(Vec), 300 | Record(Vec>), 301 | 302 | 
RuntimeInvisibleAnnotations(Vec), 303 | RuntimeInvisibleParameterAnnotations(Vec), 304 | RuntimeInvisibleTypeAnnotations(Vec), 305 | RuntimeVisibleAnnotations(Vec), 306 | RuntimeVisibleParameterAnnotations(Vec), 307 | RuntimeVisibleTypeAnnotations(Vec), 308 | 309 | Signature(u16), 310 | SourceDebugExtension(&'a [u8]), 311 | SourceFile(u16), 312 | StackMapTable(code::StackMapTable), 313 | Synthetic, 314 | 315 | Raw(&'a [u8]), 316 | } 317 | impl<'a> AttrBody<'a> { 318 | pub fn new( 319 | name: &'a [u8], 320 | data: &'a [u8], 321 | cp: &ConstPool<'a>, 322 | pset: Option<&PosSet>, 323 | code_opts: code::CodeOptions, 324 | ) -> Self { 325 | Self::try_parse(name, data, cp, pset, code_opts).unwrap_or(Self::Raw(data)) 326 | // Self::try_parse(name, data, cp).unwrap() 327 | } 328 | 329 | fn try_parse( 330 | name: &'a [u8], 331 | data: &'a [u8], 332 | cp: &ConstPool<'a>, 333 | pset: Option<&PosSet>, 334 | code_opts: code::CodeOptions, 335 | ) -> Result { 336 | use AttrBody::*; 337 | let mut r = Reader(data); 338 | let r = &mut r; 339 | 340 | let parsed = match name { 341 | b"AnnotationDefault" => AnnotationDefault(Box::new(ElementValue::new(r)?)), 342 | b"BootstrapMethods" => BootstrapMethods(r.parse_list(BootstrapMethod::new)?), 343 | b"Code" => { 344 | let c = Code(code::Code::parse(r.clone(), cp, code_opts)?); 345 | r.0 = &[]; 346 | c 347 | } 348 | b"ConstantValue" => ConstantValue(r.u16()?), 349 | b"Deprecated" => Deprecated, 350 | b"EnclosingMethod" => EnclosingMethod(r.u16()?, r.u16()?), 351 | b"Exceptions" => Exceptions(r.parse_list(|r| Ok(r.u16()?))?), 352 | b"InnerClasses" => InnerClasses(r.parse_list(|r| Ok((r.u16()?, r.u16()?, r.u16()?, r.u16()?)))?), 353 | b"LineNumberTable" => LineNumberTable(r.parse_list(|r| Ok((pset.make(r.u16()?)?, r.u16()?)))?), 354 | b"LocalVariableTable" => LocalVariableTable(r.parse_list(|r| LocalVarLine::new(r, pset))?), 355 | b"LocalVariableTypeTable" => LocalVariableTypeTable(r.parse_list(|r| LocalVarLine::new(r, pset))?), 356 | 
b"MethodParameters" => MethodParameters(r.parse_list_bytelen(|r| Ok((r.u16()?, r.u16()?)))?), 357 | b"Module" => Module(Box::new(ModuleAttr::new(r)?)), 358 | b"ModuleMainClass" => ModuleMainClass(r.u16()?), 359 | b"ModulePackages" => ModulePackages(r.parse_list(|r| Ok(r.u16()?))?), 360 | b"NestHost" => NestHost(r.u16()?), 361 | b"NestMembers" => NestMembers(r.parse_list(|r| Ok(r.u16()?))?), 362 | b"PermittedSubclasses" => PermittedSubclasses(r.parse_list(|r| Ok(r.u16()?))?), 363 | b"Record" => Record(r.parse_list(|r| RecordComponent::new(r, cp, pset))?), 364 | 365 | b"RuntimeInvisibleAnnotations" => RuntimeInvisibleAnnotations(r.parse_list(Annotation::new)?), 366 | b"RuntimeInvisibleParameterAnnotations" => { 367 | RuntimeInvisibleParameterAnnotations(r.parse_list_bytelen(ParameterAnnotation::new)?) 368 | } 369 | b"RuntimeInvisibleTypeAnnotations" => { 370 | RuntimeInvisibleTypeAnnotations(r.parse_list(|r| TypeAnnotation::new(r, pset))?) 371 | } 372 | b"RuntimeVisibleAnnotations" => RuntimeVisibleAnnotations(r.parse_list(Annotation::new)?), 373 | b"RuntimeVisibleParameterAnnotations" => { 374 | RuntimeVisibleParameterAnnotations(r.parse_list_bytelen(ParameterAnnotation::new)?) 375 | } 376 | b"RuntimeVisibleTypeAnnotations" => { 377 | RuntimeVisibleTypeAnnotations(r.parse_list(|r| TypeAnnotation::new(r, pset))?) 
378 | } 379 | 380 | b"Signature" => Signature(r.u16()?), 381 | b"SourceDebugExtension" => SourceDebugExtension(data), 382 | b"SourceFile" => SourceFile(r.u16()?), 383 | b"StackMapTable" => StackMapTable(code::StackMapTable::new(r, pset)?), 384 | b"Synthetic" => Synthetic, 385 | 386 | _ => Raw(data), 387 | }; 388 | // assert!(r.0.len() == 0); 389 | Ok(if r.0.len() > 0 { Raw(data) } else { parsed }) 390 | // Ok(parsed) 391 | } 392 | 393 | pub fn is_raw(&self) -> bool { 394 | matches!(self, AttrBody::Raw(_)) 395 | } 396 | } 397 | 398 | #[derive(Debug)] 399 | pub struct Attribute<'a> { 400 | pub name: u16, 401 | pub length: u32, 402 | pub actual_length: u32, 403 | pub name_utf: BStr<'a>, 404 | pub body: AttrBody<'a>, 405 | } 406 | impl<'a> Attribute<'a> { 407 | pub(super) fn new( 408 | r: &mut Reader<'a>, 409 | cp: &ConstPool<'a>, 410 | pset: Option<&PosSet>, 411 | allow_stackmap: bool, 412 | code_opts: code::CodeOptions, 413 | ) -> Result { 414 | let name_ind = r.u16()?; 415 | let length = r.u32()?; 416 | 417 | let name_utf = cp.utf8(name_ind).ok_or(ParseError("Attribute has invalid name index"))?; 418 | 419 | let actual_length = if name_utf == b"InnerClasses" { 420 | r.clone().u16()? as u32 * 8 + 2 421 | } else { 422 | length 423 | }; 424 | 425 | let data = r.get(actual_length as usize)?; 426 | let mut body = AttrBody::new(name_utf, data, cp, pset, code_opts); 427 | 428 | if !allow_stackmap { 429 | if let AttrBody::StackMapTable(..) 
= body { 430 | body = AttrBody::Raw(data); 431 | } 432 | } 433 | 434 | Ok(Self { 435 | name: name_ind, 436 | length, 437 | actual_length, 438 | name_utf: BStr(name_utf), 439 | body, 440 | }) 441 | } 442 | 443 | pub(super) fn new_list( 444 | r: &mut Reader<'a>, 445 | cp: &ConstPool<'a>, 446 | pset: Option<&PosSet>, 447 | code_opts: code::CodeOptions, 448 | ) -> Result, ParseError> { 449 | let mut allow_stackmap = true; 450 | r.parse_list(|r| { 451 | let attr = Attribute::new(r, cp, pset, allow_stackmap, code_opts)?; 452 | if let AttrBody::StackMapTable(..) = attr.body { 453 | allow_stackmap = false; 454 | } 455 | Ok(attr) 456 | }) 457 | } 458 | 459 | pub(super) fn has_ambiguous_short_code(&self) -> bool { 460 | if let AttrBody::Code((_, Some(_))) = self.body { 461 | true 462 | } else { 463 | false 464 | } 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /src/lib/classfile/cpool.rs: -------------------------------------------------------------------------------- 1 | use super::reader::ParseError; 2 | use super::reader::Reader; 3 | use crate::lib::util::BStr; 4 | 5 | #[derive(Debug)] 6 | pub enum Const<'a> { 7 | Null, // 0 unused 8 | Utf8(BStr<'a>), 9 | // 2 unused 10 | Int(u32), 11 | Float(u32), 12 | Long(u64), 13 | Double(u64), 14 | Class(u16), 15 | Str(u16), 16 | Field(u16, u16), 17 | Method(u16, u16), 18 | InterfaceMethod(u16, u16), 19 | NameAndType(u16, u16), 20 | // 13 unused 21 | // 14 unused 22 | MethodHandle(u8, u16), 23 | MethodType(u16), 24 | Dynamic(u16, u16), 25 | InvokeDynamic(u16, u16), 26 | Module(u16), 27 | Package(u16), 28 | } 29 | impl<'a> Const<'a> { 30 | fn read(r: &mut Reader<'a>) -> Result<(Self, bool), ParseError> { 31 | use Const::*; 32 | let tag = r.u8()?; 33 | Ok(( 34 | match tag { 35 | 1 => { 36 | let count = r.u16()?; 37 | Utf8(BStr(r.get(count as usize)?)) 38 | } 39 | 3 => Int(r.u32()?), 40 | 4 => Float(r.u32()?), 41 | 5 => Long(r.u64()?), 42 | 6 => Double(r.u64()?), 43 | 7 => 
Class(r.u16()?), 44 | 8 => Str(r.u16()?), 45 | 9 => Field(r.u16()?, r.u16()?), 46 | 10 => Method(r.u16()?, r.u16()?), 47 | 11 => InterfaceMethod(r.u16()?, r.u16()?), 48 | 12 => NameAndType(r.u16()?, r.u16()?), 49 | 15 => MethodHandle(r.u8()?, r.u16()?), 50 | 16 => MethodType(r.u16()?), 51 | 17 => Dynamic(r.u16()?, r.u16()?), 52 | 18 => InvokeDynamic(r.u16()?, r.u16()?), 53 | 19 => Module(r.u16()?), 54 | 20 => Package(r.u16()?), 55 | _ => return ParseError::s("Unrecognized constant pool tag"), 56 | }, 57 | tag == 5 || tag == 6, 58 | )) 59 | } 60 | } 61 | 62 | #[derive(Debug)] 63 | pub struct ConstPool<'a>(pub Vec>); 64 | impl<'a> ConstPool<'a> { 65 | pub(super) fn new(r: &mut Reader<'a>) -> Result { 66 | let count = r.u16()? as usize; 67 | let mut cp = Vec::with_capacity(count); 68 | cp.push(Const::Null); 69 | while cp.len() < count { 70 | let (entry, extra) = Const::read(r)?; 71 | // println!("const[{}] = {:?}", cp.len(), entry); 72 | cp.push(entry); 73 | if extra { 74 | cp.push(Const::Null) 75 | } 76 | } 77 | Ok(Self(cp)) 78 | } 79 | 80 | pub fn utf8(&self, i: u16) -> Option<&'a [u8]> { 81 | self.0 82 | .get(i as usize) 83 | .and_then(|c| if let Const::Utf8(s) = c { Some(s.0) } else { None }) 84 | } 85 | 86 | pub fn clsutf(&self, i: u16) -> Option<&'a [u8]> { 87 | self.0.get(i as usize).and_then(|c| { 88 | if let Const::Class(utf_ind) = c { 89 | self.utf8(*utf_ind) 90 | } else { 91 | None 92 | } 93 | }) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/lib/classfile/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod attrs; 2 | pub mod code; 3 | pub mod cpool; 4 | pub mod parse; 5 | pub mod reader; 6 | 7 | pub use parse::parse; 8 | pub use parse::ParserOptions; 9 | pub use reader::ParseError; 10 | -------------------------------------------------------------------------------- /src/lib/classfile/parse.rs: 
--------------------------------------------------------------------------------
use super::attrs::Attribute;
use super::code::CodeOptions;
use super::cpool::ConstPool;
use super::reader::ParseError;
use super::reader::Reader;

/// Options accepted by [`parse`].
#[derive(Debug, Clone, Copy)]
pub struct ParserOptions {
    /// When true, short code parsing is never enabled (see `Class::new`).
    pub no_short_code_attr: bool,
}

/// A field or method entry; both share this layout.
#[derive(Debug)]
pub struct Field<'a> {
    pub access: u16,
    pub name: u16,
    pub desc: u16,
    pub attrs: Vec<Attribute<'a>>,
}
impl<'a> Field<'a> {
    fn new(r: &mut Reader<'a>, cp: &ConstPool<'a>, code_opts: CodeOptions) -> Result<Self, ParseError> {
        let (access, name, desc) = (r.u16()?, r.u16()?, r.u16()?);
        let attrs = Attribute::new_list(r, cp, None, code_opts)?;

        Ok(Self {
            access,
            name,
            desc,
            attrs,
        })
    }
}

/// A fully parsed class file.
#[derive(Debug)]
pub struct Class<'a> {
    /// `(major, minor)`.
    pub version: (u16, u16),
    pub cp: ConstPool<'a>,
    pub access: u16,
    pub this: u16,
    pub super_: u16,

    pub interfaces: Vec<u16>,
    pub fields: Vec<Field<'a>>,
    pub methods: Vec<Field<'a>>,
    pub attrs: Vec<Attribute<'a>>,

    pub has_ambiguous_short_code: bool,
}
impl<'a> Class<'a> {
    /// Parses a whole class file from `r`.
    ///
    /// Short code parsing is allowed only for versions up to 45.2 and only
    /// when `opts.no_short_code_attr` is false. Fails on a wrong magic number,
    /// on truncated or malformed data, and on bytes left over after the class
    /// attributes.
    fn new(r: &mut Reader<'a>, opts: ParserOptions) -> Result<Self, ParseError> {
        let magic = r.u32()?;
        if magic != 0xCAFEBABE {
            return ParseError::s("Classfile does not start with magic bytes. Are you sure you passed in a classfile?");
        }

        // The file stores minor before major; the tuple puts major first so
        // that the comparison below orders versions correctly.
        let minor = r.u16()?;
        let major = r.u16()?;
        let version = (major, minor);

        let allow_short = version <= (45, 2) && !opts.no_short_code_attr;
        let code_opts = CodeOptions { allow_short };

        let cp = ConstPool::new(r)?;

        let access = r.u16()?;
        let this = r.u16()?;
        let super_ = r.u16()?;

        let interfaces = r.parse_list(|r| r.u16())?;
        let fields = r.parse_list(|r| Field::new(r, &cp, code_opts))?;
        let methods = r.parse_list(|r| Field::new(r, &cp, code_opts))?;
        let attrs = Attribute::new_list(r, &cp, None, code_opts)?;

        // Set only if there is at least one method and every attribute of
        // every method reports an ambiguous short code.
        let has_ambiguous_short_code = code_opts.allow_short
            && !methods.is_empty()
            && methods
                .iter()
                .flat_map(|m| m.attrs.iter())
                .all(Attribute::has_ambiguous_short_code);

        if !r.0.is_empty() {
            return ParseError::s("Extra data at end of classfile");
        }

        Ok(Class {
            version,
            cp,
            access,
            this,
            super_,
            interfaces,
            fields,
            methods,
            attrs,
            has_ambiguous_short_code,
        })
    }
}

/// Parses `data` as a class file.
pub fn parse(data: &[u8], opts: ParserOptions) -> Result<Class, ParseError> {
    Class::new(&mut Reader(data), opts)
}
--------------------------------------------------------------------------------
/src/lib/classfile/reader.rs:
--------------------------------------------------------------------------------
/// Error raised while parsing a class file; carries a static message.
#[derive(Debug)]
pub struct ParseError(pub &'static str);
impl ParseError {
    /// Convenience constructor returning `Err(ParseError(s))` for any `T`.
    pub fn s<T>(s: &'static str) -> Result<T, ParseError> {
        Err(ParseError(s))
    }
}

/// Cursor over a byte slice; every read shrinks the slice from the front.
#[derive(Debug, Clone)]
pub(super) struct Reader<'a>(pub(super) &'a [u8]);
impl<'a> Reader<'a> {
    /// Takes the next `n` bytes, or fails with "end of data" if fewer remain.
    pub(super) fn get(&mut self, n: usize) -> Result<&'a [u8], ParseError> {
        if self.0.len() < n {
            return ParseError::s("end of data");
        }

        let (head, tail) = self.0.split_at(n);
        self.0 = tail;
        Ok(head)
    }

    pub(super) fn u8(&mut self) -> Result<u8, ParseError> {
        let bytes = self.get(1)?;
        Ok(bytes[0])
    }
    /// Reads a big-endian `u16`.
    pub(super) fn u16(&mut self) -> Result<u16, ParseError> {
        let bytes = self.get(2)?;
        Ok(u16::from_be_bytes(bytes.try_into().unwrap()))
    }
    /// Reads a big-endian `u32`.
    pub(super) fn u32(&mut self) -> Result<u32, ParseError> {
        let bytes = self.get(4)?;
        Ok(u32::from_be_bytes(bytes.try_into().unwrap()))
    }
    /// Reads a big-endian `u64`.
    pub(super) fn u64(&mut self) -> Result<u64, ParseError> {
        let bytes = self.get(8)?;
        Ok(u64::from_be_bytes(bytes.try_into().unwrap()))
    }

    pub(super) fn i8(&mut self) -> Result<i8, ParseError> {
        self.u8().map(|v| v as i8)
    }
    pub(super) fn i16(&mut self) -> Result<i16, ParseError> {
        self.u16().map(|v| v as i16)
    }
    pub(super) fn i32(&mut self) -> Result<i32, ParseError> {
        self.u32().map(|v| v as i32)
    }

    /// Reads a `u16` count and then that many items via `cb`, stopping at the
    /// first error.
    pub(super) fn parse_list<T>(
        &mut self,
        mut cb: impl FnMut(&mut Self) -> Result<T, ParseError>,
    ) -> Result<Vec<T>, ParseError> {
        let count = self.u16()? as usize;
        let mut items = Vec::with_capacity(count);
        while items.len() < count {
            items.push(cb(self)?);
        }
        Ok(items)
    }
    /// Same as [`Reader::parse_list`], but the count is a single byte.
    pub(super) fn parse_list_bytelen<T>(
        &mut self,
        mut cb: impl FnMut(&mut Self) -> Result<T, ParseError>,
    ) -> Result<Vec<T>, ParseError> {
        let count = self.u8()? as usize;
        let mut items = Vec::with_capacity(count);
        while items.len() < count {
            items.push(cb(self)?);
        }
        Ok(items)
    }
}
--------------------------------------------------------------------------------
/src/lib/disassemble/flags.rs:
--------------------------------------------------------------------------------
use std::fmt;
// Each table maps bit `i` of a 16-bit flag word (least significant bit first)
// to the keyword printed for it.
static CLASS: [&str; 16] = [
    "public", "private", "protected", "static",
    "final", "super", "volatile", "transient",
    "native", "interface", "abstract", "strict",
    "synthetic", "annotation", "enum", "module",
];
static FIELD: [&str; 16] = [
    "public", "private", "protected", "static",
    "final", "super", "volatile", "transient",
    "native", "interface", "abstract", "strict",
    "synthetic", "annotation", "enum", "module",
];
static METHOD: [&str; 16] = [
    "public", "private", "protected", "static",
    "final", "synchronized", "bridge", "varargs",
    "native", "interface", "abstract", "strict",
    "synthetic", "annotation", "enum", "module",
];
static MOD_REQUIRES: [&str; 16] = [
    "public", "private", "protected", "static",
    "final", "transitive", "static_phase", "transient",
    "native", "interface", "abstract", "strict",
    "synthetic", "annotation", "enum", "mandated",
];
static MOD_OTHER: [&str; 16] = [
    "public", "private", "protected", "static",
    "final", "open", "volatile", "transient",
    "native", "interface", "abstract", "strict",
    "synthetic", "annotation", "enum", "mandated",
];
// Flag keywords in ascending byte order. Includes "strictfp", which none of
// the tables above contain.
pub static ALL_FLAGS: [&str; 24] = [
    "abstract", "annotation", "bridge", "enum",
    "final", "interface", "mandated", "module",
    "native", "open", "private", "protected",
    "public", "static", "static_phase", "strict",
    "strictfp", "super", "synchronized", "synthetic",
    "transient", "transitive", "varargs", "volatile",
];

/// A flag word paired with the name table used to print it.
pub(super) struct Flags(&'static [&'static str; 16], u16);
impl fmt::Display for Flags {
    /// Writes the name of every set bit, lowest bit first, each preceded by a
    /// single space.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for (bit, name) in self.0.iter().enumerate() {
            if (self.1 >> bit) & 1 == 1 {
                f.write_str(" ")?;
                f.write_str(name)?;
            }
        }
        Ok(())
    }
}
impl Flags {
    pub(super) fn class(v: u16) -> Flags {
        Flags(&CLASS, v)
    }
    pub(super) fn field(v: u16) -> Flags {
        Flags(&FIELD, v)
    }
    pub(super) fn method(v: u16) -> Flags {
        Flags(&METHOD, v)
    }
    pub(super) fn mod_requires(v: u16) -> Flags {
        Flags(&MOD_REQUIRES, v)
    }
    pub(super) fn mod_other(v: u16) -> Flags {
        Flags(&MOD_OTHER, v)
    }
}
--------------------------------------------------------------------------------
/src/lib/disassemble/mod.rs:
--------------------------------------------------------------------------------
mod disassembler;
mod flags;
mod refprinter;
pub mod string;

pub use disassembler::disassemble;
pub use disassembler::DisassemblerOptions;
--------------------------------------------------------------------------------
/src/lib/disassemble/refprinter.rs:
--------------------------------------------------------------------------------
use std::borrow::Cow;
use std::cell::Cell;
use std::fmt;
use std::fmt::Display;

use super::string::escape;
use super::string::StrLitType;
use crate::lib::classfile::attrs::BootstrapMethod;
use crate::lib::classfile::cpool::Const;
use crate::lib::classfile::cpool::ConstPool;
use
crate::lib::mhtags::MHTAGS;

/// An escaped UTF8 constant together with a counter of how often it has been
/// handed out by [`UtfData::ident`].
struct UtfData<'a> {
    stype: StrLitType,
    s: Cow<'a, str>,
    // NOTE(review): the type argument of `Cell` was lost when this file was
    // extracted; `u8` is assumed (the counter stops at 10) — confirm against
    // the real source.
    use_count: Cell<u8>,
}
impl<'a> UtfData<'a> {
    /// Borrows the escaped text as a printable literal.
    fn to_lit(&'a self) -> StringLit<'a> {
        StringLit {
            stype: self.stype,
            s: self.s.as_ref(),
        }
    }

    /// Returns the literal if it is short enough to be used directly.
    ///
    /// Text under 50 bytes is always returned. Text under 300 bytes is
    /// returned for at most ten calls, and each of those calls is counted.
    /// Anything longer yields `None`.
    fn ident(&'a self) -> Option<StringLit<'a>> {
        let len = self.s.as_ref().len();
        if len < 50 {
            return Some(self.to_lit());
        }
        if len < 300 && self.use_count.get() < 10 {
            self.use_count.set(self.use_count.get() + 1);
            return Some(self.to_lit());
        }
        None
    }
}

/// Already-escaped text plus the quoting style to print it with.
pub(super) struct StringLit<'a> {
    stype: StrLitType,
    s: &'a str,
}
impl Display for StringLit<'_> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.stype {
            StrLitType::Unquoted => f.write_str(self.s),
            StrLitType::Regular => write!(f, "\"{}\"", self.s),
            StrLitType::Binary => write!(f, "b\"{}\"", self.s),
        }
    }
}

/// Either an index-based reference or an inline string literal.
pub(super) enum RefOrString<'a> {
    Raw(u16),
    Sym(u16),
    RawBs(u16),
    Str(StringLit<'a>),
}
use RefOrString::*;
impl Display for RefOrString<'_> {
    /// Prints `[n]`, `[_n]` or `[bs:n]` for the index variants and the literal
    /// itself for `Str`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use RefOrString::*;
        match self {
            Raw(ind) => write!(f, "[{}]", ind),
            Sym(ind) => write!(f, "[_{}]", ind),
            RawBs(ind) => write!(f, "[bs:{}]", ind),
            Str(sl) => sl.fmt(f),
        }
    }
}

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
pub enum SingleTag {
    Class,
    String,
    MethodType,
    Module,
    Package,
}

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum PrimTag {
    Int,
    Long,
    Float,
    Double,
}

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum FmimTag {
    Field,
    Method,
    InterfaceMethod,
}

#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum DynTag {
    Dynamic,
    InvokeDynamic,
}

/// A constant pool entry regrouped by operand shape, with primitive values
/// already rendered as text.
enum ConstData<'a> {
    Invalid,
    Utf8(UtfData<'a>),
    Prim(PrimTag, String),
    Single(SingleTag, u16),
    Fmim(FmimTag, u16, u16),
    Nat(u16, u16),

    MethodHandle(u8, u16),
    Dyn(DynTag, u16, u16),
}
impl<'a> ConstData<'a> {
    /// Converts a raw constant.
    ///
    /// With `roundtrip` set, NaN values are rendered with their exact bit
    /// pattern; otherwise a plain `+NaN` is used.
    fn new(roundtrip: bool, c: &Const<'a>) -> Self {
        use Const::*;
        match c {
            Null => ConstData::Invalid,
            Utf8(s) => {
                let (stype, s) = escape(s.0);
                let use_count = Cell::new(0);
                ConstData::Utf8(UtfData { stype, s, use_count })
            }

            Int(v) => ConstData::Prim(PrimTag::Int, (*v as i32).to_string()),
            Long(v) => ConstData::Prim(PrimTag::Long, format!("{}L", *v as i64)),
            Float(v) => {
                let f = f32::from_bits(*v);
                let text = if f.is_nan() {
                    if roundtrip {
                        format!("+NaN<0x{:08X}>f", *v)
                    } else {
                        "+NaNf".to_string()
                    }
                } else if f.is_infinite() {
                    if f > 0.0 { "+Infinityf" } else { "-Infinityf" }.to_string()
                } else {
                    format!("{:e}f", f)
                };
                ConstData::Prim(PrimTag::Float, text)
            }
            Double(v) => {
                let f = f64::from_bits(*v);
                let text = if f.is_nan() {
                    if roundtrip {
                        format!("+NaN<0x{:016X}>", *v)
                    } else {
                        "+NaN".to_string()
                    }
                } else if f.is_infinite() {
                    if f > 0.0 { "+Infinity" } else { "-Infinity" }.to_string()
                } else {
                    format!("{:e}", f)
                };
                ConstData::Prim(PrimTag::Double, text)
            }

            Class(v) => ConstData::Single(SingleTag::Class, *v),
            Str(v) => ConstData::Single(SingleTag::String, *v),
            MethodType(v) => ConstData::Single(SingleTag::MethodType, *v),
            Module(v) => ConstData::Single(SingleTag::Module, *v),
            Package(v) => ConstData::Single(SingleTag::Package, *v),

            Field(c, nat) => ConstData::Fmim(FmimTag::Field, *c, *nat),
            Method(c, nat) => ConstData::Fmim(FmimTag::Method, *c, *nat),
            InterfaceMethod(c, nat) => ConstData::Fmim(FmimTag::InterfaceMethod, *c, *nat),

            NameAndType(n, t) => ConstData::Nat(*n, *t),
            MethodHandle(tag, t) => ConstData::MethodHandle(*tag, *t),

            Dynamic(r1, r2) => ConstData::Dyn(DynTag::Dynamic, *r1, *r2),
            InvokeDynamic(r1, r2) => ConstData::Dyn(DynTag::InvokeDynamic, *r1, *r2),
        }
    }
}

/// Per-slot bookkeeping for one constant pool entry.
struct ConstLine<'a> {
    data: ConstData<'a>,
    // Initialised to the `roundtrip` setting.
    force_raw: bool,
    is_defined: Cell<bool>, // used during printing at the end
    sym_used: Cell<bool>,
}
impl<'a> ConstLine<'a> {
    fn new(roundtrip: bool, c: &Const<'a>) -> Self {
        let data = ConstData::new(roundtrip, c);
        Self {
            data,
            force_raw: roundtrip,
            is_defined: Cell::new(false),
            sym_used: Cell::new(false),
        }
    }
}

pub(super) struct RefPrinter<'a> {
    roundtrip: bool,
    cpool: Vec<ConstLine<'a>>,
    bs: &'a [BootstrapMethod],
}
impl<'a> RefPrinter<'a> {
    pub(super) fn new(
        roundtrip: bool,
        cp: &ConstPool<'a>,
        bs: Option<&'a [BootstrapMethod]>,
        inner_classes: Option<&'a [(u16, u16, u16, u16)]>,
    ) -> Self {
        let mut new = Self {
            roundtrip,
            cpool: cp.0.iter().map(|c| ConstLine::new(roundtrip, c)).collect(),
            bs: bs.unwrap_or(&[]),
        };

        // There is one case where exact references are significant due to a bug in old versions of the JVM. In the InnerClasses attribute, specifying the same index for inner and outer class will fail verification, but specifying different indexes which point to identical class entries will pass (at least in old versions of Java). In this case, we force references to those indexes to be raw, so they don't get merged and potentially break the class.
212 | for (inner, outer, _, _) in inner_classes.unwrap_or(&[]).iter().copied() { 213 | if inner == outer { 214 | continue; 215 | } 216 | 217 | if let Some(s1) = cp.clsutf(inner) { 218 | if let Some(s2) = cp.clsutf(outer) { 219 | if s1 == s2 { 220 | new.cpool[inner as usize].force_raw = true; 221 | new.cpool[outer as usize].force_raw = true; 222 | } 223 | } 224 | } 225 | } 226 | 227 | new 228 | } 229 | 230 | fn get(&self, ind: u16) -> Option<&ConstData<'a>> { 231 | if let Some(ConstLine { 232 | data, force_raw: false, .. 233 | }) = self.cpool.get(ind as usize) 234 | { 235 | Some(data) 236 | } else { 237 | None 238 | } 239 | } 240 | 241 | fn ident(&self, ind: u16) -> Option { 242 | if let Some(ConstData::Utf8(d)) = self.get(ind) { 243 | d.ident() 244 | } else { 245 | None 246 | } 247 | } 248 | 249 | fn symref(&self, ind: u16) -> RefOrString { 250 | self.cpool[ind as usize].sym_used.set(true); 251 | Sym(ind) 252 | } 253 | 254 | pub(super) fn cpref(&self, ind: u16) -> RefOrString { 255 | if let Some(_) = self.get(ind) { 256 | self.symref(ind) 257 | } else { 258 | Raw(ind) 259 | } 260 | } 261 | 262 | pub(super) fn utf(&self, ind: u16) -> RefOrString { 263 | if let Some(ConstData::Utf8(d)) = self.get(ind) { 264 | if let Some(sl) = d.ident() { 265 | Str(sl) 266 | } else { 267 | self.symref(ind) 268 | } 269 | } else { 270 | Raw(ind) 271 | } 272 | } 273 | 274 | pub(super) fn single(&self, ind: u16, expected: SingleTag) -> RefOrString { 275 | if let Some(ConstData::Single(tag, v)) = self.get(ind) { 276 | if *tag != expected { 277 | return Raw(ind); 278 | } 279 | if let Some(sl) = self.ident(*v) { 280 | Str(sl) 281 | } else { 282 | self.symref(ind) 283 | } 284 | } else { 285 | Raw(ind) 286 | } 287 | } 288 | 289 | pub(super) fn cls(&self, ind: u16) -> RefOrString { 290 | self.single(ind, SingleTag::Class) 291 | } 292 | 293 | pub(super) fn nat(&self, ind: u16) -> impl Display + '_ { 294 | LazyPrint(move |f: &mut fmt::Formatter| { 295 | if let Some(ConstData::Nat(n, t)) = 
self.get(ind) { 296 | if let Some(sl) = self.ident(*n) { 297 | write!(f, "{} {}", sl, self.utf(*t)) 298 | } else { 299 | self.symref(ind).fmt(f) 300 | } 301 | } else { 302 | Raw(ind).fmt(f) 303 | } 304 | }) 305 | } 306 | 307 | pub(super) fn tagged_fmim(&self, ind: u16) -> impl Display + '_ { 308 | LazyPrint(move |f: &mut fmt::Formatter| { 309 | if let Some(ConstData::Fmim(tag, c, nat)) = self.get(ind) { 310 | write!(f, "{:?} {} {}", *tag, self.cls(*c), self.nat(*nat)) 311 | } else { 312 | Raw(ind).fmt(f) 313 | } 314 | }) 315 | } 316 | 317 | fn tagged_const_nomhdyn(&self, f: &mut fmt::Formatter, ind: u16) -> fmt::Result { 318 | // Like regular tagged_const except that MethodHandle and Dynamic/InvokeDynamic 319 | // are replaced with symrefs to prevent recursion or indefinite expansion 320 | if let Some(c) = self.get(ind) { 321 | use ConstData::*; 322 | match c { 323 | MethodHandle(..) | Dyn(..) => self.symref(ind).fmt(f), 324 | _ => self.tagged_const_sub(f, c), 325 | } 326 | } else { 327 | Raw(ind).fmt(f) 328 | } 329 | } 330 | 331 | fn mhnotref(&self, f: &mut fmt::Formatter, mhtag: u8, r: u16) -> fmt::Result { 332 | let tag_str = MHTAGS.get(mhtag as usize).copied().unwrap_or("INVALID"); 333 | // todo - inline tagged ref consts 334 | write!(f, "{} ", tag_str)?; 335 | self.tagged_const_nomhdyn(f, r) 336 | } 337 | 338 | fn bsnotref(&self, f: &mut fmt::Formatter, bsm: &BootstrapMethod, tagged: bool) -> fmt::Result { 339 | if tagged { 340 | write!(f, "Bootstrap {}", Raw(bsm.bsref))?; 341 | } else if let Some(ConstData::MethodHandle(mhtag, r)) = self.get(bsm.bsref) { 342 | self.mhnotref(f, *mhtag, *r)?; 343 | } else { 344 | Raw(bsm.bsref).fmt(f)?; 345 | } 346 | 347 | for bsarg in &bsm.args { 348 | write!(f, " ")?; 349 | self.tagged_const_nomhdyn(f, *bsarg)?; 350 | } 351 | write!(f, " :") 352 | } 353 | 354 | fn bs(&self, bsind: u16) -> impl Display + '_ { 355 | LazyPrint(move |f: &mut fmt::Formatter| { 356 | if !self.roundtrip { 357 | if let Some(bsm) = self.bs.get(bsind 
as usize) { 358 | return self.bsnotref(f, bsm, false); 359 | } 360 | } 361 | 362 | RawBs(bsind).fmt(f) 363 | }) 364 | } 365 | 366 | fn tagged_const_sub(&self, f: &mut fmt::Formatter, c: &ConstData) -> fmt::Result { 367 | use ConstData::*; 368 | match c { 369 | Invalid => panic!("Internal error: Please report this!"), 370 | Utf8(ud) => write!(f, "Utf8 {}", ud.to_lit()), 371 | Prim(tag, s) => write!(f, "{:?} {}", tag, s), 372 | Single(tag, r) => write!(f, "{:?} {}", tag, self.utf(*r)), 373 | Fmim(tag, r1, r2) => write!(f, "{:?} {} {}", tag, self.cls(*r1), self.nat(*r2)), 374 | Nat(r1, r2) => write!(f, "NameAndType {} {}", self.utf(*r1), self.utf(*r2)), 375 | MethodHandle(mhtag, r) => { 376 | f.write_str("MethodHandle ")?; 377 | self.mhnotref(f, *mhtag, *r) 378 | } 379 | Dyn(tag, bs, nat) => write!(f, "{:?} {} {}", tag, self.bs(*bs), self.nat(*nat)), 380 | } 381 | } 382 | 383 | #[allow(unused)] 384 | pub(super) fn tagged_const(&self, ind: u16) -> impl Display + '_ { 385 | LazyPrint(move |f: &mut fmt::Formatter| { 386 | if let Some(c) = self.get(ind) { 387 | self.tagged_const_sub(f, c) 388 | } else { 389 | Raw(ind).fmt(f) 390 | } 391 | }) 392 | } 393 | 394 | fn ldcrhs_sub(&self, f: &mut fmt::Formatter, ind: u16, c: &ConstData) -> fmt::Result { 395 | use ConstData::*; 396 | match c { 397 | Prim(_tag, s) => write!(f, "{}", s), 398 | Single(SingleTag::String, r) => { 399 | if let Some(mut sl) = self.ident(*r) { 400 | if sl.stype == StrLitType::Unquoted { 401 | sl.stype = StrLitType::Regular; 402 | } 403 | write!(f, "{}", sl) 404 | } else { 405 | write!(f, "{}", self.symref(ind)) 406 | } 407 | } 408 | Single(tag, r) => write!(f, "{:?} {}", tag, self.utf(*r)), 409 | 410 | MethodHandle(..) | Dyn(..) => self.tagged_const_sub(f, c), 411 | Invalid | Utf8(..) | Fmim(..) | Nat(..) 
=> write!(f, "{}", Raw(ind)), 412 | } 413 | } 414 | 415 | pub(super) fn ldc(&self, ind: u16) -> impl Display + '_ { 416 | LazyPrint(move |f: &mut fmt::Formatter| { 417 | if let Some(c) = self.get(ind) { 418 | self.ldcrhs_sub(f, ind, c) 419 | } else { 420 | Raw(ind).fmt(f) 421 | } 422 | }) 423 | } 424 | 425 | fn cp_def_rhs(&'a self, c: &'a ConstData) -> impl Display + 'a { 426 | LazyPrint(move |f: &mut fmt::Formatter| self.tagged_const_sub(f, c)) 427 | } 428 | 429 | fn bs_def_rhs(&'a self, bsm: &'a BootstrapMethod) -> impl Display + 'a { 430 | LazyPrint(move |f: &mut fmt::Formatter| self.bsnotref(f, bsm, true)) 431 | } 432 | 433 | pub(super) fn print_const_defs(self, mut w: impl std::io::Write) -> std::io::Result<()> { 434 | loop { 435 | let mut done = true; 436 | for (ind, line) in self.cpool.iter().enumerate() { 437 | let ind = ind as u16; 438 | 439 | if let ConstData::Invalid = line.data { 440 | continue; 441 | } 442 | if !line.is_defined.get() && (line.force_raw || line.sym_used.get()) { 443 | let lhs = if line.force_raw { Raw(ind) } else { Sym(ind) }; 444 | writeln!(w, ".const {} = {}", lhs, self.cp_def_rhs(&line.data))?; 445 | line.is_defined.set(true); 446 | done = false; 447 | } 448 | } 449 | // In non-roundtrip mode, printing symref defs may result in other 450 | // constant pool entries being referenced for the first time, so we 451 | // have to repeat the loop until no more entries are printed 452 | if done || self.roundtrip { 453 | break; 454 | } 455 | } 456 | 457 | // We never create symbolic bs refs in non-roundtrip mode (printing them inline instead) 458 | // which makes things easy - print the whole table raw in roundtrip mode, do nothing otherwise 459 | if self.roundtrip { 460 | for (ind, bsm) in self.bs.iter().enumerate() { 461 | let ind = ind as u16; 462 | writeln!(w, ".bootstrap {} = {}", RawBs(ind), self.bs_def_rhs(bsm))?; 463 | } 464 | } 465 | Ok(()) 466 | } 467 | } 468 | 469 | struct LazyPrint(F); 470 | impl fmt::Result> Display for LazyPrint 
/// Decodes JVM "modified UTF-8" bytes into UTF-16 code units, passing each unit
/// to `cb` in order.
///
/// Returns `true` if the whole input was well formed. Returns `false` as soon as
/// malformed input is found; units decoded before that point have already been
/// passed to `cb`. Input is malformed when it contains:
/// * a raw `0x00` byte or a lead byte of the form `0b1111xxxx` / `0b10xxxxxx`,
/// * a multi-byte sequence whose trailing bytes are not `0b10xxxxxx`
///   continuation bytes,
/// * a multi-byte sequence cut short by the end of input.
///
/// The last two cases used to be accepted (the bad bytes were decoded or silently
/// dropped). Callers would then print a regular string literal that does not
/// encode back to the original bytes.
fn decode(mut iter: impl Iterator<Item = u8>, mut cb: impl FnMut(u16)) -> bool {
    /// Consumes the next byte and returns its six payload bits if it is a
    /// continuation byte; `None` on end of input or any other byte.
    fn continuation(iter: &mut impl Iterator<Item = u8>) -> Option<u16> {
        match iter.next() {
            Some(b) if b & 0b1100_0000 == 0b1000_0000 => Some(u16::from(b & 0b0011_1111)),
            _ => None,
        }
    }

    while let Some(b) = iter.next() {
        let unit = match b {
            // Single byte: 0xxxxxxx (zero itself is never encoded this way).
            0b0000_0001..=0b0111_1111 => Some(u16::from(b)),
            // Two bytes: 110xxxxx 10xxxxxx.
            0b1100_0000..=0b1101_1111 => {
                let hi = u16::from(b & 0b0001_1111);
                continuation(&mut iter).map(|lo| (hi << 6) | lo)
            }
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
            0b1110_0000..=0b1110_1111 => {
                let hi = u16::from(b & 0b0000_1111);
                match (continuation(&mut iter), continuation(&mut iter)) {
                    (Some(mid), Some(lo)) => Some((hi << 12) | (mid << 6) | lo),
                    _ => None,
                }
            }
            _ => None,
        };

        match unit {
            Some(unit) => cb(unit),
            None => return false, // invalid MUTF8
        }
    }
    true
}
Clone, Copy)] 62 | pub enum StrLitType { 63 | Unquoted, 64 | Regular, 65 | Binary, 66 | } 67 | 68 | pub(super) fn escape(s: &[u8]) -> (StrLitType, Cow) { 69 | if let Ok(s) = str::from_utf8(s) { 70 | if is_word(s) { 71 | return (StrLitType::Unquoted, Cow::from(s)); 72 | } 73 | } 74 | 75 | let (ty, s) = escape_sub(s); 76 | (ty, Cow::from(s)) 77 | } 78 | 79 | pub(super) fn escape_byte_string(s: &[u8]) -> String { 80 | let mut buf = String::with_capacity(s.len() * 4); 81 | for b in s { 82 | write!(buf, "\\x{:02X}", b).unwrap(); 83 | } 84 | buf 85 | } 86 | 87 | pub fn parse_utf8(s: &[u8]) -> Option { 88 | if let Ok(s) = str::from_utf8(s) { 89 | return Some(s.to_owned()); 90 | } 91 | 92 | let mut u16s = Vec::with_capacity(s.len()); 93 | if !decode(s.iter().copied(), |c16| { 94 | u16s.push(c16); 95 | }) { 96 | return None; 97 | } 98 | 99 | std::char::decode_utf16(u16s.into_iter()).collect::>().ok() 100 | } 101 | 102 | #[cfg(test)] 103 | mod tests { 104 | use super::*; 105 | 106 | #[test] 107 | fn test_is_word() { 108 | assert!(is_word("hello")); 109 | assert!(is_word("[Lhello/world;")); 110 | assert!(is_word("[[[Z")); 111 | assert!(is_word("
/// Method handle kind names. A name's position in the table is its numeric tag;
/// position 0 holds the `"INVALID"` placeholder.
pub static MHTAGS: [&str; 10] = [
    "INVALID",
    "getField",
    "getStatic",
    "putField",
    "putStatic",
    "invokeVirtual",
    "invokeStatic",
    "invokeSpecial",
    "newInvokeSpecial",
    "invokeInterface",
];

/// Returns the numeric tag whose name in `MHTAGS` equals `s`, or `None` if no
/// entry matches.
pub fn parse(s: &str) -> Option<u8> {
    for (tag, name) in (0u8..).zip(MHTAGS.iter()) {
        if *name == s {
            return Some(tag);
        }
    }
    None
}
/// A borrowed byte string whose `Debug` output shows the bytes as (lossily
/// decoded) text instead of a list of numbers.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct BStr<'a>(pub &'a [u8]);

impl<'a> std::fmt::Debug for BStr<'a> {
    /// Formats the bytes as a quoted, escaped string. Invalid UTF-8 is replaced
    /// with U+FFFD by `String::from_utf8_lossy`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = String::from_utf8_lossy(self.0);
        std::fmt::Debug::fmt(&text, f)
    }
}
let cli = Cli::parse(); 35 | let res = match cli.command { 36 | Command::Asm(cli) => assembler_main(cli), 37 | Command::Dis(cli) => disassembler_main(cli), 38 | }; 39 | if let Err(err) = res { 40 | println!("Error: {:?}", err); 41 | // set exit code 1 if there were errors 42 | 1 43 | } else { 44 | 0 45 | } 46 | } 47 | fn main() { 48 | // Workaround for limited stack size in Rust: Spawn a thread with 256mb stack and run everything there. 49 | let child = thread::Builder::new().stack_size(256 * 1024 * 1024).spawn(real_main).unwrap(); 50 | std::process::exit(child.join().unwrap()); 51 | // std::process::exit(real_main()); 52 | } 53 | --------------------------------------------------------------------------------