├── .github └── workflows │ └── go.yml ├── LICENSE ├── README.md ├── asm ├── compiler.go ├── compiler_analysis.go ├── compiler_eval.go ├── compiler_expand.go ├── compiler_prog.go ├── compiler_prog_test.go ├── compiler_test.go ├── error.go ├── evaluator.go ├── evaluator_builtins.go ├── evaluator_test.go ├── global.go ├── statements.go └── testdata │ ├── compiler-tests.yaml │ └── known-bytecode.yaml ├── assets └── geas-b.svg ├── cmd └── geas │ └── geas.go ├── disasm ├── disassembler.go └── disassembler_test.go ├── example ├── 4788asm.eas ├── 4788asm_ctor.eas └── erc20 │ ├── erc20.eas │ ├── erc20_ctor.eas │ ├── op_allowance.eas │ ├── op_approve.eas │ ├── op_balanceOf.eas │ ├── op_transfer.eas │ └── op_transferFrom.eas ├── go.mod ├── go.sum └── internal ├── ast ├── arith.go ├── arithop_string.go ├── ast.go ├── error.go ├── lexer.go ├── lexer_test.go ├── names.go ├── parse.go └── tokentype_string.go ├── evm ├── forkdefs.go ├── instruction_set.go ├── instruction_set_test.go └── ops.go ├── lzint ├── value.go └── value_test.go └── set └── set.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v4 18 | with: 19 | go-version: '1.23' 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test 25 | run: go test -v ./... 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 
7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 
48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 
90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 
129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![geas](assets/geas-b.svg) 2 | 3 | This is geas – the Good Ethereum Assembler[^1] – a macro assembler for the EVM. 4 | 5 | You can use it to create any contract for Ethereum, though it's probably a bad idea. 6 | For real contracts, you should use a well-tested language compiler like Solidity. 7 | The purpose of geas is mostly creating specialty programs and tinkering with the EVM 8 | at a low level. 9 | 10 | ### Installation 11 | 12 | You can use the `go` tool to install the latest released version. 13 | This creates a `geas` binary in the current directory: 14 | 15 | env "GOBIN=$PWD" go install github.com/fjl/geas/cmd/geas@latest 16 | 17 | For development of geas, clone the repository and then run `go build ./cmd/geas`. 18 | 19 | ### Usage 20 | 21 | To create bytecode from an assembly file, run the tool with a filename as argument. 22 | 23 | ./geas file.eas 24 | 25 | There is also a disassembler. To disassemble hex bytecode from standard input, run: 26 | 27 | ./geas -d - 28 | 29 | To see all supported flags, run `geas` with no arguments. 30 | 31 | ### Editor Support 32 | 33 | VIM users may be interested in [vim-geas](https://github.com/lightclient/vim-geas). 34 | 35 | ### Use as a Go Library 36 | 37 | You can also use the assembler as a library. See the [API documentation](https://pkg.go.dev/github.com/fjl/geas/asm) 38 | to get started. 39 | 40 | ## Language 41 | 42 | Programs accepted by the assembler follow a simple structure. Each line is an instruction. 
43 | Both uppercase and lowercase can be used for instruction names. All known EVM instructions 44 | are supported. 45 | 46 | Comments can appear anywhere and are introduced by the semicolon (;) character. 47 | 48 | push 1 ;; comment 49 | push 2 50 | add 51 | 52 | Opcodes listed in the program correspond directly with the bytecodes in output. 53 | 54 | ### Jump 55 | 56 | Jump destinations are written as a label followed by colon (:) and can be referred to 57 | using the notation `@label` together with JUMP or JUMPI. 58 | 59 | begin: 60 | push 1 61 | push 2 62 | add 63 | jump @begin 64 | 65 | When using JUMP with an argument, it turns into a PUSH of the label followed by the jump 66 | instruction, so the above is equivalent to: 67 | 68 | begin: 69 | push 1 70 | push 2 71 | add 72 | push @begin 73 | jump 74 | 75 | It is also possible to create labels without emitting a JUMPDEST instruction by prefixing 76 | the label name with the dot (.) character. While dotted labels are not valid for use as an 77 | argument to JUMP, they can be used with PUSH to measure code offsets. 78 | 79 | push @.end 80 | codesize 81 | eq 82 | .end: 83 | 84 | ### Push 85 | 86 | The EVM instruction set has sized push instructions from size zero (`PUSH0`) up to a size of 87 | 32 bytes (`PUSH32`). While you can use sized push instructions directly, it is preferable 88 | to let the assembler figure out the right size for you. To do this use the variable-size 89 | `PUSH` instruction. 90 | 91 | All PUSH-type instructions must be followed by an immediate argument on the same line. 92 | Simple math expressions and label references can be used within the argument: 93 | 94 | .begin: 95 | push (@add_it * 2) - 3 96 | push 5 97 | add_it: 98 | add 99 | 100 | Supported arithmetic operations include addition (+), subtraction (-), multiplication (*), 101 | division (/), and modulo (%). There is also support for bit-shifts (<<, >>), bitwise AND 102 | (&), OR (|), XOR (^). Note that operator precedence is the same as in Go.
103 | 104 | All arithmetic is performed with arbitrary precision integers. The result of calculations 105 | must fit into 256 bits in order to be valid as a PUSH argument. For sized push, the result 106 | must fit into the declared push size. Negative results are not allowed. 107 | 108 | ### Expression Macros 109 | 110 | Expression macros can be created with the `#define` directive. Macros can be used within 111 | PUSH argument expressions. 112 | 113 | Macros can have parameters. Refer to parameter values using the dollar sign ($) prefix 114 | within the macro. 115 | 116 | #define z = 0x8823 117 | #define myexpr(x, y) = ($x + $y) * z 118 | 119 | push myexpr(1, 2) 120 | 121 | ### Builtin Macros 122 | 123 | There are several builtin macros for common EVM tasks. Names of builtins start with a dot, 124 | and builtin macros cannot be redefined. Available builtins include: 125 | 126 | `.abs()` for getting the absolute value of a number: 127 | 128 | push .abs(0 - 100) 129 | 130 | `.selector()` for computing 4-byte ABI selectors: 131 | 132 | push .selector("transfer(address,uint256)") 133 | push 0 134 | mstore 135 | 136 | `.keccak256()`, `.sha256()` hash functions: 137 | 138 | push .sha256("data") 139 | 140 | `.address()` for declaring contract addresses. The checksum and byte length of the address 141 | are verified. 142 | 143 | #define otherContract = .address(0x658bdf435d810c91414ec09147daa6db62406379) 144 | 145 | ### Instruction Macros 146 | 147 | Common groups of instructions can be defined as instruction macros. Names of such macros 148 | always start with the percent (%) character. 149 | 150 | #define %add5_and_store(x, location) { 151 | push $x 152 | push 5 153 | add 154 | push $location 155 | mstore 156 | } 157 | 158 | To invoke an instruction macro, write the macro name as a statement on its own line. If 159 | the macro has no arguments, you can also leave the parentheses off. 
160 | 161 | .begin: 162 | %add5_and_store(3, 64) 163 | %add5_and_store(4, 32) 164 | push 32 165 | push 64 166 | sha3 167 | 168 | Nested macro definitions are not allowed. Macro recursion is also not allowed. 169 | 170 | When defining (local) labels within instruction macros, they will only be visible within 171 | the macro. There is no way to refer to a local macro label from the outside, though you 172 | can pass references to such internal labels into another macro. The example below 173 | illustrates this, and also shows that in order to jump to a label argument within a macro, 174 | you must use explicit PUSH and JUMP. 175 | 176 | #define %jump_if_not(label) { 177 | iszero 178 | push $label 179 | jumpi 180 | } 181 | 182 | #define %read_input(bytes) { 183 | calldatasize 184 | push $bytes 185 | eq 186 | %jump_if_not(@revert) 187 | 188 | push 0 189 | push $bytes 190 | calldataload 191 | jump @continue 192 | 193 | revert: 194 | push 0 195 | push 0 196 | revert 197 | 198 | continue: 199 | } 200 | 201 | ### Including Files 202 | 203 | EVM assembly files can be included into the current program using the `#include` 204 | directive. Top-level instructions in the included file will be inserted at the position of 205 | the directive. 206 | 207 | `#include` filenames are resolved relative to the file containing the directive. 208 | 209 | .begin: 210 | push @.end 211 | push 32 212 | mstore 213 | 214 | #include "file.evm" 215 | .end: 216 | 217 | ### Local and Global Scope 218 | 219 | Names of labels and macros are case-sensitive. And just like in Go, the case of the first 220 | letter determines visibility of definitions. 221 | 222 | Macro and label definitions whose name begins with a lower-case letter are local to the 223 | file they're defined in. This means local definitions cannot be referenced by `#include` 224 | files. 225 | 226 | Identifiers beginning with an upper-case letter are registered in the global scope and are 227 | available for use across files. 
When using `#include`, global definitions in the included 228 | file also become available in all other files. 229 | 230 | Global identifiers must be unique across the program, i.e. they can only be defined once. 231 | Files defining global macros or labels can only be included into the program once. Note 232 | that the uniqueness requirement also means that instruction macros containing global 233 | labels can only be called once. Use good judgement when structuring your includes to avoid 234 | redefinition errors. 235 | 236 | lib.eas: 237 | 238 | #define result = 128 239 | #define %StoreSum { 240 | add 241 | push result 242 | mstore 243 | } 244 | 245 | main.eas: 246 | 247 | #include "lib.eas" 248 | 249 | push 1 250 | push 2 251 | %StoreSum ;; calling global macro defined in lib.eas 252 | 253 | ### Configuring the target instruction set 254 | 255 | The EVM is a changing environment. Opcodes may be added (and sometimes removed) as new 256 | versions of the EVM are released in protocol forks. Geas is aware of EVM forks and their 257 | respective instruction sets. 258 | 259 | Geas always operates on a specific EVM instruction set. It targets the latest known eth 260 | mainnet fork by default, i.e. all opcodes available in that fork can be used, and opcodes 261 | that have been removed in any prior fork cannot. 262 | 263 | Use the `#pragma target` directive to change the target instruction set. The basic syntax is 264 | 265 | #pragma target "name" 266 | 267 | where `name` is a lower-case execution-layer fork name like `homestead`, `berlin`, or `prague`. 268 | 269 | Here is an example. This contract uses the CHAINID instruction to check if it is running 270 | on mainnet, and destroys itself otherwise. CHAINID became available in the "istanbul" 271 | fork, and SELFDESTRUCT was removed in a later revision of the EVM, so this program is only 272 | applicable to a certain range of past EVM versions.
273 | 274 | #pragma target "berlin" 275 | 276 | chainid ; [id] 277 | push 1 ; [1, id] 278 | eq ; [id = 1] 279 | jumpi @mainnet ; [] 280 | push 0x0 ; [zeroaddr] 281 | selfdestruct ; [] 282 | mainnet: 283 | 284 | Note that declaring the target instruction set using `#pragma target` will not prevent the 285 | output bytecode from running on a different EVM version, since it is just a compiler 286 | setting. The example program above will start behaving differently from its intended 287 | version on EVM version "cancun", because SELFDESTRUCT was turned into SENDALL in that 288 | fork. It may even stop working entirely in a later fork. 289 | 290 | `#pragma target` can only appear in the program once. It cannot be placed in an include 291 | file. You have to put the directive in the main program file. 292 | 293 | ### #assemble 294 | 295 | When writing contract constructors and advanced CALL scenarios, it can be necessary to 296 | include subprogram bytecode as-is. The `#assemble` directive does this for you. 297 | 298 | Using `#assemble` runs the assembler on the specified file, and includes the resulting 299 | bytecode into the current program. Labels of the subprogram will start at offset zero. 300 | Unlike with `#include`, global definitions of the subprogram are not imported. 301 | 302 | ;; copy subprogram to memory 303 | push @.end - @.begin ; [size] 304 | push @.begin ; [offset, size] 305 | push 128 ; [dest, offset, size] 306 | codecopy ; [] 307 | 308 | .begin: 309 | #assemble "subprogram.eas" 310 | .end: 311 | 312 | If a target instruction set is configured with `#pragma target`, it will also be used for 313 | assembling the subprogram. However, the subprogram file can override the instruction set 314 | using its own `#pragma target` directive. 315 | 316 | [^1]: Under no circumstances must it be called the geth assembler.
317 | -------------------------------------------------------------------------------- /asm/compiler.go: --------------------------------------------------------------------------------
// Copyright 2023 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

// Package asm implements the Good Ethereum Assembler (geas).
//
// For a description of the geas language, see the README.md file in the project root.
package asm

import (
	"errors"
	"fmt"
	"io/fs"
	"math/big"
	"path"
	"strings"

	"github.com/fjl/geas/internal/ast"
	"github.com/fjl/geas/internal/evm"
	"github.com/fjl/geas/internal/lzint"
)

// Compiler turns assembly source into bytecode.
//
// The first group of fields holds configuration that persists across runs and is
// changed through the Set* methods. The second group is per-run state which is
// recreated by reset at the start of every compilation.
type Compiler struct {
	fsys           fs.FS                   // file system used to read source and #include files; may be nil
	lexDebug       bool                    // passed to the parser; enables printing of the token stream
	maxIncDepth    int                     // maximum nesting depth accepted when loading #include files
	maxErrors      int                     // error limit handed to the error list on reset
	defaultFork    string                  // instruction set used when the program has no #pragma target
	macroOverrides map[string]*lzint.Value // global expression macro values installed by SetGlobal

	globals    *globalScope                          // global definitions registered while processing includes
	macroStack map[*ast.InstructionMacroDef]struct{} // NOTE(review): presumably the macros currently being expanded; not used in this part of the file
	includes   map[*ast.IncludeSt]*ast.Document      // parsed document for each successfully loaded #include statement
	errors     errorList                             // errors and warnings of the current run
}

// NewCompiler creates a compiler.
//
// Deprecated: use New.
52 | func NewCompiler(fsys fs.FS) *Compiler { 53 | return New(fsys) 54 | } 55 | 56 | // New creates a compiler. 57 | // The file system is used to resolve import file names. If a nil FS is given, 58 | // #import cannot be used. 59 | func New(fsys fs.FS) *Compiler { 60 | return &Compiler{ 61 | fsys: fsys, 62 | maxIncDepth: 128, 63 | maxErrors: 10, 64 | defaultFork: evm.LatestFork, 65 | macroOverrides: make(map[string]*lzint.Value), 66 | } 67 | } 68 | 69 | // reset prepares the compiler for the next run. 70 | func (c *Compiler) reset() { 71 | c.globals = newGlobalScope() 72 | c.macroStack = make(map[*ast.InstructionMacroDef]struct{}) 73 | c.includes = make(map[*ast.IncludeSt]*ast.Document) 74 | c.errors = errorList{maxErrors: c.maxErrors} 75 | } 76 | 77 | // SetFilesystem sets the file system used for resolving #include files. 78 | // Note: if set to a nil FS, #include is not allowed. 79 | func (c *Compiler) SetFilesystem(fsys fs.FS) { 80 | c.fsys = fsys 81 | } 82 | 83 | // SetDebugLexer enables/disables printing of the token stream to stdout. 84 | func (c *Compiler) SetDebugLexer(on bool) { 85 | c.lexDebug = on 86 | } 87 | 88 | // SetDefaultFork sets the EVM instruction set used by default. 89 | func (c *Compiler) SetDefaultFork(f string) { 90 | c.defaultFork = f 91 | } 92 | 93 | // SetDebugLexer enables/disables printing of the token stream to stdout. 94 | func (c *Compiler) SetIncludeDepthLimit(limit int) { 95 | c.maxIncDepth = limit 96 | } 97 | 98 | // SetMaxErrors sets the limit on the number of errors that can happen before the compiler gives up. 99 | func (c *Compiler) SetMaxErrors(limit int) { 100 | if limit < 1 { 101 | limit = 1 102 | } 103 | c.maxErrors = limit 104 | } 105 | 106 | // SetGlobal sets the value of a global expression macro. 107 | // Note the name must start with an uppercase letter to make it global. 
108 | func (c *Compiler) SetGlobal(name string, v *big.Int) { 109 | if !ast.IsGlobal(name) { 110 | panic(fmt.Sprintf("override name %q is not global (uppercase)", name)) 111 | } 112 | if v == nil { 113 | delete(c.macroOverrides, name) 114 | } else { 115 | c.macroOverrides[name] = lzint.FromInt(v) 116 | } 117 | } 118 | 119 | // ClearGlobals removes all definitions created by SetGlobal. 120 | func (c *Compiler) ClearGlobals() { 121 | clear(c.macroOverrides) 122 | } 123 | 124 | // CompileString compiles the given program text and returns the corresponding bytecode. 125 | // If compilation fails, the returned slice is nil. Use the Errors method to get 126 | // parsing/compilation errors. 127 | func (c *Compiler) CompileString(input string) []byte { 128 | defer c.errors.catchAbort() 129 | 130 | return c.compileSource("", []byte(input)) 131 | } 132 | 133 | // CompileString compiles the given program text and returns the corresponding bytecode. 134 | // If compilation fails, the returned slice is nil. Use the Errors method to get 135 | // parsing/compilation errors. 136 | func (c *Compiler) CompileFile(filename string) []byte { 137 | defer c.errors.catchAbort() 138 | 139 | content, err := fs.ReadFile(c.fsys, filename) 140 | if err != nil { 141 | c.errors.add(err) 142 | return nil 143 | } 144 | return c.compileSource(filename, content) 145 | } 146 | 147 | // Errors returns errors that have accumulated during compilation. 148 | func (c *Compiler) Errors() []error { 149 | return c.errors.errors() 150 | } 151 | 152 | // Warnings returns all warnings that have accumulated during compilation. 153 | func (c *Compiler) Warnings() []error { 154 | return c.errors.warnings() 155 | } 156 | 157 | // Failed reports whether compilation has failed. 158 | func (c *Compiler) Failed() bool { 159 | return c.errors.numErrors > 0 160 | } 161 | 162 | // ErrorsAndWarnings returns all errors and warnings which have accumulated during compilation. 
163 | func (c *Compiler) ErrorsAndWarnings() []error { 164 | return c.errors.list 165 | } 166 | 167 | // errorAt pushes an error to the compiler error list. 168 | func (c *Compiler) errorAt(inst ast.Statement, err error) { 169 | if err == nil { 170 | panic("BUG: errorAt(st, nil)") 171 | } 172 | c.errors.add(&statementError{inst: inst, err: err}) 173 | } 174 | 175 | // warnf pushes a warning to the error list. 176 | func (c *Compiler) warnf(inst ast.Statement, format string, args ...any) { 177 | c.errors.add(&simpleWarning{pos: inst.Position(), str: fmt.Sprintf(format, args...)}) 178 | } 179 | 180 | func (c *Compiler) compileSource(filename string, input []byte) []byte { 181 | c.reset() 182 | p := ast.NewParser(filename, input, c.lexDebug) 183 | doc, errs := p.Parse() 184 | if c.errors.addParseErrors(errs) { 185 | return nil // abort compilation due to failed parse 186 | } 187 | return c.compileDocument(doc) 188 | } 189 | 190 | // compileDocument creates bytecode from the AST. 191 | func (c *Compiler) compileDocument(doc *ast.Document) (output []byte) { 192 | prog := newCompilerProg(doc) 193 | 194 | // First, load all #include files and register their definitions. 195 | // This also configures the instruction set if specified by a #pragma. 196 | c.processIncludes(doc, prog, nil) 197 | 198 | // Apply macro overrides. This happens after include processing because macros 199 | // get their definitions assigned then. 200 | for name, val := range c.macroOverrides { 201 | if def, _ := c.globals.lookupExprMacro(name); def != nil && len(def.Params) > 0 { 202 | c.warnf(def, "overridden global macro %s has parameters", name) 203 | } 204 | c.globals.overrideExprMacroValue(name, val) 205 | } 206 | 207 | // Choose configured instruction set, but only if not configured by a pragma. 208 | if prog.evm == nil { 209 | prog.evm = evm.FindInstructionSet(c.defaultFork) 210 | } 211 | 212 | // Next, the AST document tree is expanded into a flat list of instructions. 
213 | c.expand(doc, prog) 214 | if prog.cur != prog.toplevel { 215 | panic("section stack was not unwound by expansion") 216 | } 217 | 218 | // Expansion of is now done, and all further steps work on prog. 219 | e := newEvaluator(c.globals) 220 | c.preEvaluateArgs(e, prog) 221 | 222 | for { 223 | c.computePC(e, prog) 224 | 225 | // Assign immediate argument values. Here we use a trick to assign sizes for 226 | // "PUSH" instructions: their pushSizes are initially set to one. If we get an 227 | // overflow condition, the size of that PUSH increases by one and then we 228 | // recalculate everything. 229 | failedInst, err := c.evaluateArgs(e, prog) 230 | if err != nil { 231 | if errors.Is(err, ecVariablePushOverflow) { 232 | failedInst.pushSize += 1 233 | continue // try again 234 | } 235 | c.errorAt(failedInst.ast, err) 236 | break // there was some other error 237 | } 238 | break 239 | } 240 | 241 | if c.errors.hasError() { 242 | return nil // no output if source has errors 243 | } 244 | 245 | // Run analysis. Note this is also disabled if there are errors because there could 246 | // be lots of useless warnings otherwise. 247 | c.checkLabelsUsed(prog, e) 248 | 249 | // Create the bytecode. 250 | return c.generateOutput(prog) 251 | } 252 | 253 | // processIncludes reads all #included documents. 254 | func (c *Compiler) processIncludes(doc *ast.Document, prog *compilerProg, stack []ast.Statement) { 255 | errs := c.globals.registerDefinitions(doc) 256 | c.errors.add(errs...) 
257 | 258 | var list []*ast.IncludeSt 259 | for _, st := range doc.Statements { 260 | switch st := st.(type) { 261 | case *ast.IncludeSt: 262 | file, err := resolveRelative(doc.File, st.Filename) 263 | if err != nil { 264 | c.errorAt(st, err) 265 | continue 266 | } 267 | incdoc := c.parseIncludeFile(file, st, len(stack)+1) 268 | if incdoc != nil { 269 | c.includes[st] = incdoc 270 | list = append(list, st) 271 | } 272 | 273 | case *ast.PragmaSt: 274 | switch st.Option { 275 | case "target": 276 | if len(stack) != 0 { 277 | c.errorAt(st, ecPragmaTargetInIncludeFile) 278 | } 279 | if prog.evm != nil { 280 | c.errorAt(st, ecPragmaTargetConflict) 281 | } 282 | prog.evm = evm.FindInstructionSet(st.Value) 283 | if prog.evm == nil { 284 | c.errorAt(st, fmt.Errorf("%w %q", ecPragmaTargetUnknown, st.Value)) 285 | } 286 | default: 287 | c.errorAt(st, fmt.Errorf("%w %s", ecUnknownPragma, st.Option)) 288 | } 289 | } 290 | } 291 | 292 | // Process includes in macros. 293 | for _, m := range doc.InstrMacros() { 294 | c.processIncludes(m.Body, prog, append(stack, m)) 295 | } 296 | 297 | // Recurse. 
298 | for _, inst := range list { 299 | incdoc := c.includes[inst] 300 | c.processIncludes(incdoc, prog, append(stack, inst)) 301 | } 302 | } 303 | 304 | func resolveRelative(basepath string, filename string) (string, error) { 305 | res := path.Clean(path.Join(path.Dir(basepath), filename)) 306 | if strings.Contains(res, "..") { 307 | return "", fmt.Errorf("path %q escapes project root", filename) 308 | } 309 | return res, nil 310 | } 311 | 312 | func (c *Compiler) parseIncludeFile(file string, inst *ast.IncludeSt, depth int) *ast.Document { 313 | if c.fsys == nil { 314 | c.errorAt(inst, ecIncludeNoFS) 315 | return nil 316 | } 317 | if depth > c.maxIncDepth { 318 | c.errorAt(inst, ecIncludeDepthLimit) 319 | return nil 320 | } 321 | 322 | content, err := fs.ReadFile(c.fsys, file) 323 | if err != nil { 324 | c.errorAt(inst, err) 325 | return nil 326 | } 327 | p := ast.NewParser(file, content, c.lexDebug) 328 | doc, errors := p.Parse() 329 | if c.errors.addParseErrors(errors) { 330 | return nil 331 | } 332 | // Note that included documents do NOT have the including document set as Parent. 333 | // The parent relationship is used during lookup of labels, macros, etc. and 334 | // such definitions should not be shared between include files. 335 | // 336 | // Included documents do have a Creation though. 337 | doc.Creation = inst 338 | return doc 339 | } 340 | 341 | // generateOutput creates the bytecode. This is also where instruction names get resolved. 
342 | func (c *Compiler) generateOutput(prog *compilerProg) []byte { 343 | var unreachable unreachableCodeCheck 344 | var output []byte 345 | for _, inst := range prog.iterInstructions() { 346 | if len(output) != inst.pc { 347 | panic(fmt.Sprintf("BUG: instruction pc=%d, but output has size %d", inst.pc, len(output))) 348 | } 349 | 350 | switch { 351 | case isPush(inst.op): 352 | if inst.pushSize > 32 { 353 | panic("BUG: pushSize > 32") 354 | } 355 | if len(inst.data) > inst.pushSize { 356 | panic(fmt.Sprintf("BUG: push inst.data %d > inst.pushSize %d", len(inst.data), inst.pushSize)) 357 | } 358 | 359 | // resolve the op 360 | var op *evm.Op 361 | if inst.op == "PUSH" { 362 | op = prog.evm.PushBySize(inst.pushSize) 363 | } else { 364 | op = prog.evm.OpByName(inst.op) 365 | } 366 | if op == nil { 367 | panic(fmt.Sprintf("BUG: opcode for %q (size %d) not found", inst.op, inst.pushSize)) 368 | } 369 | 370 | // Unreachable code check. 371 | if !c.errors.hasError() { 372 | unreachable.check(c, inst.ast, op) 373 | } 374 | 375 | // Add opcode and data padding to output. 376 | output = append(output, op.Code) 377 | if len(inst.data) < inst.pushSize { 378 | output = append(output, make([]byte, inst.pushSize-len(inst.data))...) 379 | } 380 | 381 | case inst.op != "": 382 | op := prog.evm.OpByName(inst.op) 383 | if op == nil { 384 | c.errorAt(inst.ast, fmt.Errorf("%w %s", ecUnknownOpcode, inst.op)) 385 | } 386 | // Unreachable code check. 387 | if !c.errors.hasError() { 388 | unreachable.check(c, inst.ast, op) 389 | } 390 | output = append(output, op.Code) 391 | } 392 | 393 | // Instruction data is always added to output. 394 | output = append(output, inst.data...) 
395 | } 396 | return output 397 | } 398 | -------------------------------------------------------------------------------- /asm/compiler_analysis.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "github.com/fjl/geas/internal/ast" 21 | "github.com/fjl/geas/internal/evm" 22 | "github.com/fjl/geas/internal/set" 23 | ) 24 | 25 | // checkLabelsUsed warns about label definitions that were not hit by the evaluator. 26 | func (c *Compiler) checkLabelsUsed(prog *compilerProg, e *evaluator) { 27 | // Gather documents referenced by program. 28 | var docs []*ast.Document 29 | docset := make(set.Set[*ast.Document]) 30 | macroset := make(set.Set[*ast.InstructionMacroDef]) 31 | for section := range prog.iterSections() { 32 | // Ensure to walk macroexpansions only once. 33 | if section.macroArgs != nil { 34 | if macroset.Includes(section.macroArgs.def) { 35 | continue 36 | } 37 | macroset.Add(section.macroArgs.def) 38 | } 39 | if !docset.Includes(section.doc) { 40 | docset.Add(section.doc) 41 | docs = append(docs, section.doc) 42 | } 43 | } 44 | 45 | // Check against evaluator. 
46 | for _, doc := range docs { 47 | for _, st := range doc.Statements { 48 | switch st := st.(type) { 49 | case *ast.LabelDefSt: 50 | if !e.isLabelUsed(st) { 51 | c.warnf(st, "label %s unused in program", st) 52 | } 53 | } 54 | } 55 | } 56 | } 57 | 58 | // unreachableCodeCheck finds instructions that cannot be reached by execution. 59 | // In the EVM, all jump targets must be marked by JUMPDEST. For terminal instructions 60 | // such as STOP, if the next instruction isn't JUMPDEST, it can never be reached. 61 | type unreachableCodeCheck struct { 62 | prevSt ast.Statement 63 | prevOp *evm.Op 64 | inUnreachable bool 65 | } 66 | 67 | func (chk *unreachableCodeCheck) check(c *Compiler, st ast.Statement, op *evm.Op) { 68 | if chk.inUnreachable && op.Name == "JUMPDEST" { 69 | chk.inUnreachable = false 70 | } 71 | if chk.prevOp != nil && (chk.prevOp.Term || chk.prevOp.Unconditional) && !op.JumpDest { 72 | c.warnf(st, "unreachable code (previous instruction is %s at %v)", chk.prevOp.Name, chk.prevSt.Position()) 73 | chk.inUnreachable = true 74 | } 75 | chk.prevSt, chk.prevOp = st, op 76 | } 77 | -------------------------------------------------------------------------------- /asm/compiler_eval.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "errors" 21 | "math/big" 22 | ) 23 | 24 | // preEvaluateArgs computes the initial argument values of instructions. 25 | // 26 | // Here we assign the inst.pushSize of all PUSH and PUSH instructions. 27 | // The argument value, inst.data, is assigned this compilation step if the arg expression 28 | // contains no label references. 29 | func (c *Compiler) preEvaluateArgs(e *evaluator, prog *compilerProg) { 30 | for section, inst := range prog.iterInstructions() { 31 | if inst.isBytes() { 32 | // Handle #bytes. 33 | v, err := e.evalAsBytes(inst.expr(), section.env) 34 | if err == nil { 35 | inst.argNoLabels = true 36 | inst.data = v 37 | } 38 | continue 39 | } 40 | 41 | // Handle PUSH. 42 | argument := inst.expr() 43 | if argument == nil { 44 | continue 45 | } 46 | inst.pushSize = 1 47 | if s, ok := inst.explicitPushSize(); ok { 48 | inst.pushSize = s 49 | } 50 | 51 | // Pre-evaluate argument. 52 | v, err := e.eval(argument, section.env) 53 | var labelErr unassignedLabelError 54 | if errors.As(err, &labelErr) { 55 | // Expression depends on label position calculation, leave it for later. 56 | continue 57 | } 58 | inst.argNoLabels = true 59 | if err != nil { 60 | c.errorAt(inst.ast, err) 61 | continue 62 | } 63 | if err := prog.assignPushArg(inst, v.Int(), true); err != nil { 64 | c.errorAt(inst.ast, err) 65 | continue 66 | } 67 | } 68 | } 69 | 70 | // computePC assigns the PC values of all instructions and labels. 
71 | func (c *Compiler) computePC(e *evaluator, prog *compilerProg) { 72 | var pc int 73 | for section, inst := range prog.iterInstructions() { 74 | if li, ok := inst.ast.(labelDefStatement); ok { 75 | e.setLabelPC(section.doc, li.LabelDefSt, pc) 76 | } 77 | 78 | inst.pc = pc 79 | size := 0 80 | if inst.op != "" { 81 | size = 1 82 | } 83 | if isPush(inst.op) { 84 | size += inst.pushSize 85 | } else { 86 | size += len(inst.data) 87 | } 88 | pc += size 89 | } 90 | } 91 | 92 | // evaluateArgs computes the argument values of instructions. 93 | func (c *Compiler) evaluateArgs(e *evaluator, prog *compilerProg) (inst *instruction, err error) { 94 | for section, inst := range prog.iterInstructions() { 95 | if inst.argNoLabels { 96 | continue // pre-calculated 97 | } 98 | 99 | if inst.isBytes() { 100 | // handle #bytes 101 | v, err := e.evalAsBytes(inst.expr(), section.env) 102 | if err != nil { 103 | return inst, err 104 | } 105 | inst.data = v 106 | } else { 107 | // handle PUSH 108 | argument := inst.expr() 109 | if argument == nil { 110 | continue // no arg 111 | } 112 | v, err := e.eval(argument, section.env) 113 | if err != nil { 114 | return inst, err 115 | } 116 | if err := prog.assignPushArg(inst, v.Int(), false); err != nil { 117 | return inst, err 118 | } 119 | } 120 | } 121 | return nil, nil 122 | } 123 | 124 | // assignPushArg sets the argument value of an instruction to v. The byte size of the 125 | // value is checked against the declared "PUSH" data size. 126 | // 127 | // If setSize is true, the pushSize of variable-size "PUSH" instructions will be assigned 128 | // based on the value. 
129 | func (prog *compilerProg) assignPushArg(inst *instruction, v *big.Int, setSize bool) error { 130 | if v.Sign() < 0 { 131 | return ecNegativeResult 132 | } 133 | b := v.Bytes() 134 | if len(b) > 32 { 135 | return ecPushOverflow256 136 | } 137 | 138 | _, hasExplicitSize := inst.explicitPushSize() 139 | if setSize && !hasExplicitSize { 140 | inst.pushSize = prog.autoPushSize(b) 141 | } 142 | if len(b) > inst.pushSize { 143 | if !hasExplicitSize { 144 | return ecVariablePushOverflow 145 | } 146 | return ecFixedSizePushOverflow 147 | } 148 | 149 | // Store data. Note there is no padding applied here. 150 | // Padding will be added at the bytecode output stage. 151 | inst.data = b 152 | return nil 153 | } 154 | 155 | func (prog *compilerProg) autoPushSize(value []byte) int { 156 | if len(value) > 32 { 157 | panic("value too big") 158 | } 159 | if len(value) == 0 && !prog.evm.SupportsPush0() { 160 | return 1 161 | } 162 | return len(value) 163 | } 164 | -------------------------------------------------------------------------------- /asm/compiler_expand.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package asm 18 | 19 | import ( 20 | "fmt" 21 | "math" 22 | "strings" 23 | 24 | "github.com/fjl/geas/internal/ast" 25 | "github.com/fjl/geas/internal/evm" 26 | ) 27 | 28 | // expand appends a list of AST instructions to the program. 29 | func (c *Compiler) expand(doc *ast.Document, prog *compilerProg) { 30 | for _, astSt := range doc.Statements { 31 | st := statementFromAST(astSt) 32 | if st == nil { 33 | continue 34 | } 35 | err := st.expand(c, doc, prog) 36 | if err != nil { 37 | c.errorAt(astSt, err) 38 | continue 39 | } 40 | } 41 | } 42 | 43 | // expand creates an instruction for the label. For dotted labels, the instruction is 44 | // empty (i.e. has size zero). For regular labels, a JUMPDEST is created. 45 | func (li labelDefStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 46 | if li.Global { 47 | ast := li.LabelDefSt 48 | if err := c.globals.setLabelDocument(ast, doc); err != nil { 49 | return err 50 | } 51 | } 52 | 53 | inst := newInstruction(li, "") 54 | if !li.Dotted { 55 | inst.op = "JUMPDEST" 56 | } 57 | prog.addInstruction(inst) 58 | return nil 59 | } 60 | 61 | // expand appends the instruction to a program. This is also where basic validation is done. 62 | func (op opcodeStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 63 | opcode := strings.ToUpper(op.Op) 64 | inst := newInstruction(op, opcode) 65 | 66 | switch { 67 | case isPush(opcode) && opcode != "PUSH0": 68 | if op.Arg == nil { 69 | return ecPushWithoutArgument 70 | } 71 | 72 | case isJump(opcode): 73 | if err := c.validateJumpArg(doc, op.Arg); err != nil { 74 | return err 75 | } 76 | if _, err := prog.resolveOp(opcode); err != nil { 77 | return err 78 | } 79 | // 'JUMP @label' instructions turn into 'PUSH @label' + 'JUMP'. 
80 | if op.Arg != nil { 81 | push := newInstruction(op, "PUSH") 82 | prog.addInstruction(push) 83 | } 84 | 85 | default: 86 | if _, err := prog.resolveOp(opcode); err != nil { 87 | return err 88 | } 89 | if op.Arg != nil { 90 | if opcode == "PUSH0" { 91 | return ecPushzeroWithArgument 92 | } 93 | return ecUnexpectedArgument 94 | } 95 | } 96 | 97 | prog.addInstruction(inst) 98 | return nil 99 | } 100 | 101 | // resolveOp resolves an opcode name. 102 | func (prog *compilerProg) resolveOp(op string) (*evm.Op, error) { 103 | if op := prog.evm.OpByName(op); op != nil { 104 | return op, nil 105 | } 106 | remFork := prog.evm.ForkWhereOpRemoved(op) 107 | if remFork != "" { 108 | return nil, fmt.Errorf("%w %s (target = %q; removed in fork %q)", ecUnknownOpcode, op, prog.evm.Name(), remFork) 109 | } 110 | addedForks := evm.ForksWhereOpAdded(op) 111 | if len(addedForks) > 0 { 112 | list := strings.Join(addedForks, ", ") 113 | fork := "fork" 114 | if len(addedForks) > 1 { 115 | fork += "s" 116 | } 117 | return nil, fmt.Errorf("%w %s (target = %q; added in %s %q)", ecUnknownOpcode, op, prog.evm.Name(), fork, list) 118 | } 119 | return nil, fmt.Errorf("%w %s", ecUnknownOpcode, op) 120 | } 121 | 122 | // validateJumpArg checks that argument to JUMP is a defined label. 123 | func (c *Compiler) validateJumpArg(doc *ast.Document, arg ast.Expr) error { 124 | if arg == nil { 125 | return nil // no argument is fine. 
126 | } 127 | lref, ok := arg.(*ast.LabelRefExpr) 128 | if !ok { 129 | return ecJumpNeedsLiteralLabel 130 | } 131 | if lref.Dotted { 132 | return fmt.Errorf("%w %v", ecJumpToDottedLabel, lref) 133 | } 134 | 135 | var li *ast.LabelDefSt 136 | if lref.Global { 137 | li = c.globals.label[lref.Ident] 138 | } else { 139 | li, _ = doc.LookupLabel(lref) 140 | } 141 | if li == nil { 142 | return fmt.Errorf("%w %v", ecJumpToUndefinedLabel, lref) 143 | } 144 | if li.Dotted { 145 | return fmt.Errorf("%w %v", ecJumpToDottedLabel, lref) 146 | } 147 | return nil 148 | } 149 | 150 | // expand appends the output of an instruction macro call to the program. 151 | func (inst macroCallStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 152 | var ( 153 | name = inst.Ident 154 | def *ast.InstructionMacroDef 155 | defdoc *ast.Document 156 | ) 157 | if ast.IsGlobal(name) { 158 | def, defdoc = c.globals.lookupInstrMacro(name) 159 | } else { 160 | def, defdoc = doc.LookupInstrMacro(name) 161 | } 162 | if def == nil { 163 | return fmt.Errorf("%w %%%s", ecUndefinedInstrMacro, name) 164 | } 165 | 166 | // Prevent recursion and check args match. 167 | if !c.enterMacro(def) { 168 | return fmt.Errorf("%w %%%s", ecRecursiveCall, name) 169 | } 170 | defer c.exitMacro(def) 171 | if len(inst.Args) != len(def.Params) { 172 | return fmt.Errorf("%w, macro %%%s needs %d", ecInvalidArgumentCount, name, len(def.Params)) 173 | } 174 | 175 | // Clone the macro's body document. This is a shallow clone for setting 176 | // Parent/Creation, which is done to for error location reporting reasons. Cloning the 177 | // document also means by-document caching does not treat all expansions of a macro as 178 | // the same code. 179 | macroDoc := *def.Body 180 | macroDoc.Parent = defdoc 181 | macroDoc.Creation = inst 182 | 183 | // Arguments of instruction macros cannot be evaluated during expansion. They are 184 | // evaluated in a later pass where all intermediate arguments are processed. 
In order 185 | // to compute the value then, we need to keep track of macro argument expressions and 186 | // their origin document chain. An example: 187 | // 188 | // #define %MacroA(a) { 189 | // %MacroB($a) 190 | // } 191 | // #define %MacroB(b) { 192 | // push $b 193 | // } 194 | // 195 | // When the evaluator processes 'push $b' generated by MacroB, it first finds 196 | // that $b = $a. However, the expression $a must not be evaluated in the context of 197 | // MacroB, but in the context of MacroA, because that's where $a is defined. 198 | // 199 | // To keep track of this, we store the callsite of the macro along with the arguments 200 | // into the output section. The evaluator uses this callsite as the evaluation context 201 | // for variables. 202 | callsite := prog.currentSection() 203 | args := &instrMacroArgs{callsite: callsite, def: def, args: inst.Args} 204 | prog.pushSection(¯oDoc, args) 205 | defer prog.popSection() 206 | 207 | // Expand body. 208 | c.expand(¯oDoc, prog) 209 | return nil 210 | } 211 | 212 | func (c *Compiler) enterMacro(m *ast.InstructionMacroDef) bool { 213 | if _, onStack := c.macroStack[m]; onStack { 214 | return false 215 | } 216 | c.macroStack[m] = struct{}{} 217 | return true 218 | } 219 | 220 | func (c *Compiler) exitMacro(m *ast.InstructionMacroDef) { 221 | delete(c.macroStack, m) 222 | } 223 | 224 | // expand of #include appends the included file's instructions to the program. 225 | // Note this accesses the documents parsed by processIncludes. 226 | func (inst includeStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 227 | incdoc := c.includes[inst.IncludeSt] 228 | if incdoc == nil { 229 | // The document is not in doc.includes, so there must've been a parse error. 230 | // We can just ignore the statement here since the error was already reported. 
231 | return nil 232 | } 233 | prog.pushSection(incdoc, nil) 234 | defer prog.popSection() 235 | c.expand(incdoc, prog) 236 | return nil 237 | } 238 | 239 | // expand of #assemble performs compilation of the given assembly file. 240 | func (inst assembleStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 241 | subc := New(c.fsys) 242 | subc.SetIncludeDepthLimit(c.maxIncDepth) 243 | subc.SetMaxErrors(math.MaxInt) 244 | subc.SetDefaultFork(prog.evm.Name()) 245 | subc.macroOverrides = c.macroOverrides 246 | 247 | file, err := resolveRelative(doc.File, inst.Filename) 248 | if err != nil { 249 | return err 250 | } 251 | bytecode := subc.CompileFile(file) 252 | c.errors.add(subc.ErrorsAndWarnings()...) 253 | if len(bytecode) > 0 { 254 | datainst := &instruction{data: bytecode} 255 | prog.addInstruction(datainst) 256 | } 257 | return nil 258 | } 259 | 260 | func (inst bytesStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error { 261 | prog.addInstruction(&instruction{ast: inst}) 262 | return nil 263 | } 264 | -------------------------------------------------------------------------------- /asm/compiler_prog.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 
// compilerProg is the output program of the compiler.
// It contains sections of instructions.
type compilerProg struct {
	toplevel *compilerSection     // root section, created by newCompilerProg
	cur      *compilerSection     // section that new instructions and sections are added to
	evm      *evm.InstructionSet  // target instruction set used to resolve opcodes
}

// compilerSection is a section of the output program.
type compilerSection struct {
	doc *ast.Document
	env *evalEnvironment

	// This tracks the arguments of instruction macro calls. When the compiler expands a
	// macro, it creates a unique section for each call site. The arguments of the call
	// are stored for use by the expression evaluator.
	macroArgs *instrMacroArgs

	parent *compilerSection
	// children holds the content of the section in program order. Elements are
	// either *instruction or *compilerSection.
	children []any
}

// instrMacroArgs describes one call of an instruction macro: the section the call
// was made from, the macro definition, and the argument expressions of the call.
type instrMacroArgs struct {
	callsite *compilerSection
	def      *ast.InstructionMacroDef
	args     []ast.Expr
}

// newCompilerProg creates a program whose top-level section belongs to topdoc.
func newCompilerProg(topdoc *ast.Document) *compilerProg {
	p := new(compilerProg)
	p.toplevel = p.pushSection(topdoc, nil)
	return p
}

// pushSection creates a new section as a child of the current one.
// The new section becomes the current section and is returned.
func (p *compilerProg) pushSection(doc *ast.Document, macroArgs *instrMacroArgs) *compilerSection {
	s := &compilerSection{doc: doc, macroArgs: macroArgs}
	s.env = newEvalEnvironment(s)
	// The very first section has no parent; it becomes the root.
	if p.cur != nil {
		s.parent = p.cur
		p.cur.children = append(p.cur.children, s)
	}
	p.cur = s
	return s
}
75 | func (p *compilerProg) popSection() { 76 | if p.cur.parent == nil { 77 | panic("too much pop") 78 | } 79 | p.cur = p.cur.parent 80 | } 81 | 82 | // currentSection returns the current (most recently added) section. 83 | func (p *compilerProg) currentSection() *compilerSection { 84 | return p.cur 85 | } 86 | 87 | // addInstruction appends an instruction to the current section. 88 | func (p *compilerProg) addInstruction(inst *instruction) { 89 | p.cur.children = append(p.cur.children, inst) 90 | } 91 | 92 | // iterInstructions returns an iterator over all instructions in the program. 93 | func (p *compilerProg) iterInstructions() iter.Seq2[*compilerSection, *instruction] { 94 | type stackElem struct { 95 | s *compilerSection 96 | i int 97 | } 98 | stack := []stackElem{{p.toplevel, 0}} 99 | return func(yield func(*compilerSection, *instruction) bool) { 100 | outer: 101 | for len(stack) > 0 { 102 | e := &stack[len(stack)-1] 103 | for e.i < len(e.s.children) { 104 | cld := e.s.children[e.i] 105 | e.i++ 106 | switch cld := cld.(type) { 107 | case *instruction: 108 | if !yield(e.s, cld) { 109 | return 110 | } 111 | case *compilerSection: 112 | stack = append(stack, stackElem{cld, 0}) 113 | continue outer 114 | } 115 | } 116 | stack = stack[:len(stack)-1] 117 | } 118 | } 119 | } 120 | 121 | // iterSections returns an iterator over all sections in the program. 122 | func (p *compilerProg) iterSections() iter.Seq[*compilerSection] { 123 | stack := []*compilerSection{p.toplevel} 124 | return func(yield func(*compilerSection) bool) { 125 | for len(stack) > 0 { 126 | section := stack[len(stack)-1] 127 | stack = stack[:len(stack)-1] 128 | if !yield(section) { 129 | return 130 | } 131 | for _, cld := range slices.Backward(section.children) { 132 | if clds, ok := cld.(*compilerSection); ok { 133 | stack = append(stack, clds) 134 | } 135 | } 136 | } 137 | } 138 | } 139 | 140 | // instruction is a step of the compiler output program. 
141 | type instruction struct { 142 | // fields assigned during expansion: 143 | ast statement 144 | op string 145 | 146 | // fields assigned during compilation: 147 | pc int // pc at this instruction 148 | pushSize int // computed size of push instruction 149 | data []byte // computed argument value 150 | argNoLabels bool // true if arg expression does not contain @label 151 | } 152 | 153 | func newInstruction(ast statement, op string) *instruction { 154 | return &instruction{ast: ast, op: op} 155 | } 156 | 157 | func isPush(op string) bool { 158 | return strings.HasPrefix(op, "PUSH") 159 | } 160 | 161 | func isJump(op string) bool { 162 | return strings.HasPrefix(op, "JUMP") 163 | } 164 | 165 | // explicitPushSize returns the declared PUSH size. 166 | func (inst *instruction) explicitPushSize() (int, bool) { 167 | op, ok := inst.ast.(opcodeStatement) 168 | if ok { 169 | return int(op.PushSize) - 1, op.PushSize > 0 170 | } 171 | return 0, false 172 | } 173 | 174 | // expr returns the instruction argument. 175 | func (inst *instruction) expr() ast.Expr { 176 | if inst.op != "" && !isPush(inst.op) { 177 | return nil 178 | } 179 | switch st := inst.ast.(type) { 180 | case opcodeStatement: 181 | return st.Arg 182 | case bytesStatement: 183 | return st.Value 184 | default: 185 | return nil 186 | } 187 | } 188 | 189 | func (inst *instruction) isBytes() bool { 190 | _, ok := inst.ast.(bytesStatement) 191 | return ok 192 | } 193 | -------------------------------------------------------------------------------- /asm/compiler_prog_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 
// TestIterInstructions builds a program with nested sections and checks that
// iterInstructions yields every instruction in program order, paired with the
// section that directly contains it.
func TestIterInstructions(t *testing.T) {
	var (
		doc     = make([]ast.Document, 4)
		instr   = make([]*instruction, 8)
		prog    = newCompilerProg(&doc[0])
		section = make([]*compilerSection, 4)
	)
	for i := range instr {
		instr[i] = new(instruction)
	}

	// create section structure
	//
	//	section 0: instr 0, [section 1], instr 3, [section 2]
	//	section 1: instr 1, instr 2
	//	section 2: instr 4, [section 3], instr 6, instr 7
	//	section 3: instr 5
	{
		section[0] = prog.toplevel
		prog.addInstruction(instr[0])
		{
			section[1] = prog.pushSection(&doc[1], nil)
			prog.addInstruction(instr[1])
			prog.addInstruction(instr[2])
			prog.popSection()
		}
		prog.addInstruction(instr[3])
		{
			section[2] = prog.pushSection(&doc[2], nil)
			prog.addInstruction(instr[4])
			{
				section[3] = prog.pushSection(&doc[3], nil)
				prog.addInstruction(instr[5])
				prog.popSection()
			}
			prog.addInstruction(instr[6])
			prog.addInstruction(instr[7])
		}
		prog.popSection()
	}

	// iterate and gather list
	type item struct {
		*compilerSection
		*instruction
	}
	var result []item
	for section, inst := range prog.iterInstructions() {
		result = append(result, item{section, inst})
	}

	// compare
	expected := []item{
		{section[0], instr[0]},
		{section[1], instr[1]},
		{section[1], instr[2]},
		{section[0], instr[3]},
		{section[2], instr[4]},
		{section[3], instr[5]},
		{section[2], instr[6]},
		{section[2], instr[7]},
	}
	if !reflect.DeepEqual(result, expected) {
		// Log the indices of the sections/instructions that were actually seen.
		t.Log("result:")
		for _, item := range result {
			t.Logf("  s%d (%p): instr%d (%p)", slices.Index(section, item.compilerSection), item.compilerSection, slices.Index(instr, item.instruction), item.instruction)
		}
		t.Error("result mismatch")
	}
}
16 | 17 | package asm 18 | 19 | import ( 20 | "bytes" 21 | "encoding/hex" 22 | "maps" 23 | "math/big" 24 | "os" 25 | "path/filepath" 26 | "slices" 27 | "strings" 28 | "testing" 29 | "testing/fstest" 30 | 31 | "gopkg.in/yaml.v3" 32 | ) 33 | 34 | type compilerTestInput struct { 35 | Code string `yaml:"code"` 36 | Files map[string]string `yaml:"files,omitempty"` 37 | Globals map[string]*big.Int `yaml:"globals,omitempty"` 38 | } 39 | 40 | type compilerTestOutput struct { 41 | Bytecode string `yaml:"bytecode"` 42 | Errors []string `yaml:"errors,omitempty"` 43 | Warnings []string `yaml:"warnings,omitempty"` 44 | } 45 | 46 | type compilerTestYAML struct { 47 | Input compilerTestInput `yaml:"input"` 48 | Output compilerTestOutput `yaml:"output"` 49 | } 50 | 51 | func TestCompiler(t *testing.T) { 52 | content, err := os.ReadFile(filepath.Join("testdata", "compiler-tests.yaml")) 53 | if err != nil { 54 | t.Fatal(err) 55 | } 56 | var tests = make(map[string]compilerTestYAML) 57 | dec := yaml.NewDecoder(bytes.NewReader(content)) 58 | dec.KnownFields(true) 59 | if err := dec.Decode(&tests); err != nil { 60 | t.Fatal(err) 61 | } 62 | 63 | names := slices.Sorted(maps.Keys(tests)) 64 | for _, name := range names { 65 | test := tests[name] 66 | t.Run(name, func(t *testing.T) { 67 | c := New(nil) 68 | if len(test.Input.Files) > 0 { 69 | fm := make(fstest.MapFS, len(test.Input.Files)) 70 | for name, content := range test.Input.Files { 71 | fm[name] = &fstest.MapFile{Data: []byte(content)} 72 | } 73 | c.SetFilesystem(fm) 74 | } 75 | for name, val := range test.Input.Globals { 76 | c.SetGlobal(name, val) 77 | } 78 | output := c.CompileString(test.Input.Code) 79 | 80 | if len(test.Output.Errors) > 0 { 81 | // expecting errors... 
82 | if output != nil { 83 | t.Error("expected nil output") 84 | } 85 | checkErrors(t, "errors", c.Errors(), test.Output.Errors) 86 | checkErrors(t, "warnings", c.Warnings(), test.Output.Warnings) 87 | return 88 | } 89 | 90 | // Test expects no errors, compilation should succeed. 91 | if c.Failed() { 92 | for _, err := range c.Errors() { 93 | t.Error(err) 94 | } 95 | t.Fatal("compilation failed") 96 | } 97 | checkErrors(t, "warnings", c.Warnings(), test.Output.Warnings) 98 | expectedOutput, err := hex.DecodeString(strings.Replace(test.Output.Bytecode, " ", "", -1)) 99 | if err != nil { 100 | t.Fatalf("invalid hex: %v", err) 101 | } 102 | if !bytes.Equal(output, expectedOutput) { 103 | t.Errorf("incorrect output\ngot: %x\nwant: %x\n", output, expectedOutput) 104 | } 105 | }) 106 | } 107 | } 108 | 109 | func checkErrors(t *testing.T, kind string, errlist []error, expected []string) { 110 | if len(errlist) != len(expected) { 111 | t.Errorf("got %d %s, expected %d", len(errlist), kind, len(expected)) 112 | for i := range errlist { 113 | t.Errorf(" [%d] %v", i, errlist[i]) 114 | } 115 | return 116 | } 117 | for i := range errlist { 118 | if errlist[i].Error() != expected[i] { 119 | t.Errorf("wrong error %d: %v\n want: %s", i, errlist[i], expected[i]) 120 | } 121 | } 122 | } 123 | 124 | func TestExamplePrograms(t *testing.T) { 125 | exampleDir, err := filepath.Abs("../example") 126 | if err != nil { 127 | t.Fatal(err) 128 | } 129 | 130 | bytecodes := make(map[string]string) 131 | t.Run("erc20", func(t *testing.T) { 132 | bytecodes["erc20"] = compileExample(t, exampleDir, "erc20/erc20.eas") 133 | }) 134 | t.Run("erc20_ctor", func(t *testing.T) { 135 | bytecodes["erc20_ctor"] = compileExample(t, exampleDir, "erc20/erc20_ctor.eas") 136 | }) 137 | t.Run("4788asm", func(t *testing.T) { 138 | bytecodes["4788asm"] = compileExample(t, exampleDir, "4788asm.eas") 139 | }) 140 | t.Run("4788asm_ctor", func(t *testing.T) { 141 | bytecodes["4788asm_ctor"] = compileExample(t, 
exampleDir, "4788asm_ctor.eas") 142 | }) 143 | 144 | if os.Getenv("WRITE_TEST_FILES") == "1" { 145 | content, _ := yaml.Marshal(bytecodes) 146 | os.WriteFile("testdata/known-bytecode.yaml", content, 0644) 147 | } 148 | 149 | // compare codes 150 | var known map[string]string 151 | data, err := os.ReadFile("testdata/known-bytecode.yaml") 152 | if err != nil { 153 | t.Fatal(err) 154 | } 155 | if err := yaml.Unmarshal(data, &known); err != nil { 156 | t.Fatal("YAML unmarshal failed:", err) 157 | } 158 | for name, code := range bytecodes { 159 | if code != known[name] { 160 | t.Errorf("bytecode mismatch for %s:", name) 161 | t.Errorf(" compiled: %s", code) 162 | t.Errorf(" known: %s", known[name]) 163 | } 164 | } 165 | } 166 | 167 | func compileExample(t *testing.T, exampleDir string, file string) string { 168 | c := New(os.DirFS(exampleDir)) 169 | output := c.CompileFile(file) 170 | for _, err := range c.ErrorsAndWarnings() { 171 | t.Log(err) 172 | } 173 | if c.Failed() { 174 | t.Error("compilation failed:") 175 | } 176 | return hex.EncodeToString(output) 177 | } 178 | -------------------------------------------------------------------------------- /asm/error.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 
13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "errors" 21 | "fmt" 22 | 23 | "github.com/fjl/geas/internal/ast" 24 | ) 25 | 26 | // panic sentinel value: 27 | var errCancelCompilation = errors.New("end compilation") 28 | 29 | // PositionError is an error containing a file position. 30 | type PositionError interface { 31 | error 32 | Position() ast.Position 33 | } 34 | 35 | // compilerErrorCode represents an error detected by the compiler. 36 | type compilerError int 37 | 38 | const ( 39 | ecPushOverflow256 compilerError = iota 40 | ecPushzeroWithArgument 41 | ecFixedSizePushOverflow 42 | ecVariablePushOverflow 43 | ecPushWithoutArgument 44 | ecUnexpectedArgument 45 | ecJumpNeedsLiteralLabel 46 | ecJumpToDottedLabel 47 | ecJumpToUndefinedLabel 48 | ecUnknownOpcode 49 | ecUndefinedVariable 50 | ecUndefinedMacro 51 | ecUndefinedInstrMacro 52 | ecUndefinedBuiltinMacro 53 | ecRecursiveCall 54 | ecInvalidArgumentCount 55 | ecNegativeResult 56 | ecOddLengthBytesLiteral 57 | ecIncludeNoFS 58 | ecIncludeDepthLimit 59 | ecUnknownPragma 60 | ecPragmaTargetInIncludeFile 61 | ecPragmaTargetConflict 62 | ecPragmaTargetUnknown 63 | ) 64 | 65 | func (e compilerError) Error() string { 66 | switch e { 67 | case ecPushOverflow256: 68 | return "instruction argument > 256 bits" 69 | case ecPushzeroWithArgument: 70 | return "PUSH0 can't have argument" 71 | case ecFixedSizePushOverflow: 72 | return "instruction argument overflows explicitly given PUSH size" 73 | case ecVariablePushOverflow: 74 | return "instruction argument overflows push" 75 | case ecUnexpectedArgument: 76 | return "only JUMP* and PUSH* support immediate arguments" 77 | case ecPushWithoutArgument: 78 | return "PUSH requires an immediate argument" 79 | case ecJumpNeedsLiteralLabel: 80 | return "JUMP argument must be literal label" 81 | case ecJumpToDottedLabel: 82 | return 
"JUMP to dotted label" 83 | case ecJumpToUndefinedLabel: 84 | return "JUMP to undefined label" 85 | case ecUnknownOpcode: 86 | return "unknown op" 87 | case ecUndefinedVariable: 88 | return "undefined macro parameter" 89 | case ecUndefinedMacro: 90 | return "undefined macro" 91 | case ecUndefinedBuiltinMacro: 92 | return "undefined builtin macro" 93 | case ecUndefinedInstrMacro: 94 | return "undefined instruction macro" 95 | case ecRecursiveCall: 96 | return "recursive call of macro" 97 | case ecInvalidArgumentCount: 98 | return "invalid number of arguments" 99 | case ecNegativeResult: 100 | return "expression result is negative number" 101 | case ecOddLengthBytesLiteral: 102 | return "odd-length hex in bytes context" 103 | case ecIncludeNoFS: 104 | return "#include not allowed" 105 | case ecIncludeDepthLimit: 106 | return "#include depth limit reached" 107 | case ecUnknownPragma: 108 | return "unknown #pragma" 109 | case ecPragmaTargetInIncludeFile: 110 | return "#pragma target cannot be used in #include'd files" 111 | case ecPragmaTargetConflict: 112 | return "duplicate '#pragma target ...' directive" 113 | case ecPragmaTargetUnknown: 114 | return "unknown #pragma target" 115 | default: 116 | return fmt.Sprintf("invalid error %d", e) 117 | } 118 | } 119 | 120 | // statementError is an error related to an assembler instruction. 121 | type statementError struct { 122 | inst ast.Statement 123 | err error 124 | } 125 | 126 | func (e *statementError) Position() ast.Position { 127 | return e.inst.Position() 128 | } 129 | 130 | func (e *statementError) Unwrap() error { 131 | return e.err 132 | } 133 | 134 | func (e *statementError) Error() string { 135 | return fmt.Sprintf("%v: %s", e.inst.Position(), e.err.Error()) 136 | } 137 | 138 | // simpleWarning is a warning issued by the compiler. 
139 | type simpleWarning struct { 140 | pos ast.Position 141 | str string 142 | } 143 | 144 | func (e *simpleWarning) Error() string { 145 | return fmt.Sprintf("%v: warning: %s", e.pos, e.str) 146 | } 147 | 148 | func (e *simpleWarning) IsWarning() bool { 149 | return true 150 | } 151 | 152 | // unassignedLabelError signals use of a label that doesn't have a valid PC. 153 | type unassignedLabelError struct { 154 | lref *ast.LabelRefExpr 155 | } 156 | 157 | func (e unassignedLabelError) Error() string { 158 | return fmt.Sprintf("%v not instantiated in program", e.lref) 159 | } 160 | 161 | // Warning is implemented by errors that could also be just a warning. 162 | type Warning interface { 163 | error 164 | IsWarning() bool 165 | } 166 | 167 | // IsWarning reports whether an error is a warning. 168 | func IsWarning(err error) bool { 169 | var w Warning 170 | return errors.As(err, &w) && w.IsWarning() 171 | } 172 | 173 | // errorList maintains a list of errors and warnings. It also implements the mechanism 174 | // that aborts compilation when too many errors have accumulated. 175 | type errorList struct { 176 | list []error 177 | numErrors int 178 | numWarnings int 179 | maxErrors int 180 | } 181 | 182 | // catchAbort traps the panic condition that gets thrown when too many errors have accumulated. 183 | // A call to catchAbort must be deferred around any code that uses [errorList.add]. 184 | func (e *errorList) catchAbort() { 185 | ok := recover() 186 | if ok != nil && ok != errCancelCompilation { 187 | panic(ok) 188 | } 189 | } 190 | 191 | // add puts errors into the list. 192 | // This returns true if there were any actual errors in the arguments. 
193 | func (e *errorList) add(errs ...error) (anyRealError bool) { 194 | for _, err := range errs { 195 | if err == nil { 196 | continue 197 | } 198 | e.list = append(e.list, err) 199 | if IsWarning(err) { 200 | e.numWarnings++ 201 | } else { 202 | e.numErrors++ 203 | anyRealError = true 204 | } 205 | if e.numErrors > e.maxErrors { 206 | panic(errCancelCompilation) 207 | } 208 | } 209 | return 210 | } 211 | 212 | // addParseErrors is like add, but for errors from the parser. 213 | func (e *errorList) addParseErrors(errs []*ast.ParseError) bool { 214 | conv := make([]error, len(errs)) 215 | for i := range errs { 216 | conv[i] = errs[i] 217 | } 218 | return e.add(conv...) 219 | } 220 | 221 | // warnings returns the current warning list. 222 | func (e *errorList) warnings() []error { 223 | s := make([]error, 0, e.numWarnings) 224 | for _, err := range e.list { 225 | if IsWarning(err) { 226 | s = append(s, err) 227 | } 228 | } 229 | return s 230 | } 231 | 232 | // warnings returns the current error list. 233 | func (e *errorList) errors() []error { 234 | s := make([]error, 0, e.numErrors) 235 | for _, err := range e.list { 236 | if !IsWarning(err) { 237 | s = append(s, err) 238 | } 239 | } 240 | return s 241 | } 242 | 243 | // hasError reports whether there were any actual errors. 244 | func (e *errorList) hasError() bool { 245 | return e.numErrors > 0 246 | } 247 | -------------------------------------------------------------------------------- /asm/evaluator.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "errors" 21 | "fmt" 22 | "math" 23 | "math/big" 24 | "slices" 25 | 26 | "github.com/fjl/geas/internal/ast" 27 | "github.com/fjl/geas/internal/lzint" 28 | ) 29 | 30 | // evaluator is for evaluating expressions. 31 | type evaluator struct { 32 | inStack map[*ast.ExpressionMacroDef]struct{} 33 | labelPC map[evalLabelKey]int 34 | usedLabels map[*ast.LabelDefSt]struct{} 35 | globals *globalScope 36 | } 37 | 38 | type evalLabelKey struct { 39 | doc *ast.Document 40 | l *ast.LabelDefSt 41 | } 42 | 43 | type evalEnvironment struct { 44 | doc *ast.Document 45 | macroArgs *instrMacroArgs 46 | variables map[string]*lzint.Value 47 | } 48 | 49 | func newEvaluator(gs *globalScope) *evaluator { 50 | return &evaluator{ 51 | inStack: make(map[*ast.ExpressionMacroDef]struct{}), 52 | labelPC: make(map[evalLabelKey]int), 53 | usedLabels: make(map[*ast.LabelDefSt]struct{}), 54 | globals: gs, 55 | } 56 | } 57 | 58 | func newEvalEnvironment(s *compilerSection) *evalEnvironment { 59 | if s == nil { 60 | panic("nil section") 61 | } 62 | return &evalEnvironment{doc: s.doc, macroArgs: s.macroArgs} 63 | } 64 | 65 | // lookupExprMacro finds a macro definition in the document chain. 
66 | func (e *evaluator) lookupExprMacro(env *evalEnvironment, name string) (*ast.ExpressionMacroDef, *ast.Document) { 67 | if ast.IsGlobal(name) { 68 | return e.globals.lookupExprMacro(name) 69 | } 70 | if e, doc := env.doc.LookupExprMacro(name); e != nil { 71 | return e, doc 72 | } 73 | return nil, nil 74 | } 75 | 76 | // setLabelPC stores the offset of a label within a document. 77 | func (e *evaluator) setLabelPC(doc *ast.Document, li *ast.LabelDefSt, pc int) { 78 | if li.Global { 79 | e.globals.setLabelPC(li.Name(), pc) 80 | } else { 81 | e.labelPC[evalLabelKey{doc, li}] = pc 82 | } 83 | } 84 | 85 | // lookupLabel resolves a label reference. 86 | func (e *evaluator) lookupLabel(doc *ast.Document, lref *ast.LabelRefExpr) (pc int, pcValid bool, err error) { 87 | var li *ast.LabelDefSt 88 | if lref.Global { 89 | pc, pcValid, li = e.globals.lookupLabel(lref) 90 | } else { 91 | var srcdoc *ast.Document 92 | li, srcdoc = doc.LookupLabel(lref) 93 | pc, pcValid = e.labelPC[evalLabelKey{srcdoc, li}] 94 | } 95 | if li == nil { 96 | return 0, false, fmt.Errorf("undefined label %v", lref) 97 | } 98 | if lref.Dotted && !li.Dotted { 99 | return 0, false, fmt.Errorf("can't use %v to refer to label %s:", lref, li.Name()) 100 | } 101 | // mark label used (for unused label analysis) 102 | e.usedLabels[li] = struct{}{} 103 | return pc, pcValid, nil 104 | } 105 | 106 | // isLabelUsed reports whether the given label definition was used during expression evaluation. 
107 | func (e *evaluator) isLabelUsed(li *ast.LabelDefSt) bool { 108 | _, ok := e.usedLabels[li] 109 | return ok 110 | } 111 | 112 | func (e *evaluator) eval(expr ast.Expr, env *evalEnvironment) (*lzint.Value, error) { 113 | switch expr := expr.(type) { 114 | case *ast.LiteralExpr: 115 | return e.evalLiteral(expr) 116 | case *ast.LabelRefExpr: 117 | return e.evalLabelRef(expr, env) 118 | case *ast.ArithExpr: 119 | return e.evalArith(expr, env) 120 | case *ast.VariableExpr: 121 | return e.evalVariable(expr, env) 122 | case *ast.MacroCallExpr: 123 | return e.evalMacroCall(expr, env) 124 | default: 125 | panic(fmt.Sprintf("unhandled expr %T", expr)) 126 | } 127 | } 128 | 129 | // evalAsBytes gives the byte value of an expression. 130 | func (e *evaluator) evalAsBytes(expr ast.Expr, env *evalEnvironment) ([]byte, error) { 131 | v, err := e.eval(expr, env) 132 | if err != nil { 133 | return nil, err 134 | } 135 | return v.Bytes() 136 | } 137 | 138 | func (e *evaluator) evalLiteral(expr *ast.LiteralExpr) (*lzint.Value, error) { 139 | if expr.Value != nil { 140 | return expr.Value, nil 141 | } 142 | 143 | switch { 144 | case expr.IsNumber(): 145 | val, err := lzint.ParseNumberLiteral(expr.Text()) 146 | if err != nil { 147 | return nil, err 148 | } 149 | expr.Value = val 150 | return val, nil 151 | 152 | case expr.IsString(): 153 | val := lzint.FromBytes([]byte(expr.Text())) 154 | expr.Value = val 155 | return val, nil 156 | 157 | default: 158 | panic(fmt.Errorf("unhandled astLiteral %q (not string|number)", expr.Text())) 159 | } 160 | } 161 | 162 | func (e *evaluator) evalLabelRef(expr *ast.LabelRefExpr, env *evalEnvironment) (*lzint.Value, error) { 163 | pc, pcValid, err := e.lookupLabel(env.doc, expr) 164 | if err != nil { 165 | return nil, err 166 | } 167 | if !pcValid { 168 | // We hit this case if evaluating before labels have been calculated. A 169 | // special error value is returned here to allow the compiler to recognize 170 | // this case. 
171 | return nil, unassignedLabelError{lref: expr} 172 | } 173 | return lzint.FromInt(big.NewInt(int64(pc))), nil 174 | } 175 | 176 | var bigMaxUint = new(big.Int).SetUint64(math.MaxUint) 177 | 178 | func (e *evaluator) evalArith(expr *ast.ArithExpr, env *evalEnvironment) (*lzint.Value, error) { 179 | // compute operands 180 | leftVal, err := e.eval(expr.Left, env) 181 | if err != nil { 182 | return nil, err 183 | } 184 | rightVal, err := e.eval(expr.Right, env) 185 | if err != nil { 186 | return nil, err 187 | } 188 | left, right := leftVal.Int(), rightVal.Int() 189 | 190 | // apply op 191 | var v *big.Int 192 | switch expr.Op { 193 | case ast.ArithPlus: 194 | v = new(big.Int).Add(left, right) 195 | 196 | case ast.ArithMinus: 197 | v = new(big.Int).Sub(left, right) 198 | 199 | case ast.ArithMul: 200 | v = new(big.Int).Mul(left, right) 201 | 202 | case ast.ArithDiv: 203 | if right.Sign() == 0 { 204 | return nil, errors.New("division by zero") 205 | } 206 | v = new(big.Int).Div(left, right) 207 | 208 | case ast.ArithMod: 209 | v = new(big.Int).Mod(left, right) 210 | 211 | case ast.ArithAnd: 212 | v = new(big.Int).And(left, right) 213 | 214 | case ast.ArithOr: 215 | v = new(big.Int).Or(left, right) 216 | 217 | case ast.ArithXor: 218 | v = new(big.Int).Xor(left, right) 219 | 220 | case ast.ArithLshift: 221 | if right.Sign() == -1 { 222 | return nil, errors.New("negative lshift amount") 223 | } 224 | if right.Cmp(bigMaxUint) > 0 { 225 | return nil, fmt.Errorf("lshift amount %d overflows uint", right) 226 | } 227 | amount := uint(right.Uint64()) 228 | v = new(big.Int).Lsh(left, amount) 229 | 230 | case ast.ArithRshift: 231 | if right.Sign() == -1 { 232 | return nil, errors.New("negative rshift amount") 233 | } 234 | if right.Cmp(bigMaxUint) > 0 { 235 | return nil, fmt.Errorf("rshift amount %d overflows uint", right) 236 | } 237 | amount := uint(right.Uint64()) 238 | v = new(big.Int).Rsh(left, amount) 239 | 240 | default: 241 | panic(fmt.Errorf("invalid arith op %v", 
expr.Op)) 242 | } 243 | 244 | return lzint.FromInt(v), nil 245 | } 246 | 247 | func (e *evaluator) evalVariable(expr *ast.VariableExpr, env *evalEnvironment) (*lzint.Value, error) { 248 | v, ok := env.variables[expr.Ident] 249 | if ok { 250 | return v, nil 251 | } 252 | // Check for instruction macro args. 253 | if a := env.macroArgs; a != nil { 254 | i := slices.Index(a.def.Params, expr.Ident) 255 | if i == -1 { 256 | return nil, fmt.Errorf("%w $%s", ecUndefinedVariable, expr.Ident) 257 | } 258 | arg := a.args[i] 259 | // Evaluate it in the parent scope. 260 | return e.eval(arg, newEvalEnvironment(a.callsite)) 261 | } 262 | return nil, fmt.Errorf("%w $%s", ecUndefinedVariable, expr.Ident) 263 | } 264 | 265 | func (e *evaluator) evalMacroCall(expr *ast.MacroCallExpr, env *evalEnvironment) (*lzint.Value, error) { 266 | if expr.Builtin { 267 | builtin, ok := builtinMacros[expr.Ident] 268 | if ok { 269 | return builtin(e, env, expr) 270 | } 271 | return nil, fmt.Errorf("%w .%s", ecUndefinedBuiltinMacro, expr.Ident) 272 | } 273 | def, defdoc := e.lookupExprMacro(env, expr.Ident) 274 | if def == nil { 275 | return nil, fmt.Errorf("%w %s", ecUndefinedMacro, expr.Ident) 276 | } 277 | 278 | // Prevent recursion. 279 | if !e.enterMacro(def) { 280 | return nil, fmt.Errorf("%w %s", ecRecursiveCall, expr.Ident) 281 | } 282 | defer e.exitMacro(def) 283 | 284 | // Bind arguments. 285 | macroEnv := &evalEnvironment{ 286 | variables: make(map[string]*lzint.Value, len(def.Params)), 287 | doc: defdoc, 288 | } 289 | if err := checkArgCount(expr, len(def.Params)); err != nil { 290 | return nil, err 291 | } 292 | if len(expr.Args) != len(def.Params) { 293 | return nil, fmt.Errorf("%w, macro %s needs %d", ecInvalidArgumentCount, expr.Ident, len(def.Params)) 294 | } 295 | for i, param := range def.Params { 296 | v, err := e.eval(expr.Args[i], env) 297 | if err != nil { 298 | return nil, err 299 | } 300 | macroEnv.variables[param] = v 301 | } 302 | 303 | // Compute the macro result value. 
304 | return e.eval(def.Body, macroEnv) 305 | } 306 | 307 | func checkArgCount(expr *ast.MacroCallExpr, n int) error { 308 | if len(expr.Args) != n { 309 | return fmt.Errorf("%w, macro %s needs %d", ecInvalidArgumentCount, expr.Ident, n) 310 | } 311 | return nil 312 | } 313 | 314 | func (e *evaluator) enterMacro(m *ast.ExpressionMacroDef) bool { 315 | _, found := e.inStack[m] 316 | if found { 317 | return false 318 | } 319 | e.inStack[m] = struct{}{} 320 | return true 321 | } 322 | 323 | func (e *evaluator) exitMacro(m *ast.ExpressionMacroDef) { 324 | delete(e.inStack, m) 325 | } 326 | -------------------------------------------------------------------------------- /asm/evaluator_builtins.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package asm 18 | 19 | import ( 20 | "crypto/sha256" 21 | "errors" 22 | "fmt" 23 | "math/big" 24 | "strings" 25 | 26 | "github.com/ethereum/go-ethereum/accounts/abi" 27 | "github.com/ethereum/go-ethereum/common" 28 | "github.com/fjl/geas/internal/ast" 29 | "github.com/fjl/geas/internal/lzint" 30 | "golang.org/x/crypto/sha3" 31 | ) 32 | 33 | var builtinMacros = make(map[string]builtinMacroFn) 34 | 35 | func init() { 36 | builtinMacros["bitlen"] = bitlenMacro 37 | builtinMacros["bytelen"] = bytelenMacro 38 | builtinMacros["abs"] = absMacro 39 | builtinMacros["address"] = addressMacro 40 | builtinMacros["selector"] = selectorMacro 41 | builtinMacros["keccak256"] = keccak256Macro 42 | builtinMacros["sha256"] = sha256Macro 43 | } 44 | 45 | type builtinMacroFn func(*evaluator, *evalEnvironment, *ast.MacroCallExpr) (*lzint.Value, error) 46 | 47 | func bitlenMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 48 | if err := checkArgCount(call, 1); err != nil { 49 | return nil, err 50 | } 51 | v, err := e.eval(call.Args[0], env) 52 | if err != nil { 53 | return nil, err 54 | } 55 | return lzint.FromInt64(v.IntegerBitLen()), nil 56 | } 57 | 58 | func bytelenMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 59 | if err := checkArgCount(call, 1); err != nil { 60 | return nil, err 61 | } 62 | v, err := e.eval(call.Args[0], env) 63 | if err != nil { 64 | return nil, err 65 | } 66 | return lzint.FromInt64(v.ByteLen()), nil 67 | } 68 | 69 | func absMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 70 | if err := checkArgCount(call, 1); err != nil { 71 | return nil, err 72 | } 73 | v, err := e.eval(call.Args[0], env) 74 | if err != nil { 75 | return nil, err 76 | } 77 | return lzint.FromInt(new(big.Int).Abs(v.Int())), nil 78 | } 79 | 80 | func sha256Macro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 81 | if err := 
checkArgCount(call, 1); err != nil { 82 | return nil, err 83 | } 84 | bytes, err := e.evalAsBytes(call.Args[0], env) 85 | if err != nil { 86 | return nil, err 87 | } 88 | hash := sha256.Sum256(bytes) 89 | return lzint.FromBytes(hash[:]), nil 90 | } 91 | 92 | func keccak256Macro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 93 | if err := checkArgCount(call, 1); err != nil { 94 | return nil, err 95 | } 96 | bytes, err := e.evalAsBytes(call.Args[0], env) 97 | if err != nil { 98 | return nil, err 99 | } 100 | w := sha3.NewLegacyKeccak256() 101 | w.Write(bytes) 102 | hash := w.Sum(nil) 103 | return lzint.FromBytes(hash[:]), nil 104 | } 105 | 106 | var ( 107 | errSelectorWantsLiteral = fmt.Errorf(".selector(...) requires literal string argument") 108 | ) 109 | 110 | func selectorMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 111 | if err := checkArgCount(call, 1); err != nil { 112 | return nil, err 113 | } 114 | lit, ok := call.Args[0].(*ast.LiteralExpr) 115 | if !ok { 116 | return nil, errSelectorWantsLiteral 117 | } 118 | text := lit.Text() 119 | if _, err := abi.ParseSelector(text); err != nil { 120 | return nil, fmt.Errorf("invalid ABI selector") 121 | } 122 | w := sha3.NewLegacyKeccak256() 123 | w.Write([]byte(text)) 124 | hash := w.Sum(nil) 125 | return lzint.FromBytes(hash[:4]), nil 126 | } 127 | 128 | var ( 129 | errAddressWantsLiteral = errors.New(".address(...) 
requires literal argument") 130 | errAddressInvalid = errors.New("invalid Ethereum address") 131 | errAddressChecksum = errors.New("address has invalid checksum") 132 | ) 133 | 134 | func addressMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) { 135 | if err := checkArgCount(call, 1); err != nil { 136 | return nil, err 137 | } 138 | lit, ok := call.Args[0].(*ast.LiteralExpr) 139 | if !ok { 140 | return nil, errAddressWantsLiteral 141 | } 142 | text := lit.Text() 143 | addr, err := common.NewMixedcaseAddressFromString(text) 144 | if err != nil { 145 | return nil, errAddressInvalid 146 | } 147 | if isChecksumAddress(text) { 148 | if !addr.ValidChecksum() { 149 | return nil, errAddressChecksum 150 | } 151 | } 152 | return lzint.FromBytes(addr.Address().Bytes()), nil 153 | } 154 | 155 | func isChecksumAddress(str string) bool { 156 | return strings.ContainsAny(str, "ABCDEF") 157 | } 158 | -------------------------------------------------------------------------------- /asm/evaluator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package asm 18 | 19 | import ( 20 | "fmt" 21 | "math/big" 22 | "testing" 23 | 24 | "github.com/fjl/geas/internal/ast" 25 | ) 26 | 27 | type evalTest struct { 28 | expr string 29 | result string 30 | } 31 | 32 | type evalErrorTest struct { 33 | expr string 34 | err string 35 | } 36 | 37 | var evalIntTests = []evalTest{ 38 | // arithmetic 39 | {expr: `1`, result: "1"}, 40 | {expr: `1 + 4`, result: "5"}, 41 | {expr: `1 + 1 + 4`, result: "6"}, 42 | {expr: `1 << 48`, result: "281474976710656"}, 43 | {expr: `32 >> 1`, result: "16"}, 44 | {expr: `0xf1 & 0xe1`, result: "0xe1"}, 45 | {expr: `0x0f & 0xff`, result: "0x0f"}, 46 | {expr: `0x0f | 0xf0`, result: "0xff"}, 47 | {expr: `0xf ^ 0xf`, result: "0x00"}, 48 | {expr: `0x0 ^ 0xf`, result: "0xf"}, 49 | // arithmetic precedence rules 50 | {expr: `(2 * 3) + 4`, result: "10"}, 51 | {expr: `2 * 3 + 4`, result: "10"}, 52 | {expr: `4 + 2 * 3`, result: "10"}, 53 | {expr: `10 / 5 + 2`, result: "4"}, 54 | {expr: `1 + 1024 * 1024 * 1024`, result: "1073741825"}, 55 | {expr: `1024 * 1024 * 1024 * 1024 + 1`, result: "1099511627777"}, 56 | {expr: `1 + 1024 * 1024 * 1024 & 2 + 3`, result: "4"}, 57 | {expr: `(1 + ((1024 * 1024 * 1024) & 2)) + 3`, result: "4"}, 58 | // -- division and multiplication have same precedence 59 | {expr: `12 / 6 * 3`, result: "6"}, 60 | {expr: `12 / 6 * 3`, result: "6"}, 61 | // -- and binds more strongly than or 62 | {expr: `0xff00 | 0xff & 0x0f`, result: "0xff0f"}, 63 | {expr: `0xff & 0x0f | 0xff00`, result: "0xff0f"}, 64 | {expr: `0xff & (0x0f | 0xff00)`, result: "0x0f"}, 65 | // -- shift binds more strongly than and/or 66 | {expr: `0xff >> 4 & 0x05`, result: "0x05"}, 67 | // macro and label references 68 | {expr: `@label1`, result: "1"}, 69 | {expr: `@label1 + 2`, result: "3"}, 70 | {expr: `macro3 / @label1`, result: "3"}, 71 | {expr: `@.label2`, result: "2"}, 72 | {expr: `@Label3`, result: "3"}, 73 | {expr: `@.Label4`, result: "4"}, 74 | {expr: `macroFunc(2)`, result: "2"}, 75 | // string literals 
76 | {expr: `"A"`, result: "65"}, 77 | {expr: `"foo"`, result: "6713199"}, 78 | // builtins 79 | {expr: `.bitlen(0)`, result: "0"}, 80 | {expr: `.bitlen(0xff)`, result: "8"}, 81 | {expr: `.bitlen(0x1ff)`, result: "9"}, 82 | {expr: `.bitlen(0x01ff)`, result: "9"}, 83 | {expr: `.bytelen(0)`, result: "0"}, 84 | {expr: `.bytelen(0xff)`, result: "1"}, 85 | {expr: `.bytelen(0x1ff)`, result: "2"}, 86 | {expr: `.bytelen(0x01ff)`, result: "2"}, 87 | {expr: `.bytelen(0x0001ff)`, result: "3"}, // note: leading zero byte 88 | {expr: `.bytelen(0x000001ff)`, result: "4"}, // two leading zero bytes 89 | {expr: `.bytelen("foobar")`, result: "6"}, 90 | {expr: `.abs(0 - 10)`, result: "10"}, 91 | {expr: `.sha256("text")`, result: "68832153269555879243704685382415794081420120252170153643880971663484982053329"}, 92 | {expr: `.sha256(33)`, result: "84783983549258160669137366770885509408211009960610860350324922232842582506338"}, 93 | {expr: `.selector("transfer(address,uint256)")`, result: "2835717307"}, 94 | {expr: `.address(0x658bdf435d810c91414ec09147daa6db62406379)`, result: "579727320398773179602058954232328055508812456825"}, 95 | {expr: `.address("0x658bdf435d810c91414ec09147daa6db62406379")`, result: "579727320398773179602058954232328055508812456825"}, 96 | } 97 | 98 | var evalErrorTests = []evalErrorTest{ 99 | {expr: `20 / 0`, err: "division by zero"}, 100 | {expr: `1 << (1 << 64)`, err: "lshift amount 18446744073709551616 overflows uint"}, 101 | {expr: `1 >> (1 << 64)`, err: "rshift amount 18446744073709551616 overflows uint"}, 102 | {expr: `macro3(foo, 1)`, err: "invalid number of arguments, macro macro3 needs 0"}, 103 | // builtins 104 | {expr: `.selector("transfer(,,uint256)")`, err: "invalid ABI selector"}, 105 | {expr: `.address(0x658bdf435d810c91414EC09147daa6db62406379)`, err: errAddressChecksum.Error()}, 106 | {expr: `.sha256(0x011)`, err: "odd-length hex in bytes context"}, 107 | } 108 | 109 | var evalTestDoc *ast.Document 110 | 111 | func init() { 112 | source := ` 113 
| label1: 114 | .label2: 115 | Label3: 116 | .Label4: 117 | #define macro3() = 3 118 | #define macroFunc(a) = $a 119 | ` 120 | doc, errs := ast.NewParser("", []byte(source), false).Parse() 121 | if len(errs) != 0 { 122 | panic("parse error: " + errs[0].Error()) 123 | } 124 | evalTestDoc = doc 125 | } 126 | 127 | func evaluatorForTesting() *evaluator { 128 | gs := newGlobalScope() 129 | errs := gs.registerDefinitions(evalTestDoc) 130 | if len(errs) > 0 { 131 | panic(fmt.Errorf("error in registerDefinitions: %v", errs[0])) 132 | } 133 | e := newEvaluator(gs) 134 | e.setLabelPC(evalTestDoc, evalTestDoc.Statements[0].(*ast.LabelDefSt), 1) 135 | e.setLabelPC(evalTestDoc, evalTestDoc.Statements[1].(*ast.LabelDefSt), 2) 136 | e.setLabelPC(evalTestDoc, evalTestDoc.Statements[2].(*ast.LabelDefSt), 3) 137 | e.setLabelPC(evalTestDoc, evalTestDoc.Statements[3].(*ast.LabelDefSt), 4) 138 | return e 139 | } 140 | 141 | func evalEnvironmentForTesting() *evalEnvironment { 142 | return newEvalEnvironment(&compilerSection{ 143 | doc: evalTestDoc, 144 | }) 145 | } 146 | 147 | func TestExprEval(t *testing.T) { 148 | for _, test := range evalIntTests { 149 | expr, err := parseExprString(test.expr) 150 | if err != nil { 151 | t.Errorf("invalid expr %q: %v", test.expr, err) 152 | continue 153 | } 154 | expectedResult := mustParseBigInt(test.result) 155 | e := evaluatorForTesting() 156 | env := evalEnvironmentForTesting() 157 | result, err := e.eval(expr, env) 158 | if err != nil { 159 | t.Errorf("eval error in %q: %v", test.expr, err) 160 | continue 161 | } 162 | if result.Int().Cmp(expectedResult) != 0 { 163 | t.Errorf("expr %q result %v, want %v", test.expr, result, expectedResult) 164 | continue 165 | } 166 | } 167 | } 168 | 169 | func TestExprEvalErrors(t *testing.T) { 170 | for _, test := range evalErrorTests { 171 | expr, err := parseExprString(test.expr) 172 | if err != nil { 173 | t.Errorf("invalid expr %q: %v", test.expr, err) 174 | continue 175 | } 176 | e := 
evaluatorForTesting() 177 | env := evalEnvironmentForTesting() 178 | result, err := e.eval(expr, env) 179 | if err == nil { 180 | t.Errorf("expected error evaluating %q, got %v", test.expr, result) 181 | continue 182 | } 183 | if err.Error() != test.err { 184 | t.Errorf("expr %q wrong error %q, want %q", test.expr, err, test.err) 185 | continue 186 | } 187 | } 188 | } 189 | 190 | func parseExprString(str string) (ast.Expr, error) { 191 | p := ast.NewParser("string", []byte(str), false) 192 | return p.ParseExpression() 193 | } 194 | 195 | func mustParseBigInt(str string) *big.Int { 196 | i, ok := new(big.Int).SetString(str, 0) 197 | if !ok { 198 | panic("invalid bigint: " + str) 199 | } 200 | return i 201 | } 202 | -------------------------------------------------------------------------------- /asm/global.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "fmt" 21 | 22 | "github.com/fjl/geas/internal/ast" 23 | "github.com/fjl/geas/internal/lzint" 24 | ) 25 | 26 | // globalScope holds definitions across files. 
27 | type globalScope struct { 28 | label map[string]*ast.LabelDefSt 29 | labelPC map[string]int 30 | labelDoc map[string]*ast.Document 31 | instrMacro map[string]globalDef[*ast.InstructionMacroDef] 32 | exprMacro map[string]globalDef[*ast.ExpressionMacroDef] 33 | } 34 | 35 | type globalDef[M any] struct { 36 | def M 37 | doc *ast.Document 38 | } 39 | 40 | func newGlobalScope() *globalScope { 41 | return &globalScope{ 42 | label: make(map[string]*ast.LabelDefSt), 43 | labelPC: make(map[string]int), 44 | labelDoc: make(map[string]*ast.Document), 45 | instrMacro: make(map[string]globalDef[*ast.InstructionMacroDef]), 46 | exprMacro: make(map[string]globalDef[*ast.ExpressionMacroDef]), 47 | } 48 | } 49 | 50 | // registerDefinitions processes a document and registers the globals contained in it. 51 | func (gs *globalScope) registerDefinitions(doc *ast.Document) (errs []error) { 52 | for _, li := range doc.GlobalLabels() { 53 | gs.registerLabel(li, doc) 54 | } 55 | for _, mac := range doc.GlobalExprMacros() { 56 | def := globalDef[*ast.ExpressionMacroDef]{mac, doc} 57 | if err := gs.registerExprMacro(mac.Name, def); err != nil { 58 | errs = append(errs, err) 59 | } 60 | } 61 | for _, mac := range doc.GlobalInstrMacros() { 62 | def := globalDef[*ast.InstructionMacroDef]{mac, doc} 63 | if err := gs.registerInstrMacro(mac.Name, def); err != nil { 64 | errs = append(errs, err) 65 | } 66 | } 67 | return errs 68 | } 69 | 70 | // registerLabel registers a label as known. 71 | func (gs *globalScope) registerLabel(def *ast.LabelDefSt, doc *ast.Document) { 72 | _, found := gs.label[def.Name()] 73 | if !found { 74 | gs.label[def.Name()] = def 75 | } 76 | } 77 | 78 | // registerInstrMacro registers the first definition of an instruction macro. 
79 | func (gs *globalScope) registerInstrMacro(name string, def globalDef[*ast.InstructionMacroDef]) error { 80 | firstDef, found := gs.instrMacro[name] 81 | if found { 82 | return &statementError{ 83 | inst: def.def, 84 | err: fmt.Errorf("macro %%%s already defined%s", name, firstDef.doc.CreationString()), 85 | } 86 | } 87 | gs.instrMacro[name] = def 88 | return nil 89 | } 90 | 91 | // registerExprMacro registers the first definition of an expression macro. 92 | func (gs *globalScope) registerExprMacro(name string, def globalDef[*ast.ExpressionMacroDef]) error { 93 | firstDef, found := gs.exprMacro[name] 94 | if found { 95 | return &statementError{ 96 | inst: def.def, 97 | err: fmt.Errorf("macro %s already defined%s", name, firstDef.doc.CreationString()), 98 | } 99 | } 100 | gs.exprMacro[name] = def 101 | return nil 102 | } 103 | 104 | // overrideExprMacroValue sets a macro to the given value, overriding its definition. 105 | func (gs *globalScope) overrideExprMacroValue(name string, val *lzint.Value) { 106 | gs.exprMacro[name] = globalDef[*ast.ExpressionMacroDef]{ 107 | doc: nil, 108 | def: &ast.ExpressionMacroDef{ 109 | Name: name, 110 | Body: &ast.LiteralExpr{Value: val}, 111 | }, 112 | } 113 | } 114 | 115 | func (gs *globalScope) lookupInstrMacro(name string) (*ast.InstructionMacroDef, *ast.Document) { 116 | gdef := gs.instrMacro[name] 117 | return gdef.def, gdef.doc 118 | } 119 | 120 | func (gs *globalScope) lookupExprMacro(name string) (*ast.ExpressionMacroDef, *ast.Document) { 121 | gdef := gs.exprMacro[name] 122 | return gdef.def, gdef.doc 123 | } 124 | 125 | // setLabelDocument registers the document that a label was created in. This is subtly 126 | // different from the source document of the labelDefInstruction. The distinction matters 127 | // for labels created by macros, because macros create a new document on expansion. 
128 | // 129 | // These documents need to be tracked here in order to report the first macro invocation 130 | // or #include statement that created a label. 131 | func (gs *globalScope) setLabelDocument(li *ast.LabelDefSt, doc *ast.Document) error { 132 | name := li.Name() 133 | firstDefDoc := gs.labelDoc[name] 134 | if firstDefDoc == nil { 135 | gs.labelDoc[name] = doc 136 | return nil 137 | } 138 | firstDef := gs.label[name] 139 | err := ast.ErrLabelAlreadyDef(firstDef, li) 140 | if loc := firstDefDoc.CreationString(); loc != "" { 141 | err = fmt.Errorf("%w%s", err, loc) 142 | } 143 | return err 144 | } 145 | 146 | // setLabelPC is called by the compiler when the PC value of a label becomes available. 147 | func (gs *globalScope) setLabelPC(name string, pc int) { 148 | gs.labelPC[name] = pc 149 | } 150 | 151 | // lookupLabel returns the PC value of a label, and also reports whether the label was found at all. 152 | func (gs *globalScope) lookupLabel(lref *ast.LabelRefExpr) (pc int, pcValid bool, def *ast.LabelDefSt) { 153 | li, ok := gs.label[lref.Ident] 154 | if !ok { 155 | return 0, false, nil 156 | } 157 | pc, pcValid = gs.labelPC[lref.Ident] 158 | return pc, pcValid, li 159 | } 160 | -------------------------------------------------------------------------------- /asm/statements.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 
8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package asm 18 | 19 | import ( 20 | "github.com/fjl/geas/internal/ast" 21 | ) 22 | 23 | // statement wraps an AST statement in a document. 24 | type statement interface { 25 | ast.Statement 26 | expand(c *Compiler, doc *ast.Document, prog *compilerProg) error 27 | } 28 | 29 | // Statement types. 30 | type ( 31 | opcodeStatement struct{ *ast.OpcodeSt } 32 | labelDefStatement struct{ *ast.LabelDefSt } 33 | macroCallStatement struct{ *ast.MacroCallSt } 34 | includeStatement struct{ *ast.IncludeSt } 35 | assembleStatement struct{ *ast.AssembleSt } 36 | bytesStatement struct{ *ast.BytesSt } 37 | ) 38 | 39 | // statementFromAST converts AST statements into compiler statements. Note this function 40 | // returns nil for statement types the compiler doesn't care about. 
41 | func statementFromAST(st ast.Statement) statement { 42 | switch st := st.(type) { 43 | case *ast.OpcodeSt: 44 | return opcodeStatement{st} 45 | case *ast.LabelDefSt: 46 | return labelDefStatement{st} 47 | case *ast.MacroCallSt: 48 | return macroCallStatement{st} 49 | case *ast.IncludeSt: 50 | return includeStatement{st} 51 | case *ast.AssembleSt: 52 | return assembleStatement{st} 53 | case *ast.BytesSt: 54 | return bytesStatement{st} 55 | default: 56 | return nil 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /asm/testdata/known-bytecode.yaml: -------------------------------------------------------------------------------- 1 | 4788asm: 3373fffffffffffffffffffffffffffffffffffffffe14604d57602036146024575f5ffd5b5f35801560495762016da0810690815414603c575f5ffd5b62016da001545f5260205ff35b5f5ffd5b62016da042064281555f359062016da0015500 2 | 4788asm_ctor: 60618060095f395ff33373fffffffffffffffffffffffffffffffffffffffe14604d57602036146024575f5ffd5b5f35801560495762016da0810690815414603c575f5ffd5b62016da001545f5260205ff35b5f5ffd5b62016da042064281555f359062016da0015500 3 | erc20: 366000803760005160e01c806323b872dd14605c578063095ea7b31460c7578063a9059cbb1461011257806370a082311461015e578063dd62ed3e1461016a578063313ce5671461017957806318160ddd14610179575b60006000fd5b604060042080546044518181116056576004355410605657604435900390556004358054604435809103909155602435805490910190556024356004356044356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60245160045160245233600452604060042080549091019055600435336024356000527f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b92560206000a3005b3354602451818111605657900333556004518054602451019055600435336024356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60206000600451548152f35b60406004205460005260206000f35b 4 | erc20_ctor: 
5861271033556012803803919082908239f3366000803760005160e01c806323b872dd14605c578063095ea7b31460c7578063a9059cbb1461011257806370a082311461015e578063dd62ed3e1461016a578063313ce5671461017957806318160ddd14610179575b60006000fd5b604060042080546044518181116056576004355410605657604435900390556004358054604435809103909155602435805490910190556024356004356044356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60245160045160245233600452604060042080549091019055600435336024356000527f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b92560206000a3005b3354602451818111605657900333556004518054602451019055600435336024356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60206000600451548152f35b60406004205460005260206000f35b 5 | -------------------------------------------------------------------------------- /cmd/geas/geas.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package main 18 | 19 | import ( 20 | "bytes" 21 | "encoding/hex" 22 | "flag" 23 | "fmt" 24 | "io" 25 | "os" 26 | "path" 27 | "path/filepath" 28 | "runtime/debug" 29 | "slices" 30 | "strings" 31 | 32 | "github.com/fjl/geas/asm" 33 | "github.com/fjl/geas/disasm" 34 | "github.com/fjl/geas/internal/evm" 35 | ) 36 | 37 | var t2s = strings.NewReplacer("\t", " ") 38 | 39 | func usage() { 40 | vsn := version() 41 | if len(vsn) > 0 { 42 | fmt.Fprintln(os.Stderr, "Version:", vsn) 43 | } 44 | fmt.Fprint(os.Stderr, `Usage: geas {-a | -d | -i} [options...] `+ 45 | t2s.Replace(` 46 | -a: ASSEMBLER (default) 47 | 48 | -o output file name 49 | -bin output binary instead of hex 50 | -no-nl skip newline at end of hex output 51 | 52 | -d: DISASSEMBLER 53 | 54 | -bin input is binary bytecode 55 | -target configure instruction set 56 | -o output file name 57 | -blocks blank lines between logical blocks 58 | -pc show program counter 59 | -uppercase show instruction names as uppercase 60 | 61 | -i: INFORMATION 62 | 63 | -targets show supported target fork names 64 | -ops show all opcodes in target 65 | -lineage show target fork chain 66 | 67 | -h: HELP 68 | 69 | `)) 70 | } 71 | 72 | func main() { 73 | if len(os.Args) < 2 { 74 | usage() 75 | os.Exit(2) 76 | } 77 | 78 | mode := os.Args[1] 79 | switch { 80 | case mode == "-a": 81 | assembler(os.Args[2:]) 82 | 83 | case mode == "-d": 84 | disassembler(os.Args[2:]) 85 | 86 | case mode == "-i": 87 | information(os.Args[2:]) 88 | 89 | case mode == "-h", mode == "-help", mode == "--help": 90 | usage() 91 | os.Exit(0) 92 | 93 | default: 94 | assembler(os.Args[1:]) 95 | } 96 | } 97 | 98 | const inputLimit = 10 * 1024 * 1024 99 | 100 | func assembler(args []string) { 101 | var ( 102 | fs = newFlagSet("-a") 103 | outputFile = fs.String("o", "", "") 104 | binary = fs.Bool("bin", false, "") 105 | noNL = fs.Bool("no-nl", false, "") 106 | ) 107 | parseFlags(fs, args) 108 | 109 | // Assemble. 
110 | var c = asm.New(nil) 111 | var bin []byte 112 | file := fileArg(fs) 113 | if file != "-" { 114 | wd, _ := os.Getwd() 115 | c.SetFilesystem(os.DirFS(wd)) 116 | fp := path.Clean(filepath.ToSlash(file)) 117 | bin = c.CompileFile(fp) 118 | } else { 119 | source, err := io.ReadAll(io.LimitReader(os.Stdin, inputLimit)) 120 | if err != nil { 121 | exit(1, err) 122 | } 123 | bin = c.CompileString(string(source)) 124 | } 125 | 126 | // Show errors. 127 | for _, err := range c.ErrorsAndWarnings() { 128 | fmt.Fprintln(os.Stderr, err) 129 | } 130 | if c.Failed() { 131 | os.Exit(1) 132 | } 133 | 134 | // Write output. 135 | var err error 136 | output := os.Stdout 137 | if *outputFile != "" { 138 | output, err = os.OpenFile(*outputFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) 139 | if err != nil { 140 | exit(1, err) 141 | } 142 | defer output.Close() 143 | } 144 | if *binary { 145 | _, err = output.Write(bin) 146 | } else { 147 | nl := "\n" 148 | if *noNL { 149 | nl = "" 150 | } 151 | _, err = fmt.Fprintf(output, "%x%s", bin, nl) 152 | } 153 | if err != nil { 154 | exit(1, err) 155 | } 156 | } 157 | 158 | func disassembler(args []string) { 159 | var ( 160 | fs = newFlagSet("-d") 161 | outputFile = fs.String("o", "", "") 162 | showPC = fs.Bool("pc", false, "") 163 | showBlocks = fs.Bool("blocks", true, "") 164 | uppercase = fs.Bool("uppercase", false, "") 165 | binary = fs.Bool("bin", false, "") 166 | target = fs.String("target", "", "") 167 | ) 168 | parseFlags(fs, args) 169 | 170 | // Read input. 171 | var err error 172 | var infd io.ReadCloser 173 | file := fileArg(fs) 174 | if file == "-" { 175 | infd = os.Stdin 176 | } else { 177 | infd, err = os.Open(file) 178 | if err != nil { 179 | exit(1, err) 180 | } 181 | } 182 | bytecode, err := io.ReadAll(io.LimitReader(infd, inputLimit)) 183 | if err != nil { 184 | exit(1, err) 185 | } 186 | infd.Close() 187 | 188 | // Possibly convert from hex. 
189 | if !*binary { 190 | dec := make([]byte, hex.DecodedLen(len(bytecode))) 191 | l, err := hex.Decode(dec, bytes.TrimSpace(bytecode)) 192 | if err != nil { 193 | exit(1, err) 194 | } 195 | bytecode = dec[:l] 196 | } 197 | 198 | output := os.Stdout 199 | if *outputFile != "" { 200 | output, err = os.OpenFile(*outputFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) 201 | if err != nil { 202 | exit(1, err) 203 | } 204 | defer output.Close() 205 | } 206 | 207 | // Disassemble. 208 | d := disasm.New() 209 | d.SetShowBlocks(*showBlocks) 210 | d.SetShowPC(*showPC) 211 | d.SetUppercase(*uppercase) 212 | if *target != "" { 213 | if err := d.SetTarget(*target); err != nil { 214 | exit(2, err) 215 | } 216 | } 217 | err = d.Disassemble(bytecode, output) 218 | exit(1, err) 219 | } 220 | 221 | func information(args []string) { 222 | var ran bool 223 | checkRunOnce := func() { 224 | if ran { 225 | exit(2, fmt.Errorf("can't show more than one thing at once in -i mode")) 226 | } 227 | ran = true 228 | } 229 | showTargets := func(arg string) error { 230 | checkRunOnce() 231 | for _, name := range evm.AllForks() { 232 | fmt.Println(name) 233 | } 234 | return nil 235 | } 236 | showOps := func(arg string) error { 237 | checkRunOnce() 238 | is := evm.FindInstructionSet(arg) 239 | if is == nil { 240 | return fmt.Errorf("unknown fork %q", arg) 241 | } 242 | for _, op := range is.AllOps() { 243 | fmt.Println(op.Name) 244 | } 245 | return nil 246 | } 247 | showParents := func(arg string) error { 248 | checkRunOnce() 249 | is := evm.FindInstructionSet(arg) 250 | if is == nil { 251 | return fmt.Errorf("unknown fork %q", arg) 252 | } 253 | for _, f := range is.Parents() { 254 | fmt.Println(f) 255 | } 256 | return nil 257 | } 258 | 259 | var fs = newFlagSet("-i") 260 | fs.BoolFunc("targets", "", showTargets) 261 | fs.Func("ops", "", showOps) 262 | fs.Func("lineage", "", showParents) 263 | parseFlags(fs, args) 264 | if !ran { 265 | usage() 266 | exit(2, fmt.Errorf("please select information 
topic")) 267 | } 268 | if fs.NArg() > 0 { 269 | exit(2, fmt.Errorf("too many arguments")) 270 | } 271 | } 272 | 273 | func newFlagSet(mode string) *flag.FlagSet { 274 | fs := flag.NewFlagSet("geas "+mode, flag.ContinueOnError) 275 | fs.Usage = usage 276 | fs.SetOutput(io.Discard) 277 | return fs 278 | } 279 | 280 | func parseFlags(fs *flag.FlagSet, args []string) { 281 | if err := fs.Parse(args); err != nil { 282 | exit(2, err) 283 | } 284 | } 285 | 286 | func fileArg(fs *flag.FlagSet) string { 287 | switch fs.NArg() { 288 | case 1: 289 | return fs.Arg(0) 290 | case 0: 291 | exit(2, fmt.Errorf("need file name as argument")) 292 | default: 293 | if slices.ContainsFunc(fs.Args(), func(s string) bool { return strings.HasPrefix(s, "-") }) { 294 | exit(2, fmt.Errorf("too many arguments (flags must precede input filename)")) 295 | } 296 | exit(2, fmt.Errorf("too many arguments")) 297 | } 298 | return "" 299 | } 300 | 301 | func exit(code int, err error) { 302 | if err == nil || err == flag.ErrHelp { 303 | os.Exit(0) 304 | } 305 | fmt.Fprintf(os.Stderr, "Error: %v\n", err) 306 | os.Exit(code) 307 | } 308 | 309 | func version() string { 310 | info, _ := debug.ReadBuildInfo() 311 | if info == nil { 312 | return "" 313 | } 314 | if info.Main.Version != "(devel)" { 315 | return info.Main.Version 316 | } 317 | gitVersion := "" 318 | dirty := false 319 | for _, s := range info.Settings { 320 | switch s.Key { 321 | case "vcs.revision": 322 | gitVersion = s.Value[:16] 323 | case "vcs.modified": 324 | if s.Value == "true" { 325 | dirty = true 326 | } 327 | } 328 | } 329 | if gitVersion == "" { 330 | return "" 331 | } 332 | if dirty { 333 | gitVersion += "-dirty" 334 | } 335 | return "git:" + gitVersion 336 | } 337 | -------------------------------------------------------------------------------- /disasm/disassembler.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the 
go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 16 | 17 | // Package disasm is a disassembler for EVM bytecode. 18 | package disasm 19 | 20 | import ( 21 | "bufio" 22 | "encoding/hex" 23 | "fmt" 24 | "io" 25 | "strings" 26 | 27 | "github.com/fjl/geas/internal/evm" 28 | ) 29 | 30 | // Disassembler turns EVM bytecode into readable text instructions. 31 | type Disassembler struct { 32 | evm *evm.InstructionSet 33 | uppercase bool 34 | showPC bool 35 | noBlanks bool 36 | 37 | pcBuffer, pcHex []byte 38 | } 39 | 40 | func (d *Disassembler) setDefaults() { 41 | if d.evm == nil { 42 | d.evm = evm.FindInstructionSet(evm.LatestFork) 43 | } 44 | } 45 | 46 | // New creates a disassembler. 47 | func New() *Disassembler { 48 | return new(Disassembler) 49 | } 50 | 51 | // SetTarget sets the instruction set used by the disassembler. 52 | // It defaults to the latest known Ethereum fork. 53 | func (d *Disassembler) SetTarget(name string) error { 54 | is := evm.FindInstructionSet(name) 55 | if is == nil { 56 | return fmt.Errorf("unknown instruction set %q", name) 57 | } 58 | d.evm = is 59 | return nil 60 | } 61 | 62 | // SetUppercase toggles printing instruction names in uppercase. 
63 | func (d *Disassembler) SetUppercase(on bool) { 64 | d.uppercase = on 65 | } 66 | 67 | // SetShowPC toggles printing of program counter on each line. 68 | func (d *Disassembler) SetShowPC(on bool) { 69 | d.showPC = on 70 | } 71 | 72 | // SetShowBlocks toggles printing of blank lines at block boundaries. 73 | func (d *Disassembler) SetShowBlocks(on bool) { 74 | d.noBlanks = !on 75 | } 76 | 77 | // Disassemble is the main entry point of the disassembler. 78 | // It runs through the bytecode and emits text to outW. 79 | func (d *Disassembler) Disassemble(bytecode []byte, outW io.Writer) error { 80 | d.setDefaults() 81 | d.pcBuffer = make([]byte, digitsOfPC(len(bytecode))) 82 | d.pcHex = make([]byte, hex.EncodedLen(len(d.pcBuffer))) 83 | out := bufio.NewWriter(outW) 84 | 85 | var prevOp *evm.Op 86 | for pc := 0; pc < len(bytecode); pc++ { 87 | op := d.evm.OpByCode(bytecode[pc]) 88 | d.newline(out, prevOp, op) 89 | d.printPrefix(out, pc) 90 | if op == nil { 91 | d.printInvalid(out, bytecode[pc]) 92 | } else { 93 | if op.Push { 94 | size := d.printPush(out, op, bytecode[pc:]) 95 | pc += size 96 | } else { 97 | d.printOp(out, op) 98 | } 99 | } 100 | 101 | prevOp = op 102 | } 103 | d.newline(out, prevOp, nil) 104 | return out.Flush() 105 | } 106 | 107 | func (d *Disassembler) printPrefix(out io.Writer, pc int) { 108 | if d.showPC { 109 | for i := range d.pcBuffer { 110 | d.pcBuffer[len(d.pcBuffer)-1-i] = byte(pc >> (8 * i)) 111 | } 112 | hex.Encode(d.pcHex, d.pcBuffer) 113 | fmt.Fprintf(out, "%s: ", d.pcHex) 114 | } 115 | } 116 | 117 | func (d *Disassembler) printInvalid(out io.Writer, b byte) { 118 | fmt.Fprintf(out, "#bytes %#x\n", b) 119 | } 120 | 121 | func (d *Disassembler) printOp(out io.Writer, op *evm.Op) { 122 | name := op.Name 123 | if !d.uppercase { 124 | name = strings.ToLower(op.Name) 125 | } 126 | fmt.Fprint(out, name) 127 | } 128 | 129 | func (d *Disassembler) printPush(out io.Writer, op *evm.Op, code []byte) (dataSize int) { 130 | size := op.PushSize() 
131 | if size == 0 { 132 | d.printOp(out, op) 133 | return 0 134 | } 135 | if size > len(code)-1 { 136 | // Handle truncated PUSH at end of code. 137 | fmt.Fprintf(out, "#bytes %#x", code) 138 | return len(code) - 1 139 | } 140 | d.printOp(out, op) 141 | data := code[1 : size+1] 142 | fmt.Fprintf(out, " %#x", data) 143 | return len(data) 144 | } 145 | 146 | func (d *Disassembler) newline(out io.Writer, prevOp *evm.Op, nextOp *evm.Op) { 147 | if prevOp == nil { 148 | return 149 | } 150 | out.Write([]byte{'\n'}) 151 | if d.noBlanks || nextOp == nil { 152 | return 153 | } 154 | if prevOp.Jump || nextOp.JumpDest || prevOp.Term { 155 | out.Write([]byte{'\n'}) 156 | } 157 | } 158 | 159 | func digitsOfPC(codesize int) int { 160 | switch { 161 | case codesize < (1<<16 - 1): 162 | return 2 163 | case codesize < (1<<24 - 1): 164 | return 3 165 | case codesize < (1<<32 - 1): 166 | return 4 167 | default: 168 | return 8 169 | } 170 | } 171 | -------------------------------------------------------------------------------- /disasm/disassembler_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2025 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package disasm 18 | 19 | import ( 20 | "bytes" 21 | "encoding/hex" 22 | "strings" 23 | "testing" 24 | 25 | "github.com/fjl/geas/asm" 26 | ) 27 | 28 | func TestIncompletePush(t *testing.T) { 29 | bytecode, _ := hex.DecodeString("6080604052348015600e575f80fd5b50603e80601a5f395ff3fe60806040525f80fdfea2646970667358221220ba4339602dd535d09d71fae3164f7aa7f6e098ec879fc9e8f36bd912d4877c5264736f6c63430008190033") 30 | expectedOutput := strings.TrimSpace(` 31 | push1 0x80 32 | push1 0x40 33 | mstore 34 | callvalue 35 | dup1 36 | iszero 37 | push1 0x0e 38 | jumpi 39 | push0 40 | dup1 41 | revert 42 | jumpdest 43 | pop 44 | push1 0x3e 45 | dup1 46 | push1 0x1a 47 | push0 48 | codecopy 49 | push0 50 | return 51 | #bytes 0xfe 52 | push1 0x80 53 | push1 0x40 54 | mstore 55 | push0 56 | dup1 57 | revert 58 | #bytes 0xfe 59 | log2 60 | push5 0x6970667358 61 | #bytes 0x22 62 | slt 63 | keccak256 64 | #bytes 0xba 65 | number 66 | codecopy 67 | push1 0x2d 68 | #bytes 0xd5 69 | calldataload 70 | #bytes 0xd0 71 | swap14 72 | push18 0xfae3164f7aa7f6e098ec879fc9e8f36bd912 73 | #bytes 0xd4 74 | dup8 75 | #bytes 0x7c5264736f6c63430008190033 76 | `) 77 | 78 | var buf strings.Builder 79 | d := New() 80 | d.SetShowBlocks(false) 81 | d.SetTarget("cancun") 82 | d.Disassemble(bytecode, &buf) 83 | output := strings.TrimSpace(buf.String()) 84 | if output != expectedOutput { 85 | t.Fatal("wrong output:", output) 86 | } 87 | 88 | // try round trip 89 | a := asm.New(nil) 90 | rtcode := a.CompileString(output) 91 | if !bytes.Equal(rtcode, bytecode) { 92 | t.Error("disassembly did not round-trip") 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /example/4788asm.eas: -------------------------------------------------------------------------------- 1 | ;;; __ ___________ ____ 2 | ;;; / // /__ ( __ )( __ )____ __________ ___ 3 | ;;; / // /_ / / __ / __ / __ `/ ___/ __ `__ \ 4 | ;;; /__ __// / /_/ / /_/ / /_/ (__ ) / / / / / 5 | ;;; /_/ 
/_/\____/\____/\__,_/____/_/ /_/ /_/ 6 | ;;; 7 | ;;; This is an implementation of EIP-4788's predeploy contract. It implements two 8 | ;;; ring buffers to create bounded beacon root lookup. The first ring buffer is a 9 | ;;; timestamp % buflen -> timestamp mapping. This is used to ensure timestamp 10 | ;;; argument actually matches the stored root and isn't different dividend. The 11 | ;;; second ring buffer store the beacon root. It's also keyed by timestamp % 12 | ;;; buflen and the shifted right by buflen so the two don't overlap. 13 | ;;; 14 | ;;; The ring buffers can be visualized as follows: 15 | ;;; 16 | ;;; buflen = 10 17 | ;;; |--------------|--------------| 18 | ;;; 0 10 20 19 | ;;; timestamps beacon roots 20 | ;;; 21 | ;;; To get the corresponding beacon root for a specific timestamp, simply add 22 | ;;; buflen to the timestamp's index in the first ring buffer. The sum will be 23 | ;;; the storage slot in the second ring buffer where it is stored. 24 | 25 | #pragma target "cancun" 26 | 27 | ;;; ----------------------------------------------------------------------------- 28 | ;;; MACROS ---------------------------------------------------------------------- 29 | 30 | ;;; aka. HISTORY_BUFFER_LENGTH as defined in the EIP. 31 | #define buflen = 93600 32 | 33 | ;;; sysaddr is the address which calls the contract to submit a new root. 34 | #define sysaddr = .address(0xfffffffffffffffffffffffffffffffffffffffe) 35 | 36 | ;;; %do_revert sets up and then executes a revert(0,0) operation. 37 | #define %do_revert { 38 | push 0 ; [0] 39 | push 0 ; [0, 0] 40 | revert ; [] 41 | } 42 | 43 | ;;; ----------------------------------------------------------------------------- 44 | ;;; PROGRAM START---------------------------------------------------------------- 45 | 46 | ;; Protect the submit routine by verifying the caller is equal to sysaddr. 
47 | caller ; [caller] 48 | push sysaddr ; [sysaddr, caller] 49 | eq ; [sysaddr == caller] 50 | jumpi @submit ; [] 51 | 52 | ;; Fallthrough if addresses don't match -- this means the caller intends 53 | ;; to read a root. 54 | 55 | ;; Check if calldata is equal to 32 bytes. 56 | push 32 ; [32] 57 | calldatasize ; [calldatasize, 32] 58 | eq ; [calldatasize == 32] 59 | 60 | ;; Jump to continue if length-check passed, otherwise revert. 61 | jumpi @loadtime ; [] 62 | %do_revert ; [] 63 | 64 | loadtime: 65 | ;; Load input timestamp. 66 | push 0 ; [0] 67 | calldataload ; [input_timestamp] 68 | dup1 ; [input_timestamp, input_timestamp] 69 | 70 | ;; Verify input timestamp is non-zero. 71 | iszero ; [input_timestamp == 0, input_timestamp] 72 | jumpi @throw ; [input_timestamp] 73 | 74 | ;; Compute the timestamp index and load from storage. 75 | push buflen ; [buflen, input_timestamp] 76 | dup2 ; [input_timestamp, buflen, input_timestamp] 77 | mod ; [time_index, input_timestamp] 78 | swap1 ; [input_timestamp, time_index] 79 | dup2 ; [time_index, input_timestamp, time_index] 80 | sload ; [stored_timestamp, input_timestamp, time_index] 81 | 82 | ;; Verify stored timestamp matches input timestamp. It's possible these 83 | ;; don't match if the slot has been overwritten by the ring buffer or if 84 | ;; the timestamp input wasn't a valid previous timestamp. 85 | eq ; [stored_timestamp == input_timestamp, time_index] 86 | jumpi @loadroot ; [time_index] 87 | %do_revert ; [] 88 | 89 | loadroot: 90 | ;; Extend index to get root index. 91 | push buflen ; [buflen, time_index] 92 | add ; [root_index] 93 | sload ; [root] 94 | 95 | ;; Write the retrieved root to memory so it can be returned. 96 | push 0 ; [0, root] 97 | mstore ; [] 98 | 99 | ;; Return the root. 100 | push 32 ; [size] 101 | push 0 ; [offset, size] 102 | return ; [] 103 | 104 | throw: 105 | ;; Reverts current execution with no return data. 
106 | %do_revert 107 | 108 | submit: 109 | ;; Calculate the index the timestamp should be stored at, e.g. 110 | ;; time_index = (time % buflen). 111 | push buflen ; [buflen] 112 | timestamp ; [time, buflen] 113 | mod ; [time % buflen] 114 | 115 | ;; Write timestamp into storage slot at time_index. 116 | timestamp ; [time, time_index] 117 | dup2 ; [time_index, time, time_index] 118 | sstore ; [time_index] 119 | 120 | ;; Get root from calldata and write into root_index. No validation is 121 | ;; done on the input root. Because the routine is protected by a caller 122 | ;; check against sysaddr, it's okay to assume the value is correctly 123 | ;; given. 124 | push 0 ; [0, time_index] 125 | calldataload ; [root, time_index] 126 | swap1 ; [time_index, root] 127 | push buflen ; [buflen, time_index, root] 128 | add ; [root_index, root] 129 | sstore ; [] 130 | 131 | stop ; [] 132 | -------------------------------------------------------------------------------- /example/4788asm_ctor.eas: -------------------------------------------------------------------------------- 1 | ;;; __ ___________ ____ 2 | ;;; / // /__ ( __ )( __ )____ __________ ___ 3 | ;;; / // /_ / / __ / __ / __ `/ ___/ __ `__ \ 4 | ;;; /__ __// / /_/ / /_/ / /_/ (__ ) / / / / / 5 | ;;; /_/ /_/\____/\____/\__,_/____/_/ /_/ /_/ 6 | ;;; 7 | ;;; constructor code 8 | 9 | #pragma target "cancun" 10 | 11 | push @.end - @.start ; [size] 12 | dup1 ; [size, size] 13 | push @.start ; [start, size, size] 14 | push 0 ; [0, start, size, size] 15 | codecopy ; [size] 16 | push 0 ; [0, size] 17 | return ; [] 18 | 19 | .start: 20 | #assemble "4788asm.eas" 21 | .end: 22 | -------------------------------------------------------------------------------- /example/erc20/erc20.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 2 | ;;; 3 | ;;; Minimal ERC-20 implementation in raw assembly. 
4 | ;;; 5 | ;;; Storage Layout 6 | ;;; -- 7 | ;;; balance(address) => 0x000000000000000000000000 || address 8 | ;;; allowance(owner, spender) => keccak(owner || spender) 9 | 10 | #pragma target "constantinople" 11 | 12 | #define %match(candidate, label) { ; [selector] 13 | dup1 ; [selector, selector] 14 | push $candidate ; [candidate, selector, selector] 15 | eq ; [success, selector] 16 | push $label ; [label, success, selector] 17 | jumpi ; [selector] 18 | } 19 | 20 | ;;; Program start. 21 | 22 | ;; Read the calldata into memory. 23 | calldatasize ; [calldatasize] 24 | push 0 ; [0, calldatasize] 25 | dup1 ; [0, 0, calldatasize] 26 | calldatacopy ; [] 27 | 28 | ;; Extract only the function selector 29 | push 0 ; [0] 30 | mload ; [dirty_selector] 31 | push 224 ; [224, dirty_selector] 32 | shr ; [selector] 33 | 34 | ;; Jump to the selected function. 35 | %match(S_transferFrom, @TransferFrom) 36 | %match(S_approve, @Approve) 37 | %match(S_transfer, @Transfer) 38 | 39 | ;; Check the view functions last to not waste gas on-chain. 40 | %match(S_balanceOf, @BalanceOf) 41 | %match(S_allowance, @Allowance) 42 | %match(S_decimals, @Return0) 43 | %match(S_totalSupply, @Return0) 44 | 45 | ; [selector] is left on stack here. 46 | 47 | FAIL: 48 | ;; Catchall for reverts. 49 | push 0 ; [0, selector] 50 | push 0 ; [0, 0, selector] 51 | revert ; [selector] 52 | 53 | ;;; Write operations. 54 | ;;; These do not return, so there are STOPs in between. 55 | 56 | #include "op_transferFrom.eas" 57 | stop 58 | #include "op_approve.eas" 59 | stop 60 | #include "op_transfer.eas" 61 | stop 62 | 63 | ;;; View functions, these return on their own. 64 | 65 | #include "op_balanceOf.eas" 66 | #include "op_allowance.eas" 67 | 68 | ;;; View functions for token metadata, these just return zero. 
69 | 70 | #define S_decimals = .selector("decimals()") 71 | #define S_totalSupply = .selector("totalSupply()") 72 | 73 | Return0: 74 | -------------------------------------------------------------------------------- /example/erc20/erc20_ctor.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 - constructor 2 | ;;; 3 | 4 | #pragma target "constantinople" 5 | 6 | pc ; [0] 7 | 8 | ;; give deployer initial supply 9 | push 10000 ; [balance, 0] 10 | caller ; [caller, balance, 0] 11 | sstore ; [0] 12 | 13 | push @.start ; [start, 0] 14 | dup1 ; [start, start, 0] 15 | codesize ; [codesize, start, start, 0] 16 | sub ; [length, start, 0] 17 | swap2 ; [0, start, length] 18 | swap1 ; [start, 0, length] 19 | dup3 ; [length, start, 0, length] 20 | swap1 ; [start, length, 0, length] 21 | dup3 ; [0, start, length, 0, length] 22 | codecopy ; [0, length] 23 | return ; [] 24 | 25 | .start: 26 | #assemble "erc20.eas" 27 | -------------------------------------------------------------------------------- /example/erc20/op_allowance.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 - allowance 2 | ;;; 3 | ;;; calldata structure 4 | ;;; +--------------------+------------------+------------------+------------------+--------------------+ 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | owner (20 bytes) | zeros (12 bytes) | spender (20 bytes) | 6 | ;;; +--------------------+------------------+------------------+------------------+--------------------+ 7 | 8 | #define S_allowance = .selector("allowance(address,address)") 9 | 10 | Allowance: 11 | push 64 ; [len] 12 | push 4 ; [offset, len] 13 | keccak256 ; [key] 14 | 15 | sload ; [allowance] 16 | push 0 ; [offset, allowance] 17 | mstore ; [] 18 | 19 | push 32 ; [32] 20 | push 0 ; [0, 32] 21 | return ; [] 22 | -------------------------------------------------------------------------------- /example/erc20/op_approve.eas: 
-------------------------------------------------------------------------------- 1 | ;;; ERC20 - approve 2 | ;;; 3 | ;;; calldata structure 4 | ;;; +--------------------+------------------+--------------------+-------------------+ 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | spender (20 bytes) | amount (32 bytes) | 6 | ;;; +--------------------+------------------+--------------------+-------------------+ 7 | 8 | #define S_approve = .selector("approve(address,uint256)") 9 | #define logtopic = .keccak256("Approval(address,address,uint256)") 10 | 11 | Approve: 12 | push 36 ; [36] 13 | mload ; [amt] 14 | 15 | push 4 ; [4, amt] 16 | mload ; [spender, amt] 17 | 18 | ;; write the spender to mem[36..68] 19 | push 36 ; [36, spender, amt] 20 | mstore ; [amt] 21 | 22 | ;; write the caller to mem[4..36] 23 | caller ; [caller, amt] 24 | push 4 ; [4, caller, amt] 25 | mstore ; [amt] 26 | 27 | ;; hash mem[4..68] 28 | push 64 ; [len, amt] 29 | push 4 ; [offset, len, amt] 30 | keccak256 ; [key, amt] 31 | dup1 ; [key, key, amt] 32 | 33 | sload ; [allowance, key, amt] 34 | swap1 ; [key, allowance, amt] 35 | swap2 ; [amt, allowance, key] 36 | add ; [new_allowance, key] 37 | swap1 ; [key, new_allowance] 38 | sstore ; [] 39 | 40 | ;; output event 41 | push 4 ; [4] 42 | calldataload ; [spender] 43 | caller ; [owner, spender] 44 | push 36 ; [36, owner, spender] 45 | calldataload ; [amt, owner, spender] 46 | push 0 ; [0, value, owner, spender] 47 | mstore ; [owner, spender] 48 | push logtopic ; [topic, owner, spender] 49 | push 32 ; [32, topic, owner, spender] 50 | push 0 ; [0, 32, topic, owner, spender] 51 | log3 ; [] 52 | -------------------------------------------------------------------------------- /example/erc20/op_balanceOf.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 - balanceOf 2 | ;;; 3 | ;;; calldata structure 4 | ;;; +--------------------+------------------+--------------------+ 5 | ;;; | selector (4 bytes) | zeros (12
bytes) | address (20 bytes) | 6 | ;;; +--------------------+------------------+--------------------+ 7 | 8 | #define S_balanceOf = .selector("balanceOf(address)") 9 | 10 | BalanceOf: 11 | ;; prepare return parameters 12 | push 32 ; [ret_len] 13 | push 0 ; [ret_offset, ret_len] 14 | 15 | ;; skip the selector, load the address 16 | push 4 ; [4, ret_offset, ret_len] 17 | mload ; [addr, ret_offset, ret_len] 18 | 19 | ;; load balance 20 | sload ; [balance, ret_offset, ret_len] 21 | 22 | ;; prepare balance to return 23 | dup2 ; [0, balance, ret_offset, ret_len] 24 | mstore ; [ret_offset, ret_len] 25 | 26 | return ; [] 27 | -------------------------------------------------------------------------------- /example/erc20/op_transfer.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 - transfer 2 | ;;; 3 | ;;; calldata structure 4 | ;;; +--------------------+------------------+---------------+-------------------+ 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | to (20 bytes) | amount (32 bytes) | 6 | ;;; +--------------------+------------------+---------------+-------------------+ 7 | 8 | #define S_transfer = .selector("transfer(address,uint256)") 9 | #define logtopic = .keccak256("Transfer(address,address,uint256)") 10 | 11 | Transfer: 12 | caller ; [from] 13 | sload ; [from_balance] 14 | push1 36 ; [36, from_balance] 15 | mload ; [amt, from_balance] 16 | dup2 ; [from_balance, amt, from_balance] 17 | dup2 ; [amt, from_balance, amt, from_balance] 18 | 19 | ;; if amt > from_balance, revert 20 | gt ; [amt > from_balance, amt, from_balance] 21 | jumpi @FAIL ; [amt, from_balance] 22 | 23 | ;; sstore(from, from_balance - amt) 24 | swap1 ; [from_balance, amt] 25 | sub ; [from_balance - amt] 26 | caller ; [from, from_balance - amt] 27 | sstore ; [] 28 | 29 | ;; sstore(to, to_balance + amt) 30 | push 4 ; [4] 31 | mload ; [to] 32 | dup1 ; [to, to] 33 | sload ; [to_balance, to] 34 | push 36 ; [36, to_balance, to] 35 | mload ; [amt, 
to_balance, to] 36 | add ; [amt + to_balance, to] 37 | swap1 ; [to, amt + to_balance] 38 | sstore ; [] 39 | 40 | ;; output event 41 | push 4 ; [4] 42 | calldataload ; [to] 43 | caller ; [from, to] 44 | push 36 ; [36, from, to] 45 | calldataload ; [amt, from, to] 46 | push 0 ; [0, amt, from, to] 47 | mstore ; [from, to] 48 | push logtopic ; [topic, from, to] 49 | push 32 ; [32, topic, from, to] 50 | push 0 ; [0, 32, topic, from, to] 51 | log3 ; [] 52 | -------------------------------------------------------------------------------- /example/erc20/op_transferFrom.eas: -------------------------------------------------------------------------------- 1 | ;;; ERC20 - transferFrom 2 | ;;; 3 | ;;; calldata structure 4 | ;;; +--------------------+-----------------+---------------+-------------------+ 5 | ;;; | selector (4 bytes) | from (32 bytes) | to (32 bytes) | amount (32 bytes) | 6 | ;;; +--------------------+-----------------+---------------+-------------------+ 7 | 8 | #define S_transferFrom = .selector("transferFrom(address,address,uint256)") 9 | #define logtopic = .keccak256("Transfer(address,address,uint256)") 10 | 11 | TransferFrom: 12 | push 64 ; [len] 13 | push 4 ; [offset, len] 14 | keccak256 ; [key] 15 | dup1 ; [key, key] 16 | 17 | sload ; [available, key] 18 | 19 | push 68 ; [68, available, key] 20 | mload ; [amt, available, key] 21 | 22 | ;; if amt > available, revert 23 | dup2 ; [available, amt, available, key] 24 | dup2 ; [amt, available, amt, available, key] 25 | gt ; [amt > available, amt, available, key] 26 | jumpi @FAIL ; [amt, available, key] 27 | 28 | ;; load the sender balance 29 | push 4 ; [4, amt, available, key] 30 | calldataload ; [from, amt, available, key] 31 | sload ; [from_balance, amt, available, key] 32 | 33 | lt ; [from_balance < amt, available, key] 34 | jumpi @FAIL ; [available, key] 35 | 36 | ;; reduce allowance by amt 37 | push 68 ; [68, available, key] 38 | calldataload ; [amt, available, key] 39 | swap1 ; [available, amt, key] 40 | 
sub ; [available - amt, key] 41 | swap1 ; [key, available - amt] 42 | sstore ; [] 43 | 44 | ;; reduce from balance by amt 45 | push 4 ; [4] 46 | calldataload ; [from] 47 | dup1 ; [from, from] 48 | sload ; [from_balance, from] 49 | 50 | push 68 ; [68, from_balance, from] 51 | calldataload ; [amt, from_balance, from] 52 | dup1 ; [amt, amt, from_balance, from] 53 | 54 | swap2 ; [from_balance, amt, amt, from] 55 | sub ; [from_balance - amt, amt from] 56 | swap1 ; [amt, from_balance - amt, from] 57 | swap2 ; [from, from_balance - amt, amt]] 58 | sstore ; [amt] 59 | 60 | ;; increase to balance by amt 61 | push 36 ; [36, amt] 62 | calldataload ; [to, amt] 63 | dup1 ; [to, to, amt] 64 | sload ; [to_balance, to, amt] 65 | 66 | swap1 ; [to, to_balance, amt] 67 | swap2 ; [amt, to_balance, to] 68 | add ; [amt + to_balance, to] 69 | swap1 ; [to, amt + to_balance] 70 | sstore 71 | 72 | ;; output event 73 | push 36 ; [36] 74 | calldataload ; [to] 75 | push 4 ; [4, to] 76 | calldataload ; [from, to] 77 | push 68 ; [68, from, to] 78 | calldataload ; [amt, from, to] 79 | push 0 ; [0, amt, from, to] 80 | mstore ; [from, to] 81 | push logtopic ; [topic, from, to] 82 | push 32 ; [32, topic, from, to] 83 | push 0 ; [0, 32, topic, from, to] 84 | log3 ; [] 85 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/fjl/geas 2 | 3 | go 1.23 4 | 5 | require ( 6 | github.com/ethereum/go-ethereum v1.12.2 7 | golang.org/x/crypto v0.13.0 8 | gopkg.in/yaml.v3 v3.0.1 9 | ) 10 | 11 | require ( 12 | github.com/btcsuite/btcd/btcec/v2 v2.2.0 // indirect 13 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect 14 | github.com/holiman/uint256 v1.2.3 // indirect 15 | golang.org/x/sys v0.12.0 // indirect 16 | ) 17 | -------------------------------------------------------------------------------- /go.sum: 
-------------------------------------------------------------------------------- 1 | github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k= 2 | github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU= 3 | github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 h1:q0rUy8C/TYNBQS1+CGKw68tLOFYSNEs0TFnxxnS9+4U= 4 | github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc= 5 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 6 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK09Y2A4Xv7EE0= 8 | github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= 9 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= 10 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= 11 | github.com/ethereum/go-ethereum v1.12.2 h1:eGHJ4ij7oyVqUQn48LBz3B7pvQ8sV0wGJiIE6gDq/6Y= 12 | github.com/ethereum/go-ethereum v1.12.2/go.mod h1:1cRAEV+rp/xX0zraSCBnu9Py3HQ+geRMj3HdR+k0wfI= 13 | github.com/holiman/uint256 v1.2.3 h1:K8UWO1HUJpRMXBxbmaY1Y8IAMZC/RsKB+ArEnnK4l5o= 14 | github.com/holiman/uint256 v1.2.3/go.mod h1:SC8Ryt4n+UBbPbIBKaG9zbbDlp4jOru9xFZmPzLUTxw= 15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 17 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 18 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 19 | golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= 20 | golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= 21 | 
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= 22 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 23 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 24 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 25 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 26 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 27 | -------------------------------------------------------------------------------- /internal/ast/arith.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package ast 18 | 19 | //go:generate go run golang.org/x/tools/cmd/stringer@latest -type ArithOp 20 | 21 | // ArithOp is an arithmetic operation. 
22 | type ArithOp byte 23 | 24 | const ( 25 | ArithPlus = ArithOp(iota + 1) // + 26 | ArithMinus // - 27 | ArithMul // * 28 | ArithDiv // / 29 | ArithMod // % 30 | ArithLshift // << 31 | ArithRshift // >> 32 | ArithAnd // & 33 | ArithOr // | 34 | ArithXor // ^ 35 | ArithMax = ArithXor 36 | ) 37 | 38 | // arithChars contains all the single-character arithmetic operations. 39 | // note that '%' is also absent from this list since it has a dual purpose. 40 | var arithChars = map[rune]ArithOp{ 41 | '+': ArithPlus, 42 | '-': ArithMinus, 43 | '*': ArithMul, 44 | '/': ArithDiv, 45 | '&': ArithAnd, 46 | '|': ArithOr, 47 | '^': ArithXor, 48 | } 49 | 50 | func tokenArithOp(tok token) ArithOp { 51 | if tok.typ != arith { 52 | panic("token is not arith") 53 | } 54 | switch { 55 | case tok.text == "<<": 56 | return ArithLshift 57 | case tok.text == ">>": 58 | return ArithRshift 59 | case tok.text == "%": 60 | return ArithMod 61 | default: 62 | op, ok := arithChars[[]rune(tok.text)[0]] 63 | if !ok { 64 | panic("invalid arith op") 65 | } 66 | return op 67 | } 68 | } 69 | 70 | var precedence = [ArithMax + 1]int{ 71 | ArithMul: 2, 72 | ArithDiv: 2, 73 | ArithMod: 2, 74 | ArithLshift: 2, 75 | ArithRshift: 2, 76 | ArithAnd: 2, 77 | ArithPlus: 1, 78 | ArithMinus: 1, 79 | ArithOr: 1, 80 | ArithXor: 1, 81 | } 82 | -------------------------------------------------------------------------------- /internal/ast/arithop_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -type ArithOp"; DO NOT EDIT. 2 | 3 | package ast 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[ArithPlus-1] 12 | _ = x[ArithMinus-2] 13 | _ = x[ArithMul-3] 14 | _ = x[ArithDiv-4] 15 | _ = x[ArithMod-5] 16 | _ = x[ArithLshift-6] 17 | _ = x[ArithRshift-7] 18 | _ = x[ArithAnd-8] 19 | _ = x[ArithOr-9] 20 | _ = x[ArithXor-10] 21 | } 22 | 23 | const _ArithOp_name = "ArithPlusArithMinusArithMulArithDivArithModArithLshiftArithRshiftArithAndArithOrArithNot" 24 | 25 | var _ArithOp_index = [...]uint8{0, 9, 19, 27, 35, 43, 54, 65, 73, 80, 88} 26 | 27 | func (i ArithOp) String() string { 28 | i -= 1 29 | if i >= ArithOp(len(_ArithOp_index)-1) { 30 | return "ArithOp(" + strconv.FormatInt(int64(i+1), 10) + ")" 31 | } 32 | return _ArithOp_name[_ArithOp_index[i]:_ArithOp_index[i+1]] 33 | } 34 | -------------------------------------------------------------------------------- /internal/ast/ast.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package ast 18 | 19 | import ( 20 | "fmt" 21 | "maps" 22 | "slices" 23 | 24 | "github.com/fjl/geas/internal/lzint" 25 | ) 26 | 27 | // Document is the toplevel of the AST. It represents a list of abstract instructions and 28 | // macro definitions. 
29 | type Document struct { 30 | File string 31 | Statements []Statement 32 | 33 | // The document that contains/encloses this document. 34 | Parent *Document 35 | 36 | // The statement that created this document. 37 | // This is filled in for instruction macros, #include/#assemble, etc. 38 | Creation Statement 39 | 40 | labels map[string]*LabelDefSt 41 | exprMacros map[string]*ExpressionMacroDef 42 | instrMacros map[string]*InstructionMacroDef 43 | } 44 | 45 | // LookupLabel finds the definition of a label. 46 | func (doc *Document) LookupLabel(lref *LabelRefExpr) (*LabelDefSt, *Document) { 47 | for doc != nil { 48 | li, ok := doc.labels[lref.Ident] 49 | if ok { 50 | return li, doc 51 | } 52 | doc = doc.Parent 53 | } 54 | return nil, nil 55 | } 56 | 57 | // LookupInstrMacro finds the definition of an instruction macro. 58 | func (doc *Document) LookupInstrMacro(name string) (*InstructionMacroDef, *Document) { 59 | for doc != nil { 60 | if def, ok := doc.instrMacros[name]; ok { 61 | return def, doc 62 | } 63 | doc = doc.Parent 64 | } 65 | return nil, nil 66 | } 67 | 68 | // LookupExprMacro finds the definition of an expression macro. 69 | func (doc *Document) LookupExprMacro(name string) (*ExpressionMacroDef, *Document) { 70 | for doc != nil { 71 | if def, ok := doc.exprMacros[name]; ok { 72 | return def, doc 73 | } 74 | doc = doc.Parent 75 | } 76 | return nil, nil 77 | } 78 | 79 | // GlobalLabels returns the list of global label definitions in the docment. 80 | func (doc *Document) GlobalLabels() []*LabelDefSt { 81 | result := make([]*LabelDefSt, 0) 82 | for _, name := range slices.Sorted(maps.Keys(doc.labels)) { 83 | if IsGlobal(name) { 84 | result = append(result, doc.labels[name]) 85 | } 86 | } 87 | return result 88 | } 89 | 90 | // GlobalExprMacros returns the list of global expression macro definitions in the docment. 
91 | func (doc *Document) GlobalExprMacros() []*ExpressionMacroDef { 92 | result := make([]*ExpressionMacroDef, 0) 93 | for _, name := range slices.Sorted(maps.Keys(doc.exprMacros)) { 94 | if IsGlobal(name) { 95 | result = append(result, doc.exprMacros[name]) 96 | } 97 | } 98 | return result 99 | } 100 | 101 | // GlobalInstrMacros returns the list of global instruction macro definitions in the docment. 102 | func (doc *Document) GlobalInstrMacros() []*InstructionMacroDef { 103 | result := make([]*InstructionMacroDef, 0) 104 | for _, name := range slices.Sorted(maps.Keys(doc.instrMacros)) { 105 | if IsGlobal(name) { 106 | result = append(result, doc.instrMacros[name]) 107 | } 108 | } 109 | return result 110 | } 111 | 112 | // InstrMacros returns the list of all instruction macro definitions in the docment. 113 | func (doc *Document) InstrMacros() []*InstructionMacroDef { 114 | result := make([]*InstructionMacroDef, 0) 115 | for _, name := range slices.Sorted(maps.Keys(doc.instrMacros)) { 116 | result = append(result, doc.instrMacros[name]) 117 | } 118 | return result 119 | } 120 | 121 | func (doc *Document) CreationString() string { 122 | if doc.Creation == nil { 123 | if doc.File == "" { 124 | return "" 125 | } 126 | return " in " + doc.File 127 | } 128 | return fmt.Sprintf(" by %s at %v", doc.Creation.Description(), doc.Creation.Position()) 129 | } 130 | 131 | type Statement interface { 132 | Position() Position 133 | Description() string 134 | } 135 | 136 | // toplevel statement types 137 | type ( 138 | OpcodeSt struct { 139 | Op string 140 | Src *Document 141 | Arg Expr // Immediate argument for PUSH* / JUMP*. 142 | PushSize byte // For PUSH, this is n+1. 
143 | tok token 144 | } 145 | 146 | LabelDefSt struct { 147 | Src *Document 148 | Dotted bool 149 | Global bool 150 | tok token 151 | } 152 | 153 | MacroCallSt struct { 154 | Ident string 155 | Src *Document 156 | Args []Expr 157 | tok token 158 | } 159 | 160 | IncludeSt struct { 161 | tok token 162 | Src *Document 163 | Filename string 164 | } 165 | 166 | AssembleSt struct { 167 | tok token 168 | Src *Document 169 | Filename string 170 | } 171 | 172 | PragmaSt struct { 173 | pos Position 174 | Option string 175 | Value string 176 | } 177 | 178 | BytesSt struct { 179 | pos Position 180 | Value Expr 181 | } 182 | ) 183 | 184 | // definitions 185 | type ( 186 | ExpressionMacroDef struct { 187 | Name string 188 | Params []string 189 | Body Expr 190 | pos Position 191 | } 192 | 193 | InstructionMacroDef struct { 194 | Name string 195 | Params []string 196 | Body *Document 197 | pos Position 198 | } 199 | ) 200 | 201 | // expression types 202 | type ( 203 | Expr any 204 | 205 | LiteralExpr struct { 206 | tok token 207 | Value *lzint.Value // cached value 208 | } 209 | 210 | LabelRefExpr struct { 211 | Ident string 212 | Dotted bool 213 | Global bool 214 | } 215 | 216 | VariableExpr struct { 217 | Ident string 218 | } 219 | 220 | MacroCallExpr struct { 221 | Ident string 222 | Builtin bool 223 | Args []Expr 224 | } 225 | 226 | ArithExpr struct { 227 | Op ArithOp 228 | Left Expr 229 | Right Expr 230 | } 231 | ) 232 | 233 | func (inst *MacroCallSt) Position() Position { 234 | return Position{File: inst.Src.File, Line: inst.tok.line} 235 | } 236 | 237 | func (inst *MacroCallSt) Description() string { 238 | return fmt.Sprintf("invocation of %%%s", inst.Ident) 239 | } 240 | 241 | func (inst *IncludeSt) Position() Position { 242 | return Position{File: inst.Src.File, Line: inst.tok.line} 243 | } 244 | 245 | func (inst *IncludeSt) Description() string { 246 | return fmt.Sprintf("#include %q", inst.Filename) 247 | } 248 | 249 | func (inst *AssembleSt) Position() Position { 250 | 
return Position{File: inst.Src.File, Line: inst.tok.line} 251 | } 252 | 253 | func (inst *AssembleSt) Description() string { 254 | return fmt.Sprintf("#assemble %q", inst.Filename) 255 | } 256 | 257 | func (inst *PragmaSt) Position() Position { 258 | return inst.pos 259 | } 260 | 261 | func (inst *PragmaSt) Description() string { 262 | return fmt.Sprintf("#pragma %s %q", inst.Option, inst.Value) 263 | } 264 | 265 | func (inst *BytesSt) Position() Position { 266 | return inst.pos 267 | } 268 | 269 | func (inst *BytesSt) Description() string { 270 | return "#bytes" 271 | } 272 | 273 | func (inst *OpcodeSt) Position() Position { 274 | return Position{File: inst.Src.File, Line: inst.tok.line} 275 | } 276 | 277 | func (inst *OpcodeSt) Description() string { 278 | return fmt.Sprintf("opcode %s", inst.tok.text) 279 | } 280 | 281 | func (inst *LabelDefSt) Position() Position { 282 | return Position{File: inst.Src.File, Line: inst.tok.line} 283 | } 284 | 285 | func (inst *LabelDefSt) Description() string { 286 | return fmt.Sprintf("definition of %s", inst.String()) 287 | } 288 | 289 | func (def *InstructionMacroDef) Position() Position { 290 | return def.pos 291 | } 292 | 293 | func (def *InstructionMacroDef) Description() string { 294 | return fmt.Sprintf("definition of %%%s", def.Name) 295 | } 296 | 297 | func (def *ExpressionMacroDef) Position() Position { 298 | return def.pos 299 | } 300 | 301 | func (def *ExpressionMacroDef) Description() string { 302 | return fmt.Sprintf("definition of %s", def.Name) 303 | } 304 | 305 | func (l *LabelRefExpr) String() string { 306 | dot := "" 307 | if l.Dotted { 308 | dot = "." 
309 | } 310 | return "@" + dot + l.Ident 311 | } 312 | 313 | func (l *LabelDefSt) String() string { 314 | r := LabelRefExpr{Dotted: l.Dotted, Ident: l.tok.text} 315 | return r.String() 316 | } 317 | 318 | func (l *LabelDefSt) Name() string { 319 | return l.tok.text 320 | } 321 | 322 | func (e *LiteralExpr) IsString() bool { 323 | return e.tok.typ == stringLiteral 324 | } 325 | 326 | func (e *LiteralExpr) IsNumber() bool { 327 | return e.tok.typ == numberLiteral 328 | } 329 | 330 | func (e *LiteralExpr) Text() string { 331 | return e.tok.text 332 | } 333 | -------------------------------------------------------------------------------- /internal/ast/error.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package ast 18 | 19 | import "fmt" 20 | 21 | // Position represents a line in a file. 22 | type Position struct { 23 | File string 24 | Line int 25 | } 26 | 27 | func (p Position) String() string { 28 | return fmt.Sprintf("%s:%d", p.File, p.Line) 29 | } 30 | 31 | // ParseError is an error that happened during parsing. 
32 | type ParseError struct { 33 | tok token 34 | file string 35 | err error 36 | warning bool 37 | } 38 | 39 | func (e *ParseError) Error() string { 40 | warn := "" 41 | if e.warning { 42 | warn = "warning: " 43 | } 44 | return fmt.Sprintf("%s:%d: %s%v", e.file, e.tok.line, warn, e.err) 45 | } 46 | 47 | func (e *ParseError) Position() Position { 48 | return Position{File: e.file, Line: e.tok.line} 49 | } 50 | 51 | func (e *ParseError) IsWarning() bool { 52 | return e.warning 53 | } 54 | 55 | func (e *ParseError) Unwrap() error { 56 | return e.err 57 | } 58 | 59 | func ErrLabelAlreadyDef(firstDef, secondDef *LabelDefSt) error { 60 | dotInfo := "" 61 | if firstDef.Dotted && !secondDef.Dotted { 62 | dotInfo = " (as dotted label)" 63 | } 64 | if !firstDef.Dotted && secondDef.Dotted { 65 | dotInfo = " (as jumpdest)" 66 | } 67 | return fmt.Errorf("%v already defined%s", secondDef, dotInfo) 68 | } 69 | -------------------------------------------------------------------------------- /internal/ast/lexer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package ast 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | "strings" 23 | "unicode" 24 | "unicode/utf8" 25 | ) 26 | 27 | // stateFn is used through the lifetime of the 28 | // lexer to parse the different values at the 29 | // current state. 30 | type stateFn func(*lexer) stateFn 31 | 32 | // token is emitted when the lexer has discovered 33 | // a new parsable token. These are delivered over 34 | // the tokens channels of the lexer 35 | type token struct { 36 | text string 37 | line int 38 | typ tokenType 39 | } 40 | 41 | func (t *token) String() string { 42 | return fmt.Sprintf("%v %s (line %d)", t.typ, t.text, t.line) 43 | } 44 | 45 | // tokenType are the different types the lexer 46 | // is able to parse and return. 47 | type tokenType byte 48 | 49 | //go:generate go run golang.org/x/tools/cmd/stringer@latest -linecomment -type tokenType 50 | 51 | const ( 52 | eof tokenType = iota // end of file 53 | lineStart // beginning of line 54 | lineEnd // end of line 55 | invalidToken // invalid character 56 | identifier // identifier 57 | dottedIdentifier // dotted identifier 58 | variableIdentifier // parameter reference 59 | labelRef // label reference 60 | dottedLabelRef // dotted label reference 61 | label // label definition 62 | dottedLabel // dotted label definition 63 | numberLiteral // number literal 64 | stringLiteral // string literal 65 | openParen // open parenthesis 66 | closeParen // close parenthesis 67 | comma // comma 68 | directive // directive 69 | instMacroIdent // macro identifier 70 | openBrace // open brace 71 | closeBrace // closing brace 72 | equals // equals sign 73 | arith // arithmetic operation 74 | ) 75 | 76 | // lexer is the basic construct for parsing 77 | // source code and turning them in to tokens. 78 | // Tokens are interpreted by the compiler. 
79 | type lexer struct { 80 | input string // input contains the source code of the program 81 | 82 | tokens chan token // tokens is used to deliver tokens to the listener 83 | state stateFn // the current state function 84 | 85 | lineno int // current line number in the source file 86 | start, pos, width int // positions for lexing and returning value 87 | 88 | debug bool // flag for triggering debug output 89 | } 90 | 91 | // runLexer lexes the program by name with the given source. It returns a 92 | // channel on which the tokens are delivered. 93 | func runLexer(source []byte, debug bool) <-chan token { 94 | ch := make(chan token) 95 | l := &lexer{ 96 | input: string(source), 97 | tokens: ch, 98 | state: lexNext, 99 | debug: debug, 100 | lineno: 1, 101 | } 102 | go func() { 103 | l.emit(lineStart) 104 | for l.state != nil { 105 | l.state = l.state(l) 106 | } 107 | l.emit(eof) 108 | close(l.tokens) 109 | }() 110 | 111 | return ch 112 | } 113 | 114 | // next returns the next rune in the program's source. 
115 | func (l *lexer) next() (rune rune) { 116 | if l.pos >= len(l.input) { 117 | l.width = 0 118 | return 0 119 | } 120 | rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:]) 121 | l.pos += l.width 122 | return rune 123 | } 124 | 125 | // backup backsup the last parsed element (multi-character) 126 | func (l *lexer) backup() { 127 | l.pos -= l.width 128 | } 129 | 130 | // peek returns the next rune but does not advance the seeker 131 | func (l *lexer) peek() rune { 132 | r := l.next() 133 | l.backup() 134 | return r 135 | } 136 | 137 | // ignore advances the seeker and ignores the value 138 | func (l *lexer) ignore() { 139 | l.start = l.pos 140 | } 141 | 142 | // Accepts checks whether the given input matches the next rune 143 | func (l *lexer) accept(valid string) bool { 144 | if strings.ContainsRune(valid, l.next()) { 145 | return true 146 | } 147 | l.backup() 148 | return false 149 | } 150 | 151 | // acceptRun will continue to advance the seeker until valid 152 | // can no longer be met. 153 | func (l *lexer) acceptRun(fn func(rune) bool) { 154 | for fn(l.next()) { 155 | } 156 | l.backup() 157 | } 158 | 159 | // acceptRunUntil is the inverse of acceptRun and will continue 160 | // to advance the seeker until the rune has been found. 161 | func (l *lexer) acceptRunUntil(until rune) bool { 162 | for { 163 | i := l.next() 164 | if i == until { 165 | l.pos-- 166 | return true 167 | } 168 | if i == 0 { 169 | return false // eof 170 | } 171 | } 172 | } 173 | 174 | // emit creates a new token and sends it to token channel for processing. 
175 | func (l *lexer) emit(t tokenType) { 176 | token := token{line: l.lineno, text: l.input[l.start:l.pos], typ: t} 177 | 178 | if l.debug { 179 | fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.line, token.typ, token.text) 180 | } 181 | 182 | l.tokens <- token 183 | l.start = l.pos 184 | } 185 | 186 | // lexNext is state function for lexing lines 187 | func lexNext(l *lexer) stateFn { 188 | for { 189 | switch r := l.next(); { 190 | // known symbols: 191 | 192 | case r == ';': 193 | return lexComment 194 | 195 | case r == '@': 196 | l.ignore() 197 | return lexLabel 198 | 199 | case r == '$': 200 | l.ignore() 201 | return lexVariable 202 | 203 | case r == '"': 204 | return lexInsideString 205 | 206 | case r == '(': 207 | l.emit(openParen) 208 | return lexNext 209 | 210 | case r == ')': 211 | l.emit(closeParen) 212 | return lexNext 213 | 214 | case r == '{': 215 | l.emit(openBrace) 216 | return lexNext 217 | 218 | case r == '}': 219 | l.emit(closeBrace) 220 | return lexNext 221 | 222 | case r == ',': 223 | l.emit(comma) 224 | return lexNext 225 | 226 | case r == '#': 227 | return lexPreprocessor 228 | 229 | case r == '=': 230 | l.emit(equals) 231 | return lexNext 232 | 233 | // numbers and identifiers: 234 | 235 | case unicode.IsDigit(r): 236 | return lexNumber 237 | 238 | case r == '.' || isIdentBegin(r): 239 | return lexIdentifier 240 | 241 | // arithmetic: 242 | 243 | case r == '<': 244 | return lexLshift 245 | 246 | case r == '>': 247 | return lexRshift 248 | 249 | case r == '%': 250 | return lexPercent 251 | 252 | case arithChars[r] != 0: 253 | l.emit(arith) 254 | return lexNext 255 | 256 | // whitespace, etc. 
257 | 258 | case r == '\n': 259 | l.emit(lineEnd) 260 | l.ignore() 261 | l.lineno++ 262 | l.emit(lineStart) 263 | 264 | case isSpace(r): 265 | l.ignore() 266 | 267 | case r == 0: 268 | return nil // eof 269 | 270 | default: 271 | l.emit(invalidToken) 272 | } 273 | } 274 | } 275 | 276 | // lexComment parses the current position until the end 277 | // of the line and discards the text. 278 | func lexComment(l *lexer) stateFn { 279 | l.acceptRunUntil('\n') 280 | l.ignore() 281 | return lexNext 282 | } 283 | 284 | // lexLabel parses a label reference. 285 | func lexLabel(l *lexer) stateFn { 286 | typ := labelRef 287 | if l.peek() == '.' { 288 | typ = dottedLabelRef 289 | l.next() // consume optional . 290 | l.ignore() 291 | } 292 | l.acceptRun(isIdent) 293 | l.emit(typ) 294 | return lexNext 295 | } 296 | 297 | func lexPercent(l *lexer) stateFn { 298 | r := l.peek() 299 | if isIdentBegin(r) { 300 | l.ignore() 301 | l.acceptRun(isIdent) 302 | l.emit(instMacroIdent) 303 | } else { 304 | l.emit(arith) 305 | } 306 | return lexNext 307 | } 308 | 309 | // lexInsideString lexes the inside of a string until 310 | // the state function finds the closing quote. 311 | // It returns the lex text state function. 
312 | func lexInsideString(l *lexer) stateFn { 313 | // TODO: allow escaping quotes 314 | if l.acceptRunUntil('"') { 315 | l.start += 1 // remove beginning quote 316 | l.emit(stringLiteral) 317 | l.next() // consume " 318 | } 319 | return lexNext 320 | } 321 | 322 | func lexNumber(l *lexer) stateFn { 323 | acceptance := unicode.IsDigit 324 | if l.accept("xX") { 325 | acceptance = isHex 326 | } 327 | l.acceptRun(acceptance) 328 | l.emit(numberLiteral) 329 | return lexNext 330 | } 331 | 332 | func lexLshift(l *lexer) stateFn { 333 | if !l.accept("<") { 334 | l.emit(invalidToken) 335 | } else { 336 | l.emit(arith) 337 | } 338 | return lexNext 339 | } 340 | 341 | func lexRshift(l *lexer) stateFn { 342 | if !l.accept(">") { 343 | l.emit(invalidToken) 344 | } else { 345 | l.emit(arith) 346 | } 347 | return lexNext 348 | } 349 | 350 | func lexPreprocessor(l *lexer) stateFn { 351 | l.acceptRun(isIdent) 352 | l.emit(directive) 353 | return lexNext 354 | } 355 | 356 | func lexVariable(l *lexer) stateFn { 357 | l.acceptRun(isIdent) 358 | l.emit(variableIdentifier) 359 | return lexNext 360 | } 361 | 362 | func lexIdentifier(l *lexer) stateFn { 363 | firstIsDot := l.input[l.start] == '.' 
364 | if firstIsDot { 365 | l.ignore() 366 | } 367 | l.acceptRun(isIdent) 368 | 369 | if l.peek() == ':' { 370 | if firstIsDot { 371 | l.emit(dottedLabel) 372 | } else { 373 | l.emit(label) 374 | } 375 | l.accept(":") 376 | l.ignore() 377 | } else { 378 | if firstIsDot { 379 | l.emit(dottedIdentifier) 380 | } else { 381 | l.emit(identifier) 382 | } 383 | } 384 | return lexNext 385 | } 386 | 387 | func isSpace(t rune) bool { 388 | return unicode.IsSpace(t) 389 | } 390 | 391 | func isHex(t rune) bool { 392 | return unicode.IsDigit(t) || (t >= 'a' && t <= 'f') || (t >= 'A' && t <= 'F') 393 | } 394 | 395 | func isIdentBegin(t rune) bool { 396 | return t == '_' || unicode.IsLetter(t) 397 | } 398 | 399 | func isIdent(t rune) bool { 400 | return t == '_' || unicode.IsLetter(t) || unicode.IsNumber(t) 401 | } 402 | -------------------------------------------------------------------------------- /internal/ast/lexer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package ast 18 | 19 | import ( 20 | "reflect" 21 | "testing" 22 | ) 23 | 24 | func lexAll(src string) []token { 25 | ch := runLexer([]byte(src), false) 26 | 27 | var tokens []token 28 | for i := range ch { 29 | tokens = append(tokens, i) 30 | } 31 | return tokens 32 | } 33 | 34 | func TestLexer(t *testing.T) { 35 | tests := []struct { 36 | input string 37 | tokens []token 38 | }{ 39 | { 40 | input: ";; this is a comment", 41 | tokens: []token{{typ: lineStart, line: 1}, {typ: eof, line: 1}}, 42 | }, 43 | { 44 | input: "0x12345678", 45 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0x12345678", line: 1}, {typ: eof, line: 1}}, 46 | }, 47 | { 48 | input: "0x123ggg", 49 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0x123", line: 1}, {typ: identifier, text: "ggg", line: 1}, {typ: eof, line: 1}}, 50 | }, 51 | { 52 | input: "12345678", 53 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "12345678", line: 1}, {typ: eof, line: 1}}, 54 | }, 55 | { 56 | input: "123abc", 57 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}}, 58 | }, 59 | { 60 | input: "0123abc", 61 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}}, 62 | }, 63 | { 64 | input: "00123abc", 65 | tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "00123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}}, 66 | }, 67 | { 68 | input: "@foo", 69 | tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "foo", line: 1}, {typ: eof, line: 1}}, 70 | }, 71 | { 72 | input: "@label123", 73 | tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "label123", line: 1}, {typ: eof, line: 1}}, 74 | }, 75 | { 76 | input: "@.label .label: .ident", 77 | tokens: []token{{typ: lineStart, line: 
1}, {typ: dottedLabelRef, text: "label", line: 1}, {typ: dottedLabel, text: "label", line: 1}, {typ: dottedIdentifier, text: "ident", line: 1}, {typ: eof, line: 1}}, 78 | }, 79 | // comment after label 80 | { 81 | input: "@label123 ;; comment", 82 | tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "label123", line: 1}, {typ: eof, line: 1}}, 83 | }, 84 | // comment after instruction 85 | { 86 | input: "push 3 ;; comment\nadd", 87 | tokens: []token{{typ: lineStart, line: 1}, {typ: identifier, text: "push", line: 1}, {typ: numberLiteral, text: "3", line: 1}, {typ: lineEnd, text: "\n", line: 1}, {typ: lineStart, line: 2}, {typ: identifier, line: 2, text: "add"}, {typ: eof, line: 2}}, 88 | }, 89 | } 90 | 91 | for _, test := range tests { 92 | tokens := lexAll(test.input) 93 | if !reflect.DeepEqual(tokens, test.tokens) { 94 | t.Errorf("input %q\ngot: %+v\nwant: %+v", test.input, tokens, test.tokens) 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /internal/ast/names.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
16 | 17 | package ast 18 | 19 | import "unicode" 20 | 21 | // IsGlobal returns true when 'name' is a global identifier. 22 | func IsGlobal(name string) bool { 23 | return len(name) > 0 && unicode.IsUpper([]rune(name)[0]) 24 | } 25 | -------------------------------------------------------------------------------- /internal/ast/parse.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package ast 18 | 19 | import ( 20 | "fmt" 21 | "regexp" 22 | "strconv" 23 | ) 24 | 25 | // Parser performs parsing of the token stream. 26 | type Parser struct { 27 | in <-chan token 28 | buffer []token 29 | doc *Document 30 | errors []*ParseError 31 | } 32 | 33 | // NewParser creates a parser. 
34 | func NewParser(file string, content []byte, debug bool) *Parser { 35 | return &Parser{ 36 | in: runLexer(content, debug), 37 | doc: newDocument(file, nil), 38 | } 39 | } 40 | 41 | func newDocument(file string, parent *Document) *Document { 42 | return &Document{ 43 | File: file, 44 | labels: make(map[string]*LabelDefSt), 45 | exprMacros: make(map[string]*ExpressionMacroDef), 46 | instrMacros: make(map[string]*InstructionMacroDef), 47 | Parent: parent, 48 | } 49 | } 50 | 51 | // next reads the next token from the lexer. 52 | func (p *Parser) next() token { 53 | if len(p.buffer) > 0 { 54 | t := p.buffer[len(p.buffer)-1] 55 | p.buffer = p.buffer[:len(p.buffer)-1] 56 | return t 57 | } 58 | t := <-p.in 59 | return t 60 | } 61 | 62 | // unread puts a token back into the queue for reading. 63 | func (p *Parser) unread(t token) { 64 | p.buffer = append(p.buffer, t) 65 | } 66 | 67 | // drainLexer runs the lexer to completion. 68 | func (p *Parser) drainLexer() { 69 | for p.next().typ != eof { 70 | } 71 | } 72 | 73 | // throwError adds a new error to the error list. 74 | // The parser is returned to the toplevel and will continue parsing 75 | // at the next line. 76 | func (p *Parser) throwError(tok token, format string, args ...any) { 77 | err := &ParseError{tok: tok, file: p.doc.File, err: fmt.Errorf(format, args...)} 78 | p.errors = append(p.errors, err) 79 | // resync to start of next line 80 | for { 81 | switch tok.typ { 82 | case lineEnd, eof: 83 | panic(err) 84 | } 85 | tok = p.next() 86 | } 87 | } 88 | 89 | // unexpected signals that an unexpected token occurred in the input. 90 | func (p *Parser) unexpected(tok token) { 91 | p.throwError(tok, "unexpected %s %s", tok.typ.String(), tok.text) 92 | } 93 | 94 | // Parse runs the parser, outputting a document. 
95 | func (p *Parser) Parse() (*Document, []*ParseError) { 96 | defer p.drainLexer() 97 | for { 98 | if p.parseOne() { 99 | return p.doc, p.errors 100 | } 101 | } 102 | } 103 | 104 | func (p *Parser) parseOne() bool { 105 | defer func() { 106 | err := recover() 107 | if _, ok := err.(*ParseError); !ok && err != nil { 108 | panic(err) 109 | } 110 | }() 111 | return parseStatement(p) 112 | } 113 | 114 | // ParseExpression parses the input as a single expression. 115 | // This is used in evaluator tests. 116 | func (p *Parser) ParseExpression() (expr Expr, err error) { 117 | defer p.drainLexer() 118 | defer func() { 119 | e := recover() 120 | if pe, ok := e.(*ParseError); ok { 121 | err = pe 122 | } else if e != nil { 123 | panic(e) 124 | } 125 | }() 126 | 127 | // skip lineStart 128 | switch tok := p.next(); tok.typ { 129 | case lineStart: 130 | expr = parseExpr(p, p.next()) 131 | return expr, nil 132 | case lineEnd, eof: 133 | p.unexpected(tok) 134 | } 135 | return nil, nil 136 | } 137 | 138 | // atDocumentTop reports whether the parser is at the toplevel. 139 | // This returns false while parsing an instruction macro definition. 
140 | func (p *Parser) atDocumentTop() bool { 141 | return p.doc.Parent == nil 142 | } 143 | 144 | // ------------- start parser functions ------------- 145 | 146 | func parseStatement(p *Parser) (done bool) { 147 | switch tok := p.next(); tok.typ { 148 | case eof, closeBrace: 149 | if p.atDocumentTop() != (tok.typ == eof) { 150 | p.unexpected(tok) 151 | } 152 | return true 153 | case label, dottedLabel: 154 | parseLabelDef(p, tok) 155 | case directive: 156 | parseDirective(p, tok) 157 | case identifier: 158 | parseInstruction(p, tok) 159 | case instMacroIdent: 160 | parseInstructionMacroCall(p, tok) 161 | case lineStart, lineEnd: 162 | return false 163 | default: 164 | p.unexpected(tok) 165 | } 166 | return false 167 | } 168 | 169 | func parseLabelDef(p *Parser, tok token) { 170 | name := tok.text 171 | li := &LabelDefSt{ 172 | tok: tok, 173 | Src: p.doc, 174 | Dotted: tok.typ == dottedLabel, 175 | Global: IsGlobal(name), 176 | } 177 | p.doc.Statements = append(p.doc.Statements, li) 178 | if firstDef, ok := p.doc.labels[name]; ok { 179 | p.throwError(tok, "%w", ErrLabelAlreadyDef(firstDef, li)) 180 | return 181 | } 182 | p.doc.labels[name] = li 183 | } 184 | 185 | func parseDirective(p *Parser, tok token) { 186 | switch tok.text { 187 | case "#define": 188 | if !p.atDocumentTop() { 189 | p.throwError(tok, "nested macro definitions are not allowed") 190 | } 191 | parseMacroDef(p) 192 | case "#include": 193 | parseInclude(p, tok) 194 | case "#assemble": 195 | parseAssemble(p, tok) 196 | case "#pragma": 197 | parsePragma(p, tok) 198 | case "#bytes": 199 | parseBytes(p, tok) 200 | default: 201 | p.throwError(tok, "unknown compiler directive %q", tok.text) 202 | } 203 | } 204 | 205 | func parseMacroDef(p *Parser) { 206 | name := p.next() 207 | switch name.typ { 208 | case dottedIdentifier: 209 | p.throwError(name, "attempt to redefine builtin macro .%s", name.text) 210 | case instMacroIdent: 211 | parseInstructionMacroDef(p, name) 212 | return 213 | case identifier: 
214 | default: 215 | p.unexpected(name) 216 | } 217 | 218 | // Parse parameters and body. 219 | var ( 220 | pos = Position{File: p.doc.File, Line: name.line} 221 | def = &ExpressionMacroDef{Name: name.text, pos: pos} 222 | bodyTok token 223 | didParams bool 224 | legacySyntax bool 225 | ) 226 | loop: 227 | for { 228 | switch tok := p.next(); tok.typ { 229 | case lineEnd, eof: 230 | p.throwError(tok, "incomplete macro definition") 231 | 232 | case openBrace: 233 | p.throwError(tok, "unexpected { in expression macro definition") 234 | 235 | case openParen: 236 | if didParams { 237 | bodyTok, legacySyntax = tok, true 238 | break loop 239 | } else { 240 | def.Params = parseParameterList(p) 241 | didParams = true 242 | } 243 | 244 | case equals: 245 | bodyTok = p.next() 246 | break loop 247 | 248 | default: 249 | bodyTok, legacySyntax = tok, true 250 | break loop 251 | } 252 | } 253 | 254 | if legacySyntax { 255 | p.errors = append(p.errors, &ParseError{ 256 | tok: bodyTok, 257 | file: p.doc.File, 258 | err: fmt.Errorf("legacy definition syntax, missing '=' before expression"), 259 | warning: true, 260 | }) 261 | } 262 | def.Body = parseExpr(p, bodyTok) 263 | 264 | // Register the macro. 265 | checkDuplicateMacro(p, name) 266 | p.doc.exprMacros[name.text] = def 267 | } 268 | 269 | func parseInstructionMacroDef(p *Parser, nameTok token) { 270 | var params []string 271 | var didParams bool 272 | paramLoop: 273 | for { 274 | switch tok := p.next(); tok.typ { 275 | case lineEnd, eof: 276 | p.throwError(tok, "incomplete macro definition") 277 | case openBrace: 278 | break paramLoop // start of body 279 | case openParen: 280 | if !didParams { 281 | params = parseParameterList(p) 282 | didParams = true 283 | continue paramLoop 284 | } 285 | default: 286 | p.unexpected(tok) 287 | } 288 | } 289 | 290 | // Set definition context in parser. 
291 | topdoc := p.doc 292 | doc := newDocument(p.doc.File, p.doc) 293 | p.doc = doc 294 | defer func() { p.doc = topdoc }() 295 | 296 | // Parse macro body. 297 | for !parseStatement(p) { 298 | } 299 | 300 | // Register definition. 301 | checkDuplicateMacro(p, nameTok) 302 | pos := Position{File: p.doc.File, Line: nameTok.line} 303 | def := &InstructionMacroDef{Name: nameTok.text, pos: pos, Params: params, Body: doc} 304 | doc.Creation = def 305 | topdoc.instrMacros[nameTok.text] = def 306 | } 307 | 308 | func checkDuplicateMacro(p *Parser, nameTok token) { 309 | name := nameTok.text 310 | if _, ok := p.doc.instrMacros[name]; ok { 311 | p.throwError(nameTok, "instruction macro %s already defined", name) 312 | } 313 | if _, ok := p.doc.exprMacros[name]; ok { 314 | p.throwError(nameTok, "expression macro %s already defined", name) 315 | } 316 | } 317 | 318 | func parseInclude(p *Parser, d token) { 319 | instr := &IncludeSt{Src: p.doc, tok: d} 320 | switch tok := p.next(); tok.typ { 321 | case stringLiteral: 322 | instr.Filename = tok.text 323 | p.doc.Statements = append(p.doc.Statements, instr) 324 | default: 325 | p.throwError(tok, "expected filename following #include") 326 | } 327 | } 328 | 329 | func parseAssemble(p *Parser, d token) { 330 | instr := &AssembleSt{Src: p.doc, tok: d} 331 | switch tok := p.next(); tok.typ { 332 | case stringLiteral: 333 | instr.Filename = tok.text 334 | p.doc.Statements = append(p.doc.Statements, instr) 335 | default: 336 | p.throwError(tok, "expected filename following #assemble") 337 | } 338 | } 339 | 340 | func parsePragma(p *Parser, d token) { 341 | instr := &PragmaSt{pos: Position{p.doc.File, d.line}} 342 | switch tok := p.next(); tok.typ { 343 | case identifier: 344 | instr.Option = tok.text 345 | switch v := p.next(); v.typ { 346 | case stringLiteral, numberLiteral: 347 | instr.Value = v.text 348 | case equals: 349 | p.throwError(tok, "unexpected = after #pragma %s", instr.Option) 350 | default: 351 | p.throwError(tok, 
"#pragma option value must be string or number literal") 352 | } 353 | p.doc.Statements = append(p.doc.Statements, instr) 354 | default: 355 | p.throwError(tok, "expected option name following #pragma") 356 | } 357 | } 358 | 359 | func parseBytes(p *Parser, d token) { 360 | instr := &BytesSt{pos: Position{p.doc.File, d.line}} 361 | switch tok := p.next(); tok.typ { 362 | case lineEnd, eof: 363 | p.throwError(d, "expected expression following #bytes") 364 | default: 365 | instr.Value = parseExpr(p, tok) 366 | p.doc.Statements = append(p.doc.Statements, instr) 367 | } 368 | } 369 | 370 | func parseInstruction(p *Parser, tok token) { 371 | opcode := &OpcodeSt{Op: tok.text, Src: p.doc, tok: tok} 372 | size, isPush := parsePushSize(tok.text) 373 | if isPush { 374 | opcode.PushSize = byte(size + 1) 375 | } 376 | 377 | // Register in document. 378 | p.doc.Statements = append(p.doc.Statements, opcode) 379 | 380 | // Parse optional argument. 381 | argToken := p.next() 382 | switch argToken.typ { 383 | case lineEnd, eof: 384 | return 385 | default: 386 | opcode.Arg = parseExpr(p, argToken) 387 | } 388 | } 389 | 390 | var sizedPushRE = regexp.MustCompile("(?i)^PUSH([0-9]*)$") 391 | 392 | func parsePushSize(name string) (int, bool) { 393 | m := sizedPushRE.FindStringSubmatch(name) 394 | if len(m) == 0 { 395 | return 0, false 396 | } 397 | if len(m[1]) > 0 { 398 | sz, _ := strconv.Atoi(m[1]) 399 | return sz, true 400 | } 401 | return -1, true 402 | } 403 | 404 | func parseInstructionMacroCall(p *Parser, nameTok token) { 405 | call := &MacroCallSt{Src: p.doc, Ident: nameTok.text, tok: nameTok} 406 | p.doc.Statements = append(p.doc.Statements, call) 407 | 408 | switch tok := p.next(); tok.typ { 409 | case lineEnd, eof: 410 | return 411 | case openParen: 412 | call.Args = parseCallArguments(p) 413 | default: 414 | p.unexpected(tok) 415 | } 416 | } 417 | 418 | // parseExpr parses an expression. 
// It reads one primary expression starting at tok, followed by any chain
// of binary arithmetic operators and operands.
func parseExpr(p *Parser, tok token) Expr {
	left := parsePrimaryExpr(p, tok)
	return parseArith(p, left, p.next(), 0)
}

// parseArith parses an arithmetic expression.
//
// left is the operand parsed so far and tok the token following it. As long
// as tok is an operator whose precedence is at least minPrecedence, the
// operator and its right operand are folded into left. The first token that
// does not continue the expression is put back for the caller.
func parseArith(p *Parser, left Expr, tok token, minPrecedence int) Expr {
	for ; ; tok = p.next() {
		// Check for (another) arithmetic op.
		var op ArithOp
		switch tok.typ {
		case arith:
			op = tokenArithOp(tok)
			if precedence[op] < minPrecedence {
				p.unread(tok)
				return left
			}
		default:
			// End of binary expression.
			p.unread(tok)
			return left
		}

		// Parse right operand.
		var right Expr
		switch tok = p.next(); tok.typ {
		case comma, closeParen, closeBrace, lineEnd, eof:
			p.throwError(tok, "expected right operand in arithmetic expression")
		default:
			right = parsePrimaryExpr(p, tok)
		}

		// Check for next op of higher precedence.
		right = parseArithInner(p, right, precedence[op])

		// Combine into binary expression.
		left = &ArithExpr{Op: op, Left: left, Right: right}
	}
}

// parseArithInner extends the right operand of an operator with precedence
// curPrecedence: while the next operator has a strictly higher precedence,
// it and its operands are folded into right. An operator of the same or
// lower precedence is put back for the caller.
func parseArithInner(p *Parser, right Expr, curPrecedence int) Expr {
	for {
		switch tok := p.next(); tok.typ {
		case arith:
			nextop := tokenArithOp(tok)
			if precedence[nextop] <= curPrecedence {
				p.unread(tok)
				return right
			}
			right = parseArith(p, right, tok, curPrecedence+1)

		default:
			p.unread(tok)
			return right
		}
	}
}

// parsePrimaryExpr parses an operand: a macro call or macro reference, a
// variable, a label reference, a literal, or a parenthesized expression.
func parsePrimaryExpr(p *Parser, tok token) Expr {
	switch tok.typ {
	case identifier, dottedIdentifier:
		call := &MacroCallExpr{Ident: tok.text, Builtin: tok.typ == dottedIdentifier}
		// The argument list is optional.
		switch tok := p.next(); tok.typ {
		case openParen:
			call.Args = parseCallArguments(p)
		default:
			p.unread(tok)
		}
		return call

	case variableIdentifier:
		return &VariableExpr{Ident: tok.text}

	case labelRef, dottedLabelRef:
		return &LabelRefExpr{
			Ident:  tok.text,
			Dotted: tok.typ == dottedLabelRef,
			Global: IsGlobal(tok.text),
		}

	case numberLiteral, stringLiteral:
		return &LiteralExpr{tok: tok}

	case openParen:
		return parseParenExpr(p)

	default:
		p.unexpected(tok)
		return nil
	}
}

// parseParenExpr parses the remainder of a parenthesized expression; the
// opening parenthesis has already been consumed. Line breaks before the
// closing parenthesis are skipped.
func parseParenExpr(p *Parser) Expr {
	var expr Expr
	switch tok := p.next(); tok.typ {
	case closeParen:
		p.throwError(tok, "empty parenthesized expression")
		return nil
	default:
		expr = parseExpr(p, tok)
	}
	// Ensure closing paren is there.
	for {
		switch tok := p.next(); tok.typ {
		case closeParen:
			return expr
		case lineStart, lineEnd:
			continue
		default:
			p.unexpected(tok)
		}
	}
}

// parseParameterList parses a comma-separated list of names.
534 | func parseParameterList(p *Parser) (names []string) { 535 | for { 536 | tok := p.next() 537 | switch tok.typ { 538 | case closeParen: 539 | return names 540 | case identifier: 541 | names = append(names, tok.text) 542 | default: 543 | p.unexpected(tok) 544 | } 545 | if parseListEnd(p) { 546 | return names 547 | } 548 | } 549 | } 550 | 551 | // parseCallArguments parses the argument list of a macro call. 552 | func parseCallArguments(p *Parser) (args []Expr) { 553 | for { 554 | tok := p.next() 555 | switch tok.typ { 556 | case closeParen: 557 | return args 558 | default: 559 | if arg := parseExpr(p, tok); arg != nil { 560 | args = append(args, arg) 561 | } 562 | } 563 | if parseListEnd(p) { 564 | return args 565 | } 566 | } 567 | } 568 | 569 | func parseListEnd(p *Parser) bool { 570 | for { 571 | tok := p.next() 572 | switch tok.typ { 573 | case comma: 574 | return false 575 | case lineStart, lineEnd: 576 | continue 577 | case closeParen: 578 | return true 579 | default: 580 | p.unexpected(tok) 581 | } 582 | } 583 | } 584 | -------------------------------------------------------------------------------- /internal/ast/tokentype_string.go: -------------------------------------------------------------------------------- 1 | // Code generated by "stringer -linecomment -type tokenType"; DO NOT EDIT. 2 | 3 | package ast 4 | 5 | import "strconv" 6 | 7 | func _() { 8 | // An "invalid array index" compiler error signifies that the constant values have changed. 9 | // Re-run the stringer command to generate them again. 
10 | var x [1]struct{} 11 | _ = x[eof-0] 12 | _ = x[lineStart-1] 13 | _ = x[lineEnd-2] 14 | _ = x[invalidToken-3] 15 | _ = x[identifier-4] 16 | _ = x[dottedIdentifier-5] 17 | _ = x[variableIdentifier-6] 18 | _ = x[labelRef-7] 19 | _ = x[dottedLabelRef-8] 20 | _ = x[label-9] 21 | _ = x[dottedLabel-10] 22 | _ = x[numberLiteral-11] 23 | _ = x[stringLiteral-12] 24 | _ = x[openParen-13] 25 | _ = x[closeParen-14] 26 | _ = x[comma-15] 27 | _ = x[directive-16] 28 | _ = x[instMacroIdent-17] 29 | _ = x[openBrace-18] 30 | _ = x[closeBrace-19] 31 | _ = x[equals-20] 32 | _ = x[arith-21] 33 | } 34 | 35 | const _tokenType_name = "end of filebeginning of lineend of lineinvalid characteridentifierdotted identifierparameter referencelabel referencedotted label referencelabel definitiondotted label definitionnumber literalstring literalopen parenthesisclose parenthesiscommadirectivemacro identifieropen braceclosing braceequals signarithmetic operation" 36 | 37 | var _tokenType_index = [...]uint16{0, 11, 28, 39, 56, 66, 83, 102, 117, 139, 155, 178, 192, 206, 222, 239, 244, 253, 269, 279, 292, 303, 323} 38 | 39 | func (i tokenType) String() string { 40 | if i >= tokenType(len(_tokenType_index)-1) { 41 | return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")" 42 | } 43 | return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]] 44 | } 45 | -------------------------------------------------------------------------------- /internal/evm/forkdefs.go: -------------------------------------------------------------------------------- 1 | package evm 2 | 3 | var LatestFork = "cancun" 4 | 5 | var forkReg = map[string]*InstructionSetDef{ 6 | "frontier": { 7 | Names: []string{"frontier"}, 8 | Added: []*Op{ 9 | opm["STOP"], 10 | opm["ADD"], 11 | opm["MUL"], 12 | opm["SUB"], 13 | opm["DIV"], 14 | opm["SDIV"], 15 | opm["MOD"], 16 | opm["SMOD"], 17 | opm["ADDMOD"], 18 | opm["MULMOD"], 19 | opm["EXP"], 20 | opm["SIGNEXTEND"], 21 | opm["LT"], 22 | opm["GT"], 23 | opm["SLT"], 24 | opm["SGT"], 
25 | opm["EQ"], 26 | opm["ISZERO"], 27 | opm["AND"], 28 | opm["XOR"], 29 | opm["OR"], 30 | opm["NOT"], 31 | opm["BYTE"], 32 | opm["KECCAK256"], 33 | opm["ADDRESS"], 34 | opm["BALANCE"], 35 | opm["ORIGIN"], 36 | opm["CALLER"], 37 | opm["CALLVALUE"], 38 | opm["CALLDATALOAD"], 39 | opm["CALLDATASIZE"], 40 | opm["CALLDATACOPY"], 41 | opm["CODESIZE"], 42 | opm["CODECOPY"], 43 | opm["GASPRICE"], 44 | opm["EXTCODESIZE"], 45 | opm["EXTCODECOPY"], 46 | opm["BLOCKHASH"], 47 | opm["COINBASE"], 48 | opm["TIMESTAMP"], 49 | opm["NUMBER"], 50 | opm["DIFFICULTY"], 51 | opm["GASLIMIT"], 52 | opm["POP"], 53 | opm["MLOAD"], 54 | opm["MSTORE"], 55 | opm["MSTORE8"], 56 | opm["SLOAD"], 57 | opm["SSTORE"], 58 | opm["JUMP"], 59 | opm["JUMPI"], 60 | opm["PC"], 61 | opm["MSIZE"], 62 | opm["GAS"], 63 | opm["JUMPDEST"], 64 | opm["PUSH1"], 65 | opm["PUSH2"], 66 | opm["PUSH3"], 67 | opm["PUSH4"], 68 | opm["PUSH5"], 69 | opm["PUSH6"], 70 | opm["PUSH7"], 71 | opm["PUSH8"], 72 | opm["PUSH9"], 73 | opm["PUSH10"], 74 | opm["PUSH11"], 75 | opm["PUSH12"], 76 | opm["PUSH13"], 77 | opm["PUSH14"], 78 | opm["PUSH15"], 79 | opm["PUSH16"], 80 | opm["PUSH17"], 81 | opm["PUSH18"], 82 | opm["PUSH19"], 83 | opm["PUSH20"], 84 | opm["PUSH21"], 85 | opm["PUSH22"], 86 | opm["PUSH23"], 87 | opm["PUSH24"], 88 | opm["PUSH25"], 89 | opm["PUSH26"], 90 | opm["PUSH27"], 91 | opm["PUSH28"], 92 | opm["PUSH29"], 93 | opm["PUSH30"], 94 | opm["PUSH31"], 95 | opm["PUSH32"], 96 | opm["DUP1"], 97 | opm["DUP2"], 98 | opm["DUP3"], 99 | opm["DUP4"], 100 | opm["DUP5"], 101 | opm["DUP6"], 102 | opm["DUP7"], 103 | opm["DUP8"], 104 | opm["DUP9"], 105 | opm["DUP10"], 106 | opm["DUP11"], 107 | opm["DUP12"], 108 | opm["DUP13"], 109 | opm["DUP14"], 110 | opm["DUP15"], 111 | opm["DUP16"], 112 | opm["SWAP1"], 113 | opm["SWAP2"], 114 | opm["SWAP3"], 115 | opm["SWAP4"], 116 | opm["SWAP5"], 117 | opm["SWAP6"], 118 | opm["SWAP7"], 119 | opm["SWAP8"], 120 | opm["SWAP9"], 121 | opm["SWAP10"], 122 | opm["SWAP11"], 123 | opm["SWAP12"], 124 | 
opm["SWAP13"], 125 | opm["SWAP14"], 126 | opm["SWAP15"], 127 | opm["SWAP16"], 128 | opm["LOG0"], 129 | opm["LOG1"], 130 | opm["LOG2"], 131 | opm["LOG3"], 132 | opm["LOG4"], 133 | opm["CREATE"], 134 | opm["CALL"], 135 | opm["CALLCODE"], 136 | opm["RETURN"], 137 | opm["SELFDESTRUCT"], 138 | }, 139 | }, 140 | 141 | "homestead": { 142 | Names: []string{"homestead"}, 143 | Parent: "frontier", 144 | Added: []*Op{ 145 | opm["DELEGATECALL"], 146 | }, 147 | }, 148 | 149 | "tangerinewhistle": { 150 | Names: []string{"tangerinewhistle", "eip150"}, 151 | Parent: "homestead", 152 | }, 153 | 154 | "spuriousdragon": { 155 | Names: []string{"spuriousdragon", "eip158"}, 156 | Parent: "tangerinewhistle", 157 | }, 158 | 159 | "byzantium": { 160 | Names: []string{"byzantium"}, 161 | Parent: "spuriousdragon", 162 | Added: []*Op{ 163 | opm["STATICCALL"], 164 | opm["RETURNDATASIZE"], 165 | opm["RETURNDATACOPY"], 166 | opm["REVERT"], 167 | }, 168 | }, 169 | 170 | "petersburg": { 171 | Names: []string{"petersburg"}, 172 | Parent: "byzantium", 173 | }, 174 | 175 | "constantinople": { 176 | Names: []string{"constantinople"}, 177 | Parent: "petersburg", 178 | Added: []*Op{ 179 | opm["SHL"], 180 | opm["SHR"], 181 | opm["SAR"], 182 | opm["EXTCODEHASH"], 183 | opm["CREATE2"], 184 | }, 185 | }, 186 | 187 | "istanbul": { 188 | Names: []string{"istanbul"}, 189 | Parent: "constantinople", 190 | Added: []*Op{ 191 | opm["CHAINID"], 192 | opm["SELFBALANCE"], 193 | }, 194 | }, 195 | 196 | "berlin": { 197 | Names: []string{"berlin"}, 198 | Parent: "istanbul", 199 | }, 200 | 201 | "london": { 202 | Names: []string{"london"}, 203 | Parent: "berlin", 204 | Added: []*Op{ 205 | opm["BASEFEE"], 206 | }, 207 | }, 208 | 209 | "paris": { 210 | Names: []string{"paris", "merge"}, 211 | Parent: "istanbul", 212 | Added: []*Op{ 213 | opm["RANDOM"], 214 | }, 215 | Removed: []*Op{ 216 | opm["DIFFICULTY"], 217 | }, 218 | }, 219 | 220 | "shanghai": { 221 | Names: []string{"shanghai"}, 222 | Parent: "paris", 223 | Added: 
// InstructionSetDef is the definition of an EVM instruction set.
//
// A definition describes one fork as a delta on top of its parent: the ops in
// Removed are taken out of the inherited set and the ops in Added are put in.
type InstructionSetDef struct {
	Names   []string // all names of this instruction set; Names[0] is the canonical name
	Parent  string   // name of the parent definition in forkReg, "" if there is none
	Added   []*Op    // ops introduced by this fork
	Removed []*Op    // inherited ops dropped by this fork
}
38 | func (def *InstructionSetDef) Name() string { 39 | return def.Names[0] 40 | } 41 | 42 | // InstructionSet is an EVM instruction set. 43 | type InstructionSet struct { 44 | name string 45 | byName map[string]*Op 46 | byCode map[byte]*Op 47 | opRemoved map[string]string // forks where op was last removed 48 | } 49 | 50 | // FindInstructionSet resolves a fork name to a set of opcodes. 51 | func FindInstructionSet(name string) *InstructionSet { 52 | name = strings.ToLower(name) 53 | var def *InstructionSetDef 54 | if def = forkReg[name]; def == nil { 55 | // Might be non-canonical name. 56 | for _, entry := range forkReg { 57 | if slices.Contains(entry.Names, name) { 58 | def = entry 59 | break 60 | } 61 | } 62 | } 63 | if def == nil { 64 | return nil 65 | } 66 | is := &InstructionSet{ 67 | name: def.Name(), 68 | byName: make(map[string]*Op), 69 | byCode: make(map[byte]*Op), 70 | opRemoved: make(map[string]string), 71 | } 72 | if err := is.resolveDefs(def); err != nil { 73 | panic(err) 74 | } 75 | return is 76 | } 77 | 78 | // Name returns the canonical instruction set name. 79 | func (is *InstructionSet) Name() string { 80 | return is.name 81 | } 82 | 83 | // SupportsPush0 reports whether the instruction set includes the PUSH0 instruction. 84 | func (is *InstructionSet) SupportsPush0() bool { 85 | return is.byName["PUSH0"] != nil 86 | } 87 | 88 | // OpByName resolves an opcode by its name. 89 | // Name has to be all uppercase. 90 | func (is *InstructionSet) OpByName(opname string) *Op { 91 | return is.byName[opname] 92 | } 93 | 94 | // PushBySize resolves a push op by its size. 95 | func (is *InstructionSet) PushBySize(size int) *Op { 96 | buf := []byte{'P', 'U', 'S', 'H', 0, 0} 97 | name := strconv.AppendInt(buf[:4], int64(size), 10) 98 | return is.byName[string(name)] 99 | } 100 | 101 | // OpByCode resolves an opcode by its code. 
102 | func (is *InstructionSet) OpByCode(code byte) *Op { 103 | return is.byCode[code] 104 | } 105 | 106 | // AllOps returns all operations. 107 | func (is *InstructionSet) AllOps() []*Op { 108 | ops := make([]*Op, 0, len(is.byName)) 109 | for _, op := range is.byName { 110 | ops = append(ops, op) 111 | } 112 | slices.SortFunc(ops, func(a, b *Op) int { return strings.Compare(a.Name, b.Name) }) 113 | return ops 114 | } 115 | 116 | // Parents returns the parent fork chain of the instruction set. 117 | func (is *InstructionSet) Parents() []string { 118 | var chain []string 119 | f := forkReg[is.name] 120 | for f.Parent != "" { 121 | f = forkReg[f.Parent] 122 | chain = append(chain, f.Name()) 123 | } 124 | return chain 125 | } 126 | 127 | // ForkWhereOpRemoved returns the fork where a given op was removed from the instruction 128 | // set. This is intended to be called when op is known to not exist. Note this will return 129 | // an empty string in several cases: 130 | // 131 | // - op is invalid 132 | // - op is valid, but does not appear in lineage of instruction set 133 | // - op is valid and exists in instruction set 134 | func (is *InstructionSet) ForkWhereOpRemoved(op string) string { 135 | return is.opRemoved[op] 136 | } 137 | 138 | // lineage computes the definition chain of an instruction set. 
139 | func (def *InstructionSetDef) lineage() ([]*InstructionSetDef, error) { 140 | var visited = make(set.Set[*InstructionSetDef]) 141 | var lin []*InstructionSetDef 142 | for { 143 | if visited.Includes(def) { 144 | return nil, fmt.Errorf("instruction set parent cycle: %s <- %s", lin[len(lin)-1].Name(), def.Name()) 145 | } 146 | visited.Add(def) 147 | lin = append(lin, def) 148 | 149 | if def.Parent == "" { 150 | break 151 | } 152 | parent, ok := forkReg[def.Parent] 153 | if !ok { 154 | return nil, fmt.Errorf("instruction set %s has unknown parent %s", def.Name(), def.Parent) 155 | } 156 | def = parent 157 | } 158 | slices.Reverse(lin) 159 | return lin, nil 160 | } 161 | 162 | // resolveDefs computes the full opcode set of a fork from its lineage. 163 | func (is *InstructionSet) resolveDefs(toplevel *InstructionSetDef) error { 164 | lineage, err := toplevel.lineage() 165 | if err != nil { 166 | return err 167 | } 168 | 169 | for _, def := range lineage { 170 | for _, op := range def.Removed { 171 | if _, ok := is.byName[op.Name]; !ok { 172 | return fmt.Errorf("removed op %s does not exist in fork %s", op.Name, def.Name()) 173 | } 174 | if _, ok := is.byCode[op.Code]; !ok { 175 | return fmt.Errorf("removed opcode %d (%s) does not exist in fork %s", op.Code, op.Name, def.Name()) 176 | } 177 | delete(is.byName, op.Name) 178 | delete(is.byCode, op.Code) 179 | is.opRemoved[op.Name] = def.Name() 180 | } 181 | for _, op := range def.Added { 182 | _, nameDefined := is.byName[op.Name] 183 | if nameDefined { 184 | return fmt.Errorf("instruction %s added multiple times", op.Name) 185 | } 186 | is.byName[op.Name] = op 187 | _, codeDefined := is.byCode[op.Code] 188 | if codeDefined { 189 | return fmt.Errorf("opcode %v added multiple times (adding %s, existing def %s)", op.Code, op.Name, is.byCode[op.Code].Name) 190 | } 191 | is.byCode[op.Code] = op 192 | delete(is.opRemoved, op.Name) 193 | } 194 | } 195 | return nil 196 | } 197 | 198 | // opAddedInForkMap contains all ops and 
the forks they were added in. 199 | var opAddedInForkMap = computeOpAddedInFork() 200 | 201 | func computeOpAddedInFork() map[string][]string { 202 | m := make(map[string][]string) 203 | for _, def := range forkReg { 204 | for _, op := range def.Added { 205 | m[op.Name] = append(m[op.Name], def.Name()) 206 | } 207 | } 208 | return m 209 | } 210 | 211 | // ForksWhereOpAdded returns the fork names where a given op is added. 212 | // If this returns nil, op is invalid. 213 | func ForksWhereOpAdded(op string) []string { 214 | return opAddedInForkMap[op] 215 | } 216 | 217 | func AllForks() (names []string) { 218 | for _, def := range forkReg { 219 | names = append(names, def.Names...) 220 | } 221 | sort.Strings(names) 222 | return names 223 | } 224 | -------------------------------------------------------------------------------- /internal/evm/instruction_set_test.go: -------------------------------------------------------------------------------- 1 | package evm 2 | 3 | import ( 4 | "maps" 5 | "slices" 6 | "strings" 7 | "testing" 8 | 9 | "github.com/fjl/geas/internal/set" 10 | ) 11 | 12 | func TestOps(t *testing.T) { 13 | // Check op all names are uppercase. 14 | for _, op := range oplist { 15 | if op.Name != strings.ToUpper(op.Name) { 16 | t.Fatalf("op %s name is not all-uppercase", op.Name) 17 | } 18 | } 19 | 20 | // Check all ops are used in a fork. 21 | // First compute set of used op names. 22 | defnames := slices.Sorted(maps.Keys(forkReg)) 23 | used := make(set.Set[string], len(oplist)) 24 | for _, name := range defnames { 25 | for _, op := range forkReg[name].Added { 26 | used.Add(op.Name) 27 | } 28 | } 29 | usedopnames := used.Members() 30 | slices.Sort(usedopnames) 31 | // Now compute sorted list of all ops. 32 | allopnames := make([]string, len(oplist)) 33 | for i, op := range oplist { 34 | allopnames[i] = op.Name 35 | } 36 | slices.Sort(allopnames) 37 | // Compare. 
38 | d := set.Diff(allopnames, usedopnames) 39 | if len(d) > 0 { 40 | t.Error("unused ops:", d) 41 | } 42 | if len(usedopnames) > len(allopnames) { 43 | t.Error("forkdefs uses ops which are not in oplist") 44 | } 45 | } 46 | 47 | func TestForkDefs(t *testing.T) { 48 | defnames := slices.Sorted(maps.Keys(forkReg)) 49 | 50 | // Check canon name is listed first in def.Names. 51 | for _, name := range defnames { 52 | def := forkReg[name] 53 | if len(def.Names) == 0 { 54 | t.Fatalf("instruction set %q has no Names", name) 55 | } 56 | if def.Names[0] != name { 57 | t.Fatalf("canon name of instruction set %q not listed first in def.Names", name) 58 | } 59 | } 60 | 61 | // Check lineage works. 62 | for _, name := range defnames { 63 | def := forkReg[name] 64 | _, err := def.lineage() 65 | if err != nil { 66 | t.Errorf("problem in lineage() of %q: %v", name, err) 67 | } 68 | } 69 | } 70 | 71 | // In this test, we just check for a few known ops. 72 | func TestForkOps(t *testing.T) { 73 | is := FindInstructionSet("cancun") 74 | 75 | { 76 | op := is.OpByName("ADD") 77 | if op.Name != "ADD" { 78 | t.Fatal("wrong op name:", op.Name) 79 | } 80 | if op.Code != 0x01 { 81 | t.Fatal("wrong op code:", op.Code) 82 | } 83 | if op2 := is.OpByCode(0x01); op2 != op { 84 | t.Fatal("reverse lookup returned incorrect op", op2) 85 | } 86 | } 87 | { 88 | op := is.OpByName("SHR") 89 | if op.Name != "SHR" { 90 | t.Fatal("wrong op name:", op.Name) 91 | } 92 | if op.Code != 0x1c { 93 | t.Fatal("wrong op code:", op.Code) 94 | } 95 | if op2 := is.OpByCode(0x1c); op2 != op { 96 | t.Fatal("reverse lookup returned incorrect op", op2) 97 | } 98 | } 99 | { 100 | op := is.OpByName("RANDOM") 101 | if op.Name != "RANDOM" { 102 | t.Fatal("wrong op name:", op.Name) 103 | } 104 | if op.Code != 0x44 { 105 | t.Fatal("wrong op code:", op.Code) 106 | } 107 | if op2 := is.OpByCode(0x44); op2 != op { 108 | t.Fatal("reverse lookup returned incorrect op", op2) 109 | } 110 | } 111 | { 112 | op := 
is.OpByName("DIFFICULTY") 113 | if op != nil { 114 | t.Fatal("DIFFICULTY op found even though it was removed") 115 | } 116 | rf := is.ForkWhereOpRemoved("DIFFICULTY") 117 | if rf != "paris" { 118 | t.Fatalf("ForkWhereOpRemoved(DIFFICULTY) -> %s != %s", rf, "paris") 119 | } 120 | } 121 | } 122 | 123 | func TestForksWhereOpAdded(t *testing.T) { 124 | f := ForksWhereOpAdded("BASEFEE") 125 | if !slices.Equal(f, []string{"london"}) { 126 | t.Fatalf("wrong list for BASEFEE: %v", f) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /internal/evm/ops.go: -------------------------------------------------------------------------------- 1 | // Copyright 2023 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package evm 18 | 19 | import ( 20 | "strconv" 21 | "strings" 22 | ) 23 | 24 | // Op is an EVM opcode. 
// Op describes a single EVM opcode.
type Op struct {
	Name string
	Code byte

	// Flags:
	// - Push is set for PUSHx
	// - Term is set for instructions that end execution
	// - Jump is set for all jumps
	// - Unconditional is set for unconditional jumps
	// - JumpDest is set for JUMPDEST
	Push, Term, Jump, Unconditional, JumpDest bool
}

// PushSize returns the number that follows the "PUSH" prefix in the op name.
// The conversion error is dropped on purpose: names without a numeric suffix,
// such as the names of non-push ops, yield zero.
func (op Op) PushSize() int {
	suffix := strings.TrimPrefix(op.Name, "PUSH")
	size, _ := strconv.Atoi(suffix)
	return size
}
"RETURNDATACOPY", Code: 0x3e}, 87 | {Name: "EXTCODEHASH", Code: 0x3f}, 88 | {Name: "BLOCKHASH", Code: 0x40}, 89 | {Name: "COINBASE", Code: 0x41}, 90 | {Name: "TIMESTAMP", Code: 0x42}, 91 | {Name: "NUMBER", Code: 0x43}, 92 | {Name: "DIFFICULTY", Code: 0x44}, 93 | {Name: "RANDOM", Code: 0x44}, 94 | {Name: "GASLIMIT", Code: 0x45}, 95 | {Name: "CHAINID", Code: 0x46}, 96 | {Name: "SELFBALANCE", Code: 0x47}, 97 | {Name: "BASEFEE", Code: 0x48}, 98 | {Name: "BLOBHASH", Code: 0x49}, 99 | {Name: "POP", Code: 0x50}, 100 | {Name: "MLOAD", Code: 0x51}, 101 | {Name: "MSTORE", Code: 0x52}, 102 | {Name: "MSTORE8", Code: 0x53}, 103 | {Name: "SLOAD", Code: 0x54}, 104 | {Name: "SSTORE", Code: 0x55}, 105 | {Name: "JUMP", Code: 0x56, Jump: true, Unconditional: true}, 106 | {Name: "JUMPI", Code: 0x57, Jump: true}, 107 | {Name: "PC", Code: 0x58}, 108 | {Name: "MSIZE", Code: 0x59}, 109 | {Name: "GAS", Code: 0x5a}, 110 | {Name: "JUMPDEST", Code: 0x5b, JumpDest: true}, 111 | {Name: "TLOAD", Code: 0x5c}, 112 | {Name: "TSTORE", Code: 0x5d}, 113 | {Name: "MCOPY", Code: 0x5e}, 114 | {Name: "PUSH0", Code: 0x5f, Push: true}, 115 | {Name: "PUSH1", Code: 0x60, Push: true}, 116 | {Name: "PUSH2", Code: 0x61, Push: true}, 117 | {Name: "PUSH3", Code: 0x62, Push: true}, 118 | {Name: "PUSH4", Code: 0x63, Push: true}, 119 | {Name: "PUSH5", Code: 0x64, Push: true}, 120 | {Name: "PUSH6", Code: 0x65, Push: true}, 121 | {Name: "PUSH7", Code: 0x66, Push: true}, 122 | {Name: "PUSH8", Code: 0x67, Push: true}, 123 | {Name: "PUSH9", Code: 0x68, Push: true}, 124 | {Name: "PUSH10", Code: 0x69, Push: true}, 125 | {Name: "PUSH11", Code: 0x6a, Push: true}, 126 | {Name: "PUSH12", Code: 0x6b, Push: true}, 127 | {Name: "PUSH13", Code: 0x6c, Push: true}, 128 | {Name: "PUSH14", Code: 0x6d, Push: true}, 129 | {Name: "PUSH15", Code: 0x6e, Push: true}, 130 | {Name: "PUSH16", Code: 0x6f, Push: true}, 131 | {Name: "PUSH17", Code: 0x70, Push: true}, 132 | {Name: "PUSH18", Code: 0x71, Push: true}, 133 | {Name: "PUSH19", Code: 
0x72, Push: true}, 134 | {Name: "PUSH20", Code: 0x73, Push: true}, 135 | {Name: "PUSH21", Code: 0x74, Push: true}, 136 | {Name: "PUSH22", Code: 0x75, Push: true}, 137 | {Name: "PUSH23", Code: 0x76, Push: true}, 138 | {Name: "PUSH24", Code: 0x77, Push: true}, 139 | {Name: "PUSH25", Code: 0x78, Push: true}, 140 | {Name: "PUSH26", Code: 0x79, Push: true}, 141 | {Name: "PUSH27", Code: 0x7a, Push: true}, 142 | {Name: "PUSH28", Code: 0x7b, Push: true}, 143 | {Name: "PUSH29", Code: 0x7c, Push: true}, 144 | {Name: "PUSH30", Code: 0x7d, Push: true}, 145 | {Name: "PUSH31", Code: 0x7e, Push: true}, 146 | {Name: "PUSH32", Code: 0x7f, Push: true}, 147 | {Name: "DUP1", Code: 0x80}, 148 | {Name: "DUP2", Code: 0x81}, 149 | {Name: "DUP3", Code: 0x82}, 150 | {Name: "DUP4", Code: 0x83}, 151 | {Name: "DUP5", Code: 0x84}, 152 | {Name: "DUP6", Code: 0x85}, 153 | {Name: "DUP7", Code: 0x86}, 154 | {Name: "DUP8", Code: 0x87}, 155 | {Name: "DUP9", Code: 0x88}, 156 | {Name: "DUP10", Code: 0x89}, 157 | {Name: "DUP11", Code: 0x8a}, 158 | {Name: "DUP12", Code: 0x8b}, 159 | {Name: "DUP13", Code: 0x8c}, 160 | {Name: "DUP14", Code: 0x8d}, 161 | {Name: "DUP15", Code: 0x8e}, 162 | {Name: "DUP16", Code: 0x8f}, 163 | {Name: "SWAP1", Code: 0x90}, 164 | {Name: "SWAP2", Code: 0x91}, 165 | {Name: "SWAP3", Code: 0x92}, 166 | {Name: "SWAP4", Code: 0x93}, 167 | {Name: "SWAP5", Code: 0x94}, 168 | {Name: "SWAP6", Code: 0x95}, 169 | {Name: "SWAP7", Code: 0x96}, 170 | {Name: "SWAP8", Code: 0x97}, 171 | {Name: "SWAP9", Code: 0x98}, 172 | {Name: "SWAP10", Code: 0x99}, 173 | {Name: "SWAP11", Code: 0x9a}, 174 | {Name: "SWAP12", Code: 0x9b}, 175 | {Name: "SWAP13", Code: 0x9c}, 176 | {Name: "SWAP14", Code: 0x9d}, 177 | {Name: "SWAP15", Code: 0x9e}, 178 | {Name: "SWAP16", Code: 0x9f}, 179 | {Name: "LOG0", Code: 0xa0}, 180 | {Name: "LOG1", Code: 0xa1}, 181 | {Name: "LOG2", Code: 0xa2}, 182 | {Name: "LOG3", Code: 0xa3}, 183 | {Name: "LOG4", Code: 0xa4}, 184 | {Name: "CREATE", Code: 0xf0}, 185 | {Name: "CALL", Code: 
0xf1}, 186 | {Name: "CALLCODE", Code: 0xf2}, 187 | {Name: "RETURN", Code: 0xf3, Term: true}, 188 | {Name: "DELEGATECALL", Code: 0xf4}, 189 | {Name: "CREATE2", Code: 0xf5}, 190 | {Name: "STATICCALL", Code: 0xfa}, 191 | {Name: "REVERT", Code: 0xfd, Term: true}, 192 | {Name: "SELFDESTRUCT", Code: 0xff, Term: true}, 193 | {Name: "SENDALL", Code: 0xff, Term: true}, 194 | } 195 | 196 | var opm = computeOpsMap() 197 | 198 | func computeOpsMap() map[string]*Op { 199 | m := make(map[string]*Op, len(oplist)) 200 | for _, op := range oplist { 201 | if m[op.Name] != nil { 202 | panic("duplicate op " + op.Name) 203 | } 204 | m[op.Name] = op 205 | } 206 | return m 207 | } 208 | -------------------------------------------------------------------------------- /internal/lzint/value.go: -------------------------------------------------------------------------------- 1 | // Copyright 2025 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 
var (
	errNegativeBytes = errors.New("negative int in bytes context")
	errOddHexBytes   = errors.New("odd-length hex in bytes context")
)

// Flags stored in Value.flag. They record how a value was created.
const (
	flagWasHex       byte = 1 << iota // created from a hexadecimal literal
	flagHexOddLength                  // the hexadecimal literal had an odd number of digits
	flagWasBytes                      // created from a byte slice
)

// Value is a big-integer that also tracks the number of leading zero bytes.
// This type is used to represent values during macro evaluation.
//
// Storing values this way may seem like a strange choice at first, so let me
// explain: The Geas language is meant to be simple, and values generally do not
// have a 'type'. Many macro operations are simple arithmetic and work with
// integers, and the EVM itself also operates on a stack of 256bit integers. So
// using integers as the basic type was an easy choice. However, Geas has a few
// operations on bytes as well (such as hash functions) and the language contains
// string literals. I didn't want to introduce a type system into the evaluator
// just to support them, since this would make some macros incompatible with
// others.
//
// Instead, I have chosen to stick to all values being integers, but this
// introduces some problems when an evaluation produces leading zero bytes. They
// cannot be represented by *big.Int, and thus using a hash function or including
// such values into the bytecode output would produce unexpected results.
//
// So this is how this type came to be. When a Value is created from a decimal
// integer literal, it is just an integer with no special properties. However,
// when created from a hexadecimal literal, string, or []byte in Go, leading
// zeros may be created and will be reproduced when the value is converted to
// []byte. Using an arithmetic operation on a value with leading zeros will drop
// them though.
type Value struct {
	int   big.Int
	lznib uint // leading zero nibble count
	flag  byte
}

// FromInt creates a value from a big integer. It panics if i is nil.
//
// The integer is copied with Set. A plain struct copy of big.Int would share
// the underlying word slice with i, and later in-place modifications of i
// would silently change the returned value.
func FromInt(i *big.Int) *Value {
	if i == nil {
		panic("nil int")
	}
	v := new(Value)
	v.int.Set(i)
	return v
}

// FromInt64 creates a value from a native integer.
func FromInt64(i int64) *Value {
	v := new(Value)
	v.int.SetInt64(i)
	return v
}

// FromBytes creates a value from a byte slice. The integer is the big-endian
// interpretation of slice, and every leading zero byte is recorded as two
// leading zero nibbles.
func FromBytes(slice []byte) *Value {
	v := new(Value)
	for _, b := range slice {
		if b != 0 {
			break
		}
		v.lznib += 2
	}
	v.int.SetBytes(slice)
	v.flag = flagWasBytes
	return v
}

// ParseNumberLiteral creates a value from a number literal.
//
// Literals starting with "0x" or "0X" are parsed as hexadecimal. For those,
// the count of leading zero digits is recorded, as well as whether the number
// of digits is odd. All other literals are parsed as decimal, where a leading
// zero is rejected. An error is returned for empty or malformed input.
func ParseNumberLiteral(text string) (*Value, error) {
	switch {
	case len(text) == 0:
		return nil, errors.New("empty number text")

	case strings.HasPrefix(text, "0x") || strings.HasPrefix(text, "0X"):
		hex := text[2:]
		v := &Value{flag: flagWasHex}
		if len(hex)%2 != 0 {
			v.flag |= flagHexOddLength
		}
		// Count leading zero digits, they are not representable in the integer.
		for _, c := range hex {
			if c != '0' {
				break
			}
			v.lznib++
		}
		if _, ok := v.int.SetString(hex, 16); !ok {
			return nil, fmt.Errorf("invalid hex: %s", text)
		}
		return v, nil

	case len(text) > 1 && text[0] == '0':
		return nil, errors.New("leading zero not allowed in decimal integer")

	default:
		var v Value
		if _, ok := v.int.SetString(text, 10); !ok {
			return nil, fmt.Errorf("invalid number %s", text)
		}
		return &v, nil
	}
}

// Int converts the value to a bigint.
// This is always possible. Leading zero bytes are dropped.
// The result is nil for a nil value. Otherwise it points into v.
func (v *Value) Int() *big.Int {
	if v == nil {
		return nil
	}
	return &v.int
}
if the integer is negative or was an odd-length literal. 139 | func (v *Value) Bytes() ([]byte, error) { 140 | if v == nil { 141 | return nil, nil 142 | } 143 | if v.int.Sign() < 0 { 144 | return nil, errNegativeBytes 145 | } 146 | if v.flag&flagHexOddLength != 0 { 147 | return nil, errOddHexBytes 148 | } 149 | b := make([]byte, v.ByteLen()) 150 | return v.int.FillBytes(b), nil 151 | } 152 | 153 | // ByteLen returns the length in bytes. This is always equal to the length of the slice 154 | // that Bytes() would return, i.e. leading zeros are counted. 155 | func (v *Value) ByteLen() int64 { 156 | if v == nil { 157 | return 0 158 | } 159 | return int64(v.lznib)/2 + (int64(v.int.BitLen())+7)/8 160 | } 161 | 162 | // IntegerBitLen returns the bit length of v as an integer, i.e. leading zero 163 | // bytes are not counted. 164 | func (v *Value) IntegerBitLen() int64 { 165 | if v == nil { 166 | return 0 167 | } 168 | return int64(v.int.BitLen()) 169 | } 170 | 171 | func (v *Value) String() string { 172 | switch { 173 | case v == nil: 174 | return "nil" 175 | 176 | case v.flag&(flagWasHex|flagWasBytes) != 0: 177 | var b strings.Builder 178 | b.WriteString("0x") 179 | for range v.lznib { 180 | b.WriteByte('0') 181 | } 182 | if v.flag&flagWasBytes != 0 { 183 | fmt.Fprintf(&b, "%x", v.int.Bytes()) 184 | } else if v.int.Sign() > 0 { 185 | fmt.Fprintf(&b, "%x", &v.int) 186 | } 187 | return b.String() 188 | 189 | default: 190 | return v.int.String() 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /internal/lzint/value_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2025 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 
3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package lzint 18 | 19 | import ( 20 | "math/big" 21 | "reflect" 22 | "testing" 23 | ) 24 | 25 | var valueTests = []struct { 26 | Name string 27 | V *Value 28 | ExpectedString string 29 | ExpectedByteLen int64 30 | ExpectedBitLen int64 31 | ExpectedInt *big.Int 32 | ExpectedBytes []byte 33 | ExpectedBytesErr error 34 | }{ 35 | { 36 | Name: "nil", 37 | V: nil, 38 | ExpectedString: "nil", 39 | ExpectedByteLen: 0, 40 | ExpectedBitLen: 0, 41 | ExpectedBytes: nil, 42 | ExpectedInt: nil, 43 | }, 44 | { 45 | Name: "Int64(0)", 46 | V: FromInt64(0), 47 | ExpectedString: "0", 48 | ExpectedByteLen: 0, 49 | ExpectedBitLen: 0, 50 | ExpectedBytes: []byte{}, 51 | ExpectedInt: new(big.Int), 52 | }, 53 | { 54 | Name: "Int64(99)", 55 | V: FromInt64(99), 56 | ExpectedString: "99", 57 | ExpectedByteLen: 1, 58 | ExpectedBitLen: 7, 59 | ExpectedBytes: []byte{99}, 60 | ExpectedInt: big.NewInt(99), 61 | }, 62 | { 63 | Name: "Int(256)", 64 | V: FromInt(big.NewInt(256)), 65 | ExpectedString: "256", 66 | ExpectedByteLen: 2, 67 | ExpectedBitLen: 9, 68 | ExpectedBytes: []byte{1, 0}, 69 | ExpectedInt: big.NewInt(256), 70 | }, 71 | { 72 | Name: "Int(-256)", 73 | V: FromInt(big.NewInt(-256)), 74 | ExpectedString: "-256", 75 | ExpectedByteLen: 2, 76 | ExpectedBitLen: 9, 77 
| ExpectedBytesErr: errNegativeBytes, 78 | ExpectedInt: big.NewInt(-256), 79 | }, 80 | { 81 | Name: "Bytes(0x)", 82 | V: FromBytes([]byte{}), 83 | ExpectedString: "0x", 84 | ExpectedByteLen: 0, 85 | ExpectedBitLen: 0, 86 | ExpectedBytes: []byte{}, 87 | ExpectedInt: new(big.Int), 88 | }, 89 | { 90 | Name: "Bytes(0x00)", 91 | V: FromBytes([]byte{0}), 92 | ExpectedString: "0x00", 93 | ExpectedByteLen: 1, 94 | ExpectedBitLen: 0, 95 | ExpectedBytes: []byte{0}, 96 | ExpectedInt: new(big.Int), 97 | }, 98 | { 99 | Name: "Bytes(0x00000102)", 100 | V: FromBytes([]byte{0, 0, 1, 2}), 101 | ExpectedString: "0x00000102", 102 | ExpectedByteLen: 4, 103 | ExpectedBitLen: 9, 104 | ExpectedBytes: []byte{0, 0, 1, 2}, 105 | ExpectedInt: new(big.Int).SetBytes([]byte{1, 2}), 106 | }, 107 | { 108 | Name: "NumberLiteral(0)", 109 | V: mustParseNum("0"), 110 | ExpectedString: "0", 111 | ExpectedByteLen: 0, 112 | ExpectedBitLen: 0, 113 | ExpectedBytes: []byte{}, 114 | ExpectedInt: big.NewInt(0), 115 | }, 116 | { 117 | Name: "NumberLiteral(99)", 118 | V: mustParseNum("99"), 119 | ExpectedString: "99", 120 | ExpectedByteLen: 1, 121 | ExpectedBitLen: 7, 122 | ExpectedBytes: []byte{99}, 123 | ExpectedInt: big.NewInt(99), 124 | }, 125 | { 126 | Name: "NumberLiteral(0xff01)", 127 | V: mustParseNum("0xff01"), 128 | ExpectedString: "0xff01", 129 | ExpectedByteLen: 2, 130 | ExpectedBitLen: 16, 131 | ExpectedBytes: []byte{0xff, 0x01}, 132 | ExpectedInt: big.NewInt(0xff01), 133 | }, 134 | { 135 | Name: "NumberLiteral(0x00000099ff01)", 136 | V: mustParseNum("0x00000099ff01"), 137 | ExpectedString: "0x00000099ff01", 138 | ExpectedByteLen: 6, 139 | ExpectedBitLen: 24, 140 | ExpectedBytes: []byte{0x00, 0x00, 0x00, 0x99, 0xff, 01}, 141 | ExpectedInt: big.NewInt(0x99ff01), 142 | }, 143 | { 144 | Name: "NumberLiteral(0x0)", 145 | V: mustParseNum("0x0"), 146 | ExpectedString: "0x0", 147 | ExpectedByteLen: 0, 148 | ExpectedBitLen: 0, 149 | ExpectedBytesErr: errOddHexBytes, 150 | ExpectedInt: big.NewInt(0), 151 | 
}, 152 | { 153 | Name: "NumberLiteral(0xf)", 154 | V: mustParseNum("0xf"), 155 | ExpectedString: "0xf", 156 | ExpectedByteLen: 1, 157 | ExpectedBitLen: 4, 158 | ExpectedBytesErr: errOddHexBytes, 159 | ExpectedInt: big.NewInt(15), 160 | }, 161 | { 162 | Name: "NumberLiteral(0x456)", 163 | V: mustParseNum("0x456"), 164 | ExpectedString: "0x456", 165 | ExpectedByteLen: 2, 166 | ExpectedBitLen: 11, 167 | ExpectedBytesErr: errOddHexBytes, 168 | ExpectedInt: big.NewInt(0x456), 169 | }, 170 | } 171 | 172 | func mustParseNum(input string) *Value { 173 | v, err := ParseNumberLiteral(input) 174 | if err != nil { 175 | panic(err) 176 | } 177 | return v 178 | } 179 | 180 | func TestValue(t *testing.T) { 181 | for _, test := range valueTests { 182 | t.Run(test.Name, func(t *testing.T) { 183 | if s := test.V.String(); s != test.ExpectedString { 184 | t.Errorf("wrong String: %q", s) 185 | } 186 | if l := test.V.ByteLen(); l != test.ExpectedByteLen { 187 | t.Errorf("wrong ByteLen: %d", l) 188 | } 189 | if l := test.V.IntegerBitLen(); l != test.ExpectedBitLen { 190 | t.Errorf("wrong BitLen: %d", l) 191 | } 192 | 193 | i := test.V.Int() 194 | if i == nil && test.ExpectedInt != nil { 195 | t.Errorf("wrong Int: , expected %d", test.ExpectedInt) 196 | } else if i != nil && test.ExpectedInt == nil { 197 | t.Errorf("wrong Int: %d, expected ", i) 198 | } else if i.Cmp(test.ExpectedInt) != 0 { 199 | t.Errorf("wrong Int: %d, expected %d", i, test.ExpectedInt) 200 | } 201 | 202 | b, err := test.V.Bytes() 203 | if test.ExpectedBytesErr != nil { 204 | if err == nil { 205 | t.Errorf("Bytes did not return expected error") 206 | } else if err != test.ExpectedBytesErr { 207 | t.Errorf("Bytes returned wrong error: %v", err) 208 | } 209 | } else { 210 | if err != nil { 211 | t.Errorf("Bytes returned error: %v", err) 212 | } else if !reflect.DeepEqual(b, test.ExpectedBytes) { 213 | t.Errorf("wrong Bytes: %+v", b) 214 | } 215 | } 216 | }) 217 | } 218 | } 219 | 220 | var literalErrorTests = []struct { 
221 | Input string 222 | Err string 223 | }{ 224 | { 225 | Input: "0xag", 226 | Err: "invalid hex: 0xag", 227 | }, 228 | { 229 | Input: "006", 230 | Err: "leading zero not allowed in decimal integer", 231 | }, 232 | { 233 | Input: "", 234 | Err: "empty number text", 235 | }, 236 | { 237 | Input: "42g", 238 | Err: "invalid number 42g", 239 | }, 240 | } 241 | 242 | func TestParseLiteral(t *testing.T) { 243 | for _, test := range literalErrorTests { 244 | _, err := ParseNumberLiteral(test.Input) 245 | if err == nil { 246 | t.Errorf("input %q: expected error", test.Input) 247 | } else if err.Error() != test.Err { 248 | t.Errorf("input %q: wrong error %v", test.Input, err) 249 | } 250 | } 251 | } 252 | -------------------------------------------------------------------------------- /internal/set/set.go: -------------------------------------------------------------------------------- 1 | // Copyright 2024 The go-ethereum Authors 2 | // This file is part of the go-ethereum library. 3 | // 4 | // The go-ethereum library is free software: you can redistribute it and/or modify 5 | // it under the terms of the GNU Lesser General Public License as published by 6 | // the Free Software Foundation, either version 3 of the License, or 7 | // (at your option) any later version. 8 | // 9 | // The go-ethereum library is distributed in the hope that it will be useful, 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | // GNU Lesser General Public License for more details. 13 | // 14 | // You should have received a copy of the GNU Lesser General Public License 15 | // along with the go-ethereum library. If not, see . 16 | 17 | package set 18 | 19 | import ( 20 | "maps" 21 | "slices" 22 | ) 23 | 24 | // Set is a wrapper over map. 25 | // I don't want to depend on a set library just for this. 
26 | type Set[X comparable] map[X]struct{} 27 | 28 | func (s Set[X]) Add(k X) { 29 | s[k] = struct{}{} 30 | } 31 | 32 | func (s Set[X]) Includes(k X) bool { 33 | _, ok := s[k] 34 | return ok 35 | } 36 | 37 | func (s Set[X]) Members() []X { 38 | return slices.Collect(maps.Keys(s)) 39 | } 40 | 41 | // Diff returns the elements of a which are not in b. 42 | func Diff[X comparable](a, b []X) []X { 43 | set := make(Set[X], len(b)) 44 | for _, x := range b { 45 | set.Add(x) 46 | } 47 | var diff []X 48 | for _, x := range a { 49 | if !set.Includes(x) { 50 | diff = append(diff, x) 51 | } 52 | } 53 | return diff 54 | } 55 | --------------------------------------------------------------------------------