├── .github
    └── workflows
    │   └── go.yml
├── LICENSE
├── README.md
├── asm
    ├── compiler.go
    ├── compiler_analysis.go
    ├── compiler_eval.go
    ├── compiler_expand.go
    ├── compiler_prog.go
    ├── compiler_prog_test.go
    ├── compiler_test.go
    ├── error.go
    ├── evaluator.go
    ├── evaluator_builtins.go
    ├── evaluator_test.go
    ├── global.go
    ├── statements.go
    └── testdata
    │   ├── compiler-tests.yaml
    │   └── known-bytecode.yaml
├── assets
    └── geas-b.svg
├── cmd
    └── geas
    │   └── geas.go
├── disasm
    ├── disassembler.go
    └── disassembler_test.go
├── example
    ├── 4788asm.eas
    ├── 4788asm_ctor.eas
    └── erc20
    │   ├── erc20.eas
    │   ├── erc20_ctor.eas
    │   ├── op_allowance.eas
    │   ├── op_approve.eas
    │   ├── op_balanceOf.eas
    │   ├── op_transfer.eas
    │   └── op_transferFrom.eas
├── go.mod
├── go.sum
└── internal
    ├── ast
        ├── arith.go
        ├── arithop_string.go
        ├── ast.go
        ├── error.go
        ├── lexer.go
        ├── lexer_test.go
        ├── names.go
        ├── parse.go
        └── tokentype_string.go
    ├── evm
        ├── forkdefs.go
        ├── instruction_set.go
        ├── instruction_set_test.go
        └── ops.go
    ├── lzint
        ├── value.go
        └── value_test.go
    └── set
        └── set.go


/.github/workflows/go.yml:
--------------------------------------------------------------------------------
 1 | name: Go
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ "master" ]
 6 |   pull_request:
 7 |     branches: [ "master" ]
 8 | 
 9 | jobs:
10 | 
11 |   build:
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |     - uses: actions/checkout@v3
15 | 
16 |     - name: Set up Go
17 |       uses: actions/setup-go@v4
18 |       with:
19 |         go-version: '1.23'
20 | 
21 |     - name: Build
22 |       run: go build -v ./...
23 | 
24 |     - name: Test
25 |       run: go test -v ./...
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                    GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![geas](assets/geas-b.svg)
  2 | 
  3 | This is geas – the Good Ethereum Assembler[^1] – a macro assembler for the EVM.
  4 | 
  5 | You can use it to create any contract for Ethereum, though it's probably a bad idea.
  6 | For real contracts, you should use a well-tested language compiler like Solidity.
  7 | The purpose of geas is mostly creating specialty programs and tinkering with the EVM
  8 | at a low level.
  9 | 
 10 | ### Installation
 11 | 
 12 | You can use the `go` tool to install the latest released version.
 13 | This creates a `geas` binary in the current directory:
 14 | 
 15 |     env "GOBIN=$PWD" go install github.com/fjl/geas/cmd/geas@latest
 16 | 
 17 | For development of geas, clone the repository and then run `go build ./cmd/geas`.
 18 | 
 19 | ### Usage
 20 | 
 21 | To create bytecode from an assembly file, run the tool with a filename as argument.
 22 | 
 23 |     ./geas file.eas
 24 | 
 25 | There is also a disassembler. To disassemble hex bytecode from standard input, run:
 26 | 
 27 |     ./geas -d -
 28 | 
 29 | To see all supported flags, run `geas` with no arguments.
 30 | 
 31 | ### Editor Support
 32 | 
 33 | VIM users may be interested in [vim-geas](https://github.com/lightclient/vim-geas).
 34 | 
 35 | ### Use as a Go Library
 36 | 
 37 | You can also use the assembler as a library. See the [API documentation](https://pkg.go.dev/github.com/fjl/geas/asm)
 38 | to get started.
 39 | 
 40 | ## Language
 41 | 
 42 | Programs accepted by the assembler follow a simple structure. Each line is an instruction.
 43 | Both uppercase and lowercase can be used for instruction names. All known EVM instructions
 44 | are supported.
 45 | 
 46 | Comments can appear anywhere and are introduced by the semicolon (;) character.
 47 | 
 48 |         push 1  ;; comment
 49 |         push 2
 50 |         add
 51 | 
 52 | Opcodes listed in the program correspond directly with the bytecodes in output.
 53 | 
 54 | ### Jump
 55 | 
 56 | Jump destinations are written as a label followed by colon (:) and can be referred to
 57 | using the notation `@label` together with JUMP or JUMPI.
 58 | 
 59 |     begin:
 60 |         push 1
 61 |         push 2
 62 |         add
 63 |         jump @begin
 64 | 
 65 | When using JUMP with an argument, it turns into a PUSH of the label followed by the jump
 66 | instruction, so the above is equivalent to:
 67 | 
 68 |     begin:
 69 |         push 1
 70 |         push 2
 71 |         add
 72 |         push @begin
 73 |         jump
 74 | 
 75 | It is also possible to create labels without emitting a JUMPDEST instruction by prefixing
 76 | the label name with the dot (.) character. While dotted labels are not valid for use as an
 77 | argument to JUMP, they can be used with PUSH to measure code offsets.
 78 | 
 79 |         push @.end
 80 |         codesize
 81 |         eq
 82 |     .end:
 83 | 
 84 | ### Push
 85 | 
 86 | The EVM instruction set has sized push instructions from size zero (`PUSH0`) up to a size of
 87 | 32 bytes (`PUSH32`). While you can use sized push instructions directly, it is preferable
 88 | to let the assembler figure out the right size for you. To do this use the variable-size
 89 | `PUSH` instruction.
 90 | 
 91 | All PUSH-type instructions must be followed by an immediate argument on the same line.
 92 | Simple math expressions and label references can be used within the argument:
 93 | 
 94 |     .begin:
 95 |         push (@add_it * 2) - 3
 96 |         push 5
 97 |     add_it:
 98 |         add
 99 | 
100 | Supported arithmetic operations include addition (+), subtraction (-), multiplication (*),
101 | division (/), and modulo (%). There is also support for bit-shifts (<<, >>), bitwise AND
102 | (&), OR (|), XOR (^). Note that operator precedence is the same as in Go.
103 | 
104 | All arithmetic is performed with arbitrary precision integers. The result of calculations
105 | must fit into 256 bits in order to be valid as a PUSH argument. For sized push, the result
106 | must fit into the declared push size. Negative results are not allowed.
107 | 
108 | ### Expression Macros
109 | 
110 | Expression macros can be created with the `#define` directive. Macros can be used within
111 | PUSH argument expressions.
112 | 
113 | Macros can have parameters. Refer to parameter values using the dollar sign ($) prefix
114 | within the macro.
115 | 
116 |     #define z = 0x8823
117 |     #define myexpr(x, y) = ($x + $y) * z
118 | 
119 |         push myexpr(1, 2)
120 | 
121 | ### Builtin Macros
122 | 
123 | There are several builtin macros for common EVM tasks. Names of builtins start with a dot,
124 | and builtin macros cannot be redefined. Available builtins include:
125 | 
126 | `.abs()` for getting the absolute value of a number:
127 | 
128 |     push .abs(0 - 100)
129 | 
130 | `.selector()` for computing 4-byte ABI selectors:
131 | 
132 |     push .selector("transfer(address,uint256)")
133 |     push 0
134 |     mstore
135 | 
136 | `.keccak256()`, `.sha256()` hash functions:
137 | 
138 |     push .sha256("data")
139 | 
140 | `.address()` for declaring contract addresses. The checksum and byte length of the address
141 | are verified.
142 | 
143 |     #define otherContract = .address(0x658bdf435d810c91414ec09147daa6db62406379)
144 | 
145 | ### Instruction Macros
146 | 
147 | Common groups of instructions can be defined as instruction macros. Names of such macros
148 | always start with the percent (%) character.
149 | 
150 |     #define %add5_and_store(x, location) {
151 |         push $x
152 |         push 5
153 |         add
154 |         push $location
155 |         mstore
156 |     }
157 | 
158 | To invoke an instruction macro, write the macro name as a statement on its own line. If
159 | the macro has no arguments, you can also leave the parentheses off.
160 | 
161 |     .begin:
162 |         %add5_and_store(3, 64)
163 |         %add5_and_store(4, 32)
164 |         push 32
165 |         push 64
166 |         sha3
167 | 
168 | Nested macro definitions are not allowed. Macro recursion is also not allowed.
169 | 
170 | When defining (local) labels within instruction macros, they will only be visible within
171 | the macro. There is no way to refer to a local macro label from the outside, though you
172 | can pass references to such internal labels into another macro. The example below
173 | illustrates this, and also shows that in order to jump to a label argument within a macro,
174 | you must use explicit PUSH and JUMP.
175 | 
176 |     #define %jump_if_not(label) {
177 |         iszero
178 |         push $label
179 |         jumpi
180 |     }
181 | 
182 |     #define %read_input(bytes) {
183 |         calldatasize
184 |         push $bytes
185 |         eq
186 |         %jump_if_not(@revert)
187 | 
188 |         push 0
189 |         push $bytes
190 |         calldataload
191 |         jump @continue
192 | 
193 |       revert:
194 |         push 0
195 |         push 0
196 |         revert
197 | 
198 |       continue:
199 |     }
200 | 
201 | ### Including Files
202 | 
203 | EVM assembly files can be included into the current program using the `#include`
204 | directive. Top-level instructions in the included file will be inserted at the position of
205 | the directive.
206 | 
207 | `#include` filenames are resolved relative to the file containing the directive.
208 | 
209 |     .begin:
210 |         push @.end
211 |         push 32
212 |         mstore
213 | 
214 |     #include "file.evm"
215 |     .end:
216 | 
217 | ### Local and Global Scope
218 | 
219 | Names of labels and macros are case-sensitive. And just like in Go, the case of the first
220 | letter determines visibility of definitions.
221 | 
222 | Macro and label definitions whose name begins with a lower-case letter are local to the
223 | file they're defined in. This means local definitions cannot be referenced by `#include`
224 | files.
225 | 
226 | Identifiers beginning with an upper-case letter are registered in the global scope and are
227 | available for use across files. When using `#include`, global definitions in the included
228 | file also become available in all other files.
229 | 
230 | Global identifiers must be unique across the program, i.e. they can only be defined once.
231 | Files defining global macros or labels can only be included into the program once. Note
232 | that the uniqueness requirement also means that instruction macros containing global
233 | labels can only be called once. Use good judgement when structuring your includes to avoid
234 | redefinition errors.
235 | 
236 | lib.eas:
237 | 
238 |     #define result = 128
239 |     #define %StoreSum {
240 |         add
241 |         push result
242 |         mstore
243 |     }
244 | 
245 | main.eas:
246 | 
247 |     #include "lib.eas"
248 | 
249 |         push 1
250 |         push 2
251 |         %StoreSum  ;; calling global macro defined in lib.eas
252 | 
253 | ### Configuring the target instruction set
254 | 
255 | The EVM is a changing environment. Opcodes may be added (and sometimes removed) as new
256 | versions of the EVM are released in protocol forks. Geas is aware of EVM forks and their
257 | respective instruction sets.
258 | 
259 | Geas always operates on a specific EVM instruction set. It targets the latest known eth
260 | mainnet fork by default, i.e. all opcodes available in that fork can be used, and opcodes
261 | that have been removed in any prior fork cannot.
262 | 
263 | Use the `#pragma target` directive to change the target instruction set. The basic syntax is
264 | 
265 |     #pragma target "name"
266 | 
267 | where `name` is a lower-case execution-layer fork name like `homestead`, `berlin`, or `prague`.
268 | 
269 | Here is an example. This contract uses the CHAINID instruction to check if it is running
270 | on mainnet, and destroys itself otherwise. CHAINID became available in the "istanbul"
271 | fork, and SELFDESTRUCT was removed in a later revision of the EVM, so this program is only
272 | applicable to a certain range of past EVM versions.
273 | 
274 |     #pragma target "berlin"
275 | 
276 |         chainid                ; [id]
277 |         push 1                 ; [1, id]
278 |         eq                     ; [id = 1]
279 |         jumpi @mainnet         ; []
280 |         push 0x0               ; [zeroaddr]
281 |         selfdestruct           ; []
282 |     mainnet:
283 | 
284 | Note that declaring the target instruction set using `#pragma target` will not prevent the
285 | output bytecode from running on a different EVM version, since it is just a compiler
286 | setting. The example program above will start behaving differently from its intended
287 | version on EVM version "cancun", because SELFDESTRUCT was turned into SENDALL in that
288 | fork. It may even stop working entirely in a later fork.
289 | 
290 | `#pragma target` can only appear in the program once. It cannot be placed in an include
291 | file. You have to put the directive in the main program file.
292 | 
293 | ### #assemble
294 | 
295 | When writing contract constructors and advanced CALL scenarios, it can be necessary to
296 | include subprogram bytecode as-is. The `#assemble` directive does this for you.
297 | 
298 | Using `#assemble` runs the assembler on the specified file, and includes the resulting
299 | bytecode into the current program. Labels of the subprogram will start at offset zero.
300 | Unlike with `#include`, global definitions of the subprogram are not imported.
301 | 
302 |         ;; copy subprogram to memory
303 |         push @.end - @.begin   ; [size]
304 |         push @.begin           ; [offset, size]
305 |         push 128               ; [dest, offset, size]
306 |         codecopy               ; []
307 | 
308 |     .begin:
309 |     #assemble "subprogram.eas"
310 |     .end:
311 | 
312 | If a target instruction set is configured with `#pragma target`, it will also be used for
313 | assembling the subprogram. However, the subprogram file can override the instruction set
314 | using its own `#pragma target` directive.
315 | 
316 | [^1]: Under no circumstances must it be called the geth assembler.
317 | 


--------------------------------------------------------------------------------
/asm/compiler.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | // Package asm implements the Good Ethereum Assembler (geas).
 18 | //
 19 | // For a description of the geas language, see the README.md file in the project root.
 20 | package asm
 21 | 
 22 | import (
 23 | 	"errors"
 24 | 	"fmt"
 25 | 	"io/fs"
 26 | 	"math/big"
 27 | 	"path"
 28 | 	"strings"
 29 | 
 30 | 	"github.com/fjl/geas/internal/ast"
 31 | 	"github.com/fjl/geas/internal/evm"
 32 | 	"github.com/fjl/geas/internal/lzint"
 33 | )
 34 | 
 35 | // Compiler turns assembly source into bytecode. It can be reused for several runs.
 36 | type Compiler struct {
 37 | 	fsys           fs.FS                   // file system for CompileFile and #include; may be nil
 38 | 	lexDebug       bool                    // handed to the parser; see SetDebugLexer
 39 | 	maxIncDepth    int                     // see SetIncludeDepthLimit
 40 | 	maxErrors      int                     // error limit given to the error list by reset
 41 | 	defaultFork    string                  // fork name used when no #pragma target sets one
 42 | 	macroOverrides map[string]*lzint.Value // values assigned by SetGlobal
 43 | 
 44 | 	globals    *globalScope
 45 | 	macroStack map[*ast.InstructionMacroDef]struct{}
 46 | 	includes   map[*ast.IncludeSt]*ast.Document
 47 | 	errors     errorList
 48 | }
 49 | 
 50 | // NewCompiler creates a compiler. It is identical to New and only kept for compatibility.
 51 | // Deprecated: use New.
 52 | func NewCompiler(fsys fs.FS) *Compiler {
 53 | 	return New(fsys)
 54 | }
 55 | 
 56 | // New creates a compiler that targets evm.LatestFork by default.
 57 | // The file system is used to resolve #include file names. If a nil FS is given,
 58 | // #include cannot be used.
 59 | func New(fsys fs.FS) *Compiler {
 60 | 	return &Compiler{
 61 | 		fsys:           fsys,
 62 | 		maxIncDepth:    128,
 63 | 		maxErrors:      10,
 64 | 		defaultFork:    evm.LatestFork,
 65 | 		macroOverrides: make(map[string]*lzint.Value),
 66 | 	}
 67 | }
 68 | 
 69 | // reset prepares the compiler for the next run by replacing all per-run state.
 70 | func (c *Compiler) reset() {
 71 | 	c.globals = newGlobalScope()
 72 | 	c.macroStack = make(map[*ast.InstructionMacroDef]struct{})
 73 | 	c.includes = make(map[*ast.IncludeSt]*ast.Document)
 74 | 	c.errors = errorList{maxErrors: c.maxErrors} // picks up the limit set by SetMaxErrors
 75 | }
 76 | 
 77 | // SetFilesystem sets the file system used by CompileFile and for resolving #include files.
 78 | // Note: if set to a nil FS, #include is not allowed.
 79 | func (c *Compiler) SetFilesystem(fsys fs.FS) {
 80 | 	c.fsys = fsys
 81 | }
 82 | 
 83 | // SetDebugLexer enables or disables printing of the lexer's token stream to stdout.
 84 | func (c *Compiler) SetDebugLexer(on bool) {
 85 | 	c.lexDebug = on
 86 | }
 87 | 
 88 | // SetDefaultFork sets the EVM instruction set used when the program has no #pragma target.
 89 | func (c *Compiler) SetDefaultFork(f string) {
 90 | 	c.defaultFork = f
 91 | }
 92 | 
 93 | // SetIncludeDepthLimit sets the limit on the nesting depth of #include files (128 by default).
 94 | func (c *Compiler) SetIncludeDepthLimit(limit int) {
 95 | 	c.maxIncDepth = limit
 96 | }
 97 | 
 98 | // SetMaxErrors sets the number of errors after which the compiler gives up. Limits below 1 are treated as 1.
 99 | func (c *Compiler) SetMaxErrors(limit int) {
100 | 	if limit < 1 {
101 | 		limit = 1
102 | 	}
103 | 	c.maxErrors = limit
104 | }
105 | 
106 | // SetGlobal sets the value of a global expression macro. A nil v removes the override again.
107 | // Note the name must start with an uppercase letter to make it global, otherwise SetGlobal panics.
108 | func (c *Compiler) SetGlobal(name string, v *big.Int) {
109 | 	if !ast.IsGlobal(name) {
110 | 		panic(fmt.Sprintf("override name %q is not global (uppercase)", name))
111 | 	}
112 | 	if v == nil {
113 | 		delete(c.macroOverrides, name)
114 | 	} else {
115 | 		c.macroOverrides[name] = lzint.FromInt(v)
116 | 	}
117 | }
118 | 
119 | // ClearGlobals removes all macro overrides created by SetGlobal.
120 | func (c *Compiler) ClearGlobals() {
121 | 	clear(c.macroOverrides)
122 | }
123 | 
124 | // CompileString compiles the given program text and returns the corresponding bytecode.
125 | // If compilation fails, the returned slice is nil. Use the Errors method to get
126 | // parsing/compilation errors. The program is compiled with an empty file name.
127 | func (c *Compiler) CompileString(input string) []byte {
128 | 	defer c.errors.catchAbort() // NOTE(review): presumably recovers the abort raised at the error limit; confirm in error.go
129 | 
130 | 	return c.compileSource("", []byte(input))
131 | }
132 | 
133 | // CompileFile compiles the given source file and returns the corresponding bytecode.
134 | // The file is read from the compiler's file system. If reading or compilation fails,
135 | // the returned slice is nil. Use the Errors method to get the errors of this run.
136 | func (c *Compiler) CompileFile(filename string) []byte {
137 | 	defer c.errors.catchAbort()
138 | 	c.reset() // discard errors of earlier runs so a read failure is not mixed with them
139 | 	content, err := fs.ReadFile(c.fsys, filename)
140 | 	if err != nil {
141 | 		c.errors.add(err)
142 | 		return nil
143 | 	}
144 | 	return c.compileSource(filename, content)
145 | }
146 | 
147 | // Errors returns the errors (without warnings) that have accumulated during compilation.
148 | func (c *Compiler) Errors() []error {
149 | 	return c.errors.errors()
150 | }
151 | 
152 | // Warnings returns the warnings (without errors) that have accumulated during compilation.
153 | func (c *Compiler) Warnings() []error {
154 | 	return c.errors.warnings()
155 | }
156 | 
157 | // Failed reports whether compilation has failed, i.e. at least one error was recorded.
158 | func (c *Compiler) Failed() bool {
159 | 	return c.errors.numErrors > 0
160 | }
161 | 
162 | // ErrorsAndWarnings returns all errors and warnings accumulated during compilation. The list is not copied.
163 | func (c *Compiler) ErrorsAndWarnings() []error {
164 | 	return c.errors.list
165 | }
166 | 
167 | // errorAt adds err to the compiler error list, tagged with statement inst. A nil err is a bug and panics.
168 | func (c *Compiler) errorAt(inst ast.Statement, err error) {
169 | 	if err == nil {
170 | 		panic("BUG: errorAt(st, nil)")
171 | 	}
172 | 	c.errors.add(&statementError{inst: inst, err: err})
173 | }
174 | 
175 | // warnf adds a formatted warning, located at the position of inst, to the error list.
176 | func (c *Compiler) warnf(inst ast.Statement, format string, args ...any) {
177 | 	c.errors.add(&simpleWarning{pos: inst.Position(), str: fmt.Sprintf(format, args...)})
178 | }
179 | // compileSource resets the compiler, parses input and compiles the resulting document.
180 | func (c *Compiler) compileSource(filename string, input []byte) []byte {
181 | 	c.reset()
182 | 	p := ast.NewParser(filename, input, c.lexDebug)
183 | 	doc, errs := p.Parse()
184 | 	if c.errors.addParseErrors(errs) {
185 | 		return nil // abort compilation due to failed parse
186 | 	}
187 | 	return c.compileDocument(doc)
188 | }
189 | 
190 | // compileDocument creates bytecode from the AST. It returns nil if any error was recorded.
191 | func (c *Compiler) compileDocument(doc *ast.Document) (output []byte) {
192 | 	prog := newCompilerProg(doc)
193 | 
194 | 	// First, load all #include files and register their definitions.
195 | 	// This also configures the instruction set if specified by a #pragma.
196 | 	c.processIncludes(doc, prog, nil)
197 | 
198 | 	// Apply macro overrides. This happens after include processing because macros
199 | 	// get their definitions assigned then.
200 | 	for name, val := range c.macroOverrides {
201 | 		if def, _ := c.globals.lookupExprMacro(name); def != nil && len(def.Params) > 0 {
202 | 			c.warnf(def, "overridden global macro %s has parameters", name)
203 | 		}
204 | 		c.globals.overrideExprMacroValue(name, val)
205 | 	}
206 | 
207 | 	// Choose configured instruction set, but only if not configured by a pragma.
208 | 	if prog.evm == nil {
209 | 		prog.evm = evm.FindInstructionSet(c.defaultFork)
210 | 	}
211 | 
212 | 	// Next, the AST document tree is expanded into a flat list of instructions.
213 | 	c.expand(doc, prog)
214 | 	if prog.cur != prog.toplevel {
215 | 		panic("section stack was not unwound by expansion")
216 | 	}
217 | 
218 | 	// Expansion is now done, and all further steps work on prog.
219 | 	e := newEvaluator(c.globals)
220 | 	c.preEvaluateArgs(e, prog)
221 | 
222 | 	for {
223 | 		c.computePC(e, prog)
224 | 
225 | 		// Assign immediate argument values. Here we use a trick to assign sizes for
226 | 		// "PUSH" instructions: their pushSizes are initially set to one. If we get an
227 | 		// overflow condition, the size of that PUSH increases by one and then we
228 | 		// recalculate everything.
229 | 		failedInst, err := c.evaluateArgs(e, prog)
230 | 		if err != nil {
231 | 			if errors.Is(err, ecVariablePushOverflow) {
232 | 				failedInst.pushSize += 1
233 | 				continue // try again
234 | 			}
235 | 			c.errorAt(failedInst.ast, err)
236 | 			break // there was some other error
237 | 		}
238 | 		break
239 | 	}
240 | 
241 | 	if c.errors.hasError() {
242 | 		return nil // no output if source has errors
243 | 	}
244 | 
245 | 	// Run analysis. Note this is also disabled if there are errors because there could
246 | 	// be lots of useless warnings otherwise.
247 | 	c.checkLabelsUsed(prog, e)
248 | 
249 | 	// Create the bytecode.
250 | 	return c.generateOutput(prog)
251 | }
252 | 
253 | // processIncludes reads all #included documents.
254 | func (c *Compiler) processIncludes(doc *ast.Document, prog *compilerProg, stack []ast.Statement) {
255 | 	errs := c.globals.registerDefinitions(doc)
256 | 	c.errors.add(errs...)
257 | 
258 | 	var list []*ast.IncludeSt
259 | 	for _, st := range doc.Statements {
260 | 		switch st := st.(type) {
261 | 		case *ast.IncludeSt:
262 | 			file, err := resolveRelative(doc.File, st.Filename)
263 | 			if err != nil {
264 | 				c.errorAt(st, err)
265 | 				continue
266 | 			}
267 | 			incdoc := c.parseIncludeFile(file, st, len(stack)+1)
268 | 			if incdoc != nil {
269 | 				c.includes[st] = incdoc
270 | 				list = append(list, st)
271 | 			}
272 | 
273 | 		case *ast.PragmaSt:
274 | 			switch st.Option {
275 | 			case "target":
276 | 				if len(stack) != 0 {
277 | 					c.errorAt(st, ecPragmaTargetInIncludeFile)
278 | 				}
279 | 				if prog.evm != nil {
280 | 					c.errorAt(st, ecPragmaTargetConflict)
281 | 				}
282 | 				prog.evm = evm.FindInstructionSet(st.Value)
283 | 				if prog.evm == nil {
284 | 					c.errorAt(st, fmt.Errorf("%w %q", ecPragmaTargetUnknown, st.Value))
285 | 				}
286 | 			default:
287 | 				c.errorAt(st, fmt.Errorf("%w %s", ecUnknownPragma, st.Option))
288 | 			}
289 | 		}
290 | 	}
291 | 
292 | 	// Process includes in macros.
293 | 	for _, m := range doc.InstrMacros() {
294 | 		c.processIncludes(m.Body, prog, append(stack, m))
295 | 	}
296 | 
297 | 	// Recurse.
298 | 	for _, inst := range list {
299 | 		incdoc := c.includes[inst]
300 | 		c.processIncludes(incdoc, prog, append(stack, inst))
301 | 	}
302 | }
303 | 
// resolveRelative resolves filename relative to the directory of basepath and returns
// the cleaned, slash-separated result.
//
// An error is returned if the result would leave the root of the path tree, i.e. if the
// cleaned path is ".." or begins with a "../" element. File or directory names that
// merely contain two consecutive dots (such as "a..b.eas") are accepted.
func resolveRelative(basepath string, filename string) (string, error) {
	// path.Join cleans its result, so any ".." element that is left over can only be
	// at the very beginning of the path.
	res := path.Join(path.Dir(basepath), filename)
	if res == ".." || strings.HasPrefix(res, "../") {
		return "", fmt.Errorf("path %q escapes project root", filename)
	}
	return res, nil
}
311 | 
312 | func (c *Compiler) parseIncludeFile(file string, inst *ast.IncludeSt, depth int) *ast.Document {
313 | 	if c.fsys == nil {
314 | 		c.errorAt(inst, ecIncludeNoFS)
315 | 		return nil
316 | 	}
317 | 	if depth > c.maxIncDepth {
318 | 		c.errorAt(inst, ecIncludeDepthLimit)
319 | 		return nil
320 | 	}
321 | 
322 | 	content, err := fs.ReadFile(c.fsys, file)
323 | 	if err != nil {
324 | 		c.errorAt(inst, err)
325 | 		return nil
326 | 	}
327 | 	p := ast.NewParser(file, content, c.lexDebug)
328 | 	doc, errors := p.Parse()
329 | 	if c.errors.addParseErrors(errors) {
330 | 		return nil
331 | 	}
332 | 	// Note that included documents do NOT have the including document set as Parent.
333 | 	// The parent relationship is used during lookup of labels, macros, etc. and
334 | 	// such definitions should not be shared between include files.
335 | 	//
336 | 	// Included documents do have a Creation though.
337 | 	doc.Creation = inst
338 | 	return doc
339 | }
340 | 
341 | // generateOutput creates the bytecode. This is also where instruction names get resolved.
342 | func (c *Compiler) generateOutput(prog *compilerProg) []byte {
343 | 	var unreachable unreachableCodeCheck
344 | 	var output []byte
345 | 	for _, inst := range prog.iterInstructions() {
346 | 		if len(output) != inst.pc {
347 | 			panic(fmt.Sprintf("BUG: instruction pc=%d, but output has size %d", inst.pc, len(output)))
348 | 		}
349 | 
350 | 		switch {
351 | 		case isPush(inst.op):
352 | 			if inst.pushSize > 32 {
353 | 				panic("BUG: pushSize > 32")
354 | 			}
355 | 			if len(inst.data) > inst.pushSize {
356 | 				panic(fmt.Sprintf("BUG: push inst.data %d > inst.pushSize %d", len(inst.data), inst.pushSize))
357 | 			}
358 | 
359 | 			// resolve the op
360 | 			var op *evm.Op
361 | 			if inst.op == "PUSH" {
362 | 				op = prog.evm.PushBySize(inst.pushSize)
363 | 			} else {
364 | 				op = prog.evm.OpByName(inst.op)
365 | 			}
366 | 			if op == nil {
367 | 				panic(fmt.Sprintf("BUG: opcode for %q (size %d) not found", inst.op, inst.pushSize))
368 | 			}
369 | 
370 | 			// Unreachable code check.
371 | 			if !c.errors.hasError() {
372 | 				unreachable.check(c, inst.ast, op)
373 | 			}
374 | 
375 | 			// Add opcode and data padding to output.
376 | 			output = append(output, op.Code)
377 | 			if len(inst.data) < inst.pushSize {
378 | 				output = append(output, make([]byte, inst.pushSize-len(inst.data))...)
379 | 			}
380 | 
381 | 		case inst.op != "":
382 | 			op := prog.evm.OpByName(inst.op)
383 | 			if op == nil {
384 | 				c.errorAt(inst.ast, fmt.Errorf("%w %s", ecUnknownOpcode, inst.op))
385 | 			}
386 | 			// Unreachable code check.
387 | 			if !c.errors.hasError() {
388 | 				unreachable.check(c, inst.ast, op)
389 | 			}
390 | 			output = append(output, op.Code)
391 | 		}
392 | 
393 | 		// Instruction data is always added to output.
394 | 		output = append(output, inst.data...)
395 | 	}
396 | 	return output
397 | }
398 | 


--------------------------------------------------------------------------------
/asm/compiler_analysis.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package asm
18 | 
19 | import (
20 | 	"github.com/fjl/geas/internal/ast"
21 | 	"github.com/fjl/geas/internal/evm"
22 | 	"github.com/fjl/geas/internal/set"
23 | )
24 | 
25 | // checkLabelsUsed warns about label definitions that were not hit by the evaluator.
26 | func (c *Compiler) checkLabelsUsed(prog *compilerProg, e *evaluator) {
27 | 	// Gather documents referenced by program.
28 | 	var docs []*ast.Document
29 | 	docset := make(set.Set[*ast.Document])
30 | 	macroset := make(set.Set[*ast.InstructionMacroDef])
31 | 	for section := range prog.iterSections() {
32 | 		// Ensure to walk macroexpansions only once.
33 | 		if section.macroArgs != nil {
34 | 			if macroset.Includes(section.macroArgs.def) {
35 | 				continue
36 | 			}
37 | 			macroset.Add(section.macroArgs.def)
38 | 		}
39 | 		if !docset.Includes(section.doc) {
40 | 			docset.Add(section.doc)
41 | 			docs = append(docs, section.doc)
42 | 		}
43 | 	}
44 | 
45 | 	// Check against evaluator.
46 | 	for _, doc := range docs {
47 | 		for _, st := range doc.Statements {
48 | 			switch st := st.(type) {
49 | 			case *ast.LabelDefSt:
50 | 				if !e.isLabelUsed(st) {
51 | 					c.warnf(st, "label %s unused in program", st)
52 | 				}
53 | 			}
54 | 		}
55 | 	}
56 | }
57 | 
// unreachableCodeCheck finds instructions that cannot be reached by execution.
// In the EVM, all jump targets must be marked by JUMPDEST. For terminal instructions
// such as STOP, if the next instruction isn't JUMPDEST, it can never be reached.
//
// The check is stateful: prevSt and prevOp hold the statement and opcode passed to the
// most recent call of check. inUnreachable is set when a warning is issued and cleared
// again when a JUMPDEST is seen; in the code visible here it is only written, never
// used to suppress a warning.
type unreachableCodeCheck struct {
	prevSt        ast.Statement
	prevOp        *evm.Op
	inUnreachable bool
}
66 | 
67 | func (chk *unreachableCodeCheck) check(c *Compiler, st ast.Statement, op *evm.Op) {
68 | 	if chk.inUnreachable && op.Name == "JUMPDEST" {
69 | 		chk.inUnreachable = false
70 | 	}
71 | 	if chk.prevOp != nil && (chk.prevOp.Term || chk.prevOp.Unconditional) && !op.JumpDest {
72 | 		c.warnf(st, "unreachable code (previous instruction is %s at %v)", chk.prevOp.Name, chk.prevSt.Position())
73 | 		chk.inUnreachable = true
74 | 	}
75 | 	chk.prevSt, chk.prevOp = st, op
76 | }
77 | 


--------------------------------------------------------------------------------
/asm/compiler_eval.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"errors"
 21 | 	"math/big"
 22 | )
 23 | 
 24 | // preEvaluateArgs computes the initial argument values of instructions.
 25 | //
 26 | // Here we assign the inst.pushSize of all PUSH and PUSH<n> instructions.
 27 | // The argument value, inst.data, is assigned this compilation step if the arg expression
 28 | // contains no label references.
 29 | func (c *Compiler) preEvaluateArgs(e *evaluator, prog *compilerProg) {
 30 | 	for section, inst := range prog.iterInstructions() {
 31 | 		if inst.isBytes() {
 32 | 			// Handle #bytes.
 33 | 			v, err := e.evalAsBytes(inst.expr(), section.env)
 34 | 			if err == nil {
 35 | 				inst.argNoLabels = true
 36 | 				inst.data = v
 37 | 			}
 38 | 			continue
 39 | 		}
 40 | 
 41 | 		// Handle PUSH.
 42 | 		argument := inst.expr()
 43 | 		if argument == nil {
 44 | 			continue
 45 | 		}
 46 | 		inst.pushSize = 1
 47 | 		if s, ok := inst.explicitPushSize(); ok {
 48 | 			inst.pushSize = s
 49 | 		}
 50 | 
 51 | 		// Pre-evaluate argument.
 52 | 		v, err := e.eval(argument, section.env)
 53 | 		var labelErr unassignedLabelError
 54 | 		if errors.As(err, &labelErr) {
 55 | 			// Expression depends on label position calculation, leave it for later.
 56 | 			continue
 57 | 		}
 58 | 		inst.argNoLabels = true
 59 | 		if err != nil {
 60 | 			c.errorAt(inst.ast, err)
 61 | 			continue
 62 | 		}
 63 | 		if err := prog.assignPushArg(inst, v.Int(), true); err != nil {
 64 | 			c.errorAt(inst.ast, err)
 65 | 			continue
 66 | 		}
 67 | 	}
 68 | }
 69 | 
 70 | // computePC assigns the PC values of all instructions and labels.
 71 | func (c *Compiler) computePC(e *evaluator, prog *compilerProg) {
 72 | 	var pc int
 73 | 	for section, inst := range prog.iterInstructions() {
 74 | 		if li, ok := inst.ast.(labelDefStatement); ok {
 75 | 			e.setLabelPC(section.doc, li.LabelDefSt, pc)
 76 | 		}
 77 | 
 78 | 		inst.pc = pc
 79 | 		size := 0
 80 | 		if inst.op != "" {
 81 | 			size = 1
 82 | 		}
 83 | 		if isPush(inst.op) {
 84 | 			size += inst.pushSize
 85 | 		} else {
 86 | 			size += len(inst.data)
 87 | 		}
 88 | 		pc += size
 89 | 	}
 90 | }
 91 | 
 92 | // evaluateArgs computes the argument values of instructions.
 93 | func (c *Compiler) evaluateArgs(e *evaluator, prog *compilerProg) (inst *instruction, err error) {
 94 | 	for section, inst := range prog.iterInstructions() {
 95 | 		if inst.argNoLabels {
 96 | 			continue // pre-calculated
 97 | 		}
 98 | 
 99 | 		if inst.isBytes() {
100 | 			// handle #bytes
101 | 			v, err := e.evalAsBytes(inst.expr(), section.env)
102 | 			if err != nil {
103 | 				return inst, err
104 | 			}
105 | 			inst.data = v
106 | 		} else {
107 | 			// handle PUSH
108 | 			argument := inst.expr()
109 | 			if argument == nil {
110 | 				continue // no arg
111 | 			}
112 | 			v, err := e.eval(argument, section.env)
113 | 			if err != nil {
114 | 				return inst, err
115 | 			}
116 | 			if err := prog.assignPushArg(inst, v.Int(), false); err != nil {
117 | 				return inst, err
118 | 			}
119 | 		}
120 | 	}
121 | 	return nil, nil
122 | }
123 | 
124 | // assignPushArg sets the argument value of an instruction to v. The byte size of the
125 | // value is checked against the declared "PUSH<n>" data size.
126 | //
127 | // If setSize is true, the pushSize of variable-size "PUSH" instructions will be assigned
128 | // based on the value.
129 | func (prog *compilerProg) assignPushArg(inst *instruction, v *big.Int, setSize bool) error {
130 | 	if v.Sign() < 0 {
131 | 		return ecNegativeResult
132 | 	}
133 | 	b := v.Bytes()
134 | 	if len(b) > 32 {
135 | 		return ecPushOverflow256
136 | 	}
137 | 
138 | 	_, hasExplicitSize := inst.explicitPushSize()
139 | 	if setSize && !hasExplicitSize {
140 | 		inst.pushSize = prog.autoPushSize(b)
141 | 	}
142 | 	if len(b) > inst.pushSize {
143 | 		if !hasExplicitSize {
144 | 			return ecVariablePushOverflow
145 | 		}
146 | 		return ecFixedSizePushOverflow
147 | 	}
148 | 
149 | 	// Store data. Note there is no padding applied here.
150 | 	// Padding will be added at the bytecode output stage.
151 | 	inst.data = b
152 | 	return nil
153 | }
154 | 
155 | func (prog *compilerProg) autoPushSize(value []byte) int {
156 | 	if len(value) > 32 {
157 | 		panic("value too big")
158 | 	}
159 | 	if len(value) == 0 && !prog.evm.SupportsPush0() {
160 | 		return 1
161 | 	}
162 | 	return len(value)
163 | }
164 | 


--------------------------------------------------------------------------------
/asm/compiler_expand.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"math"
 22 | 	"strings"
 23 | 
 24 | 	"github.com/fjl/geas/internal/ast"
 25 | 	"github.com/fjl/geas/internal/evm"
 26 | )
 27 | 
 28 | // expand appends a list of AST instructions to the program.
 29 | func (c *Compiler) expand(doc *ast.Document, prog *compilerProg) {
 30 | 	for _, astSt := range doc.Statements {
 31 | 		st := statementFromAST(astSt)
 32 | 		if st == nil {
 33 | 			continue
 34 | 		}
 35 | 		err := st.expand(c, doc, prog)
 36 | 		if err != nil {
 37 | 			c.errorAt(astSt, err)
 38 | 			continue
 39 | 		}
 40 | 	}
 41 | }
 42 | 
 43 | // expand creates an instruction for the label. For dotted labels, the instruction is
 44 | // empty (i.e. has size zero). For regular labels, a JUMPDEST is created.
 45 | func (li labelDefStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
 46 | 	if li.Global {
 47 | 		ast := li.LabelDefSt
 48 | 		if err := c.globals.setLabelDocument(ast, doc); err != nil {
 49 | 			return err
 50 | 		}
 51 | 	}
 52 | 
 53 | 	inst := newInstruction(li, "")
 54 | 	if !li.Dotted {
 55 | 		inst.op = "JUMPDEST"
 56 | 	}
 57 | 	prog.addInstruction(inst)
 58 | 	return nil
 59 | }
 60 | 
 61 | // expand appends the instruction to a program. This is also where basic validation is done.
 62 | func (op opcodeStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
 63 | 	opcode := strings.ToUpper(op.Op)
 64 | 	inst := newInstruction(op, opcode)
 65 | 
 66 | 	switch {
 67 | 	case isPush(opcode) && opcode != "PUSH0":
 68 | 		if op.Arg == nil {
 69 | 			return ecPushWithoutArgument
 70 | 		}
 71 | 
 72 | 	case isJump(opcode):
 73 | 		if err := c.validateJumpArg(doc, op.Arg); err != nil {
 74 | 			return err
 75 | 		}
 76 | 		if _, err := prog.resolveOp(opcode); err != nil {
 77 | 			return err
 78 | 		}
 79 | 		// 'JUMP @label' instructions turn into 'PUSH @label' + 'JUMP'.
 80 | 		if op.Arg != nil {
 81 | 			push := newInstruction(op, "PUSH")
 82 | 			prog.addInstruction(push)
 83 | 		}
 84 | 
 85 | 	default:
 86 | 		if _, err := prog.resolveOp(opcode); err != nil {
 87 | 			return err
 88 | 		}
 89 | 		if op.Arg != nil {
 90 | 			if opcode == "PUSH0" {
 91 | 				return ecPushzeroWithArgument
 92 | 			}
 93 | 			return ecUnexpectedArgument
 94 | 		}
 95 | 	}
 96 | 
 97 | 	prog.addInstruction(inst)
 98 | 	return nil
 99 | }
100 | 
101 | // resolveOp resolves an opcode name.
102 | func (prog *compilerProg) resolveOp(op string) (*evm.Op, error) {
103 | 	if op := prog.evm.OpByName(op); op != nil {
104 | 		return op, nil
105 | 	}
106 | 	remFork := prog.evm.ForkWhereOpRemoved(op)
107 | 	if remFork != "" {
108 | 		return nil, fmt.Errorf("%w %s (target = %q; removed in fork %q)", ecUnknownOpcode, op, prog.evm.Name(), remFork)
109 | 	}
110 | 	addedForks := evm.ForksWhereOpAdded(op)
111 | 	if len(addedForks) > 0 {
112 | 		list := strings.Join(addedForks, ", ")
113 | 		fork := "fork"
114 | 		if len(addedForks) > 1 {
115 | 			fork += "s"
116 | 		}
117 | 		return nil, fmt.Errorf("%w %s (target = %q; added in %s %q)", ecUnknownOpcode, op, prog.evm.Name(), fork, list)
118 | 	}
119 | 	return nil, fmt.Errorf("%w %s", ecUnknownOpcode, op)
120 | }
121 | 
122 | // validateJumpArg checks that argument to JUMP is a defined label.
123 | func (c *Compiler) validateJumpArg(doc *ast.Document, arg ast.Expr) error {
124 | 	if arg == nil {
125 | 		return nil // no argument is fine.
126 | 	}
127 | 	lref, ok := arg.(*ast.LabelRefExpr)
128 | 	if !ok {
129 | 		return ecJumpNeedsLiteralLabel
130 | 	}
131 | 	if lref.Dotted {
132 | 		return fmt.Errorf("%w %v", ecJumpToDottedLabel, lref)
133 | 	}
134 | 
135 | 	var li *ast.LabelDefSt
136 | 	if lref.Global {
137 | 		li = c.globals.label[lref.Ident]
138 | 	} else {
139 | 		li, _ = doc.LookupLabel(lref)
140 | 	}
141 | 	if li == nil {
142 | 		return fmt.Errorf("%w %v", ecJumpToUndefinedLabel, lref)
143 | 	}
144 | 	if li.Dotted {
145 | 		return fmt.Errorf("%w %v", ecJumpToDottedLabel, lref)
146 | 	}
147 | 	return nil
148 | }
149 | 
150 | // expand appends the output of an instruction macro call to the program.
151 | func (inst macroCallStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
152 | 	var (
153 | 		name   = inst.Ident
154 | 		def    *ast.InstructionMacroDef
155 | 		defdoc *ast.Document
156 | 	)
157 | 	if ast.IsGlobal(name) {
158 | 		def, defdoc = c.globals.lookupInstrMacro(name)
159 | 	} else {
160 | 		def, defdoc = doc.LookupInstrMacro(name)
161 | 	}
162 | 	if def == nil {
163 | 		return fmt.Errorf("%w %%%s", ecUndefinedInstrMacro, name)
164 | 	}
165 | 
166 | 	// Prevent recursion and check args match.
167 | 	if !c.enterMacro(def) {
168 | 		return fmt.Errorf("%w %%%s", ecRecursiveCall, name)
169 | 	}
170 | 	defer c.exitMacro(def)
171 | 	if len(inst.Args) != len(def.Params) {
172 | 		return fmt.Errorf("%w, macro %%%s needs %d", ecInvalidArgumentCount, name, len(def.Params))
173 | 	}
174 | 
175 | 	// Clone the macro's body document. This is a shallow clone for setting
176 | 	// Parent/Creation, which is done to for error location reporting reasons. Cloning the
177 | 	// document also means by-document caching does not treat all expansions of a macro as
178 | 	// the same code.
179 | 	macroDoc := *def.Body
180 | 	macroDoc.Parent = defdoc
181 | 	macroDoc.Creation = inst
182 | 
183 | 	// Arguments of instruction macros cannot be evaluated during expansion. They are
184 | 	// evaluated in a later pass where all intermediate arguments are processed. In order
185 | 	// to compute the value then, we need to keep track of macro argument expressions and
186 | 	// their origin document chain. An example:
187 | 	//
188 | 	// #define %MacroA(a) {
189 | 	//      %MacroB($a)
190 | 	// }
191 | 	// #define %MacroB(b) {
192 | 	//      push $b
193 | 	// }
194 | 	//
195 | 	// When the evaluator processes 'push $b' generated by MacroB, it first finds
196 | 	// that $b = $a. However, the expression $a must not be evaluated in the context of
197 | 	// MacroB, but in the context of MacroA, because that's where $a is defined.
198 | 	//
199 | 	// To keep track of this, we store the callsite of the macro along with the arguments
200 | 	// into the output section. The evaluator uses this callsite as the evaluation context
201 | 	// for variables.
202 | 	callsite := prog.currentSection()
203 | 	args := &instrMacroArgs{callsite: callsite, def: def, args: inst.Args}
204 | 	prog.pushSection(&macroDoc, args)
205 | 	defer prog.popSection()
206 | 
207 | 	// Expand body.
208 | 	c.expand(&macroDoc, prog)
209 | 	return nil
210 | }
211 | 
212 | func (c *Compiler) enterMacro(m *ast.InstructionMacroDef) bool {
213 | 	if _, onStack := c.macroStack[m]; onStack {
214 | 		return false
215 | 	}
216 | 	c.macroStack[m] = struct{}{}
217 | 	return true
218 | }
219 | 
// exitMacro removes the mark set by enterMacro, allowing m to be expanded again.
func (c *Compiler) exitMacro(m *ast.InstructionMacroDef) {
	delete(c.macroStack, m)
}
223 | 
224 | // expand of #include appends the included file's instructions to the program.
225 | // Note this accesses the documents parsed by processIncludes.
226 | func (inst includeStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
227 | 	incdoc := c.includes[inst.IncludeSt]
228 | 	if incdoc == nil {
229 | 		// The document is not in doc.includes, so there must've been a parse error.
230 | 		// We can just ignore the statement here since the error was already reported.
231 | 		return nil
232 | 	}
233 | 	prog.pushSection(incdoc, nil)
234 | 	defer prog.popSection()
235 | 	c.expand(incdoc, prog)
236 | 	return nil
237 | }
238 | 
239 | // expand of #assemble performs compilation of the given assembly file.
240 | func (inst assembleStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
241 | 	subc := New(c.fsys)
242 | 	subc.SetIncludeDepthLimit(c.maxIncDepth)
243 | 	subc.SetMaxErrors(math.MaxInt)
244 | 	subc.SetDefaultFork(prog.evm.Name())
245 | 	subc.macroOverrides = c.macroOverrides
246 | 
247 | 	file, err := resolveRelative(doc.File, inst.Filename)
248 | 	if err != nil {
249 | 		return err
250 | 	}
251 | 	bytecode := subc.CompileFile(file)
252 | 	c.errors.add(subc.ErrorsAndWarnings()...)
253 | 	if len(bytecode) > 0 {
254 | 		datainst := &instruction{data: bytecode}
255 | 		prog.addInstruction(datainst)
256 | 	}
257 | 	return nil
258 | }
259 | 
260 | func (inst bytesStatement) expand(c *Compiler, doc *ast.Document, prog *compilerProg) error {
261 | 	prog.addInstruction(&instruction{ast: inst})
262 | 	return nil
263 | }
264 | 


--------------------------------------------------------------------------------
/asm/compiler_prog.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"iter"
 21 | 	"slices"
 22 | 	"strings"
 23 | 
 24 | 	"github.com/fjl/geas/internal/ast"
 25 | 	"github.com/fjl/geas/internal/evm"
 26 | )
 27 | 
// compilerProg is the output program of the compiler.
// It contains sections of instructions.
//
// Sections form a tree rooted at toplevel. cur is the section that instructions and
// new sections are currently added to. evm is the instruction set of the program; it
// is nil until one has been selected.
type compilerProg struct {
	toplevel *compilerSection
	cur      *compilerSection
	evm      *evm.InstructionSet
}
 35 | 
// compilerSection is a section of the output program.
//
// Each section belongs to a document and has its own evaluation environment. The
// children list holds the content of the section in program order; its elements are
// either *instruction or nested *compilerSection values.
type compilerSection struct {
	doc *ast.Document
	env *evalEnvironment

	// This tracks the arguments of instruction macro calls. When the compiler expands a
	// macro, it creates a unique section for each call site. The arguments of the call
	// are stored for use by the expression evaluator.
	macroArgs *instrMacroArgs

	parent   *compilerSection
	children []any
}
 49 | 
// instrMacroArgs describes one call of an instruction macro: the section containing
// the call, the definition of the called macro, and the argument expressions of the
// call.
type instrMacroArgs struct {
	callsite *compilerSection
	def      *ast.InstructionMacroDef
	args     []ast.Expr
}
 55 | 
 56 | func newCompilerProg(topdoc *ast.Document) *compilerProg {
 57 | 	p := new(compilerProg)
 58 | 	p.toplevel = p.pushSection(topdoc, nil)
 59 | 	return p
 60 | }
 61 | 
 62 | // pushSection creates a new section as a child of the current one.
 63 | func (p *compilerProg) pushSection(doc *ast.Document, macroArgs *instrMacroArgs) *compilerSection {
 64 | 	s := &compilerSection{doc: doc, macroArgs: macroArgs}
 65 | 	s.env = newEvalEnvironment(s)
 66 | 	if p.cur != nil {
 67 | 		s.parent = p.cur
 68 | 		p.cur.children = append(p.cur.children, s)
 69 | 	}
 70 | 	p.cur = s
 71 | 	return s
 72 | }
 73 | 
 74 | // popSection returns to the parent section.
 75 | func (p *compilerProg) popSection() {
 76 | 	if p.cur.parent == nil {
 77 | 		panic("too much pop")
 78 | 	}
 79 | 	p.cur = p.cur.parent
 80 | }
 81 | 
// currentSection returns the current section, i.e. the one most recently pushed and
// not yet popped. This is the section addInstruction appends to.
func (p *compilerProg) currentSection() *compilerSection {
	return p.cur
}
 86 | 
// addInstruction appends an instruction to the end of the current section.
func (p *compilerProg) addInstruction(inst *instruction) {
	p.cur.children = append(p.cur.children, inst)
}
 91 | 
 92 | // iterInstructions returns an iterator over all instructions in the program.
 93 | func (p *compilerProg) iterInstructions() iter.Seq2[*compilerSection, *instruction] {
 94 | 	type stackElem struct {
 95 | 		s *compilerSection
 96 | 		i int
 97 | 	}
 98 | 	stack := []stackElem{{p.toplevel, 0}}
 99 | 	return func(yield func(*compilerSection, *instruction) bool) {
100 | 	outer:
101 | 		for len(stack) > 0 {
102 | 			e := &stack[len(stack)-1]
103 | 			for e.i < len(e.s.children) {
104 | 				cld := e.s.children[e.i]
105 | 				e.i++
106 | 				switch cld := cld.(type) {
107 | 				case *instruction:
108 | 					if !yield(e.s, cld) {
109 | 						return
110 | 					}
111 | 				case *compilerSection:
112 | 					stack = append(stack, stackElem{cld, 0})
113 | 					continue outer
114 | 				}
115 | 			}
116 | 			stack = stack[:len(stack)-1]
117 | 		}
118 | 	}
119 | }
120 | 
121 | // iterSections returns an iterator over all sections in the program.
122 | func (p *compilerProg) iterSections() iter.Seq[*compilerSection] {
123 | 	stack := []*compilerSection{p.toplevel}
124 | 	return func(yield func(*compilerSection) bool) {
125 | 		for len(stack) > 0 {
126 | 			section := stack[len(stack)-1]
127 | 			stack = stack[:len(stack)-1]
128 | 			if !yield(section) {
129 | 				return
130 | 			}
131 | 			for _, cld := range slices.Backward(section.children) {
132 | 				if clds, ok := cld.(*compilerSection); ok {
133 | 					stack = append(stack, clds)
134 | 				}
135 | 			}
136 | 		}
137 | 	}
138 | }
139 | 
// instruction is a step of the compiler output program.
//
// An instruction with an empty op does not emit an opcode byte; only its data is
// written to the output. The ast field can be nil: the data instruction created for
// #assemble output has no statement attached.
type instruction struct {
	// fields assigned during expansion:
	ast statement
	op  string

	// fields assigned during compilation:
	pc          int    // pc at this instruction
	pushSize    int    // computed size of push instruction
	data        []byte // computed argument value
	argNoLabels bool   // true if arg expression does not contain @label
}
152 | 
// newInstruction creates an instruction for the given statement and opcode name.
// The opcode name can be empty for instructions that do not emit an opcode.
func newInstruction(ast statement, op string) *instruction {
	return &instruction{ast: ast, op: op}
}
156 | 
// isPush reports whether op is the name of a push instruction. This is a pure prefix
// check: it matches the variable-size "PUSH" as well as any name beginning with "PUSH",
// such as "PUSH0" or "PUSH32".
func isPush(op string) bool {
	return strings.HasPrefix(op, "PUSH")
}
160 | 
// isJump reports whether op is the name of a jump instruction. This is a pure prefix
// check, so every name beginning with "JUMP" matches. Note that this includes
// "JUMPDEST".
func isJump(op string) bool {
	return strings.HasPrefix(op, "JUMP")
}
164 | 
165 | // explicitPushSize returns the declared PUSH size.
166 | func (inst *instruction) explicitPushSize() (int, bool) {
167 | 	op, ok := inst.ast.(opcodeStatement)
168 | 	if ok {
169 | 		return int(op.PushSize) - 1, op.PushSize > 0
170 | 	}
171 | 	return 0, false
172 | }
173 | 
174 | // expr returns the instruction argument.
175 | func (inst *instruction) expr() ast.Expr {
176 | 	if inst.op != "" && !isPush(inst.op) {
177 | 		return nil
178 | 	}
179 | 	switch st := inst.ast.(type) {
180 | 	case opcodeStatement:
181 | 		return st.Arg
182 | 	case bytesStatement:
183 | 		return st.Value
184 | 	default:
185 | 		return nil
186 | 	}
187 | }
188 | 
// isBytes reports whether the instruction was created from a #bytes statement.
func (inst *instruction) isBytes() bool {
	_, ok := inst.ast.(bytesStatement)
	return ok
}
193 | 


--------------------------------------------------------------------------------
/asm/compiler_prog_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package asm
18 | 
19 | import (
20 | 	"reflect"
21 | 	"slices"
22 | 	"testing"
23 | 
24 | 	"github.com/fjl/geas/internal/ast"
25 | )
26 | 
27 | func TestIterInstructions(t *testing.T) {
28 | 	var (
29 | 		doc     = make([]ast.Document, 4)
30 | 		instr   = make([]*instruction, 8)
31 | 		prog    = newCompilerProg(&doc[0])
32 | 		section = make([]*compilerSection, 4)
33 | 	)
34 | 	for i := range instr {
35 | 		instr[i] = new(instruction)
36 | 	}
37 | 
38 | 	// create section structure
39 | 	{
40 | 		section[0] = prog.toplevel
41 | 		prog.addInstruction(instr[0])
42 | 		{
43 | 			section[1] = prog.pushSection(&doc[1], nil)
44 | 			prog.addInstruction(instr[1])
45 | 			prog.addInstruction(instr[2])
46 | 			prog.popSection()
47 | 		}
48 | 		prog.addInstruction(instr[3])
49 | 		{
50 | 			section[2] = prog.pushSection(&doc[2], nil)
51 | 			prog.addInstruction(instr[4])
52 | 			{
53 | 				section[3] = prog.pushSection(&doc[3], nil)
54 | 				prog.addInstruction(instr[5])
55 | 				prog.popSection()
56 | 			}
57 | 			prog.addInstruction(instr[6])
58 | 			prog.addInstruction(instr[7])
59 | 		}
60 | 		prog.popSection()
61 | 	}
62 | 
63 | 	// iterate and gather list
64 | 	type item struct {
65 | 		*compilerSection
66 | 		*instruction
67 | 	}
68 | 	var result []item
69 | 	for section, inst := range prog.iterInstructions() {
70 | 		result = append(result, item{section, inst})
71 | 	}
72 | 
73 | 	// compare
74 | 	expected := []item{
75 | 		{section[0], instr[0]},
76 | 		{section[1], instr[1]},
77 | 		{section[1], instr[2]},
78 | 		{section[0], instr[3]},
79 | 		{section[2], instr[4]},
80 | 		{section[3], instr[5]},
81 | 		{section[2], instr[6]},
82 | 		{section[2], instr[7]},
83 | 	}
84 | 	if !reflect.DeepEqual(result, expected) {
85 | 		t.Log("result:")
86 | 		for _, item := range result {
87 | 			t.Logf("  s%d (%p): instr%d (%p)", slices.Index(section, item.compilerSection), item.compilerSection, slices.Index(instr, item.instruction), item.instruction)
88 | 		}
89 | 		t.Error("result mismatch")
90 | 	}
91 | }
92 | 


--------------------------------------------------------------------------------
/asm/compiler_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"bytes"
 21 | 	"encoding/hex"
 22 | 	"maps"
 23 | 	"math/big"
 24 | 	"os"
 25 | 	"path/filepath"
 26 | 	"slices"
 27 | 	"strings"
 28 | 	"testing"
 29 | 	"testing/fstest"
 30 | 
 31 | 	"gopkg.in/yaml.v3"
 32 | )
 33 | 
// compilerTestInput is the "input" part of a test case in compiler-tests.yaml.
type compilerTestInput struct {
	// Code is the source text handed to CompileString.
	Code    string              `yaml:"code"`
	// Files, when non-empty, becomes an in-memory filesystem for the compiler,
	// keyed by file name.
	Files   map[string]string   `yaml:"files,omitempty"`
	// Globals are set on the compiler with SetGlobal before compiling.
	Globals map[string]*big.Int `yaml:"globals,omitempty"`
}
 39 | 
// compilerTestOutput is the "output" part of a test case in compiler-tests.yaml.
type compilerTestOutput struct {
	// Bytecode is the expected output as hex; spaces are stripped before decoding.
	Bytecode string   `yaml:"bytecode"`
	// Errors lists the expected error messages. A non-empty list means the
	// compilation is expected to fail.
	Errors   []string `yaml:"errors,omitempty"`
	// Warnings lists the expected warning messages.
	Warnings []string `yaml:"warnings,omitempty"`
}
 45 | 
// compilerTestYAML is a single test case as stored in compiler-tests.yaml,
// where the file maps test names to values of this type.
type compilerTestYAML struct {
	Input  compilerTestInput  `yaml:"input"`
	Output compilerTestOutput `yaml:"output"`
}
 50 | 
 51 | func TestCompiler(t *testing.T) {
 52 | 	content, err := os.ReadFile(filepath.Join("testdata", "compiler-tests.yaml"))
 53 | 	if err != nil {
 54 | 		t.Fatal(err)
 55 | 	}
 56 | 	var tests = make(map[string]compilerTestYAML)
 57 | 	dec := yaml.NewDecoder(bytes.NewReader(content))
 58 | 	dec.KnownFields(true)
 59 | 	if err := dec.Decode(&tests); err != nil {
 60 | 		t.Fatal(err)
 61 | 	}
 62 | 
 63 | 	names := slices.Sorted(maps.Keys(tests))
 64 | 	for _, name := range names {
 65 | 		test := tests[name]
 66 | 		t.Run(name, func(t *testing.T) {
 67 | 			c := New(nil)
 68 | 			if len(test.Input.Files) > 0 {
 69 | 				fm := make(fstest.MapFS, len(test.Input.Files))
 70 | 				for name, content := range test.Input.Files {
 71 | 					fm[name] = &fstest.MapFile{Data: []byte(content)}
 72 | 				}
 73 | 				c.SetFilesystem(fm)
 74 | 			}
 75 | 			for name, val := range test.Input.Globals {
 76 | 				c.SetGlobal(name, val)
 77 | 			}
 78 | 			output := c.CompileString(test.Input.Code)
 79 | 
 80 | 			if len(test.Output.Errors) > 0 {
 81 | 				// expecting errors...
 82 | 				if output != nil {
 83 | 					t.Error("expected nil output")
 84 | 				}
 85 | 				checkErrors(t, "errors", c.Errors(), test.Output.Errors)
 86 | 				checkErrors(t, "warnings", c.Warnings(), test.Output.Warnings)
 87 | 				return
 88 | 			}
 89 | 
 90 | 			// Test expects no errors, compilation should succeed.
 91 | 			if c.Failed() {
 92 | 				for _, err := range c.Errors() {
 93 | 					t.Error(err)
 94 | 				}
 95 | 				t.Fatal("compilation failed")
 96 | 			}
 97 | 			checkErrors(t, "warnings", c.Warnings(), test.Output.Warnings)
 98 | 			expectedOutput, err := hex.DecodeString(strings.Replace(test.Output.Bytecode, " ", "", -1))
 99 | 			if err != nil {
100 | 				t.Fatalf("invalid hex: %v", err)
101 | 			}
102 | 			if !bytes.Equal(output, expectedOutput) {
103 | 				t.Errorf("incorrect output\ngot:  %x\nwant: %x\n", output, expectedOutput)
104 | 			}
105 | 		})
106 | 	}
107 | }
108 | 
// checkErrors compares the messages of errlist against expected, in order.
// kind names what is being compared ("errors" or "warnings") and is only used
// in the failure output. When the counts differ, the actual entries are printed
// and no per-entry comparison is made.
func checkErrors(t *testing.T, kind string, errlist []error, expected []string) {
	// Attribute failures to the calling test line rather than this helper.
	t.Helper()

	if len(errlist) != len(expected) {
		t.Errorf("got %d %s, expected %d", len(errlist), kind, len(expected))
		for i, err := range errlist {
			t.Errorf("  [%d] %v", i, err)
		}
		return
	}
	for i, err := range errlist {
		if err.Error() != expected[i] {
			t.Errorf("wrong error %d: %v\n    want: %s", i, err, expected[i])
		}
	}
}
123 | 
124 | func TestExamplePrograms(t *testing.T) {
125 | 	exampleDir, err := filepath.Abs("../example")
126 | 	if err != nil {
127 | 		t.Fatal(err)
128 | 	}
129 | 
130 | 	bytecodes := make(map[string]string)
131 | 	t.Run("erc20", func(t *testing.T) {
132 | 		bytecodes["erc20"] = compileExample(t, exampleDir, "erc20/erc20.eas")
133 | 	})
134 | 	t.Run("erc20_ctor", func(t *testing.T) {
135 | 		bytecodes["erc20_ctor"] = compileExample(t, exampleDir, "erc20/erc20_ctor.eas")
136 | 	})
137 | 	t.Run("4788asm", func(t *testing.T) {
138 | 		bytecodes["4788asm"] = compileExample(t, exampleDir, "4788asm.eas")
139 | 	})
140 | 	t.Run("4788asm_ctor", func(t *testing.T) {
141 | 		bytecodes["4788asm_ctor"] = compileExample(t, exampleDir, "4788asm_ctor.eas")
142 | 	})
143 | 
144 | 	if os.Getenv("WRITE_TEST_FILES") == "1" {
145 | 		content, _ := yaml.Marshal(bytecodes)
146 | 		os.WriteFile("testdata/known-bytecode.yaml", content, 0644)
147 | 	}
148 | 
149 | 	// compare codes
150 | 	var known map[string]string
151 | 	data, err := os.ReadFile("testdata/known-bytecode.yaml")
152 | 	if err != nil {
153 | 		t.Fatal(err)
154 | 	}
155 | 	if err := yaml.Unmarshal(data, &known); err != nil {
156 | 		t.Fatal("YAML unmarshal failed:", err)
157 | 	}
158 | 	for name, code := range bytecodes {
159 | 		if code != known[name] {
160 | 			t.Errorf("bytecode mismatch for %s:", name)
161 | 			t.Errorf("   compiled: %s", code)
162 | 			t.Errorf("      known: %s", known[name])
163 | 		}
164 | 	}
165 | }
166 | 
167 | func compileExample(t *testing.T, exampleDir string, file string) string {
168 | 	c := New(os.DirFS(exampleDir))
169 | 	output := c.CompileFile(file)
170 | 	for _, err := range c.ErrorsAndWarnings() {
171 | 		t.Log(err)
172 | 	}
173 | 	if c.Failed() {
174 | 		t.Error("compilation failed:")
175 | 	}
176 | 	return hex.EncodeToString(output)
177 | }
178 | 


--------------------------------------------------------------------------------
/asm/error.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"errors"
 21 | 	"fmt"
 22 | 
 23 | 	"github.com/fjl/geas/internal/ast"
 24 | )
 25 | 
// errCancelCompilation is the sentinel value that errorList.add panics with
// when too many errors have accumulated. errorList.catchAbort recovers it.
var errCancelCompilation = errors.New("end compilation")
 28 | 
// PositionError is an error containing a file position.
type PositionError interface {
	error
	// Position returns the source position the error refers to.
	Position() ast.Position
}
 34 | 
 35 | // compilerErrorCode represents an error detected by the compiler.
 36 | type compilerError int
 37 | 
 38 | const (
 39 | 	ecPushOverflow256 compilerError = iota
 40 | 	ecPushzeroWithArgument
 41 | 	ecFixedSizePushOverflow
 42 | 	ecVariablePushOverflow
 43 | 	ecPushWithoutArgument
 44 | 	ecUnexpectedArgument
 45 | 	ecJumpNeedsLiteralLabel
 46 | 	ecJumpToDottedLabel
 47 | 	ecJumpToUndefinedLabel
 48 | 	ecUnknownOpcode
 49 | 	ecUndefinedVariable
 50 | 	ecUndefinedMacro
 51 | 	ecUndefinedInstrMacro
 52 | 	ecUndefinedBuiltinMacro
 53 | 	ecRecursiveCall
 54 | 	ecInvalidArgumentCount
 55 | 	ecNegativeResult
 56 | 	ecOddLengthBytesLiteral
 57 | 	ecIncludeNoFS
 58 | 	ecIncludeDepthLimit
 59 | 	ecUnknownPragma
 60 | 	ecPragmaTargetInIncludeFile
 61 | 	ecPragmaTargetConflict
 62 | 	ecPragmaTargetUnknown
 63 | )
 64 | 
 65 | func (e compilerError) Error() string {
 66 | 	switch e {
 67 | 	case ecPushOverflow256:
 68 | 		return "instruction argument > 256 bits"
 69 | 	case ecPushzeroWithArgument:
 70 | 		return "PUSH0 can't have argument"
 71 | 	case ecFixedSizePushOverflow:
 72 | 		return "instruction argument overflows explicitly given PUSH<n> size"
 73 | 	case ecVariablePushOverflow:
 74 | 		return "instruction argument overflows push"
 75 | 	case ecUnexpectedArgument:
 76 | 		return "only JUMP* and PUSH* support immediate arguments"
 77 | 	case ecPushWithoutArgument:
 78 | 		return "PUSH requires an immediate argument"
 79 | 	case ecJumpNeedsLiteralLabel:
 80 | 		return "JUMP argument must be literal label"
 81 | 	case ecJumpToDottedLabel:
 82 | 		return "JUMP to dotted label"
 83 | 	case ecJumpToUndefinedLabel:
 84 | 		return "JUMP to undefined label"
 85 | 	case ecUnknownOpcode:
 86 | 		return "unknown op"
 87 | 	case ecUndefinedVariable:
 88 | 		return "undefined macro parameter"
 89 | 	case ecUndefinedMacro:
 90 | 		return "undefined macro"
 91 | 	case ecUndefinedBuiltinMacro:
 92 | 		return "undefined builtin macro"
 93 | 	case ecUndefinedInstrMacro:
 94 | 		return "undefined instruction macro"
 95 | 	case ecRecursiveCall:
 96 | 		return "recursive call of macro"
 97 | 	case ecInvalidArgumentCount:
 98 | 		return "invalid number of arguments"
 99 | 	case ecNegativeResult:
100 | 		return "expression result is negative number"
101 | 	case ecOddLengthBytesLiteral:
102 | 		return "odd-length hex in bytes context"
103 | 	case ecIncludeNoFS:
104 | 		return "#include not allowed"
105 | 	case ecIncludeDepthLimit:
106 | 		return "#include depth limit reached"
107 | 	case ecUnknownPragma:
108 | 		return "unknown #pragma"
109 | 	case ecPragmaTargetInIncludeFile:
110 | 		return "#pragma target cannot be used in #include'd files"
111 | 	case ecPragmaTargetConflict:
112 | 		return "duplicate '#pragma target ...' directive"
113 | 	case ecPragmaTargetUnknown:
114 | 		return "unknown #pragma target"
115 | 	default:
116 | 		return fmt.Sprintf("invalid error %d", e)
117 | 	}
118 | }
119 | 
120 | // statementError is an error related to an assembler instruction.
121 | type statementError struct {
122 | 	inst ast.Statement
123 | 	err  error
124 | }
125 | 
126 | func (e *statementError) Position() ast.Position {
127 | 	return e.inst.Position()
128 | }
129 | 
130 | func (e *statementError) Unwrap() error {
131 | 	return e.err
132 | }
133 | 
134 | func (e *statementError) Error() string {
135 | 	return fmt.Sprintf("%v: %s", e.inst.Position(), e.err.Error())
136 | }
137 | 
138 | // simpleWarning is a warning issued by the compiler.
139 | type simpleWarning struct {
140 | 	pos ast.Position
141 | 	str string
142 | }
143 | 
144 | func (e *simpleWarning) Error() string {
145 | 	return fmt.Sprintf("%v: warning: %s", e.pos, e.str)
146 | }
147 | 
148 | func (e *simpleWarning) IsWarning() bool {
149 | 	return true
150 | }
151 | 
152 | // unassignedLabelError signals use of a label that doesn't have a valid PC.
153 | type unassignedLabelError struct {
154 | 	lref *ast.LabelRefExpr
155 | }
156 | 
157 | func (e unassignedLabelError) Error() string {
158 | 	return fmt.Sprintf("%v not instantiated in program", e.lref)
159 | }
160 | 
// Warning is implemented by errors that may be only a warning. Whether a given
// value actually is one is decided by its IsWarning method.
type Warning interface {
	error
	IsWarning() bool
}

// IsWarning reports whether err, or an error it wraps, implements Warning and
// declares itself to be a warning.
func IsWarning(err error) bool {
	var w Warning
	if !errors.As(err, &w) {
		return false
	}
	return w.IsWarning()
}
172 | 
// errorList maintains a list of errors and warnings. It also implements the mechanism
// that aborts compilation when too many errors have accumulated.
type errorList struct {
	// list holds all errors and warnings in the order they were added.
	list        []error
	// numErrors counts the entries of list that are not warnings.
	numErrors   int
	// numWarnings counts the entries of list that are warnings.
	numWarnings int
	// maxErrors is the limit on numErrors; add panics with
	// errCancelCompilation once numErrors exceeds it.
	maxErrors   int
}
181 | 
182 | // catchAbort traps the panic condition that gets thrown when too many errors have accumulated.
183 | // A call to catchAbort must be deferred around any code that uses [errorList.add].
184 | func (e *errorList) catchAbort() {
185 | 	ok := recover()
186 | 	if ok != nil && ok != errCancelCompilation {
187 | 		panic(ok)
188 | 	}
189 | }
190 | 
191 | // add puts errors into the list.
192 | // This returns true if there were any actual errors in the arguments.
193 | func (e *errorList) add(errs ...error) (anyRealError bool) {
194 | 	for _, err := range errs {
195 | 		if err == nil {
196 | 			continue
197 | 		}
198 | 		e.list = append(e.list, err)
199 | 		if IsWarning(err) {
200 | 			e.numWarnings++
201 | 		} else {
202 | 			e.numErrors++
203 | 			anyRealError = true
204 | 		}
205 | 		if e.numErrors > e.maxErrors {
206 | 			panic(errCancelCompilation)
207 | 		}
208 | 	}
209 | 	return
210 | }
211 | 
212 | // addParseErrors is like add, but for errors from the parser.
213 | func (e *errorList) addParseErrors(errs []*ast.ParseError) bool {
214 | 	conv := make([]error, len(errs))
215 | 	for i := range errs {
216 | 		conv[i] = errs[i]
217 | 	}
218 | 	return e.add(conv...)
219 | }
220 | 
221 | // warnings returns the current warning list.
222 | func (e *errorList) warnings() []error {
223 | 	s := make([]error, 0, e.numWarnings)
224 | 	for _, err := range e.list {
225 | 		if IsWarning(err) {
226 | 			s = append(s, err)
227 | 		}
228 | 	}
229 | 	return s
230 | }
231 | 
232 | // warnings returns the current error list.
233 | func (e *errorList) errors() []error {
234 | 	s := make([]error, 0, e.numErrors)
235 | 	for _, err := range e.list {
236 | 		if !IsWarning(err) {
237 | 			s = append(s, err)
238 | 		}
239 | 	}
240 | 	return s
241 | }
242 | 
243 | // hasError reports whether there were any actual errors.
244 | func (e *errorList) hasError() bool {
245 | 	return e.numErrors > 0
246 | }
247 | 


--------------------------------------------------------------------------------
/asm/evaluator.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"errors"
 21 | 	"fmt"
 22 | 	"math"
 23 | 	"math/big"
 24 | 	"slices"
 25 | 
 26 | 	"github.com/fjl/geas/internal/ast"
 27 | 	"github.com/fjl/geas/internal/lzint"
 28 | )
 29 | 
// evaluator is for evaluating expressions.
type evaluator struct {
	// inStack is the set of expression macros currently being evaluated.
	// It is used to detect recursive macro calls.
	inStack    map[*ast.ExpressionMacroDef]struct{}
	// labelPC holds the offsets of non-global labels, as stored by setLabelPC.
	labelPC    map[evalLabelKey]int
	// usedLabels is the set of label definitions that were resolved by lookupLabel.
	usedLabels map[*ast.LabelDefSt]struct{}
	// globals is consulted for global labels and global expression macros.
	globals    *globalScope
}
 37 | 
// evalLabelKey is the key of evaluator.labelPC: a label definition together
// with the document it was looked up in.
type evalLabelKey struct {
	doc *ast.Document
	l   *ast.LabelDefSt
}
 42 | 
// evalEnvironment is the context in which an expression is evaluated.
type evalEnvironment struct {
	// doc is the document used to resolve labels and expression macros.
	doc       *ast.Document
	// macroArgs, when non-nil, supplies values for variables that are
	// parameters of an instruction macro.
	macroArgs *instrMacroArgs
	// variables holds the bound parameters of an expression macro call.
	// It is checked before macroArgs.
	variables map[string]*lzint.Value
}
 48 | 
 49 | func newEvaluator(gs *globalScope) *evaluator {
 50 | 	return &evaluator{
 51 | 		inStack:    make(map[*ast.ExpressionMacroDef]struct{}),
 52 | 		labelPC:    make(map[evalLabelKey]int),
 53 | 		usedLabels: make(map[*ast.LabelDefSt]struct{}),
 54 | 		globals:    gs,
 55 | 	}
 56 | }
 57 | 
 58 | func newEvalEnvironment(s *compilerSection) *evalEnvironment {
 59 | 	if s == nil {
 60 | 		panic("nil section")
 61 | 	}
 62 | 	return &evalEnvironment{doc: s.doc, macroArgs: s.macroArgs}
 63 | }
 64 | 
 65 | // lookupExprMacro finds a macro definition in the document chain.
 66 | func (e *evaluator) lookupExprMacro(env *evalEnvironment, name string) (*ast.ExpressionMacroDef, *ast.Document) {
 67 | 	if ast.IsGlobal(name) {
 68 | 		return e.globals.lookupExprMacro(name)
 69 | 	}
 70 | 	if e, doc := env.doc.LookupExprMacro(name); e != nil {
 71 | 		return e, doc
 72 | 	}
 73 | 	return nil, nil
 74 | }
 75 | 
 76 | // setLabelPC stores the offset of a label within a document.
 77 | func (e *evaluator) setLabelPC(doc *ast.Document, li *ast.LabelDefSt, pc int) {
 78 | 	if li.Global {
 79 | 		e.globals.setLabelPC(li.Name(), pc)
 80 | 	} else {
 81 | 		e.labelPC[evalLabelKey{doc, li}] = pc
 82 | 	}
 83 | }
 84 | 
 85 | // lookupLabel resolves a label reference.
 86 | func (e *evaluator) lookupLabel(doc *ast.Document, lref *ast.LabelRefExpr) (pc int, pcValid bool, err error) {
 87 | 	var li *ast.LabelDefSt
 88 | 	if lref.Global {
 89 | 		pc, pcValid, li = e.globals.lookupLabel(lref)
 90 | 	} else {
 91 | 		var srcdoc *ast.Document
 92 | 		li, srcdoc = doc.LookupLabel(lref)
 93 | 		pc, pcValid = e.labelPC[evalLabelKey{srcdoc, li}]
 94 | 	}
 95 | 	if li == nil {
 96 | 		return 0, false, fmt.Errorf("undefined label %v", lref)
 97 | 	}
 98 | 	if lref.Dotted && !li.Dotted {
 99 | 		return 0, false, fmt.Errorf("can't use %v to refer to label %s:", lref, li.Name())
100 | 	}
101 | 	// mark label used (for unused label analysis)
102 | 	e.usedLabels[li] = struct{}{}
103 | 	return pc, pcValid, nil
104 | }
105 | 
106 | // isLabelUsed reports whether the given label definition was used during expression evaluation.
107 | func (e *evaluator) isLabelUsed(li *ast.LabelDefSt) bool {
108 | 	_, ok := e.usedLabels[li]
109 | 	return ok
110 | }
111 | 
112 | func (e *evaluator) eval(expr ast.Expr, env *evalEnvironment) (*lzint.Value, error) {
113 | 	switch expr := expr.(type) {
114 | 	case *ast.LiteralExpr:
115 | 		return e.evalLiteral(expr)
116 | 	case *ast.LabelRefExpr:
117 | 		return e.evalLabelRef(expr, env)
118 | 	case *ast.ArithExpr:
119 | 		return e.evalArith(expr, env)
120 | 	case *ast.VariableExpr:
121 | 		return e.evalVariable(expr, env)
122 | 	case *ast.MacroCallExpr:
123 | 		return e.evalMacroCall(expr, env)
124 | 	default:
125 | 		panic(fmt.Sprintf("unhandled expr %T", expr))
126 | 	}
127 | }
128 | 
129 | // evalAsBytes gives the byte value of an expression.
130 | func (e *evaluator) evalAsBytes(expr ast.Expr, env *evalEnvironment) ([]byte, error) {
131 | 	v, err := e.eval(expr, env)
132 | 	if err != nil {
133 | 		return nil, err
134 | 	}
135 | 	return v.Bytes()
136 | }
137 | 
138 | func (e *evaluator) evalLiteral(expr *ast.LiteralExpr) (*lzint.Value, error) {
139 | 	if expr.Value != nil {
140 | 		return expr.Value, nil
141 | 	}
142 | 
143 | 	switch {
144 | 	case expr.IsNumber():
145 | 		val, err := lzint.ParseNumberLiteral(expr.Text())
146 | 		if err != nil {
147 | 			return nil, err
148 | 		}
149 | 		expr.Value = val
150 | 		return val, nil
151 | 
152 | 	case expr.IsString():
153 | 		val := lzint.FromBytes([]byte(expr.Text()))
154 | 		expr.Value = val
155 | 		return val, nil
156 | 
157 | 	default:
158 | 		panic(fmt.Errorf("unhandled astLiteral %q (not string|number)", expr.Text()))
159 | 	}
160 | }
161 | 
162 | func (e *evaluator) evalLabelRef(expr *ast.LabelRefExpr, env *evalEnvironment) (*lzint.Value, error) {
163 | 	pc, pcValid, err := e.lookupLabel(env.doc, expr)
164 | 	if err != nil {
165 | 		return nil, err
166 | 	}
167 | 	if !pcValid {
168 | 		// We hit this case if evaluating before labels have been calculated. A
169 | 		// special error value is returned here to allow the compiler to recognize
170 | 		// this case.
171 | 		return nil, unassignedLabelError{lref: expr}
172 | 	}
173 | 	return lzint.FromInt(big.NewInt(int64(pc))), nil
174 | }
175 | 
176 | var bigMaxUint = new(big.Int).SetUint64(math.MaxUint)
177 | 
178 | func (e *evaluator) evalArith(expr *ast.ArithExpr, env *evalEnvironment) (*lzint.Value, error) {
179 | 	// compute operands
180 | 	leftVal, err := e.eval(expr.Left, env)
181 | 	if err != nil {
182 | 		return nil, err
183 | 	}
184 | 	rightVal, err := e.eval(expr.Right, env)
185 | 	if err != nil {
186 | 		return nil, err
187 | 	}
188 | 	left, right := leftVal.Int(), rightVal.Int()
189 | 
190 | 	// apply op
191 | 	var v *big.Int
192 | 	switch expr.Op {
193 | 	case ast.ArithPlus:
194 | 		v = new(big.Int).Add(left, right)
195 | 
196 | 	case ast.ArithMinus:
197 | 		v = new(big.Int).Sub(left, right)
198 | 
199 | 	case ast.ArithMul:
200 | 		v = new(big.Int).Mul(left, right)
201 | 
202 | 	case ast.ArithDiv:
203 | 		if right.Sign() == 0 {
204 | 			return nil, errors.New("division by zero")
205 | 		}
206 | 		v = new(big.Int).Div(left, right)
207 | 
208 | 	case ast.ArithMod:
209 | 		v = new(big.Int).Mod(left, right)
210 | 
211 | 	case ast.ArithAnd:
212 | 		v = new(big.Int).And(left, right)
213 | 
214 | 	case ast.ArithOr:
215 | 		v = new(big.Int).Or(left, right)
216 | 
217 | 	case ast.ArithXor:
218 | 		v = new(big.Int).Xor(left, right)
219 | 
220 | 	case ast.ArithLshift:
221 | 		if right.Sign() == -1 {
222 | 			return nil, errors.New("negative lshift amount")
223 | 		}
224 | 		if right.Cmp(bigMaxUint) > 0 {
225 | 			return nil, fmt.Errorf("lshift amount %d overflows uint", right)
226 | 		}
227 | 		amount := uint(right.Uint64())
228 | 		v = new(big.Int).Lsh(left, amount)
229 | 
230 | 	case ast.ArithRshift:
231 | 		if right.Sign() == -1 {
232 | 			return nil, errors.New("negative rshift amount")
233 | 		}
234 | 		if right.Cmp(bigMaxUint) > 0 {
235 | 			return nil, fmt.Errorf("rshift amount %d overflows uint", right)
236 | 		}
237 | 		amount := uint(right.Uint64())
238 | 		v = new(big.Int).Rsh(left, amount)
239 | 
240 | 	default:
241 | 		panic(fmt.Errorf("invalid arith op %v", expr.Op))
242 | 	}
243 | 
244 | 	return lzint.FromInt(v), nil
245 | }
246 | 
247 | func (e *evaluator) evalVariable(expr *ast.VariableExpr, env *evalEnvironment) (*lzint.Value, error) {
248 | 	v, ok := env.variables[expr.Ident]
249 | 	if ok {
250 | 		return v, nil
251 | 	}
252 | 	// Check for instruction macro args.
253 | 	if a := env.macroArgs; a != nil {
254 | 		i := slices.Index(a.def.Params, expr.Ident)
255 | 		if i == -1 {
256 | 			return nil, fmt.Errorf("%w $%s", ecUndefinedVariable, expr.Ident)
257 | 		}
258 | 		arg := a.args[i]
259 | 		// Evaluate it in the parent scope.
260 | 		return e.eval(arg, newEvalEnvironment(a.callsite))
261 | 	}
262 | 	return nil, fmt.Errorf("%w $%s", ecUndefinedVariable, expr.Ident)
263 | }
264 | 
265 | func (e *evaluator) evalMacroCall(expr *ast.MacroCallExpr, env *evalEnvironment) (*lzint.Value, error) {
266 | 	if expr.Builtin {
267 | 		builtin, ok := builtinMacros[expr.Ident]
268 | 		if ok {
269 | 			return builtin(e, env, expr)
270 | 		}
271 | 		return nil, fmt.Errorf("%w .%s", ecUndefinedBuiltinMacro, expr.Ident)
272 | 	}
273 | 	def, defdoc := e.lookupExprMacro(env, expr.Ident)
274 | 	if def == nil {
275 | 		return nil, fmt.Errorf("%w %s", ecUndefinedMacro, expr.Ident)
276 | 	}
277 | 
278 | 	// Prevent recursion.
279 | 	if !e.enterMacro(def) {
280 | 		return nil, fmt.Errorf("%w %s", ecRecursiveCall, expr.Ident)
281 | 	}
282 | 	defer e.exitMacro(def)
283 | 
284 | 	// Bind arguments.
285 | 	macroEnv := &evalEnvironment{
286 | 		variables: make(map[string]*lzint.Value, len(def.Params)),
287 | 		doc:       defdoc,
288 | 	}
289 | 	if err := checkArgCount(expr, len(def.Params)); err != nil {
290 | 		return nil, err
291 | 	}
292 | 	if len(expr.Args) != len(def.Params) {
293 | 		return nil, fmt.Errorf("%w, macro %s needs %d", ecInvalidArgumentCount, expr.Ident, len(def.Params))
294 | 	}
295 | 	for i, param := range def.Params {
296 | 		v, err := e.eval(expr.Args[i], env)
297 | 		if err != nil {
298 | 			return nil, err
299 | 		}
300 | 		macroEnv.variables[param] = v
301 | 	}
302 | 
303 | 	// Compute the macro result value.
304 | 	return e.eval(def.Body, macroEnv)
305 | }
306 | 
307 | func checkArgCount(expr *ast.MacroCallExpr, n int) error {
308 | 	if len(expr.Args) != n {
309 | 		return fmt.Errorf("%w, macro %s needs %d", ecInvalidArgumentCount, expr.Ident, n)
310 | 	}
311 | 	return nil
312 | }
313 | 
314 | func (e *evaluator) enterMacro(m *ast.ExpressionMacroDef) bool {
315 | 	_, found := e.inStack[m]
316 | 	if found {
317 | 		return false
318 | 	}
319 | 	e.inStack[m] = struct{}{}
320 | 	return true
321 | }
322 | 
// exitMacro removes the mark that enterMacro set for macro m.
func (e *evaluator) exitMacro(m *ast.ExpressionMacroDef) {
	delete(e.inStack, m)
}
326 | 


--------------------------------------------------------------------------------
/asm/evaluator_builtins.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"crypto/sha256"
 21 | 	"errors"
 22 | 	"fmt"
 23 | 	"math/big"
 24 | 	"strings"
 25 | 
 26 | 	"github.com/ethereum/go-ethereum/accounts/abi"
 27 | 	"github.com/ethereum/go-ethereum/common"
 28 | 	"github.com/fjl/geas/internal/ast"
 29 | 	"github.com/fjl/geas/internal/lzint"
 30 | 	"golang.org/x/crypto/sha3"
 31 | )
 32 | 
// builtinMacros maps the name of each builtin macro to its implementation.
var builtinMacros = make(map[string]builtinMacroFn)

// init registers the builtin macros.
//
// NOTE(review): the table is presumably filled here instead of by a map literal
// because the macro functions call back into the evaluator, which in turn reads
// builtinMacros. A literal would likely be rejected as an initialization cycle.
// Confirm before changing this.
func init() {
	builtinMacros["bitlen"] = bitlenMacro
	builtinMacros["bytelen"] = bytelenMacro
	builtinMacros["abs"] = absMacro
	builtinMacros["address"] = addressMacro
	builtinMacros["selector"] = selectorMacro
	builtinMacros["keccak256"] = keccak256Macro
	builtinMacros["sha256"] = sha256Macro
}

// builtinMacroFn is the implementation of a builtin macro. It receives the
// evaluator, the environment of the call, and the call expression with its
// unevaluated arguments.
type builtinMacroFn func(*evaluator, *evalEnvironment, *ast.MacroCallExpr) (*lzint.Value, error)
 46 | 
 47 | func bitlenMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
 48 | 	if err := checkArgCount(call, 1); err != nil {
 49 | 		return nil, err
 50 | 	}
 51 | 	v, err := e.eval(call.Args[0], env)
 52 | 	if err != nil {
 53 | 		return nil, err
 54 | 	}
 55 | 	return lzint.FromInt64(v.IntegerBitLen()), nil
 56 | }
 57 | 
 58 | func bytelenMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
 59 | 	if err := checkArgCount(call, 1); err != nil {
 60 | 		return nil, err
 61 | 	}
 62 | 	v, err := e.eval(call.Args[0], env)
 63 | 	if err != nil {
 64 | 		return nil, err
 65 | 	}
 66 | 	return lzint.FromInt64(v.ByteLen()), nil
 67 | }
 68 | 
 69 | func absMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
 70 | 	if err := checkArgCount(call, 1); err != nil {
 71 | 		return nil, err
 72 | 	}
 73 | 	v, err := e.eval(call.Args[0], env)
 74 | 	if err != nil {
 75 | 		return nil, err
 76 | 	}
 77 | 	return lzint.FromInt(new(big.Int).Abs(v.Int())), nil
 78 | }
 79 | 
 80 | func sha256Macro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
 81 | 	if err := checkArgCount(call, 1); err != nil {
 82 | 		return nil, err
 83 | 	}
 84 | 	bytes, err := e.evalAsBytes(call.Args[0], env)
 85 | 	if err != nil {
 86 | 		return nil, err
 87 | 	}
 88 | 	hash := sha256.Sum256(bytes)
 89 | 	return lzint.FromBytes(hash[:]), nil
 90 | }
 91 | 
 92 | func keccak256Macro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
 93 | 	if err := checkArgCount(call, 1); err != nil {
 94 | 		return nil, err
 95 | 	}
 96 | 	bytes, err := e.evalAsBytes(call.Args[0], env)
 97 | 	if err != nil {
 98 | 		return nil, err
 99 | 	}
100 | 	w := sha3.NewLegacyKeccak256()
101 | 	w.Write(bytes)
102 | 	hash := w.Sum(nil)
103 | 	return lzint.FromBytes(hash[:]), nil
104 | }
105 | 
106 | var (
107 | 	errSelectorWantsLiteral = fmt.Errorf(".selector(...) requires literal string argument")
108 | )
109 | 
110 | func selectorMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
111 | 	if err := checkArgCount(call, 1); err != nil {
112 | 		return nil, err
113 | 	}
114 | 	lit, ok := call.Args[0].(*ast.LiteralExpr)
115 | 	if !ok {
116 | 		return nil, errSelectorWantsLiteral
117 | 	}
118 | 	text := lit.Text()
119 | 	if _, err := abi.ParseSelector(text); err != nil {
120 | 		return nil, fmt.Errorf("invalid ABI selector")
121 | 	}
122 | 	w := sha3.NewLegacyKeccak256()
123 | 	w.Write([]byte(text))
124 | 	hash := w.Sum(nil)
125 | 	return lzint.FromBytes(hash[:4]), nil
126 | }
127 | 
128 | var (
129 | 	errAddressWantsLiteral = errors.New(".address(...) requires literal argument")
130 | 	errAddressInvalid      = errors.New("invalid Ethereum address")
131 | 	errAddressChecksum     = errors.New("address has invalid checksum")
132 | )
133 | 
134 | func addressMacro(e *evaluator, env *evalEnvironment, call *ast.MacroCallExpr) (*lzint.Value, error) {
135 | 	if err := checkArgCount(call, 1); err != nil {
136 | 		return nil, err
137 | 	}
138 | 	lit, ok := call.Args[0].(*ast.LiteralExpr)
139 | 	if !ok {
140 | 		return nil, errAddressWantsLiteral
141 | 	}
142 | 	text := lit.Text()
143 | 	addr, err := common.NewMixedcaseAddressFromString(text)
144 | 	if err != nil {
145 | 		return nil, errAddressInvalid
146 | 	}
147 | 	if isChecksumAddress(text) {
148 | 		if !addr.ValidChecksum() {
149 | 			return nil, errAddressChecksum
150 | 		}
151 | 	}
152 | 	return lzint.FromBytes(addr.Address().Bytes()), nil
153 | }
154 | 
// isChecksumAddress reports whether str contains at least one of the
// upper-case hex digits A-F.
func isChecksumAddress(str string) bool {
	return strings.IndexAny(str, "ABCDEF") >= 0
}
158 | 


--------------------------------------------------------------------------------
/asm/evaluator_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"math/big"
 22 | 	"testing"
 23 | 
 24 | 	"github.com/fjl/geas/internal/ast"
 25 | )
 26 | 
// evalTest is one successful-evaluation case used by TestExprEval.
type evalTest struct {
	expr   string // expression source passed to parseExprString
	result string // expected value, parsed by mustParseBigInt (decimal or prefixed, e.g. 0x)
}
 31 | 
// evalErrorTest is one failing-evaluation case used by TestExprEvalErrors.
type evalErrorTest struct {
	expr string // expression source passed to parseExprString
	err  string // exact error text expected from eval
}
 36 | 
// evalIntTests lists expressions that must evaluate without error, together
// with their expected integer results. Labels and macros referenced here are
// defined by the test document set up in init.
var evalIntTests = []evalTest{
	// arithmetic
	{expr: `1`, result: "1"},
	{expr: `1 + 4`, result: "5"},
	{expr: `1 + 1 + 4`, result: "6"},
	{expr: `1 << 48`, result: "281474976710656"},
	{expr: `32 >> 1`, result: "16"},
	{expr: `0xf1 & 0xe1`, result: "0xe1"},
	{expr: `0x0f & 0xff`, result: "0x0f"},
	{expr: `0x0f | 0xf0`, result: "0xff"},
	{expr: `0xf ^ 0xf`, result: "0x00"},
	{expr: `0x0 ^ 0xf`, result: "0xf"},
	// arithmetic precedence rules
	{expr: `(2 * 3) + 4`, result: "10"},
	{expr: `2 * 3 + 4`, result: "10"},
	{expr: `4 + 2 * 3`, result: "10"},
	{expr: `10 / 5 + 2`, result: "4"},
	{expr: `1 + 1024 * 1024 * 1024`, result: "1073741825"},
	{expr: `1024 * 1024 * 1024 * 1024 + 1`, result: "1099511627777"},
	{expr: `1 + 1024 * 1024 * 1024 & 2 + 3`, result: "4"},
	{expr: `(1 + ((1024 * 1024 * 1024) & 2)) + 3`, result: "4"},
	// -- division and multiplication have same precedence
	// NOTE(review): the two entries below are identical; the second was
	// presumably meant to cover a different operand order - confirm.
	{expr: `12 / 6 * 3`, result: "6"},
	{expr: `12 / 6 * 3`, result: "6"},
	// -- and binds more strongly than or
	{expr: `0xff00 | 0xff & 0x0f`, result: "0xff0f"},
	{expr: `0xff & 0x0f | 0xff00`, result: "0xff0f"},
	{expr: `0xff & (0x0f | 0xff00)`, result: "0x0f"},
	// -- shift binds more strongly than and/or
	{expr: `0xff >> 4 & 0x05`, result: "0x05"},
	// macro and label references
	{expr: `@label1`, result: "1"},
	{expr: `@label1 + 2`, result: "3"},
	{expr: `macro3 / @label1`, result: "3"},
	{expr: `@.label2`, result: "2"},
	{expr: `@Label3`, result: "3"},
	{expr: `@.Label4`, result: "4"},
	{expr: `macroFunc(2)`, result: "2"},
	// string literals
	{expr: `"A"`, result: "65"},
	{expr: `"foo"`, result: "6713199"},
	// builtins
	{expr: `.bitlen(0)`, result: "0"},
	{expr: `.bitlen(0xff)`, result: "8"},
	{expr: `.bitlen(0x1ff)`, result: "9"},
	{expr: `.bitlen(0x01ff)`, result: "9"},
	{expr: `.bytelen(0)`, result: "0"},
	{expr: `.bytelen(0xff)`, result: "1"},
	{expr: `.bytelen(0x1ff)`, result: "2"},
	{expr: `.bytelen(0x01ff)`, result: "2"},
	{expr: `.bytelen(0x0001ff)`, result: "3"},   // note: leading zero byte
	{expr: `.bytelen(0x000001ff)`, result: "4"}, // two leading zero bytes
	{expr: `.bytelen("foobar")`, result: "6"},
	{expr: `.abs(0 - 10)`, result: "10"},
	{expr: `.sha256("text")`, result: "68832153269555879243704685382415794081420120252170153643880971663484982053329"},
	{expr: `.sha256(33)`, result: "84783983549258160669137366770885509408211009960610860350324922232842582506338"},
	{expr: `.selector("transfer(address,uint256)")`, result: "2835717307"},
	{expr: `.address(0x658bdf435d810c91414ec09147daa6db62406379)`, result: "579727320398773179602058954232328055508812456825"},
	{expr: `.address("0x658bdf435d810c91414ec09147daa6db62406379")`, result: "579727320398773179602058954232328055508812456825"},
}
 97 | 
// evalErrorTests lists expressions that parse successfully but must fail
// evaluation. TestExprEvalErrors compares the error text exactly.
var evalErrorTests = []evalErrorTest{
	{expr: `20 / 0`, err: "division by zero"},
	{expr: `1 << (1 << 64)`, err: "lshift amount 18446744073709551616 overflows uint"},
	{expr: `1 >> (1 << 64)`, err: "rshift amount 18446744073709551616 overflows uint"},
	{expr: `macro3(foo, 1)`, err: "invalid number of arguments, macro macro3 needs 0"},
	// builtins
	{expr: `.selector("transfer(,,uint256)")`, err: "invalid ABI selector"},
	// Mixed-case address whose checksum does not validate.
	{expr: `.address(0x658bdf435d810c91414EC09147daa6db62406379)`, err: errAddressChecksum.Error()},
	{expr: `.sha256(0x011)`, err: "odd-length hex in bytes context"},
}
108 | 
// evalTestDoc is the document that defines the labels and macros referenced by
// the evaluator tests. It is parsed once in init.
var evalTestDoc *ast.Document
110 | 
111 | func init() {
112 | 	source := `
113 | label1:
114 | .label2:
115 | Label3:
116 | .Label4:
117 | #define macro3() = 3
118 | #define macroFunc(a) = $a
119 | `
120 | 	doc, errs := ast.NewParser("", []byte(source), false).Parse()
121 | 	if len(errs) != 0 {
122 | 		panic("parse error: " + errs[0].Error())
123 | 	}
124 | 	evalTestDoc = doc
125 | }
126 | 
127 | func evaluatorForTesting() *evaluator {
128 | 	gs := newGlobalScope()
129 | 	errs := gs.registerDefinitions(evalTestDoc)
130 | 	if len(errs) > 0 {
131 | 		panic(fmt.Errorf("error in registerDefinitions: %v", errs[0]))
132 | 	}
133 | 	e := newEvaluator(gs)
134 | 	e.setLabelPC(evalTestDoc, evalTestDoc.Statements[0].(*ast.LabelDefSt), 1)
135 | 	e.setLabelPC(evalTestDoc, evalTestDoc.Statements[1].(*ast.LabelDefSt), 2)
136 | 	e.setLabelPC(evalTestDoc, evalTestDoc.Statements[2].(*ast.LabelDefSt), 3)
137 | 	e.setLabelPC(evalTestDoc, evalTestDoc.Statements[3].(*ast.LabelDefSt), 4)
138 | 	return e
139 | }
140 | 
141 | func evalEnvironmentForTesting() *evalEnvironment {
142 | 	return newEvalEnvironment(&compilerSection{
143 | 		doc: evalTestDoc,
144 | 	})
145 | }
146 | 
147 | func TestExprEval(t *testing.T) {
148 | 	for _, test := range evalIntTests {
149 | 		expr, err := parseExprString(test.expr)
150 | 		if err != nil {
151 | 			t.Errorf("invalid expr %q: %v", test.expr, err)
152 | 			continue
153 | 		}
154 | 		expectedResult := mustParseBigInt(test.result)
155 | 		e := evaluatorForTesting()
156 | 		env := evalEnvironmentForTesting()
157 | 		result, err := e.eval(expr, env)
158 | 		if err != nil {
159 | 			t.Errorf("eval error in %q: %v", test.expr, err)
160 | 			continue
161 | 		}
162 | 		if result.Int().Cmp(expectedResult) != 0 {
163 | 			t.Errorf("expr %q result %v, want %v", test.expr, result, expectedResult)
164 | 			continue
165 | 		}
166 | 	}
167 | }
168 | 
169 | func TestExprEvalErrors(t *testing.T) {
170 | 	for _, test := range evalErrorTests {
171 | 		expr, err := parseExprString(test.expr)
172 | 		if err != nil {
173 | 			t.Errorf("invalid expr %q: %v", test.expr, err)
174 | 			continue
175 | 		}
176 | 		e := evaluatorForTesting()
177 | 		env := evalEnvironmentForTesting()
178 | 		result, err := e.eval(expr, env)
179 | 		if err == nil {
180 | 			t.Errorf("expected error evaluating %q, got %v", test.expr, result)
181 | 			continue
182 | 		}
183 | 		if err.Error() != test.err {
184 | 			t.Errorf("expr %q wrong error %q, want %q", test.expr, err, test.err)
185 | 			continue
186 | 		}
187 | 	}
188 | }
189 | 
190 | func parseExprString(str string) (ast.Expr, error) {
191 | 	p := ast.NewParser("string", []byte(str), false)
192 | 	return p.ParseExpression()
193 | }
194 | 
// mustParseBigInt converts str to a big integer. The base is taken from the
// string's prefix (base argument 0 of big.Int.SetString), so both decimal and
// 0x-prefixed inputs are accepted. It panics when str is not a valid integer.
func mustParseBigInt(str string) *big.Int {
	var n big.Int
	if _, ok := n.SetString(str, 0); !ok {
		panic("invalid bigint: " + str)
	}
	return &n
}
202 | 


--------------------------------------------------------------------------------
/asm/global.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package asm
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 
 22 | 	"github.com/fjl/geas/internal/ast"
 23 | 	"github.com/fjl/geas/internal/lzint"
 24 | )
 25 | 
// globalScope holds definitions across files.
type globalScope struct {
	// label maps a label name to its first registered definition (see registerLabel).
	label      map[string]*ast.LabelDefSt
	// labelPC maps a label name to its PC value once known (see setLabelPC).
	labelPC    map[string]int
	// labelDoc maps a label name to the document it was created in (see setLabelDocument).
	labelDoc   map[string]*ast.Document
	// instrMacro and exprMacro hold macro definitions by name, together with
	// the document that defined them.
	instrMacro map[string]globalDef[*ast.InstructionMacroDef]
	exprMacro  map[string]globalDef[*ast.ExpressionMacroDef]
}
 34 | 
// globalDef pairs a global definition with the document it was registered from.
type globalDef[M any] struct {
	def M
	// doc is the registering document; overrideExprMacroValue stores nil here.
	doc *ast.Document
}
 39 | 
 40 | func newGlobalScope() *globalScope {
 41 | 	return &globalScope{
 42 | 		label:      make(map[string]*ast.LabelDefSt),
 43 | 		labelPC:    make(map[string]int),
 44 | 		labelDoc:   make(map[string]*ast.Document),
 45 | 		instrMacro: make(map[string]globalDef[*ast.InstructionMacroDef]),
 46 | 		exprMacro:  make(map[string]globalDef[*ast.ExpressionMacroDef]),
 47 | 	}
 48 | }
 49 | 
 50 | // registerDefinitions processes a document and registers the globals contained in it.
 51 | func (gs *globalScope) registerDefinitions(doc *ast.Document) (errs []error) {
 52 | 	for _, li := range doc.GlobalLabels() {
 53 | 		gs.registerLabel(li, doc)
 54 | 	}
 55 | 	for _, mac := range doc.GlobalExprMacros() {
 56 | 		def := globalDef[*ast.ExpressionMacroDef]{mac, doc}
 57 | 		if err := gs.registerExprMacro(mac.Name, def); err != nil {
 58 | 			errs = append(errs, err)
 59 | 		}
 60 | 	}
 61 | 	for _, mac := range doc.GlobalInstrMacros() {
 62 | 		def := globalDef[*ast.InstructionMacroDef]{mac, doc}
 63 | 		if err := gs.registerInstrMacro(mac.Name, def); err != nil {
 64 | 			errs = append(errs, err)
 65 | 		}
 66 | 	}
 67 | 	return errs
 68 | }
 69 | 
 70 | // registerLabel registers a label as known.
 71 | func (gs *globalScope) registerLabel(def *ast.LabelDefSt, doc *ast.Document) {
 72 | 	_, found := gs.label[def.Name()]
 73 | 	if !found {
 74 | 		gs.label[def.Name()] = def
 75 | 	}
 76 | }
 77 | 
 78 | // registerInstrMacro registers the first definition of an instruction macro.
 79 | func (gs *globalScope) registerInstrMacro(name string, def globalDef[*ast.InstructionMacroDef]) error {
 80 | 	firstDef, found := gs.instrMacro[name]
 81 | 	if found {
 82 | 		return &statementError{
 83 | 			inst: def.def,
 84 | 			err:  fmt.Errorf("macro %%%s already defined%s", name, firstDef.doc.CreationString()),
 85 | 		}
 86 | 	}
 87 | 	gs.instrMacro[name] = def
 88 | 	return nil
 89 | }
 90 | 
 91 | // registerExprMacro registers the first definition of an expression macro.
 92 | func (gs *globalScope) registerExprMacro(name string, def globalDef[*ast.ExpressionMacroDef]) error {
 93 | 	firstDef, found := gs.exprMacro[name]
 94 | 	if found {
 95 | 		return &statementError{
 96 | 			inst: def.def,
 97 | 			err:  fmt.Errorf("macro %s already defined%s", name, firstDef.doc.CreationString()),
 98 | 		}
 99 | 	}
100 | 	gs.exprMacro[name] = def
101 | 	return nil
102 | }
103 | 
104 | // overrideExprMacroValue sets a macro to the given value, overriding its definition.
105 | func (gs *globalScope) overrideExprMacroValue(name string, val *lzint.Value) {
106 | 	gs.exprMacro[name] = globalDef[*ast.ExpressionMacroDef]{
107 | 		doc: nil,
108 | 		def: &ast.ExpressionMacroDef{
109 | 			Name: name,
110 | 			Body: &ast.LiteralExpr{Value: val},
111 | 		},
112 | 	}
113 | }
114 | 
115 | func (gs *globalScope) lookupInstrMacro(name string) (*ast.InstructionMacroDef, *ast.Document) {
116 | 	gdef := gs.instrMacro[name]
117 | 	return gdef.def, gdef.doc
118 | }
119 | 
120 | func (gs *globalScope) lookupExprMacro(name string) (*ast.ExpressionMacroDef, *ast.Document) {
121 | 	gdef := gs.exprMacro[name]
122 | 	return gdef.def, gdef.doc
123 | }
124 | 
125 | // setLabelDocument registers the document that a label was created in. This is subtly
126 | // different from the source document of the labelDefInstruction. The distinction matters
127 | // for labels created by macros, because macros create a new document on expansion.
128 | //
129 | // These documents need to be tracked here in order to report the first macro invocation
130 | // or #include statement that created a label.
131 | func (gs *globalScope) setLabelDocument(li *ast.LabelDefSt, doc *ast.Document) error {
132 | 	name := li.Name()
133 | 	firstDefDoc := gs.labelDoc[name]
134 | 	if firstDefDoc == nil {
135 | 		gs.labelDoc[name] = doc
136 | 		return nil
137 | 	}
138 | 	firstDef := gs.label[name]
139 | 	err := ast.ErrLabelAlreadyDef(firstDef, li)
140 | 	if loc := firstDefDoc.CreationString(); loc != "" {
141 | 		err = fmt.Errorf("%w%s", err, loc)
142 | 	}
143 | 	return err
144 | }
145 | 
// setLabelPC is called by the compiler when the PC value of a label becomes available.
// It stores pc under name, replacing any value recorded earlier.
func (gs *globalScope) setLabelPC(name string, pc int) {
	gs.labelPC[name] = pc
}
150 | 
151 | // lookupLabel returns the PC value of a label, and also reports whether the label was found at all.
152 | func (gs *globalScope) lookupLabel(lref *ast.LabelRefExpr) (pc int, pcValid bool, def *ast.LabelDefSt) {
153 | 	li, ok := gs.label[lref.Ident]
154 | 	if !ok {
155 | 		return 0, false, nil
156 | 	}
157 | 	pc, pcValid = gs.labelPC[lref.Ident]
158 | 	return pc, pcValid, li
159 | }
160 | 


--------------------------------------------------------------------------------
/asm/statements.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package asm
18 | 
19 | import (
20 | 	"github.com/fjl/geas/internal/ast"
21 | )
22 | 
// statement wraps an AST statement in a document.
type statement interface {
	ast.Statement
	// expand is implemented by each statement type declared in this file.
	// NOTE(review): the implementations are outside this view; presumably
	// expand emits the statement into prog during compilation - confirm.
	expand(c *Compiler, doc *ast.Document, prog *compilerProg) error
}
28 | 
// Statement types. Each one embeds the AST node it wraps, so the node's
// fields and methods are promoted to the wrapper. statementFromAST creates them.
type (
	opcodeStatement    struct{ *ast.OpcodeSt }
	labelDefStatement  struct{ *ast.LabelDefSt }
	macroCallStatement struct{ *ast.MacroCallSt }
	includeStatement   struct{ *ast.IncludeSt }
	assembleStatement  struct{ *ast.AssembleSt }
	bytesStatement     struct{ *ast.BytesSt }
)
38 | 
39 | // statementFromAST converts AST statements into compiler statements. Note this function
40 | // returns nil for statement types the compiler doesn't care about.
41 | func statementFromAST(st ast.Statement) statement {
42 | 	switch st := st.(type) {
43 | 	case *ast.OpcodeSt:
44 | 		return opcodeStatement{st}
45 | 	case *ast.LabelDefSt:
46 | 		return labelDefStatement{st}
47 | 	case *ast.MacroCallSt:
48 | 		return macroCallStatement{st}
49 | 	case *ast.IncludeSt:
50 | 		return includeStatement{st}
51 | 	case *ast.AssembleSt:
52 | 		return assembleStatement{st}
53 | 	case *ast.BytesSt:
54 | 		return bytesStatement{st}
55 | 	default:
56 | 		return nil
57 | 	}
58 | }
59 | 


--------------------------------------------------------------------------------
/asm/testdata/known-bytecode.yaml:
--------------------------------------------------------------------------------
1 | 4788asm: 3373fffffffffffffffffffffffffffffffffffffffe14604d57602036146024575f5ffd5b5f35801560495762016da0810690815414603c575f5ffd5b62016da001545f5260205ff35b5f5ffd5b62016da042064281555f359062016da0015500
2 | 4788asm_ctor: 60618060095f395ff33373fffffffffffffffffffffffffffffffffffffffe14604d57602036146024575f5ffd5b5f35801560495762016da0810690815414603c575f5ffd5b62016da001545f5260205ff35b5f5ffd5b62016da042064281555f359062016da0015500
3 | erc20: 366000803760005160e01c806323b872dd14605c578063095ea7b31460c7578063a9059cbb1461011257806370a082311461015e578063dd62ed3e1461016a578063313ce5671461017957806318160ddd14610179575b60006000fd5b604060042080546044518181116056576004355410605657604435900390556004358054604435809103909155602435805490910190556024356004356044356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60245160045160245233600452604060042080549091019055600435336024356000527f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b92560206000a3005b3354602451818111605657900333556004518054602451019055600435336024356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60206000600451548152f35b60406004205460005260206000f35b
4 | erc20_ctor: 5861271033556012803803919082908239f3366000803760005160e01c806323b872dd14605c578063095ea7b31460c7578063a9059cbb1461011257806370a082311461015e578063dd62ed3e1461016a578063313ce5671461017957806318160ddd14610179575b60006000fd5b604060042080546044518181116056576004355410605657604435900390556004358054604435809103909155602435805490910190556024356004356044356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60245160045160245233600452604060042080549091019055600435336024356000527f8c5be1e5ebec7d5bd14f71427d1e84f3dd0314c0f7b2291e5b200ac8c7c3b92560206000a3005b3354602451818111605657900333556004518054602451019055600435336024356000527fddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef60206000a3005b60206000600451548152f35b60406004205460005260206000f35b
5 | 


--------------------------------------------------------------------------------
/cmd/geas/geas.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package main
 18 | 
 19 | import (
 20 | 	"bytes"
 21 | 	"encoding/hex"
 22 | 	"flag"
 23 | 	"fmt"
 24 | 	"io"
 25 | 	"os"
 26 | 	"path"
 27 | 	"path/filepath"
 28 | 	"runtime/debug"
 29 | 	"slices"
 30 | 	"strings"
 31 | 
 32 | 	"github.com/fjl/geas/asm"
 33 | 	"github.com/fjl/geas/disasm"
 34 | 	"github.com/fjl/geas/internal/evm"
 35 | )
 36 | 
// t2s replaces every tab with two spaces. It is applied to the usage text.
var t2s = strings.NewReplacer("\t", "  ")
 38 | 
// usage prints the help text to stderr, preceded by the program version when
// one is known. Tabs in the option list are converted to spaces with t2s.
func usage() {
	vsn := version()
	if len(vsn) > 0 {
		fmt.Fprintln(os.Stderr, "Version:", vsn)
	}
	fmt.Fprint(os.Stderr, `Usage: geas {-a | -d | -i} [options...] <file>`+
		t2s.Replace(`
 -a: ASSEMBLER (default)

	 -o <file>          output file name
	 -bin               output binary instead of hex
	 -no-nl             skip newline at end of hex output

 -d: DISASSEMBLER

	 -bin               input is binary bytecode
	 -target <name>     configure instruction set
	 -o <file>          output file name
	 -blocks            blank lines between logical blocks
	 -pc                show program counter
	 -uppercase         show instruction names as uppercase

 -i: INFORMATION

	 -targets           show supported target fork names
	 -ops <target>      show all opcodes in target
	 -lineage <target>  show target fork chain

 -h: HELP

`))
}
 71 | 
 72 | func main() {
 73 | 	if len(os.Args) < 2 {
 74 | 		usage()
 75 | 		os.Exit(2)
 76 | 	}
 77 | 
 78 | 	mode := os.Args[1]
 79 | 	switch {
 80 | 	case mode == "-a":
 81 | 		assembler(os.Args[2:])
 82 | 
 83 | 	case mode == "-d":
 84 | 		disassembler(os.Args[2:])
 85 | 
 86 | 	case mode == "-i":
 87 | 		information(os.Args[2:])
 88 | 
 89 | 	case mode == "-h", mode == "-help", mode == "--help":
 90 | 		usage()
 91 | 		os.Exit(0)
 92 | 
 93 | 	default:
 94 | 		assembler(os.Args[1:])
 95 | 	}
 96 | }
 97 | 
// inputLimit is the maximum number of bytes (10 MiB) read from standard input
// by the assembler and from the input file or stream by the disassembler.
const inputLimit = 10 * 1024 * 1024
 99 | 
100 | func assembler(args []string) {
101 | 	var (
102 | 		fs         = newFlagSet("-a")
103 | 		outputFile = fs.String("o", "", "")
104 | 		binary     = fs.Bool("bin", false, "")
105 | 		noNL       = fs.Bool("no-nl", false, "")
106 | 	)
107 | 	parseFlags(fs, args)
108 | 
109 | 	// Assemble.
110 | 	var c = asm.New(nil)
111 | 	var bin []byte
112 | 	file := fileArg(fs)
113 | 	if file != "-" {
114 | 		wd, _ := os.Getwd()
115 | 		c.SetFilesystem(os.DirFS(wd))
116 | 		fp := path.Clean(filepath.ToSlash(file))
117 | 		bin = c.CompileFile(fp)
118 | 	} else {
119 | 		source, err := io.ReadAll(io.LimitReader(os.Stdin, inputLimit))
120 | 		if err != nil {
121 | 			exit(1, err)
122 | 		}
123 | 		bin = c.CompileString(string(source))
124 | 	}
125 | 
126 | 	// Show errors.
127 | 	for _, err := range c.ErrorsAndWarnings() {
128 | 		fmt.Fprintln(os.Stderr, err)
129 | 	}
130 | 	if c.Failed() {
131 | 		os.Exit(1)
132 | 	}
133 | 
134 | 	// Write output.
135 | 	var err error
136 | 	output := os.Stdout
137 | 	if *outputFile != "" {
138 | 		output, err = os.OpenFile(*outputFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
139 | 		if err != nil {
140 | 			exit(1, err)
141 | 		}
142 | 		defer output.Close()
143 | 	}
144 | 	if *binary {
145 | 		_, err = output.Write(bin)
146 | 	} else {
147 | 		nl := "\n"
148 | 		if *noNL {
149 | 			nl = ""
150 | 		}
151 | 		_, err = fmt.Fprintf(output, "%x%s", bin, nl)
152 | 	}
153 | 	if err != nil {
154 | 		exit(1, err)
155 | 	}
156 | }
157 | 
158 | func disassembler(args []string) {
159 | 	var (
160 | 		fs         = newFlagSet("-d")
161 | 		outputFile = fs.String("o", "", "")
162 | 		showPC     = fs.Bool("pc", false, "")
163 | 		showBlocks = fs.Bool("blocks", true, "")
164 | 		uppercase  = fs.Bool("uppercase", false, "")
165 | 		binary     = fs.Bool("bin", false, "")
166 | 		target     = fs.String("target", "", "")
167 | 	)
168 | 	parseFlags(fs, args)
169 | 
170 | 	// Read input.
171 | 	var err error
172 | 	var infd io.ReadCloser
173 | 	file := fileArg(fs)
174 | 	if file == "-" {
175 | 		infd = os.Stdin
176 | 	} else {
177 | 		infd, err = os.Open(file)
178 | 		if err != nil {
179 | 			exit(1, err)
180 | 		}
181 | 	}
182 | 	bytecode, err := io.ReadAll(io.LimitReader(infd, inputLimit))
183 | 	if err != nil {
184 | 		exit(1, err)
185 | 	}
186 | 	infd.Close()
187 | 
188 | 	// Possibly convert from hex.
189 | 	if !*binary {
190 | 		dec := make([]byte, hex.DecodedLen(len(bytecode)))
191 | 		l, err := hex.Decode(dec, bytes.TrimSpace(bytecode))
192 | 		if err != nil {
193 | 			exit(1, err)
194 | 		}
195 | 		bytecode = dec[:l]
196 | 	}
197 | 
198 | 	output := os.Stdout
199 | 	if *outputFile != "" {
200 | 		output, err = os.OpenFile(*outputFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
201 | 		if err != nil {
202 | 			exit(1, err)
203 | 		}
204 | 		defer output.Close()
205 | 	}
206 | 
207 | 	// Disassemble.
208 | 	d := disasm.New()
209 | 	d.SetShowBlocks(*showBlocks)
210 | 	d.SetShowPC(*showPC)
211 | 	d.SetUppercase(*uppercase)
212 | 	if *target != "" {
213 | 		if err := d.SetTarget(*target); err != nil {
214 | 			exit(2, err)
215 | 		}
216 | 	}
217 | 	err = d.Disassemble(bytecode, output)
218 | 	exit(1, err)
219 | }
220 | 
221 | func information(args []string) {
222 | 	var ran bool
223 | 	checkRunOnce := func() {
224 | 		if ran {
225 | 			exit(2, fmt.Errorf("can't show more than one thing at once in -i mode"))
226 | 		}
227 | 		ran = true
228 | 	}
229 | 	showTargets := func(arg string) error {
230 | 		checkRunOnce()
231 | 		for _, name := range evm.AllForks() {
232 | 			fmt.Println(name)
233 | 		}
234 | 		return nil
235 | 	}
236 | 	showOps := func(arg string) error {
237 | 		checkRunOnce()
238 | 		is := evm.FindInstructionSet(arg)
239 | 		if is == nil {
240 | 			return fmt.Errorf("unknown fork %q", arg)
241 | 		}
242 | 		for _, op := range is.AllOps() {
243 | 			fmt.Println(op.Name)
244 | 		}
245 | 		return nil
246 | 	}
247 | 	showParents := func(arg string) error {
248 | 		checkRunOnce()
249 | 		is := evm.FindInstructionSet(arg)
250 | 		if is == nil {
251 | 			return fmt.Errorf("unknown fork %q", arg)
252 | 		}
253 | 		for _, f := range is.Parents() {
254 | 			fmt.Println(f)
255 | 		}
256 | 		return nil
257 | 	}
258 | 
259 | 	var fs = newFlagSet("-i")
260 | 	fs.BoolFunc("targets", "", showTargets)
261 | 	fs.Func("ops", "", showOps)
262 | 	fs.Func("lineage", "", showParents)
263 | 	parseFlags(fs, args)
264 | 	if !ran {
265 | 		usage()
266 | 		exit(2, fmt.Errorf("please select information topic"))
267 | 	}
268 | 	if fs.NArg() > 0 {
269 | 		exit(2, fmt.Errorf("too many arguments"))
270 | 	}
271 | }
272 | 
273 | func newFlagSet(mode string) *flag.FlagSet {
274 | 	fs := flag.NewFlagSet("geas "+mode, flag.ContinueOnError)
275 | 	fs.Usage = usage
276 | 	fs.SetOutput(io.Discard)
277 | 	return fs
278 | }
279 | 
280 | func parseFlags(fs *flag.FlagSet, args []string) {
281 | 	if err := fs.Parse(args); err != nil {
282 | 		exit(2, err)
283 | 	}
284 | }
285 | 
286 | func fileArg(fs *flag.FlagSet) string {
287 | 	switch fs.NArg() {
288 | 	case 1:
289 | 		return fs.Arg(0)
290 | 	case 0:
291 | 		exit(2, fmt.Errorf("need file name as argument"))
292 | 	default:
293 | 		if slices.ContainsFunc(fs.Args(), func(s string) bool { return strings.HasPrefix(s, "-") }) {
294 | 			exit(2, fmt.Errorf("too many arguments (flags must precede input filename)"))
295 | 		}
296 | 		exit(2, fmt.Errorf("too many arguments"))
297 | 	}
298 | 	return ""
299 | }
300 | 
// exit terminates the process. A nil error and flag.ErrHelp both count as
// success and exit with status 0, regardless of code. Any other error is
// printed to stderr and the process exits with the given code.
func exit(code int, err error) {
	switch err {
	case nil, flag.ErrHelp:
		os.Exit(0)
	}
	fmt.Fprintf(os.Stderr, "Error: %v\n", err)
	os.Exit(code)
}
308 | 
// version returns a description of the running build, or "" when none is
// available.
//
// If the main module has a version other than "(devel)", that version is
// returned. Otherwise the result is built from the VCS settings in the build
// info: "git:" followed by at most the first 16 characters of the revision,
// with "-dirty" appended when the working tree was modified.
func version() string {
	// maxRevLen is the number of revision characters shown.
	const maxRevLen = 16

	info, _ := debug.ReadBuildInfo()
	if info == nil {
		return ""
	}
	if info.Main.Version != "(devel)" {
		return info.Main.Version
	}
	gitVersion := ""
	dirty := false
	for _, s := range info.Settings {
		switch s.Key {
		case "vcs.revision":
			// Truncate defensively: slicing a revision shorter than
			// maxRevLen would panic.
			gitVersion = s.Value
			if len(gitVersion) > maxRevLen {
				gitVersion = gitVersion[:maxRevLen]
			}
		case "vcs.modified":
			if s.Value == "true" {
				dirty = true
			}
		}
	}
	if gitVersion == "" {
		return ""
	}
	if dirty {
		gitVersion += "-dirty"
	}
	return "git:" + gitVersion
}
337 | 


--------------------------------------------------------------------------------
/disasm/disassembler.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2024 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | // Package disasm is a disassembler for EVM bytecode.
 18 | package disasm
 19 | 
 20 | import (
 21 | 	"bufio"
 22 | 	"encoding/hex"
 23 | 	"fmt"
 24 | 	"io"
 25 | 	"strings"
 26 | 
 27 | 	"github.com/fjl/geas/internal/evm"
 28 | )
 29 | 
// Disassembler turns EVM bytecode into readable text instructions.
type Disassembler struct {
	// evm is the instruction set used to decode opcodes. When nil, Disassemble
	// selects the set of evm.LatestFork.
	evm       *evm.InstructionSet
	uppercase bool // print instruction names as defined instead of lowercased
	showPC    bool // prefix each line with the program counter in hex
	noBlanks  bool // suppress blank lines at block boundaries

	// pcBuffer and pcHex are scratch buffers for formatting the program
	// counter. They are allocated at the start of each Disassemble call.
	pcBuffer, pcHex []byte
}
 39 | 
 40 | func (d *Disassembler) setDefaults() {
 41 | 	if d.evm == nil {
 42 | 		d.evm = evm.FindInstructionSet(evm.LatestFork)
 43 | 	}
 44 | }
 45 | 
 46 | // New creates a disassembler.
 47 | func New() *Disassembler {
 48 | 	return new(Disassembler)
 49 | }
 50 | 
 51 | // SetTarger sets the instruction set used by the disassembler.
 52 | // It defauls to the latest known Ethereum fork.
 53 | func (d *Disassembler) SetTarget(name string) error {
 54 | 	is := evm.FindInstructionSet(name)
 55 | 	if is == nil {
 56 | 		return fmt.Errorf("unknown instruction set %q", name)
 57 | 	}
 58 | 	d.evm = is
 59 | 	return nil
 60 | }
 61 | 
// SetUppercase toggles printing instruction names in uppercase.
// When off, names are converted to lowercase before printing.
func (d *Disassembler) SetUppercase(on bool) {
	d.uppercase = on
}
 66 | 
// SetShowPC toggles printing of program counter on each line.
// The counter is printed in hex, followed by a colon and a space.
func (d *Disassembler) SetShowPC(on bool) {
	d.showPC = on
}
 71 | 
// SetShowBlocks toggles printing of blank lines at block boundaries.
// Blank lines are printed unless this is called with false.
func (d *Disassembler) SetShowBlocks(on bool) {
	// Stored inverted so that the zero value of Disassembler shows blocks.
	d.noBlanks = !on
}
 76 | 
 77 | // Disassemble is the main entry point of the disassembler.
 78 | // It runs through the bytecode and emits text to outW.
 79 | func (d *Disassembler) Disassemble(bytecode []byte, outW io.Writer) error {
 80 | 	d.setDefaults()
 81 | 	d.pcBuffer = make([]byte, digitsOfPC(len(bytecode)))
 82 | 	d.pcHex = make([]byte, hex.EncodedLen(len(d.pcBuffer)))
 83 | 	out := bufio.NewWriter(outW)
 84 | 
 85 | 	var prevOp *evm.Op
 86 | 	for pc := 0; pc < len(bytecode); pc++ {
 87 | 		op := d.evm.OpByCode(bytecode[pc])
 88 | 		d.newline(out, prevOp, op)
 89 | 		d.printPrefix(out, pc)
 90 | 		if op == nil {
 91 | 			d.printInvalid(out, bytecode[pc])
 92 | 		} else {
 93 | 			if op.Push {
 94 | 				size := d.printPush(out, op, bytecode[pc:])
 95 | 				pc += size
 96 | 			} else {
 97 | 				d.printOp(out, op)
 98 | 			}
 99 | 		}
100 | 
101 | 		prevOp = op
102 | 	}
103 | 	d.newline(out, prevOp, nil)
104 | 	return out.Flush()
105 | }
106 | 
107 | func (d *Disassembler) printPrefix(out io.Writer, pc int) {
108 | 	if d.showPC {
109 | 		for i := range d.pcBuffer {
110 | 			d.pcBuffer[len(d.pcBuffer)-1-i] = byte(pc >> (8 * i))
111 | 		}
112 | 		hex.Encode(d.pcHex, d.pcBuffer)
113 | 		fmt.Fprintf(out, "%s: ", d.pcHex)
114 | 	}
115 | }
116 | 
// printInvalid writes a single byte as a "#bytes" directive. Disassemble
// calls it for bytes that have no opcode in the current instruction set.
// Unlike printOp and printPush, the output already ends with a newline.
func (d *Disassembler) printInvalid(out io.Writer, b byte) {
	fmt.Fprintf(out, "#bytes %#x\n", b)
}
120 | 
121 | func (d *Disassembler) printOp(out io.Writer, op *evm.Op) {
122 | 	name := op.Name
123 | 	if !d.uppercase {
124 | 		name = strings.ToLower(op.Name)
125 | 	}
126 | 	fmt.Fprint(out, name)
127 | }
128 | 
129 | func (d *Disassembler) printPush(out io.Writer, op *evm.Op, code []byte) (dataSize int) {
130 | 	size := op.PushSize()
131 | 	if size == 0 {
132 | 		d.printOp(out, op)
133 | 		return 0
134 | 	}
135 | 	if size > len(code)-1 {
136 | 		// Handle truncated PUSH at end of code.
137 | 		fmt.Fprintf(out, "#bytes %#x", code)
138 | 		return len(code) - 1
139 | 	}
140 | 	d.printOp(out, op)
141 | 	data := code[1 : size+1]
142 | 	fmt.Fprintf(out, " %#x", data)
143 | 	return len(data)
144 | }
145 | 
146 | func (d *Disassembler) newline(out io.Writer, prevOp *evm.Op, nextOp *evm.Op) {
147 | 	if prevOp == nil {
148 | 		return
149 | 	}
150 | 	out.Write([]byte{'\n'})
151 | 	if d.noBlanks || nextOp == nil {
152 | 		return
153 | 	}
154 | 	if prevOp.Jump || nextOp.JumpDest || prevOp.Term {
155 | 		out.Write([]byte{'\n'})
156 | 	}
157 | }
158 | 
// digitsOfPC returns the number of bytes used to print program counter
// values for code of the given size. Despite the name, the result is a byte
// count: Disassemble sizes pcBuffer with it, and each byte is then rendered
// as two hex digits.
//
// The comparisons are made in int64 so that the 1<<32 - 1 threshold is a
// representable constant even where int is only 32 bits wide; comparing it
// against a plain int does not compile on such platforms.
func digitsOfPC(codesize int) int {
	size := int64(codesize)
	switch {
	case size < 1<<16-1:
		return 2
	case size < 1<<24-1:
		return 3
	case size < 1<<32-1:
		return 4
	default:
		return 8
	}
}
171 | 


--------------------------------------------------------------------------------
/disasm/disassembler_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2025 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package disasm
18 | 
19 | import (
20 | 	"bytes"
21 | 	"encoding/hex"
22 | 	"strings"
23 | 	"testing"
24 | 
25 | 	"github.com/fjl/geas/asm"
26 | )
27 | 
28 | func TestIncompletePush(t *testing.T) {
29 | 	bytecode, _ := hex.DecodeString("6080604052348015600e575f80fd5b50603e80601a5f395ff3fe60806040525f80fdfea2646970667358221220ba4339602dd535d09d71fae3164f7aa7f6e098ec879fc9e8f36bd912d4877c5264736f6c63430008190033")
30 | 	expectedOutput := strings.TrimSpace(`
31 | push1 0x80
32 | push1 0x40
33 | mstore
34 | callvalue
35 | dup1
36 | iszero
37 | push1 0x0e
38 | jumpi
39 | push0
40 | dup1
41 | revert
42 | jumpdest
43 | pop
44 | push1 0x3e
45 | dup1
46 | push1 0x1a
47 | push0
48 | codecopy
49 | push0
50 | return
51 | #bytes 0xfe
52 | push1 0x80
53 | push1 0x40
54 | mstore
55 | push0
56 | dup1
57 | revert
58 | #bytes 0xfe
59 | log2
60 | push5 0x6970667358
61 | #bytes 0x22
62 | slt
63 | keccak256
64 | #bytes 0xba
65 | number
66 | codecopy
67 | push1 0x2d
68 | #bytes 0xd5
69 | calldataload
70 | #bytes 0xd0
71 | swap14
72 | push18 0xfae3164f7aa7f6e098ec879fc9e8f36bd912
73 | #bytes 0xd4
74 | dup8
75 | #bytes 0x7c5264736f6c63430008190033
76 | `)
77 | 
78 | 	var buf strings.Builder
79 | 	d := New()
80 | 	d.SetShowBlocks(false)
81 | 	d.SetTarget("cancun")
82 | 	d.Disassemble(bytecode, &buf)
83 | 	output := strings.TrimSpace(buf.String())
84 | 	if output != expectedOutput {
85 | 		t.Fatal("wrong output:", output)
86 | 	}
87 | 
88 | 	// try round trip
89 | 	a := asm.New(nil)
90 | 	rtcode := a.CompileString(output)
91 | 	if !bytes.Equal(rtcode, bytecode) {
92 | 		t.Error("disassembly did not round-trip")
93 | 	}
94 | }
95 | 


--------------------------------------------------------------------------------
/example/4788asm.eas:
--------------------------------------------------------------------------------
  1 | ;;;    __ ___________  ____
  2 | ;;;   / // /__  ( __ )( __ )____ __________ ___
  3 | ;;;  / // /_ / / __  / __  / __ `/ ___/ __ `__ \
  4 | ;;; /__  __// / /_/ / /_/ / /_/ (__  ) / / / / /
  5 | ;;;   /_/  /_/\____/\____/\__,_/____/_/ /_/ /_/
  6 | ;;;
  7 | ;;; This is an implementation of EIP-4788's predeploy contract. It implements two
  8 | ;;; ring buffers to create a bounded beacon root lookup. The first ring buffer is a
  9 | ;;; timestamp % buflen -> timestamp mapping. This is used to ensure the timestamp
 10 | ;;; argument actually matches the stored root and isn't a different dividend. The
 11 | ;;; second ring buffer stores the beacon root. It's also keyed by timestamp %
 12 | ;;; buflen, then shifted right by buflen so the two don't overlap.
 13 | ;;;
 14 | ;;; The ring buffers can be visualized as follows:
 15 | ;;;
 16 | ;;;  buflen = 10
 17 | ;;; |--------------|--------------|
 18 | ;;; 0             10              20
 19 | ;;;   timestamps     beacon roots
 20 | ;;;
 21 | ;;; To get the corresponding beacon root for a specific timestamp, simply add
 22 | ;;; buflen to the timestamp's index in the first ring buffer. The sum will be
 23 | ;;; the storage slot in the second ring buffer where it is stored.
 24 | 
 25 | #pragma target "cancun"
 26 | 
 27 | ;;; -----------------------------------------------------------------------------
 28 | ;;; MACROS ----------------------------------------------------------------------
 29 | 
 30 | ;;; aka. HISTORY_BUFFER_LENGTH as defined in the EIP.
 31 | #define buflen = 93600
 32 | 
 33 | ;;; sysaddr is the address which calls the contract to submit a new root.
 34 | #define sysaddr = .address(0xfffffffffffffffffffffffffffffffffffffffe)
 35 | 
 36 | ;;; %do_revert sets up and then executes a revert(0,0) operation.
 37 | #define %do_revert {
 38 |     push 0           ; [0]
 39 |     push 0           ; [0, 0]
 40 |     revert           ; []
 41 | }
 42 | 
 43 | ;;; -----------------------------------------------------------------------------
 44 | ;;; PROGRAM START----------------------------------------------------------------
 45 | 
 46 |     ;; Protect the submit routine by verifying the caller is equal to sysaddr.
 47 |     caller           ; [caller]
 48 |     push sysaddr     ; [sysaddr, caller]
 49 |     eq               ; [sysaddr == caller]
 50 |     jumpi @submit    ; []
 51 | 
 52 |     ;; Fallthrough if addresses don't match -- this means the caller intends
 53 |     ;; to read a root.
 54 | 
 55 |     ;; Check if calldata is equal to 32 bytes.
 56 |     push 32          ; [32]
 57 |     calldatasize     ; [calldatasize, 32]
 58 |     eq               ; [calldatasize == 32]
 59 | 
 60 |     ;; Jump to continue if length-check passed, otherwise revert.
 61 |     jumpi @loadtime  ; []
 62 |     %do_revert       ; []
 63 | 
 64 | loadtime:
 65 |     ;; Load input timestamp.
 66 |     push 0           ; [0]
 67 |     calldataload     ; [input_timestamp]
 68 |     dup1             ; [input_timestamp, input_timestamp]
 69 | 
 70 |     ;; Verify input timestamp is non-zero.
 71 |     iszero           ; [input_timestamp == 0, input_timestamp]
 72 |     jumpi @throw     ; [input_timestamp]
 73 | 
 74 |     ;; Compute the timestamp index and load from storage.
 75 |     push buflen      ; [buflen, input_timestamp]
 76 |     dup2             ; [input_timestamp, buflen, input_timestamp]
 77 |     mod              ; [time_index, input_timestamp]
 78 |     swap1            ; [input_timestamp, time_index]
 79 |     dup2             ; [time_index, input_timestamp, time_index]
 80 |     sload            ; [stored_timestamp, input_timestamp, time_index]
 81 | 
 82 |     ;; Verify stored timestamp matches input timestamp. It's possible these
 83 |     ;; don't match if the slot has been overwritten by the ring buffer or if
 84 |     ;; the timestamp input wasn't a valid previous timestamp.
 85 |     eq               ; [stored_timestamp == input_timestamp, time_index]
 86 |     jumpi @loadroot  ; [time_index]
 87 |     %do_revert       ; []
 88 | 
 89 | loadroot:
 90 |     ;; Extend index to get root index.
 91 |     push buflen      ; [buflen, time_index]
 92 |     add              ; [root_index]
 93 |     sload            ; [root]
 94 | 
 95 |     ;; Write the retrieved root to memory so it can be returned.
 96 |     push 0           ; [0, root]
 97 |     mstore           ; []
 98 | 
 99 |     ;; Return the root.
100 |     push 32          ; [size]
101 |     push 0           ; [offset, size]
102 |     return           ; []
103 | 
104 | throw:
105 |     ;; Reverts current execution with no return data.
106 |     %do_revert
107 | 
108 | submit:
109 |     ;; Calculate the index the timestamp should be stored at, e.g.
110 |     ;; time_index = (time % buflen).
111 |     push buflen      ; [buflen]
112 |     timestamp        ; [time, buflen]
113 |     mod              ; [time % buflen]
114 | 
115 |     ;; Write timestamp into storage slot at time_index.
116 |     timestamp        ; [time, time_index]
117 |     dup2             ; [time_index, time, time_index]
118 |     sstore           ; [time_index]
119 | 
120 |     ;; Get root from calldata and write into root_index. No validation is
121 |     ;; done on the input root. Because the routine is protected by a caller
122 |     ;; check against sysaddr, it's okay to assume the value is correctly
123 |     ;; given.
124 |     push 0           ; [0, time_index]
125 |     calldataload     ; [root, time_index]
126 |     swap1            ; [time_index, root]
127 |     push buflen      ; [buflen, time_index, root]
128 |     add              ; [root_index, root]
129 |     sstore           ; []
130 | 
131 |     stop             ; []
132 | 


--------------------------------------------------------------------------------
/example/4788asm_ctor.eas:
--------------------------------------------------------------------------------
 1 | ;;;    __ ___________  ____
 2 | ;;;   / // /__  ( __ )( __ )____ __________ ___
 3 | ;;;  / // /_ / / __  / __  / __ `/ ___/ __ `__ \
 4 | ;;; /__  __// / /_/ / /_/ / /_/ (__  ) / / / / /
 5 | ;;;   /_/  /_/\____/\____/\__,_/____/_/ /_/ /_/
 6 | ;;;
 7 | ;;; constructor code
 8 | 
 9 | #pragma target "cancun"
10 | 
11 |     push @.end - @.start  ; [size]
12 |     dup1                  ; [size, size] 
13 |     push @.start          ; [start, size, size]
14 |     push 0                ; [0, start, size, size]
15 |     codecopy              ; [size]
16 |     push 0                ; [0, size]
17 |     return                ; []
18 | 
19 | .start:
20 | #assemble "4788asm.eas"
21 | .end:
22 | 


--------------------------------------------------------------------------------
/example/erc20/erc20.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20
 2 | ;;;
 3 | ;;; Minimal ERC-20 implementation in raw assembly.
 4 | ;;;
 5 | ;;; Storage Layout
 6 | ;;; --
 7 | ;;; balance(address)          => 0x000000000000000000000000 || address
 8 | ;;; allowance(owner, spender) => keccak(owner || spender)
 9 | 
10 | #pragma target "constantinople"
11 | 
12 | #define %match(candidate, label) { ; [selector]
13 |     dup1             ; [selector, selector]
14 |     push $candidate  ; [candidate, selector, selector]
15 |     eq               ; [success, selector]
16 |     push $label      ; [label, success, selector]
17 |     jumpi            ; [selector]
18 | }
19 | 
20 | ;;; Program start.
21 |     
22 |     ;; Read the calldata into memory.
23 |     calldatasize     ; [calldatasize]
24 |     push 0           ; [0, calldatasize]
25 |     dup1             ; [0, 0, calldatasize]
26 |     calldatacopy     ; []
27 | 
28 |     ;; Extract only the function selector
29 |     push 0           ; [0]
30 |     mload            ; [dirty_selector]
31 |     push 224         ; [224, dirty_selector]
32 |     shr              ; [selector]
33 | 
34 |     ;; Jump to the selected function.
35 |     %match(S_transferFrom, @TransferFrom)
36 |     %match(S_approve, @Approve)
37 |     %match(S_transfer, @Transfer)
38 | 
39 |     ;; Check the view functions last to not waste gas on-chain.
40 |     %match(S_balanceOf, @BalanceOf)
41 |     %match(S_allowance, @Allowance)
42 |     %match(S_decimals, @Return0)
43 |     %match(S_totalSupply, @Return0)
44 | 
45 |                      ; [selector] is left on stack here.
46 | 
47 | FAIL:
48 |     ;; Catchall for reverts.
49 |     push 0           ; [0, selector]
50 |     push 0           ; [0, 0, selector]
51 |     revert           ; [selector]
52 | 
53 | ;;; Write operations.
54 | ;;; These do not return, so there are STOPs in between.
55 | 
56 | #include "op_transferFrom.eas"
57 |     stop
58 | #include "op_approve.eas"
59 |     stop
60 | #include "op_transfer.eas"
61 |     stop
62 | 
63 | ;;; View functions, these return on their own.
64 | 
65 | #include "op_balanceOf.eas"
66 | #include "op_allowance.eas"
67 | 
68 | ;;; View functions for token metadata, these just return zero.
69 | 
70 | #define S_decimals = .selector("decimals()")
71 | #define S_totalSupply = .selector("totalSupply()")
72 | 
73 | Return0:
74 | 


--------------------------------------------------------------------------------
/example/erc20/erc20_ctor.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - constructor
 2 | ;;;
 3 | 
 4 | #pragma target "constantinople"
 5 | 
 6 |     pc                  ; [0]
 7 | 
 8 |     ;; give deployer initial supply
 9 |     push 10000          ; [balance, 0]
10 |     caller              ; [caller, balance, 0]
11 |     sstore              ; [0]
12 | 
13 |     push @.start        ; [start, 0]
14 |     dup1                ; [start, start, 0]
15 |     codesize            ; [codesize, start, start, 0]
16 |     sub                 ; [length, start, 0]
17 |     swap2               ; [0, start, length]
18 |     swap1               ; [start, 0, length]
19 |     dup3                ; [length, start, 0, length]
20 |     swap1               ; [start, length, 0, length]
21 |     dup3                ; [0, start, length, 0, length]
22 |     codecopy            ; [0, length]
23 |     return              ; []
24 | 
25 | .start:
26 | #assemble "erc20.eas"
27 | 


--------------------------------------------------------------------------------
/example/erc20/op_allowance.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - allowance
 2 | ;;;
 3 | ;;; calldata structure
 4 | ;;; +--------------------+------------------+------------------+------------------+--------------------+
 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | owner (20 bytes) | zeros (12 bytes) | spender (20 bytes) |
 6 | ;;; +--------------------+------------------+------------------+------------------+--------------------+
 7 | 
 8 | #define S_allowance = .selector("allowance(address,address)")
 9 | 
10 | Allowance:
11 |     push 64          ; [len]
12 |     push 4           ; [offset, len]
13 |     keccak256        ; [key]
14 | 
15 |     sload            ; [allowance]
16 |     push 0           ; [offset, allowance]
17 |     mstore           ; []
18 | 
19 |     push 32          ; [32]
20 |     push 0           ; [0, 32]
21 |     return           ; []
22 | 


--------------------------------------------------------------------------------
/example/erc20/op_approve.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - approve
 2 | ;;;
 3 | ;;; calldata structure
 4 | ;;; +--------------------+------------------+--------------------+-------------------+
 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | spender (20 bytes) | amount (32 bytes) |
 6 | ;;; +--------------------+------------------+--------------------+-------------------+
 7 | 
 8 | #define S_approve = .selector("approve(address,uint256)")
 9 | #define logtopic = .keccak256("Approval(address,address,uint256)")
10 | 
11 | Approve:
12 |     push 36          ; [36]
13 |     mload            ; [amt]
14 | 
15 |     push 4           ; [4, amt]
16 |     mload            ; [spender, amt]
17 | 
18 |     ;; write the spender to mem[36..68]
19 |     push 36          ; [36, spender, amt]
20 |     mstore           ; [amt]
21 | 
22 |     ;; write the caller to mem[4..36]
23 |     caller           ; [caller, amt]
24 |     push 4           ; [4, caller, amt]
25 |     mstore           ; [amt]
26 | 
27 |     ;; hash mem[4..68]
28 |     push 64          ; [len, amt]
29 |     push 4           ; [offset, len, amt]
30 |     keccak256        ; [key, amt]
31 |     dup1             ; [key, key, amt]
32 | 
33 |     sload            ; [allowance, key, amt]
34 |     swap1            ; [key, allowance, amt]
35 |     swap2            ; [amt, allowance, key]
36 |     add              ; [new_allowance, key]
37 |     swap1            ; [key, new_allowance]
38 |     sstore           ; []
39 | 
40 |     ;; output event
41 |     push 4           ; [4]
42 |     calldataload     ; [spender]
43 |     caller           ; [owner, spender]
44 |     push 36          ; [36, owner, spender]
45 |     calldataload     ; [amt, owner, spender]
46 |     push 0           ; [0, amt, owner, spender]
47 |     mstore           ; [owner, spender]
48 |     push logtopic    ; [topic, owner, spender]
49 |     push 32          ; [32, topic, owner, spender]
50 |     push 0           ; [0, 32, topic, owner, spender]
51 |     log3             ; []
52 | 


--------------------------------------------------------------------------------
/example/erc20/op_balanceOf.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - balanceOf
 2 | ;;;
 3 | ;;; calldata structure
 4 | ;;; +--------------------+------------------+--------------------+
 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | address (20 bytes) |
 6 | ;;; +--------------------+------------------+--------------------+
 7 | 
 8 | #define S_balanceOf = .selector("balanceOf(address)")
 9 | 
10 | BalanceOf:
11 |     ;; prepare return parameters
12 |     push 32          ; [ret_len]
13 |     push 0           ; [ret_offset, ret_len]
14 | 
15 |     ;; skip the selector, load the address
16 |     push 4           ; [4, ret_offset, ret_len]
17 |     mload            ; [addr, ret_offset, ret_len]
18 | 
19 |     ;; load balance
20 |     sload            ; [balance, ret_offset, ret_len]
21 | 
22 |     ;; prepare balance to return
23 |     dup2             ; [0, balance, ret_offset, ret_len]
24 |     mstore           ; [ret_offset, ret_len]
25 | 
26 |     return           ; []
27 | 


--------------------------------------------------------------------------------
/example/erc20/op_transfer.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - transfer
 2 | ;;;
 3 | ;;; calldata structure
 4 | ;;; +--------------------+------------------+---------------+-------------------+
 5 | ;;; | selector (4 bytes) | zeros (12 bytes) | to (20 bytes) | amount (32 bytes) |
 6 | ;;; +--------------------+------------------+---------------+-------------------+
 7 | 
 8 | #define S_transfer = .selector("transfer(address,uint256)")
 9 | #define logtopic = .keccak256("Transfer(address,address,uint256)")
10 | 
11 | Transfer:
12 |     caller           ; [from]
13 |     sload            ; [from_balance]
14 |     push1 36         ; [36, from_balance]
15 |     mload            ; [amt, from_balance]
16 |     dup2             ; [from_balance, amt, from_balance]
17 |     dup2             ; [amt, from_balance, amt, from_balance]
18 | 
19 |     ;; if amt > from_balance, revert
20 |     gt               ; [amt > from_balance, amt, from_balance]
21 |     jumpi @FAIL      ; [amt, from_balance]
22 | 
23 |     ;; sstore(from, from_balance - amt)
24 |     swap1            ; [from_balance, amt]
25 |     sub              ; [from_balance - amt]
26 |     caller           ; [from, from_balance - amt]
27 |     sstore           ; []
28 | 
29 |     ;; sstore(to, to_balance + amt)
30 |     push 4           ; [4]
31 |     mload            ; [to]
32 |     dup1             ; [to, to]
33 |     sload            ; [to_balance, to]
34 |     push 36          ; [36, to_balance, to]
35 |     mload            ; [amt, to_balance, to]
36 |     add              ; [amt + to_balance, to]
37 |     swap1            ; [to, amt + to_balance]
38 |     sstore           ; []
39 | 
40 |     ;; output event
41 |     push 4           ; [4]
42 |     calldataload     ; [to]
43 |     caller           ; [from, to]
44 |     push 36          ; [36, from, to]
45 |     calldataload     ; [amt, from, to]
46 |     push 0           ; [0, amt, from, to]
47 |     mstore           ; [from, to]
48 |     push logtopic    ; [topic, from, to]
49 |     push 32          ; [32, topic, from, to]
50 |     push 0           ; [0, 32, topic, from, to]
51 |     log3             ; []
52 | 


--------------------------------------------------------------------------------
/example/erc20/op_transferFrom.eas:
--------------------------------------------------------------------------------
 1 | ;;; ERC20 - transferFrom
 2 | ;;;
 3 | ;;; calldata structure
 4 | ;;; +--------------------+-----------------+---------------+-------------------+
 5 | ;;; | selector (4 bytes) | from (32 bytes) | to (32 bytes) | amount (32 bytes) |
 6 | ;;; +--------------------+-----------------+---------------+-------------------+
 7 | 
 8 | #define S_transferFrom = .selector("transferFrom(address,address,uint256)")
 9 | #define logtopic = .keccak256("Transfer(address,address,uint256)")
10 | 
11 | TransferFrom:
12 |     push 64          ; [len]
13 |     push 4           ; [offset, len]
14 |     keccak256        ; [key]
15 |     dup1             ; [key, key]
16 | 
17 |     sload            ; [available, key]
18 | 
19 |     push 68          ; [68, available, key]
20 |     mload            ; [amt, available, key]
21 | 
22 |     ;; if amt > available, revert
23 |     dup2             ; [available, amt, available, key]
24 |     dup2             ; [amt, available, amt, available, key]
25 |     gt               ; [amt > available, amt, available, key]
26 |     jumpi @FAIL      ; [amt, available, key]
27 | 
28 |     ;; load the sender balance
29 |     push 4           ; [4, amt, available, key]
30 |     calldataload     ; [from, amt, available, key]
31 |     sload            ; [from_balance, amt, available, key]
32 | 
33 |     lt               ; [from_balance < amt, available, key]
34 |     jumpi @FAIL      ; [available, key]
35 | 
36 |     ;; reduce allowance by amt
37 |     push 68          ; [68, available, key]
38 |     calldataload     ; [amt, available, key]
39 |     swap1            ; [available, amt, key]
40 |     sub              ; [available - amt, key]
41 |     swap1            ; [key, available - amt]
42 |     sstore           ; []
43 | 
44 |     ;; reduce from balance by amt
45 |     push 4           ; [4]
46 |     calldataload     ; [from]
47 |     dup1             ; [from, from]
48 |     sload            ; [from_balance, from]
49 | 
50 |     push 68          ; [68, from_balance, from]
51 |     calldataload     ; [amt, from_balance, from]
52 |     dup1             ; [amt, amt, from_balance, from]
53 | 
54 |     swap2            ; [from_balance, amt, amt, from]
55 |     sub              ; [from_balance - amt, amt, from]
56 |     swap1            ; [amt, from_balance - amt, from]
57 |     swap2            ; [from, from_balance - amt, amt]
58 |     sstore           ; [amt]
59 | 
60 |     ;; increase to balance by amt
61 |     push 36          ; [36, amt]
62 |     calldataload     ; [to, amt]
63 |     dup1             ; [to, to, amt]
64 |     sload            ; [to_balance, to, amt]
65 | 
66 |     swap1            ; [to, to_balance, amt]
67 |     swap2            ; [amt, to_balance, to]
68 |     add              ; [amt + to_balance, to]
69 |     swap1            ; [to, amt + to_balance]
70 |     sstore           ; []
71 | 
72 |     ;; output event
73 |     push 36          ; [36]
74 |     calldataload     ; [to]
75 |     push 4           ; [4, to]
76 |     calldataload     ; [from, to]
77 |     push 68          ; [68, from, to]
78 |     calldataload     ; [amt, from, to]
79 |     push 0           ; [0, amt, from, to]
80 |     mstore           ; [from, to]
81 |     push logtopic    ; [topic, from, to]
82 |     push 32          ; [32, topic, from, to]
83 |     push 0           ; [0, 32, topic, from, to]
84 |     log3             ; []
85 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/fjl/geas
 2 | 
 3 | go 1.23
 4 | 
 5 | require (
 6 | 	github.com/ethereum/go-ethereum v1.12.2
 7 | 	golang.org/x/crypto v0.13.0
 8 | 	gopkg.in/yaml.v3 v3.0.1
 9 | )
10 | 
11 | require (
12 | 	github.com/btcsuite/btcd/btcec/v2 v2.2.0 // indirect
13 | 	github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect
14 | 	github.com/holiman/uint256 v1.2.3 // indirect
15 | 	golang.org/x/sys v0.12.0 // indirect
16 | )
17 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
 1 | github.com/btcsuite/btcd/btcec/v2 v2.2.0 h1:fzn1qaOt32TuLjFlkzYSsBC35Q3KUjT1SwPxiMSCF5k=
 2 | github.com/btcsuite/btcd/btcec/v2 v2.2.0/go.mod h1:U7MHm051Al6XmscBQ0BoNydpOTsFAn707034b5nY8zU=
 3 | github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1 h1:q0rUy8C/TYNBQS1+CGKw68tLOFYSNEs0TFnxxnS9+4U=
 4 | github.com/btcsuite/btcd/chaincfg/chainhash v1.0.1/go.mod h1:7SFka0XMvUgj3hfZtydOrQY2mwhPclbT2snogU7SQQc=
 5 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
 6 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 7 | github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK09Y2A4Xv7EE0=
 8 | github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc=
 9 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc=
10 | github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs=
11 | github.com/ethereum/go-ethereum v1.12.2 h1:eGHJ4ij7oyVqUQn48LBz3B7pvQ8sV0wGJiIE6gDq/6Y=
12 | github.com/ethereum/go-ethereum v1.12.2/go.mod h1:1cRAEV+rp/xX0zraSCBnu9Py3HQ+geRMj3HdR+k0wfI=
13 | github.com/holiman/uint256 v1.2.3 h1:K8UWO1HUJpRMXBxbmaY1Y8IAMZC/RsKB+ArEnnK4l5o=
14 | github.com/holiman/uint256 v1.2.3/go.mod h1:SC8Ryt4n+UBbPbIBKaG9zbbDlp4jOru9xFZmPzLUTxw=
15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
17 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
18 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
19 | golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck=
20 | golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc=
21 | golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
22 | golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
23 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
24 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
25 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
26 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
27 | 


--------------------------------------------------------------------------------
/internal/ast/arith.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package ast
18 | 
19 | //go:generate go run golang.org/x/tools/cmd/stringer@latest -type ArithOp
20 | 
// ArithOp is an arithmetic operation.
type ArithOp byte

// The arithmetic operators. Values start at 1, so the zero ArithOp does not
// denote any operator.
const (
	ArithPlus   = ArithOp(iota + 1) // +
	ArithMinus                      // -
	ArithMul                        // *
	ArithDiv                        // /
	ArithMod                        // %
	ArithLshift                     // <<
	ArithRshift                     // >>
	ArithAnd                        // &
	ArithOr                         // |
	ArithXor                        // ^
	ArithMax    = ArithXor          // highest operator value; sizes the precedence table
)
37 | 
// arithChars maps the single-character arithmetic operators to their ArithOp.
// '%' is deliberately absent from this map because the character has a dual
// purpose; tokenArithOp handles it explicitly, together with the
// two-character operators "<<" and ">>".
var arithChars = map[rune]ArithOp{
	'+': ArithPlus,
	'-': ArithMinus,
	'*': ArithMul,
	'/': ArithDiv,
	'&': ArithAnd,
	'|': ArithOr,
	'^': ArithXor,
}
49 | 
50 | func tokenArithOp(tok token) ArithOp {
51 | 	if tok.typ != arith {
52 | 		panic("token is not arith")
53 | 	}
54 | 	switch {
55 | 	case tok.text == "<<":
56 | 		return ArithLshift
57 | 	case tok.text == ">>":
58 | 		return ArithRshift
59 | 	case tok.text == "%":
60 | 		return ArithMod
61 | 	default:
62 | 		op, ok := arithChars[[]rune(tok.text)[0]]
63 | 		if !ok {
64 | 			panic("invalid arith op")
65 | 		}
66 | 		return op
67 | 	}
68 | }
69 | 
// precedence assigns a precedence level to every arithmetic operator. The
// table is indexed by ArithOp; index 0 is unused because operator values
// start at 1. Multiplicative, shift and '&' operators are at level 2, while
// additive, '|' and '^' operators are at level 1.
// NOTE(review): presumably a higher level binds tighter; confirm against the
// expression parser, which is not part of this file.
var precedence = [ArithMax + 1]int{
	ArithMul:    2,
	ArithDiv:    2,
	ArithMod:    2,
	ArithLshift: 2,
	ArithRshift: 2,
	ArithAnd:    2,
	ArithPlus:   1,
	ArithMinus:  1,
	ArithOr:     1,
	ArithXor:    1,
}
82 | 


--------------------------------------------------------------------------------
/internal/ast/arithop_string.go:
--------------------------------------------------------------------------------
 1 | // Code generated by "stringer -type ArithOp"; DO NOT EDIT.
 2 | 
 3 | package ast
 4 | 
 5 | import "strconv"
 6 | 
 7 | func _() {
 8 | 	// An "invalid array index" compiler error signifies that the constant values have changed.
 9 | 	// Re-run the stringer command to generate them again.
10 | 	var x [1]struct{}
11 | 	_ = x[ArithPlus-1]
12 | 	_ = x[ArithMinus-2]
13 | 	_ = x[ArithMul-3]
14 | 	_ = x[ArithDiv-4]
15 | 	_ = x[ArithMod-5]
16 | 	_ = x[ArithLshift-6]
17 | 	_ = x[ArithRshift-7]
18 | 	_ = x[ArithAnd-8]
19 | 	_ = x[ArithOr-9]
20 | 	_ = x[ArithXor-10]
21 | }
22 | 
23 | const _ArithOp_name = "ArithPlusArithMinusArithMulArithDivArithModArithLshiftArithRshiftArithAndArithOrArithNot"
24 | 
25 | var _ArithOp_index = [...]uint8{0, 9, 19, 27, 35, 43, 54, 65, 73, 80, 88}
26 | 
27 | func (i ArithOp) String() string {
28 | 	i -= 1
29 | 	if i >= ArithOp(len(_ArithOp_index)-1) {
30 | 		return "ArithOp(" + strconv.FormatInt(int64(i+1), 10) + ")"
31 | 	}
32 | 	return _ArithOp_name[_ArithOp_index[i]:_ArithOp_index[i+1]]
33 | }
34 | 


--------------------------------------------------------------------------------
/internal/ast/ast.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package ast
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"maps"
 22 | 	"slices"
 23 | 
 24 | 	"github.com/fjl/geas/internal/lzint"
 25 | )
 26 | 
// Document is the toplevel of the AST. It represents a list of abstract instructions and
// macro definitions.
type Document struct {
	File       string
	Statements []Statement

	// The document that contains/encloses this document.
	// The Lookup* methods continue their search here when a name is not
	// defined in this document.
	Parent *Document

	// The statement that created this document.
	// This is filled in for instruction macros, #include/#assemble, etc.
	Creation Statement

	// Definitions made directly in this document, keyed by name.
	labels      map[string]*LabelDefSt
	exprMacros  map[string]*ExpressionMacroDef
	instrMacros map[string]*InstructionMacroDef
}
 44 | 
 45 | // LookupLabel finds the definition of a label.
 46 | func (doc *Document) LookupLabel(lref *LabelRefExpr) (*LabelDefSt, *Document) {
 47 | 	for doc != nil {
 48 | 		li, ok := doc.labels[lref.Ident]
 49 | 		if ok {
 50 | 			return li, doc
 51 | 		}
 52 | 		doc = doc.Parent
 53 | 	}
 54 | 	return nil, nil
 55 | }
 56 | 
 57 | // LookupInstrMacro finds the definition of an instruction macro.
 58 | func (doc *Document) LookupInstrMacro(name string) (*InstructionMacroDef, *Document) {
 59 | 	for doc != nil {
 60 | 		if def, ok := doc.instrMacros[name]; ok {
 61 | 			return def, doc
 62 | 		}
 63 | 		doc = doc.Parent
 64 | 	}
 65 | 	return nil, nil
 66 | }
 67 | 
 68 | // LookupExprMacro finds the definition of an expression macro.
 69 | func (doc *Document) LookupExprMacro(name string) (*ExpressionMacroDef, *Document) {
 70 | 	for doc != nil {
 71 | 		if def, ok := doc.exprMacros[name]; ok {
 72 | 			return def, doc
 73 | 		}
 74 | 		doc = doc.Parent
 75 | 	}
 76 | 	return nil, nil
 77 | }
 78 | 
 79 | // GlobalLabels returns the list of global label definitions in the docment.
 80 | func (doc *Document) GlobalLabels() []*LabelDefSt {
 81 | 	result := make([]*LabelDefSt, 0)
 82 | 	for _, name := range slices.Sorted(maps.Keys(doc.labels)) {
 83 | 		if IsGlobal(name) {
 84 | 			result = append(result, doc.labels[name])
 85 | 		}
 86 | 	}
 87 | 	return result
 88 | }
 89 | 
 90 | // GlobalExprMacros returns the list of global expression macro definitions in the docment.
 91 | func (doc *Document) GlobalExprMacros() []*ExpressionMacroDef {
 92 | 	result := make([]*ExpressionMacroDef, 0)
 93 | 	for _, name := range slices.Sorted(maps.Keys(doc.exprMacros)) {
 94 | 		if IsGlobal(name) {
 95 | 			result = append(result, doc.exprMacros[name])
 96 | 		}
 97 | 	}
 98 | 	return result
 99 | }
100 | 
101 | // GlobalInstrMacros returns the list of global instruction macro definitions in the docment.
102 | func (doc *Document) GlobalInstrMacros() []*InstructionMacroDef {
103 | 	result := make([]*InstructionMacroDef, 0)
104 | 	for _, name := range slices.Sorted(maps.Keys(doc.instrMacros)) {
105 | 		if IsGlobal(name) {
106 | 			result = append(result, doc.instrMacros[name])
107 | 		}
108 | 	}
109 | 	return result
110 | }
111 | 
112 | // InstrMacros returns the list of all instruction macro definitions in the docment.
113 | func (doc *Document) InstrMacros() []*InstructionMacroDef {
114 | 	result := make([]*InstructionMacroDef, 0)
115 | 	for _, name := range slices.Sorted(maps.Keys(doc.instrMacros)) {
116 | 		result = append(result, doc.instrMacros[name])
117 | 	}
118 | 	return result
119 | }
120 | 
121 | func (doc *Document) CreationString() string {
122 | 	if doc.Creation == nil {
123 | 		if doc.File == "" {
124 | 			return ""
125 | 		}
126 | 		return " in " + doc.File
127 | 	}
128 | 	return fmt.Sprintf(" by %s at %v", doc.Creation.Description(), doc.Creation.Position())
129 | }
130 | 
// Statement is the interface implemented by the statement and definition
// nodes declared in this package.
type Statement interface {
	// Position returns the source location of the statement.
	Position() Position
	// Description returns a short text identifying the statement, as used in
	// messages such as the one built by Document.CreationString.
	Description() string
}
135 | 
// toplevel statement types
type (
	// OpcodeSt is an opcode statement.
	OpcodeSt struct {
		Op       string
		Src      *Document
		Arg      Expr // Immediate argument for PUSH* / JUMP*.
		PushSize byte // For PUSH<n>, this is n+1.
		tok      token
	}

	// LabelDefSt is the definition of a label. The label name is the text of
	// tok, see Name.
	LabelDefSt struct {
		Src    *Document
		Dotted bool
		Global bool
		tok    token
	}

	// MacroCallSt is the invocation of an instruction macro.
	MacroCallSt struct {
		Ident string
		Src   *Document
		Args  []Expr
		tok   token
	}

	// IncludeSt is an #include statement.
	IncludeSt struct {
		tok      token
		Src      *Document
		Filename string
	}

	// AssembleSt is an #assemble statement.
	AssembleSt struct {
		tok      token
		Src      *Document
		Filename string
	}

	// PragmaSt is a #pragma statement setting Option to Value.
	PragmaSt struct {
		pos    Position
		Option string
		Value  string
	}

	// BytesSt is a #bytes statement.
	BytesSt struct {
		pos   Position
		Value Expr
	}
)
183 | 
// definitions
type (
	// ExpressionMacroDef is the definition of an expression macro.
	// Its body is a single expression.
	ExpressionMacroDef struct {
		Name   string
		Params []string
		Body   Expr
		pos    Position
	}

	// InstructionMacroDef is the definition of an instruction macro.
	// Its body is a nested document.
	InstructionMacroDef struct {
		Name   string
		Params []string
		Body   *Document
		pos    Position
	}
)
200 | 
// expression types
type (
	// Expr is any expression node. The concrete node types are listed below.
	Expr any

	// LiteralExpr is a number or string literal, see IsNumber and IsString.
	LiteralExpr struct {
		tok   token
		Value *lzint.Value // cached value
	}

	// LabelRefExpr is a reference to a label, written @name or @.name.
	LabelRefExpr struct {
		Ident  string
		Dotted bool
		Global bool
	}

	// VariableExpr is a reference to a variable by name.
	VariableExpr struct {
		Ident string
	}

	// MacroCallExpr is the invocation of an expression macro.
	MacroCallExpr struct {
		Ident   string
		Builtin bool
		Args    []Expr
	}

	// ArithExpr is a binary arithmetic operation.
	ArithExpr struct {
		Op    ArithOp
		Left  Expr
		Right Expr
	}
)
232 | 
233 | func (inst *MacroCallSt) Position() Position {
234 | 	return Position{File: inst.Src.File, Line: inst.tok.line}
235 | }
236 | 
237 | func (inst *MacroCallSt) Description() string {
238 | 	return fmt.Sprintf("invocation of %%%s", inst.Ident)
239 | }
240 | 
241 | func (inst *IncludeSt) Position() Position {
242 | 	return Position{File: inst.Src.File, Line: inst.tok.line}
243 | }
244 | 
245 | func (inst *IncludeSt) Description() string {
246 | 	return fmt.Sprintf("#include %q", inst.Filename)
247 | }
248 | 
249 | func (inst *AssembleSt) Position() Position {
250 | 	return Position{File: inst.Src.File, Line: inst.tok.line}
251 | }
252 | 
253 | func (inst *AssembleSt) Description() string {
254 | 	return fmt.Sprintf("#assemble %q", inst.Filename)
255 | }
256 | 
// Position returns the source location stored in the statement.
func (inst *PragmaSt) Position() Position {
	return inst.pos
}

// Description returns the statement as "#pragma", the option name and the
// quoted value.
func (inst *PragmaSt) Description() string {
	return fmt.Sprintf("#pragma %s %q", inst.Option, inst.Value)
}
264 | 
// Position returns the source location stored in the statement.
func (inst *BytesSt) Position() Position {
	return inst.pos
}

// Description returns the fixed text "#bytes"; the value expression is not
// included.
func (inst *BytesSt) Description() string {
	return "#bytes"
}
272 | 
273 | func (inst *OpcodeSt) Position() Position {
274 | 	return Position{File: inst.Src.File, Line: inst.tok.line}
275 | }
276 | 
277 | func (inst *OpcodeSt) Description() string {
278 | 	return fmt.Sprintf("opcode %s", inst.tok.text)
279 | }
280 | 
281 | func (inst *LabelDefSt) Position() Position {
282 | 	return Position{File: inst.Src.File, Line: inst.tok.line}
283 | }
284 | 
285 | func (inst *LabelDefSt) Description() string {
286 | 	return fmt.Sprintf("definition of %s", inst.String())
287 | }
288 | 
289 | func (def *InstructionMacroDef) Position() Position {
290 | 	return def.pos
291 | }
292 | 
293 | func (def *InstructionMacroDef) Description() string {
294 | 	return fmt.Sprintf("definition of %%%s", def.Name)
295 | }
296 | 
297 | func (def *ExpressionMacroDef) Position() Position {
298 | 	return def.pos
299 | }
300 | 
301 | func (def *ExpressionMacroDef) Description() string {
302 | 	return fmt.Sprintf("definition of %s", def.Name)
303 | }
304 | 
305 | func (l *LabelRefExpr) String() string {
306 | 	dot := ""
307 | 	if l.Dotted {
308 | 		dot = "."
309 | 	}
310 | 	return "@" + dot + l.Ident
311 | }
312 | 
313 | func (l *LabelDefSt) String() string {
314 | 	r := LabelRefExpr{Dotted: l.Dotted, Ident: l.tok.text}
315 | 	return r.String()
316 | }
317 | 
// Name returns the label name, which is the text of the defining token.
func (l *LabelDefSt) Name() string {
	return l.tok.text
}
321 | 
// IsString reports whether the literal was lexed as a string literal.
func (e *LiteralExpr) IsString() bool {
	return e.tok.typ == stringLiteral
}

// IsNumber reports whether the literal was lexed as a number literal.
func (e *LiteralExpr) IsNumber() bool {
	return e.tok.typ == numberLiteral
}

// Text returns the text of the literal token.
func (e *LiteralExpr) Text() string {
	return e.tok.text
}
333 | 


--------------------------------------------------------------------------------
/internal/ast/error.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package ast
18 | 
19 | import "fmt"
20 | 
// Position identifies a line of a source file.
type Position struct {
	File string
	Line int
}

// String formats the position as "file:line".
func (p Position) String() string {
	return p.File + ":" + fmt.Sprint(p.Line)
}
30 | 
// ParseError is an error that happened during parsing.
type ParseError struct {
	tok     token  // token the error is reported at; supplies the line number
	file    string // name of the file being parsed
	err     error  // underlying error, returned by Unwrap
	warning bool   // marks the error as a warning, see IsWarning
}
38 | 
39 | func (e *ParseError) Error() string {
40 | 	warn := ""
41 | 	if e.warning {
42 | 		warn = "warning: "
43 | 	}
44 | 	return fmt.Sprintf("%s:%d: %s%v", e.file, e.tok.line, warn, e.err)
45 | }
46 | 
// Position returns the file and line the error is reported at.
func (e *ParseError) Position() Position {
	return Position{File: e.file, Line: e.tok.line}
}

// IsWarning reports whether the error is flagged as a warning.
func (e *ParseError) IsWarning() bool {
	return e.warning
}

// Unwrap returns the underlying error, which makes ParseError usable with
// errors.Is and errors.As.
func (e *ParseError) Unwrap() error {
	return e.err
}
58 | 
59 | func ErrLabelAlreadyDef(firstDef, secondDef *LabelDefSt) error {
60 | 	dotInfo := ""
61 | 	if firstDef.Dotted && !secondDef.Dotted {
62 | 		dotInfo = " (as dotted label)"
63 | 	}
64 | 	if !firstDef.Dotted && secondDef.Dotted {
65 | 		dotInfo = " (as jumpdest)"
66 | 	}
67 | 	return fmt.Errorf("%v already defined%s", secondDef, dotInfo)
68 | }
69 | 


--------------------------------------------------------------------------------
/internal/ast/lexer.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2017 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package ast
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"os"
 22 | 	"strings"
 23 | 	"unicode"
 24 | 	"unicode/utf8"
 25 | )
 26 | 
// stateFn is one state of the lexer. It consumes some input from the lexer
// and returns the state to continue with; returning nil ends lexing.
type stateFn func(*lexer) stateFn
 31 | 
// token is emitted when the lexer has discovered
// a new parsable token. Tokens are delivered over
// the token channel of the lexer.
type token struct {
	text string    // input text covered by the token
	line int       // line number the token was emitted on
	typ  tokenType // kind of token
}

// String formats the token as "<type> <text> (line <n>)".
func (t *token) String() string {
	return fmt.Sprintf("%v %s (line %d)", t.typ, t.text, t.line)
}
 44 | 
// tokenType enumerates the kinds of token the lexer
// is able to recognize and return.
type tokenType byte

//go:generate go run golang.org/x/tools/cmd/stringer@latest -linecomment -type tokenType

// The trailing comment of each constant is the text that stringer
// (-linecomment) uses as the constant's String value, so these comments are
// data: changing one requires regenerating the String method.
const (
	eof                tokenType = iota // end of file
	lineStart                           // beginning of line
	lineEnd                             // end of line
	invalidToken                        // invalid character
	identifier                          // identifier
	dottedIdentifier                    // dotted identifier
	variableIdentifier                  // parameter reference
	labelRef                            // label reference
	dottedLabelRef                      // dotted label reference
	label                               // label definition
	dottedLabel                         // dotted label definition
	numberLiteral                       // number literal
	stringLiteral                       // string literal
	openParen                           // open parenthesis
	closeParen                          // close parenthesis
	comma                               // comma
	directive                           // directive
	instMacroIdent                      // macro identifier
	openBrace                           // open brace
	closeBrace                          // closing brace
	equals                              // equals sign
	arith                               // arithmetic operation
)
 75 | 
// lexer turns source code into tokens. It runs as a state machine: the
// current state function consumes input, emits tokens on the channel and
// selects the next state.
type lexer struct {
	input string // input contains the source code of the program

	tokens chan token // tokens is used to deliver tokens to the listener
	state  stateFn    // the current state function

	lineno            int // current line number in the source file
	start, pos, width int // start of the pending token, read position, and byte width of the last rune read

	debug bool // flag for triggering debug output
}
 90 | 
 91 | // runLexer lexes the program by name with the given source. It returns a
 92 | // channel on which the tokens are delivered.
 93 | func runLexer(source []byte, debug bool) <-chan token {
 94 | 	ch := make(chan token)
 95 | 	l := &lexer{
 96 | 		input:  string(source),
 97 | 		tokens: ch,
 98 | 		state:  lexNext,
 99 | 		debug:  debug,
100 | 		lineno: 1,
101 | 	}
102 | 	go func() {
103 | 		l.emit(lineStart)
104 | 		for l.state != nil {
105 | 			l.state = l.state(l)
106 | 		}
107 | 		l.emit(eof)
108 | 		close(l.tokens)
109 | 	}()
110 | 
111 | 	return ch
112 | }
113 | 
114 | // next returns the next rune in the program's source.
115 | func (l *lexer) next() (rune rune) {
116 | 	if l.pos >= len(l.input) {
117 | 		l.width = 0
118 | 		return 0
119 | 	}
120 | 	rune, l.width = utf8.DecodeRuneInString(l.input[l.pos:])
121 | 	l.pos += l.width
122 | 	return rune
123 | }
124 | 
// backup un-reads the rune returned by the most recent call to next by
// moving the read position back by that rune's byte width.
func (l *lexer) backup() {
	l.pos -= l.width
}
129 | 
// peek returns the next rune without consuming it.
func (l *lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}
136 | 
// ignore discards the input consumed so far: the pending token now starts at
// the current read position.
func (l *lexer) ignore() {
	l.start = l.pos
}
141 | 
142 | // Accepts checks whether the given input matches the next rune
143 | func (l *lexer) accept(valid string) bool {
144 | 	if strings.ContainsRune(valid, l.next()) {
145 | 		return true
146 | 	}
147 | 	l.backup()
148 | 	return false
149 | }
150 | 
151 | // acceptRun will continue to advance the seeker until valid
152 | // can no longer be met.
153 | func (l *lexer) acceptRun(fn func(rune) bool) {
154 | 	for fn(l.next()) {
155 | 	}
156 | 	l.backup()
157 | }
158 | 
159 | // acceptRunUntil is the inverse of acceptRun and will continue
160 | // to advance the seeker until the rune has been found.
161 | func (l *lexer) acceptRunUntil(until rune) bool {
162 | 	for {
163 | 		i := l.next()
164 | 		if i == until {
165 | 			l.pos--
166 | 			return true
167 | 		}
168 | 		if i == 0 {
169 | 			return false // eof
170 | 		}
171 | 	}
172 | }
173 | 
174 | // emit creates a new token and sends it to token channel for processing.
175 | func (l *lexer) emit(t tokenType) {
176 | 	token := token{line: l.lineno, text: l.input[l.start:l.pos], typ: t}
177 | 
178 | 	if l.debug {
179 | 		fmt.Fprintf(os.Stderr, "%04d: (%-20v) %s\n", token.line, token.typ, token.text)
180 | 	}
181 | 
182 | 	l.tokens <- token
183 | 	l.start = l.pos
184 | }
185 | 
// lexNext is the main state of the lexer. It reads one rune and dispatches on
// it: single-character tokens are emitted right away, all other constructs
// are handed to a dedicated state function. Newlines and whitespace are
// handled in place without leaving the loop. It returns nil at the end of the
// input.
//
// The order of the cases matters, since several of the conditions overlap.
func lexNext(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		// known symbols:

		case r == ';':
			return lexComment

		case r == '@':
			l.ignore()
			return lexLabel

		case r == '$':
			l.ignore()
			return lexVariable

		case r == '"':
			return lexInsideString

		case r == '(':
			l.emit(openParen)
			return lexNext

		case r == ')':
			l.emit(closeParen)
			return lexNext

		case r == '{':
			l.emit(openBrace)
			return lexNext

		case r == '}':
			l.emit(closeBrace)
			return lexNext

		case r == ',':
			l.emit(comma)
			return lexNext

		case r == '#':
			return lexPreprocessor

		case r == '=':
			l.emit(equals)
			return lexNext

		// numbers and identifiers:

		case unicode.IsDigit(r):
			return lexNumber

		case r == '.' || isIdentBegin(r):
			return lexIdentifier

		// arithmetic:

		case r == '<':
			return lexLshift

		case r == '>':
			return lexRshift

		case r == '%':
			return lexPercent

		// A rune that is not in arithChars yields the zero ArithOp.
		case arithChars[r] != 0:
			l.emit(arith)
			return lexNext

		// whitespace, etc.

		case r == '\n':
			// The line number changes between the two tokens, so lineEnd
			// belongs to the old line and lineStart to the new one.
			l.emit(lineEnd)
			l.ignore()
			l.lineno++
			l.emit(lineStart)

		case isSpace(r):
			l.ignore()

		case r == 0:
			return nil // eof

		default:
			l.emit(invalidToken)
		}
	}
}
275 | 
276 | // lexComment parses the current position until the end
277 | // of the line and discards the text.
278 | func lexComment(l *lexer) stateFn {
279 | 	l.acceptRunUntil('\n')
280 | 	l.ignore()
281 | 	return lexNext
282 | }
283 | 
284 | // lexLabel parses a label reference.
285 | func lexLabel(l *lexer) stateFn {
286 | 	typ := labelRef
287 | 	if l.peek() == '.' {
288 | 		typ = dottedLabelRef
289 | 		l.next() // consume optional .
290 | 		l.ignore()
291 | 	}
292 | 	l.acceptRun(isIdent)
293 | 	l.emit(typ)
294 | 	return lexNext
295 | }
296 | 
297 | func lexPercent(l *lexer) stateFn {
298 | 	r := l.peek()
299 | 	if isIdentBegin(r) {
300 | 		l.ignore()
301 | 		l.acceptRun(isIdent)
302 | 		l.emit(instMacroIdent)
303 | 	} else {
304 | 		l.emit(arith)
305 | 	}
306 | 	return lexNext
307 | }
308 | 
309 | // lexInsideString lexes the inside of a string until
310 | // the state function finds the closing quote.
311 | // It returns the lex text state function.
312 | func lexInsideString(l *lexer) stateFn {
313 | 	// TODO: allow escaping quotes
314 | 	if l.acceptRunUntil('"') {
315 | 		l.start += 1 // remove beginning quote
316 | 		l.emit(stringLiteral)
317 | 		l.next() // consume "
318 | 	}
319 | 	return lexNext
320 | }
321 | 
322 | func lexNumber(l *lexer) stateFn {
323 | 	acceptance := unicode.IsDigit
324 | 	if l.accept("xX") {
325 | 		acceptance = isHex
326 | 	}
327 | 	l.acceptRun(acceptance)
328 | 	l.emit(numberLiteral)
329 | 	return lexNext
330 | }
331 | 
332 | func lexLshift(l *lexer) stateFn {
333 | 	if !l.accept("<") {
334 | 		l.emit(invalidToken)
335 | 	} else {
336 | 		l.emit(arith)
337 | 	}
338 | 	return lexNext
339 | }
340 | 
341 | func lexRshift(l *lexer) stateFn {
342 | 	if !l.accept(">") {
343 | 		l.emit(invalidToken)
344 | 	} else {
345 | 		l.emit(arith)
346 | 	}
347 | 	return lexNext
348 | }
349 | 
// lexPreprocessor lexes a directive after its leading '#' has been consumed.
// The '#' is not ignored by the caller, so it is part of the token text.
func lexPreprocessor(l *lexer) stateFn {
	l.acceptRun(isIdent)
	l.emit(directive)
	return lexNext
}
355 | 
// lexVariable lexes a variable reference after its leading '$' has been
// consumed and ignored by lexNext; the token text is the bare name.
func lexVariable(l *lexer) stateFn {
	l.acceptRun(isIdent)
	l.emit(variableIdentifier)
	return lexNext
}
361 | 
362 | func lexIdentifier(l *lexer) stateFn {
363 | 	firstIsDot := l.input[l.start] == '.'
364 | 	if firstIsDot {
365 | 		l.ignore()
366 | 	}
367 | 	l.acceptRun(isIdent)
368 | 
369 | 	if l.peek() == ':' {
370 | 		if firstIsDot {
371 | 			l.emit(dottedLabel)
372 | 		} else {
373 | 			l.emit(label)
374 | 		}
375 | 		l.accept(":")
376 | 		l.ignore()
377 | 	} else {
378 | 		if firstIsDot {
379 | 			l.emit(dottedIdentifier)
380 | 		} else {
381 | 			l.emit(identifier)
382 | 		}
383 | 	}
384 | 	return lexNext
385 | }
386 | 
// isSpace reports whether t is a space character according to unicode.IsSpace.
func isSpace(t rune) bool {
	return unicode.IsSpace(t)
}
390 | 
// isHex reports whether t can be part of a hex number: a letter a-f in either
// case, or any rune that unicode.IsDigit accepts.
func isHex(t rune) bool {
	switch {
	case 'a' <= t && t <= 'f', 'A' <= t && t <= 'F':
		return true
	default:
		return unicode.IsDigit(t)
	}
}
394 | 
// isIdentBegin reports whether t can start an identifier, which is the case
// for '_' and for letters.
func isIdentBegin(t rune) bool {
	if t == '_' {
		return true
	}
	return unicode.IsLetter(t)
}
398 | 
// isIdent reports whether t can appear inside an identifier, which is the
// case for '_', letters, and runes in the Unicode number categories.
func isIdent(t rune) bool {
	switch {
	case t == '_':
		return true
	case unicode.IsLetter(t):
		return true
	default:
		return unicode.IsNumber(t)
	}
}
402 | 


--------------------------------------------------------------------------------
/internal/ast/lexer_test.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2023 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package ast
18 | 
19 | import (
20 | 	"reflect"
21 | 	"testing"
22 | )
23 | 
24 | func lexAll(src string) []token {
25 | 	ch := runLexer([]byte(src), false)
26 | 
27 | 	var tokens []token
28 | 	for i := range ch {
29 | 		tokens = append(tokens, i)
30 | 	}
31 | 	return tokens
32 | }
33 | 
// TestLexer checks the complete token stream, including the lineStart and eof
// markers, that the lexer produces for a set of small inputs.
func TestLexer(t *testing.T) {
	tests := []struct {
		input  string
		tokens []token
	}{
		{
			input:  ";; this is a comment",
			tokens: []token{{typ: lineStart, line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "0x12345678",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0x12345678", line: 1}, {typ: eof, line: 1}},
		},
		// a number ends at the first rune that is not a valid digit
		{
			input:  "0x123ggg",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0x123", line: 1}, {typ: identifier, text: "ggg", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "12345678",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "12345678", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "123abc",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "0123abc",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "0123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "00123abc",
			tokens: []token{{typ: lineStart, line: 1}, {typ: numberLiteral, text: "00123", line: 1}, {typ: identifier, text: "abc", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "@foo",
			tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "foo", line: 1}, {typ: eof, line: 1}},
		},
		{
			input:  "@label123",
			tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "label123", line: 1}, {typ: eof, line: 1}},
		},
		// dotted forms: the '.' is not part of the token text
		{
			input:  "@.label .label: .ident",
			tokens: []token{{typ: lineStart, line: 1}, {typ: dottedLabelRef, text: "label", line: 1}, {typ: dottedLabel, text: "label", line: 1}, {typ: dottedIdentifier, text: "ident", line: 1}, {typ: eof, line: 1}},
		},
		// comment after label
		{
			input:  "@label123 ;; comment",
			tokens: []token{{typ: lineStart, line: 1}, {typ: labelRef, text: "label123", line: 1}, {typ: eof, line: 1}},
		},
		// comment after instruction
		{
			input:  "push 3 ;; comment\nadd",
			tokens: []token{{typ: lineStart, line: 1}, {typ: identifier, text: "push", line: 1}, {typ: numberLiteral, text: "3", line: 1}, {typ: lineEnd, text: "\n", line: 1}, {typ: lineStart, line: 2}, {typ: identifier, line: 2, text: "add"}, {typ: eof, line: 2}},
		},
	}

	for _, test := range tests {
		tokens := lexAll(test.input)
		if !reflect.DeepEqual(tokens, test.tokens) {
			t.Errorf("input %q\ngot:  %+v\nwant: %+v", test.input, tokens, test.tokens)
		}
	}
}
98 | 


--------------------------------------------------------------------------------
/internal/ast/names.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package ast
18 | 
19 | import "unicode"
20 | 
// IsGlobal reports whether name is a global identifier, i.e. whether its
// first character is an upper-case letter. The empty name is not global.
func IsGlobal(name string) bool {
	if name == "" {
		return false
	}
	first := []rune(name)[0]
	return unicode.IsUpper(first)
}
25 | 


--------------------------------------------------------------------------------
/internal/ast/parse.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2024 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package ast
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"regexp"
 22 | 	"strconv"
 23 | )
 24 | 
// Parser performs parsing of the token stream.
type Parser struct {
	in     <-chan token  // tokens delivered by the lexer
	buffer []token       // tokens put back by unread; the last element is read next
	doc    *Document     // document being built
	errors []*ParseError // errors collected so far
}
 32 | 
 33 | // NewParser creates a parser.
 34 | func NewParser(file string, content []byte, debug bool) *Parser {
 35 | 	return &Parser{
 36 | 		in:  runLexer(content, debug),
 37 | 		doc: newDocument(file, nil),
 38 | 	}
 39 | }
 40 | 
 41 | func newDocument(file string, parent *Document) *Document {
 42 | 	return &Document{
 43 | 		File:        file,
 44 | 		labels:      make(map[string]*LabelDefSt),
 45 | 		exprMacros:  make(map[string]*ExpressionMacroDef),
 46 | 		instrMacros: make(map[string]*InstructionMacroDef),
 47 | 		Parent:      parent,
 48 | 	}
 49 | }
 50 | 
 51 | // next reads the next token from the lexer.
 52 | func (p *Parser) next() token {
 53 | 	if len(p.buffer) > 0 {
 54 | 		t := p.buffer[len(p.buffer)-1]
 55 | 		p.buffer = p.buffer[:len(p.buffer)-1]
 56 | 		return t
 57 | 	}
 58 | 	t := <-p.in
 59 | 	return t
 60 | }
 61 | 
// unread puts a token back into the queue for reading.
// The token unread last is the first one returned by next.
func (p *Parser) unread(t token) {
	p.buffer = append(p.buffer, t)
}
 66 | 
 67 | // drainLexer runs the lexer to completion.
 68 | func (p *Parser) drainLexer() {
 69 | 	for p.next().typ != eof {
 70 | 	}
 71 | }
 72 | 
 73 | // throwError adds a new error to the error list.
 74 | // The parser is returned to the toplevel and will continue parsing
 75 | // at the next line.
 76 | func (p *Parser) throwError(tok token, format string, args ...any) {
 77 | 	err := &ParseError{tok: tok, file: p.doc.File, err: fmt.Errorf(format, args...)}
 78 | 	p.errors = append(p.errors, err)
 79 | 	// resync to start of next line
 80 | 	for {
 81 | 		switch tok.typ {
 82 | 		case lineEnd, eof:
 83 | 			panic(err)
 84 | 		}
 85 | 		tok = p.next()
 86 | 	}
 87 | }
 88 | 
// unexpected signals that an unexpected token occurred in the input.
// It reports the token's type description and text through throwError,
// and therefore does not return normally.
func (p *Parser) unexpected(tok token) {
	p.throwError(tok, "unexpected %s %s", tok.typ.String(), tok.text)
}
 93 | 
 94 | // Parse runs the parser, outputting a document.
 95 | func (p *Parser) Parse() (*Document, []*ParseError) {
 96 | 	defer p.drainLexer()
 97 | 	for {
 98 | 		if p.parseOne() {
 99 | 			return p.doc, p.errors
100 | 		}
101 | 	}
102 | }
103 | 
104 | func (p *Parser) parseOne() bool {
105 | 	defer func() {
106 | 		err := recover()
107 | 		if _, ok := err.(*ParseError); !ok && err != nil {
108 | 			panic(err)
109 | 		}
110 | 	}()
111 | 	return parseStatement(p)
112 | }
113 | 
114 | // ParseExpression parses the input as a single expression.
115 | // This is used in evaluator tests.
116 | func (p *Parser) ParseExpression() (expr Expr, err error) {
117 | 	defer p.drainLexer()
118 | 	defer func() {
119 | 		e := recover()
120 | 		if pe, ok := e.(*ParseError); ok {
121 | 			err = pe
122 | 		} else if e != nil {
123 | 			panic(e)
124 | 		}
125 | 	}()
126 | 
127 | 	// skip lineStart
128 | 	switch tok := p.next(); tok.typ {
129 | 	case lineStart:
130 | 		expr = parseExpr(p, p.next())
131 | 		return expr, nil
132 | 	case lineEnd, eof:
133 | 		p.unexpected(tok)
134 | 	}
135 | 	return nil, nil
136 | }
137 | 
// atDocumentTop reports whether the parser is at the toplevel.
// This returns false while parsing an instruction macro definition,
// because the macro body is collected into a document that has a parent.
func (p *Parser) atDocumentTop() bool {
	return p.doc.Parent == nil
}
143 | 
144 | // ------------- start parser functions -------------
145 | 
146 | func parseStatement(p *Parser) (done bool) {
147 | 	switch tok := p.next(); tok.typ {
148 | 	case eof, closeBrace:
149 | 		if p.atDocumentTop() != (tok.typ == eof) {
150 | 			p.unexpected(tok)
151 | 		}
152 | 		return true
153 | 	case label, dottedLabel:
154 | 		parseLabelDef(p, tok)
155 | 	case directive:
156 | 		parseDirective(p, tok)
157 | 	case identifier:
158 | 		parseInstruction(p, tok)
159 | 	case instMacroIdent:
160 | 		parseInstructionMacroCall(p, tok)
161 | 	case lineStart, lineEnd:
162 | 		return false
163 | 	default:
164 | 		p.unexpected(tok)
165 | 	}
166 | 	return false
167 | }
168 | 
169 | func parseLabelDef(p *Parser, tok token) {
170 | 	name := tok.text
171 | 	li := &LabelDefSt{
172 | 		tok:    tok,
173 | 		Src:    p.doc,
174 | 		Dotted: tok.typ == dottedLabel,
175 | 		Global: IsGlobal(name),
176 | 	}
177 | 	p.doc.Statements = append(p.doc.Statements, li)
178 | 	if firstDef, ok := p.doc.labels[name]; ok {
179 | 		p.throwError(tok, "%w", ErrLabelAlreadyDef(firstDef, li))
180 | 		return
181 | 	}
182 | 	p.doc.labels[name] = li
183 | }
184 | 
185 | func parseDirective(p *Parser, tok token) {
186 | 	switch tok.text {
187 | 	case "#define":
188 | 		if !p.atDocumentTop() {
189 | 			p.throwError(tok, "nested macro definitions are not allowed")
190 | 		}
191 | 		parseMacroDef(p)
192 | 	case "#include":
193 | 		parseInclude(p, tok)
194 | 	case "#assemble":
195 | 		parseAssemble(p, tok)
196 | 	case "#pragma":
197 | 		parsePragma(p, tok)
198 | 	case "#bytes":
199 | 		parseBytes(p, tok)
200 | 	default:
201 | 		p.throwError(tok, "unknown compiler directive %q", tok.text)
202 | 	}
203 | }
204 | 
// parseMacroDef parses a macro definition, starting at the macro name (the
// #define token has already been consumed by the caller).
//
// If the name is an instruction macro identifier, parsing is handed over to
// parseInstructionMacroDef. Otherwise this parses an expression macro: an
// optional parameter list followed by '=' and the body expression. A body that
// follows the name or parameter list directly, without '=', is still accepted
// but recorded as a warning (legacy syntax).
//
// The macro is registered in the current document after its body was parsed.
func parseMacroDef(p *Parser) {
	name := p.next()
	switch name.typ {
	case dottedIdentifier:
		p.throwError(name, "attempt to redefine builtin macro .%s", name.text)
	case instMacroIdent:
		parseInstructionMacroDef(p, name)
		return
	case identifier:
		// Expression macro, handled below.
	default:
		p.unexpected(name)
	}

	// Parse parameters and body.
	var (
		pos          = Position{File: p.doc.File, Line: name.line}
		def          = &ExpressionMacroDef{Name: name.text, pos: pos}
		bodyTok      token // first token of the body expression
		didParams    bool  // true once a parameter list was parsed
		legacySyntax bool  // true if the body is not introduced by '='
	)
loop:
	for {
		switch tok := p.next(); tok.typ {
		case lineEnd, eof:
			p.throwError(tok, "incomplete macro definition")

		case openBrace:
			p.throwError(tok, "unexpected { in expression macro definition")

		case openParen:
			if didParams {
				// A second '(' after the parameter list starts a
				// parenthesized body expression (legacy syntax).
				bodyTok, legacySyntax = tok, true
				break loop
			} else {
				def.Params = parseParameterList(p)
				didParams = true
			}

		case equals:
			bodyTok = p.next()
			break loop

		default:
			// Any other token is taken to be the start of the body.
			bodyTok, legacySyntax = tok, true
			break loop
		}
	}

	if legacySyntax {
		// Appended directly instead of via throwError, so parsing continues.
		p.errors = append(p.errors, &ParseError{
			tok:     bodyTok,
			file:    p.doc.File,
			err:     fmt.Errorf("legacy definition syntax, missing '=' before expression"),
			warning: true,
		})
	}
	def.Body = parseExpr(p, bodyTok)

	// Register the macro.
	checkDuplicateMacro(p, name)
	p.doc.exprMacros[name.text] = def
}
268 | 
269 | func parseInstructionMacroDef(p *Parser, nameTok token) {
270 | 	var params []string
271 | 	var didParams bool
272 | paramLoop:
273 | 	for {
274 | 		switch tok := p.next(); tok.typ {
275 | 		case lineEnd, eof:
276 | 			p.throwError(tok, "incomplete macro definition")
277 | 		case openBrace:
278 | 			break paramLoop // start of body
279 | 		case openParen:
280 | 			if !didParams {
281 | 				params = parseParameterList(p)
282 | 				didParams = true
283 | 				continue paramLoop
284 | 			}
285 | 		default:
286 | 			p.unexpected(tok)
287 | 		}
288 | 	}
289 | 
290 | 	// Set definition context in parser.
291 | 	topdoc := p.doc
292 | 	doc := newDocument(p.doc.File, p.doc)
293 | 	p.doc = doc
294 | 	defer func() { p.doc = topdoc }()
295 | 
296 | 	// Parse macro body.
297 | 	for !parseStatement(p) {
298 | 	}
299 | 
300 | 	// Register definition.
301 | 	checkDuplicateMacro(p, nameTok)
302 | 	pos := Position{File: p.doc.File, Line: nameTok.line}
303 | 	def := &InstructionMacroDef{Name: nameTok.text, pos: pos, Params: params, Body: doc}
304 | 	doc.Creation = def
305 | 	topdoc.instrMacros[nameTok.text] = def
306 | }
307 | 
308 | func checkDuplicateMacro(p *Parser, nameTok token) {
309 | 	name := nameTok.text
310 | 	if _, ok := p.doc.instrMacros[name]; ok {
311 | 		p.throwError(nameTok, "instruction macro %s already defined", name)
312 | 	}
313 | 	if _, ok := p.doc.exprMacros[name]; ok {
314 | 		p.throwError(nameTok, "expression macro %s already defined", name)
315 | 	}
316 | }
317 | 
318 | func parseInclude(p *Parser, d token) {
319 | 	instr := &IncludeSt{Src: p.doc, tok: d}
320 | 	switch tok := p.next(); tok.typ {
321 | 	case stringLiteral:
322 | 		instr.Filename = tok.text
323 | 		p.doc.Statements = append(p.doc.Statements, instr)
324 | 	default:
325 | 		p.throwError(tok, "expected filename following #include")
326 | 	}
327 | }
328 | 
329 | func parseAssemble(p *Parser, d token) {
330 | 	instr := &AssembleSt{Src: p.doc, tok: d}
331 | 	switch tok := p.next(); tok.typ {
332 | 	case stringLiteral:
333 | 		instr.Filename = tok.text
334 | 		p.doc.Statements = append(p.doc.Statements, instr)
335 | 	default:
336 | 		p.throwError(tok, "expected filename following #assemble")
337 | 	}
338 | }
339 | 
340 | func parsePragma(p *Parser, d token) {
341 | 	instr := &PragmaSt{pos: Position{p.doc.File, d.line}}
342 | 	switch tok := p.next(); tok.typ {
343 | 	case identifier:
344 | 		instr.Option = tok.text
345 | 		switch v := p.next(); v.typ {
346 | 		case stringLiteral, numberLiteral:
347 | 			instr.Value = v.text
348 | 		case equals:
349 | 			p.throwError(tok, "unexpected = after #pragma %s", instr.Option)
350 | 		default:
351 | 			p.throwError(tok, "#pragma option value must be string or number literal")
352 | 		}
353 | 		p.doc.Statements = append(p.doc.Statements, instr)
354 | 	default:
355 | 		p.throwError(tok, "expected option name following #pragma")
356 | 	}
357 | }
358 | 
359 | func parseBytes(p *Parser, d token) {
360 | 	instr := &BytesSt{pos: Position{p.doc.File, d.line}}
361 | 	switch tok := p.next(); tok.typ {
362 | 	case lineEnd, eof:
363 | 		p.throwError(d, "expected expression following #bytes")
364 | 	default:
365 | 		instr.Value = parseExpr(p, tok)
366 | 		p.doc.Statements = append(p.doc.Statements, instr)
367 | 	}
368 | }
369 | 
370 | func parseInstruction(p *Parser, tok token) {
371 | 	opcode := &OpcodeSt{Op: tok.text, Src: p.doc, tok: tok}
372 | 	size, isPush := parsePushSize(tok.text)
373 | 	if isPush {
374 | 		opcode.PushSize = byte(size + 1)
375 | 	}
376 | 
377 | 	// Register in document.
378 | 	p.doc.Statements = append(p.doc.Statements, opcode)
379 | 
380 | 	// Parse optional argument.
381 | 	argToken := p.next()
382 | 	switch argToken.typ {
383 | 	case lineEnd, eof:
384 | 		return
385 | 	default:
386 | 		opcode.Arg = parseExpr(p, argToken)
387 | 	}
388 | }
389 | 
// sizedPushRE matches the instruction names PUSH and PUSH<digits>, ignoring case.
var sizedPushRE = regexp.MustCompile("(?i)^PUSH([0-9]*)$")

// parsePushSize checks whether name is a push instruction name.
//
// For a name of the form PUSH<n> it returns (n, true). For plain PUSH, without
// a size, it returns (-1, true). For all other names it returns (0, false).
//
// Names with a size that cannot be represented are not considered to be push
// instructions: the parser stores size+1 in a single byte, so a size above 254
// (or one that overflows int) would wrap around and silently turn into a
// different, valid-looking push size.
func parsePushSize(name string) (int, bool) {
	const maxSize = 254 // largest n for which n+1 still fits into a byte

	m := sizedPushRE.FindStringSubmatch(name)
	if m == nil {
		return 0, false
	}
	if m[1] == "" {
		return -1, true
	}
	sz, err := strconv.Atoi(m[1])
	if err != nil || sz > maxSize {
		return 0, false
	}
	return sz, true
}
403 | 
404 | func parseInstructionMacroCall(p *Parser, nameTok token) {
405 | 	call := &MacroCallSt{Src: p.doc, Ident: nameTok.text, tok: nameTok}
406 | 	p.doc.Statements = append(p.doc.Statements, call)
407 | 
408 | 	switch tok := p.next(); tok.typ {
409 | 	case lineEnd, eof:
410 | 		return
411 | 	case openParen:
412 | 		call.Args = parseCallArguments(p)
413 | 	default:
414 | 		p.unexpected(tok)
415 | 	}
416 | }
417 | 
// parseExpr parses an expression. tok is the first token of the expression.
// A primary expression is parsed first; parseArith then extends it with any
// arithmetic operations that follow.
func parseExpr(p *Parser, tok token) Expr {
	left := parsePrimaryExpr(p, tok)
	return parseArith(p, left, p.next(), 0)
}
423 | 
// parseArith parses an arithmetic expression.
//
// left is the operand parsed so far and tok is the token that follows it. As
// long as tok is an arithmetic operator whose precedence is at least
// minPrecedence, the right operand is parsed and combined with left into an
// ArithExpr, which becomes the new left operand. The first token that does not
// continue the expression is put back with unread, and left is returned.
func parseArith(p *Parser, left Expr, tok token, minPrecedence int) Expr {
	for ; ; tok = p.next() {
		// Check for (another) arithmetic op.
		var op ArithOp
		switch tok.typ {
		case arith:
			op = tokenArithOp(tok)
			if precedence[op] < minPrecedence {
				// Operator binds weaker than allowed here, leave it to the caller.
				p.unread(tok)
				return left
			}
		default:
			// End of binary expression.
			p.unread(tok)
			return left
		}

		// Parse right operand.
		var right Expr
		switch tok = p.next(); tok.typ {
		case comma, closeParen, closeBrace, lineEnd, eof:
			p.throwError(tok, "expected right operand in arithmetic expression")
		default:
			right = parsePrimaryExpr(p, tok)
		}

		// Check for next op of higher precedence.
		right = parseArithInner(p, right, precedence[op])

		// Combine into binary expression.
		left = &ArithExpr{Op: op, Left: left, Right: right}
	}
}
458 | 
// parseArithInner extends the right operand of a binary operation whose
// operator has precedence curPrecedence. While the next token is an arithmetic
// operator of strictly higher precedence, that operation is parsed with right
// as its left operand, and the result becomes the new right operand. The first
// token that is not such an operator is put back with unread.
func parseArithInner(p *Parser, right Expr, curPrecedence int) Expr {
	for {
		switch tok := p.next(); tok.typ {
		case arith:
			nextop := tokenArithOp(tok)
			if precedence[nextop] <= curPrecedence {
				// Same or lower precedence: belongs to the enclosing expression.
				p.unread(tok)
				return right
			}
			right = parseArith(p, right, tok, curPrecedence+1)

		default:
			p.unread(tok)
			return right
		}
	}
}
476 | 
477 | func parsePrimaryExpr(p *Parser, tok token) Expr {
478 | 	switch tok.typ {
479 | 	case identifier, dottedIdentifier:
480 | 		call := &MacroCallExpr{Ident: tok.text, Builtin: tok.typ == dottedIdentifier}
481 | 		switch tok := p.next(); tok.typ {
482 | 		case openParen:
483 | 			call.Args = parseCallArguments(p)
484 | 		default:
485 | 			p.unread(tok)
486 | 		}
487 | 		return call
488 | 
489 | 	case variableIdentifier:
490 | 		return &VariableExpr{Ident: tok.text}
491 | 
492 | 	case labelRef, dottedLabelRef:
493 | 		return &LabelRefExpr{
494 | 			Ident:  tok.text,
495 | 			Dotted: tok.typ == dottedLabelRef,
496 | 			Global: IsGlobal(tok.text),
497 | 		}
498 | 
499 | 	case numberLiteral, stringLiteral:
500 | 		return &LiteralExpr{tok: tok}
501 | 
502 | 	case openParen:
503 | 		return parseParenExpr(p)
504 | 
505 | 	default:
506 | 		p.unexpected(tok)
507 | 		return nil
508 | 	}
509 | }
510 | 
511 | func parseParenExpr(p *Parser) Expr {
512 | 	var expr Expr
513 | 	switch tok := p.next(); tok.typ {
514 | 	case closeParen:
515 | 		p.throwError(tok, "empty parenthesized expression")
516 | 		return nil
517 | 	default:
518 | 		expr = parseExpr(p, tok)
519 | 	}
520 | 	// Ensure closing paren is there.
521 | 	for {
522 | 		switch tok := p.next(); tok.typ {
523 | 		case closeParen:
524 | 			return expr
525 | 		case lineStart, lineEnd:
526 | 			continue
527 | 		default:
528 | 			p.unexpected(tok)
529 | 		}
530 | 	}
531 | }
532 | 
533 | // parseParameterList parses a comma-separated list of names.
534 | func parseParameterList(p *Parser) (names []string) {
535 | 	for {
536 | 		tok := p.next()
537 | 		switch tok.typ {
538 | 		case closeParen:
539 | 			return names
540 | 		case identifier:
541 | 			names = append(names, tok.text)
542 | 		default:
543 | 			p.unexpected(tok)
544 | 		}
545 | 		if parseListEnd(p) {
546 | 			return names
547 | 		}
548 | 	}
549 | }
550 | 
551 | // parseCallArguments parses the argument list of a macro call.
552 | func parseCallArguments(p *Parser) (args []Expr) {
553 | 	for {
554 | 		tok := p.next()
555 | 		switch tok.typ {
556 | 		case closeParen:
557 | 			return args
558 | 		default:
559 | 			if arg := parseExpr(p, tok); arg != nil {
560 | 				args = append(args, arg)
561 | 			}
562 | 		}
563 | 		if parseListEnd(p) {
564 | 			return args
565 | 		}
566 | 	}
567 | }
568 | 
569 | func parseListEnd(p *Parser) bool {
570 | 	for {
571 | 		tok := p.next()
572 | 		switch tok.typ {
573 | 		case comma:
574 | 			return false
575 | 		case lineStart, lineEnd:
576 | 			continue
577 | 		case closeParen:
578 | 			return true
579 | 		default:
580 | 			p.unexpected(tok)
581 | 		}
582 | 	}
583 | }
584 | 


--------------------------------------------------------------------------------
/internal/ast/tokentype_string.go:
--------------------------------------------------------------------------------
 1 | // Code generated by "stringer -linecomment -type tokenType"; DO NOT EDIT.
 2 | 
 3 | package ast
 4 | 
 5 | import "strconv"
 6 | 
// This function is never called. It only exists so that compilation fails
// when the tokenType constants no longer match the values this file was
// generated for.
func _() {
	// An "invalid array index" compiler error signifies that the constant values have changed.
	// Re-run the stringer command to generate them again.
	var x [1]struct{}
	_ = x[eof-0]
	_ = x[lineStart-1]
	_ = x[lineEnd-2]
	_ = x[invalidToken-3]
	_ = x[identifier-4]
	_ = x[dottedIdentifier-5]
	_ = x[variableIdentifier-6]
	_ = x[labelRef-7]
	_ = x[dottedLabelRef-8]
	_ = x[label-9]
	_ = x[dottedLabel-10]
	_ = x[numberLiteral-11]
	_ = x[stringLiteral-12]
	_ = x[openParen-13]
	_ = x[closeParen-14]
	_ = x[comma-15]
	_ = x[directive-16]
	_ = x[instMacroIdent-17]
	_ = x[openBrace-18]
	_ = x[closeBrace-19]
	_ = x[equals-20]
	_ = x[arith-21]
}
34 | 
// _tokenType_name is the concatenation of all token type descriptions, in
// order of token type value.
const _tokenType_name = "end of filebeginning of lineend of lineinvalid characteridentifierdotted identifierparameter referencelabel referencedotted label referencelabel definitiondotted label definitionnumber literalstring literalopen parenthesisclose parenthesiscommadirectivemacro identifieropen braceclosing braceequals signarithmetic operation"

// _tokenType_index holds the offsets into _tokenType_name: the description of
// token type i is _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]].
var _tokenType_index = [...]uint16{0, 11, 28, 39, 56, 66, 83, 102, 117, 139, 155, 178, 192, 206, 222, 239, 244, 253, 269, 279, 292, 303, 323}
38 | 
// String returns the description of the token type. Values outside of the
// known range are formatted as "tokenType(N)".
func (i tokenType) String() string {
	if i >= tokenType(len(_tokenType_index)-1) {
		return "tokenType(" + strconv.FormatInt(int64(i), 10) + ")"
	}
	return _tokenType_name[_tokenType_index[i]:_tokenType_index[i+1]]
}
45 | 


--------------------------------------------------------------------------------
/internal/evm/forkdefs.go:
--------------------------------------------------------------------------------
  1 | package evm
  2 | 
// LatestFork is the name of a fork in the registry below.
// NOTE(review): presumably this is the default fork used when none is selected;
// the registry also defines "prague" as a child of "cancun" — confirm against
// callers whether this value is meant to trail the newest registered fork.
var LatestFork = "cancun"
  4 | 
  5 | var forkReg = map[string]*InstructionSetDef{
  6 | 	"frontier": {
  7 | 		Names: []string{"frontier"},
  8 | 		Added: []*Op{
  9 | 			opm["STOP"],
 10 | 			opm["ADD"],
 11 | 			opm["MUL"],
 12 | 			opm["SUB"],
 13 | 			opm["DIV"],
 14 | 			opm["SDIV"],
 15 | 			opm["MOD"],
 16 | 			opm["SMOD"],
 17 | 			opm["ADDMOD"],
 18 | 			opm["MULMOD"],
 19 | 			opm["EXP"],
 20 | 			opm["SIGNEXTEND"],
 21 | 			opm["LT"],
 22 | 			opm["GT"],
 23 | 			opm["SLT"],
 24 | 			opm["SGT"],
 25 | 			opm["EQ"],
 26 | 			opm["ISZERO"],
 27 | 			opm["AND"],
 28 | 			opm["XOR"],
 29 | 			opm["OR"],
 30 | 			opm["NOT"],
 31 | 			opm["BYTE"],
 32 | 			opm["KECCAK256"],
 33 | 			opm["ADDRESS"],
 34 | 			opm["BALANCE"],
 35 | 			opm["ORIGIN"],
 36 | 			opm["CALLER"],
 37 | 			opm["CALLVALUE"],
 38 | 			opm["CALLDATALOAD"],
 39 | 			opm["CALLDATASIZE"],
 40 | 			opm["CALLDATACOPY"],
 41 | 			opm["CODESIZE"],
 42 | 			opm["CODECOPY"],
 43 | 			opm["GASPRICE"],
 44 | 			opm["EXTCODESIZE"],
 45 | 			opm["EXTCODECOPY"],
 46 | 			opm["BLOCKHASH"],
 47 | 			opm["COINBASE"],
 48 | 			opm["TIMESTAMP"],
 49 | 			opm["NUMBER"],
 50 | 			opm["DIFFICULTY"],
 51 | 			opm["GASLIMIT"],
 52 | 			opm["POP"],
 53 | 			opm["MLOAD"],
 54 | 			opm["MSTORE"],
 55 | 			opm["MSTORE8"],
 56 | 			opm["SLOAD"],
 57 | 			opm["SSTORE"],
 58 | 			opm["JUMP"],
 59 | 			opm["JUMPI"],
 60 | 			opm["PC"],
 61 | 			opm["MSIZE"],
 62 | 			opm["GAS"],
 63 | 			opm["JUMPDEST"],
 64 | 			opm["PUSH1"],
 65 | 			opm["PUSH2"],
 66 | 			opm["PUSH3"],
 67 | 			opm["PUSH4"],
 68 | 			opm["PUSH5"],
 69 | 			opm["PUSH6"],
 70 | 			opm["PUSH7"],
 71 | 			opm["PUSH8"],
 72 | 			opm["PUSH9"],
 73 | 			opm["PUSH10"],
 74 | 			opm["PUSH11"],
 75 | 			opm["PUSH12"],
 76 | 			opm["PUSH13"],
 77 | 			opm["PUSH14"],
 78 | 			opm["PUSH15"],
 79 | 			opm["PUSH16"],
 80 | 			opm["PUSH17"],
 81 | 			opm["PUSH18"],
 82 | 			opm["PUSH19"],
 83 | 			opm["PUSH20"],
 84 | 			opm["PUSH21"],
 85 | 			opm["PUSH22"],
 86 | 			opm["PUSH23"],
 87 | 			opm["PUSH24"],
 88 | 			opm["PUSH25"],
 89 | 			opm["PUSH26"],
 90 | 			opm["PUSH27"],
 91 | 			opm["PUSH28"],
 92 | 			opm["PUSH29"],
 93 | 			opm["PUSH30"],
 94 | 			opm["PUSH31"],
 95 | 			opm["PUSH32"],
 96 | 			opm["DUP1"],
 97 | 			opm["DUP2"],
 98 | 			opm["DUP3"],
 99 | 			opm["DUP4"],
100 | 			opm["DUP5"],
101 | 			opm["DUP6"],
102 | 			opm["DUP7"],
103 | 			opm["DUP8"],
104 | 			opm["DUP9"],
105 | 			opm["DUP10"],
106 | 			opm["DUP11"],
107 | 			opm["DUP12"],
108 | 			opm["DUP13"],
109 | 			opm["DUP14"],
110 | 			opm["DUP15"],
111 | 			opm["DUP16"],
112 | 			opm["SWAP1"],
113 | 			opm["SWAP2"],
114 | 			opm["SWAP3"],
115 | 			opm["SWAP4"],
116 | 			opm["SWAP5"],
117 | 			opm["SWAP6"],
118 | 			opm["SWAP7"],
119 | 			opm["SWAP8"],
120 | 			opm["SWAP9"],
121 | 			opm["SWAP10"],
122 | 			opm["SWAP11"],
123 | 			opm["SWAP12"],
124 | 			opm["SWAP13"],
125 | 			opm["SWAP14"],
126 | 			opm["SWAP15"],
127 | 			opm["SWAP16"],
128 | 			opm["LOG0"],
129 | 			opm["LOG1"],
130 | 			opm["LOG2"],
131 | 			opm["LOG3"],
132 | 			opm["LOG4"],
133 | 			opm["CREATE"],
134 | 			opm["CALL"],
135 | 			opm["CALLCODE"],
136 | 			opm["RETURN"],
137 | 			opm["SELFDESTRUCT"],
138 | 		},
139 | 	},
140 | 
141 | 	"homestead": {
142 | 		Names:  []string{"homestead"},
143 | 		Parent: "frontier",
144 | 		Added: []*Op{
145 | 			opm["DELEGATECALL"],
146 | 		},
147 | 	},
148 | 
149 | 	"tangerinewhistle": {
150 | 		Names:  []string{"tangerinewhistle", "eip150"},
151 | 		Parent: "homestead",
152 | 	},
153 | 
154 | 	"spuriousdragon": {
155 | 		Names:  []string{"spuriousdragon", "eip158"},
156 | 		Parent: "tangerinewhistle",
157 | 	},
158 | 
159 | 	"byzantium": {
160 | 		Names:  []string{"byzantium"},
161 | 		Parent: "spuriousdragon",
162 | 		Added: []*Op{
163 | 			opm["STATICCALL"],
164 | 			opm["RETURNDATASIZE"],
165 | 			opm["RETURNDATACOPY"],
166 | 			opm["REVERT"],
167 | 		},
168 | 	},
169 | 
170 | 	"petersburg": {
171 | 		Names:  []string{"petersburg"},
172 | 		Parent: "byzantium",
173 | 	},
174 | 
175 | 	"constantinople": {
176 | 		Names:  []string{"constantinople"},
177 | 		Parent: "petersburg",
178 | 		Added: []*Op{
179 | 			opm["SHL"],
180 | 			opm["SHR"],
181 | 			opm["SAR"],
182 | 			opm["EXTCODEHASH"],
183 | 			opm["CREATE2"],
184 | 		},
185 | 	},
186 | 
187 | 	"istanbul": {
188 | 		Names:  []string{"istanbul"},
189 | 		Parent: "constantinople",
190 | 		Added: []*Op{
191 | 			opm["CHAINID"],
192 | 			opm["SELFBALANCE"],
193 | 		},
194 | 	},
195 | 
196 | 	"berlin": {
197 | 		Names:  []string{"berlin"},
198 | 		Parent: "istanbul",
199 | 	},
200 | 
201 | 	"london": {
202 | 		Names:  []string{"london"},
203 | 		Parent: "berlin",
204 | 		Added: []*Op{
205 | 			opm["BASEFEE"],
206 | 		},
207 | 	},
208 | 
209 | 	"paris": {
210 | 		Names:  []string{"paris", "merge"},
211 | 		Parent: "istanbul",
212 | 		Added: []*Op{
213 | 			opm["RANDOM"],
214 | 		},
215 | 		Removed: []*Op{
216 | 			opm["DIFFICULTY"],
217 | 		},
218 | 	},
219 | 
220 | 	"shanghai": {
221 | 		Names:  []string{"shanghai"},
222 | 		Parent: "paris",
223 | 		Added: []*Op{
224 | 			opm["PUSH0"],
225 | 		},
226 | 	},
227 | 
228 | 	"cancun": {
229 | 		Names:  []string{"cancun"},
230 | 		Parent: "shanghai",
231 | 		Added: []*Op{
232 | 			opm["BLOBHASH"],
233 | 			opm["TSTORE"],
234 | 			opm["TLOAD"],
235 | 			opm["MCOPY"],
236 | 			opm["SENDALL"],
237 | 		},
238 | 		Removed: []*Op{
239 | 			opm["SELFDESTRUCT"],
240 | 		},
241 | 	},
242 | 
243 | 	"prague": {
244 | 		Names:  []string{"prague"},
245 | 		Parent: "cancun",
246 | 	},
247 | }
248 | 


--------------------------------------------------------------------------------
/internal/evm/instruction_set.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2014 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package evm
 18 | 
 19 | import (
 20 | 	"fmt"
 21 | 	"slices"
 22 | 	"sort"
 23 | 	"strconv"
 24 | 	"strings"
 25 | 
 26 | 	"github.com/fjl/geas/internal/set"
 27 | )
 28 | 
// InstructionSetDef is the definition of an EVM instruction set.
// A definition describes a fork as a set of changes relative to its parent.
type InstructionSetDef struct {
	Names   []string // all names of this instruction set
	Parent  string   // registry key of the parent definition, empty for the root
	Added   []*Op    // ops that are added on top of the parent's instruction set
	Removed []*Op    // ops of the parent's instruction set that are removed
}
 36 | 
// Name returns the canonical name, which is the first entry of Names.
// It panics if Names is empty.
func (def *InstructionSetDef) Name() string {
	return def.Names[0]
}
 41 | 
// InstructionSet is an EVM instruction set.
// It contains the resolved set of ops of one fork.
type InstructionSet struct {
	name      string            // canonical name of the fork
	byName    map[string]*Op    // ops indexed by name
	byCode    map[byte]*Op      // ops indexed by opcode
	opRemoved map[string]string // forks where op was last removed
}
 49 | 
 50 | // FindInstructionSet resolves a fork name to a set of opcodes.
 51 | func FindInstructionSet(name string) *InstructionSet {
 52 | 	name = strings.ToLower(name)
 53 | 	var def *InstructionSetDef
 54 | 	if def = forkReg[name]; def == nil {
 55 | 		// Might be non-canonical name.
 56 | 		for _, entry := range forkReg {
 57 | 			if slices.Contains(entry.Names, name) {
 58 | 				def = entry
 59 | 				break
 60 | 			}
 61 | 		}
 62 | 	}
 63 | 	if def == nil {
 64 | 		return nil
 65 | 	}
 66 | 	is := &InstructionSet{
 67 | 		name:      def.Name(),
 68 | 		byName:    make(map[string]*Op),
 69 | 		byCode:    make(map[byte]*Op),
 70 | 		opRemoved: make(map[string]string),
 71 | 	}
 72 | 	if err := is.resolveDefs(def); err != nil {
 73 | 		panic(err)
 74 | 	}
 75 | 	return is
 76 | }
 77 | 
// Name returns the canonical instruction set name, i.e. the canonical name
// of the fork definition this set was created from.
func (is *InstructionSet) Name() string {
	return is.name
}
 82 | 
// SupportsPush0 reports whether the instruction set includes the PUSH0 instruction.
// This is determined by looking up the name "PUSH0" in the set.
func (is *InstructionSet) SupportsPush0() bool {
	return is.byName["PUSH0"] != nil
}
 87 | 
// OpByName resolves an opcode by its name.
// Name has to be all uppercase.
// It returns nil if the instruction set contains no op of that name.
func (is *InstructionSet) OpByName(opname string) *Op {
	return is.byName[opname]
}
 93 | 
 94 | // PushBySize resolves a push op by its size.
 95 | func (is *InstructionSet) PushBySize(size int) *Op {
 96 | 	buf := []byte{'P', 'U', 'S', 'H', 0, 0}
 97 | 	name := strconv.AppendInt(buf[:4], int64(size), 10)
 98 | 	return is.byName[string(name)]
 99 | }
100 | 
// OpByCode resolves an opcode by its code.
// It returns nil if the instruction set does not define the code.
func (is *InstructionSet) OpByCode(code byte) *Op {
	return is.byCode[code]
}
105 | 
106 | // AllOps returns all operations.
107 | func (is *InstructionSet) AllOps() []*Op {
108 | 	ops := make([]*Op, 0, len(is.byName))
109 | 	for _, op := range is.byName {
110 | 		ops = append(ops, op)
111 | 	}
112 | 	slices.SortFunc(ops, func(a, b *Op) int { return strings.Compare(a.Name, b.Name) })
113 | 	return ops
114 | }
115 | 
116 | // Parents returns the parent fork chain of the instruction set.
117 | func (is *InstructionSet) Parents() []string {
118 | 	var chain []string
119 | 	f := forkReg[is.name]
120 | 	for f.Parent != "" {
121 | 		f = forkReg[f.Parent]
122 | 		chain = append(chain, f.Name())
123 | 	}
124 | 	return chain
125 | }
126 | 
// ForkWhereOpRemoved returns the fork where a given op was removed from the instruction
// set. This is intended to be called when op is known to not exist. Note this will return
// an empty string in several cases:
//
//   - op is invalid
//   - op is valid, but does not appear in lineage of instruction set
//   - op is valid and exists in instruction set
//
// The op name is looked up as given, without case conversion.
func (is *InstructionSet) ForkWhereOpRemoved(op string) string {
	return is.opRemoved[op]
}
137 | 
138 | // lineage computes the definition chain of an instruction set.
139 | func (def *InstructionSetDef) lineage() ([]*InstructionSetDef, error) {
140 | 	var visited = make(set.Set[*InstructionSetDef])
141 | 	var lin []*InstructionSetDef
142 | 	for {
143 | 		if visited.Includes(def) {
144 | 			return nil, fmt.Errorf("instruction set parent cycle: %s <- %s", lin[len(lin)-1].Name(), def.Name())
145 | 		}
146 | 		visited.Add(def)
147 | 		lin = append(lin, def)
148 | 
149 | 		if def.Parent == "" {
150 | 			break
151 | 		}
152 | 		parent, ok := forkReg[def.Parent]
153 | 		if !ok {
154 | 			return nil, fmt.Errorf("instruction set %s has unknown parent %s", def.Name(), def.Parent)
155 | 		}
156 | 		def = parent
157 | 	}
158 | 	slices.Reverse(lin)
159 | 	return lin, nil
160 | }
161 | 
// resolveDefs computes the full opcode set of a fork from its lineage.
//
// The definitions are applied in order from the root fork to toplevel. For each
// definition, the removals are processed before the additions. An error is
// returned if a definition removes an op that is not present, or adds an op
// whose name or code is already taken. The instruction set may be left
// partially populated when an error is returned.
func (is *InstructionSet) resolveDefs(toplevel *InstructionSetDef) error {
	lineage, err := toplevel.lineage()
	if err != nil {
		return err
	}

	for _, def := range lineage {
		for _, op := range def.Removed {
			if _, ok := is.byName[op.Name]; !ok {
				return fmt.Errorf("removed op %s does not exist in fork %s", op.Name, def.Name())
			}
			if _, ok := is.byCode[op.Code]; !ok {
				return fmt.Errorf("removed opcode %d (%s) does not exist in fork %s", op.Code, op.Name, def.Name())
			}
			delete(is.byName, op.Name)
			delete(is.byCode, op.Code)
			// Remember the fork of the most recent removal.
			is.opRemoved[op.Name] = def.Name()
		}
		for _, op := range def.Added {
			_, nameDefined := is.byName[op.Name]
			if nameDefined {
				return fmt.Errorf("instruction %s added multiple times", op.Name)
			}
			is.byName[op.Name] = op
			_, codeDefined := is.byCode[op.Code]
			if codeDefined {
				return fmt.Errorf("opcode %v added multiple times (adding %s, existing def %s)", op.Code, op.Name, is.byCode[op.Code].Name)
			}
			is.byCode[op.Code] = op
			// The op exists again, so it no longer counts as removed.
			delete(is.opRemoved, op.Name)
		}
	}
	return nil
}
197 | 
198 | // opAddedInForkMap contains all ops and the forks they were added in.
199 | var opAddedInForkMap = computeOpAddedInFork()
200 | 
201 | func computeOpAddedInFork() map[string][]string {
202 | 	m := make(map[string][]string)
203 | 	for _, def := range forkReg {
204 | 		for _, op := range def.Added {
205 | 			m[op.Name] = append(m[op.Name], def.Name())
206 | 		}
207 | 	}
208 | 	return m
209 | }
210 | 
// ForksWhereOpAdded returns the fork names where a given op is added.
// If this returns nil, op is invalid.
// The returned slice is shared with the package-level index and should not
// be modified by the caller.
func ForksWhereOpAdded(op string) []string {
	return opAddedInForkMap[op]
}
216 | 
217 | func AllForks() (names []string) {
218 | 	for _, def := range forkReg {
219 | 		names = append(names, def.Names...)
220 | 	}
221 | 	sort.Strings(names)
222 | 	return names
223 | }
224 | 


--------------------------------------------------------------------------------
/internal/evm/instruction_set_test.go:
--------------------------------------------------------------------------------
  1 | package evm
  2 | 
  3 | import (
  4 | 	"maps"
  5 | 	"slices"
  6 | 	"strings"
  7 | 	"testing"
  8 | 
  9 | 	"github.com/fjl/geas/internal/set"
 10 | )
 11 | 
// TestOps checks the global op list for consistency: op names must be
// uppercase, and every op must be added by at least one fork definition.
func TestOps(t *testing.T) {
	// Check all op names are uppercase.
	for _, op := range oplist {
		if op.Name != strings.ToUpper(op.Name) {
			t.Fatalf("op %s name is not all-uppercase", op.Name)
		}
	}

	// Check all ops are used in a fork.
	// First compute set of used op names.
	defnames := slices.Sorted(maps.Keys(forkReg))
	used := make(set.Set[string], len(oplist))
	for _, name := range defnames {
		for _, op := range forkReg[name].Added {
			used.Add(op.Name)
		}
	}
	usedopnames := used.Members()
	slices.Sort(usedopnames)
	// Now compute sorted list of all ops.
	allopnames := make([]string, len(oplist))
	for i, op := range oplist {
		allopnames[i] = op.Name
	}
	slices.Sort(allopnames)
	// Compare.
	d := set.Diff(allopnames, usedopnames)
	if len(d) > 0 {
		t.Error("unused ops:", d)
	}
	// This only compares the counts, it does not identify the offending names.
	if len(usedopnames) > len(allopnames) {
		t.Error("forkdefs uses ops which are not in oplist")
	}
}
 46 | 
// TestForkDefs checks the structure of the fork registry: every definition
// must list its registry key as the first name, and the parent chain of every
// definition must resolve without error.
func TestForkDefs(t *testing.T) {
	defnames := slices.Sorted(maps.Keys(forkReg))

	// Check canon name is listed first in def.Names.
	for _, name := range defnames {
		def := forkReg[name]
		if len(def.Names) == 0 {
			t.Fatalf("instruction set %q has no Names", name)
		}
		if def.Names[0] != name {
			t.Fatalf("canon name of instruction set %q not listed first in def.Names", name)
		}
	}

	// Check lineage works.
	for _, name := range defnames {
		def := forkReg[name]
		_, err := def.lineage()
		if err != nil {
			t.Errorf("problem in lineage() of %q: %v", name, err)
		}
	}
}
 70 | 
 71 | // In this test, we just check for a few known ops.
 72 | func TestForkOps(t *testing.T) {
 73 | 	is := FindInstructionSet("cancun")
 74 | 
 75 | 	{
 76 | 		op := is.OpByName("ADD")
 77 | 		if op.Name != "ADD" {
 78 | 			t.Fatal("wrong op name:", op.Name)
 79 | 		}
 80 | 		if op.Code != 0x01 {
 81 | 			t.Fatal("wrong op code:", op.Code)
 82 | 		}
 83 | 		if op2 := is.OpByCode(0x01); op2 != op {
 84 | 			t.Fatal("reverse lookup returned incorrect op", op2)
 85 | 		}
 86 | 	}
 87 | 	{
 88 | 		op := is.OpByName("SHR")
 89 | 		if op.Name != "SHR" {
 90 | 			t.Fatal("wrong op name:", op.Name)
 91 | 		}
 92 | 		if op.Code != 0x1c {
 93 | 			t.Fatal("wrong op code:", op.Code)
 94 | 		}
 95 | 		if op2 := is.OpByCode(0x1c); op2 != op {
 96 | 			t.Fatal("reverse lookup returned incorrect op", op2)
 97 | 		}
 98 | 	}
 99 | 	{
100 | 		op := is.OpByName("RANDOM")
101 | 		if op.Name != "RANDOM" {
102 | 			t.Fatal("wrong op name:", op.Name)
103 | 		}
104 | 		if op.Code != 0x44 {
105 | 			t.Fatal("wrong op code:", op.Code)
106 | 		}
107 | 		if op2 := is.OpByCode(0x44); op2 != op {
108 | 			t.Fatal("reverse lookup returned incorrect op", op2)
109 | 		}
110 | 	}
111 | 	{
112 | 		op := is.OpByName("DIFFICULTY")
113 | 		if op != nil {
114 | 			t.Fatal("DIFFICULTY op found even though it was removed")
115 | 		}
116 | 		rf := is.ForkWhereOpRemoved("DIFFICULTY")
117 | 		if rf != "paris" {
118 | 			t.Fatalf("ForkWhereOpRemoved(DIFFICULTY) -> %s != %s", rf, "paris")
119 | 		}
120 | 	}
121 | }
122 | 
123 | func TestForksWhereOpAdded(t *testing.T) {
124 | 	f := ForksWhereOpAdded("BASEFEE")
125 | 	if !slices.Equal(f, []string{"london"}) {
126 | 		t.Fatalf("wrong list for BASEFEE: %v", f)
127 | 	}
128 | }
129 | 


--------------------------------------------------------------------------------
/internal/evm/ops.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2023 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package evm
 18 | 
 19 | import (
 20 | 	"strconv"
 21 | 	"strings"
 22 | )
 23 | 
 24 | // Op is an EVM opcode.
 25 | type Op struct {
 26 | 	Name string
 27 | 	Code byte
 28 | 
 29 | 	// Flags:
 30 | 	// - Push is set for PUSHx
 31 | 	// - Term is set for instructions that end execution
 32 | 	// - Jump is set for all jumps
 33 | 	// - Unconditional is set for unconditional jumps
 34 | 	// - JumpDest is set for JUMPDEST
 35 | 	Push, Term, Jump, Unconditional, JumpDest bool
 36 | }
 37 | 
 38 | func (op Op) PushSize() int {
 39 | 	n, _ := strconv.Atoi(strings.TrimPrefix(op.Name, "PUSH"))
 40 | 	return n
 41 | }
 42 | 
// This is the list of all opcodes.
//
// Each entry gives the op name, its byte code and any flags (see Op). Names
// must be unique because computeOpsMap panics on a duplicate name. Codes are
// not unique: 0x44 is listed as both DIFFICULTY and RANDOM, and 0xff as both
// SELFDESTRUCT and SENDALL.
var oplist = []*Op{
	{Name: "STOP", Code: 0x0, Term: true},
	{Name: "ADD", Code: 0x1},
	{Name: "MUL", Code: 0x2},
	{Name: "SUB", Code: 0x3},
	{Name: "DIV", Code: 0x4},
	{Name: "SDIV", Code: 0x5},
	{Name: "MOD", Code: 0x6},
	{Name: "SMOD", Code: 0x7},
	{Name: "ADDMOD", Code: 0x8},
	{Name: "MULMOD", Code: 0x9},
	{Name: "EXP", Code: 0xa},
	{Name: "SIGNEXTEND", Code: 0xb},
	{Name: "LT", Code: 0x10},
	{Name: "GT", Code: 0x11},
	{Name: "SLT", Code: 0x12},
	{Name: "SGT", Code: 0x13},
	{Name: "EQ", Code: 0x14},
	{Name: "ISZERO", Code: 0x15},
	{Name: "AND", Code: 0x16},
	{Name: "OR", Code: 0x17},
	{Name: "XOR", Code: 0x18},
	{Name: "NOT", Code: 0x19},
	{Name: "BYTE", Code: 0x1a},
	{Name: "SHL", Code: 0x1b},
	{Name: "SHR", Code: 0x1c},
	{Name: "SAR", Code: 0x1d},
	{Name: "KECCAK256", Code: 0x20},
	{Name: "ADDRESS", Code: 0x30},
	{Name: "BALANCE", Code: 0x31},
	{Name: "ORIGIN", Code: 0x32},
	{Name: "CALLER", Code: 0x33},
	{Name: "CALLVALUE", Code: 0x34},
	{Name: "CALLDATALOAD", Code: 0x35},
	{Name: "CALLDATASIZE", Code: 0x36},
	{Name: "CALLDATACOPY", Code: 0x37},
	{Name: "CODESIZE", Code: 0x38},
	{Name: "CODECOPY", Code: 0x39},
	{Name: "GASPRICE", Code: 0x3a},
	{Name: "EXTCODESIZE", Code: 0x3b},
	{Name: "EXTCODECOPY", Code: 0x3c},
	{Name: "RETURNDATASIZE", Code: 0x3d},
	{Name: "RETURNDATACOPY", Code: 0x3e},
	{Name: "EXTCODEHASH", Code: 0x3f},
	{Name: "BLOCKHASH", Code: 0x40},
	{Name: "COINBASE", Code: 0x41},
	{Name: "TIMESTAMP", Code: 0x42},
	{Name: "NUMBER", Code: 0x43},
	// DIFFICULTY and RANDOM are two names for the same code.
	{Name: "DIFFICULTY", Code: 0x44},
	{Name: "RANDOM", Code: 0x44},
	{Name: "GASLIMIT", Code: 0x45},
	{Name: "CHAINID", Code: 0x46},
	{Name: "SELFBALANCE", Code: 0x47},
	{Name: "BASEFEE", Code: 0x48},
	{Name: "BLOBHASH", Code: 0x49},
	{Name: "POP", Code: 0x50},
	{Name: "MLOAD", Code: 0x51},
	{Name: "MSTORE", Code: 0x52},
	{Name: "MSTORE8", Code: 0x53},
	{Name: "SLOAD", Code: 0x54},
	{Name: "SSTORE", Code: 0x55},
	{Name: "JUMP", Code: 0x56, Jump: true, Unconditional: true},
	{Name: "JUMPI", Code: 0x57, Jump: true},
	{Name: "PC", Code: 0x58},
	{Name: "MSIZE", Code: 0x59},
	{Name: "GAS", Code: 0x5a},
	{Name: "JUMPDEST", Code: 0x5b, JumpDest: true},
	{Name: "TLOAD", Code: 0x5c},
	{Name: "TSTORE", Code: 0x5d},
	{Name: "MCOPY", Code: 0x5e},
	// PUSH0 through PUSH32 all carry the Push flag.
	{Name: "PUSH0", Code: 0x5f, Push: true},
	{Name: "PUSH1", Code: 0x60, Push: true},
	{Name: "PUSH2", Code: 0x61, Push: true},
	{Name: "PUSH3", Code: 0x62, Push: true},
	{Name: "PUSH4", Code: 0x63, Push: true},
	{Name: "PUSH5", Code: 0x64, Push: true},
	{Name: "PUSH6", Code: 0x65, Push: true},
	{Name: "PUSH7", Code: 0x66, Push: true},
	{Name: "PUSH8", Code: 0x67, Push: true},
	{Name: "PUSH9", Code: 0x68, Push: true},
	{Name: "PUSH10", Code: 0x69, Push: true},
	{Name: "PUSH11", Code: 0x6a, Push: true},
	{Name: "PUSH12", Code: 0x6b, Push: true},
	{Name: "PUSH13", Code: 0x6c, Push: true},
	{Name: "PUSH14", Code: 0x6d, Push: true},
	{Name: "PUSH15", Code: 0x6e, Push: true},
	{Name: "PUSH16", Code: 0x6f, Push: true},
	{Name: "PUSH17", Code: 0x70, Push: true},
	{Name: "PUSH18", Code: 0x71, Push: true},
	{Name: "PUSH19", Code: 0x72, Push: true},
	{Name: "PUSH20", Code: 0x73, Push: true},
	{Name: "PUSH21", Code: 0x74, Push: true},
	{Name: "PUSH22", Code: 0x75, Push: true},
	{Name: "PUSH23", Code: 0x76, Push: true},
	{Name: "PUSH24", Code: 0x77, Push: true},
	{Name: "PUSH25", Code: 0x78, Push: true},
	{Name: "PUSH26", Code: 0x79, Push: true},
	{Name: "PUSH27", Code: 0x7a, Push: true},
	{Name: "PUSH28", Code: 0x7b, Push: true},
	{Name: "PUSH29", Code: 0x7c, Push: true},
	{Name: "PUSH30", Code: 0x7d, Push: true},
	{Name: "PUSH31", Code: 0x7e, Push: true},
	{Name: "PUSH32", Code: 0x7f, Push: true},
	{Name: "DUP1", Code: 0x80},
	{Name: "DUP2", Code: 0x81},
	{Name: "DUP3", Code: 0x82},
	{Name: "DUP4", Code: 0x83},
	{Name: "DUP5", Code: 0x84},
	{Name: "DUP6", Code: 0x85},
	{Name: "DUP7", Code: 0x86},
	{Name: "DUP8", Code: 0x87},
	{Name: "DUP9", Code: 0x88},
	{Name: "DUP10", Code: 0x89},
	{Name: "DUP11", Code: 0x8a},
	{Name: "DUP12", Code: 0x8b},
	{Name: "DUP13", Code: 0x8c},
	{Name: "DUP14", Code: 0x8d},
	{Name: "DUP15", Code: 0x8e},
	{Name: "DUP16", Code: 0x8f},
	{Name: "SWAP1", Code: 0x90},
	{Name: "SWAP2", Code: 0x91},
	{Name: "SWAP3", Code: 0x92},
	{Name: "SWAP4", Code: 0x93},
	{Name: "SWAP5", Code: 0x94},
	{Name: "SWAP6", Code: 0x95},
	{Name: "SWAP7", Code: 0x96},
	{Name: "SWAP8", Code: 0x97},
	{Name: "SWAP9", Code: 0x98},
	{Name: "SWAP10", Code: 0x99},
	{Name: "SWAP11", Code: 0x9a},
	{Name: "SWAP12", Code: 0x9b},
	{Name: "SWAP13", Code: 0x9c},
	{Name: "SWAP14", Code: 0x9d},
	{Name: "SWAP15", Code: 0x9e},
	{Name: "SWAP16", Code: 0x9f},
	{Name: "LOG0", Code: 0xa0},
	{Name: "LOG1", Code: 0xa1},
	{Name: "LOG2", Code: 0xa2},
	{Name: "LOG3", Code: 0xa3},
	{Name: "LOG4", Code: 0xa4},
	{Name: "CREATE", Code: 0xf0},
	{Name: "CALL", Code: 0xf1},
	{Name: "CALLCODE", Code: 0xf2},
	{Name: "RETURN", Code: 0xf3, Term: true},
	{Name: "DELEGATECALL", Code: 0xf4},
	{Name: "CREATE2", Code: 0xf5},
	{Name: "STATICCALL", Code: 0xfa},
	{Name: "REVERT", Code: 0xfd, Term: true},
	// SELFDESTRUCT and SENDALL are two names for the same code.
	{Name: "SELFDESTRUCT", Code: 0xff, Term: true},
	{Name: "SENDALL", Code: 0xff, Term: true},
}
195 | 
196 | var opm = computeOpsMap()
197 | 
198 | func computeOpsMap() map[string]*Op {
199 | 	m := make(map[string]*Op, len(oplist))
200 | 	for _, op := range oplist {
201 | 		if m[op.Name] != nil {
202 | 			panic("duplicate op " + op.Name)
203 | 		}
204 | 		m[op.Name] = op
205 | 	}
206 | 	return m
207 | }
208 | 


--------------------------------------------------------------------------------
/internal/lzint/value.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2025 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package lzint
 18 | 
 19 | import (
 20 | 	"errors"
 21 | 	"fmt"
 22 | 	"math/big"
 23 | 	"strings"
 24 | )
 25 | 
// Errors returned by Value.Bytes when the value has no byte representation.
var (
	// errNegativeBytes is returned for negative integers.
	errNegativeBytes = errors.New("negative int in bytes context")
	// errOddHexBytes is returned for values parsed from a hexadecimal literal
	// with an odd number of digits.
	errOddHexBytes   = errors.New("odd-length hex in bytes context")
)

// Flag bits stored in Value.flag. They record how the value was created.
const (
	// flagWasHex is set by ParseNumberLiteral for hexadecimal literals.
	flagWasHex byte = 1 << iota
	// flagHexOddLength is set in addition to flagWasHex when the literal has
	// an odd number of hex digits. Bytes rejects such values.
	flagHexOddLength
	// flagWasBytes is set by FromBytes.
	flagWasBytes
)
 36 | 
 37 | // Value is a big-integer that also tracks the number of leading zero bytes.
 38 | // This type is used to represent values during macro evaluation.
 39 | //
 40 | // Storing values this way may seem like a strange choice at first, so let me
 41 | // explain: The Geas language is meant to be simple, and values generally do not
 42 | // have a 'type'. Many macro operations are simple arithmetic and work with
 43 | // integers, and the EVM itself also operates on a stack of 256bit integers. So
 44 | // using integers as the basic type was an easy choice. However, Geas has a few
 45 | // operations on bytes as well (such as hash functions) and the language contains
 46 | // string literals. I didn't want to introduce a type system into the evaluator
 47 | // just to support them, since this would make some macros incompatible with
 48 | // others.
 49 | //
 50 | // Instead, I have chosen to stick to all values being integers, but this
 51 | // introduces some problems when an evaluation produces leading zero bytes. They
 52 | // cannot be represented by *big.Int, and thus using a hash function or including
 53 | // such values into the bytecode output would produce unexpected results.
 54 | //
 55 | // So this is how this type came to be. When a Value is created from a decimal
 56 | // integer literal, it is just an integer with no special properties. However,
 57 | // when created from a hexadecimal literal, string, or []byte in Go, leading
 58 | // zeros may be created and will be reproduced when the value is converted to
 59 | // []byte. Using an arithmetic operation on a value with leading zeros will drop
 60 | // them though.
 61 | type Value struct {
 62 | 	int   big.Int
 63 | 	lznib uint // leading zero nibble count
 64 | 	flag  byte
 65 | }
 66 | 
 67 | func FromInt(i *big.Int) *Value {
 68 | 	if i == nil {
 69 | 		panic("nil int")
 70 | 	}
 71 | 	return &Value{int: *i}
 72 | }
 73 | 
 74 | func FromInt64(i int64) *Value {
 75 | 	v := new(Value)
 76 | 	v.int.SetInt64(i)
 77 | 	return v
 78 | }
 79 | 
 80 | func FromBytes(slice []byte) *Value {
 81 | 	v := new(Value)
 82 | 	for _, b := range slice {
 83 | 		if b != 0 {
 84 | 			break
 85 | 		}
 86 | 		v.lznib += 2
 87 | 	}
 88 | 	v.int.SetBytes(slice)
 89 | 	v.flag = flagWasBytes
 90 | 	return v
 91 | }
 92 | 
 93 | // ParseNumberLiteral creates a value from a number literal.
 94 | func ParseNumberLiteral(text string) (*Value, error) {
 95 | 	switch {
 96 | 	case len(text) == 0:
 97 | 		return nil, errors.New("empty number text")
 98 | 
 99 | 	case strings.HasPrefix(text, "0x") || strings.HasPrefix(text, "0X"):
100 | 		hex := text[2:]
101 | 		v := &Value{flag: flagWasHex}
102 | 		if len(hex)%2 != 0 {
103 | 			v.flag |= flagHexOddLength
104 | 		}
105 | 		for _, c := range hex {
106 | 			if c != '0' {
107 | 				break
108 | 			}
109 | 			v.lznib++
110 | 		}
111 | 		if _, ok := v.int.SetString(hex, 16); !ok {
112 | 			return nil, fmt.Errorf("invalid hex: %s", text)
113 | 		}
114 | 		return v, nil
115 | 
116 | 	case len(text) > 1 && text[0] == '0':
117 | 		return nil, errors.New("leading zero not allowed in decimal integer")
118 | 
119 | 	default:
120 | 		var v Value
121 | 		if _, ok := v.int.SetString(text, 10); !ok {
122 | 			return nil, fmt.Errorf("invalid number %s", text)
123 | 		}
124 | 		return &v, nil
125 | 	}
126 | }
127 | 
128 | // Int converts the value to a bigint.
129 | // This is always possible. Leading zero bytes are dropped.
130 | func (v *Value) Int() *big.Int {
131 | 	if v == nil {
132 | 		return nil
133 | 	}
134 | 	return &v.int
135 | }
136 | 
137 | // Bytes converts the value to a byte slice. This returns an error if the
138 | // conversion is lossy, i.e. if the integer is negative or was an odd-length literal.
139 | func (v *Value) Bytes() ([]byte, error) {
140 | 	if v == nil {
141 | 		return nil, nil
142 | 	}
143 | 	if v.int.Sign() < 0 {
144 | 		return nil, errNegativeBytes
145 | 	}
146 | 	if v.flag&flagHexOddLength != 0 {
147 | 		return nil, errOddHexBytes
148 | 	}
149 | 	b := make([]byte, v.ByteLen())
150 | 	return v.int.FillBytes(b), nil
151 | }
152 | 
153 | // ByteLen returns the length in bytes. This is always equal to the length of the slice
154 | // that Bytes() would return, i.e. leading zeros are counted.
155 | func (v *Value) ByteLen() int64 {
156 | 	if v == nil {
157 | 		return 0
158 | 	}
159 | 	return int64(v.lznib)/2 + (int64(v.int.BitLen())+7)/8
160 | }
161 | 
162 | // IntegerBitLen returns the bit length of v as an integer, i.e. leading zero
163 | // bytes are not counted.
164 | func (v *Value) IntegerBitLen() int64 {
165 | 	if v == nil {
166 | 		return 0
167 | 	}
168 | 	return int64(v.int.BitLen())
169 | }
170 | 
171 | func (v *Value) String() string {
172 | 	switch {
173 | 	case v == nil:
174 | 		return "nil"
175 | 
176 | 	case v.flag&(flagWasHex|flagWasBytes) != 0:
177 | 		var b strings.Builder
178 | 		b.WriteString("0x")
179 | 		for range v.lznib {
180 | 			b.WriteByte('0')
181 | 		}
182 | 		if v.flag&flagWasBytes != 0 {
183 | 			fmt.Fprintf(&b, "%x", v.int.Bytes())
184 | 		} else if v.int.Sign() > 0 {
185 | 			fmt.Fprintf(&b, "%x", &v.int)
186 | 		}
187 | 		return b.String()
188 | 
189 | 	default:
190 | 		return v.int.String()
191 | 	}
192 | }
193 | 


--------------------------------------------------------------------------------
/internal/lzint/value_test.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2025 The go-ethereum Authors
  2 | // This file is part of the go-ethereum library.
  3 | //
  4 | // The go-ethereum library is free software: you can redistribute it and/or modify
  5 | // it under the terms of the GNU Lesser General Public License as published by
  6 | // the Free Software Foundation, either version 3 of the License, or
  7 | // (at your option) any later version.
  8 | //
  9 | // The go-ethereum library is distributed in the hope that it will be useful,
 10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
 11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 12 | // GNU Lesser General Public License for more details.
 13 | //
 14 | // You should have received a copy of the GNU Lesser General Public License
 15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
 16 | 
 17 | package lzint
 18 | 
 19 | import (
 20 | 	"math/big"
 21 | 	"reflect"
 22 | 	"testing"
 23 | )
 24 | 
// valueTests is the table used by TestValue. Each entry holds a value and the
// results expected from its String, ByteLen, IntegerBitLen, Int and Bytes
// methods.
var valueTests = []struct {
	// Name is the subtest name.
	Name             string
	// V is the value under test.
	V                *Value
	// The fields below are the expected results of the methods of V.
	ExpectedString   string
	ExpectedByteLen  int64
	ExpectedBitLen   int64
	ExpectedInt      *big.Int
	// ExpectedBytes is only checked when ExpectedBytesErr is nil.
	ExpectedBytes    []byte
	ExpectedBytesErr error
}{
	{
		Name:            "nil",
		V:               nil,
		ExpectedString:  "nil",
		ExpectedByteLen: 0,
		ExpectedBitLen:  0,
		ExpectedBytes:   nil,
		ExpectedInt:     nil,
	},
	{
		Name:            "Int64(0)",
		V:               FromInt64(0),
		ExpectedString:  "0",
		ExpectedByteLen: 0,
		ExpectedBitLen:  0,
		ExpectedBytes:   []byte{},
		ExpectedInt:     new(big.Int),
	},
	{
		Name:            "Int64(99)",
		V:               FromInt64(99),
		ExpectedString:  "99",
		ExpectedByteLen: 1,
		ExpectedBitLen:  7,
		ExpectedBytes:   []byte{99},
		ExpectedInt:     big.NewInt(99),
	},
	{
		Name:            "Int(256)",
		V:               FromInt(big.NewInt(256)),
		ExpectedString:  "256",
		ExpectedByteLen: 2,
		ExpectedBitLen:  9,
		ExpectedBytes:   []byte{1, 0},
		ExpectedInt:     big.NewInt(256),
	},
	{
		Name:             "Int(-256)",
		V:                FromInt(big.NewInt(-256)),
		ExpectedString:   "-256",
		ExpectedByteLen:  2,
		ExpectedBitLen:   9,
		ExpectedBytesErr: errNegativeBytes,
		ExpectedInt:      big.NewInt(-256),
	},
	{
		Name:            "Bytes(0x)",
		V:               FromBytes([]byte{}),
		ExpectedString:  "0x",
		ExpectedByteLen: 0,
		ExpectedBitLen:  0,
		ExpectedBytes:   []byte{},
		ExpectedInt:     new(big.Int),
	},
	{
		Name:            "Bytes(0x00)",
		V:               FromBytes([]byte{0}),
		ExpectedString:  "0x00",
		ExpectedByteLen: 1,
		ExpectedBitLen:  0,
		ExpectedBytes:   []byte{0},
		ExpectedInt:     new(big.Int),
	},
	{
		Name:            "Bytes(0x00000102)",
		V:               FromBytes([]byte{0, 0, 1, 2}),
		ExpectedString:  "0x00000102",
		ExpectedByteLen: 4,
		ExpectedBitLen:  9,
		ExpectedBytes:   []byte{0, 0, 1, 2},
		ExpectedInt:     new(big.Int).SetBytes([]byte{1, 2}),
	},
	{
		Name:            "NumberLiteral(0)",
		V:               mustParseNum("0"),
		ExpectedString:  "0",
		ExpectedByteLen: 0,
		ExpectedBitLen:  0,
		ExpectedBytes:   []byte{},
		ExpectedInt:     big.NewInt(0),
	},
	{
		Name:            "NumberLiteral(99)",
		V:               mustParseNum("99"),
		ExpectedString:  "99",
		ExpectedByteLen: 1,
		ExpectedBitLen:  7,
		ExpectedBytes:   []byte{99},
		ExpectedInt:     big.NewInt(99),
	},
	{
		Name:            "NumberLiteral(0xff01)",
		V:               mustParseNum("0xff01"),
		ExpectedString:  "0xff01",
		ExpectedByteLen: 2,
		ExpectedBitLen:  16,
		ExpectedBytes:   []byte{0xff, 0x01},
		ExpectedInt:     big.NewInt(0xff01),
	},
	{
		Name:            "NumberLiteral(0x00000099ff01)",
		V:               mustParseNum("0x00000099ff01"),
		ExpectedString:  "0x00000099ff01",
		ExpectedByteLen: 6,
		ExpectedBitLen:  24,
		ExpectedBytes:   []byte{0x00, 0x00, 0x00, 0x99, 0xff, 01},
		ExpectedInt:     big.NewInt(0x99ff01),
	},
	// The remaining literals have an odd number of hex digits, so Bytes is
	// expected to fail for them.
	{
		Name:             "NumberLiteral(0x0)",
		V:                mustParseNum("0x0"),
		ExpectedString:   "0x0",
		ExpectedByteLen:  0,
		ExpectedBitLen:   0,
		ExpectedBytesErr: errOddHexBytes,
		ExpectedInt:      big.NewInt(0),
	},
	{
		Name:             "NumberLiteral(0xf)",
		V:                mustParseNum("0xf"),
		ExpectedString:   "0xf",
		ExpectedByteLen:  1,
		ExpectedBitLen:   4,
		ExpectedBytesErr: errOddHexBytes,
		ExpectedInt:      big.NewInt(15),
	},
	{
		Name:             "NumberLiteral(0x456)",
		V:                mustParseNum("0x456"),
		ExpectedString:   "0x456",
		ExpectedByteLen:  2,
		ExpectedBitLen:   11,
		ExpectedBytesErr: errOddHexBytes,
		ExpectedInt:      big.NewInt(0x456),
	},
}
171 | 
172 | func mustParseNum(input string) *Value {
173 | 	v, err := ParseNumberLiteral(input)
174 | 	if err != nil {
175 | 		panic(err)
176 | 	}
177 | 	return v
178 | }
179 | 
180 | func TestValue(t *testing.T) {
181 | 	for _, test := range valueTests {
182 | 		t.Run(test.Name, func(t *testing.T) {
183 | 			if s := test.V.String(); s != test.ExpectedString {
184 | 				t.Errorf("wrong String: %q", s)
185 | 			}
186 | 			if l := test.V.ByteLen(); l != test.ExpectedByteLen {
187 | 				t.Errorf("wrong ByteLen: %d", l)
188 | 			}
189 | 			if l := test.V.IntegerBitLen(); l != test.ExpectedBitLen {
190 | 				t.Errorf("wrong BitLen: %d", l)
191 | 			}
192 | 
193 | 			i := test.V.Int()
194 | 			if i == nil && test.ExpectedInt != nil {
195 | 				t.Errorf("wrong Int: <nil>, expected %d", test.ExpectedInt)
196 | 			} else if i != nil && test.ExpectedInt == nil {
197 | 				t.Errorf("wrong Int: %d, expected <nil>", i)
198 | 			} else if i.Cmp(test.ExpectedInt) != 0 {
199 | 				t.Errorf("wrong Int: %d, expected %d", i, test.ExpectedInt)
200 | 			}
201 | 
202 | 			b, err := test.V.Bytes()
203 | 			if test.ExpectedBytesErr != nil {
204 | 				if err == nil {
205 | 					t.Errorf("Bytes did not return expected error")
206 | 				} else if err != test.ExpectedBytesErr {
207 | 					t.Errorf("Bytes returned wrong error: %v", err)
208 | 				}
209 | 			} else {
210 | 				if err != nil {
211 | 					t.Errorf("Bytes returned error: %v", err)
212 | 				} else if !reflect.DeepEqual(b, test.ExpectedBytes) {
213 | 					t.Errorf("wrong Bytes: %+v", b)
214 | 				}
215 | 			}
216 | 		})
217 | 	}
218 | }
219 | 
// literalErrorTests lists inputs that ParseNumberLiteral is expected to
// reject, together with the expected error message.
var literalErrorTests = []struct {
	Input string
	Err   string
}{
	{Input: "0xag", Err: "invalid hex: 0xag"},
	{Input: "006", Err: "leading zero not allowed in decimal integer"},
	{Input: "", Err: "empty number text"},
	{Input: "42g", Err: "invalid number 42g"},
}
241 | 
242 | func TestParseLiteral(t *testing.T) {
243 | 	for _, test := range literalErrorTests {
244 | 		_, err := ParseNumberLiteral(test.Input)
245 | 		if err == nil {
246 | 			t.Errorf("input %q: expected error", test.Input)
247 | 		} else if err.Error() != test.Err {
248 | 			t.Errorf("input %q: wrong error %v", test.Input, err)
249 | 		}
250 | 	}
251 | }
252 | 


--------------------------------------------------------------------------------
/internal/set/set.go:
--------------------------------------------------------------------------------
 1 | // Copyright 2024 The go-ethereum Authors
 2 | // This file is part of the go-ethereum library.
 3 | //
 4 | // The go-ethereum library is free software: you can redistribute it and/or modify
 5 | // it under the terms of the GNU Lesser General Public License as published by
 6 | // the Free Software Foundation, either version 3 of the License, or
 7 | // (at your option) any later version.
 8 | //
 9 | // The go-ethereum library is distributed in the hope that it will be useful,
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | // GNU Lesser General Public License for more details.
13 | //
14 | // You should have received a copy of the GNU Lesser General Public License
15 | // along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | package set
18 | 
19 | import (
20 | 	"maps"
21 | 	"slices"
22 | )
23 | 
24 | // Set is a wrapper over map.
25 | // I don't want to depend on a set library just for this.
26 | type Set[X comparable] map[X]struct{}
27 | 
28 | func (s Set[X]) Add(k X) {
29 | 	s[k] = struct{}{}
30 | }
31 | 
32 | func (s Set[X]) Includes(k X) bool {
33 | 	_, ok := s[k]
34 | 	return ok
35 | }
36 | 
37 | func (s Set[X]) Members() []X {
38 | 	return slices.Collect(maps.Keys(s))
39 | }
40 | 
41 | // Diff returns the elements of a which are not in b.
42 | func Diff[X comparable](a, b []X) []X {
43 | 	set := make(Set[X], len(b))
44 | 	for _, x := range b {
45 | 		set.Add(x)
46 | 	}
47 | 	var diff []X
48 | 	for _, x := range a {
49 | 		if !set.Includes(x) {
50 | 			diff = append(diff, x)
51 | 		}
52 | 	}
53 | 	return diff
54 | }
55 | 


--------------------------------------------------------------------------------